1418 lines
46 KiB
C
1418 lines
46 KiB
C
/*
|
|
* Kneron Model API Manager
|
|
*
|
|
* Copyright (C) 2019 Kneron, Inc. All rights reserved.
|
|
*
|
|
*/
|
|
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include "project.h"
|
|
|
|
#include "base.h"
|
|
#include "kdrv_ipc.h" /*for NCPU triggering */
|
|
#include "kdrv_clock.h" /* for kdrv_delay_us() */
|
|
#include "kdev_flash.h"
|
|
|
|
#include "kmdw_ipc.h"
|
|
#include "kmdw_model.h"
|
|
#include "kmdw_console.h" /*for dbg_msg */
|
|
#include "kmdw_memxfer.h" /*for flash access */
|
|
#include "kmdw_memory.h"
|
|
#include "kmdw_utils_crc.h"
|
|
|
|
#define DEBUG 0
|
|
#define OUTPUT_MEM_ADDR2_SIZE 0x100000 /* 1MB, for DME parallel buffer */
|
|
#define OUTPUT_MEM_ADDR3_SIZE 0x5000 /* for MBSSD anchor data */
|
|
|
|
#define FLAG_KMDW_MODEL_ABORT BIT(29) // Event flag to notify abort
|
|
#define FLAG_KMDW_MODEL_FROM_NCPU BIT(30) // Event flag to know NCPU is done
|
|
#define FLAG_KMDW_MODEL_FROM_NPU BIT(28) // Event flag to know NPU is done
|
|
|
|
#define MODEL_INF_TIMEOUT (2000) // timeout milli-secs for waiting npcu response
|
|
|
|
#define KDP_FLASH_FW_INFO_SIZE 0x1000
|
|
|
|
#ifdef EMBED_CMP_NPU
|
|
/* the following is for specific dense model wt/cmd mem modification */
|
|
/*================================================*/
|
|
#define WT_DATA_SIZE_BYTE 272
|
|
|
|
#define CONF_QUEUE 0
|
|
#define GETW_QUEUE 3
|
|
|
|
#define CONF_GETW0_CMD_OFFSET 0x0038
|
|
#define CONF_WDMA0_DST0_CMD_OFFSET 0x00f0
|
|
|
|
#define ACL_NPU_GETW0 0x2e
|
|
#define ACH_NPU_GETW0 0x2f
|
|
#define ACL_NPU_WDMA0_DST0 0x36
|
|
#define ACH_NPU_WDMA0_DST0 0x37
|
|
|
|
#define MASK_2 0x0003
|
|
#define MASK_10 0x03FF
|
|
#define MASK_16 0x00FFFF
|
|
|
|
#define VAL_ACL(x) (((x)&0xffff))
|
|
#define VAL_ACH(x) (((x) >> 16) & 0xffff)
|
|
|
|
#define SetBitsVal(tgt, val, mask, offset) \
|
|
((tgt) &= ~((mask) << (offset))); \
|
|
((tgt) |= (((val) & (mask)) << (offset)))
|
|
|
|
/*================================================*/
|
|
#endif // EMBED_CMP_NPU
|
|
|
|
|
|
extern const struct s_kdp_memxfer kdp_memxfer_module;
|
|
|
|
/* Type of Operations */
|
|
enum {
|
|
NODE_TYPE_IN,
|
|
NODE_TYPE_CPU,
|
|
NODE_TYPE_OUTPUT,
|
|
NODE_TYPE_DATA,
|
|
NODE_TYPE_SUPER,
|
|
NODE_TYPE_INPUT
|
|
};
|
|
|
|
/* Structures of Data Nodes */
|
|
struct super_node_s {
|
|
uint32_t node_id;
|
|
uint32_t addr;
|
|
uint32_t row_start;
|
|
uint32_t col_start;
|
|
uint32_t ch_start;
|
|
uint32_t row_length;
|
|
uint32_t col_length;
|
|
uint32_t ch_length;
|
|
};
|
|
|
|
struct data_node_s {
|
|
uint32_t node_id;
|
|
uint32_t supernum;
|
|
uint32_t data_format;
|
|
uint32_t data_radix;
|
|
uint32_t data_scale;
|
|
uint32_t row_start;
|
|
uint32_t col_start;
|
|
uint32_t ch_start;
|
|
uint32_t row_length;
|
|
uint32_t col_length;
|
|
uint32_t ch_length;
|
|
struct super_node_s node_list[1];
|
|
};
|
|
|
|
/* Structure of Input Operation */
|
|
struct in_node_s {
|
|
uint32_t node_id;
|
|
uint32_t next_npu;
|
|
};
|
|
|
|
/* Structure of Output Operation */
|
|
struct out_node_s {
|
|
uint32_t node_id;
|
|
uint32_t supernum;
|
|
uint32_t data_format;
|
|
uint32_t row_start;
|
|
uint32_t col_start;
|
|
uint32_t ch_start;
|
|
uint32_t row_length;
|
|
uint32_t col_length;
|
|
uint32_t ch_length;
|
|
uint32_t output_index;
|
|
uint32_t output_radix;
|
|
uint32_t output_scale;
|
|
struct super_node_s node_list[1];
|
|
};
|
|
|
|
/* Structure of CPU Operation */
|
|
struct cpu_node_s {
|
|
uint32_t node_id;
|
|
uint32_t input_datanode_num;
|
|
uint32_t op_type;
|
|
/* There will be more parameter here for cpu operation */
|
|
uint32_t in_num_row;
|
|
uint32_t in_num_col;
|
|
uint32_t in_num_ch;
|
|
uint32_t out_num_row;
|
|
uint32_t out_num_col;
|
|
uint32_t out_num_ch;
|
|
uint32_t h_pad;
|
|
uint32_t w_pad;
|
|
uint32_t kernel_h;
|
|
uint32_t kernel_w;
|
|
uint32_t stride_h;
|
|
uint32_t stride_w;
|
|
struct data_node_s output_datanode;
|
|
struct data_node_s input_datanode[1];
|
|
};
|
|
|
|
/* Structure of CNN Header in setup.bin */
|
|
struct cnn_header_s {
|
|
uint32_t crc;
|
|
uint32_t version;
|
|
uint32_t key_offset;
|
|
uint32_t model_type;
|
|
uint32_t app_type;
|
|
uint32_t dram_start;
|
|
uint32_t dram_size;
|
|
uint32_t input_row;
|
|
uint32_t input_col;
|
|
uint32_t input_channel;
|
|
uint32_t cmd_start;
|
|
uint32_t cmd_size;
|
|
uint32_t weight_start;
|
|
uint32_t weight_size;
|
|
uint32_t input_start;
|
|
uint32_t input_size;
|
|
uint32_t input_radix;
|
|
uint32_t output_nums;
|
|
};
|
|
|
|
typedef struct {
|
|
uint32_t n_model_source; // 0: not set, 1: from flash, 2: from ddr
|
|
uint32_t n_model_count; // model count
|
|
struct kdp_model_s p_model_info[KMDW_MODEL_MAX_MODEL_COUNT]; // save model info generated by compiler
|
|
uint8_t pn_is_model_loaded_table[KMDW_MODEL_MAX_MODEL_COUNT]; // flag table to indicate if model is loaded
|
|
uint32_t n_ddr_addr_model_end; // DDR address of model end = user data start
|
|
|
|
int32_t n_model_slot_index; // scpu_to_ncpu->model_slot_index
|
|
} kmdw_model_data_t;
|
|
|
|
static kmdw_model_data_t s_model_data = {0};
|
|
|
|
typedef struct {
|
|
int32_t raw_img_idx;
|
|
osEventFlagsId_t evt_caller; // event to know/control ncpu
|
|
uint32_t caller_e;
|
|
osEventFlagsId_t evt_result; // event to know/control npu
|
|
uint32_t result_e;
|
|
} kmdw_img_data_t;
|
|
|
|
// ptr to the buf for uploaded fw info from host
|
|
static kmdw_model_fw_info_t *s_fw_info_buf_p = NULL;
|
|
// ptr to the buf for uploaded fw info from flash
|
|
static bool s_model_loaded_from_flash = false;
|
|
|
|
static kmdw_img_data_t s_img_data[IPC_IMAGE_ACTIVE_MAX] = {0};
|
|
static int32_t s_current_ipc_idx = 0;
|
|
static int32_t s_next_ipc_idx = 0;
|
|
|
|
static bool ModelFromDDR = false; // check model is from flash : false, ddr : true
|
|
|
|
/* ############################
|
|
* ## Static Functions ##
|
|
* ############################ */
|
|
/**
|
|
* @brief init ddr space for s_fw_info_buf_p
|
|
*
|
|
*/
|
|
static void _init_fw_info_buf(void)
|
|
{
|
|
if (NULL == s_fw_info_buf_p) {
|
|
s_fw_info_buf_p = (kmdw_model_fw_info_t*)kmdw_ddr_reserve(KDP_FLASH_FW_INFO_SIZE);
|
|
|
|
if (NULL == s_fw_info_buf_p)
|
|
critical_msg("insufficent memory for reading fw_info from flash\n");
|
|
}
|
|
}
|
|
|
|
|
|
#ifdef EMBED_CMP_NPU
|
|
static inline void udt_conf_cmd(void *cmd_addr, int reg_idx, uint16_t val16b, int queue)
|
|
{
|
|
uint32_t val = 0x80000000;
|
|
|
|
SetBitsVal(val, queue, MASK_2, 26);
|
|
SetBitsVal(val, reg_idx, MASK_10, 16);
|
|
SetBitsVal(val, val16b, MASK_16, 0);
|
|
|
|
memcpy(cmd_addr, &val, sizeof(val));
|
|
}
|
|
|
|
static void udt_npu_model_mem(uint32_t wt_addr, uint32_t out_addr, void *cmd_addr)
|
|
{
|
|
uint32_t *dst = (uint32_t *)((char *)cmd_addr + CONF_GETW0_CMD_OFFSET);
|
|
udt_conf_cmd(dst, ACL_NPU_GETW0, VAL_ACL(wt_addr), GETW_QUEUE);
|
|
udt_conf_cmd(dst + 1, ACH_NPU_GETW0, VAL_ACH(wt_addr), GETW_QUEUE);
|
|
|
|
dst = (uint32_t *)((char *)cmd_addr + CONF_WDMA0_DST0_CMD_OFFSET);
|
|
udt_conf_cmd(dst, ACL_NPU_WDMA0_DST0, VAL_ACL(out_addr), CONF_QUEUE);
|
|
udt_conf_cmd(dst + 1, ACH_NPU_WDMA0_DST0, VAL_ACH(out_addr), CONF_QUEUE);
|
|
}
|
|
#endif // EMBED_CMP_NPU
|
|
|
|
/**
|
|
* @brief load fw info from flash
|
|
* @return 0: OK, -1: fail
|
|
* @note NULL means failed; non-zero ptr means OK
|
|
*/
|
|
static kmdw_model_fw_info_t* _load_flash_model_info(void)
|
|
{
|
|
//load model from flash once and reuse loaded data, until reload
|
|
if (false == s_model_loaded_from_flash ) {
|
|
s_model_loaded_from_flash = true;
|
|
kdp_memxfer_module.flash_to_ddr((uint32_t)s_fw_info_buf_p, FLASH_MODEL_FW_INFO_ADDR, KDP_FLASH_FW_INFO_SIZE);
|
|
}
|
|
|
|
return s_fw_info_buf_p;
|
|
}
|
|
|
|
|
|
/**
|
|
* @brief reset s_model_data
|
|
*/
|
|
static void _reset_model_data(void)
|
|
{
|
|
s_model_data.n_model_count = 0;
|
|
s_model_data.n_model_source = 0;
|
|
memset( s_model_data.p_model_info, 0, sizeof(s_model_data.p_model_info));
|
|
memset( s_model_data.pn_is_model_loaded_table, 0 , sizeof(s_model_data.pn_is_model_loaded_table));
|
|
|
|
//can't reset the following variable which maintains DDR boundary for model
|
|
//n_last_model_space_end_addr
|
|
return;
|
|
}
|
|
|
|
/**
|
|
* @brief check flash read with timeout_ms
|
|
* @param timeout_ms timeout in ms
|
|
* @return flash ready ready time in ms
|
|
* -1 means timeout hit
|
|
*/
|
|
//static int32_t _flash_wait_ready(int timeout_ms)
|
|
//{
|
|
// kdev_flash_status_t flash_status;
|
|
// int i;
|
|
|
|
// for (i = 0; i < timeout_ms; i++) {
|
|
// flash_status = kdev_flash_get_status();
|
|
// if (flash_status.busy == 0) break;
|
|
// kdrv_delay_us(1*1000);
|
|
// }
|
|
// if (i == timeout_ms) i = -1; // we have timed out
|
|
// return i;
|
|
//}
|
|
|
|
/**
|
|
* @brief convert modeltype to modelInfo array index
|
|
* @param model_type_p: model type (defined in model_type.h)
|
|
* @return modelInfo model index (starts from 0)
|
|
* -1 means not such modeltype in flash
|
|
*/
|
|
static int8_t _get_model_info_array_index_by_model_type(uint32_t model_type_p)
|
|
{
|
|
int i;
|
|
for(i=0 ; i < s_model_data.n_model_count; i++) {
|
|
if(s_model_data.p_model_info[i].model_type == model_type_p)
|
|
return i;
|
|
}
|
|
|
|
return -1;
|
|
}
|
|
|
|
/**
|
|
* @brief get fw info extension data from fw_info ptr
|
|
* @param[in] fw_info_p the ptr to fw_info
|
|
* @return the ptr to fw_info_ext
|
|
*/
|
|
static kmdw_model_fw_info_ext_t*
|
|
_get_fw_info_ext_by_fw_info(kmdw_model_fw_info_t* fw_info_p)
|
|
{
|
|
if(NULL == fw_info_p)
|
|
return NULL;
|
|
else {
|
|
kmdw_model_fw_info_ext_t* ret = NULL;
|
|
uint32_t count;
|
|
uint32_t offset;
|
|
|
|
count = fw_info_p->model_count;
|
|
offset = sizeof(struct kdp_model_s) * count;
|
|
ret = (kmdw_model_fw_info_ext_t *)((uint32_t)fw_info_p->models + offset);
|
|
return ret;
|
|
}
|
|
}
|
|
|
|
/**
|
|
* @brief load model information generated by compiler
|
|
* @param [in] is_model_from_ddr: if model is from ddr/host command
|
|
* @param [in] is_reload : is force reload
|
|
* @return model count
|
|
* 0 means no model is loaded in this call
|
|
*/
|
|
/**
 * @brief Load the model information table generated by the compiler.
 *
 * Populates s_model_data (count, per-model info, DDR end address, source)
 * either from the host-uploaded DDR buffer or from flash. Cached: if a
 * table is already loaded and reload is false, returns immediately.
 *
 * @param [in] from_ddr true: read fw_info already uploaded to DDR by host;
 *                      false: read fw_info from flash
 * @param [in] reload   true: discard the cached table and force a re-read
 * @return model count; 0 means no model is loaded in this call (error or
 *         empty source)
 */
static int32_t _load_model_info(bool from_ddr, bool reload)
{
    /* fast path: table already populated and caller did not force reload */
    if (s_model_data.n_model_count && !reload) {
        return s_model_data.n_model_count;
    }

    if (reload) {
        _reset_model_data();
        s_model_loaded_from_flash = false; /* invalidate flash cache too */
    }

    kmdw_model_fw_info_t *model_info_p = NULL;
    kmdw_model_fw_info_ext_t *model_info2_p = NULL;

    // load model Info
    if (from_ddr) {
        /* host uploaded fw_info directly into s_fw_info_buf_p */
        model_info_p = s_fw_info_buf_p;
        model_info2_p = _get_fw_info_ext_by_fw_info(model_info_p);

        if((NULL == model_info_p) || (NULL == model_info2_p) ) {
            s_model_data.n_model_count = 0;
            return 0;
        }

        // Use the version number for new fw_info structure. Model number is in use for dynamic model execution (DME)
        //if (*(uint32_t*)(base_addr + 8) == 0) {
        //    return 0; //error, model_info is not ready
        //}

        // get model count
        s_model_data.n_model_count = model_info_p->model_count;
        dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);

        if(0 == s_model_data.n_model_count) {
            info_msg("[info] model is not in DDR!!\n");
            return 0;
        } else if (s_model_data.n_model_count > KMDW_MODEL_MAX_MODEL_COUNT) {
            /* reject: table arrays are sized KMDW_MODEL_MAX_MODEL_COUNT */
            info_msg("[ERR] model count is over MAX limit=%d!!\n", KMDW_MODEL_MAX_MODEL_COUNT);
            s_model_data.n_model_count = 0;
            return 0;
        } else {
            dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);
        }

        // copy per-model descriptors into the local cache
        memcpy(s_model_data.p_model_info, (const void*)model_info_p->models,
               sizeof(struct kdp_model_s)*s_model_data.n_model_count);

        // get ddr model end addr (= start of user data) and sanity-check it
        s_model_data.n_ddr_addr_model_end = model_info2_p->model_dram_addr_end;
        if (s_model_data.n_ddr_addr_model_end >= kmdw_ddr_get_heap_tail()) {
            err_msg("modelInfo: DDR end address: 0x%x over (>=) boundary 0x%x\n", s_model_data.n_ddr_addr_model_end, kmdw_ddr_get_heap_tail());
            return 0;
        } else {
            dbg_msg("modelInfo: DDR end address: 0x%x\n", s_model_data.n_ddr_addr_model_end);
        }

        // set model source
        s_model_data.n_model_source = 2; // from ddr

    } else { // models are stored in flash

        model_info_p = _load_flash_model_info(); // this function updates data on s_fw_info_buf_p
        model_info2_p = _get_fw_info_ext_by_fw_info(model_info_p);

        if((NULL == model_info_p) || (NULL == model_info2_p) ) {
            s_model_data.n_model_count = 0;
            return 0;
        }

        // get model count
        s_model_data.n_model_count = model_info_p->model_count;
        dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);

        /* 0xFFFFFFFF = erased flash, i.e. fw_info was never written */
        if (s_model_data.n_model_count == 0xFFFFFFFF) {
            err_msg("[info] model is not in flash!!\n");
            s_model_data.n_model_count = 0;
            return 0;
        } else if (s_model_data.n_model_count > KMDW_MODEL_MAX_MODEL_COUNT) {
            info_msg("[ERR] model count is over MAX limit=%d!!\n", KMDW_MODEL_MAX_MODEL_COUNT);
            s_model_data.n_model_count = 0;
            return 0;
        } else {
            dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);
        }

        // get model info
        //FIXME, why need to clone to s_fw_info_buf_p
        //memcpy(s_fw_info_buf_p, (void *)model_info_p, KDP_FLASH_FW_INFO_SIZE);

        memcpy(s_model_data.p_model_info, model_info_p->models, sizeof(struct kdp_model_s)*s_model_data.n_model_count);

        // get ddr model end addr and sanity-check it against the heap tail
        s_model_data.n_ddr_addr_model_end = model_info2_p->model_dram_addr_end;

        if (s_model_data.n_ddr_addr_model_end >= kmdw_ddr_get_heap_tail()) {
            err_msg("modelInfo: DDR end address: 0x%x over (>=) boundary 0x%x\n", s_model_data.n_ddr_addr_model_end, kmdw_ddr_get_heap_tail());
            return 0;
        } else {
            dbg_msg("modelInfo: DDR end address: 0x%x\n", s_model_data.n_ddr_addr_model_end);
        }

        // set model source
        s_model_data.n_model_source = 1; // from flash
    }

    // for support of dynamic model execution:
    // clear the word at offset 8 of the fw_info buffer
    *(uint32_t*)(((char*)s_fw_info_buf_p) + 8) = 0; //trick: we will check the work to see if model_info is uploaded

    return s_model_data.n_model_count;
}
|
|
|
|
/**
|
|
* @brief load specific model by model info index (the order in flash)
|
|
* @param model_index_p: model info index
|
|
* @return 0: model not ready, 1: model is loaded
|
|
*/
|
|
static int32_t _load_model(uint8_t model_index_p/*starts from 0*/)
|
|
{
|
|
uint32_t ddr_addr_models_head; //start point = the 1st model's cmd.bin
|
|
uint32_t ddr_addr_offset;
|
|
uint32_t flash_addr;
|
|
uint32_t len_to_load;
|
|
|
|
struct kdp_model_s *p_model;
|
|
|
|
if(s_model_data.n_model_count == 0)
|
|
return 0; // model info is not ready
|
|
|
|
if(s_model_data.pn_is_model_loaded_table[model_index_p] == 1 )
|
|
return 1; //model has been loaded
|
|
else
|
|
s_model_data.pn_is_model_loaded_table[model_index_p] = 1;
|
|
|
|
//load model with (index=model_index_p) from flash to DDR
|
|
ddr_addr_models_head = s_model_data.p_model_info[0].cmd_mem_addr; //start point = the 1st model's cmd.bin
|
|
|
|
//load cmd + weight + setup together
|
|
p_model = &(s_model_data.p_model_info[model_index_p]);
|
|
ddr_addr_offset = p_model->cmd_mem_addr - ddr_addr_models_head;
|
|
|
|
flash_addr = FLASH_MODEL_ALL_ADDR + ddr_addr_offset;
|
|
|
|
len_to_load = ALIGN16(p_model->cmd_mem_len) +
|
|
ALIGN16(p_model->weight_mem_len) +
|
|
ALIGN16(p_model->setup_mem_len);
|
|
|
|
//model from flash to ddr
|
|
kdp_memxfer_module.flash_to_ddr(p_model->cmd_mem_addr, flash_addr, len_to_load);
|
|
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
* @brief prepare ouptut_mem_addr2 for ncpu/npu parallel mode inference
|
|
*
|
|
* @return 0:OK, -1:Fail
|
|
*/
|
|
static int32_t _prepare_output_mem_addr2(void)
|
|
{
|
|
/* Allocate parallel output buffer , if caller not provide buf*/
|
|
struct scpu_to_ncpu_s* comm_out = kmdw_ipc_get_output();
|
|
uint32_t addr_parallel = comm_out->output_mem_addr2;
|
|
|
|
if (addr_parallel == 0) {
|
|
//TODO, dynamic allocate memory for output_mem_addr2
|
|
//uint32_t addr1 = comm_out->models[model_idx].output_mem_addr;
|
|
//uint32_t addr2 = comm_out->models[model_idx].buf_addr;
|
|
//uint32_t len = comm_out->models[model_idx].output_mem_len;
|
|
//if (addr1 == addr2) {
|
|
// // Old memory layout, use working buffer length
|
|
// len = comm_out->models[model_idx].buf_len;
|
|
//}
|
|
|
|
// reserve more space for larger model output (ty_608x608 need 620160)
|
|
uint32_t len = OUTPUT_MEM_ADDR2_SIZE;
|
|
|
|
addr_parallel = kmdw_ddr_reserve(len);
|
|
if (addr_parallel == 0) {
|
|
err_msg("Error ddr allocation ncpu/npu parallel buffer, len %d\n", len);
|
|
return -1; //error
|
|
}
|
|
comm_out->output_mem_addr2 = addr_parallel;
|
|
comm_out->output_mem_len2 = len;
|
|
|
|
dbg_msg("allocated Parallel buffer: len %d, addr 0x%x", len, addr_parallel);
|
|
}
|
|
return 0;
|
|
}
|
|
|
|
#ifdef KL520
|
|
/**
|
|
* @brief prepare ouptut_mem_addr3 for MBSSD network
|
|
*
|
|
* @param [in] model_type model id
|
|
* @return 0:OK, -1:Fail
|
|
*/
|
|
static int32_t _prepare_output_mem_addr3(uint32_t model_type)
|
|
{
|
|
|
|
if (model_type == KNERON_FD_MBSSD_200_200_3 ||
|
|
model_type == KNERON_FD_MASK_MBSSD_200_200_3 ||
|
|
model_type == KNERON_OD_MBSSD ||
|
|
model_type == KNERON_PD_MBSSD ||
|
|
model_type == KNERON_CAR_DETECTION_MBSSD_224_416_3) {
|
|
|
|
uint32_t *pMemAddr3;
|
|
uint32_t len = OUTPUT_MEM_ADDR3_SIZE;
|
|
struct scpu_to_ncpu_s* comm_out = kmdw_ipc_get_output();
|
|
|
|
switch (model_type) {
|
|
case KNERON_FD_MBSSD_200_200_3 :
|
|
case KNERON_FD_MASK_MBSSD_200_200_3 :
|
|
{
|
|
static uint32_t mem_addr3_fdssd = 0;
|
|
pMemAddr3 = &mem_addr3_fdssd;
|
|
break;
|
|
}
|
|
case KNERON_OD_MBSSD :
|
|
{
|
|
static uint32_t mem_addr3_odssd = 0;
|
|
pMemAddr3 = &mem_addr3_odssd;
|
|
break;
|
|
}
|
|
case KNERON_PD_MBSSD :
|
|
{
|
|
static uint32_t mem_addr3_pdssd = 0;
|
|
pMemAddr3 = &mem_addr3_pdssd;
|
|
break;
|
|
}
|
|
case KNERON_CAR_DETECTION_MBSSD_224_416_3 :
|
|
{
|
|
static uint32_t mem_addr3_vdssd = 0;
|
|
pMemAddr3 = &mem_addr3_vdssd;
|
|
break;
|
|
}
|
|
default :
|
|
break;
|
|
}
|
|
if (*pMemAddr3 == 0) {
|
|
*pMemAddr3 = kmdw_ddr_reserve(len*sizeof(uint32_t));
|
|
if (*pMemAddr3 == 0) {
|
|
err_msg("Error ddr allocation fail for MBSSD network, mem_addr3 len %d\n", len);
|
|
return -1; //error
|
|
}
|
|
*(uint32_t*)(*pMemAddr3) = 0;
|
|
}
|
|
comm_out->output_mem_addr3 = *pMemAddr3;
|
|
}
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
/**
|
|
* @brief specify model information, load model info, load model
|
|
* @param [in] model_type_p: model unique ID defined by Kneron
|
|
* @param [in] model_from_ddr: is model from ddr or host command
|
|
* @return model_slot_index(requested by NCPU/NPU)
|
|
* -1 : model not found
|
|
*/
|
|
/**
 * @brief Resolve a model type to its slot, load it if needed, set up IPC.
 *
 * Ensures the model info table is loaded, maps model_type to its index
 * (the flash path also copies the model's blobs into DDR via _load_model),
 * programs the IPC block with the model, and makes sure the auxiliary
 * output buffers required by the current inference format exist.
 *
 * @param [in] model_type     model unique ID defined by Kneron
 * @param [in] model_from_ddr true when the model was uploaded to DDR by host
 * @return model_slot_index (requested by NCPU/NPU)
 *         -1 : model not found, or a required buffer allocation failed
 */
static int32_t _config_model(uint32_t model_type, bool model_from_ddr)
{
    int model_info_idx; //limitation (hard coded in flash)
    int model_idx;

    //check if model info is loaded (count 0 => nothing to configure)
    if( 0 == _load_model_info(model_from_ddr, false/*reload*/)) {
        return -1;
    }

    if( model_from_ddr == 0 ) {
        //FIXME, should remove application related code
        /* Special model not in DDR but in ncpu: uses fixed slot/info indices */
        if (model_type == KNERON_2D_LIVENESS_224_224_3) {
            model_idx = 3;
            model_info_idx = 4;

            goto model_common;
        }

        model_info_idx = _get_model_info_array_index_by_model_type(model_type);
        if(model_info_idx == -1) {
            err_msg("[ERR] model_type[%d] is not found in flash\n", model_type);
            return -1;
        }
        _load_model(model_info_idx); /* copy the model's blobs flash -> DDR */

        // FIXME: need to remove the following hard code
        model_idx = model_info_idx;

    } else {
        /* model already in DDR: only resolve its index */
        model_info_idx = _get_model_info_array_index_by_model_type(model_type);
        if(model_info_idx == -1) {
            err_msg("[ERR] model_type[%d] is not found in DDR\n", model_type);
            return -1;
        }
        model_idx = model_info_idx;
    }

model_common:
    s_model_data.n_model_slot_index = model_idx;

    kmdw_ipc_set_model(s_model_data.p_model_info, model_info_idx, model_idx);


    struct kdp_img_raw_s *raw_img = kmdw_model_get_raw_img(s_img_data[s_current_ipc_idx].raw_img_idx);

    /* parallel ncpu/npu mode needs its dedicated output buffer */
    if (raw_img->inf_format & IMAGE_FORMAT_PARALLEL_PROC) {
        if (-1 == _prepare_output_mem_addr2() ) {
            return -1;
        }
    }

#ifdef KL520
    /* MBSSD anchor buffer (only allocated for MBSSD model types) */
    if ( -1 == _prepare_output_mem_addr3(model_type) ) {
        return -1;
    }

#else
    /* non-KL520: unconditionally lazy-allocate addr3/addr4 scratch buffers */
    struct scpu_to_ncpu_s* p_comm_out = kmdw_ipc_get_output();

    if (NULL == p_comm_out->output_mem_addr3) {
        uint32_t len = 0x5000;
        p_comm_out->output_mem_addr3 = kmdw_ddr_reserve(len*sizeof(uint32_t));
        if(NULL == p_comm_out->output_mem_addr3) {
            critical_msg("kmdw_model: failed to malloc comm_out->output_mem_addr3\n");
            return -1;
        }
    }

    if (NULL == p_comm_out->output_mem_addr4) {
        uint32_t len = 8 * (1 << 20); /* 8 MiB */
        p_comm_out->output_mem_addr4 = kmdw_ddr_reserve(len);
        if (NULL == p_comm_out->output_mem_addr4) {
            critical_msg("kmdw_model: failed to malloc comm_out->output_mem_addr4\n");
            return -1;
        }
    }
#endif

    kmdw_ipc_set_model_active(model_idx);

    return model_idx;
}
|
|
|
|
|
|
/**
|
|
* @brief run model according to config settings
|
|
* @return status defined in NCPU
|
|
* @note !!! must be called after kapp_config_model_image()
|
|
*/
|
|
/**
 * @brief Run the configured model: trigger ncpu/npu and wait for completion.
 *
 * Uses the per-slot caller event flags to (1) pick which completion flag to
 * wait on (NPU-done when a separate result event is registered for parallel
 * mode, otherwise NCPU-done), (2) detect a pending abort request, and
 * (3) block until the inference finishes or times out.
 *
 * @return status defined in NCPU (img_result.status), IMAGE_STATE_TIMEOUT
 *         on timeout, or KMDW_MODEL_RUN_RC_ABORT when aborted
 * @note !!! must be called after kapp_config_model_image()
 */
static int32_t _run_model(void)
{
    int active_idx = s_current_ipc_idx;
    int raw_img_idx = s_img_data[active_idx].raw_img_idx;
    struct kdp_img_raw_s *p_raw_image = kmdw_model_get_raw_img(raw_img_idx);
    uint32_t flags, wait_evt;
    uint32_t is_abort = 0;

    // Start time for ncpu/npu round trip
    p_raw_image->tick_start = osKernelGetTickCount();

    /* lazily create the per-slot event object on first use */
    if (s_img_data[active_idx].evt_caller == NULL)
        s_img_data[active_idx].evt_caller = osEventFlagsNew(0);

    if(!s_img_data[active_idx].evt_caller)
        err_msg("<Run-Model> active_idx=%d, osEventFlagsNew evt_caller failure\n",active_idx);

    // set notify for job done
    if (s_img_data[active_idx].evt_result) {
        /* Result event already set. Let's do local event for parallel. */
        wait_evt = FLAG_KMDW_MODEL_FROM_NPU;
    } else {
        wait_evt = FLAG_KMDW_MODEL_FROM_NCPU;
    }

    dbg_msg("<Run-Model> wait %d[%d] evt %x\n", raw_img_idx, active_idx, wait_evt);

    //assign caller event before triggering ncpu/npu
    s_img_data[active_idx].caller_e = wait_evt;

    //trigger ncpu/npu
    kmdw_ipc_trigger_int(CMD_RUN_NPU);

    //check abort signal (non-blocking poll: timeout 0)
    flags = osEventFlagsWait(s_img_data[active_idx].evt_caller,
                             FLAG_KMDW_MODEL_ABORT,
                             osFlagsWaitAll, 0);
    if( flags != osFlagsErrorResource ) {
        /* abort flag was set; clear it and remember to bail out after the
           in-flight inference completes */
        osEventFlagsClear(s_img_data[active_idx].evt_caller, FLAG_KMDW_MODEL_ABORT);
        is_abort = 1;
    }

    /* with debug checkpoints enabled, wait forever so breakpoints on the
       ncpu side do not trip the timeout */
    uint32_t wait_timeout = (kmdw_ipc_get_output()->kp_dbg_checkpoinots == 0x0) ? MODEL_INF_TIMEOUT : osWaitForever;

    //wait for finish of current task
    flags = osEventFlagsWait(s_img_data[active_idx].evt_caller,
                             wait_evt,
                             osFlagsNoClear, wait_timeout);

    if(flags == osFlagsErrorTimeout){
        err_msg("[%s] osEventFlagsWait flag 0x%08x timeout\n", __FUNCTION__, wait_evt);
        return IMAGE_STATE_TIMEOUT;
    } else if (flags != wait_evt)
        dbg_msg("[%s] 1+ events 0x%08x (%d[%d] expected)\n", __FUNCTION__, flags, wait_evt, active_idx);
    else
        dbg_msg("[DBG][%s] got: raw_img_idx[active_idx]=%d[%d]\n", __FUNCTION__, raw_img_idx, active_idx);

    /* flags were waited with osFlagsNoClear; clear the handled one here */
    osEventFlagsClear(s_img_data[active_idx].evt_caller, wait_evt);

    if( 1 == is_abort ) {
        dbg_msg("[DBG][%s] abort after n_model_slot_index = %d\n", __FUNCTION__, s_model_data.n_model_slot_index);
        return KMDW_MODEL_RUN_RC_ABORT; //abort
    }

    return kmdw_ipc_get_input()->result.postproc.img_result.status;
}
|
|
|
|
/**
 * @brief Weak default for the fifoq result enqueue hook.
 *
 * Does nothing and reports success; a real implementation elsewhere in the
 * firmware overrides this symbol.
 */
__weak osStatus_t kmdw_fifoq_manager_result_enqueue(void *result_buf, int buf_size, bool preempt)
{
    (void)result_buf;
    (void)buf_size;
    (void)preempt;
    return osOK;
}
|
|
|
|
static void _ipc_handler(struct kdp_img_raw_s *p_raw_image, int state)
|
|
{
|
|
int ipc_idx;
|
|
|
|
if(state == 0x999) // FIXME, very workaround
|
|
{
|
|
kmdw_ipc_get_input()->kp_dbg_status = 0x0;
|
|
osStatus_t sts = kmdw_fifoq_manager_result_enqueue(kmdw_ipc_get_output()->kp_dbg_buffer, 0, false);
|
|
if(sts != osOK)
|
|
kmdw_printf("send dbg data failed in ipc, err %d\n", sts);
|
|
}
|
|
else if (state == IMAGE_STATE_RECEIVING) {
|
|
ipc_idx = p_raw_image->ref_idx;
|
|
|
|
// End time for ncpu/npu round trip
|
|
p_raw_image->tick_end = osKernelGetSysTimerCount();
|
|
|
|
if (s_img_data[ipc_idx].evt_result) {
|
|
dbg_msg("[done: post: P] ipc_idx: %d, result_e: %d (ram %x)\n", ipc_idx, s_img_data[ipc_idx].result_e, p_raw_image);
|
|
osEventFlagsSet(s_img_data[ipc_idx].evt_result, s_img_data[ipc_idx].result_e);
|
|
} else {
|
|
dbg_msg("[done: post: S] ipc_idx: %d, caller_e: %x.\n", ipc_idx, s_img_data[ipc_idx].caller_e);
|
|
osEventFlagsSet(s_img_data[ipc_idx].evt_caller, s_img_data[ipc_idx].caller_e);
|
|
}
|
|
} else if (state == IMAGE_STATE_ACTIVE){
|
|
ipc_idx = s_current_ipc_idx;
|
|
dbg_msg("[done: npu: P] ipc_idx: %d, caller_e: %x\n", ipc_idx, s_img_data[ipc_idx].caller_e);
|
|
osEventFlagsSet(s_img_data[ipc_idx].evt_caller, s_img_data[ipc_idx].caller_e);
|
|
} else {
|
|
err_msg("[ERR] wrong state: %d (ipc_idx %d)\n", state, ipc_idx);
|
|
}
|
|
}
|
|
|
|
/* ############################
|
|
* ## Public Functions ##
|
|
* ############################ */
|
|
|
|
void kmdw_model_init(void)
|
|
{
|
|
kmdw_ipc_initialize(_ipc_handler);
|
|
|
|
_init_fw_info_buf();
|
|
s_fw_info_buf_p->model_count = 0;
|
|
}
|
|
|
|
/**
 * @brief Load model(s) from flash into DDR.
 *
 * Refreshes the model info table first when it is missing or was last
 * populated from a non-flash source. KMDW_MODEL_ALL_MODELS loads every
 * model (optionally CRC-verifying the whole all_models.bin blob);
 * otherwise only the model at the given info index is loaded.
 *
 * @param model_info_index_p model info index, or KMDW_MODEL_ALL_MODELS
 * @return model count (all-models case) or _load_model() result (single);
 *         0 on any failure
 */
int32_t kmdw_model_load_model(int8_t model_info_index_p)
{
    int32_t ret = 0;

    if(1 != s_model_data.n_model_source || // check if s_model_data is not according to flash
       0 == s_model_data.n_model_count) {
        if(0 == _load_model_info(false/*from ddr*/, true/*reload*/))
            return 0; //error, no model is loaded
    }

    // load all models
    if (KMDW_MODEL_ALL_MODELS == model_info_index_p) {
        uint8_t i;
        for (i = 0 ; i < s_model_data.n_model_count ; i++) {
            ret = _load_model(i);
            if( 0 == ret) {
                err_msg("[ERR] %s : failed to load model array index:%d\n", __FUNCTION__, i);
                return 0;
            }
        }

        // Very slow if turn it on. Maybe hardware support is needed.
        // Add a new compiler directive if CRC32 method is also used in other scenarios (ex: check FW image)
#if ENABLE_CRC32
        // check CRC value of all_models.bin
        kmdw_model_fw_info_t *model_info_p = _load_flash_model_info();
        kmdw_model_fw_info_ext_t *model_info2_p = _get_fw_info_ext_by_fw_info(model_info_p);

        // cmd_mem_addr of first model is the start address of all_models.bin
        uint8_t *addr = (uint8_t *)s_model_data.p_model_info[0].cmd_mem_addr;

        uint32_t crc32 = kmdw_utils_crc_gen_crc32(addr, model_info2_p->model_total_size);

        dbg_msg("[%s] crc32 calculated: 0x%x\n", __FUNCTION__, crc32);
        dbg_msg("[%s] crc32 read from flash: 0x%x\n", __FUNCTION__, model_info2_p->model_checksum);
        dbg_msg("[%s] model start address: 0x%x\n", __FUNCTION__, s_model_data.p_model_info[0].cmd_mem_addr);
        dbg_msg("[%s] model total size: %d\n", __FUNCTION__, model_info2_p->model_total_size);

        if (crc32 != model_info2_p->model_checksum)
        {
            err_msg("[ERR] %s: all models.bin CRC check failed\n", __FUNCTION__);
            return 0;
        }
#endif

        return s_model_data.n_model_count;
    } else { // load specific model
        ret = _load_model(model_info_index_p);
        return ret;
    }
}
|
|
|
|
/**
 * @brief Force a fresh read of the model info table from the given source.
 *
 * @param from_ddr true: read fw_info from DDR; false: read from flash
 * @return model count, 0 when nothing could be loaded
 */
int32_t kmdw_model_reload_model_info(bool from_ddr)
{
    const bool force_reload = true;
    return _load_model_info(from_ddr, force_reload);
}
|
|
|
|
int32_t kmdw_model_refresh_models(void) // reload all the models from flash again
|
|
{
|
|
uint8_t i;
|
|
|
|
// forcedly update s_model_data which might be poluted by model upload from host
|
|
if(0 == _load_model_info(false/*from ddr*/, true/*reload*/))
|
|
return 0; //error, no model is loaded
|
|
|
|
int ret;
|
|
for (i = 0 ; i < s_model_data.n_model_count ; i++) {
|
|
if (s_model_data.pn_is_model_loaded_table[i]) { // if previously loaded
|
|
s_model_data.pn_is_model_loaded_table[i] = 0;
|
|
ret = _load_model(i); // reload the model again
|
|
if ( 0 == ret) {
|
|
err_msg("[ERR] %s : failed to load model array index:%d\n", __FUNCTION__, i);
|
|
return 0;
|
|
}
|
|
}
|
|
}
|
|
return s_model_data.n_model_count;
|
|
}
|
|
|
|
/**
 * @brief Register the event object/flag used to signal inference results
 *        for the current IPC slot (enables parallel-mode notification).
 *
 * @param result_evt      event flags object to signal on completion
 * @param result_evt_flag flag value to set on that object
 * @return always 0
 */
int32_t kmdw_model_config_result(osEventFlagsId_t result_evt, uint32_t result_evt_flag)
{
    kmdw_img_data_t *slot = &s_img_data[s_current_ipc_idx];

    slot->evt_result = result_evt;
    slot->result_e = result_evt_flag;

    return 0;
}
|
|
|
|
/**
 * @brief Stage an image configuration into the shared raw-image descriptor.
 *
 * Selects the current IPC slot (advancing the double-buffer index when the
 * inference format requests parallel processing), marks the image active in
 * IPC, and copies per-image geometry/format/address data plus optional
 * extension parameters into the shared raw image record.
 *
 * @param [in] img_cfg   image configuration (active index, image list, format)
 * @param [in] ext_param optional extension parameters (MAX_PARAMS_LEN words);
 *                       NULL clears the ext_params area
 */
void kmdw_model_config_img(struct kdp_img_cfg *img_cfg, void *ext_param)
{
    int act_img_idx = img_cfg->image_buf_active_index;
    struct kdp_img_raw_s *raw_img = kmdw_model_get_raw_img(act_img_idx);

    s_current_ipc_idx = s_next_ipc_idx;

    /* parallel mode ping-pongs between the two IPC slots */
    if (img_cfg->inf_format & IMAGE_FORMAT_PARALLEL_PROC)
        s_next_ipc_idx = !s_next_ipc_idx;

    kmdw_ipc_set_image_active(act_img_idx);
    s_img_data[s_current_ipc_idx].raw_img_idx = act_img_idx;

    raw_img->state = IMAGE_STATE_ACTIVE;
    raw_img->seq_num = act_img_idx;

    /* remember which IPC slot owns this image so the IPC handler can
       signal the right event (see _ipc_handler) */
    raw_img->ref_idx = s_current_ipc_idx;
    raw_img->num_image = img_cfg->num_image;
    raw_img->inf_format = img_cfg->inf_format;

    /* copy per-image geometry, format, buffer address and pre-proc params */
    for (int i = 0; i < img_cfg->num_image; i++) {
        raw_img->image_list[i].input_row = img_cfg->image_list[i].input_row;
        raw_img->image_list[i].input_col = img_cfg->image_list[i].input_col;
        raw_img->image_list[i].input_channel = img_cfg->image_list[i].input_channel;
        raw_img->image_list[i].format = img_cfg->image_list[i].format;
        raw_img->image_list[i].image_mem_addr = img_cfg->image_list[i].image_mem_addr;
        raw_img->image_list[i].image_mem_len = img_cfg->image_list[i].image_mem_len;

        memcpy(&(raw_img->image_list[i].params_s), &(img_cfg->image_list[i].params_s), sizeof(parameter_t));
    }

    /* ext_params is MAX_PARAMS_LEN 32-bit words (hence the * 4) */
    if (ext_param == NULL) {
        memset(raw_img->ext_params, 0, MAX_PARAMS_LEN * 4);
    } else {
        memcpy(raw_img->ext_params, ext_param, MAX_PARAMS_LEN * 4);
    }
}
|
|
|
|
struct kdp_img_raw_s* kmdw_model_get_raw_img(int idx)
|
|
{
|
|
struct scpu_to_ncpu_s *comm_out = kmdw_ipc_get_output();
|
|
return &(comm_out->raw_images[idx]);
|
|
}
|
|
|
|
/**
 * @brief Configure and run one inference of the given model type.
 *
 * Resolves/loads the model via _config_model(), points the result buffer of
 * the current raw image at 'output', dumps the run parameters at debug
 * level, then blocks in _run_model() until completion/timeout/abort.
 *
 * @param [in] tag            caller label used only in debug logging
 * @param [in] output         buffer that receives the inference result
 * @param [in] model_type     model unique ID defined by Kneron
 * @param [in] model_from_ddr true when the model was uploaded to DDR by host
 * @return _run_model() status, or KMDW_MODEL_RUN_RC_ABORT when the model
 *         could not be configured
 */
int kmdw_model_run(const char *tag, void *output, uint32_t model_type, bool model_from_ddr)
{
    int model_idx = _config_model(model_type, model_from_ddr);
    if (model_idx < 0) {
        return KMDW_MODEL_RUN_RC_ABORT;
    }

    int img_idx = s_img_data[s_current_ipc_idx].raw_img_idx;
    struct kdp_img_raw_s *raw_img = kmdw_model_get_raw_img(img_idx);

    /* hand the caller's output buffer to the ncpu/npu side */
    raw_img->results[model_idx].result_mem_addr = (uint32_t)output;

    /* debug dump of the run setup */
    dbg_msg("[INFO] %s:\n", tag);
    dbg_msg("    model_idx = %d\n", model_idx);
    dbg_msg("    model type = %d\n", model_type);
    dbg_msg("    ref_idx = %d\n", raw_img->ref_idx);
    dbg_msg("    inf_format = 0x%X\n", raw_img->inf_format);
    dbg_msg("    output addr = 0x%x\n", raw_img->results[model_idx].result_mem_addr);
    dbg_msg("    ext_params(first 4)= %d/%d/%d/%d\n", raw_img->ext_params[0], raw_img->ext_params[1],
            raw_img->ext_params[2], raw_img->ext_params[3]);

    for (int i = 0; i < raw_img->num_image; i++) {
        dbg_msg("    image index: %d\n", i);
        dbg_msg("    (row/col/ch) = %d/%d/%d\n", raw_img->image_list[i].input_row,
                raw_img->image_list[i].input_col,
                raw_img->image_list[i].input_channel);
        dbg_msg("    image format = 0x%x\n", raw_img->image_list[i].format);
        dbg_msg("    crop(tp/bt/lf/rt) = %d/%d/%d/%d\n", raw_img->image_list[i].params_s.crop_top,
                raw_img->image_list[i].params_s.crop_bottom,
                raw_img->image_list[i].params_s.crop_left,
                raw_img->image_list[i].params_s.crop_right);
        dbg_msg("    image addr = 0x%x\n", raw_img->image_list[i].image_mem_addr);
    }

    return _run_model();
}
|
|
|
|
void kmdw_model_abort(void)
|
|
{
|
|
int active_idx = s_current_ipc_idx;
|
|
|
|
if( 0 == s_img_data[active_idx].evt_caller)
|
|
return;
|
|
|
|
osEventFlagsSet(s_img_data[active_idx].evt_caller, FLAG_KMDW_MODEL_ABORT);
|
|
}
|
|
|
|
struct kdp_model_s* kmdw_model_get_model_info(int model_idx_p)
|
|
{
|
|
if (s_model_data.n_model_count == 0) {
|
|
return NULL;
|
|
} else if (model_idx_p >= s_model_data.n_model_count) {
|
|
return NULL;
|
|
} else {
|
|
return &(s_model_data.p_model_info[model_idx_p]);
|
|
}
|
|
}
|
|
|
|
/**
 * Collect the timing statistics recorded for raw image @img_idx.
 *
 * Fills @run_time with round-trip, pre-process, NPU and post-process
 * durations computed from the tick stamps stored in the raw image slot.
 * Does nothing when @run_time is NULL.
 */
void kmdw_model_get_run_time(int img_idx, kmdw_model_run_time_t *run_time/*out*/)
{
    if (NULL == run_time)
        return;

    struct kdp_img_raw_s *raw = kmdw_model_get_raw_img(img_idx);

    run_time->round_trip_time = raw->tick_end - raw->tick_start;
    run_time->pre_proc_time   = raw->tick_end_pre  - raw->tick_start_pre;
    run_time->npu_proc_time   = raw->tick_end_npu  - raw->tick_start_npu;
    run_time->post_proc_time  = raw->tick_end_post - raw->tick_start_post;
}
|
|
|
|
|
|
|
|
/**
 * Check whether a model of the given type is currently loaded.
 *
 * @return 1 when the model type is found in the model info array, 0 otherwise
 */
int kmdw_model_is_model_loaded(uint32_t model_type)
{
    return (_get_model_info_array_index_by_model_type(model_type) != -1) ? 1 : 0;
}
|
|
|
|
uint32_t *kmdw_model_get_all_model_info(bool trust_ddr_data)
|
|
{
|
|
static uint32_t *s_p_model_id_list = NULL; //[model_count, id0, id1, id2 ...]
|
|
|
|
kmdw_model_fw_info_t *fw_info_ptr = NULL;
|
|
|
|
fw_info_ptr = kmdw_model_get_fw_info(trust_ddr_data);
|
|
|
|
if (fw_info_ptr) {
|
|
|
|
if (NULL == s_p_model_id_list)
|
|
s_p_model_id_list = (uint32_t *)calloc(1+KMDW_MODEL_MAX_MODEL_COUNT, sizeof(uint32_t));
|
|
|
|
if (NULL == s_p_model_id_list) {
|
|
err_msg("[ERR] insufficent memory for model id list\n");
|
|
} else {
|
|
int i;
|
|
uint32_t model_id;
|
|
|
|
s_p_model_id_list[0] = fw_info_ptr->model_count;
|
|
dbg_msg("%s:\n", __FUNCTION__);
|
|
dbg_msg("Model Count = %d\n", s_p_model_id_list[0]);
|
|
|
|
for (i = 0 ; i < s_p_model_id_list[0]; i++) {
|
|
model_id = fw_info_ptr->models[i].model_type;
|
|
dbg_msg("Extract Model ID %d\n", model_id);
|
|
|
|
s_p_model_id_list[i+1] = model_id;
|
|
}
|
|
}
|
|
return s_p_model_id_list;
|
|
} else {
|
|
return NULL;
|
|
}
|
|
|
|
}
|
|
|
|
uint32_t kmdw_model_get_crc(bool trust_ddr_data)
|
|
{
|
|
uint32_t ret = 0;
|
|
kmdw_model_fw_info_t *fw_info_ptr;
|
|
kmdw_model_fw_info_ext_t *fw_info_ext_ptr;
|
|
|
|
fw_info_ptr = kmdw_model_get_fw_info(trust_ddr_data);
|
|
fw_info_ext_ptr = _get_fw_info_ext_by_fw_info(fw_info_ptr);
|
|
|
|
if (fw_info_ext_ptr) {
|
|
ret = fw_info_ext_ptr->model_checksum;
|
|
}
|
|
|
|
dbg_msg("%s = 0x%x\n", __FUNCTION__, ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
|
|
kmdw_model_fw_info_t *kmdw_model_get_fw_info(bool trust_ddr_data)
|
|
{
|
|
uint32_t model_cnt;
|
|
kmdw_model_fw_info_t *fw_info_ptr = s_fw_info_buf_p;
|
|
|
|
if (false == trust_ddr_data) {
|
|
if ((0 >= s_model_data.n_model_count) ||
|
|
((1 != s_model_data.n_model_source) && (2 != s_model_data.n_model_source))) {
|
|
fw_info_ptr = NULL;
|
|
} else {
|
|
model_cnt = fw_info_ptr->model_count;
|
|
|
|
if ((0 == model_cnt) || (model_cnt > KMDW_MODEL_MAX_MODEL_COUNT)) {
|
|
fw_info_ptr = NULL;
|
|
}
|
|
}
|
|
}
|
|
|
|
return fw_info_ptr;
|
|
}
|
|
|
|
uint32_t kmdw_model_get_model_end_addr(bool trust_ddr_data)
|
|
{
|
|
uint32_t ret = 0;
|
|
kmdw_model_fw_info_t* fw_info_ptr;
|
|
kmdw_model_fw_info_ext_t* fw_info_ext_ptr = NULL;
|
|
|
|
if (0 != s_model_data.n_ddr_addr_model_end) {
|
|
ret = s_model_data.n_ddr_addr_model_end;
|
|
goto FUNC_OUT;
|
|
}
|
|
|
|
fw_info_ptr = kmdw_model_get_fw_info(trust_ddr_data);
|
|
fw_info_ext_ptr = _get_fw_info_ext_by_fw_info(fw_info_ptr);
|
|
|
|
if (fw_info_ext_ptr) {
|
|
ret = fw_info_ext_ptr->model_dram_addr_end;
|
|
}
|
|
|
|
FUNC_OUT:
|
|
|
|
dbg_msg("%s = 0x%x\n", __FUNCTION__, ret);
|
|
|
|
return ret;
|
|
}
|
|
|
|
void kmdw_model_set_location(bool model_inddr)
|
|
{
|
|
ModelFromDDR = model_inddr;
|
|
}
|
|
|
|
bool kmdw_model_get_location(void)
|
|
{
|
|
return ModelFromDDR;
|
|
}
|
|
|
|
/**
 * Return the number of input tensors for the given model type.
 *
 * KL520 only supports single-input models, so this is 1 for any loaded
 * model and 0 (with an error log) for an unknown model type.
 */
int kmdw_model_get_input_tensor_num(uint32_t model_type)
{
    if (_get_model_info_array_index_by_model_type(model_type) < 0) {
        err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
        return 0;
    }

    /* KL520 only support single input model */
    return 1;
}
|
|
|
|
/**
 * Fill @tensor_info with the descriptor of input tensor @tensor_idx of the
 * model identified by @model_type.
 *
 * KL520 supports a single input tensor, so only tensor_idx 0 is accepted;
 * the shape/radix fields are read from the cnn header at the start of the
 * model's setup.bin buffer.
 *
 * @return 1 on success, 0 on any error (logged via err_msg)
 */
int kmdw_model_get_input_tensor_info(uint32_t model_type, uint32_t tensor_idx, kmdw_model_tensor_descriptor_t *tensor_info)
{
    int ret = 1;
    int model_idx = 0;
    uint32_t p_setup_bin = 0;
    struct cnn_header_s *target_input_node = NULL;

    if (NULL == tensor_info) {
        err_msg("[%s] NULL tensor_info pointer\n", __FUNCTION__);
        ret = 0;
        goto FUNC_OUT;
    }

    model_idx = _get_model_info_array_index_by_model_type(model_type);
    if (model_idx >= 0) {
        struct kdp_model_s *p_model_info = kmdw_model_get_model_info(model_idx);

        if (NULL != p_model_info) {
            p_setup_bin = p_model_info->setup_mem_addr;
        } else {
            /* fixed: format string had a '%d' with no matching argument
             * (undefined behavior); also use the common goto-cleanup exit */
            err_msg("[%s] NULL model info pointer\n", __FUNCTION__);
            ret = 0;
            goto FUNC_OUT;
        }
    } else {
        err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
        ret = 0;
        goto FUNC_OUT;
    }

    /* single-input platform: only index 0 is valid */
    if (tensor_idx >= 1) {
        err_msg("[%s] tensor index out of range %d\n", __FUNCTION__, tensor_idx);
        ret = 0;
        goto FUNC_OUT;
    }

    /* setup.bin starts with the cnn header describing the input */
    target_input_node = (struct cnn_header_s *)p_setup_bin;

    tensor_info->index = 1;
    tensor_info->shape_npu_len = 4;
    tensor_info->shape_npu[0] = 1;
    tensor_info->shape_npu[1] = target_input_node->input_channel;
    tensor_info->shape_npu[2] = target_input_node->input_row;
    tensor_info->shape_npu[3] = target_input_node->input_col;
    tensor_info->data_layout = DATA_FMT_4W4C8B;
    tensor_info->scale = 1.0;
    tensor_info->radix = target_input_node->input_radix;

FUNC_OUT:
    return ret;
}
|
|
|
|
int kmdw_model_get_output_tensor_num(uint32_t model_type)
|
|
{
|
|
int model_idx = 0;
|
|
struct kdp_model_s *p_model_info = NULL;
|
|
|
|
model_idx = _get_model_info_array_index_by_model_type(model_type);
|
|
if (model_idx >= 0) {
|
|
p_model_info = kmdw_model_get_model_info(model_idx);
|
|
} else {
|
|
err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
|
|
return 0;
|
|
}
|
|
|
|
/******************************************************************
|
|
* legacy setup.bin model
|
|
******************************************************************/
|
|
if (NULL != p_model_info) {
|
|
return ((struct cnn_header_s *)p_model_info->setup_mem_addr)->output_nums;
|
|
} else {
|
|
err_msg("[%s] NULL model info pointer %d\n", __FUNCTION__);
|
|
return 0;
|
|
}
|
|
}
|
|
|
|
/**
 * Fill @tensor_info with the descriptor of output tensor @tensor_idx of the
 * model identified by @model_type.
 *
 * The legacy setup.bin blob is walked record by record until the OUTPUT
 * node whose output_index matches @tensor_idx is found; its geometry,
 * format, scale and radix are copied into @tensor_info.
 *
 * @return 1 on success, 0 on any error (logged via err_msg)
 */
int kmdw_model_get_output_tensor_info(uint32_t model_type, uint32_t tensor_idx, kmdw_model_tensor_descriptor_t *tensor_info)
{
    int ret = 1;
    int model_idx = 0;
    uint32_t p_setup_bin = 0;
    uint32_t node_num = 0;
    uint32_t setup_buff_offset = sizeof(struct cnn_header_s);  /* skip fixed header */
    uint32_t setup_buff_size = 0;
    struct out_node_s *target_output_node = NULL;
    struct out_node_s *output_node = NULL;

    if (NULL == tensor_info) {
        err_msg("[%s] NULL tensor_info pointer\n", __FUNCTION__);
        ret = 0;
        goto FUNC_OUT;
    }

    model_idx = _get_model_info_array_index_by_model_type(model_type);
    if (model_idx >= 0) {
        struct kdp_model_s *p_model_info = kmdw_model_get_model_info(model_idx);

        if (NULL != p_model_info) {
            p_setup_bin = p_model_info->setup_mem_addr;
            setup_buff_size = p_model_info->setup_mem_len;
        } else {
            /* fixed: format string had a '%d' with no matching argument
             * (undefined behavior); also use the common goto-cleanup exit */
            err_msg("[%s] NULL model info pointer\n", __FUNCTION__);
            ret = 0;
            goto FUNC_OUT;
        }
    } else {
        err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
        ret = 0;
        goto FUNC_OUT;
    }

    node_num = ((struct cnn_header_s *)p_setup_bin)->output_nums;

    if (tensor_idx >= node_num) {
        err_msg("[%s] tensor index out of range %d\n", __FUNCTION__, tensor_idx);
        ret = 0;
        goto FUNC_OUT;
    }

    /* walk the variable-length node records until the requested OUTPUT
     * node shows up or the buffer is exhausted */
    while ((setup_buff_offset < setup_buff_size) && (NULL == target_output_node)) {
        uintptr_t node_buff = (uintptr_t)p_setup_bin + setup_buff_offset;
        uint32_t node_id = *(uint32_t *)node_buff;
        uint32_t node_offset = 0;

        switch (node_id) {
        case NODE_TYPE_IN:
            // NPU IN Signal NODE
            dbg_msg("current node is an NPU IN Signal NODE\n");
            node_offset = sizeof(struct in_node_s);
            break;
        case NODE_TYPE_CPU:
            // CPU NODE
            dbg_msg("current node is a CPU NODE\n");
            node_offset = sizeof(struct cpu_node_s) - (2 * sizeof(struct data_node_s));
            break;
        case NODE_TYPE_OUTPUT:
            // OUTPUT NODE
            dbg_msg("current node is a output NODE\n");
            output_node = (struct out_node_s *)node_buff;
            node_offset = sizeof(struct out_node_s) - (sizeof(struct super_node_s));

            if (output_node->output_index == tensor_idx)
                target_output_node = output_node;
            break;
        case NODE_TYPE_DATA:
            // NPU DATA NODE
            dbg_msg("current node is an network data NODE\n");
            node_offset = sizeof(struct data_node_s) - sizeof(struct super_node_s);
            break;
        case NODE_TYPE_SUPER:
            // NPU SUPER NODE
            dbg_msg("current node is an network super NODE\n");
            node_offset = sizeof(struct super_node_s);
            break;
        default:
            // Unknown NODE
            err_msg("[%s] unknown node type: %d\n", __FUNCTION__, node_id);
            ret = 0;
            goto FUNC_OUT;
        }

        setup_buff_offset += node_offset;
    }

    if (NULL == target_output_node) {
        err_msg("[%s] can not find target index node %d\n", __FUNCTION__, tensor_idx);
        ret = 0;
        goto FUNC_OUT;
    }

    tensor_info->index = target_output_node->output_index;
    tensor_info->shape_npu_len = 4;
    tensor_info->shape_npu[0] = 1;
    tensor_info->shape_npu[1] = target_output_node->ch_length;
    tensor_info->shape_npu[2] = target_output_node->row_length;
    tensor_info->shape_npu[3] = target_output_node->col_length;
    tensor_info->data_layout = target_output_node->data_format;
    /* reinterpret the stored scale bit pattern as float via memcpy instead
     * of the old *(float *)& cast, which violated strict aliasing */
    memcpy(&tensor_info->scale, &target_output_node->output_scale, sizeof(tensor_info->scale));
    tensor_info->radix = target_output_node->output_radix;

FUNC_OUT:
    return ret;
}
|
|
|
|
#ifdef EMBED_CMP_NPU
|
|
|
|
/**
 * Register a new model (or refresh an existing one) in the module's model
 * info table, then push the weight/output/command addresses to the NPU.
 *
 * For an unknown model_type a new table entry is appended and all lengths
 * and addresses are recorded; for a known one only the weight address,
 * output address and working-buffer address are updated.
 *
 * NOTE(review): this calls _get_model_info_array_index_FROM_model_type
 * while the rest of this file uses _get_model_info_array_index_BY_model_type
 * — confirm both symbols exist (this one only compiles under EMBED_CMP_NPU).
 * NOTE(review): no capacity check before appending to p_model_info — confirm
 * the table cannot overflow here.
 *
 * @return always 0
 */
int8_t kmdw_model_add_update_model(uint32_t model_type,
    int cmd_len, int wt_len, int input_len, int output_len, int setup_len,
    uint32_t cmd_mem_addr, uint32_t wt_mem_addr,
    uint32_t input_mem_addr, uint32_t output_mem_addr, uint32_t setup_mem_addr)
{
    int model_info_idx = _get_model_info_array_index_from_model_type(model_type);

    if (model_info_idx < 0) {
        /* unknown model: append a fresh entry at the end of the table */
        int model_count = s_model_data.n_model_count + 1;
        s_model_data.n_model_count = model_count;
        model_info_idx = model_count - 1;

        s_model_data.p_model_info[model_info_idx].model_type = model_type;

        s_model_data.p_model_info[model_info_idx].cmd_mem_addr = cmd_mem_addr;
        s_model_data.p_model_info[model_info_idx].cmd_mem_len = cmd_len;

        s_model_data.p_model_info[model_info_idx].weight_mem_len = wt_len;

        s_model_data.p_model_info[model_info_idx].input_mem_addr = input_mem_addr;
        s_model_data.p_model_info[model_info_idx].input_mem_len = input_len;

        s_model_data.p_model_info[model_info_idx].output_mem_len = output_len;
        s_model_data.p_model_info[model_info_idx].buf_len = output_len;

        s_model_data.p_model_info[model_info_idx].setup_mem_addr = setup_mem_addr;
        s_model_data.p_model_info[model_info_idx].setup_mem_len = setup_len;

        s_model_data.pn_is_model_loaded_table[model_info_idx] = 1;
    }

    /* always refresh the relocatable addresses, even for a known model */
    s_model_data.p_model_info[model_info_idx].weight_mem_addr = wt_mem_addr;
    s_model_data.p_model_info[model_info_idx].output_mem_addr = output_mem_addr;
    s_model_data.p_model_info[model_info_idx].buf_addr = output_mem_addr;

    dbg_msg("[%s] model cmd addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].cmd_mem_addr);
    dbg_msg("[%s] model wt addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].weight_mem_addr);
    dbg_msg("[%s] model input addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].input_mem_addr);
    dbg_msg("[%s] model output addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].output_mem_addr);
    dbg_msg("[%s] model buf addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].buf_addr);
    dbg_msg("[%s] model setup addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].setup_mem_addr);

    /* hand the new weight/output/command locations to the NPU side */
    udt_npu_model_mem(wt_mem_addr, output_mem_addr, (void *)cmd_mem_addr);

    return 0;
}
|
|
|
|
#endif // EMBED_CMP_NPU
|
|
|
|
#if DEBUG
|
|
|
|
void kmdw_model_dump_model_info(void)
|
|
{
|
|
struct kdp_model_s *p_modelInfo = 0;
|
|
uint8_t i;
|
|
|
|
dbg_msg("Model info Count = %d\n", s_model_data.n_model_count);
|
|
|
|
for (i = 0 ; i < s_model_data.n_model_count ; i++) {
|
|
p_modelInfo = &(kmdw_model_data.p_model_info[i]);
|
|
dbg_msg("Model(%2d) model_type(%3d)/version(%5d):\n",
|
|
(i+1),
|
|
p_modelInfo->model_type, p_modelInfo->model_version);
|
|
|
|
dbg_msg("input[%x](sz:%d) -> cmd[%x](sz:%d),weight[%x](sz:%d),setup[%x](sz:%d),buf[%x](sz:%d) -> out[%x](sz:%d)\n",
|
|
(i+1),
|
|
p_modelInfo->input_mem_addr, p_modelInfo->input_mem_len,
|
|
p_modelInfo->cmd_mem_addr, p_modelInfo->cmd_mem_len,
|
|
p_modelInfo->weight_mem_addr,p_modelInfo->weight_mem_len,
|
|
p_modelInfo->setup_mem_addr, p_modelInfo->setup_mem_len,
|
|
p_modelInfo->buf_addr, p_modelInfo->buf_len,
|
|
p_modelInfo->output_mem_addr,p_modelInfo->output_mem_len);
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
#endif // DEBUG
|