/*
* KL520_SDK_2.2/mdw/model/kmdw_model.c
*
* Kneron Model API Manager
*
* Copyright (C) 2019 Kneron, Inc. All rights reserved.
*
*/
#include <stdlib.h>
#include <string.h>
#include "project.h"
#include "base.h"
#include "kdrv_ipc.h" /*for NCPU triggering */
#include "kdrv_clock.h" /* for kdrv_delay_us() */
#include "kdev_flash.h"
#include "kmdw_ipc.h"
#include "kmdw_model.h"
#include "kmdw_console.h" /*for dbg_msg */
#include "kmdw_memxfer.h" /*for flash access */
#include "kmdw_memory.h"
#include "kmdw_utils_crc.h"
#define DEBUG 0
#define OUTPUT_MEM_ADDR2_SIZE 0x100000 /* 1MB, for DME parallel buffer */
#define OUTPUT_MEM_ADDR3_SIZE 0x5000 /* for MBSSD anchor data */
#define FLAG_KMDW_MODEL_ABORT BIT(29) // Event flag to notify abort
#define FLAG_KMDW_MODEL_FROM_NCPU BIT(30) // Event flag to know NCPU is done
#define FLAG_KMDW_MODEL_FROM_NPU BIT(28) // Event flag to know NPU is done
#define MODEL_INF_TIMEOUT (2000) // timeout in milliseconds when waiting for the ncpu response
#define KDP_FLASH_FW_INFO_SIZE 0x1000
#ifdef EMBED_CMP_NPU
/* the following is for specific dense model wt/cmd mem modification */
/*================================================*/
#define WT_DATA_SIZE_BYTE 272
#define CONF_QUEUE 0
#define GETW_QUEUE 3
#define CONF_GETW0_CMD_OFFSET 0x0038
#define CONF_WDMA0_DST0_CMD_OFFSET 0x00f0
#define ACL_NPU_GETW0 0x2e
#define ACH_NPU_GETW0 0x2f
#define ACL_NPU_WDMA0_DST0 0x36
#define ACH_NPU_WDMA0_DST0 0x37
#define MASK_2 0x0003
#define MASK_10 0x03FF
#define MASK_16 0x00FFFF
#define VAL_ACL(x) (((x)&0xffff))
#define VAL_ACH(x) (((x) >> 16) & 0xffff)
/* clear the masked field in tgt, then write val into it; wrapped in do-while(0) so the macro behaves as a single statement */
#define SetBitsVal(tgt, val, mask, offset) \
do { \
(tgt) &= ~((mask) << (offset)); \
(tgt) |= (((val) & (mask)) << (offset)); \
} while (0)
/*================================================*/
#endif // EMBED_CMP_NPU
extern const struct s_kdp_memxfer kdp_memxfer_module;
/* Type of Operations */
enum {
NODE_TYPE_IN,
NODE_TYPE_CPU,
NODE_TYPE_OUTPUT,
NODE_TYPE_DATA,
NODE_TYPE_SUPER,
NODE_TYPE_INPUT
};
/* Structures of Data Nodes */
struct super_node_s {
uint32_t node_id;
uint32_t addr;
uint32_t row_start;
uint32_t col_start;
uint32_t ch_start;
uint32_t row_length;
uint32_t col_length;
uint32_t ch_length;
};
struct data_node_s {
uint32_t node_id;
uint32_t supernum;
uint32_t data_format;
uint32_t data_radix;
uint32_t data_scale;
uint32_t row_start;
uint32_t col_start;
uint32_t ch_start;
uint32_t row_length;
uint32_t col_length;
uint32_t ch_length;
struct super_node_s node_list[1];
};
/* Structure of Input Operation */
struct in_node_s {
uint32_t node_id;
uint32_t next_npu;
};
/* Structure of Output Operation */
struct out_node_s {
uint32_t node_id;
uint32_t supernum;
uint32_t data_format;
uint32_t row_start;
uint32_t col_start;
uint32_t ch_start;
uint32_t row_length;
uint32_t col_length;
uint32_t ch_length;
uint32_t output_index;
uint32_t output_radix;
uint32_t output_scale;
struct super_node_s node_list[1];
};
/* Structure of CPU Operation */
struct cpu_node_s {
uint32_t node_id;
uint32_t input_datanode_num;
uint32_t op_type;
/* There will be more parameter here for cpu operation */
uint32_t in_num_row;
uint32_t in_num_col;
uint32_t in_num_ch;
uint32_t out_num_row;
uint32_t out_num_col;
uint32_t out_num_ch;
uint32_t h_pad;
uint32_t w_pad;
uint32_t kernel_h;
uint32_t kernel_w;
uint32_t stride_h;
uint32_t stride_w;
struct data_node_s output_datanode;
struct data_node_s input_datanode[1];
};
/* Structure of CNN Header in setup.bin */
struct cnn_header_s {
uint32_t crc;
uint32_t version;
uint32_t key_offset;
uint32_t model_type;
uint32_t app_type;
uint32_t dram_start;
uint32_t dram_size;
uint32_t input_row;
uint32_t input_col;
uint32_t input_channel;
uint32_t cmd_start;
uint32_t cmd_size;
uint32_t weight_start;
uint32_t weight_size;
uint32_t input_start;
uint32_t input_size;
uint32_t input_radix;
uint32_t output_nums;
};
typedef struct {
uint32_t n_model_source; // 0: not set, 1: from flash, 2: from ddr
uint32_t n_model_count; // model count
struct kdp_model_s p_model_info[KMDW_MODEL_MAX_MODEL_COUNT]; // save model info generated by compiler
uint8_t pn_is_model_loaded_table[KMDW_MODEL_MAX_MODEL_COUNT]; // flag table to indicate if model is loaded
uint32_t n_ddr_addr_model_end; // DDR address of model end = user data start
int32_t n_model_slot_index; // scpu_to_ncpu->model_slot_index
} kmdw_model_data_t;
static kmdw_model_data_t s_model_data = {0};
typedef struct {
int32_t raw_img_idx;
osEventFlagsId_t evt_caller; // event to know/control ncpu
uint32_t caller_e;
osEventFlagsId_t evt_result; // event to know/control npu
uint32_t result_e;
} kmdw_img_data_t;
// ptr to the DDR buffer holding fw_info (uploaded from the host or read from flash)
static kmdw_model_fw_info_t *s_fw_info_buf_p = NULL;
// flag: fw_info has already been read from flash into s_fw_info_buf_p
static bool s_model_loaded_from_flash = false;
static kmdw_img_data_t s_img_data[IPC_IMAGE_ACTIVE_MAX] = {0};
static int32_t s_current_ipc_idx = 0;
static int32_t s_next_ipc_idx = 0;
static bool ModelFromDDR = false; // model location: false = flash, true = DDR
/* ############################
* ## Static Functions ##
* ############################ */
/**
* @brief init ddr space for s_fw_info_buf_p
*
*/
static void _init_fw_info_buf(void)
{
if (NULL == s_fw_info_buf_p) {
s_fw_info_buf_p = (kmdw_model_fw_info_t*)kmdw_ddr_reserve(KDP_FLASH_FW_INFO_SIZE);
if (NULL == s_fw_info_buf_p)
critical_msg("insufficent memory for reading fw_info from flash\n");
}
}
#ifdef EMBED_CMP_NPU
static inline void udt_conf_cmd(void *cmd_addr, int reg_idx, uint16_t val16b, int queue)
{
uint32_t val = 0x80000000;
SetBitsVal(val, queue, MASK_2, 26);
SetBitsVal(val, reg_idx, MASK_10, 16);
SetBitsVal(val, val16b, MASK_16, 0);
memcpy(cmd_addr, &val, sizeof(val));
}
static void udt_npu_model_mem(uint32_t wt_addr, uint32_t out_addr, void *cmd_addr)
{
uint32_t *dst = (uint32_t *)((char *)cmd_addr + CONF_GETW0_CMD_OFFSET);
udt_conf_cmd(dst, ACL_NPU_GETW0, VAL_ACL(wt_addr), GETW_QUEUE);
udt_conf_cmd(dst + 1, ACH_NPU_GETW0, VAL_ACH(wt_addr), GETW_QUEUE);
dst = (uint32_t *)((char *)cmd_addr + CONF_WDMA0_DST0_CMD_OFFSET);
udt_conf_cmd(dst, ACL_NPU_WDMA0_DST0, VAL_ACL(out_addr), CONF_QUEUE);
udt_conf_cmd(dst + 1, ACH_NPU_WDMA0_DST0, VAL_ACH(out_addr), CONF_QUEUE);
}
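/*
* Illustrative sketch (not part of the driver): how a 32-bit NPU configuration
* command word is composed by udt_conf_cmd() above, following the SetBitsVal()
* offsets used there. Bit 31 marks a config command, bits [27:26] select the
* queue, bits [25:16] carry the register index, and bits [15:0] carry the value.
* The numbers below are hypothetical.
*
*   uint32_t val = 0x80000000;                    // config-command marker
*   SetBitsVal(val, GETW_QUEUE, MASK_2, 26);      // queue 3   -> bits 27:26
*   SetBitsVal(val, ACL_NPU_GETW0, MASK_10, 16);  // reg 0x2e  -> bits 25:16
*   SetBitsVal(val, 0x1234, MASK_16, 0);          // low 16 bits of the weight address
*   // val == 0x8C2E1234 for these inputs
*/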
#endif // EMBED_CMP_NPU
/**
* @brief load fw info from flash into the reserved DDR buffer
* @return pointer to the loaded fw info; NULL means the buffer was never allocated
*/
static kmdw_model_fw_info_t* _load_flash_model_info(void)
{
//load fw info from flash once and reuse the loaded data until a reload is requested
if (false == s_model_loaded_from_flash ) {
s_model_loaded_from_flash = true;
kdp_memxfer_module.flash_to_ddr((uint32_t)s_fw_info_buf_p, FLASH_MODEL_FW_INFO_ADDR, KDP_FLASH_FW_INFO_SIZE);
}
return s_fw_info_buf_p;
}
/**
* @brief reset s_model_data
*/
static void _reset_model_data(void)
{
s_model_data.n_model_count = 0;
s_model_data.n_model_source = 0;
memset( s_model_data.p_model_info, 0, sizeof(s_model_data.p_model_info));
memset( s_model_data.pn_is_model_loaded_table, 0 , sizeof(s_model_data.pn_is_model_loaded_table));
//do not reset n_ddr_addr_model_end, which maintains the DDR boundary for models
return;
}
/**
* @brief poll flash status until it is ready or timeout_ms expires
* @param timeout_ms timeout in ms
* @return time in ms until the flash became ready
* -1 means the timeout was hit
*/
//static int32_t _flash_wait_ready(int timeout_ms)
//{
// kdev_flash_status_t flash_status;
// int i;
// for (i = 0; i < timeout_ms; i++) {
// flash_status = kdev_flash_get_status();
// if (flash_status.busy == 0) break;
// kdrv_delay_us(1*1000);
// }
// if (i == timeout_ms) i = -1; // we have timed out
// return i;
//}
/**
* @brief convert a model type to its modelInfo array index
* @param model_type_p: model type (defined in model_type.h)
* @return modelInfo array index (starts from 0)
* -1 means no such model type was found
*/
static int8_t _get_model_info_array_index_by_model_type(uint32_t model_type_p)
{
int i;
for(i=0 ; i < s_model_data.n_model_count; i++) {
if(s_model_data.p_model_info[i].model_type == model_type_p)
return i;
}
return -1;
}
/**
* @brief get fw info extension data from fw_info ptr
* @param[in] fw_info_p the ptr to fw_info
* @return the ptr to fw_info_ext
*/
static kmdw_model_fw_info_ext_t*
_get_fw_info_ext_by_fw_info(kmdw_model_fw_info_t* fw_info_p)
{
if(NULL == fw_info_p)
return NULL;
else {
kmdw_model_fw_info_ext_t* ret = NULL;
uint32_t count;
uint32_t offset;
count = fw_info_p->model_count;
offset = sizeof(struct kdp_model_s) * count;
ret = (kmdw_model_fw_info_ext_t *)((uint32_t)fw_info_p->models + offset);
return ret;
}
}
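/*
* Layout of the fw_info blob walked by _get_fw_info_ext_by_fw_info() above
* (a sketch derived from the offset arithmetic; only fields referenced in this
* file are listed):
*
*   kmdw_model_fw_info_t:
*       model_count
*       models[0] .. models[model_count - 1]     (one struct kdp_model_s each)
*   kmdw_model_fw_info_ext_t (immediately after the last model entry):
*       model_dram_addr_end, model_total_size, model_checksum, ...
*
* i.e. the extension record starts at
*   (uintptr_t)fw_info->models + sizeof(struct kdp_model_s) * fw_info->model_count
*/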
/**
* @brief load model information generated by the compiler
* @param [in] from_ddr: true if the model comes from DDR (host command), false if from flash
* @param [in] reload: force a reload even if model info is already cached
* @return model count
* 0 means no model was loaded in this call
*/
static int32_t _load_model_info(bool from_ddr, bool reload)
{
if (s_model_data.n_model_count && !reload) {
return s_model_data.n_model_count;
}
if (reload) {
_reset_model_data();
s_model_loaded_from_flash = false;
}
kmdw_model_fw_info_t *model_info_p = NULL;
kmdw_model_fw_info_ext_t *model_info2_p = NULL;
// load model Info
if (from_ddr) {
model_info_p = s_fw_info_buf_p;
model_info2_p = _get_fw_info_ext_by_fw_info(model_info_p);
if((NULL == model_info_p) || (NULL == model_info2_p) ) {
s_model_data.n_model_count = 0;
return 0;
}
// Use the version number for new fw_info structure. Model number is in use for dynamic model execution (DME)
//if (*(uint32_t*)(base_addr + 8) == 0) {
// return 0; //error, model_info is not ready
//}
// get model count
s_model_data.n_model_count = model_info_p->model_count;
dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);
if(0 == s_model_data.n_model_count) {
info_msg("[info] model is not in DDR!!\n");
return 0;
} else if (s_model_data.n_model_count > KMDW_MODEL_MAX_MODEL_COUNT) {
info_msg("[ERR] model count is over MAX limit=%d!!\n", KMDW_MODEL_MAX_MODEL_COUNT);
s_model_data.n_model_count = 0;
return 0;
} else {
dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);
}
// get model info
memcpy(s_model_data.p_model_info, (const void*)model_info_p->models,
sizeof(struct kdp_model_s)*s_model_data.n_model_count);
// get ddr model end addr
s_model_data.n_ddr_addr_model_end = model_info2_p->model_dram_addr_end;
if (s_model_data.n_ddr_addr_model_end >= kmdw_ddr_get_heap_tail()) {
err_msg("modelInfo: DDR end address: 0x%x over (>=) boundary 0x%x\n", s_model_data.n_ddr_addr_model_end, kmdw_ddr_get_heap_tail());
return 0;
} else {
dbg_msg("modelInfo: DDR end address: 0x%x\n", s_model_data.n_ddr_addr_model_end);
}
// set model source
s_model_data.n_model_source = 2; // from ddr
} else { // models are stored in flash
model_info_p = _load_flash_model_info(); // this function updates data on s_fw_info_buf_p
model_info2_p = _get_fw_info_ext_by_fw_info(model_info_p);
if((NULL == model_info_p) || (NULL == model_info2_p) ) {
s_model_data.n_model_count = 0;
return 0;
}
// get model count
s_model_data.n_model_count = model_info_p->model_count;
dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);
if (s_model_data.n_model_count == 0xFFFFFFFF) {
err_msg("[info] model is not in flash!!\n");
s_model_data.n_model_count = 0;
return 0;
} else if (s_model_data.n_model_count > KMDW_MODEL_MAX_MODEL_COUNT) {
info_msg("[ERR] model count is over MAX limit=%d!!\n", KMDW_MODEL_MAX_MODEL_COUNT);
s_model_data.n_model_count = 0;
return 0;
} else {
dbg_msg("[DBG] model info: model count:%d\n", s_model_data.n_model_count);
}
// get model info
//FIXME: why does this need to be cloned to s_fw_info_buf_p?
//memcpy(s_fw_info_buf_p, (void *)model_info_p, KDP_FLASH_FW_INFO_SIZE);
memcpy(s_model_data.p_model_info, model_info_p->models, sizeof(struct kdp_model_s)*s_model_data.n_model_count);
// get ddr model end addr
s_model_data.n_ddr_addr_model_end = model_info2_p->model_dram_addr_end;
if (s_model_data.n_ddr_addr_model_end >= kmdw_ddr_get_heap_tail()) {
err_msg("modelInfo: DDR end address: 0x%x over (>=) boundary 0x%x\n", s_model_data.n_ddr_addr_model_end, kmdw_ddr_get_heap_tail());
return 0;
} else {
dbg_msg("modelInfo: DDR end address: 0x%x\n", s_model_data.n_ddr_addr_model_end);
}
// set model source
s_model_data.n_model_source = 1; // from flash
}
// for support of dynamic model execution
*(uint32_t*)(((char*)s_fw_info_buf_p) + 8) = 0; //trick: we will check this word to see if model_info is uploaded
return s_model_data.n_model_count;
}
/**
* @brief load specific model by model info index (the order in flash)
* @param model_index_p: model info index
* @return 0: model not ready, 1: model is loaded
*/
static int32_t _load_model(uint8_t model_index_p/*starts from 0*/)
{
uint32_t ddr_addr_models_head; //start point = the 1st model's cmd.bin
uint32_t ddr_addr_offset;
uint32_t flash_addr;
uint32_t len_to_load;
struct kdp_model_s *p_model;
if(s_model_data.n_model_count == 0)
return 0; // model info is not ready
if(s_model_data.pn_is_model_loaded_table[model_index_p] == 1 )
return 1; //model has been loaded
else
s_model_data.pn_is_model_loaded_table[model_index_p] = 1;
//load model with (index=model_index_p) from flash to DDR
ddr_addr_models_head = s_model_data.p_model_info[0].cmd_mem_addr; //start point = the 1st model's cmd.bin
//load cmd + weight + setup together
p_model = &(s_model_data.p_model_info[model_index_p]);
ddr_addr_offset = p_model->cmd_mem_addr - ddr_addr_models_head;
flash_addr = FLASH_MODEL_ALL_ADDR + ddr_addr_offset;
len_to_load = ALIGN16(p_model->cmd_mem_len) +
ALIGN16(p_model->weight_mem_len) +
ALIGN16(p_model->setup_mem_len);
//model from flash to ddr
kdp_memxfer_module.flash_to_ddr(p_model->cmd_mem_addr, flash_addr, len_to_load);
return 1;
}
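/*
* Illustrative sketch (hypothetical addresses): how _load_model() maps a model's
* DDR command-buffer address to its location inside all_models.bin in flash.
* The model's DDR offset relative to the first model's cmd.bin doubles as its
* flash offset from FLASH_MODEL_ALL_ADDR:
*
*   ddr_addr_models_head = p_model_info[0].cmd_mem_addr;            // e.g. 0x62000000
*   ddr_addr_offset      = p_model->cmd_mem_addr                    // e.g. 0x62100000
*                          - ddr_addr_models_head;                  //   -> 0x00100000
*   flash_addr           = FLASH_MODEL_ALL_ADDR + ddr_addr_offset;
*   len_to_load          = ALIGN16(cmd_mem_len) + ALIGN16(weight_mem_len)
*                          + ALIGN16(setup_mem_len);
*
* so one flash_to_ddr() transfer copies cmd.bin, weight.bin and setup.bin of a
* single model.
*/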
/**
* @brief prepare output_mem_addr2 for ncpu/npu parallel-mode inference
*
* @return 0:OK, -1:Fail
*/
static int32_t _prepare_output_mem_addr2(void)
{
/* Allocate the parallel output buffer if the caller did not provide one */
struct scpu_to_ncpu_s* comm_out = kmdw_ipc_get_output();
uint32_t addr_parallel = comm_out->output_mem_addr2;
if (addr_parallel == 0) {
//TODO, dynamic allocate memory for output_mem_addr2
//uint32_t addr1 = comm_out->models[model_idx].output_mem_addr;
//uint32_t addr2 = comm_out->models[model_idx].buf_addr;
//uint32_t len = comm_out->models[model_idx].output_mem_len;
//if (addr1 == addr2) {
// // Old memory layout, use working buffer length
// len = comm_out->models[model_idx].buf_len;
//}
// reserve more space for larger model outputs (ty_608x608 needs 620160)
uint32_t len = OUTPUT_MEM_ADDR2_SIZE;
addr_parallel = kmdw_ddr_reserve(len);
if (addr_parallel == 0) {
err_msg("Error ddr allocation ncpu/npu parallel buffer, len %d\n", len);
return -1; //error
}
comm_out->output_mem_addr2 = addr_parallel;
comm_out->output_mem_len2 = len;
dbg_msg("allocated Parallel buffer: len %d, addr 0x%x", len, addr_parallel);
}
return 0;
}
#ifdef KL520
/**
* @brief prepare output_mem_addr3 for the MBSSD networks
*
* @param [in] model_type model id
* @return 0:OK, -1:Fail
*/
static int32_t _prepare_output_mem_addr3(uint32_t model_type)
{
if (model_type == KNERON_FD_MBSSD_200_200_3 ||
model_type == KNERON_FD_MASK_MBSSD_200_200_3 ||
model_type == KNERON_OD_MBSSD ||
model_type == KNERON_PD_MBSSD ||
model_type == KNERON_CAR_DETECTION_MBSSD_224_416_3) {
uint32_t *pMemAddr3;
uint32_t len = OUTPUT_MEM_ADDR3_SIZE;
struct scpu_to_ncpu_s* comm_out = kmdw_ipc_get_output();
switch (model_type) {
case KNERON_FD_MBSSD_200_200_3 :
case KNERON_FD_MASK_MBSSD_200_200_3 :
{
static uint32_t mem_addr3_fdssd = 0;
pMemAddr3 = &mem_addr3_fdssd;
break;
}
case KNERON_OD_MBSSD :
{
static uint32_t mem_addr3_odssd = 0;
pMemAddr3 = &mem_addr3_odssd;
break;
}
case KNERON_PD_MBSSD :
{
static uint32_t mem_addr3_pdssd = 0;
pMemAddr3 = &mem_addr3_pdssd;
break;
}
case KNERON_CAR_DETECTION_MBSSD_224_416_3 :
{
static uint32_t mem_addr3_vdssd = 0;
pMemAddr3 = &mem_addr3_vdssd;
break;
}
default :
break;
}
if (*pMemAddr3 == 0) {
*pMemAddr3 = kmdw_ddr_reserve(len*sizeof(uint32_t));
if (*pMemAddr3 == 0) {
err_msg("Error ddr allocation fail for MBSSD network, mem_addr3 len %d\n", len);
return -1; //error
}
*(uint32_t*)(*pMemAddr3) = 0;
}
comm_out->output_mem_addr3 = *pMemAddr3;
}
return 0;
}
#endif
/**
* @brief select a model: load model info if needed, load the model, and configure IPC
* @param [in] model_type: model unique ID defined by Kneron
* @param [in] model_from_ddr: true if the model comes from DDR (host command), false if from flash
* @return model_slot_index (requested by NCPU/NPU)
* -1 : model not found
*/
static int32_t _config_model(uint32_t model_type, bool model_from_ddr)
{
int model_info_idx; // index into the model info table (order is hard coded in flash)
int model_idx;
//check if model info is loaded
if( 0 == _load_model_info(model_from_ddr, false/*reload*/)) {
return -1;
}
if( model_from_ddr == 0 ) {
//FIXME, should remove application related code
/* Special model not in DDR but in ncpu */
if (model_type == KNERON_2D_LIVENESS_224_224_3) {
model_idx = 3;
model_info_idx = 4;
goto model_common;
}
model_info_idx = _get_model_info_array_index_by_model_type(model_type);
if(model_info_idx == -1) {
err_msg("[ERR] model_type[%d] is not found in flash\n", model_type);
return -1;
}
_load_model(model_info_idx);
// FIXME: need to remove the following hard code
model_idx = model_info_idx;
} else {
model_info_idx = _get_model_info_array_index_by_model_type(model_type);
if(model_info_idx == -1) {
err_msg("[ERR] model_type[%d] is not found in DDR\n", model_type);
return -1;
}
model_idx = model_info_idx;
}
model_common:
s_model_data.n_model_slot_index = model_idx;
kmdw_ipc_set_model(s_model_data.p_model_info, model_info_idx, model_idx);
struct kdp_img_raw_s *raw_img = kmdw_model_get_raw_img(s_img_data[s_current_ipc_idx].raw_img_idx);
if (raw_img->inf_format & IMAGE_FORMAT_PARALLEL_PROC) {
if (-1 == _prepare_output_mem_addr2() ) {
return -1;
}
}
#ifdef KL520
if ( -1 == _prepare_output_mem_addr3(model_type) ) {
return -1;
}
#else
struct scpu_to_ncpu_s* p_comm_out = kmdw_ipc_get_output();
if (NULL == p_comm_out->output_mem_addr3) {
uint32_t len = 0x5000;
p_comm_out->output_mem_addr3 = kmdw_ddr_reserve(len*sizeof(uint32_t));
if(NULL == p_comm_out->output_mem_addr3) {
critical_msg("kmdw_model: failed to malloc comm_out->output_mem_addr3\n");
return -1;
}
}
if (NULL == p_comm_out->output_mem_addr4) {
uint32_t len = 8 * (1 << 20);
p_comm_out->output_mem_addr4 = kmdw_ddr_reserve(len);
if (NULL == p_comm_out->output_mem_addr4) {
critical_msg("kmdw_model: failed to malloc comm_out->output_mem_addr4\n");
return -1;
}
}
#endif
kmdw_ipc_set_model_active(model_idx);
return model_idx;
}
/**
* @brief run the model according to the current config settings
* @return status defined by the NCPU
* @note !!! must be called only after the image has been configured (see kmdw_model_config_img())
*/
static int32_t _run_model(void)
{
int active_idx = s_current_ipc_idx;
int raw_img_idx = s_img_data[active_idx].raw_img_idx;
struct kdp_img_raw_s *p_raw_image = kmdw_model_get_raw_img(raw_img_idx);
uint32_t flags, wait_evt;
uint32_t is_abort = 0;
// Start time for ncpu/npu round trip
p_raw_image->tick_start = osKernelGetTickCount();
if (s_img_data[active_idx].evt_caller == NULL)
s_img_data[active_idx].evt_caller = osEventFlagsNew(0);
if(!s_img_data[active_idx].evt_caller)
err_msg("<Run-Model> active_idx=%d, osEventFlagsNew evt_caller failure\n",active_idx);
// set notify for job done
if (s_img_data[active_idx].evt_result) {
/* A result event is registered: wait only on a local NPU-done event so post-processing can finish in parallel. */
wait_evt = FLAG_KMDW_MODEL_FROM_NPU;
} else {
wait_evt = FLAG_KMDW_MODEL_FROM_NCPU;
}
dbg_msg("<Run-Model> wait %d[%d] evt %x\n", raw_img_idx, active_idx, wait_evt);
//assign caller event before triggering ncpu/npu
s_img_data[active_idx].caller_e = wait_evt;
//trigger ncpu/npu
kmdw_ipc_trigger_int(CMD_RUN_NPU);
//check abort signal
flags = osEventFlagsWait(s_img_data[active_idx].evt_caller,
FLAG_KMDW_MODEL_ABORT,
osFlagsWaitAll, 0);
if( flags != osFlagsErrorResource ) {
osEventFlagsClear(s_img_data[active_idx].evt_caller, FLAG_KMDW_MODEL_ABORT);
is_abort = 1;
}
uint32_t wait_timeout = (kmdw_ipc_get_output()->kp_dbg_checkpoinots == 0x0) ? MODEL_INF_TIMEOUT : osWaitForever;
//wait for finish of current task
flags = osEventFlagsWait(s_img_data[active_idx].evt_caller,
wait_evt,
osFlagsNoClear, wait_timeout);
if(flags == osFlagsErrorTimeout){
err_msg("[%s] osEventFlagsWait flag 0x%08x timeout\n", __FUNCTION__, wait_evt);
return IMAGE_STATE_TIMEOUT;
} else if (flags != wait_evt)
dbg_msg("[%s] 1+ events 0x%08x (%d[%d] expected)\n", __FUNCTION__, flags, wait_evt, active_idx);
else
dbg_msg("[DBG][%s] got: raw_img_idx[active_idx]=%d[%d]\n", __FUNCTION__, raw_img_idx, active_idx);
osEventFlagsClear(s_img_data[active_idx].evt_caller, wait_evt);
if( 1 == is_abort ) {
dbg_msg("[DBG][%s] abort after n_model_slot_index = %d\n", __FUNCTION__, s_model_data.n_model_slot_index);
return KMDW_MODEL_RUN_RC_ABORT; //abort
}
return kmdw_ipc_get_input()->result.postproc.img_result.status;
}
__weak osStatus_t kmdw_fifoq_manager_result_enqueue(void *result_buf, int buf_size, bool preempt)
{
return osOK;
}
static void _ipc_handler(struct kdp_img_raw_s *p_raw_image, int state)
{
int ipc_idx = -1;
if(state == 0x999) // FIXME: debug-dump path, a temporary workaround
{
kmdw_ipc_get_input()->kp_dbg_status = 0x0;
osStatus_t sts = kmdw_fifoq_manager_result_enqueue(kmdw_ipc_get_output()->kp_dbg_buffer, 0, false);
if(sts != osOK)
kmdw_printf("send dbg data failed in ipc, err %d\n", sts);
}
else if (state == IMAGE_STATE_RECEIVING) {
ipc_idx = p_raw_image->ref_idx;
// End time for ncpu/npu round trip
p_raw_image->tick_end = osKernelGetSysTimerCount();
if (s_img_data[ipc_idx].evt_result) {
dbg_msg("[done: post: P] ipc_idx: %d, result_e: %d (ram %x)\n", ipc_idx, s_img_data[ipc_idx].result_e, p_raw_image);
osEventFlagsSet(s_img_data[ipc_idx].evt_result, s_img_data[ipc_idx].result_e);
} else {
dbg_msg("[done: post: S] ipc_idx: %d, caller_e: %x.\n", ipc_idx, s_img_data[ipc_idx].caller_e);
osEventFlagsSet(s_img_data[ipc_idx].evt_caller, s_img_data[ipc_idx].caller_e);
}
} else if (state == IMAGE_STATE_ACTIVE){
ipc_idx = s_current_ipc_idx;
dbg_msg("[done: npu: P] ipc_idx: %d, caller_e: %x\n", ipc_idx, s_img_data[ipc_idx].caller_e);
osEventFlagsSet(s_img_data[ipc_idx].evt_caller, s_img_data[ipc_idx].caller_e);
} else {
err_msg("[ERR] wrong state: %d (ipc_idx %d)\n", state, ipc_idx);
}
}
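/*
* Event-flag handshake between _run_model() and _ipc_handler() as implemented
* above (nothing new here, just the flow spelled out):
*
*   serial:   _run_model() waits for FLAG_KMDW_MODEL_FROM_NCPU on evt_caller;
*             _ipc_handler(IMAGE_STATE_RECEIVING) sets caller_e once NCPU
*             post-processing is done.
*   parallel: kmdw_model_config_result() registered evt_result, so _run_model()
*             waits only for FLAG_KMDW_MODEL_FROM_NPU, set by
*             _ipc_handler(IMAGE_STATE_ACTIVE) when the NPU finishes; the final
*             result is signalled later on evt_result/result_e at
*             IMAGE_STATE_RECEIVING.
*/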
/* ############################
* ## Public Functions ##
* ############################ */
void kmdw_model_init(void)
{
kmdw_ipc_initialize(_ipc_handler);
_init_fw_info_buf();
if (NULL != s_fw_info_buf_p)
s_fw_info_buf_p->model_count = 0;
}
int32_t kmdw_model_load_model(int8_t model_info_index_p)
{
int32_t ret = 0;
if(1 != s_model_data.n_model_source || // s_model_data was not loaded from flash
0 == s_model_data.n_model_count) {
if(0 == _load_model_info(false/*from ddr*/, true/*reload*/))
return 0; //error, no model is loaded
}
// load all models
if (KMDW_MODEL_ALL_MODELS == model_info_index_p) {
uint8_t i;
for (i = 0 ; i < s_model_data.n_model_count ; i++) {
ret = _load_model(i);
if( 0 == ret) {
err_msg("[ERR] %s : failed to load model array index:%d\n", __FUNCTION__, i);
return 0;
}
}
// Very slow if turned on; hardware CRC support may be needed.
// Add a new compiler directive if the CRC32 method is also used in other scenarios (e.g. checking the FW image)
#if ENABLE_CRC32
// check CRC value of all_models.bin
kmdw_model_fw_info_t *model_info_p = _load_flash_model_info();
kmdw_model_fw_info_ext_t *model_info2_p = _get_fw_info_ext_by_fw_info(model_info_p);
// cmd_mem_addr of first model is the start address of all_models.bin
uint8_t *addr = (uint8_t *)s_model_data.p_model_info[0].cmd_mem_addr;
uint32_t crc32 = kmdw_utils_crc_gen_crc32(addr, model_info2_p->model_total_size);
dbg_msg("[%s] crc32 calculated: 0x%x\n", __FUNCTION__, crc32);
dbg_msg("[%s] crc32 read from flash: 0x%x\n", __FUNCTION__, model_info2_p->model_checksum);
dbg_msg("[%s] model start address: 0x%x\n", __FUNCTION__, s_model_data.p_model_info[0].cmd_mem_addr);
dbg_msg("[%s] model total size: %d\n", __FUNCTION__, model_info2_p->model_total_size);
if (crc32 != model_info2_p->model_checksum)
{
err_msg("[ERR] %s: all models.bin CRC check failed\n", __FUNCTION__);
return 0;
}
#endif
return s_model_data.n_model_count;
} else { // load specific model
ret = _load_model(model_info_index_p);
return ret;
}
}
int32_t kmdw_model_reload_model_info(bool from_ddr)
{
return _load_model_info(from_ddr, true/*reload*/);
}
int32_t kmdw_model_refresh_models(void) // reload all the models from flash again
{
uint8_t i;
// forcibly update s_model_data, which might have been polluted by a model upload from the host
if(0 == _load_model_info(false/*from ddr*/, true/*reload*/))
return 0; //error, no model is loaded
int ret;
for (i = 0 ; i < s_model_data.n_model_count ; i++) {
if (s_model_data.pn_is_model_loaded_table[i]) { // if previously loaded
s_model_data.pn_is_model_loaded_table[i] = 0;
ret = _load_model(i); // reload the model again
if ( 0 == ret) {
err_msg("[ERR] %s : failed to load model array index:%d\n", __FUNCTION__, i);
return 0;
}
}
}
return s_model_data.n_model_count;
}
int32_t kmdw_model_config_result(osEventFlagsId_t result_evt, uint32_t result_evt_flag)
{
int active_idx = s_current_ipc_idx;
s_img_data[active_idx].evt_result = result_evt;
s_img_data[active_idx].result_e = result_evt_flag;
return 0;
}
void kmdw_model_config_img(struct kdp_img_cfg *img_cfg, void *ext_param)
{
int act_img_idx = img_cfg->image_buf_active_index;
struct kdp_img_raw_s *raw_img = kmdw_model_get_raw_img(act_img_idx);
s_current_ipc_idx = s_next_ipc_idx;
if (img_cfg->inf_format & IMAGE_FORMAT_PARALLEL_PROC)
s_next_ipc_idx = !s_next_ipc_idx;
kmdw_ipc_set_image_active(act_img_idx);
s_img_data[s_current_ipc_idx].raw_img_idx = act_img_idx;
raw_img->state = IMAGE_STATE_ACTIVE;
raw_img->seq_num = act_img_idx;
raw_img->ref_idx = s_current_ipc_idx;
raw_img->num_image = img_cfg->num_image;
raw_img->inf_format = img_cfg->inf_format;
for (int i = 0; i < img_cfg->num_image; i++) {
raw_img->image_list[i].input_row = img_cfg->image_list[i].input_row;
raw_img->image_list[i].input_col = img_cfg->image_list[i].input_col;
raw_img->image_list[i].input_channel = img_cfg->image_list[i].input_channel;
raw_img->image_list[i].format = img_cfg->image_list[i].format;
raw_img->image_list[i].image_mem_addr = img_cfg->image_list[i].image_mem_addr;
raw_img->image_list[i].image_mem_len = img_cfg->image_list[i].image_mem_len;
memcpy(&(raw_img->image_list[i].params_s), &(img_cfg->image_list[i].params_s), sizeof(parameter_t));
}
if (ext_param == NULL) {
memset(raw_img->ext_params, 0, MAX_PARAMS_LEN * 4);
} else {
memcpy(raw_img->ext_params, ext_param, MAX_PARAMS_LEN * 4);
}
}
struct kdp_img_raw_s* kmdw_model_get_raw_img(int idx)
{
struct scpu_to_ncpu_s *comm_out = kmdw_ipc_get_output();
return &(comm_out->raw_images[idx]);
}
int kmdw_model_run(const char *tag, void *output, uint32_t model_type, bool model_from_ddr)
{
int model_idx = _config_model(model_type, model_from_ddr);
if (model_idx < 0) {
return KMDW_MODEL_RUN_RC_ABORT;
}
int img_idx = s_img_data[s_current_ipc_idx].raw_img_idx;
struct kdp_img_raw_s *raw_img = kmdw_model_get_raw_img(img_idx);
raw_img->results[model_idx].result_mem_addr = (uint32_t)output;
dbg_msg("[INFO] %s:\n", tag);
dbg_msg(" model_idx = %d\n", model_idx);
dbg_msg(" model type = %d\n", model_type);
dbg_msg(" ref_idx = %d\n", raw_img->ref_idx);
dbg_msg(" inf_format = 0x%X\n", raw_img->inf_format);
dbg_msg(" output addr = 0x%x\n", raw_img->results[model_idx].result_mem_addr);
dbg_msg(" ext_params(first 4)= %d/%d/%d/%d\n", raw_img->ext_params[0], raw_img->ext_params[1],
raw_img->ext_params[2], raw_img->ext_params[3]);
for (int i = 0; i < raw_img->num_image; i++) {
dbg_msg(" image index: %d\n", i);
dbg_msg(" (row/col/ch) = %d/%d/%d\n", raw_img->image_list[i].input_row,
raw_img->image_list[i].input_col,
raw_img->image_list[i].input_channel);
dbg_msg(" image format = 0x%x\n", raw_img->image_list[i].format);
dbg_msg(" crop(tp/bt/lf/rt) = %d/%d/%d/%d\n", raw_img->image_list[i].params_s.crop_top,
raw_img->image_list[i].params_s.crop_bottom,
raw_img->image_list[i].params_s.crop_left,
raw_img->image_list[i].params_s.crop_right);
dbg_msg(" image addr = 0x%x\n", raw_img->image_list[i].image_mem_addr);
}
return _run_model();
}
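/*
* Illustrative usage sketch (caller-side code, not part of the driver): a
* minimal single-image inference through the public API. img_addr is a
* hypothetical DDR address that already holds the image; the dimensions and
* the chosen model type are examples only.
*
*   struct kdp_img_cfg img_cfg = {0};
*   uint32_t out_buf = kmdw_ddr_reserve(0x100000);   // result buffer
*
*   kmdw_model_init();
*   kmdw_model_load_model(KMDW_MODEL_ALL_MODELS);    // preload all models from flash
*
*   img_cfg.image_buf_active_index = 0;
*   img_cfg.num_image = 1;
*   img_cfg.inf_format = 0;                          // serial (non-parallel) mode
*   img_cfg.image_list[0].input_row = 480;
*   img_cfg.image_list[0].input_col = 640;
*   img_cfg.image_list[0].input_channel = 3;
*   img_cfg.image_list[0].image_mem_addr = img_addr;
*
*   kmdw_model_config_img(&img_cfg, NULL);
*   int status = kmdw_model_run("fd_mbssd", (void *)out_buf,
*                               KNERON_FD_MBSSD_200_200_3, false);
*
* For parallel mode, set IMAGE_FORMAT_PARALLEL_PROC in inf_format and register
* a result event with kmdw_model_config_result() before calling kmdw_model_run().
*/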
void kmdw_model_abort(void)
{
int active_idx = s_current_ipc_idx;
if( 0 == s_img_data[active_idx].evt_caller)
return;
osEventFlagsSet(s_img_data[active_idx].evt_caller, FLAG_KMDW_MODEL_ABORT);
}
struct kdp_model_s* kmdw_model_get_model_info(int model_idx_p)
{
if (s_model_data.n_model_count == 0) {
return NULL;
} else if (model_idx_p >= s_model_data.n_model_count) {
return NULL;
} else {
return &(s_model_data.p_model_info[model_idx_p]);
}
}
void kmdw_model_get_run_time(int img_idx, kmdw_model_run_time_t *run_time/*out*/)
{
struct kdp_img_raw_s *p_raw_image;
if (run_time == NULL)
return;
p_raw_image = kmdw_model_get_raw_img(img_idx);
run_time->round_trip_time = p_raw_image->tick_end - p_raw_image->tick_start;
run_time->pre_proc_time = p_raw_image->tick_end_pre - p_raw_image->tick_start_pre;
run_time->npu_proc_time = p_raw_image->tick_end_npu - p_raw_image->tick_start_npu;
run_time->post_proc_time = p_raw_image->tick_end_post - p_raw_image->tick_start_post;
}
int kmdw_model_is_model_loaded(uint32_t model_type)
{
if (_get_model_info_array_index_by_model_type(model_type) == -1)
return 0;
else
return 1;
}
uint32_t *kmdw_model_get_all_model_info(bool trust_ddr_data)
{
static uint32_t *s_p_model_id_list = NULL; //[model_count, id0, id1, id2 ...]
kmdw_model_fw_info_t *fw_info_ptr = NULL;
fw_info_ptr = kmdw_model_get_fw_info(trust_ddr_data);
if (fw_info_ptr) {
if (NULL == s_p_model_id_list)
s_p_model_id_list = (uint32_t *)calloc(1+KMDW_MODEL_MAX_MODEL_COUNT, sizeof(uint32_t));
if (NULL == s_p_model_id_list) {
err_msg("[ERR] insufficent memory for model id list\n");
} else {
int i;
uint32_t model_id;
s_p_model_id_list[0] = fw_info_ptr->model_count;
dbg_msg("%s:\n", __FUNCTION__);
dbg_msg("Model Count = %d\n", s_p_model_id_list[0]);
for (i = 0 ; i < s_p_model_id_list[0]; i++) {
model_id = fw_info_ptr->models[i].model_type;
dbg_msg("Extract Model ID %d\n", model_id);
s_p_model_id_list[i+1] = model_id;
}
}
return s_p_model_id_list;
} else {
return NULL;
}
}
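/*
* Illustrative sketch: the list returned by kmdw_model_get_all_model_info()
* holds the model count in element 0 followed by one model type per entry, so
* a caller can walk it like this (caller-side code):
*
*   uint32_t *list = kmdw_model_get_all_model_info(false);
*   if (list != NULL) {
*       for (uint32_t i = 0; i < list[0]; i++)
*           kmdw_printf("model[%u] type = %u\n", i, list[i + 1]);
*   }
*/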
uint32_t kmdw_model_get_crc(bool trust_ddr_data)
{
uint32_t ret = 0;
kmdw_model_fw_info_t *fw_info_ptr;
kmdw_model_fw_info_ext_t *fw_info_ext_ptr;
fw_info_ptr = kmdw_model_get_fw_info(trust_ddr_data);
fw_info_ext_ptr = _get_fw_info_ext_by_fw_info(fw_info_ptr);
if (fw_info_ext_ptr) {
ret = fw_info_ext_ptr->model_checksum;
}
dbg_msg("%s = 0x%x\n", __FUNCTION__, ret);
return ret;
}
kmdw_model_fw_info_t *kmdw_model_get_fw_info(bool trust_ddr_data)
{
uint32_t model_cnt;
kmdw_model_fw_info_t *fw_info_ptr = s_fw_info_buf_p;
if (false == trust_ddr_data) {
if ((0 >= s_model_data.n_model_count) ||
((1 != s_model_data.n_model_source) && (2 != s_model_data.n_model_source))) {
fw_info_ptr = NULL;
} else {
model_cnt = fw_info_ptr->model_count;
if ((0 == model_cnt) || (model_cnt > KMDW_MODEL_MAX_MODEL_COUNT)) {
fw_info_ptr = NULL;
}
}
}
return fw_info_ptr;
}
uint32_t kmdw_model_get_model_end_addr(bool trust_ddr_data)
{
uint32_t ret = 0;
kmdw_model_fw_info_t* fw_info_ptr;
kmdw_model_fw_info_ext_t* fw_info_ext_ptr = NULL;
if (0 != s_model_data.n_ddr_addr_model_end) {
ret = s_model_data.n_ddr_addr_model_end;
goto FUNC_OUT;
}
fw_info_ptr = kmdw_model_get_fw_info(trust_ddr_data);
fw_info_ext_ptr = _get_fw_info_ext_by_fw_info(fw_info_ptr);
if (fw_info_ext_ptr) {
ret = fw_info_ext_ptr->model_dram_addr_end;
}
FUNC_OUT:
dbg_msg("%s = 0x%x\n", __FUNCTION__, ret);
return ret;
}
void kmdw_model_set_location(bool model_inddr)
{
ModelFromDDR = model_inddr;
}
bool kmdw_model_get_location(void)
{
return ModelFromDDR;
}
int kmdw_model_get_input_tensor_num(uint32_t model_type)
{
int model_idx = 0;
model_idx = _get_model_info_array_index_by_model_type(model_type);
if (model_idx >= 0) {
/******************************************************************
* KL520 only support single input model
******************************************************************/
return 1;
} else {
err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
return 0;
}
}
int kmdw_model_get_input_tensor_info(uint32_t model_type, uint32_t tensor_idx, kmdw_model_tensor_descriptor_t *tensor_info)
{
int ret = 1;
int model_idx = 0;
uint32_t p_setup_bin = 0;
struct cnn_header_s *target_input_node = NULL;
if (NULL == tensor_info) {
err_msg("[%s] NULL tensor_info pointer\n", __FUNCTION__);
ret = 0;
goto FUNC_OUT;
}
model_idx = _get_model_info_array_index_by_model_type(model_type);
if (model_idx >= 0) {
struct kdp_model_s *p_model_info = kmdw_model_get_model_info(model_idx);
if (NULL != p_model_info) {
p_setup_bin = p_model_info->setup_mem_addr;
} else {
err_msg("[%s] NULL model info pointer %d\n", __FUNCTION__);
return 0;
}
} else {
err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
ret = 0;
goto FUNC_OUT;
}
if (tensor_idx >= 1) {
err_msg("[%s] tensor index out of range %d\n", __FUNCTION__, tensor_idx);
ret = 0;
goto FUNC_OUT;
}
target_input_node = (struct cnn_header_s *)p_setup_bin;
tensor_info->index = 1;
tensor_info->shape_npu_len = 4;
tensor_info->shape_npu[0] = 1;
tensor_info->shape_npu[1] = target_input_node->input_channel;
tensor_info->shape_npu[2] = target_input_node->input_row;
tensor_info->shape_npu[3] = target_input_node->input_col;
tensor_info->data_layout = DATA_FMT_4W4C8B;
tensor_info->scale = 1.0;
tensor_info->radix = target_input_node->input_radix;
FUNC_OUT:
return ret;
}
int kmdw_model_get_output_tensor_num(uint32_t model_type)
{
int model_idx = 0;
struct kdp_model_s *p_model_info = NULL;
model_idx = _get_model_info_array_index_by_model_type(model_type);
if (model_idx >= 0) {
p_model_info = kmdw_model_get_model_info(model_idx);
} else {
err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
return 0;
}
/******************************************************************
* legacy setup.bin model
******************************************************************/
if (NULL != p_model_info) {
return ((struct cnn_header_s *)p_model_info->setup_mem_addr)->output_nums;
} else {
err_msg("[%s] NULL model info pointer %d\n", __FUNCTION__);
return 0;
}
}
int kmdw_model_get_output_tensor_info(uint32_t model_type, uint32_t tensor_idx, kmdw_model_tensor_descriptor_t *tensor_info)
{
int ret = 1;
int model_idx = 0;
uint32_t p_setup_bin = 0;
uint32_t node_num = 0;
uint32_t setup_buff_offset = sizeof(struct cnn_header_s);
uint32_t setup_buff_size = 0;
struct out_node_s *target_output_node = NULL;
struct out_node_s *output_node = NULL;
if (NULL == tensor_info) {
err_msg("[%s] NULL tensor_info pointer\n", __FUNCTION__);
ret = 0;
goto FUNC_OUT;
}
model_idx = _get_model_info_array_index_by_model_type(model_type);
if (model_idx >= 0) {
struct kdp_model_s *p_model_info = kmdw_model_get_model_info(model_idx);
if (NULL != p_model_info) {
p_setup_bin = p_model_info->setup_mem_addr;
setup_buff_size = p_model_info->setup_mem_len;
} else {
err_msg("[%s] NULL model info pointer %d\n", __FUNCTION__);
return 0;
}
} else {
err_msg("[%s] invalid model id %d\n", __FUNCTION__, model_type);
ret = 0;
goto FUNC_OUT;
}
node_num = ((struct cnn_header_s *)p_setup_bin)->output_nums;
if (tensor_idx >= node_num) {
err_msg("[%s] tensor index out of range %d\n", __FUNCTION__, tensor_idx);
ret = 0;
goto FUNC_OUT;
}
while ((setup_buff_offset < setup_buff_size) && (NULL == target_output_node)) {
uintptr_t node_buff = (uintptr_t)p_setup_bin + setup_buff_offset;
uint32_t node_id = *(uint32_t *)node_buff;
uint32_t node_offset = 0;
switch (node_id) {
case NODE_TYPE_IN:
// NPU IN Signal NODE
dbg_msg("current node is an NPU IN Signal NODE\n");
node_offset = sizeof(struct in_node_s);
break;
case NODE_TYPE_CPU:
// CPU NODE
dbg_msg("current node is a CPU NODE\n");
node_offset = sizeof(struct cpu_node_s) - (2 * sizeof(struct data_node_s));
break;
case NODE_TYPE_OUTPUT:
// OUTPUT NODE
dbg_msg("current node is a output NODE\n");
output_node = (struct out_node_s *)node_buff;
node_offset = sizeof(struct out_node_s) - (sizeof(struct super_node_s));
if (output_node->output_index == tensor_idx)
target_output_node = output_node;
break;
case NODE_TYPE_DATA:
// NPU DATA NODE
dbg_msg("current node is an network data NODE\n");
node_offset = sizeof(struct data_node_s) - sizeof(struct super_node_s);
break;
case NODE_TYPE_SUPER:
// NPU SUPER NODE
dbg_msg("current node is an network super NODE\n");
node_offset = sizeof(struct super_node_s);
break;
default:
// Unknown NODE
err_msg("[%s] unknown node type: %d\n", __FUNCTION__, node_id);
ret = 0;
goto FUNC_OUT;
}
setup_buff_offset += node_offset;
}
if (NULL == target_output_node) {
err_msg("[%s] can not find target index node %d\n", __FUNCTION__, tensor_idx);
ret = 0;
goto FUNC_OUT;
}
tensor_info->index = target_output_node->output_index;
tensor_info->shape_npu_len = 4;
tensor_info->shape_npu[0] = 1;
tensor_info->shape_npu[1] = target_output_node->ch_length;
tensor_info->shape_npu[2] = target_output_node->row_length;
tensor_info->shape_npu[3] = target_output_node->col_length;
tensor_info->data_layout = target_output_node->data_format;
tensor_info->scale = *(float *)&(target_output_node->output_scale);
tensor_info->radix = target_output_node->output_radix;
FUNC_OUT:
return ret;
}
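/*
* Illustrative sketch: enumerating the output tensors of a model through the
* two query functions above (caller-side code; the field usage follows the
* assignments in kmdw_model_get_output_tensor_info()):
*
*   kmdw_model_tensor_descriptor_t td;
*   int num = kmdw_model_get_output_tensor_num(model_type);
*   for (int i = 0; i < num; i++) {
*       if (kmdw_model_get_output_tensor_info(model_type, i, &td) == 1) {
*           kmdw_printf("out[%d] ch/row/col = %u/%u/%u radix %u\n",
*                       i, td.shape_npu[1], td.shape_npu[2], td.shape_npu[3],
*                       td.radix);
*       }
*   }
*/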
#ifdef EMBED_CMP_NPU
int8_t kmdw_model_add_update_model(uint32_t model_type,
int cmd_len, int wt_len, int input_len, int output_len, int setup_len,
uint32_t cmd_mem_addr, uint32_t wt_mem_addr,
uint32_t input_mem_addr, uint32_t output_mem_addr, uint32_t setup_mem_addr)
{
int model_info_idx = _get_model_info_array_index_by_model_type(model_type);
if (model_info_idx < 0) {
int model_count = s_model_data.n_model_count + 1;
s_model_data.n_model_count = model_count;
model_info_idx = model_count - 1;
s_model_data.p_model_info[model_info_idx].model_type = model_type;
s_model_data.p_model_info[model_info_idx].cmd_mem_addr = cmd_mem_addr;
s_model_data.p_model_info[model_info_idx].cmd_mem_len = cmd_len;
s_model_data.p_model_info[model_info_idx].weight_mem_len = wt_len;
s_model_data.p_model_info[model_info_idx].input_mem_addr = input_mem_addr;
s_model_data.p_model_info[model_info_idx].input_mem_len = input_len;
s_model_data.p_model_info[model_info_idx].output_mem_len = output_len;
s_model_data.p_model_info[model_info_idx].buf_len = output_len;
s_model_data.p_model_info[model_info_idx].setup_mem_addr = setup_mem_addr;
s_model_data.p_model_info[model_info_idx].setup_mem_len = setup_len;
s_model_data.pn_is_model_loaded_table[model_info_idx] = 1;
}
s_model_data.p_model_info[model_info_idx].weight_mem_addr = wt_mem_addr;
s_model_data.p_model_info[model_info_idx].output_mem_addr = output_mem_addr;
s_model_data.p_model_info[model_info_idx].buf_addr = output_mem_addr;
dbg_msg("[%s] model cmd addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].cmd_mem_addr);
dbg_msg("[%s] model wt addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].weight_mem_addr);
dbg_msg("[%s] model input addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].input_mem_addr);
dbg_msg("[%s] model output addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].output_mem_addr);
dbg_msg("[%s] model buf addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].buf_addr);
dbg_msg("[%s] model setup addr: 0x%x\n", __func__, s_model_data.p_model_info[model_info_idx].setup_mem_addr);
udt_npu_model_mem(wt_mem_addr, output_mem_addr, (void *)cmd_mem_addr);
return 0;
}
#endif // EMBED_CMP_NPU
#if DEBUG
void kmdw_model_dump_model_info(void)
{
struct kdp_model_s *p_modelInfo = 0;
uint8_t i;
dbg_msg("Model info Count = %d\n", s_model_data.n_model_count);
for (i = 0 ; i < s_model_data.n_model_count ; i++) {
p_modelInfo = &(kmdw_model_data.p_model_info[i]);
dbg_msg("Model(%2d) model_type(%3d)/version(%5d):\n",
(i+1),
p_modelInfo->model_type, p_modelInfo->model_version);
dbg_msg("input[%x](sz:%d) -> cmd[%x](sz:%d),weight[%x](sz:%d),setup[%x](sz:%d),buf[%x](sz:%d) -> out[%x](sz:%d)\n",
p_modelInfo->input_mem_addr, p_modelInfo->input_mem_len,
p_modelInfo->cmd_mem_addr, p_modelInfo->cmd_mem_len,
p_modelInfo->weight_mem_addr,p_modelInfo->weight_mem_len,
p_modelInfo->setup_mem_addr, p_modelInfo->setup_mem_len,
p_modelInfo->buf_addr, p_modelInfo->buf_len,
p_modelInfo->output_mem_addr,p_modelInfo->output_mem_len);
}
return;
}
#endif // DEBUG