2025-12-17 15:55:25 +08:00

467 lines
16 KiB
C

/*
* Kneron Header for KDP on KL520
*
* Copyright (C) 2018-2019 Kneron, Inc. All rights reserved.
*
*/
#ifndef KDPIO_H
#define KDPIO_H
#include <stdint.h>
#include "ipc.h"
#define MAX_MODEL_REGISTRATIONS 32
/* Type of Operations */
enum {
NODE_TYPE_IN,
NODE_TYPE_CPU,
NODE_TYPE_OUT,
};
/* Structures of Data Nodes */
struct super_node_s {
uint32_t node_id;
uint32_t addr;
uint32_t row_start;
uint32_t col_start;
uint32_t ch_start;
uint32_t row_length;
uint32_t col_length;
uint32_t ch_length;
};
struct data_node_s {
uint32_t node_id;
uint32_t supernum;
uint32_t data_format;
uint32_t data_radix;
uint32_t data_scale;
uint32_t row_start;
uint32_t col_start;
uint32_t ch_start;
uint32_t row_length;
uint32_t col_length;
uint32_t ch_length;
struct super_node_s node_list[1];
};
/* Structure of Input Operation */
struct in_node_s {
uint32_t node_id;
uint32_t next_npu;
};
/* Structure of Output Operation */
struct out_node_s {
uint32_t node_id;
uint32_t supernum;
uint32_t data_format;
uint32_t row_start;
uint32_t col_start;
uint32_t ch_start;
uint32_t row_length;
uint32_t col_length;
uint32_t ch_length;
uint32_t output_index;
uint32_t output_radix;
uint32_t output_scale;
struct super_node_s node_list[1];
};
/* Structure of CPU Operation */
struct cpu_node_s {
uint32_t node_id;
uint32_t input_datanode_num;
uint32_t op_type;
/* There will be more parameter here for cpu operation */
uint32_t in_num_row;
uint32_t in_num_col;
uint32_t in_num_ch;
uint32_t out_num_row;
uint32_t out_num_col;
uint32_t out_num_ch;
uint32_t h_pad;
uint32_t w_pad;
uint32_t kernel_h;
uint32_t kernel_w;
uint32_t stride_h;
uint32_t stride_w;
struct data_node_s output_datanode;
struct data_node_s input_datanode[1];
};
/* Structure of CNN Header in setup.bin */
struct cnn_header_s {
uint32_t crc;
uint32_t version;
uint32_t key_offset;
uint32_t model_type;
uint32_t app_type;
uint32_t dram_start;
uint32_t dram_size;
uint32_t input_row;
uint32_t input_col;
uint32_t input_channel;
uint32_t cmd_start;
uint32_t cmd_size;
uint32_t weight_start;
uint32_t weight_size;
uint32_t input_start;
uint32_t input_size;
uint32_t input_radix;
uint32_t output_nums;
};
/* Structure of setup.bin file */
struct setup_struct_s {
struct cnn_header_s header;
union {
struct in_node_s in_nd;
struct out_node_s out_nd;
struct cpu_node_s cpu_nd;
} nodes[1];
};
/* Structure of kdp_model_dim */
struct kdp_model_dim_s {
/* CNN input dimensions */
uint32_t input_row;
uint32_t input_col;
uint32_t input_channel;
};
/* Structure of kdp_pre_proc_s */
struct kdp_pre_proc_s {
/* input image in memory for NPU */
uint32_t input_mem_addr;
int32_t input_mem_len;
/* Input working buffers for NPU */
uint32_t input_mem_addr2;
int32_t input_mem_len2;
/* data memory for inproc array */
uint32_t inproc_mem_addr;
/* number of bits for input fraction */
uint32_t input_radix;
/* Other parameters for the model */
void *params_p;
};
/* Structure of kdp_post_proc_s */
struct kdp_post_proc_s {
/* output number */
uint32_t output_num;
/* output data memory from NPU */
uint32_t output_mem_addr;
int32_t output_mem_len;
/* result data memory from post processing */
uint32_t result_mem_addr;
int32_t result_mem_len;
/* 2nd output data memory for parallel processing */
uint32_t output_mem_addr2;
uint32_t output_mem_len2;
/* data memory for post processing */
uint32_t output_mem_addr3;
uint32_t output_mem_addr4;
/* output data format from NPU
* BIT(0): =0, 8-bits
* =1, 16-bits
*/
uint32_t output_format;
/* output node parameter */
struct out_node_s *node_p;
/* Other parameters for the model */
void *params_p;
};
/* Structure of kdp_cpu_op_s */
struct kdp_cpu_op_s {
/* cpu op node parameter */
struct cpu_node_s *node_p;
};
/* KDP image structure */
struct kdp_image_s {
/* Original image and model */
struct kdp_img_raw_s *raw_img_p;
struct kdp_model_s *model_p;
int model_id;
char *setup_mem_p;
/* Model dimension */
struct kdp_model_dim_s dim;
/* Pre process struct */
struct kdp_pre_proc_s preproc;
/* Post process struct */
struct kdp_post_proc_s postproc;
/* CPU operation struct */
struct kdp_cpu_op_s cpu_op;
};
/* Helper macros */
#define RAW_INFERENCE_FORMAT(image_p) (image_p->raw_img_p->inf_format)
#define RAW_IMAGE_MEM_ADDR(image_p) (image_p->raw_img_p->image_list[0].image_mem_addr)
#define RAW_IMAGE_MEM_LEN(image_p) (image_p->raw_img_p->image_list[0].image_mem_len)
#define RAW_FORMAT(image_p) (image_p->raw_img_p->image_list[0].format)
#define RAW_INPUT_ROW(image_p) (image_p->raw_img_p->image_list[0].input_row)
#define RAW_INPUT_COL(image_p) (image_p->raw_img_p->image_list[0].input_col)
#define RAW_CROP_TOP(image_p) (image_p->raw_img_p->image_list[0].params_s.crop_top)
#define RAW_CROP_BOTTOM(image_p) (image_p->raw_img_p->image_list[0].params_s.crop_bottom)
#define RAW_CROP_LEFT(image_p) (image_p->raw_img_p->image_list[0].params_s.crop_left)
#define RAW_CROP_RIGHT(image_p) (image_p->raw_img_p->image_list[0].params_s.crop_right)
#define RAW_PAD_TOP(image_p) (image_p->raw_img_p->image_list[0].params_s.pad_top)
#define RAW_PAD_BOTTOM(image_p) (image_p->raw_img_p->image_list[0].params_s.pad_bottom)
#define RAW_PAD_LEFT(image_p) (image_p->raw_img_p->image_list[0].params_s.pad_left)
#define RAW_PAD_RIGHT(image_p) (image_p->raw_img_p->image_list[0].params_s.pad_right)
#define RAW_SCALE_WIDTH(image_p) (image_p->raw_img_p->image_list[0].params_s.scale_width)
#define RAW_SCALE_HEIGHT(image_p) (image_p->raw_img_p->image_list[0].params_s.scale_height)
#define RAW_OTHER_PARAMS(image_p) (image_p->raw_img_p->ext_params)
#define RAW_TICK_START_PRE(image_p) (image_p->raw_img_p->tick_start_pre)
#define RAW_TICK_END_PRE(image_p) (image_p->raw_img_p->tick_end_pre)
#define RAW_TICK_START_NPU(image_p) (image_p->raw_img_p->tick_start_npu)
#define RAW_TICK_END_NPU(image_p) (image_p->raw_img_p->tick_end_npu)
#define RAW_TICK_START_POST(image_p) (image_p->raw_img_p->tick_start_post)
#define RAW_TICK_END_POST(image_p) (image_p->raw_img_p->tick_end_post)
#define DIM_INPUT_ROW(image_p) (image_p->dim.input_row)
#define DIM_INPUT_COL(image_p) (image_p->dim.input_col)
#define DIM_INPUT_CH(image_p) (image_p->dim.input_channel)
#define PREPROC_INPROC_MEM_ADDR(image_p) (image_p->preproc.inproc_mem_addr)
#define PREPROC_INPUT_MEM_ADDR(image_p) (image_p->preproc.input_mem_addr)
#define PREPROC_INPUT_MEM_LEN(image_p) (image_p->preproc.input_mem_len)
#define PREPROC_INPUT_MEM_ADDR2(image_p) (image_p->preproc.input_mem_addr2)
#define PREPROC_INPUT_MEM_LEN2(image_p) (image_p->preproc.input_mem_len2)
#define PREPROC_INPUT_RADIX(image_p) (image_p->preproc.input_radix)
#define PREPROC_PARAMS_P(image_p) (image_p->preproc.params_p)
#define POSTPROC_OUTPUT_NUM(image_p) (image_p->postproc.output_num)
#define POSTPROC_OUTPUT_FORMAT(image_p) (image_p->postproc.output_format)
#define POSTPROC_OUTPUT_MEM_ADDR(image_p) (image_p->postproc.output_mem_addr)
#define POSTPROC_OUTPUT_MEM_LEN(image_p) (image_p->postproc.output_mem_len)
#define POSTPROC_RESULT_MEM_ADDR(image_p) (image_p->postproc.result_mem_addr)
#define POSTPROC_RESULT_MEM_LEN(image_p) (image_p->postproc.result_mem_len)
#define POSTPROC_PARAMS_P(image_p) (image_p->postproc.params_p)
#define POSTPROC_OUTPUT_MEM_ADDR2(image_p) (image_p->postproc.output_mem_addr2)
#define POSTPROC_OUTPUT_MEM_LEN2(image_p) (image_p->postproc.output_mem_len2)
#define POSTPROC_OUTPUT_MEM_ADDR3(image_p) (image_p->postproc.output_mem_addr3)
#define POSTPROC_OUTPUT_MEM_ADDR4(image_p) (image_p->postproc.output_mem_addr4)
#define POSTPROC_OUT_NODE(image_p) (image_p->postproc.node_p)
#define POSTPROC_OUT_NODE_COL(image_p) (image_p->postproc.node_p->col_length)
#define POSTPROC_OUT_NODE_ROW(image_p) (image_p->postproc.node_p->row_length)
#define POSTPROC_OUT_NODE_CH(image_p) (image_p->postproc.node_p->ch_length)
#define POSTPROC_OUT_NODE_RADIX(image_p) (image_p->postproc.node_p->output_radix)
#define POSTPROC_OUT_NODE_SCALE(image_p) (image_p->postproc.node_p->output_scale)
#define POSTPROC_OUT_NODE_ADDR(image_p) (image_p->postproc.node_p->node_list[0].addr)
#define OUT_NODE_COL(out_p) (out_p->col_length)
#define OUT_NODE_ROW(out_p) (out_p->row_length)
#define OUT_NODE_CH(out_p) (out_p->ch_length)
#define OUT_NODE_RADIX(out_p) (out_p->output_radix)
#define OUT_NODE_SCALE(out_p) (out_p->output_scale)
#define OUT_NODE_ADDR(out_p) (out_p->node_list[0].addr)
#define OUT_NODE_ADDR_PARALLEL(out_p, image_p) \
(OUT_NODE_ADDR(out_p) + POSTPROC_OUTPUT_MEM_ADDR(image_p) - MODEL_OUTPUT_MEM_ADDR(image_p))
#define CPU_OP_NODE(image_p) (image_p->cpu_op.node_p)
#define CPU_OP_NODE_OP_TYPE(image_p) (image_p->cpu_op.node_p->op_type)
#define CPU_OP_NODE_INPUT_COL(image_p) (image_p->cpu_op.node_p->in_num_col)
#define CPU_OP_NODE_INPUT_ROW(image_p) (image_p->cpu_op.node_p->in_num_row)
#define CPU_OP_NODE_INPUT_CH(image_p) (image_p->cpu_op.node_p->in_num_ch)
#define CPU_OP_NODE_INPUT_ADDR(image_p) (image_p->cpu_op.node_p->input_datanode[0].node_list[0].addr)
#define CPU_OP_NODE_OUTPUT_COL(image_p) (image_p->cpu_op.node_p->out_num_col)
#define CPU_OP_NODE_OUTPUT_ROW(image_p) (image_p->cpu_op.node_p->out_num_row)
#define CPU_OP_NODE_OUTPUT_CH(image_p) (image_p->cpu_op.node_p->out_num_ch)
#define CPU_OP_NODE_OUTPUT_ADDR(image_p) (image_p->cpu_op.node_p->output_datanode.node_list[0].addr)
#define MODEL_P(image_p) (image_p->model_p)
#define MODEL_ID(image_p) (image_p->model_id)
#define MODEL_SETUP_MEM_P(image_p) (image_p->setup_mem_p)
#define MODEL_CMD_MEM_ADDR(image_p) (MODEL_P(image_p)->cmd_mem_addr)
#define MODEL_CMD_MEM_LEN(image_p) (MODEL_P(image_p)->cmd_mem_len)
#define MODEL_WEIGHT_MEM_ADDR(image_p) (MODEL_P(image_p)->weight_mem_addr)
#define MODEL_BUF_ADDR(image_p) (MODEL_P(image_p)->buf_addr)
#define MODEL_SETUP_MEM_ADDR(image_p) (MODEL_P(image_p)->setup_mem_addr)
#define MODEL_INPUT_MEM_ADDR(image_p) (MODEL_P(image_p)->input_mem_addr)
#define MODEL_INPUT_MEM_LEN(image_p) (MODEL_P(image_p)->input_mem_len)
#define MODEL_OUTPUT_MEM_ADDR(image_p) (MODEL_P(image_p)->output_mem_addr)
#define MODEL_OUTPUT_MEM_LEN(image_p) (MODEL_P(image_p)->output_mem_len)
/* API */
/* Return code */
#define RET_ERROR -1
#define RET_NO_ERROR 0
#define RET_NEXT_PRE_PROC 1
#define RET_NEXT_NPU 2
#define RET_NEXT_CPU 3
#define RET_NEXT_POST_PROC 4
/* Prototypes for callback functions */
// used for both pre/post process
typedef int (*model_pre_post_fn)(struct kdp_image_s *image_p);
typedef struct model_pre_post_func_s {
int model_id;
model_pre_post_fn ppf;
} model_pre_post_func_t;
typedef void (*pre_post_fn)(void *dst_p, void *src_p, int size);
/**
* kdpio_init() - initialize kdpio
*
* This function tells kdpio to initialize the platform and
* resources for NPU support.
*
*/
void kdpio_init(void);
void npu_reset(void);
void kdpio_sdk_init(void);
/**
* kdpio_handle_npu_int() - Kneron NPU interrupt handler
*
* This is the interrupt handler for Kneron NPU.
*
* Return value:
* 0 : success
* <0 : error
*/
int kdpio_handle_npu_int(void);
/**
* kdp_preproc_inproc() - Kneron preprocessing procedure
*
* @image_p: pointer to struct kdp_image with buffer of raw image
* and dimension, and data for pre/cpu/post processing.
*
* @model_id: the model id this function was registered for
*
* This is a preprocess function which uses Kneron NPU to accelerate the
* processing and uses Kneron NCPU to do rotation and right-shift. It can take parameters passed in to do resize, crop, padding
* normalization like -128, rotation, and right-shift.
*
* Return value:
* 0 : success
* <0 : error
*/
int kdp_preproc_inproc(int model_id, struct kdp_image_s *image_p);
/**
* kdpio_set_model() - set the model for an input image
*
* @image_p: pointer to struct kdp_image with buffer of raw image
* and dimension, and data for pre/cpu/post processing.
*
* @model_p: pointer to struct kdp_model with buffers of setup,
* command, weights, input and output for npu.
*
* This function tells kdpio the CNN model to use for processing next
* image(s).
*
* Return value:
* none
*/
void kdpio_set_model(struct kdp_image_s *image_p, struct kdp_model_s *model_p);
/**
* kdpio_run_preprocess() - run preprocessing for the image
*
* @image_p: pointer to struct kdp_image with buffer of raw image
* and dimension, and data for pre/cpu/post processing.
*
* This function tells kdpio to pre-process the raw image for npu
* before npu running the model. Its output will be put in model's
* input buffer for npu.
*
* Return value:
* 0 : success
* <0 : error
*/
int kdpio_run_preprocess(struct kdp_image_s *image_p);
/**
* kdpio_run_npu_op() - run cnn model in npu for the image
*
* @image_p: pointer to struct kdp_image with buffer of raw image
* and dimension, and data for pre/cpu/post processing.
*
* This function tells kdpio to run NPU to process the input data
* with the cnn model previously set.
*
* Return value:
* 0 : success
* <0 : error
*/
int kdpio_run_npu_op(struct kdp_image_s *image_p);
/**
* kdpio_run_cpu_op() - run cpu operation for the image
*
* @image_p: pointer to struct kdp_image with buffer of raw image
* and dimension, and data for pre/cpu/post processing.
*
* This function tells kdpio to let cpu run the input image when
* npu finishes its running. If there is no cpu operation to run as
* specified by model compiler, the function will still return
* success so that next step (postprocess) can continue.
*
* Return value:
* 0 : success
* <0 : error
*/
int kdpio_run_cpu_op(struct kdp_image_s *image_p);
/**
* kdpio_run_postprocess() - run postprocessing for the image
*
* @image_p: pointer to struct kdp_image with buffer of raw image
* and dimension, and data for pre/cpu/post processing.
*
* @perf_improv_fn: pointer to pre_post_fn callback function to move
* npu output to additional buffer for postprocessing while
* the original one could be used by npu again.
* This is intended to improve fps performance if possible
* and desired. Additional output buffer needs to be allocated
* for the purpose, and DMA could be used in the callback.
*
* This function tells kdpio to post-process the output data from npu
* before returning to the calling system.
*
* Return value:
* 0 : success
* <0 : error
*/
int kdpio_run_postprocess(struct kdp_image_s *image_p, pre_post_fn perf_improv_fn);
/**
* kdpio_exit() - exit kdpio
*
* This function tells kdpio to free allocated resources and
* quit from NPU support.
*
*/
void kdpio_exit(void);
#endif