{
    // tag is used in report names for quick recognition.
    "tag": "USE_YOUR_OWN",
    "comments": "",
    "path": {
        // where to save the generated report
        "report": "USE_YOUR_OWN",
        // the base directory for all test cases.
        // must be two levels deep, for example: v011/00001_conv
        // big-model cases MUST have "big_model" in the full path
        // multi-layer cases MUST have "multi_layer" in the full path
        "cases": "USE_YOUR_OWN",
        // use "search" to filter only the cases you want to run.
        // use [] (empty) to run all cases
        // use ["FDR"] to run all cases with "FDR" in the full path
        // use ["FDR", "onet"] to run all cases with both "FDR" AND "onet" in the full path
        "search": [],
        // called inside Kneron or by a customer?
        "internal": false,
        // workflow is where the regression looks. TODELETE?
        "workflow": "USE_YOUR_OWN"
    },
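    // module_run: toggles which pipeline modules the regression runs (presumably; one switch per module below)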
    "module_run": {
        // only run ip_evaluator, without knerex
        "only_ip_evaluator": false,
        //
        "validate_onnx": false,
        "compiler_frontend": false,
        "piano_knerex": false,
        "piano_dynasty": false,
        // for debugging: check whether the model is sensitive to noise
        "piano_dynasty_noise": false,
        "compiler_piano": false,
        "csim": false,
        // TODO: should use csim_ci only. turn on each sub-module in regression.
        "csim_ci": false,
        // TODELETE
        "gen_nef": false,
        "run_nef_kneron_plus": false,
        "only_dongle": false,
        //
        "tflite": false,
        "onnxruntime": false,
        //
        "filter_cpu_cases": false,
        "release_model_prepare": false,
        // snr_calculation will be turned on automatically when necessary
        "snr_calculation": false,
        "verify_decomp_snr": false,
        "calculate_layer_statistics": false,
        "knerex_modes": false,
        "rtl_cmd_check": false
    },
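    // mode_run: presumably selects which hardware/quantization modes to run for each case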
    "mode_run": {
        // each key must have a definition in flow_constats/DYNASTY_MODE_SETTINGS
        "float": false,
        // 730
        "730": false,
        "730graphopt": false,
        "730scl": false,
        "730wq": false,
        "730dq": false,
        "730dc": false,
        "730wqdq": false,
        "730wqdc": false,
        // 1140
        "1140": false,
        "1140graphopt": false,
        "1140scl": false,
        "1140wq": false,
        "1140dq": false,
        "1140dc": false,
        "1140wqdq": false,
        "1140wqdc": false
    },
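    // pre_clean_up: presumably which existing outputs to delete before the run starts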
    "pre_clean_up": {
        "knerex_analysis": false,
        "knerex_output": false,
        "compiler_output": false,
        "dynasty_output": false,
        "all_output": false
    },
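    // post_clean_up: presumably which outputs to delete after the run finishes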
    "post_clean_up": {
        "all_output": false,
        "dynasty_output": false,
        "knerex_output": false, // TODO
        "csim_output": false
    },
    "dynasty": {
        // 0: only dump model-specified outputs
        // 1: model-specified outputs + CPU node input/output
        // 2: dump every layer
        "do_dump": 0,
        // choose input files in regression: "all" or "default"
        // "all" -> all files in ${DIR_INPUT_simulator}
        //   example: big-model snr analysis
        // "default" -> 1 file, ${DIR_INPUT_simulator}/test_input.txt
        //   example: single test case for regression
        "regression_input": "default",
        // max number of inputs to run if the above is set to "all", to save some time.
        // must be a number; use 10000 or a bigger number to use all images.
        "num_input_samples": 10000,
        "timeout": 7200,
        // sample_seed: randomly pick inputs for inference.
        // if not specified or left as null, inputs are picked in order
        "sample_seed": null,
        // for one model, the max number of inputs running dynasty inference at the same time
        "n_parallel_input": 4,
        // max number of models run in parallel
        "n_parallel_model": 10,
        // input shape can be onnx_shape or channel_last
        "input_shape": "onnx_shape",
        // Enable or disable CUDA
        "cuda": false,
        // round_mode: 0 is round to inf (default for other platforms; setting 0/1 has NO effect there)
        //             1 is round-to-even (default for 540/730)
        "round_mode": 1,
        // HACK: create more datasets from simulator_input with noise added.
        "noise_sigma": [
            3,
            6
        ],
        "piano_dynasty": {
            // which onnx to use: "renaissance_onnx", "piano_onnx", "piano_bie"
            "onnx_source": "piano_bie",
            "model_format": "bie" // pick from MODEL_FORMAT
        }
    },
    "snr": {
        "report_snr_col": [
            "SNR"
        ], // ["SNR"], ["SNR_With_Mean"], ["SNR", "SNR_With_Mean"]
        "per_channel": false,
        "n_thread": 4,
        "plot_snr_per_channel": false
    },
    "layer_statistics": {
        "per_channel": false,
        "mode_on": ["float", "520", "520-wqbi", "720", "720-wqbi"],
        "do_difference_matrix": false,
        "do_fix": true,
        "do_float": false,
        "params": false,
        "no_plot": false,
        "weight_stats": false,
        "tile_analysis": false
    },
    "knerex": {
        // Percentage of data to keep: 0.999 (default), 1.0 (keep all data, e.g., for detection models)
        // or "outlier" internally
        "data_analysis_pct": 0.999,
        // whether to calculate additional data path analysis results w.r.t. additional pcts
        "need_additional_data_analysis_pct": 0,
        // calculate additional data path analysis results w.r.t. these pct values
        "additional_data_analysis_pcts": [0.999999],
        // whether dynamic range generation uses the additional data path analysis results based on bitwidth
        // default: 8-bit uses the default pct, 15/16-bit uses the additional pct
        "dynamic_range_based_on_bitwidth": 0,
        // how many threads are used for datapath analysis
        "data_analysis_threads": 8,
        "openblas_num_threads": 1,
        "bn_weight_pct": 1,
        "conv_weight_pct": 1,
        // use the basic_info.json given by the compiler frontend
        "use_compiler_cpu_node": true,
        "verbose": 1,
        "skip_qat_json": false, // don't use the input/qat.json
        // sequential bias adjust: "00010001111"
        // parallel bias adjust: "10010001011"
        // no bias adjust: "00010001001"
        "dump_level": 13,
        // same scale for all channels in one layer (default: 0)
        "same_scale": 0,
        // ---------------------------------------
        // the settings below are for per-channel control
        "per_channel_radix": 1,
        // force the scale of the model output and cpu nodes to 1.0
        // "0": default, no setting
        // "co": cpu + model output
        // "all": all layers
        "fixed_scale_mode": "0",
        // FORCE_OUTPUT_LAYER_SAME_RADIX_KEY
        // controls whether the model output uses a per-layer radix. boolean: 0 = per-channel radix; 1 = per-layer radix
        // will be converted to outputradix in the template (remove the _)
        "output_radix": 1,
        // force output nodes to use the same scale for all channels (default: 1)
        "output_scale": 1,
        // FORCE_CPU_LAYER_SAME_RADIX_KEY
        // controls whether cpu nodes use a per-layer radix. boolean:
        // 0 = per-channel radix; 1 = per-layer radix
        "cpu_radix": 1,
        "cpu_scale": 1,
        // ---------------------------------------
        // hack parameter: 1.0 forces some datapaths to not be fully scaled.
        // NOTE: use 1.0 if bias adjust is not turned on.
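        // (for reference: 0.984375 = 63/64, i.e. just below 1.0)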
        "max_scale": 0.984375,
        // percentile of data for analysis in the mmse range estimator.
        // the larger it is, the more memory the dp analysis will use
        // and the larger the analysis_datapath.bin will be
        "percentile": 0,
        // for the mmse range estimator.
        // this factor is applied to the error of clamped outliers (default: 1.0)
        "outlier_factor": 1.0,
        // max number of text files to run from simulator_input (default: 10000 to run all)
        "num_input_samples": 10000,
        // timeout for knerex. default is 2 hours.
        "timeout": 7200,
        // choose from mmse / percentage
        "datapath_range_method": "percentage",
        // DELETED: bitwidth_mode since 0.21.1
        // two new bitwidth modes since 0.21.1
        "datapath_bitwidth_mode": "int8",
        "model_in_bitwidth_mode": "int8",
        "model_out_bitwidth_mode": "int8",
        "weight_bitwidth_mode": "int8",
        "cpu_bitwidth_mode": "int8",
        "datapath_mix_percentile": "0",
        "weight_mix_percentile": "0",
        "lut_high_accuracy_mode": "2",
        // "0": no dummy bn removal, "1": remove dummy bn, "2": remove dummy bn after exp & log
        "dummy_bn_remove_mode": "1"
    },
    "csim": {
        // 18000; set to 3 for rtl-release
        "timeout": 7200,
        "pld_timeout": 43200,
        "dump_core_opt": 0
    },
    "compiler_piano": {
        "weight_compress": false,
        "max_onnx_MB": 3000,
        // "input_fmt": "1W", // don't use; for debugging only!
        // "output_fmt": "1W", // don't use; for debugging only!
        "timeout": 7200, // the compiler is usually fast, unless deep_search is used
        "no_dummy_bn": false, // needed by knerex for qat
        "node_schedule_mode": "default", // choose from "default" or "deep_search"
        "ip_evaluator": true, // this ip_evaluator is for the normal compiler run after knerex
        "ip_evaluator_debug": "", // choose from "", "stage_1"
        "model_format": "bie", // bie (preferred) or onnx (for debugging)
        "model_optimize": "scaled", // scaled, wqbi, hwbi, hwbi-mse
        "limit_input_formats": false, // set to true to limit the input to only 1 format
        "do_loop_for_batch": false, // special compiler test
        "convert_enc": false // whether to generate enc based on the optimized-mode onnx; if model_optimize is scaled, wq mode needs to be on
    },
    "nef": {
        // http://tracking.kneron.com/dongle-service-monitor
        "dongle_server": "SYS_730", // internal server nickname; DEV by default, DEV-INTERNAL for test purposes
        "inference_count": 3, // number of inference runs after loading the model into the dongle
        "npu_timeout": 10 // npu time limit per model; adjust only when the model is too big; only works for 730
    },
    "regression": {
        // CRITICAL / ERROR / WARNING / INFO / DEBUG
        // CRITICAL will only print very critical information
        // DEBUG will print almost everything
        "logging_level": "ERROR",
        // print the knerex/dynasty commands so that you can copy and run them directly
        "print_command": false,
        // print detailed error info; especially useful when debugging the python code
        "print_error": false,
        // include timestamps in the final report to monitor how long each part takes
        // the time unit can also be specified: second / minute / hour
        "generate_time_stamps": false,
        // big_model / single_layer / multi_layer / multi_core / cpu_nodes
        "model_type": "big_model",
        // docker may need --privileged if work_in_memory is enabled
        "work_in_memory": false,
        "app_release": false,
        // HACK: pld will be different for ng
        "ng": false,
        "qat": false,
        "skip_success": false,
        "only_success": false,
        "time_stamps_unit": "second"
    }
}