2026-01-28 06:16:04 +00:00

347 lines
13 KiB
JSON
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

{
// tag is used in report names for quick recognition.
"tag": "USE_YOUR_OWN",
"comments": "",
"path": {
// where to save report generated
"report": "USE_YOUR_OWN",
// the base directory for all test cases.
// need to be in two levels, example: v011/00001_conv
// big model cases MUST have "big_model" in the full path
// multi layer cases MUST have "multi_layer" in the full path
"cases": "USE_YOUR_OWN",
// use "search" to filter only cases you want to run.
// use [] empty to run all
// use ["FDR"] to run all cases with "FDR" in the full path
// use ["FDR", "onet"] to run all cases with "FDR" AND "onet" in the full path.
"search": [],
// called inside kneron or by customer?
"internal": false,
// workflow is where regression to look for. TODELETE?
"workflow": "USE_YOUR_OWN"
},
"module_run": {
// only run ip_evaluator, without knerex
"only_ip_evaluator": false,
//
"validate_onnx": false,
"piano_knerex": false,
"piano_dynasty": false,
// for debug. check model sensitive to noise
"piano_dynasty_noise": false,
"compiler_piano": false,
"csim": false,
// TODO: should use csim_ci only. turn on each sub-module in regression.
"csim_ci": false,
// TODELETE
"gen_nef": false,
"run_nef_kneron_plus": false,
//
"tflite": false,
"onnxruntime": false,
//
"filter_cpu_cases": false,
"release_model_prepare": false,
// snr_calculation will be turned on automatically if necessary
"snr_calculation": false,
"verify_decomp_snr": false,
"calculate_layer_statistics": false,
"knerex_modes": false,
"rtl_cmd_check": false
},
"mode_run": {
// each key must have a definition in flow_constats/DYNASTY_MODE_SETTINGS
"float": false,
// 520
"520": false,
"520decomp": false,
"520graphopt": false,
"520scl": false,
"520wq": false,
"520dq": false,
"520dc": false,
"520wqdq": false,
"520wqdc": false,
// 720
"720": false,
"720decomp": false,
"720graphopt": false,
"720scl": false,
"720wq": false,
"720dq": false,
"720dc": false,
"720wqdq": false,
"720wqdc": false,
// 530
"530": false,
"530decomp": false,
"530graphopt": false,
"530scl": false,
"530wq": false,
"530dq": false,
"530dc": false,
"530wqdq": false,
"530wqdc": false,
// 540
"540": false,
"540decomp": false,
"540graphopt": false,
"540scl": false,
"540wq": false,
"540dq": false,
"540dc": false,
"540wqdq": false,
"540wqdc": false,
// 630
"630": false,
"630decomp": false,
"630graphopt": false,
"630scl": false,
"630wq": false,
"630dq": false,
"630dc": false,
"630wqdq": false,
"630wqdc": false,
// 730
"730": false,
"730decomp": false,
"730graphopt": false,
"730scl": false,
"730wq": false,
"730dq": false,
"730dc": false,
"730wqdq": false,
"730wqdc": false,
// for bias adjust, need to turn on [knerex][dump_level] = 7
// bias_adjust 520, using wqbi onnx
"520wq-wqbi": false,
"520wqdq-wqbi": false,
"520wqdc-wqbi": false,
"520-wqbi": false,
// bias_adjust 520, using 520bi onnx
"520wq-hwbi": false,
"520wqdq-hwbi": false,
"520wqdc-hwbi": false,
"520-hwbi": false,
// bias_adjust 720, using wqbi onnx
"720wq-wqbi": false,
"720wqdq-wqbi": false,
"720wqdc-wqbi": false,
"720-wqbi": false,
// bias_adjust 720, using 720bi onnx
"720wq-hwbi": false,
"720wqdq-hwbi": false,
"720wqdc-hwbi": false,
"720-hwbi": false,
// bias_adjust 530, using wqbi onnx
"530wq-wqbi": false,
"530wqdq-wqbi": false,
"530wqdc-wqbi": false,
"530-wqbi": false,
// bias_adjust 530, using 530bi onnx
"530wq-hwbi": false,
"530wqdq-hwbi": false,
"530-hwbi": false,
//
"520wq-hwbi-mse": false,
"520wqdq-hwbi-mse": false,
"520wqdc-hwbi-mse": false,
"520-hwbi-mse": false,
//
"720wq-hwbi-mse": false,
"720wqdq-hwbi-mse": false,
"720wqdc-hwbi-mse": false,
"720-hwbi-mse": false,
//
"530wq-hwbi-mse": false,
"530wqdq-hwbi-mse": false,
"530wqdc-hwbi-mse": false,
"530-hwbi-mse": false
},
"pre_clean_up": {
"knerex_analysis": false,
"knerex_output": false,
"compiler_output": false,
"dynasty_output": false,
"all_output": false
},
"post_clean_up": {
"clean_when_success": false,
"dynasty_output": false,
"knerex_output": false, // TODO
"csim_output": false
},
"dynasty": {
// 0: only dump model specified output
// 1: model specified output + CPU node input/output
// 2: dump every layer
"do_dump": 0,
// choose input files in regression.
// all or default
// "all" -> all files in ${DIR_INPUT_simulator}
// example: big model snr analysis
// "default" -> 1 file, ${DIR_INPUT_simulator}/test_input.txt
// example: single test case for regression
"regression_input": "default",
// max num of inputs to run if above set to "all". to save some time.
// must be a number. use 10000 or bigger number if want to use all images.
"num_input_samples": 10000,
// randomly pick inputs for inference.
// if not specified or left as null, inputs are picked in their listed order
"sample_seed": null,
// for one model, the max number of inputs to run dynasty inference on at the same time
"n_parallel_input": 4,
// max number of models run in parallel
"n_parallel_model": 10,
// input shape could be: onnx_shape or channel_last
"input_shape": "onnx_shape",
// Enable or disable CUDA
"cuda": false,
// round_mode: 0 is round to inf (default for other platforms. setting to 0/1 has NO effect)
//           : 1 is round2even, i.e. round half to even (default for 540/730)
"round_mode": 1,
// HACK: create more datasets from simulator_input with noise added.
"noise_sigma": [
3,
6
],
"piano_dynasty": {
// which onnx to use: "renaissance_onnx", "piano_onnx", "piano_bie"
"onnx_source": "piano_bie",
"model_format": "bie" // pick from MODEL_FORMAT
}
},
"snr": {
"report_snr_col": [
"SNR"
], // ["SNR"], ["SNR_With_Mean"], ["SNR", "SNR_With_Mean"]
"per_channel": false,
"n_thread": 4,
"plot_snr_per_channel": false
},
"layer_statistics": {
"per_channel": false,
"mode_on":["float", "520", "520-wqbi", "720", "720-wqbi"],
"do_difference_matrix": false,
"do_fix": true,
"do_float": false,
"params": false,
"no_plot": false,
"weight_stats": false,
"tile_analysis": false
},
"knerex": {
// Percentage to keep data: 0.999 (default), 1.0 (Keep all data, e.g., for detection model)
// or "outlier" internally
"data_analysis_pct": 0.999,
// whether to calculate additional data path analysis results w.r.t. additional pcts
"need_additional_data_analysis_pct": 0,
// calculate additional data path analysis results w.r.t. these pct values
"additional_data_analysis_pcts": [0.999999],
// whether dynamic range gen uses additional data path analysis results based on bitwidth
// default: 8bit use default pct, 15/16bit use additional pct
"dynamic_range_based_on_bitwidth": 0,
// How many threads used for datapath analysis.
"data_analysis_threads": 8,
"openblas_num_threads": 1,
"bn_weight_pct": 1,
"conv_weight_pct": 1,
// use the compiler frontend given basic_info.json
"use_compiler_cpu_node": true,
"verbose": 1,
// sequential bias adjust: "00010001111"
// parallel bias adjust: "10010001011"
// no bias adjust: "00010001001"
"dump_level": 13,
// same scale for all channels in one layer. (default: 0)
"same_scale": 0,
// ---------------------------------------
// below are for per channel control
"per_channel_radix": 0,
// force the scale of model output and cpu node to 1.0
"fixed_scale_mode": "0",
// FORCE_OUTPUT_LAYER_SAME_RADIX_KEY
// controls whether model output uses per-layer radix; value is a bool:
// 0 = per-channel radix, 1 = per-layer radix
// will be converted to "outputradix" in the template (the "_" is removed)
"output_radix": 1,
// force output node with same scale for all channels (default: 1)
"output_scale": 1,
// FORCE_CPU_LAYER_SAME_RADIX_KEY
// controls whether cpu node uses per-layer radix; value is a bool:
// 0 = per-channel radix, 1 = per-layer radix
"cpu_radix": 1,
"cpu_scale": 1,
// ---------------------------------------
// hack parameter. 1.0 force some datapath not fully scaled.
// NOTE: use 1.0 if no bias adjust turn on.
"max_scale": 0.984375,
// percentile of data for analysis in mmse range estimator.
// the larger, the more memory in dp analysis will use
// and the larger size of the analysis_datapath.bin
"percentile": 0,
// for mmse range estimator.
// The error of clamped outliers will apply this factor (default: 1.0.)
"outlier_factor": 1.0,
// Max number of text files to run from simulator_input. (default: 10000 to run all)
"num_input_samples": 10000,
// set up timeout for knerex. default is 4 hours.
"timeout": 14400,
// choose from mmse / percentage
"datapath_range_method": "percentage",
// DELETED: bitwidth_mode since 0.21.1
// two new bw mode since 0.21.1
"datapath_bitwidth_mode": "int8",
"model_in_bitwidth_mode": "int8",
"model_out_bitwidth_mode": "int8",
"weight_bitwidth_mode": "int8",
"cpu_bitwidth_mode": "int8",
"datapath_mix_percentile": "0",
"weight_mix_percentile": "0"
},
"csim": {
// 18000, set to 3 for rtl-release
"dump_core_opt": 0
},
"compiler_piano": {
"weight_compress": false,
"max_onnx_MB": 1000,
"no_dummy_bn": false, // for knerex need for qat
"node_schedule_mode": "default", // choose from "default" or "deep_search"
"ip_evaluator": true, // this ip_evaluator is for normal compiler after knerex.
"timeout": 14400,
"ip_evaluator_debug": "", // choose from "", "stage_1"
"model_format": "bie", // bie (prefered) or onnx (for debug)
"model_optimize": "scaled", // scaled, wqbi, hwbi, hwbi-mse
"limit_input_formats": false, // if want to limit input to have only 1 format.
"convert_enc" : false // whether generate enc based on optimized mode onnx, if model_optimize is scaled, wq mode need to be on
},
"nef" : {
// http://10.200.210.170:8080/dongle-service-monitor
"dongle_server" : "DEV", // internal server nickname, DEV by default, DEV-INTERNAL for test purpose
"inference_count" : 3 // number of times to inference after loading model into dongle
},
"regression": {
// CRITICAL / ERROR / WARNING / INFO / DEBUG
// CRITICAL will only print very critical information
// DEBUG will print almost everything
"logging_level": "ERROR",
// print the knerex/dynasty commands so that you can copy and run directly.
"print_command": false,
// print detailed error info. Especially useful when you debug python code.
"print_error": false,
// include timestamps in final report to monitor how long each part takes
// time unit can also be specified, second / minute / hour
"generate_time_stamps": false,
// big_model / single_layer / multi_layer / multi_core / cpu_nodes
"model_type": "big_model",
// docker may need to run as --privileged if work_in_memory
"work_in_memory": false,
"app_release": false,
// HACK: pld will be different for ng
"ng": false,
"qat": false,
"skip_success": false,
"time_stamps_unit": "second"
}
}