{
  // tag is used in report names for quick recognition.
  "tag": "USE_YOUR_OWN",
  "comments": "",
  "path": {
    // where to save the generated report
    "report": "USE_YOUR_OWN",
    // the base directory for all test cases.
    // needs to be two levels deep, example: v011/00001_conv
    // big model cases MUST have "big_model" in the full path
    // multi layer cases MUST have "multi_layer" in the full path
    "cases": "USE_YOUR_OWN",
    // use "search" to filter only the cases you want to run.
    // use [] (empty) to run all
    // use ["FDR"] to run all cases with "FDR" in the full path
    // use ["FDR", "onet"] to run all cases with "FDR" AND "onet" in the full path.
    "search": [],
    // called inside kneron or by a customer?
    "internal": false,
    // workflow is where regression looks for cases. TODELETE?
    "workflow": "USE_YOUR_OWN"
  },
  "module_run": {
    // only run ip_evaluator, without knerex
    "only_ip_evaluator": false,
    // "validate_onnx": false,
    "compiler_frontend": false,
    "piano_knerex": false,
    "piano_dynasty": false,
    // for debug. check whether the model is sensitive to noise
    "piano_dynasty_noise": false,
    "compiler_piano": false,
    "csim": false,
    // TODO: should use csim_ci only. turn on each sub-module in regression.
    "csim_ci": false,
    // TODELETE
    "gen_nef": false,
    "run_nef_kneron_plus": false,
    "only_dongle": false,
    // "tflite": false,
    "onnxruntime": false,
    // "filter_cpu_cases": false,
    "release_model_prepare": false,
    // snr_calculation will be turned on automatically if necessary
    "snr_calculation": false,
    "verify_decomp_snr": false,
    "calculate_layer_statistics": false,
    "knerex_modes": false,
    "rtl_cmd_check": false
  },
  "mode_run": {
    // each key must have a definition in flow_constants/DYNASTY_MODE_SETTINGS
    "float": false,
    // 730
    "730": false,
    "730graphopt": false,
    "730scl": false,
    "730wq": false,
    "730dq": false,
    "730dc": false,
    "730wqdq": false,
    "730wqdc": false,
    // 1140
    "1140": false,
    "1140graphopt": false,
    "1140scl": false,
    "1140wq": false,
    "1140dq": false,
    "1140dc": false,
    "1140wqdq": false,
    "1140wqdc": false
  },
  "pre_clean_up": {
    "knerex_analysis": false,
    "knerex_output": false,
    "compiler_output": false,
    "dynasty_output": false,
    "all_output": false
  },
  "post_clean_up": {
    "all_output": false,
    "dynasty_output": false,
    "knerex_output": false,
    // TODO
    "csim_output": false
  },
  "dynasty": {
    // 0: only dump model-specified output
    // 1: model-specified output + CPU node input/output
    // 2: dump every layer
    "do_dump": 0,
    // choose input files in regression: "all" or "default"
    // "all"     -> all files in ${DIR_INPUT_simulator}
    //              example: big model snr analysis
    // "default" -> 1 file, ${DIR_INPUT_simulator}/test_input.txt
    //              example: single test case for regression
    "regression_input": "default",
    // max num of inputs to run if the above is set to "all", to save some time.
    // must be a number. use 10000 or a bigger number to use all images.
    "num_input_samples": 10000,
    "timeout": 7200,
    // seed used to randomly pick inputs for inference.
    // if not specified or left as null, inputs are picked in order.
    "sample_seed": null,
    // for one model, the max number of inputs running dynasty inference at the same time
    "n_parallel_input": 4,
    // max number of models run in parallel
    "n_parallel_model": 10,
    // input shape could be: onnx_shape or channel_last
    "input_shape": "onnx_shape",
    // Enable or disable CUDA
    "cuda": false,
    // round_mode: 0 is round to inf (default for other platforms; setting 0/1 has NO effect)
    //             1 is round to even (default for 540/730)
    "round_mode": 1,
    // HACK: create more datasets from simulator_input with noise added.
    "noise_sigma": [3, 6],
    "piano_dynasty": {
      // which onnx to use: "renaissance_onnx", "piano_onnx", "piano_bie"
      "onnx_source": "piano_bie",
      // pick from MODEL_FORMAT
      "model_format": "bie"
    }
  },
  "snr": {
    // ["SNR"], ["SNR_With_Mean"], ["SNR", "SNR_With_Mean"]
    "report_snr_col": ["SNR"],
    "per_channel": false,
    "n_thread": 4,
    "plot_snr_per_channel": false
  },
  "layer_statistics": {
    "per_channel": false,
    "mode_on": ["float", "520", "520-wqbi", "720", "720-wqbi"],
    "do_difference_matrix": false,
    "do_fix": true,
    "do_float": false,
    "params": false,
    "no_plot": false,
    "weight_stats": false,
    "tile_analysis": false
  },
  "knerex": {
    // Percentage of data to keep: 0.999 (default), 1.0 (keep all data, e.g. for detection models),
    // or "outlier" internally
    "data_analysis_pct": 0.999,
    // whether to calculate additional data path analysis results w.r.t. additional pcts
    "need_additional_data_analysis_pct": 0,
    // calculate additional data path analysis results w.r.t. these pct values
    "additional_data_analysis_pcts": [0.999999],
    // whether dynamic range generation uses additional data path analysis results based on bitwidth
    // default: 8bit uses the default pct, 15/16bit use the additional pct
    "dynamic_range_based_on_bitwidth": 0,
    // how many threads are used for datapath analysis.
    "data_analysis_threads": 8,
    "openblas_num_threads": 1,
    "bn_weight_pct": 1,
    "conv_weight_pct": 1,
    // use the basic_info.json given by the compiler frontend
    "use_compiler_cpu_node": true,
    "verbose": 1,
    // don't use the input/qat.json
    "skip_qat_json": false,
    // sequential bias adjust: "00010001111"
    // parallel bias adjust:   "10010001011"
    // no bias adjust:         "00010001001"
    "dump_level": 13,
    // same scale for all channels in one layer. (default: 0)
    "same_scale": 0,
    // ---------------------------------------
    // the settings below are for per-channel control
    "per_channel_radix": 1,
    // fix the scale of model outputs and cpu nodes to 1.0:
    // "0": default, no fixing
    // "co": cpu nodes + model outputs
    // "all": all layers
    "fixed_scale_mode": "0",
    // FORCE_OUTPUT_LAYER_SAME_RADIX_KEY
    // controls whether model outputs use a per-layer radix. bool: 0 per-channel radix; 1 per-layer radix.
    // will be converted to outputradix in the template (the "_" is removed).
    "output_radix": 1,
    // force output node to use the same scale for all channels (default: 1)
    "output_scale": 1,
    // FORCE_CPU_LAYER_SAME_RADIX_KEY
    // controls whether cpu nodes use a per-layer radix. bool:
    // 0 per-channel radix; 1 per-layer radix
    "cpu_radix": 1,
    "cpu_scale": 1,
    // ---------------------------------------
    // hack parameter. 1.0 forces some datapaths to not be fully scaled.
    // NOTE: use 1.0 if no bias adjust is turned on.
    "max_scale": 0.984375,
    // percentile of data for analysis in the mmse range estimator.
    // the larger it is, the more memory dp analysis will use
    // and the larger the size of analysis_datapath.bin
    "percentile": 0,
    // for the mmse range estimator.
    // the error of clamped outliers will apply this factor (default: 1.0)
    "outlier_factor": 1.0,
    // max number of text files to run from simulator_input. (default: 10000 to run all)
    "num_input_samples": 10000,
    // timeout for knerex. default is 2 hours.
    "timeout": 7200,
    // choose from mmse / percentage
    "datapath_range_method": "percentage",
    // DELETED: bitwidth_mode since 0.21.1
    // two new bw modes since 0.21.1
    "datapath_bitwidth_mode": "int8",
    "model_in_bitwidth_mode": "int8",
    "model_out_bitwidth_mode": "int8",
    "weight_bitwidth_mode": "int8",
    "cpu_bitwidth_mode": "int8",
    "datapath_mix_percentile": "0",
    "weight_mix_percentile": "0",
    "lut_high_accuracy_mode": "2",
    // "0": no dummy bn removal, "1": remove dummy bn, "2": remove dummy bn after exp&log
    "dummy_bn_remove_mode": "1"
  },
  "csim": {
    // 18000, set to 3 for rtl-release
    "timeout": 7200,
    "pld_timeout": 43200,
    "dump_core_opt": 0
  },
  "compiler_piano": {
    "weight_compress": false,
    "max_onnx_MB": 3000,
    // "input_fmt": "1W",  // don't use. for debug!
    // "output_fmt": "1W", // don't use. for debug!
    // usually the compiler is fast, unless deep_search is used
    "timeout": 7200,
    // for the knerex need for qat
    "no_dummy_bn": false,
    // choose from "default" or "deep_search"
    "node_schedule_mode": "default",
    // this ip_evaluator is for the normal compiler run after knerex.
    "ip_evaluator": true,
    // choose from "", "stage_1"
    "ip_evaluator_debug": "",
    // bie (preferred) or onnx (for debug)
    "model_format": "bie",
    // scaled, wqbi, hwbi, hwbi-mse
    "model_optimize": "scaled",
    // whether to limit input to have only 1 format.
    "limit_input_formats": false,
    // special compiler test.
    "do_loop_for_batch": false,
    // whether to generate enc based on the optimized-mode onnx;
    // if model_optimize is scaled, wq mode needs to be on
    "convert_enc": false
  },
  "nef": {
    // http://tracking.kneron.com/dongle-service-monitor
    // internal server nickname, DEV by default, DEV-INTERNAL for test purposes
    "dongle_server": "SYS_730",
    // number of times to run inference after loading the model into the dongle
    "inference_count": 3,
    // npu time limit per model; adjust it only when the model is too big. only works for 730
    "npu_timeout": 10
  },
  "regression": {
    // CRITICAL / ERROR / WARNING / INFO / DEBUG
    // CRITICAL will only print very critical information
    // DEBUG will print almost everything
    "logging_level": "ERROR",
    // print the knerex/dynasty commands so that you can copy and run them directly.
    "print_command": false,
    // print detailed error info. especially useful when debugging python code.
    "print_error": false,
    // include timestamps in the final report to monitor how long each part takes.
    // the time unit can also be specified: second / minute / hour
    "generate_time_stamps": false,
    // big_model / single_layer / multi_layer / multi_core / cpu_nodes
    "model_type": "big_model",
    // docker may run as --privileged if work_in_memory
    "work_in_memory": false,
    "app_release": false,
    // HACK: pld will be different for ng
    "ng": false,
    "qat": false,
    "skip_success": false,
    "only_success": false,
    "time_stamps_unit": "second"
  }
}