{
    // tag is used in report names for quick recognition.
    "tag": "USE_YOUR_OWN",
    "comments": "",
    "path": {
        // where to save the generated report
        "report": "USE_YOUR_OWN",
        // the base directory for all test cases.
        // must be two levels deep, for example: v011/00001_conv
        // big-model cases MUST have "big_model" in the full path
        // multi-layer cases MUST have "multi_layer" in the full path
        "cases": "USE_YOUR_OWN",
        // use "search" to filter only the cases you want to run.
        // use [] (empty) to run all cases
        // use ["FDR"] to run all cases with "FDR" in the full path
        // use ["FDR", "onet"] to run all cases with both "FDR" AND "onet" in the full path
        "search": [],
        // called inside Kneron or by a customer?
        "internal": false,
        // workflow is where the regression looks. TODELETE?
        "workflow": "USE_YOUR_OWN"
    },
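    // module_run: toggles which pipeline modules the regression runs (presumably; one switch per module below)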
    "module_run": {
        // only run ip_evaluator, without knerex
        "only_ip_evaluator": false,
        //
        "validate_onnx": false,
        "compiler_frontend": false,
        "piano_knerex": false,
        "piano_dynasty": false,
        // for debugging: check whether the model is sensitive to noise
        "piano_dynasty_noise": false,
        "compiler_piano": false,
        "csim": false,
        // TODO: should use csim_ci only. turn on each sub-module in regression.
        "csim_ci": false,
        // TODELETE
        "gen_nef": false,
        "run_nef_kneron_plus": false,
        "only_dongle": false,
        //
        "tflite": false,
        "onnxruntime": false,
        //
        "filter_cpu_cases": false,
        "release_model_prepare": false,
        // snr_calculation will be turned on automatically when necessary
        "snr_calculation": false,
        "verify_decomp_snr": false,
        "calculate_layer_statistics": false,
        "knerex_modes": false,
        "rtl_cmd_check": false
    },
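    // mode_run: presumably selects which hardware/quantization modes to run for each case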
    "mode_run": {
        // each key must have a definition in flow_constats/DYNASTY_MODE_SETTINGS
        "float": false,
        // 730
        "730": false,
        "730graphopt": false,
        "730scl": false,
        "730wq": false,
        "730dq": false,
        "730dc": false,
        "730wqdq": false,
        "730wqdc": false,
        // 1140
        "1140": false,
        "1140graphopt": false,
        "1140scl": false,
        "1140wq": false,
        "1140dq": false,
        "1140dc": false,
        "1140wqdq": false,
        "1140wqdc": false
    },
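    // pre_clean_up: presumably which existing outputs to delete before the run starts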
    "pre_clean_up": {
        "knerex_analysis": false,
        "knerex_output": false,
        "compiler_output": false,
        "dynasty_output": false,
        "all_output": false
    },
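    // post_clean_up: presumably which outputs to delete after the run finishes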
    "post_clean_up": {
        "all_output": false,
        "dynasty_output": false,
        "knerex_output": false, // TODO
        "csim_output": false
    },
    "dynasty": {
        // 0: only dump model-specified outputs
        // 1: model-specified outputs + CPU node input/output
        // 2: dump every layer
        "do_dump": 0,
        // choose input files in regression: "all" or "default"
        // "all" -> all files in ${DIR_INPUT_simulator}
        //   example: big-model snr analysis
        // "default" -> 1 file, ${DIR_INPUT_simulator}/test_input.txt
        //   example: single test case for regression
        "regression_input": "default",
        // max number of inputs to run if the above is set to "all", to save some time.
        // must be a number; use 10000 or a bigger number to use all images.
        "num_input_samples": 10000,
        "timeout": 7200,
        // sample_seed: randomly pick inputs for inference.
        // if not specified or left as null, inputs are picked in order
        "sample_seed": null,
        // for one model, the max number of inputs running dynasty inference at the same time
        "n_parallel_input": 4,
        // max number of models run in parallel
        "n_parallel_model": 10,
        // input shape can be onnx_shape or channel_last
        "input_shape": "onnx_shape",
        // Enable or disable CUDA
        "cuda": false,
        // round_mode: 0 is round to inf (default for other platforms; setting 0/1 has NO effect there)
        //             1 is round-to-even (default for 540/730)
        "round_mode": 1,
        // HACK: create more datasets from simulator_input with noise added.
        "noise_sigma": [
            3,
            6
        ],
        "piano_dynasty": {
            // which onnx to use: "renaissance_onnx", "piano_onnx", "piano_bie"
            "onnx_source": "piano_bie",
            "model_format": "bie" // pick from MODEL_FORMAT
        }
    },
    "snr": {
        "report_snr_col": [
            "SNR"
        ], // ["SNR"], ["SNR_With_Mean"], ["SNR", "SNR_With_Mean"]
        "per_channel": false,
        "n_thread": 4,
        "plot_snr_per_channel": false
    },
    "layer_statistics": {
        "per_channel": false,
        "mode_on": ["float", "520", "520-wqbi", "720", "720-wqbi"],
        "do_difference_matrix": false,
        "do_fix": true,
        "do_float": false,
        "params": false,
        "no_plot": false,
        "weight_stats": false,
        "tile_analysis": false
    },
    "knerex": {
        // Percentage of data to keep: 0.999 (default), 1.0 (keep all data, e.g., for detection models)
        // or "outlier" internally
        "data_analysis_pct": 0.999,
        // whether to calculate additional data path analysis results w.r.t. additional pcts
        "need_additional_data_analysis_pct": 0,
        // calculate additional data path analysis results w.r.t. these pct values
        "additional_data_analysis_pcts": [0.999999],
        // whether dynamic range generation uses the additional data path analysis results based on bitwidth
        // default: 8-bit uses the default pct, 15/16-bit uses the additional pct
        "dynamic_range_based_on_bitwidth": 0,
        // how many threads are used for datapath analysis
        "data_analysis_threads": 8,
        "openblas_num_threads": 1,
        "bn_weight_pct": 1,
        "conv_weight_pct": 1,
        // use the basic_info.json given by the compiler frontend
        "use_compiler_cpu_node": true,
        "verbose": 1,
        "skip_qat_json": false, // don't use the input/qat.json
        // sequential bias adjust: "00010001111"
        // parallel bias adjust: "10010001011"
        // no bias adjust: "00010001001"
        "dump_level": 13,
        // same scale for all channels in one layer (default: 0)
        "same_scale": 0,
        // ---------------------------------------
        // the settings below are for per-channel control
        "per_channel_radix": 1,
        // force the scale of the model output and cpu nodes to 1.0
        // "0": default, no setting
        // "co": cpu + model output
        // "all": all layers
        "fixed_scale_mode": "0",
        // FORCE_OUTPUT_LAYER_SAME_RADIX_KEY
        // controls whether the model output uses a per-layer radix. boolean: 0 = per-channel radix; 1 = per-layer radix
        // will be converted to outputradix in the template (remove the _)
        "output_radix": 1,
        // force output nodes to use the same scale for all channels (default: 1)
        "output_scale": 1,
        // FORCE_CPU_LAYER_SAME_RADIX_KEY
        // controls whether cpu nodes use a per-layer radix. boolean:
        // 0 = per-channel radix; 1 = per-layer radix
        "cpu_radix": 1,
        "cpu_scale": 1,
        // ---------------------------------------
        // hack parameter: 1.0 forces some datapaths to not be fully scaled.
        // NOTE: use 1.0 if bias adjust is not turned on.
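        // (for reference: 0.984375 = 63/64, i.e. just below 1.0)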
        "max_scale": 0.984375,
        // percentile of data for analysis in the mmse range estimator.
        // the larger it is, the more memory the dp analysis will use
        // and the larger the analysis_datapath.bin will be
        "percentile": 0,
        // for the mmse range estimator.
        // this factor is applied to the error of clamped outliers (default: 1.0)
        "outlier_factor": 1.0,
        // max number of text files to run from simulator_input (default: 10000 to run all)
        "num_input_samples": 10000,
        // timeout for knerex. default is 2 hours.
        "timeout": 7200,
        // choose from mmse / percentage
        "datapath_range_method": "percentage",
        // DELETED: bitwidth_mode since 0.21.1
        // two new bitwidth modes since 0.21.1
        "datapath_bitwidth_mode": "int8",
        "model_in_bitwidth_mode": "int8",
        "model_out_bitwidth_mode": "int8",
        "weight_bitwidth_mode": "int8",
        "cpu_bitwidth_mode": "int8",
        "datapath_mix_percentile": "0",
        "weight_mix_percentile": "0",
        "lut_high_accuracy_mode": "2",
        // "0": no dummy bn removal, "1": remove dummy bn, "2": remove dummy bn after exp & log
        "dummy_bn_remove_mode": "1"
    },
    "csim": {
        // 18000; set to 3 for rtl-release
        "timeout": 7200,
        "pld_timeout": 43200,
        "dump_core_opt": 0
    },
    "compiler_piano": {
        "weight_compress": false,
        "max_onnx_MB": 3000,
        // "input_fmt": "1W", // don't use; for debugging only!
        // "output_fmt": "1W", // don't use; for debugging only!
        "timeout": 7200, // the compiler is usually fast, unless deep_search is used
        "no_dummy_bn": false, // needed by knerex for qat
        "node_schedule_mode": "default", // choose from "default" or "deep_search"
        "ip_evaluator": true, // this ip_evaluator is for the normal compiler run after knerex
        "ip_evaluator_debug": "", // choose from "", "stage_1"
        "model_format": "bie", // bie (preferred) or onnx (for debugging)
        "model_optimize": "scaled", // scaled, wqbi, hwbi, hwbi-mse
        "limit_input_formats": false, // set to true to limit the input to only 1 format
        "do_loop_for_batch": false, // special compiler test
        "convert_enc": false // whether to generate enc based on the optimized-mode onnx; if model_optimize is scaled, wq mode needs to be on
    },
    "nef": {
        // http://tracking.kneron.com/dongle-service-monitor
        "dongle_server": "SYS_730", // internal server nickname; DEV by default, DEV-INTERNAL for test purposes
        "inference_count": 3, // number of inference runs after loading the model into the dongle
        "npu_timeout": 10 // npu time limit per model; adjust only when the model is too big; only works for 730
    },
    "regression": {
        // CRITICAL / ERROR / WARNING / INFO / DEBUG
        // CRITICAL will only print very critical information
        // DEBUG will print almost everything
        "logging_level": "ERROR",
        // print the knerex/dynasty commands so that you can copy and run them directly
        "print_command": false,
        // print detailed error info; especially useful when debugging the python code
        "print_error": false,
        // include timestamps in the final report to monitor how long each part takes
        // the time unit can also be specified: second / minute / hour
        "generate_time_stamps": false,
        // big_model / single_layer / multi_layer / multi_core / cpu_nodes
        "model_type": "big_model",
        // docker may need --privileged if work_in_memory is enabled
        "work_in_memory": false,
        "app_release": false,
        // HACK: pld will be different for ng
        "ng": false,
        "qat": false,
        "skip_success": false,
        "only_success": false,
        "time_stamps_unit": "second"
    }
}