"""Generate scripts to run regression based on regression_config.yaml Usage: gen_regression_json.py tc --output= gen_regression_json.py (-h | --help) gen_regression_json.py --version Options: -o --output= Specify output folder -h --help Show this screen. --version Show version. """ from docopt import docopt import pathlib import os import commentjson as json from copy import deepcopy from sys_flow.flow_constants import MODE_HW_LIMIT, P_BIN_CACHE, MODE_HARDWARE DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False import snoop snoop.install(enabled=DEBUG) ##################################################################### # constant ##################################################################### p_script = pathlib.Path(__file__).resolve().parent ##################################################################### # code ##################################################################### def get_module_run(mode): """ mode 0: ip evaluator only, mode 1: knerex + compiler mode 2: ip eval + knerex + compiler + btm + snr (1 image, dump 0) mode 3: ip eval + knerex + compiler + btm + snr (all image, dump 2) """ if mode == 0: module_run = {"only_ip_evaluator": True} elif mode == 1: module_run = { "piano_knerex": True, "compiler_piano": True } elif mode in [2, 3]: module_run = { "piano_knerex": True, "piano_dynasty": True, "snr_calculation": True, "compiler_piano": True, "csim": True } elif mode == 51: ### this mode is for internal dongle test only, will call internal dongle server module_run = { "piano_knerex": True, "piano_dynasty": True, "snr_calculation": True, "compiler_piano": True, "csim": True, "run_nef_kneron_plus": True } else: raise NotImplementedError return module_run def get_mode_run(platform, optimize): """Determine which mode (of dynasty) to run.""" # NOTE: only run 1 platform for now. platform must be integer. not list assert platform in [520, 530, 540, 630, 720, 730] # see config_default.json for more optimization assert optimize in ["scaled", "wqbi-p", "wqbi-s"] """ knerex dump level bit 0, scaled.bie `|1` bit 1: quan.bie `|2` bit 2: wqbi.bie `|4` bit 3: onnx+json `|8` bit 4: json only. `|16` bit 5: hwbi.bie. `|32` bit 6: hwbi-mse.bie. `|64` bit 7: decomposed.bie. `|128` bit 8: thread_auto_adjusted. `|256` bit 9: less_DRAM_mode. `|512` bit 10: wqbip.bie parallel mode. `|1024`. """ if optimize == "scaled": knerex_dump_level = "00010001001" mode_on = { f"{platform}": True, } model_optimize = optimize elif optimize == "wqbi-p": mode_on = { f"{platform}-wqbi": True, } model_optimize = "wqbi" knerex_dump_level = "10010001011" elif optimize == "wqbi-s": mode_on = { f"{platform}-wqbi": True, } model_optimize = "wqbi" knerex_dump_level = "00010001001" else: raise NotImplementedError # need to turn on reference signal # NEW from 0.24.0 mode_on[f"{platform}graphopt"] = True # mode_on["float"] = True return mode_on, knerex_dump_level, model_optimize def get_dynasty_conf(mode): """Determine how many sample input pairs to use.""" if mode == 3: do_dump = 2 num_input_samples = 1000 else: do_dump = 0 num_input_samples = 1 return do_dump, num_input_samples def check_bitwidth_mode(dp_bw, wt_bw, in_bw, out_bw, cpu_bw, hw_mode): """Verify given bitwidth modes, and possible expand for weight mix interleave. NOTE: this is only a quick check. Knerex will run detailed check. NOTE: all regression check on bw should be here. easier to maintain. """ # NOTE: airtable must be same as this codes. # NOTE: weight code "mix_interleave_8" is "mix interleave" + 8bit weight_mix_percentile # weight code "mix_interleave_16" is "mix interleave" + 16bit weight_mix_percentile # "mix interleave" is for DEBUG only. # mix light: Set all weight bitwidth as int16, except Gemm, Convolution # mix balance: Set all weight bitwidth as int16, except Non-Depthwise Convolution valid_wghts = ["int8", "int16", "int4", "mix light", "mix balance", "mix interleave", "mix_interleave_8", "mix_interleave_16"] # mix light: Set all datapath bitwidth as int16, except Gemm, Convolution, and MatMul # mix balance: Set all datapath bitwidth as int16, except MatMul # "all int8" / "mix interleave" is for DEBUG only. valid_data = ["int8", "int16", "mix light", "mix balance", "all int8", "mix interleave"] valid_bw_io = ["int8", "int16"] d_bw = { "cpu node bitwidth": (cpu_bw, valid_bw_io), "input bitwidth": (in_bw, valid_bw_io), "output bitwidth": (out_bw, valid_bw_io), "datapath bitwidth": (dp_bw, valid_data), "weight bitwidth": (wt_bw, valid_wghts) } for name, (bw, valid_bw) in d_bw.items(): if bw not in valid_bw: msg = f"Invalid {name}: {bw}. Should be in {valid_bw} ." raise ValueError(msg) # more special settings if hw_mode in [520, 720]: if wt_bw == "int4": msg = f"platform {hw_mode} does NOT support weight 4bit." raise ValueError(msg) if hw_mode in [520]: for k, (bw, valid_bw) in d_bw.items(): if bw not in ["int8"]: msg = f"Invalid {k}: {bw}. 520 only support 8bit." raise ValueError(msg) d_knerex = {} d_knerex["datapath_bitwidth_mode"] = dp_bw d_knerex["model_in_bitwidth_mode"] = in_bw d_knerex["model_out_bitwidth_mode"] = out_bw d_knerex["cpu_bitwidth_mode"] = cpu_bw d_knerex["weight_bitwidth_mode"] = wt_bw d_knerex["weight_mix_percentile"] = 0 if wt_bw.startswith("mix_interleave_"): # mix_interleave is for debug only if wt_bw.endswith("_8"): d_knerex["weight_bitwidth_mode"] = "mix interleave" d_knerex["weight_mix_percentile"] = "int8" elif wt_bw.endswith("_16"): d_knerex["weight_bitwidth_mode"] = "mix interleave" d_knerex["weight_mix_percentile"] = "int16" else: raise NotImplementedError(f"wrong weight bw: {wt_bw}") return d_knerex def generate_conf(template, platform, # is number 520/720/... optimize="wqbi-p", mode=2, # choose from 0/1/2/3. See document for details limit_input_formats=False, dp_bw="int8", wt_bw="int8", in_bw="int8", out_bw="int8", cpu_bw="int8", # from 0.24.0 datapath_range_method="percentage", data_analysis_pct=0.999, data_analysis_16b_pct=0.999999, data_analysis_threads=8, percentile=0.001, outlier_factor=1.0, unlock_size_limit=False, fm_cut="default", # "default" / "deep_search" ): """Generate config for toolchain/gen_fx_model.""" module_run = get_module_run(mode) do_dump, num_input_samples = get_dynasty_conf(mode) mode_on, knerex_dump_level, model_optimize = get_mode_run(platform, optimize) tag = f"mode{mode}_{platform}_{optimize}" # TODO: make weight_compress default by platform # weight_compress = platform in MODE_HW_LIMIT["weight_compress"] weight_compress = False d_bw = check_bitwidth_mode(dp_bw, wt_bw, in_bw, out_bw, cpu_bw, platform) j = deepcopy(template) j["tag"] = pp(tag) j["module_run"] = module_run j["mode_run"] = mode_on j["dynasty"]["do_dump"] = do_dump j["dynasty"]["num_input_samples"] = num_input_samples j["knerex"]["dump_level"] = knerex_dump_level j["knerex"].update(d_bw) j["knerex"]["percentile"] = percentile j["knerex"]["data_analysis_pct"] = data_analysis_pct j["knerex"]["need_additional_data_analysis_pct"] = 1 j["knerex"]["additional_data_analysis_pcts"] = [data_analysis_16b_pct] j["knerex"]["data_analysis_threads"] = data_analysis_threads j["knerex"]["datapath_range_method"] = datapath_range_method j["knerex"]["outlier_factor"] = outlier_factor j["compiler_piano"]["model_optimize"] = model_optimize j["compiler_piano"]["node_schedule_mode"] = fm_cut j["compiler_piano"]["weight_compress"] = weight_compress j["compiler_piano"]["limit_input_formats"] = limit_input_formats if unlock_size_limit: j["compiler_piano"]["max_onnx_MB"] = 100000 # no onnx larger than 100G? if DEBUG: j["path"]["internal"] = True # if os.environ.get("USE_PREBUILD", False): # j["path"]["use_toolchain"] = False return j, tag def generate_conf_batch(template, p_out): """Automatically generate json for needed situations.""" for mode in [0, 1, 2, 3]: for platform in [520, 530, 540, 630, 720, 730]: for optimize in ["scaled", "wqbi-p", "wqbi-s"]: """ wqbi-p for wqbi in parallel. faster, less improvement. wqbi-s for wqbi in sequential. slower, need more memory. more accurate. """ j, tag = generate_conf(template, platform=platform, optimize=optimize, mode=mode) fn_j = p_out / f"{tag}.json" with open(fn_j, "w") as f: json.dump(j, f, indent=4, sort_keys=True) if __name__ == "__main__": arguments = docopt(__doc__, version="gen_configs 1.0") if arguments["tc"]: p_template = p_script / "template" / "regerssion_tc.json" with open(p_template, "r") as f: template = json.load(f) p_out = arguments["--output"] p_out.mkdir(parents=True, exist_ok=True) generate_conf(template, p_out)