kneron_model_converter/vendor/sys_flow/gen_regression_json.py
"""Generate scripts to run regression based on regression_config.yaml
Usage:
gen_regression_json.py tc --output=<d_out>
gen_regression_json.py (-h | --help)
gen_regression_json.py --version
Options:
-o --output=<d_out> Specify output folder
-h --help Show this screen.
--version Show version.
"""
from docopt import docopt
import pathlib
import os
import commentjson as json
from copy import deepcopy
from sys_flow.flow_constants import MODE_HW_LIMIT, P_BIN_CACHE, MODE_HARDWARE
DEBUG = bool(os.environ.get("REGRESSION_DEBUG", False))
import snoop
snoop.install(enabled=DEBUG)
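# NOTE: snoop.install() also registers `pp` as a builtin (used in generate_conf below);
# pp(x) pretty-prints x when debugging is enabled and returns x unchanged.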
#####################################################################
# constant
#####################################################################
p_script = pathlib.Path(__file__).resolve().parent
#####################################################################
# code
#####################################################################
def get_module_run(mode):
"""
mode 0: ip evaluator only,
mode 1: knerex + compiler
mode 2: ip eval + knerex + compiler + btm + snr (1 image, dump 0)
mode 3: ip eval + knerex + compiler + btm + snr (all image, dump 2)
"""
if mode == 0:
module_run = {"only_ip_evaluator": True}
elif mode == 1:
module_run = {
"piano_knerex": True,
"compiler_piano": True
}
elif mode in [2, 3]:
module_run = {
"piano_knerex": True,
"piano_dynasty": True,
"snr_calculation": True,
"compiler_piano": True,
"csim": True
}
elif mode == 51:
        # This mode is for internal dongle tests only; it calls the internal dongle server.
module_run = {
"piano_knerex": True,
"piano_dynasty": True,
"snr_calculation": True,
"compiler_piano": True,
"csim": True,
"run_nef_kneron_plus": True
}
else:
raise NotImplementedError
return module_run
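# Example (derived from the mapping above):
#   get_module_run(0) -> {"only_ip_evaluator": True}
#   get_module_run(1) -> {"piano_knerex": True, "compiler_piano": True}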
def get_mode_run(platform, optimize):
"""Determine which mode (of dynasty) to run."""
    # NOTE: only run 1 platform for now; platform must be an integer, not a list.
assert platform in [520, 530, 540, 630, 720, 730]
# see config_default.json for more optimization
assert optimize in ["scaled", "wqbi-p", "wqbi-s"]
""" knerex dump level
    bit 0: scaled.bie `|1`
bit 1: quan.bie `|2`
bit 2: wqbi.bie `|4`
bit 3: onnx+json `|8`
bit 4: json only. `|16`
bit 5: hwbi.bie. `|32`
bit 6: hwbi-mse.bie. `|64`
bit 7: decomposed.bie. `|128`
bit 8: thread_auto_adjusted. `|256`
bit 9: less_DRAM_mode. `|512`
bit 10: wqbip.bie parallel mode. `|1024`.
"""
if optimize == "scaled":
knerex_dump_level = "00010001001"
mode_on = {
f"{platform}": True,
}
model_optimize = optimize
elif optimize == "wqbi-p":
mode_on = {
f"{platform}-wqbi": True,
}
model_optimize = "wqbi"
knerex_dump_level = "10010001011"
elif optimize == "wqbi-s":
mode_on = {
f"{platform}-wqbi": True,
}
model_optimize = "wqbi"
knerex_dump_level = "00010001001"
else:
raise NotImplementedError
# need to turn on reference signal
# NEW from 0.24.0
mode_on[f"{platform}graphopt"] = True
# mode_on["float"] = True
return mode_on, knerex_dump_level, model_optimize
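# Example (derived from the branches above):
#   get_mode_run(720, "wqbi-p")
#   -> ({"720-wqbi": True, "720graphopt": True}, "10010001011", "wqbi")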
def get_dynasty_conf(mode):
"""Determine how many sample input pairs to use."""
if mode == 3:
do_dump = 2
num_input_samples = 1000
else:
do_dump = 0
num_input_samples = 1
return do_dump, num_input_samples
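# Example: get_dynasty_conf(3) -> (2, 1000); any other mode -> (0, 1).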
def check_bitwidth_mode(dp_bw, wt_bw, in_bw, out_bw, cpu_bw, hw_mode):
"""Verify given bitwidth modes, and possible expand for weight mix interleave.
NOTE: this is only a quick check.
Knerex will run detailed check.
NOTE: all regression check on bw should be here.
easier to maintain.
"""
    # NOTE: the Airtable settings must match this code.
    # NOTE: weight code "mix_interleave_8" is "mix interleave" + 8-bit weight_mix_percentile;
    #       weight code "mix_interleave_16" is "mix interleave" + 16-bit weight_mix_percentile.
# "mix interleave" is for DEBUG only.
# mix light: Set all weight bitwidth as int16, except Gemm, Convolution
# mix balance: Set all weight bitwidth as int16, except Non-Depthwise Convolution
valid_wghts = ["int8", "int16", "int4", "mix light", "mix balance", "mix interleave", "mix_interleave_8", "mix_interleave_16"]
# mix light: Set all datapath bitwidth as int16, except Gemm, Convolution, and MatMul
# mix balance: Set all datapath bitwidth as int16, except MatMul
# "all int8" / "mix interleave" is for DEBUG only.
valid_data = ["int8", "int16", "mix light", "mix balance", "all int8", "mix interleave"]
valid_bw_io = ["int8", "int16"]
d_bw = {
"cpu node bitwidth": (cpu_bw, valid_bw_io),
"input bitwidth": (in_bw, valid_bw_io),
"output bitwidth": (out_bw, valid_bw_io),
"datapath bitwidth": (dp_bw, valid_data),
"weight bitwidth": (wt_bw, valid_wghts)
}
for name, (bw, valid_bw) in d_bw.items():
if bw not in valid_bw:
msg = f"Invalid {name}: {bw}. Should be in {valid_bw} ."
raise ValueError(msg)
# more special settings
if hw_mode in [520, 720]:
if wt_bw == "int4":
msg = f"platform {hw_mode} does NOT support weight 4bit."
raise ValueError(msg)
if hw_mode in [520]:
for k, (bw, valid_bw) in d_bw.items():
if bw not in ["int8"]:
msg = f"Invalid {k}: {bw}. 520 only support 8bit."
raise ValueError(msg)
d_knerex = {}
d_knerex["datapath_bitwidth_mode"] = dp_bw
d_knerex["model_in_bitwidth_mode"] = in_bw
d_knerex["model_out_bitwidth_mode"] = out_bw
d_knerex["cpu_bitwidth_mode"] = cpu_bw
d_knerex["weight_bitwidth_mode"] = wt_bw
d_knerex["weight_mix_percentile"] = 0
if wt_bw.startswith("mix_interleave_"):
# mix_interleave is for debug only
if wt_bw.endswith("_8"):
d_knerex["weight_bitwidth_mode"] = "mix interleave"
d_knerex["weight_mix_percentile"] = "int8"
elif wt_bw.endswith("_16"):
d_knerex["weight_bitwidth_mode"] = "mix interleave"
d_knerex["weight_mix_percentile"] = "int16"
else:
raise NotImplementedError(f"wrong weight bw: {wt_bw}")
return d_knerex
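# Example (derived from the expansion above): wt_bw="mix_interleave_16" passes the
# validity check and yields weight_bitwidth_mode="mix interleave" with
# weight_mix_percentile="int16"; all other weight modes keep weight_mix_percentile at 0.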
def generate_conf(template,
platform, # is number 520/720/...
optimize="wqbi-p",
                  mode=2,  # choose from 0/1/2/3 (or 51 for the internal dongle test); see get_module_run for details
limit_input_formats=False,
dp_bw="int8",
wt_bw="int8",
in_bw="int8",
out_bw="int8",
cpu_bw="int8", # from 0.24.0
datapath_range_method="percentage",
data_analysis_pct=0.999,
data_analysis_16b_pct=0.999999,
data_analysis_threads=8,
percentile=0.001,
outlier_factor=1.0,
unlock_size_limit=False,
fm_cut="default", # "default" / "deep_search"
):
"""Generate config for toolchain/gen_fx_model."""
module_run = get_module_run(mode)
do_dump, num_input_samples = get_dynasty_conf(mode)
mode_on, knerex_dump_level, model_optimize = get_mode_run(platform, optimize)
tag = f"mode{mode}_{platform}_{optimize}"
# TODO: make weight_compress default by platform
# weight_compress = platform in MODE_HW_LIMIT["weight_compress"]
weight_compress = False
d_bw = check_bitwidth_mode(dp_bw, wt_bw, in_bw, out_bw, cpu_bw, platform)
j = deepcopy(template)
j["tag"] = pp(tag)
j["module_run"] = module_run
j["mode_run"] = mode_on
j["dynasty"]["do_dump"] = do_dump
j["dynasty"]["num_input_samples"] = num_input_samples
j["knerex"]["dump_level"] = knerex_dump_level
j["knerex"].update(d_bw)
j["knerex"]["percentile"] = percentile
j["knerex"]["data_analysis_pct"] = data_analysis_pct
j["knerex"]["need_additional_data_analysis_pct"] = 1
j["knerex"]["additional_data_analysis_pcts"] = [data_analysis_16b_pct]
j["knerex"]["data_analysis_threads"] = data_analysis_threads
j["knerex"]["datapath_range_method"] = datapath_range_method
j["knerex"]["outlier_factor"] = outlier_factor
j["compiler_piano"]["model_optimize"] = model_optimize
j["compiler_piano"]["node_schedule_mode"] = fm_cut
j["compiler_piano"]["weight_compress"] = weight_compress
j["compiler_piano"]["limit_input_formats"] = limit_input_formats
if unlock_size_limit:
j["compiler_piano"]["max_onnx_MB"] = 100000 # no onnx larger than 100G?
if DEBUG:
j["path"]["internal"] = True
# if os.environ.get("USE_PREBUILD", False):
# j["path"]["use_toolchain"] = False
return j, tag
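# Minimal usage sketch, assuming `template` was loaded from the regression template
# JSON and already contains the "dynasty"/"knerex"/"compiler_piano"/"path" sections
# updated above:
#   j, tag = generate_conf(template, platform=720, optimize="wqbi-p", mode=2)
#   # tag == "mode2_720_wqbi-p"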
def generate_conf_batch(template, p_out):
"""Automatically generate json for needed situations."""
for mode in [0, 1, 2, 3]:
for platform in [520, 530, 540, 630, 720, 730]:
for optimize in ["scaled", "wqbi-p", "wqbi-s"]:
"""
wqbi-p for wqbi in parallel. faster, less improvement.
wqbi-s for wqbi in sequential. slower, need more memory. more accurate.
"""
j, tag = generate_conf(template, platform=platform, optimize=optimize, mode=mode)
fn_j = p_out / f"{tag}.json"
with open(fn_j, "w") as f:
json.dump(j, f, indent=4, sort_keys=True)
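# generate_conf_batch writes one JSON file per (mode, platform, optimize) combination:
# 4 modes x 6 platforms x 3 optimize settings = 72 files, each named "<tag>.json".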
if __name__ == "__main__":
arguments = docopt(__doc__, version="gen_configs 1.0")
if arguments["tc"]:
p_template = p_script / "template" / "regerssion_tc.json"
with open(p_template, "r") as f:
template = json.load(f)
        p_out = pathlib.Path(arguments["--output"])
        p_out.mkdir(parents=True, exist_ok=True)
        generate_conf_batch(template, p_out)