"""Generate scripts to run regression based on regression_config.yaml
|
|
|
|
Usage:
|
|
gen_regression_json.py tc --output=<d_out>
|
|
gen_regression_json.py (-h | --help)
|
|
gen_regression_json.py --version
|
|
|
|
Options:
|
|
-o --output=<d_out> Specify output folder
|
|
-h --help Show this screen.
|
|
--version Show version.
|
|
"""
|
|
|
|
from docopt import docopt
|
|
|
|
import pathlib
|
|
import os
|
|
|
|
import commentjson as json
|
|
from copy import deepcopy
|
|
|
|
from sys_flow.flow_constants import MODE_HW_LIMIT, P_BIN_CACHE, MODE_HARDWARE
|
|
|
|
DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False
|
|
import snoop
|
|
snoop.install(enabled=DEBUG)
|
|
|
|
|
|
#####################################################################
|
|
# constant
|
|
#####################################################################
|
|
|
|
p_script = pathlib.Path(__file__).resolve().parent
|
|
|
|
#####################################################################
|
|
# code
|
|
#####################################################################
|
|
|
|
def get_module_run(mode):
    """
    mode 0: ip evaluator only
    mode 1: knerex + compiler
    mode 2: ip eval + knerex + compiler + btm + snr (1 image, dump 0)
    mode 3: ip eval + knerex + compiler + btm + snr (all images, dump 2)
    mode 51: mode 2 + run_nef_kneron_plus (internal dongle test only)
    """
    if mode == 0:
        module_run = {"only_ip_evaluator": True}
    elif mode == 1:
        module_run = {
            "piano_knerex": True,
            "compiler_piano": True
        }
    elif mode in [2, 3]:
        module_run = {
            "piano_knerex": True,
            "piano_dynasty": True,
            "snr_calculation": True,
            "compiler_piano": True,
            "csim": True
        }
    elif mode == 51:
        # this mode is for internal dongle tests only; it will call the internal dongle server
        module_run = {
            "piano_knerex": True,
            "piano_dynasty": True,
            "snr_calculation": True,
            "compiler_piano": True,
            "csim": True,
            "run_nef_kneron_plus": True
        }
    else:
        raise NotImplementedError(f"unsupported mode: {mode}")

    return module_run


def get_mode_run(platform, optimize):
    """Determine which mode (of dynasty) to run."""
    # NOTE: only one platform is run for now; `platform` must be an integer, not a list.
    assert platform in [520, 530, 540, 630, 720, 730]
    # see config_default.json for more optimization options
    assert optimize in ["scaled", "wqbi-p", "wqbi-s"]

    # knerex dump level bits:
    #   bit 0:  scaled.bie               `|1`
    #   bit 1:  quan.bie                 `|2`
    #   bit 2:  wqbi.bie                 `|4`
    #   bit 3:  onnx+json                `|8`
    #   bit 4:  json only                `|16`
    #   bit 5:  hwbi.bie                 `|32`
    #   bit 6:  hwbi-mse.bie             `|64`
    #   bit 7:  decomposed.bie           `|128`
    #   bit 8:  thread_auto_adjusted     `|256`
    #   bit 9:  less_DRAM_mode           `|512`
    #   bit 10: wqbip.bie parallel mode  `|1024`
    if optimize == "scaled":
        knerex_dump_level = "00010001001"
        mode_on = {
            f"{platform}": True,
        }
        model_optimize = optimize
    elif optimize == "wqbi-p":
        mode_on = {
            f"{platform}-wqbi": True,
        }
        model_optimize = "wqbi"
        knerex_dump_level = "10010001011"
    elif optimize == "wqbi-s":
        mode_on = {
            f"{platform}-wqbi": True,
        }
        model_optimize = "wqbi"
        knerex_dump_level = "00010001001"
    else:
        raise NotImplementedError(f"unsupported optimize: {optimize}")

    # need to turn on reference signal
    # NEW from 0.24.0
    mode_on[f"{platform}graphopt"] = True
    # mode_on["float"] = True

    return mode_on, knerex_dump_level, model_optimize


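# Illustrative sketch (not part of the original flow): assuming the dump-level
# string is read with bit 10 as the leftmost character and bit 0 as the rightmost,
# the hard-coded strings above can be reproduced from named bit positions.
def _compose_dump_level(bits, width=11):
    """Compose a knerex dump-level bitstring from a set of bit positions (illustration only)."""
    value = 0
    for bit in bits:
        value |= 1 << bit
    return format(value, f"0{width}b")

# e.g. _compose_dump_level({0, 3, 7})         -> "00010001001"  (scaled.bie + onnx/json + decomposed.bie)
#      _compose_dump_level({0, 1, 3, 7, 10})  -> "10010001011"  (adds quan.bie and wqbip parallel mode)

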
def get_dynasty_conf(mode):
    """Determine the dump level and how many sample inputs to use."""
    if mode == 3:
        do_dump = 2
        num_input_samples = 1000
    else:
        do_dump = 0
        num_input_samples = 1

    return do_dump, num_input_samples


def check_bitwidth_mode(dp_bw, wt_bw, in_bw, out_bw, cpu_bw, hw_mode):
    """Verify the given bitwidth modes, and possibly expand them for weight mix interleave.

    NOTE: this is only a quick check.
    Knerex will run the detailed check.

    NOTE: all regression checks on bitwidth should live here;
    it is easier to maintain.
    """
    # NOTE: the airtable must stay in sync with this code.
    # NOTE: weight code "mix_interleave_8" is "mix interleave" + 8-bit weight_mix_percentile;
    #       weight code "mix_interleave_16" is "mix interleave" + 16-bit weight_mix_percentile.
    # "mix interleave" is for DEBUG only.
    # mix light: set all weight bitwidths to int16, except Gemm and Convolution
    # mix balance: set all weight bitwidths to int16, except non-depthwise Convolution
    valid_wghts = [
        "int8", "int16", "int4",
        "mix light", "mix balance",
        "mix interleave", "mix_interleave_8", "mix_interleave_16",
    ]
    # mix light: set all datapath bitwidths to int16, except Gemm, Convolution, and MatMul
    # mix balance: set all datapath bitwidths to int16, except MatMul
    # "all int8" / "mix interleave" are for DEBUG only.
    valid_data = ["int8", "int16", "mix light", "mix balance", "all int8", "mix interleave"]
    valid_bw_io = ["int8", "int16"]

    d_bw = {
        "cpu node bitwidth": (cpu_bw, valid_bw_io),
        "input bitwidth": (in_bw, valid_bw_io),
        "output bitwidth": (out_bw, valid_bw_io),
        "datapath bitwidth": (dp_bw, valid_data),
        "weight bitwidth": (wt_bw, valid_wghts)
    }

    for name, (bw, valid_bw) in d_bw.items():
        if bw not in valid_bw:
            msg = f"Invalid {name}: {bw}. Should be in {valid_bw}."
            raise ValueError(msg)

    # more special settings
    if hw_mode in [520, 720]:
        if wt_bw == "int4":
            msg = f"platform {hw_mode} does NOT support 4-bit weights."
            raise ValueError(msg)

    if hw_mode in [520]:
        for k, (bw, _valid) in d_bw.items():
            if bw not in ["int8"]:
                msg = f"Invalid {k}: {bw}. 520 only supports 8-bit."
                raise ValueError(msg)

    d_knerex = {}
    d_knerex["datapath_bitwidth_mode"] = dp_bw
    d_knerex["model_in_bitwidth_mode"] = in_bw
    d_knerex["model_out_bitwidth_mode"] = out_bw
    d_knerex["cpu_bitwidth_mode"] = cpu_bw
    d_knerex["weight_bitwidth_mode"] = wt_bw
    d_knerex["weight_mix_percentile"] = 0

    if wt_bw.startswith("mix_interleave_"):
        # mix_interleave is for debug only
        if wt_bw.endswith("_8"):
            d_knerex["weight_bitwidth_mode"] = "mix interleave"
            d_knerex["weight_mix_percentile"] = "int8"
        elif wt_bw.endswith("_16"):
            d_knerex["weight_bitwidth_mode"] = "mix interleave"
            d_knerex["weight_mix_percentile"] = "int16"
        else:
            raise NotImplementedError(f"wrong weight bw: {wt_bw}")

    return d_knerex


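# Illustrative usage (not part of the regression flow): a "mix_interleave_16"
# weight code is expanded into "mix interleave" plus an int16 weight_mix_percentile,
# e.g.
#   d = check_bitwidth_mode("int8", "mix_interleave_16", "int8", "int8", "int8", 730)
#   d["weight_bitwidth_mode"]   -> "mix interleave"
#   d["weight_mix_percentile"]  -> "int16"

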
def generate_conf(template,
                  platform,  # a number: 520/720/...
                  optimize="wqbi-p",
                  mode=2,  # choose from 0/1/2/3; see the documentation for details
                  limit_input_formats=False,
                  dp_bw="int8",
                  wt_bw="int8",
                  in_bw="int8",
                  out_bw="int8",
                  cpu_bw="int8",  # from 0.24.0
                  datapath_range_method="percentage",
                  data_analysis_pct=0.999,
                  data_analysis_16b_pct=0.999999,
                  data_analysis_threads=8,
                  percentile=0.001,
                  outlier_factor=1.0,
                  unlock_size_limit=False,
                  fm_cut="default",  # "default" / "deep_search"
                  ):
    """Generate config for toolchain/gen_fx_model."""
    module_run = get_module_run(mode)
    do_dump, num_input_samples = get_dynasty_conf(mode)
    mode_on, knerex_dump_level, model_optimize = get_mode_run(platform, optimize)

    tag = f"mode{mode}_{platform}_{optimize}"

    # TODO: make weight_compress default by platform
    # weight_compress = platform in MODE_HW_LIMIT["weight_compress"]
    weight_compress = False

    d_bw = check_bitwidth_mode(dp_bw, wt_bw, in_bw, out_bw, cpu_bw, platform)

    j = deepcopy(template)
    j["tag"] = pp(tag)  # pp is installed into builtins by snoop.install() above and returns its argument
    j["module_run"] = module_run
    j["mode_run"] = mode_on

    j["dynasty"]["do_dump"] = do_dump
    j["dynasty"]["num_input_samples"] = num_input_samples

    j["knerex"]["dump_level"] = knerex_dump_level
    j["knerex"].update(d_bw)
    j["knerex"]["percentile"] = percentile
    j["knerex"]["data_analysis_pct"] = data_analysis_pct
    j["knerex"]["need_additional_data_analysis_pct"] = 1
    j["knerex"]["additional_data_analysis_pcts"] = [data_analysis_16b_pct]
    j["knerex"]["data_analysis_threads"] = data_analysis_threads
    j["knerex"]["datapath_range_method"] = datapath_range_method
    j["knerex"]["outlier_factor"] = outlier_factor

    j["compiler_piano"]["model_optimize"] = model_optimize
    j["compiler_piano"]["node_schedule_mode"] = fm_cut
    j["compiler_piano"]["weight_compress"] = weight_compress
    j["compiler_piano"]["limit_input_formats"] = limit_input_formats
    if unlock_size_limit:
        j["compiler_piano"]["max_onnx_MB"] = 100000  # no onnx larger than 100 GB?

    if DEBUG:
        j["path"]["internal"] = True
        # if os.environ.get("USE_PREBUILD", False):
        #     j["path"]["use_toolchain"] = False

    return j, tag


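# Illustrative single-config call (assumption: the template JSON already contains
# the sections touched above -- "dynasty", "knerex", "compiler_piano", "path"), e.g.
#   with open(p_script / "template" / "regerssion_tc.json") as f:
#       template = json.load(f)
#   conf, tag = generate_conf(template, platform=730, optimize="wqbi-s", mode=3)
#   # tag == "mode3_730_wqbi-s"

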
def generate_conf_batch(template, p_out):
    """Automatically generate JSON configs for all needed combinations."""
    # wqbi-p: wqbi in parallel. Faster, but less improvement.
    # wqbi-s: wqbi in sequence. Slower and needs more memory, but more accurate.
    # 4 modes x 6 platforms x 3 optimize settings = 72 config files in total.
    for mode in [0, 1, 2, 3]:
        for platform in [520, 530, 540, 630, 720, 730]:
            for optimize in ["scaled", "wqbi-p", "wqbi-s"]:
                j, tag = generate_conf(template, platform=platform, optimize=optimize, mode=mode)

                fn_j = p_out / f"{tag}.json"
                with open(fn_j, "w") as f:
                    json.dump(j, f, indent=4, sort_keys=True)


if __name__ == "__main__":
    arguments = docopt(__doc__, version="gen_configs 1.0")

    if arguments["tc"]:
        p_template = p_script / "template" / "regerssion_tc.json"
        with open(p_template, "r") as f:
            template = json.load(f)

        p_out = pathlib.Path(arguments["--output"])
        p_out.mkdir(parents=True, exist_ok=True)
        generate_conf_batch(template, p_out)
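
# Illustrative invocation (the output folder name is arbitrary):
#   python gen_regression_json.py tc --output=./regression_out
# which writes one mode{mode}_{platform}_{optimize}.json file per combination into ./regression_out.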