1190 lines
50 KiB
Python
1190 lines
50 KiB
Python
#! /usr/bin/env python3
|
||
|
||
""" CLI interface for regression
|
||
|
||
Usage:
|
||
run.py [--all-pass] <fn_json> [<keys>...]
|
||
run.py (-h | --help)
|
||
run.py --version
|
||
|
||
Options:
|
||
--all-pass If all pass, exit with 0, otherwise with 1
|
||
-h --help Show this screen.
|
||
--version Show version.
|
||
|
||
"""
|
||
|
||
|
||
import shutil
|
||
import os
|
||
import time
|
||
import tempfile
|
||
|
||
import multiprocessing
|
||
|
||
from pathlib import Path
|
||
import json
|
||
|
||
from blinker import signal
|
||
|
||
from docopt import docopt
|
||
|
||
import sys_flow_v2.flow_utils as futils
|
||
import sys_flow_v2.flow_constants as fconsts
|
||
from sys_flow_v2.report_utils import report, generate_snr_reports, df_row2status
|
||
from sys_flow_v2.exceptions import RegressionError
|
||
from sys_flow_v2.test_case import test_case
|
||
from sys_flow_v2.regression import regression
|
||
from sys_flow_v2.compiler_v2 import check_input_fmt
|
||
from sys_flow_v2.onnx_op_stats import get_ioinfo_onnx
|
||
from sys_flow_v2.compiler_config import gen_ip_config
|
||
from sys_flow_v2.gen_regression_json import generate_conf
|
||
from sys_flow_v2 import mix_bitwidth_utils as mbu
|
||
from sys_flow_v2 import mix_bitwidth_utils_v2 as mbu_v2
|
||
|
||
import snoop
|
||
DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False
|
||
MIXBW_DEBUG = True if os.environ.get("MIXBW_DEBUG", False) else False
|
||
snoop.install(enabled=DEBUG)
|
||
|
||
p_script = Path(__file__).resolve().parent
|
||
|
||
|
||
def rename_directory(source_path, target_path):
|
||
source_path = Path(source_path)
|
||
target_path = Path(target_path)
|
||
|
||
# Check if the source directory exists
|
||
if not source_path.exists():
|
||
raise FileNotFoundError(f"The directory {source_path} does not exist.")
|
||
# Remove the target directory if it exists
|
||
if target_path.exists():
|
||
shutil.rmtree(target_path)
|
||
|
||
target_path.parent.mkdir(parents=True, exist_ok=True)
|
||
try:
|
||
os.rename(source_path, target_path)
|
||
print(f"Directory renamed from {source_path} to {target_path}")
|
||
except Exception as e:
|
||
print(f"Directory rename failed: {e}")
|
||
|
||
|
||
def run_single_case(ts_w_r):
|
||
"""Use for multiprocess call.
|
||
|
||
A error is returned to callback in main process then combined into report.
|
||
(RETURN other information if needed. This is the only way to sync messages.)
|
||
|
||
This function must in top level, should not be embeded in another function.
|
||
|
||
the input ts_w_r must be one parameter.
|
||
"""
|
||
|
||
test_case_path, r_config = ts_w_r
|
||
|
||
try:
|
||
i_case = test_case(test_case_path, r_config)
|
||
released_files = i_case.run_flow()
|
||
# success!
|
||
return RegressionError("general/Success", i_case.model_id), released_files
|
||
except Exception as e:
|
||
# NOTE: if any submodule failed, it will reach here.
|
||
try:
|
||
# free up first
|
||
i_case.clean_opt()
|
||
released_files = i_case.save_summary()
|
||
# released_files is probably only the model_fx_html / model_fx_json
|
||
i_case.post_clean_up()
|
||
|
||
if DEBUG:
|
||
print(f"run_flow failed. Clean up {i_case}")
|
||
print(e)
|
||
return e, released_files
|
||
except:
|
||
return e, None
|
||
|
||
|
||
|
||
def gen_opt_model(
|
||
p_onnx,
|
||
np_txt,
|
||
platform=730, # choose "520" / "720" / "530" / "630"
|
||
optimize="o0", # choose from "o0", "o1", "02"
|
||
limit_input_formats=False,
|
||
datapath_range_method="percentage",
|
||
data_analysis_pct=0.999, # set to 1.0 if detection model
|
||
data_analysis_16b_pct=0.999999, # set to 1.0 if detection model
|
||
data_analysis_threads=8,
|
||
datapath_bitwidth_mode="int8",
|
||
weight_bitwidth_mode="int8",
|
||
model_in_bitwidth_mode="int8",
|
||
model_out_bitwidth_mode="int8",
|
||
cpu_node_bitwidth_mode="int8", # from 0.24.0
|
||
lut_high_accuracy_mode="2", # from 0.25.0
|
||
percentile=0.001,
|
||
outlier_factor=1.0,
|
||
quantize_mode="default", # choose from "default", "post_sigmoid"
|
||
quan_config=None, # let user to set constraints for quantization.
|
||
qat_config=None, # optional config for qat
|
||
compiler_tiling="default", # changed from fm_cut, since 0.24.0
|
||
p_output="/data1/kneron_flow",
|
||
weight_bandwidth=None, # None will use default.
|
||
unlock_size_limit=False, # set to True if need to use huge onnx file.
|
||
mode=2, # choose from 0/1/2/3. See document for details.
|
||
snr_max_in_pair=3, # max number pairs of input for dynasty+snr
|
||
snr_layer_control=None, # to control how dynsty dump results for snr check. From 0.26.0
|
||
opt_model_json=None, # if given, will use this json to generate opt model. From 0.26.0
|
||
target_snr=None # generate fastest fx model and user_config to reach target snr. From 0.26.0
|
||
):
|
||
"""
|
||
1. run a output16 and output light
|
||
2. get all conv value_info name via output_16
|
||
3. run single 8bit cases
|
||
4. run all cumulative cases and plot to culmulative snr plot
|
||
5. if tagets snr exists:
|
||
generate a new user_config.json and run -> final.opt.onnx
|
||
"""
|
||
|
||
env_output = os.environ.get("KTC_OUTPUT_DIR")
|
||
if env_output and p_output == "/data1/kneron_flow":
|
||
p_output = env_output
|
||
assert platform == 730, f"Only platform 730 is supported."
|
||
assert optimize == "o0", f"Only optimize o0 is supported."
|
||
|
||
gen_fx_model_params = {
|
||
"p_onnx": p_onnx,
|
||
"np_txt": np_txt,
|
||
"platform": platform,
|
||
"optimize": optimize,
|
||
"datapath_bitwidth_mode": "int16",
|
||
"weight_bitwidth_mode": "int16",
|
||
"model_in_bitwidth_mode": "int16",
|
||
"model_out_bitwidth_mode": "int16",
|
||
"cpu_node_bitwidth_mode": "int16",
|
||
"lut_high_accuracy_mode": "2",
|
||
"quan_config": None,
|
||
"p_output": p_output,
|
||
"mode": 55,
|
||
"snr_max_in_pair": snr_max_in_pair,
|
||
"snr_layer_control": 0
|
||
}
|
||
p_onnx = Path(p_onnx)
|
||
m_name = p_onnx.stem
|
||
if m_name.endswith(".origin"):
|
||
m_name = m_name.replace(".origin", "")
|
||
if not p_onnx.exists():
|
||
msg = f"Given onnx {p_onnx} does not exist!"
|
||
raise FileExistsError(msg)
|
||
|
||
env_workdir = os.environ.get("KTC_WORKDIR")
|
||
p_working = Path(env_workdir or "/workspace/.tmp/models")
|
||
p_model = p_working / m_name / m_name
|
||
p_model_16 = p_working / f"{m_name}_16" / f"{m_name}_16"
|
||
p_model_light = p_working / f"{m_name}_light" / f"{m_name}_light"
|
||
p_model_target = p_working / f"{m_name}_target" / f"{m_name}_target"
|
||
p_onnx_decomposed_16 = p_model_16 /"output"/ "knerex_730" / f"{m_name}.kdp730.decomposed.onnx"
|
||
|
||
opt_model_data = None
|
||
if opt_model_json is not None:
|
||
with open(opt_model_json, 'r') as file:
|
||
opt_model_data = json.load(file)
|
||
|
||
# 1.1 run output 16
|
||
# load from p_working directory, guarantee p_model_16 exists
|
||
success_16, [snr_16, cfunc_time_16] = mbu.load_single_rst(p_model_16)
|
||
if not success_16.value:
|
||
gen_fx_model_params_16 = gen_fx_model_params.copy()
|
||
# run output 16
|
||
gen_fx_model(**gen_fx_model_params_16)
|
||
rename_directory(p_model, p_model_16)
|
||
success_16, [snr_16, cfunc_time_16] = mbu.load_single_rst(p_model_16)
|
||
assert success_16.value, f"Failed to run output 16 for {m_name}"
|
||
print('=====================================')
|
||
print('success_16', success_16.name)
|
||
print("snr_16", snr_16)
|
||
print("cfunc_time_16", cfunc_time_16)
|
||
print('=====================================')
|
||
|
||
# # 1.2 run output mix light
|
||
success_light, [snr_light, cfunc_time_light] = mbu.load_json_rst(opt_model_data, 'mix light')
|
||
if not success_light.value:
|
||
success_light, [snr_light, cfunc_time_light] = mbu.load_single_rst(p_model_light)
|
||
if not success_light.value:
|
||
gen_fx_model_params_light = gen_fx_model_params.copy()
|
||
gen_fx_model_params_light['datapath_bitwidth_mode']='mix light'
|
||
|
||
gen_fx_model(**gen_fx_model_params_light)
|
||
rename_directory(p_model, p_model_light)
|
||
success_light, [snr_light, cfunc_time_light] = mbu.load_single_rst(p_model_light)
|
||
assert success_light.value, f"Failed to run output light for {m_name}"
|
||
print('=====================================')
|
||
print('success_light', success_light.name)
|
||
print("snr_light", snr_light)
|
||
print("cfunc_time_light", cfunc_time_light)
|
||
print('=====================================')
|
||
|
||
# 2. get all conv value_info name via output_16
|
||
p_single = p_working / f"{m_name}_single"
|
||
|
||
conv_nodes = mbu.get_conv_nodes_skip_input(p_onnx_decomposed_16)
|
||
snr_list_single, cfunc_time_list_single = [], []
|
||
print("conv_nodes", [node.name for node in conv_nodes])
|
||
print("conv_nodes ", len(conv_nodes))
|
||
|
||
# 3. run single 8bit case
|
||
success_single, [snr_single, cfunc_time_single] = mbu.load_json_rst(opt_model_data, 'single')
|
||
if success_single.value and len(snr_single) == len(conv_nodes):
|
||
snr_list_single = snr_single
|
||
cfunc_time_list_single = cfunc_time_single
|
||
else:
|
||
# re-run single 8bit case
|
||
for node in conv_nodes:
|
||
node_name = node.name.replace("/", "_")
|
||
user_config_single = mbu.generate_user_config([node.input[0]])
|
||
p_single_model = p_single / node_name
|
||
|
||
success_single, [snr, cfunc_time] = mbu.load_single_rst(p_single_model)
|
||
mbu.post_clean(p_single_model)
|
||
if not success_single.value:
|
||
gen_fx_model_params_single = gen_fx_model_params.copy()
|
||
gen_fx_model_params_single['quan_config'] = user_config_single
|
||
|
||
gen_fx_model(**gen_fx_model_params_single)
|
||
rename_directory(p_model, p_single_model)
|
||
success_single, [snr, cfunc_time] = mbu.load_single_rst(p_single_model)
|
||
mbu.post_clean(p_single_model)
|
||
assert success_single.value, f"Failed to run output single 8bit for {m_name}"
|
||
|
||
snr_list_single.append(snr)
|
||
cfunc_time_list_single.append(cfunc_time)
|
||
print('=====================================')
|
||
print('success_single', success_single.name)
|
||
print("snr_list_single ", snr_list_single)
|
||
print("cfunc_time_list_single ", cfunc_time_list_single)
|
||
print('=====================================')
|
||
|
||
|
||
# 4 run cumculative 8bit case
|
||
# 4.1 get the dp_name_list in sorted order
|
||
dp_name_list = [node.input[0] for node in conv_nodes]
|
||
sorted_dp_idx_list = mbu.get_sorted_dp_idx_by_snr(dp_name_list, snr_list_single)
|
||
print("sorted_dp_idx", sorted_dp_idx_list)
|
||
|
||
p_cum = p_working / f"{m_name}_cum"
|
||
snr_list_cum, cfunc_time_list_cum = [], []
|
||
|
||
length, step = len(dp_name_list), max(len(dp_name_list)//40+1, 1)
|
||
# 4.2 run cumulative 8bit case
|
||
success_cum, [snr_cum, cfunc_time_cum] = mbu.load_json_rst(opt_model_data, 'cumulative')
|
||
if success_cum.value and len(snr_cum) == len(range(0, length, step)):
|
||
snr_list_cum = snr_cum
|
||
cfunc_time_list_cum = cfunc_time_cum
|
||
else:
|
||
for i in range(0, length, step):
|
||
begin, end = 0, min(i+step, length)
|
||
user_config_cum = mbu.generate_user_config_by_idx(dp_name_list, sorted_dp_idx_list, begin, end)
|
||
print(begin, end)
|
||
|
||
p_cum_model = p_cum / str(i)
|
||
success_cum, [snr, cfunc_time] = mbu.load_single_rst(p_cum_model)
|
||
mbu.post_clean(p_cum_model)
|
||
if not success_cum.value:
|
||
gen_fx_model_params_cum = gen_fx_model_params.copy()
|
||
gen_fx_model_params_cum['quan_config'] = user_config_cum
|
||
gen_fx_model(**gen_fx_model_params_cum)
|
||
|
||
rename_directory(p_model, p_cum_model)
|
||
success_cum, [snr, cfunc_time] = mbu.load_single_rst(p_cum_model)
|
||
mbu.post_clean(p_cum_model)
|
||
assert success_cum.value, f"Failed to run output cumulative 8bit for {m_name}"
|
||
snr_list_cum.append(snr)
|
||
cfunc_time_list_cum.append(cfunc_time)
|
||
print('=====================================')
|
||
print("success_cum", success_cum.name)
|
||
print("snr_list_cum ", snr_list_cum)
|
||
print("cfunc_time_list_cum ", cfunc_time_list_cum)
|
||
print('=====================================')
|
||
|
||
# save gen_opt_json
|
||
opt_model_json = mbu.dump_opt_json(snr_list_single, cfunc_time_list_single, snr_list_cum, cfunc_time_list_cum, snr_16, cfunc_time_16, snr_light, cfunc_time_light, sorted_dp_idx_list, m_name, p_working)
|
||
|
||
mbu.plot_cum_snr_cfunc_time(opt_model_json, p_working)
|
||
|
||
# 5. generate a new user_config.json and run -> target fx model
|
||
if target_snr is not None:
|
||
user_config_target = mbu.generate_user_config_by_target_snr(snr_16, snr_light, snr_list_cum, target_snr, dp_name_list, sorted_dp_idx_list, step)
|
||
|
||
if user_config_target is not None:
|
||
# run with user_config_tagret_snr
|
||
gen_fx_model_params_target = gen_fx_model_params.copy()
|
||
gen_fx_model_params_target['quan_config'] = user_config_target
|
||
gen_fx_model(**gen_fx_model_params_target)
|
||
|
||
rename_directory(p_model, p_model_target)
|
||
success_target, [snr_target, cfunc_time_target] = mbu.load_single_rst(p_model_target)
|
||
assert success_target.value, f"Failed to run output target for {m_name}"
|
||
print('=====================================')
|
||
print('success_target', success_target.name)
|
||
print("snr_target", snr_target)
|
||
print("cfunc_time_target", cfunc_time_target)
|
||
print('=====================================')
|
||
return p_output
|
||
|
||
def gen_opt_model_v2(
|
||
p_onnx,
|
||
np_txt,
|
||
data_analysis_threads=8,
|
||
weight_bitwidth_mode="int16",
|
||
mixbw_mode='data', # ['data', 'weight', 'both']
|
||
flops_ratio=0.5,
|
||
p_output="/data1/kneron_flow",
|
||
p_cache="/workspace/.tmp/models",
|
||
clean_cache=False, # whether to clean cache
|
||
num_of_processors=16,
|
||
snr_max_in_pair=3, # max number pairs of input for dynasty+snr
|
||
):
|
||
"""
|
||
使用15bit的conv浮点运算量占比,flops_ratio=0, 所有conv使用8bit,flops_ratio=1.0, 所有conv使用15bit.
|
||
"""
|
||
|
||
env_output = os.environ.get("KTC_OUTPUT_DIR")
|
||
env_workdir = os.environ.get("KTC_WORKDIR")
|
||
if env_output and p_output == "/data1/kneron_flow":
|
||
p_output = env_output
|
||
if env_workdir and p_cache == "/workspace/.tmp/models":
|
||
p_cache = env_workdir
|
||
gen_fx_model_params = {
|
||
"p_onnx": p_onnx,
|
||
"np_txt": np_txt,
|
||
"platform": 730,
|
||
"optimize": "o0",
|
||
"data_analysis_threads": data_analysis_threads,
|
||
"datapath_bitwidth_mode": "int16",
|
||
"weight_bitwidth_mode": weight_bitwidth_mode,
|
||
"model_in_bitwidth_mode": "int16",
|
||
"model_out_bitwidth_mode": "int16",
|
||
"cpu_node_bitwidth_mode": "int16",
|
||
"lut_high_accuracy_mode": "2",
|
||
"quan_config": None,
|
||
"p_output": p_output,
|
||
"p_cache": p_cache,
|
||
"clean_cache": False,
|
||
"mode": 55,
|
||
"snr_max_in_pair": snr_max_in_pair,
|
||
"snr_layer_control": 2
|
||
}
|
||
p_onnx = Path(p_onnx)
|
||
m_name = p_onnx.stem
|
||
time_start = time.perf_counter()
|
||
|
||
if m_name.endswith(".origin"):
|
||
m_name = m_name.replace(".origin", "")
|
||
if not p_onnx.exists():
|
||
msg = f"Given onnx {p_onnx} does not exist!"
|
||
raise FileExistsError(msg)
|
||
|
||
p_temp = Path(p_cache)
|
||
p_subgraphs = Path(os.path.join(p_temp, f'ng5_subgraphs'))
|
||
p_bm_txt = p_temp / f"bm_mixbw.txt"
|
||
p_model = p_temp / m_name / m_name
|
||
p_model_16 = p_temp / f"{m_name}_16" / f"{m_name}_16"
|
||
p_model_target = p_temp / f"{m_name}_target" / f"{m_name}_target"
|
||
p_model_data_png = p_subgraphs / "data"
|
||
p_model_weight_png = p_subgraphs / "weight"
|
||
p_model_log = p_subgraphs / "snr_debug.txt"
|
||
p_model_release = p_temp / "release"
|
||
|
||
p_output_target = os.path.join(p_output, f"{m_name}_target")
|
||
p_output_log = os.path.join(p_output, f"snr_debug.txt")
|
||
p_output_data_png = os.path.join(p_output, f"data")
|
||
p_output_weight_png = os.path.join(p_output, f"weight")
|
||
|
||
# run output 16
|
||
gen_fx_model_params_16 = gen_fx_model_params.copy()
|
||
gen_fx_model_params_16['p_output'] = p_model_release
|
||
gen_fx_model(**gen_fx_model_params_16)
|
||
rename_directory(p_model, p_model_16)
|
||
|
||
xbu_v2 = mbu_v2.MixBitwidthUtilsFast(workspace_dir=p_model_16,
|
||
model_dir='.',
|
||
subgraph_dir=p_temp,
|
||
model_name=m_name,
|
||
f_name_16='output',
|
||
mixbw_mode=mixbw_mode,
|
||
flops_ratio=flops_ratio
|
||
)
|
||
# step1
|
||
path_subgraphs = Path(p_subgraphs)
|
||
if os.path.exists(p_subgraphs):
|
||
shutil.rmtree(p_subgraphs)
|
||
p_subgraphs.mkdir(parents=True, exist_ok=True)
|
||
xbu_v2.run_subgraph_creation()
|
||
time_step1_end = time.perf_counter()
|
||
print("\n======= run subgraph creation success =======\n")
|
||
|
||
# step2
|
||
xbu_v2.run_sub_cases_creation(float_input=False, pre_clean=True)
|
||
xbu_v2.run_sub_cases_creation(float_input=True, pre_clean=True)
|
||
time_step2_end = time.perf_counter()
|
||
print("\n======= run subcases creation success =======\n")
|
||
|
||
# step3
|
||
m_name_dict = xbu_v2.get_m_name_dict()
|
||
all_fx_m_names = [item for item in m_name_dict.keys() if item.endswith('fx')]
|
||
|
||
def _process_parallel_models(config, task_mode):
|
||
valid_modes = ("regression", "dynasty")
|
||
if task_mode not in valid_modes:
|
||
raise ValueError(f"Invalid task_mode: {task_mode}. Must be one of {valid_modes}")
|
||
mode_configurations = {
|
||
"regression": {
|
||
"module_run": {
|
||
"piano_knerex": True,
|
||
"compiler_piano": True
|
||
},
|
||
"pre_clean_up": {
|
||
"dynasty_output": False,
|
||
"all_output": True
|
||
}
|
||
},
|
||
"dynasty": {
|
||
"module_run": {
|
||
"piano_knerex": False,
|
||
"compiler_piano": False
|
||
},
|
||
"pre_clean_up": {
|
||
"dynasty_output": True,
|
||
"all_output": False
|
||
}
|
||
}
|
||
}
|
||
for section, settings in mode_configurations[task_mode].items():
|
||
config[section].update(settings)
|
||
|
||
rfs, success_list, df_report, *_ = run_flow(config)
|
||
n_good = len([a for a in success_list if a])
|
||
n_all = len(success_list)
|
||
assert n_good == n_all, f"Successed cases are {n_good}/{n_all}"
|
||
d_status, d_time = df_row2status(df_report)
|
||
return success_list, rfs[0], d_status, d_time
|
||
|
||
p_mixbw_template = p_script / "template" / "regression_mixbw.json"
|
||
with open(p_mixbw_template, "r") as f:
|
||
conf_mixbw = json.load(f)
|
||
conf_mixbw["path"]["cases"] = str(p_temp)
|
||
conf_mixbw["path"]["search"] = ["-f", str(p_bm_txt)]
|
||
conf_mixbw["knerex"]["data_analysis_threads"] = data_analysis_threads
|
||
conf_mixbw["dynasty"]["num_input_samples"] = snr_max_in_pair
|
||
conf_mixbw["dynasty"]["n_parallel_model"] = num_of_processors
|
||
|
||
def _chunker(seq, size):
|
||
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
|
||
|
||
def _update_regression_case_name(bm_path, m_names):
|
||
with open(bm_path, 'w') as f:
|
||
for m_name in m_names:
|
||
f.write(f'{m_name}\n')
|
||
|
||
def _link_knerex_output(base_m_names):
|
||
for base_m_name in base_m_names:
|
||
fx_m_name, fl_m_name = f'{base_m_name}fx', f'{base_m_name}fl'
|
||
fl_knerex_output = os.path.join(p_subgraphs, fl_m_name, 'output', 'knerex_730')
|
||
fx_knerex_output = os.path.join(p_subgraphs, fx_m_name, 'output', 'knerex_730')
|
||
assert os.path.exists(fl_knerex_output), f'fl_knerex_output {fl_knerex_output} does not exist'
|
||
os.makedirs(fx_knerex_output, exist_ok=True)
|
||
assert os.path.exists(fx_knerex_output), f'fx_knerex_output {fx_knerex_output} does not exist'
|
||
for f_name in os.listdir(fl_knerex_output):
|
||
fl_f_name = os.path.join(fl_knerex_output, f_name)
|
||
# Find the position of the last occurrence of '_fl'
|
||
last_fl_index = f_name.rfind('fl')
|
||
if last_fl_index != -1:
|
||
replaced_f_name = f_name[:last_fl_index] + 'fx' + f_name[last_fl_index + 2:]
|
||
else:
|
||
replaced_f_name = f_name
|
||
fx_f_name = os.path.join(fx_knerex_output, replaced_f_name)
|
||
if os.path.exists(fx_f_name):
|
||
os.remove(fx_f_name)
|
||
os.symlink(fl_f_name, fx_f_name)
|
||
|
||
def _move_result(base_m_names):
|
||
for base_m_name in base_m_names:
|
||
fx_m_name, fl_m_name = f'{base_m_name}fx', f'{base_m_name}fl'
|
||
fx_p_model, fl_p_model = m_name_dict[fx_m_name][0], m_name_dict[fl_m_name][0]
|
||
p_source = os.path.join(fl_p_model, 'output', 'results')
|
||
path_source = Path(p_source)
|
||
for item in path_source.iterdir():
|
||
if not item.is_dir():
|
||
continue
|
||
# cp mode_730graphopt_piano to ori_p_model
|
||
dir_src = os.path.join(fl_p_model, 'output', 'results', item.name, 'mode_730graphopt_piano')
|
||
dir_dst = os.path.join(fx_p_model, 'output', 'results', item.name, 'mode_730graphopt_piano')
|
||
# replace dir_src with dir_dst
|
||
if os.path.exists(dir_dst):
|
||
if os.path.islink(dir_dst):
|
||
os.remove(dir_dst)
|
||
elif os.path.isdir(dir_dst):
|
||
shutil.rmtree(dir_dst)
|
||
os.symlink(dir_src, dir_dst)
|
||
|
||
def _regression_snr(fx_m_names):
|
||
for fx_m_name in fx_m_names:
|
||
fx_p_model = m_name_dict[fx_m_name][0]
|
||
p_subgraph_output = os.path.join(fx_p_model, 'output')
|
||
p_subgraph_results = os.path.join(p_subgraph_output, 'results')
|
||
dir_output_list = [f.path for f in os.scandir(p_subgraph_results) if f.is_dir()]
|
||
xbu_v2.run_dynasty_snr(dir_output_list)
|
||
xbu_v2.generate_snr_report(p_subgraph_output)
|
||
|
||
def _remove_subgraph_results(base_m_names):
|
||
for base_m_name in base_m_names:
|
||
fx_m_name, fl_m_name = f'{base_m_name}fx', f'{base_m_name}fl'
|
||
shutil.rmtree(os.path.join(p_subgraphs, fx_m_name, 'output', 'results'))
|
||
shutil.rmtree(os.path.join(p_subgraphs, fl_m_name, 'output', 'results'))
|
||
|
||
for fx_m_names in _chunker(all_fx_m_names, num_of_processors):
|
||
print("fx_m_names", fx_m_names)
|
||
base_m_names = [item[:-2] for item in fx_m_names]
|
||
fl_m_names = [item + 'fl' for item in base_m_names]
|
||
# 3.1 run fl regression
|
||
_update_regression_case_name(p_bm_txt, fl_m_names)
|
||
success, _, d_status, d_time = _process_parallel_models(conf_mixbw, "regression")
|
||
# 3.2 link fl knerex_output to fx
|
||
_link_knerex_output(base_m_names)
|
||
# 3.3 run fx dynasty& snr
|
||
_update_regression_case_name(p_bm_txt, fx_m_names)
|
||
success, _, d_status, d_time = _process_parallel_models(conf_mixbw, "dynasty")
|
||
# 3.4 move result
|
||
_move_result(base_m_names)
|
||
# 3.5 run snr
|
||
_regression_snr(fx_m_names)
|
||
# 3.6 post-process
|
||
_remove_subgraph_results(base_m_names)
|
||
|
||
time_step3_end = time.perf_counter()
|
||
print("\n======= run subcases regression success =======\n")
|
||
|
||
# step4
|
||
xbu_v2.run_offset_calculation(dump=False)
|
||
time_step4_end = time.perf_counter()
|
||
print("\n======= run offset calculation success =======\n")
|
||
|
||
# step5 run simulation
|
||
xbu_v2.run_snr_simulation(dump_plot=True, dump_diff=False)
|
||
time_step5_end = time.perf_counter()
|
||
print("\n======= run snr simulation success =======\n")
|
||
|
||
# step6 run target
|
||
user_config_target = xbu_v2.run_user_config_generation(dump=False)
|
||
time_step6_end = time.perf_counter()
|
||
print("\n======= run user_config generation success =======\n")
|
||
|
||
gen_fx_model_params_target = gen_fx_model_params.copy()
|
||
gen_fx_model_params_target['quan_config'] = user_config_target
|
||
gen_fx_model_params_target['snr_max_in_pair'] = 1000
|
||
gen_fx_model_params_target['snr_layer_control'] = 0
|
||
success, _, d_status, d_time = gen_fx_model(**gen_fx_model_params_target)
|
||
rename_directory(p_model, p_model_target)
|
||
time_step7_end = time.perf_counter()
|
||
|
||
# release png, snr_debug.txt and target to p_output if MIXBW_DEBUG
|
||
if MIXBW_DEBUG:
|
||
def safe_copytree(src, dst):
|
||
if os.path.exists(dst):
|
||
shutil.rmtree(dst)
|
||
if os.path.exists(src):
|
||
shutil.copytree(src, dst)
|
||
def safe_copy(src, dst):
|
||
if os.path.exists(dst):
|
||
os.remove(dst)
|
||
if os.path.exists(src):
|
||
shutil.copy(src, dst)
|
||
# Copy directories
|
||
for src, dst in [
|
||
(p_model_data_png, p_output_data_png),
|
||
(p_model_weight_png, p_output_weight_png),
|
||
]:
|
||
safe_copytree(src, dst)
|
||
# Copy file
|
||
safe_copy(p_model_log, p_output_log)
|
||
# post-clean
|
||
if clean_cache:
|
||
time.sleep(10) # waiting for test_case() to finish
|
||
shutil.rmtree(p_temp)
|
||
|
||
time_end = time.perf_counter()
|
||
|
||
release_time_dict = {
|
||
'mixbw/subgraph creation:t': time_step1_end - time_start,
|
||
'mixbw/subcases creation:t': time_step2_end - time_step1_end,
|
||
'mixbw/subcases regression total:t': time_step3_end - time_step2_end,
|
||
'mixbw/offset calculatio:t': time_step4_end - time_step3_end,
|
||
'mixbw/snr simulation:t': time_step5_end - time_step4_end,
|
||
'mixbw/user config gneration:t': time_step6_end - time_step5_end,
|
||
'mixbw/target total:t': time_step7_end - time_step6_end,
|
||
'mixbw/post process:t': time_end - time_step7_end,
|
||
'mixbw/total:t': time_end - time_start
|
||
}
|
||
d_status.update(release_time_dict)
|
||
return p_output, d_status
|
||
|
||
def gen_fx_model(
|
||
p_onnx,
|
||
np_txt,
|
||
platform, # choose "520" / "720" / "530" / "630"
|
||
optimize="o0", # choose from "o0", "o1", "o2"
|
||
limit_input_formats=False,
|
||
input_fmt=None, # 可以是None(由编译器决定)、字符串或字典类型。字符串表示统一格式,字典用于指定每个输入节点的格式
|
||
datapath_range_method="percentage",
|
||
data_analysis_pct=0.999, # set to 1.0 if detection model
|
||
data_analysis_16b_pct=0.999999, # set to 1.0 if detection model
|
||
data_analysis_threads=8,
|
||
datapath_bitwidth_mode="int8",
|
||
weight_bitwidth_mode="int8",
|
||
model_in_bitwidth_mode="int8",
|
||
model_out_bitwidth_mode="int8",
|
||
cpu_node_bitwidth_mode="int8", # from 0.24.0
|
||
lut_high_accuracy_mode="2", # from 0.25.0
|
||
per_channel_radix=1,
|
||
percentile=0.001,
|
||
outlier_factor=1.0,
|
||
quantize_mode="default", # choose from "default", "post_sigmoid"
|
||
quan_config=None, # let user to set constraints for quantization.
|
||
qat_config=None, # optional config for qat
|
||
compiler_tiling="default", # changed from fm_cut, From 0.24.0
|
||
p_output="/data1/kneron_flow", # where to put result
|
||
p_cache="/workspace/.tmp/models", # where to put temp folder
|
||
clean_cache=False, # whether to clean cache
|
||
weight_bandwidth=None, # None will use default.
|
||
dma_bandwidth=None, # None will use default.
|
||
unlock_size_limit=False, # set to True if need to use huge onnx file.
|
||
mode=2, # choose from 0/1/2/3. See document for details.
|
||
snr_max_in_pair=1000, # max number pairs of input for dynasty+snr. From 0.26.0
|
||
snr_layer_control=None, # to control how dynsty dump results for snr check. From 0.26.0
|
||
need_copy_prepare_model=True
|
||
):
|
||
"""Generate fix-point model for kneron NPUs.
|
||
|
||
Entrypoint for toolchain. Suppose only 1 model per flow run.
|
||
|
||
Args:
|
||
p_onnx (pathlib / str): path to onnx file. it should have passed through onnx2onnx.py.
|
||
np_txt (dict): a dictionary of list of images in numpy format.
|
||
The keys are the names of input nodes of model.
|
||
e.g., `{"input1": [img1, img2]}`, here img1/img2 are two images -> preprocess -> numpy 3D array (HWC)
|
||
if set to None, will run ip evaluator only, ignore knerenx+dynasty+compiler+csim
|
||
platform:
|
||
- "520"
|
||
- "530"
|
||
- "540"
|
||
- "630"
|
||
- "720"
|
||
- "730"
|
||
- "1140"
|
||
|
||
mode:
|
||
- 0: run ip_evaluator only.
|
||
- 1: run knerex (for quantization) + compiler only.
|
||
- 2: run knerex + compiler + dynasty + csim + bit-true-match check.
|
||
dynasty will inference only 1 image and only check quantization accuracy of output layers.
|
||
- 3: run knerex + compiler + dynasty + csim + bit-true-match check + SNR calcuation.
|
||
dynasty will inference all images (or up to snr_max_in_pair) and dump results of all layers.
|
||
It will provide most detailed analysis but will take much longer time.
|
||
- 55: run knerex + compiler + dynasty + snr. For internal snr analysis.
|
||
- 56: run dynasty + snr.
|
||
|
||
snr_max_in_pair: max number of input to run dynasty+snr. Default is 1000. Change to smaller number to decrease time.
|
||
only valid for mode 3, 55, 56. from 0.26.0
|
||
|
||
snr_layer_control: dynasty will dump results for snr check. New from 0.26.0
|
||
|
||
- None: Each mode had a preset value for dump level. Will use the preset value if snr_layer_control is None. User can specify below to override (in some modes).
|
||
- 0: only dump output layer.
|
||
- 1: only dump cpu layer and output layer.
|
||
- 2: dump all layers.
|
||
|
||
optimize: choose "o0" / "o1" / "o2"
|
||
|
||
- "o0": the knerex generated quantization model.
|
||
- "o1": bias adjust parallel, without fm cut improve
|
||
- "o2": bias adjust parallel, with fm cut improve
|
||
- "o3": bias adjust sequential, no fm cut improve. SLOW! Not recommended.
|
||
- "o4": bias adjust sequential, w fm cut improve. SLOW! Not recommended.
|
||
limit_input_formats: Default False. If set to True, will force all
|
||
input nodes to have only one hardware format.
|
||
If a input node is connected to multiple computational nodes,
|
||
compiler may set different formats for each connection by default.
|
||
input_fmt: Default None.
|
||
- None: 由编译器自动决定输入格式
|
||
- str: 字符串类型,指定所有输入节点使用统一格式
|
||
- dict: 字典类型,用于分别指定每个输入节点的格式
|
||
可用格式请参考 `compiler_v2.get_support_formats(hw_mode)`
|
||
datapath_range_method:
|
||
- "percentage"
|
||
- "mmse"
|
||
data_analysis_pct: It is used to exclude extreme values for int8 mode.
|
||
The default setting is 0.999. It means 0.1% of absolute maximum value
|
||
will be removed among all data. set to 1.0 if detection model.
|
||
(Appliable when datapath_range_method set to "percentage").
|
||
data_analysis_16b_pct: It is used to exclude extreme values for int16 mode.
|
||
The default setting is `0.999999`. It means `0.0001%` of absolute
|
||
maximum value will be removed among all data.
|
||
set to `1.0` if `detection` model.
|
||
(Appliable when datapath_range_method set to "percentage").
|
||
data_analysis_threads: how many threads to use for data analsysis for
|
||
quantization. Default value is 8. Increase if more cpu cores / memory available.
|
||
datapath_bitwidth_mode:
|
||
- "int8", default value. (and only choice for `520`)
|
||
- "int16".
|
||
- "mix balance". A combined bitwidth of int8 and int16, with a preference for int16.
|
||
- "mix light". A combined bitwidth of int8 and int16, with a preference for int8.
|
||
weight_bitwidth_mode:
|
||
- "int8", default value. (and only choice for `520`)
|
||
- "int16".
|
||
- "int4". (not supported in `520`/`720`)
|
||
- "mix balance". A combined bitwidth of int8 and int16, with a preference for int16.
|
||
- "mix light". A combined bitwidth of int8 and int16, with a preference for int8.
|
||
model_in_bitwidth_mode:
|
||
- "int8", default value.
|
||
- "int16". (not supported in `520`).
|
||
model_out_bitwidth_mode:
|
||
- "int8", default value.
|
||
- "int16". (not supported in `520`).
|
||
cpu_node_bitwidth_mode:
|
||
- "int8", default value.
|
||
- "int16". (not supported in `520`).
|
||
lut_high_accuracy_mode:
|
||
- "0": only use one look-up table for Exp/Log.
|
||
- "1": use multiple LUTs to support high accuracy and a wide input range in Exp/Log with a 15-bit bitwidth.
|
||
- "2": (default) support Exp/Log for higher accuracy, but restrict the input andd output range.
|
||
per_channel_radix: default 1. 0 for per layer radix, 1 for per channel radix (better accuracy). Change to 0 for debug. From 0.26.0
|
||
percentile: default value 0.001. Appliable when datapath_range_method set to "mmse".
|
||
Increase this parameter will increase the search range for optimized range.
|
||
outlier_factor: default 1.0. Appliable when datapath_range_method set to "mmse".
|
||
Increase this parameter will give weight on outliers so the final range will increased. Vice vesa.
|
||
quantize_mode:
|
||
- "default": no extra tuning.
|
||
- "post_sigmoid": If a model's output nodes were ALL sigmoids and had been removed, choose "post_sigmoid" for better performance.
|
||
quan_config: Default: `None`. User can pass in a dictionary to
|
||
set constraints for quantization.
|
||
compiler_tiling: methods to search for best feature map cut. choose from:
|
||
|
||
- "default" (default)
|
||
- "deep_search" (slow when calling this function, but will improve inference speed when deployed on NPU.)
|
||
- "partial_graph_search" (search runtime optimization based on partial graph comparison. Less performance than deep_search) Available from 0.29.0.
|
||
|
||
p_output: where to save the generated fix models. Default: "/data1/kneron_flow",
|
||
weight_bandwidth: set the weight bandwidth. Set to `None` to use default value.
|
||
dma_bandwidth: set the dma bandwidth. Set to `None` to use default value.
|
||
|
||
unlock_size_limit:
|
||
|
||
- False (default), will raise exceptions if onnx is larger than 3G.
|
||
- True. the limitation of origin.onnx is 100G.
|
||
|
||
Returns:
|
||
|
||
- success: bool, whether the fix model generation is successful. The caller function should check this value before following actions.
|
||
- released: dict of released files. including the html report.
|
||
- d_status: dict of module status on each submodule.
|
||
- d_time: dict of time on each submodule.
|
||
"""
|
||
# check platforms
|
||
assert platform in fconsts.MODE_HW_LIMIT["inc_in_toolchain"]
|
||
|
||
# working directory
|
||
# NOTE: p_working must be same as specified in template/regression_tc.json/path/cases
|
||
env_workdir = os.environ.get("KTC_WORKDIR")
|
||
if env_workdir and p_cache == "/workspace/.tmp/models":
|
||
p_cache = env_workdir
|
||
p_working = Path(p_cache)
|
||
p_working.mkdir(parents=True, exist_ok=True)
|
||
|
||
# prepare working_model_folder
|
||
env_output = os.environ.get("KTC_OUTPUT_DIR")
|
||
if env_output and p_output == "/data1/kneron_flow":
|
||
p_output = env_output
|
||
p_export = Path(p_output)
|
||
p_export.mkdir(parents=True, exist_ok=True)
|
||
|
||
p_onnx = Path(p_onnx)
|
||
if not p_onnx.exists():
|
||
msg = f"Given onnx {p_onnx} does not exist!"
|
||
raise FileExistsError(msg)
|
||
m_name = futils.remove_appendix(futils.clean_file_name(p_onnx.name))
|
||
if DEBUG:
|
||
print(f"given onnx: {p_onnx.name}, use cleanup model name: {m_name}")
|
||
|
||
# check input shapes
|
||
if mode > 0:
|
||
# no need check npy if ip-eval only
|
||
assert np_txt is not None, f"mode {mode} need a valid np_input."
|
||
# Make sure input npy is dict
|
||
np_txt = futils.load_np_in(np_txt)
|
||
futils.verify_input_shape_onnx_npy(p_onnx, np_txt)
|
||
|
||
platform = int(platform) # platform must be like 520/720/... type: integers
|
||
|
||
opt_map = {
|
||
"o0": "scaled", # no bias adjust, no fmcut
|
||
"o1": "wqbi-p", # bias adjust parallel, no fmcut
|
||
"o2": "wqbi-p", # bias adjust parallel, w fmcut
|
||
"o3": "wqbi-s", # bias adjust sequential, no fmcut. slow. don't use.
|
||
"o4": "wqbi-s", # bias adjust sequential, w fmcut. slow. don't use.
|
||
}
|
||
if optimize not in opt_map:
|
||
msg = f"""Given optimize ({optimize}) not in {list(opt_map.keys())}. """
|
||
raise ValueError(msg)
|
||
|
||
# to keep same interface
|
||
user_config = quantize_mode
|
||
|
||
p_template = p_script / "template" / "regression_tc.json"
|
||
with open(p_template, "r") as f:
|
||
template = json.load(f)
|
||
template["path"]["cases"] = str(p_working)
|
||
|
||
# verify knerex parameters
|
||
# choose from mmse / percentage
|
||
valid_dp_range = ["percentage", "mmse"]
|
||
if datapath_range_method not in valid_dp_range:
|
||
raise ValueError(f"datapath_range_method should be {valid_dp_range}. But got: {datapath_range_method}")
|
||
# Percentage to keep data: 0.999 (default), 1.0 (Keep all data, e.g., for detection model)
|
||
# verify: 0.9 <= data_analysis_pct <= data_analysis_16b_pct <= 1.0
|
||
if not 0.9 <= data_analysis_pct <= 1.0:
|
||
raise ValueError(f"data_analysis_pct shoud be between 0.9 and 1.0. But got: {data_analysis_pct}")
|
||
if not 0.9 <= data_analysis_16b_pct <= 1.0:
|
||
raise ValueError(f"data_analysis_16b_pct shoud be between 0.9 and 1.0. But got: {data_analysis_16b_pct}")
|
||
if data_analysis_pct > data_analysis_16b_pct:
|
||
raise ValueError(f"data_analysis_pct should be less than or equal to data_analysis_16b_pct. But got: {data_analysis_pct} > {data_analysis_16b_pct}")
|
||
if not 0 <= percentile <= 0.2:
|
||
raise ValueError(f"percentile must be between 0 and 0.2. But got: {percentile}")
|
||
if (datapath_range_method == "percentage") and (percentile > 0):
|
||
# print(f"WARNING: using '{datapath_range_method}' datapath analysis. Percetile reset to 0.")
|
||
percentile = 0
|
||
if outlier_factor <= 0:
|
||
raise ValueError(f"outlier_factor must > 0. But got: {outlier_factor}")
|
||
|
||
# verify compiler parameters
|
||
valid_tiling = ["default", "deep_search", "partial_graph_search"]
|
||
if compiler_tiling not in valid_tiling:
|
||
raise ValueError(f"compiler_tiling should be in {valid_tiling}. But got {compiler_tiling}")
|
||
# possible override
|
||
if platform == 520:
|
||
# no compiler_tiling for 520
|
||
compiler_tiling = "default"
|
||
if optimize in ["o2", "o4"]:
|
||
# force to use deep_search
|
||
compiler_tiling = "deep_search"
|
||
|
||
# verify input_fmt
|
||
# did not check input node number here.
|
||
check_input_fmt(input_fmt, platform)
|
||
|
||
# create config for whole flow.
|
||
try:
|
||
conf_reg, _ = generate_conf(template,
|
||
platform,
|
||
optimize=opt_map[optimize],
|
||
mode=mode,
|
||
snr_max_in_pair=snr_max_in_pair,
|
||
snr_layer_control=snr_layer_control,
|
||
limit_input_formats=limit_input_formats,
|
||
input_fmt=input_fmt,
|
||
dp_bw=datapath_bitwidth_mode,
|
||
wt_bw=weight_bitwidth_mode,
|
||
in_bw=model_in_bitwidth_mode,
|
||
out_bw=model_out_bitwidth_mode,
|
||
cpu_bw=cpu_node_bitwidth_mode,
|
||
lut_high_accuracy_mode=lut_high_accuracy_mode,
|
||
per_channel_radix=per_channel_radix,
|
||
datapath_range_method=datapath_range_method,
|
||
data_analysis_pct=data_analysis_pct,
|
||
data_analysis_16b_pct=data_analysis_16b_pct,
|
||
data_analysis_threads=data_analysis_threads,
|
||
percentile=percentile,
|
||
outlier_factor=outlier_factor,
|
||
fm_cut=compiler_tiling
|
||
)
|
||
except Exception as e:
|
||
# probably bad configuration
|
||
pp(e)
|
||
raise ValueError("Wrong configuration for ktc.analysis().")
|
||
|
||
# save conf_reg to disk for debug. it will not be used in gen_fx_model later.
|
||
p_json = p_working / "regression_config.json"
|
||
futils.dict2json(conf_reg, p_json)
|
||
|
||
def update_config_ip_val(weight_bandwidth, dma_bandwidth, platform):
|
||
# NOTE: if running multiple platform at same time,
|
||
# one setting for dma_bandwidth / weight_bandwidth may not be accurate
|
||
|
||
# override the ip_evaluator in toolchain.
|
||
# s1.json will based on this file. if necessary.
|
||
ip_config = gen_ip_config(platform, weight_bandwidth, dma_bandwidth)
|
||
res_dir = os.environ.get("KTC_SCRIPT_RES", "/workspace/scripts/res")
|
||
os.makedirs(res_dir, exist_ok=True)
|
||
fn_ip_config = os.path.join(res_dir, f"ip_config_{platform}.json")
|
||
futils.dict2json(ip_config, fn_ip_config)
|
||
|
||
update_config_ip_val(weight_bandwidth, dma_bandwidth, platform)
|
||
|
||
# prepare model folder
|
||
def copy_onnx(p_onnx, p_to):
|
||
shutil.copy(p_onnx, p_to)
|
||
|
||
# copy onnx_data
|
||
p_from = p_onnx.parent / f"{p_onnx.name}_data"
|
||
if p_from.exists():
|
||
p_to_data = p_to.parent / f"{p_to.name}_data"
|
||
if DEBUG:
|
||
print(f"Found {p_from}, copy to {p_to_data}")
|
||
shutil.copy(p_from, p_to_data)
|
||
|
||
def prepare_model(p_user_config=None, quan_config=None, qat_config=None, hw_mode=platform):
|
||
"""Prepare model structure: onnx / input / configs."""
|
||
# our model name convention require "cat/model" structure
|
||
# use `m_name`/`m_name` will limit the flow will run only one category (which include only one model)
|
||
p_model = p_working / m_name / m_name
|
||
if p_model.exists():
|
||
shutil.rmtree(str(p_model))
|
||
p_input = p_model / "input"
|
||
p_input.mkdir(parents=True, exist_ok=False)
|
||
|
||
# copy onnx
|
||
p_to = p_input / f"{m_name}.origin.onnx"
|
||
copy_onnx(p_onnx, p_to)
|
||
|
||
# read onnx for input and get input nodes info
|
||
input_names, output_names, opset = get_ioinfo_onnx(str(p_to))
|
||
|
||
futils.npy2txt(np_txt, input_names, p_input)
|
||
|
||
# copy user_config.json which apply some constraints for better performance.
|
||
if (p_user_config is not None) and p_user_config.exists():
|
||
f_to = p_input / "user_config.json"
|
||
shutil.copy(p_user_config, f_to)
|
||
elif quan_config is not None:
|
||
# BUG: need to merge with existing json (e.g., p_user_config from quantize_mode).
|
||
futils.dict2json(quan_config, p_input / "user_config.json")
|
||
|
||
# save qat_xxx_config.json
|
||
if qat_config:
|
||
futils.dict2json(qat_config, p_input / f"qat_{hw_mode}_config.json")
|
||
|
||
return p_model
|
||
|
||
def prepare_model_only_ip_eval():
|
||
"""Simpler version of prepare_model."""
|
||
# our model name convention require "cat/model" structure
|
||
# use `m_name`/`m_name` will limit the flow will run only one category (which include only one model)
|
||
p_model = p_working / m_name / m_name
|
||
if p_model.exists():
|
||
shutil.rmtree(str(p_model))
|
||
p_input = p_model / "input" # need this folder to be find.
|
||
p_knerex_in = p_input / "knerex_input"
|
||
p_knerex_in.mkdir(parents=True, exist_ok=False)
|
||
|
||
# TODO: only_ip_eval need to support both onnx and bie format!
|
||
# but we need .origin.onnx to be find a model
|
||
ext = p_onnx.suffix
|
||
p_to = p_input / f"{m_name}.origin{ext}"
|
||
# NOTE: only .onnx_data possible. if in .bie format, _data info in included in the bie.
|
||
copy_onnx(p_onnx, p_to)
|
||
|
||
return p_model
|
||
|
||
def run_ip_evaluator_only():
|
||
"""Mode 0 for ip evaluator only."""
|
||
try:
|
||
p_model = prepare_model_only_ip_eval()
|
||
rfs, success_list, df_report, _, _ = run_flow(conf_reg, [m_name])
|
||
success = success_list[0], rfs[0], df_report # only one model
|
||
except Exception as e:
|
||
success = False, None, None
|
||
|
||
return success
|
||
|
||
def copy_release_file(fn_to_release: dict, p_export):
|
||
fn_released = {}
|
||
|
||
if not fn_to_release:
|
||
# no files to copy
|
||
return fn_released
|
||
|
||
for k, fn_from in fn_to_release.items():
|
||
fn_to = p_export / fn_from.name
|
||
shutil.copy(fn_from, fn_to, follow_symlinks=False)
|
||
fn_released[k] = fn_to
|
||
return fn_released
|
||
|
||
def run_btm_and_release(need_copy_prepare_model):
|
||
"""Mode 1/2/3 to generate fix models.
|
||
|
||
TODO: init model in the given folder and run regression in it.
|
||
currently we create in a temp folder then copy to given folder
|
||
"""
|
||
# check user_config
|
||
assert user_config in ["default", "post_sigmoid"]
|
||
user_config_available = {
|
||
"post_sigmoid": p_script / "template" / "user_config_post_sigmoid.json"
|
||
}
|
||
p_user_config = user_config_available.get(user_config, None)
|
||
|
||
if need_copy_prepare_model:
|
||
p_model = prepare_model(p_user_config, quan_config, qat_config)
|
||
# now the model should be ready to generate fx models
|
||
rfs, success_list, df_report, _, _ = run_flow(conf_reg, [m_name])
|
||
|
||
# if release_time_report:
|
||
# # TODO: save time.json. btm_report is a dataframe,
|
||
# df_time = df_only_time(btm_report)
|
||
# temp_pkl = Path(tempfile.gettempdir()) / "time_report.pkl.xz"
|
||
# futils.df2pkl(df_time, str(temp_pkl))
|
||
# rfs[0]["time_report"] = temp_pkl
|
||
|
||
# only one model
|
||
return success_list[0], rfs[0], df_report
|
||
|
||
# force to have same output
|
||
try:
|
||
if mode == 0:
|
||
success, fn_to_release, df_report = run_ip_evaluator_only()
|
||
else:
|
||
success, fn_to_release, df_report = run_btm_and_release(need_copy_prepare_model)
|
||
d_status, d_time = df_row2status(df_report)
|
||
except Exception as e:
|
||
pp(e)
|
||
success, fn_to_release, df_report, d_status, d_time = False, {}, None, {}, {}
|
||
|
||
released = copy_release_file(fn_to_release, p_export)
|
||
|
||
if clean_cache:
|
||
time.sleep(10) # waiting for test_case() to finish
|
||
shutil.rmtree(p_working)
|
||
|
||
return success, released, d_status, d_time
|
||
|
||
|
||
def run_flow(fn_json, keywords=None):
|
||
"""Core function for kneron regression flow.
|
||
|
||
1. init regresson config
|
||
2. run regression on each model, using multi-processing if appliable
|
||
3. generate compiled report on btm and snr
|
||
|
||
Returns: list of released files.
|
||
- btm_report: a dataframe on module status on each model
|
||
- snr_reports: a dictionary
|
||
|
||
- key is platform, e.g., 520, 720, if turned on in this regression
|
||
- value is a dataframe, with snr of output nodes for each model.
|
||
"""
|
||
r = regression(fn_json)
|
||
|
||
time_start = time.perf_counter()
|
||
|
||
selected_case = r.filter_cases(keywords)
|
||
|
||
logger = futils.create_logger("flow", None, r.config["regression"]["logging_level"])
|
||
|
||
# this object is to record status/timestamp of all models long whole regression
|
||
rep = report()
|
||
signal("time_sender").connect(rep.receive_time_usage)
|
||
signal("data_sender").connect(rep.receive_info)
|
||
|
||
if len(selected_case) == 0:
|
||
logger.critical("Error: found 0 test case matching keywords ({}). ".format(keywords))
|
||
exit(1)
|
||
|
||
logger.info("total models are: {}".format(len(selected_case)))
|
||
n_parallel_model = r.config["dynasty"]["n_parallel_model"]
|
||
|
||
is_customer = not r.config["path"]["internal"]
|
||
# TODO: this condition may be wrong
|
||
is_big_model = any(["big_model" in str(test_case_path) for test_case_path in selected_case])
|
||
print_each_model = n_parallel_model == 1 and (is_customer or is_big_model)
|
||
|
||
if n_parallel_model > 1:
|
||
p = multiprocessing.Pool(n_parallel_model)
|
||
ts_w_c = [(sc, r.config) for sc in selected_case]
|
||
# NOTE: the run_single_case must be serializable. it should be on top level, not local function
|
||
w = p.map_async(run_single_case, ts_w_c) # , callback=rep.add_err_record
|
||
w.wait()
|
||
# collect reports and released files
|
||
success_list = []
|
||
for e in [a[0] for a in w.get()]:
|
||
rep.add_err_record([e])
|
||
success_list.append(fconsts.is_success(e))
|
||
released_files = [a[1] for a in w.get()]
|
||
p.close()
|
||
else:
|
||
# only 1 model at 1 time
|
||
# usually SNR regression & toolchain will be in this setting.
|
||
released_files = []
|
||
success_list = []
|
||
for one_case in selected_case:
|
||
e, rel_fn = run_single_case((one_case, r.config))
|
||
# if run_single_case failed, rel_fn will be None
|
||
released_files.append(rel_fn)
|
||
rep.add_err_record([e])
|
||
success_list.append(fconsts.is_success(e))
|
||
if print_each_model:
|
||
btm_report, btm_summary = rep.compile(r.report_csv)
|
||
snr_reports = generate_snr_reports(r, rep, selected_case)
|
||
|
||
# this run is finished.
|
||
time_end = time.perf_counter()
|
||
time_used_m = max(int((time_end - time_start) / 60), 1)
|
||
r.commit_info.append(f"Duration for this run: {time_used_m} minutes\n")
|
||
r.write_info()
|
||
|
||
# generate reports for whole regression. not for only 1 test case.
|
||
if not print_each_model:
|
||
# final print of results. skip if print already.
|
||
# compile report on errors
|
||
btm_report, btm_summary = rep.compile(r.report_csv)
|
||
# compile report on snr when piano_dynasty run
|
||
snr_reports = generate_snr_reports(r, rep, selected_case)
|
||
|
||
return released_files, success_list, btm_report, btm_summary, snr_reports
|
||
|
||
|
||
def check_reg_success_by_keys(d):
|
||
"""Quick way to examine a model flow executed successfully or not."""
|
||
for k in d.keys():
|
||
if k.endswith("/bie"):
|
||
# at least one bie is released.
|
||
return True
|
||
# failed. but at leased report.html / report.json released.
|
||
# MAYBE: use len(d) > 2
|
||
return False
|
||
|
||
|
||
if __name__ == "__main__":
|
||
arguments = docopt(__doc__, version="run regression 1.2")
|
||
# print(arguments)
|
||
|
||
# check commit folder
|
||
|
||
fn_json = Path(arguments["<fn_json>"])
|
||
if not fn_json.exists():
|
||
print(f"Given config file: {fn_json} does not exist. quit...")
|
||
exit(1)
|
||
|
||
keywords = arguments["<keys>"]
|
||
|
||
released_files, success_list, btm_report, btm_summary, snr_reports = run_flow(fn_json, keywords)
|
||
|
||
n_good = len([a for a in success_list if a])
|
||
n_all = len(success_list)
|
||
print(f"Successed cases are {n_good}/{n_all} for {fn_json.name}.")
|
||
print(f"▤"*140 + "\n\n\n")
|
||
|
||
# check all cases success or not. needed in CI.
|
||
if arguments["--all-pass"]:
|
||
if not all(success_list):
|
||
exit(99)
|
||
|
||
# otherwise will always return 0 even if regression failed.
|
||
exit(0)
|