1190 lines
50 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#! /usr/bin/env python3
""" CLI interface for regression
Usage:
run.py [--all-pass] <fn_json> [<keys>...]
run.py (-h | --help)
run.py --version
Options:
--all-pass If all pass, exit with 0, otherwise with 1
-h --help Show this screen.
--version Show version.
"""
import shutil
import os
import time
import tempfile
import multiprocessing
from pathlib import Path
import json
from blinker import signal
from docopt import docopt
import sys_flow_v2.flow_utils as futils
import sys_flow_v2.flow_constants as fconsts
from sys_flow_v2.report_utils import report, generate_snr_reports, df_row2status
from sys_flow_v2.exceptions import RegressionError
from sys_flow_v2.test_case import test_case
from sys_flow_v2.regression import regression
from sys_flow_v2.compiler_v2 import check_input_fmt
from sys_flow_v2.onnx_op_stats import get_ioinfo_onnx
from sys_flow_v2.compiler_config import gen_ip_config
from sys_flow_v2.gen_regression_json import generate_conf
from sys_flow_v2 import mix_bitwidth_utils as mbu
from sys_flow_v2 import mix_bitwidth_utils_v2 as mbu_v2
import snoop
# Debug switches: any non-empty value in the environment variable turns the
# flag on (bool() of the string, identical truthiness to the previous
# `True if ... else False` form).
DEBUG = bool(os.environ.get("REGRESSION_DEBUG"))
MIXBW_DEBUG = bool(os.environ.get("MIXBW_DEBUG"))
snoop.install(enabled=DEBUG)
# Directory containing this script; used below to locate bundled
# template/*.json regression templates.
p_script = Path(__file__).resolve().parent
def rename_directory(source_path, target_path):
    """Move *source_path* to *target_path*, replacing any existing target.

    The target's parent directories are created as needed. A failure during
    the final move is printed rather than raised (callers treat the move as
    best-effort and validate the result afterwards via load/assert).

    Args:
        source_path (str / Path): directory to move; must exist.
        target_path (str / Path): destination; removed first if present.

    Raises:
        FileNotFoundError: if *source_path* does not exist.
    """
    source_path = Path(source_path)
    target_path = Path(target_path)
    if not source_path.exists():
        raise FileNotFoundError(f"The directory {source_path} does not exist.")
    # Replace the target wholesale so no stale files survive the move.
    if target_path.exists():
        shutil.rmtree(target_path)
    target_path.parent.mkdir(parents=True, exist_ok=True)
    try:
        # shutil.move also works across filesystems, unlike os.rename
        # (which fails with EXDEV when source and target are on
        # different mounts).
        shutil.move(str(source_path), str(target_path))
        print(f"Directory renamed from {source_path} to {target_path}")
    except Exception as e:
        print(f"Directory rename failed: {e}")
def run_single_case(ts_w_r):
    """Run one regression test case; used as a multiprocessing worker.

    Errors are returned (not raised) so the main process's callback can fold
    them into the report — returning values is the only way to sync messages
    back from a pool worker. This function must stay at module top level
    (not nested inside another function) so it can be pickled, and it takes
    exactly one packed argument for the same reason.

    Args:
        ts_w_r: a ``(test_case_path, r_config)`` tuple.

    Returns:
        (error, released_files): ``error`` is an exception instance. On
        success it is ``RegressionError("general/Success", model_id)`` so the
        collector handles success and failure through a single result shape.
        ``released_files`` is None when even the cleanup path failed.
    """
    test_case_path, r_config = ts_w_r
    try:
        i_case = test_case(test_case_path, r_config)
        released_files = i_case.run_flow()
        # success!
        return RegressionError("general/Success", i_case.model_id), released_files
    except Exception as e:
        # NOTE: if any submodule failed, it will reach here.
        try:
            # free up first
            i_case.clean_opt()
            released_files = i_case.save_summary()
            # released_files is probably only the model_fx_html / model_fx_json
            i_case.post_clean_up()
            if DEBUG:
                print(f"run_flow failed. Clean up {i_case}")
                print(e)
            return e, released_files
        except Exception:
            # Cleanup itself failed (e.g. test_case() raised before i_case
            # was bound). Still report the original error. Narrowed from a
            # bare `except:` so KeyboardInterrupt/SystemExit can propagate
            # and actually terminate the worker.
            return e, None
def gen_opt_model(
    p_onnx,
    np_txt,
    platform=730,  # choose "520" / "720" / "530" / "630"
    optimize="o0",  # choose from "o0", "o1", "o2"
    limit_input_formats=False,
    datapath_range_method="percentage",
    data_analysis_pct=0.999,  # set to 1.0 if detection model
    data_analysis_16b_pct=0.999999,  # set to 1.0 if detection model
    data_analysis_threads=8,
    datapath_bitwidth_mode="int8",
    weight_bitwidth_mode="int8",
    model_in_bitwidth_mode="int8",
    model_out_bitwidth_mode="int8",
    cpu_node_bitwidth_mode="int8",  # from 0.24.0
    lut_high_accuracy_mode="2",  # from 0.25.0
    percentile=0.001,
    outlier_factor=1.0,
    quantize_mode="default",  # choose from "default", "post_sigmoid"
    quan_config=None,  # let user to set constraints for quantization.
    qat_config=None,  # optional config for qat
    compiler_tiling="default",  # changed from fm_cut, since 0.24.0
    p_output="/data1/kneron_flow",
    weight_bandwidth=None,  # None will use default.
    unlock_size_limit=False,  # set to True if need to use huge onnx file.
    mode=2,  # choose from 0/1/2/3. See document for details.
    snr_max_in_pair=3,  # max number pairs of input for dynasty+snr
    snr_layer_control=None,  # to control how dynsty dump results for snr check. From 0.26.0
    opt_model_json=None,  # if given, will use this json to generate opt model. From 0.26.0
    target_snr=None  # generate fastest fx model and user_config to reach target snr. From 0.26.0
):
    """Search a mixed int8/int16 quantization for a platform-730 model.

    Workflow:
    1. run a output16 and output light
    2. get all conv value_info name via output_16
    3. run single 8bit cases
    4. run all cumulative cases and plot to cumulative snr plot
    5. if target_snr exists:
        generate a new user_config.json and run -> final.opt.onnx

    Intermediate runs are cached under the working directory (KTC_WORKDIR or
    /workspace/.tmp/models) and reloaded when present, so the function can
    resume after an interruption instead of recomputing every case.

    NOTE(review): many keyword arguments mirror gen_fx_model's signature but
    are NOT forwarded by this function (only the keys placed into
    gen_fx_model_params below are used) — presumably kept for interface
    symmetry; confirm before relying on them here.

    Returns:
        p_output: the (possibly env-overridden) output directory.
    """
    # KTC_OUTPUT_DIR overrides p_output only when the caller left the default.
    env_output = os.environ.get("KTC_OUTPUT_DIR")
    if env_output and p_output == "/data1/kneron_flow":
        p_output = env_output
    assert platform == 730, f"Only platform 730 is supported."
    assert optimize == "o0", f"Only optimize o0 is supported."
    # Base parameter set shared by every gen_fx_model run below; individual
    # steps copy it and override only what differs (quan_config, bitwidths).
    gen_fx_model_params = {
        "p_onnx": p_onnx,
        "np_txt": np_txt,
        "platform": platform,
        "optimize": optimize,
        "datapath_bitwidth_mode": "int16",
        "weight_bitwidth_mode": "int16",
        "model_in_bitwidth_mode": "int16",
        "model_out_bitwidth_mode": "int16",
        "cpu_node_bitwidth_mode": "int16",
        "lut_high_accuracy_mode": "2",
        "quan_config": None,
        "p_output": p_output,
        "mode": 55,  # internal snr-analysis mode: knerex + compiler + dynasty + snr
        "snr_max_in_pair": snr_max_in_pair,
        "snr_layer_control": 0  # dump output layer only
    }
    p_onnx = Path(p_onnx)
    m_name = p_onnx.stem
    if m_name.endswith(".origin"):
        m_name = m_name.replace(".origin", "")
    if not p_onnx.exists():
        msg = f"Given onnx {p_onnx} does not exist!"
        # NOTE(review): raised when the file is MISSING — FileNotFoundError
        # would be the accurate type; kept as-is since callers may catch it.
        raise FileExistsError(msg)
    env_workdir = os.environ.get("KTC_WORKDIR")
    p_working = Path(env_workdir or "/workspace/.tmp/models")
    # Per-variant cache directories (doubled name = <case>/<model> layout
    # expected by the regression flow).
    p_model = p_working / m_name / m_name
    p_model_16 = p_working / f"{m_name}_16" / f"{m_name}_16"
    p_model_light = p_working / f"{m_name}_light" / f"{m_name}_light"
    p_model_target = p_working / f"{m_name}_target" / f"{m_name}_target"
    p_onnx_decomposed_16 = p_model_16 /"output"/ "knerex_730" / f"{m_name}.kdp730.decomposed.onnx"
    # Previously-dumped results (step 4's json) let steps be skipped entirely.
    opt_model_data = None
    if opt_model_json is not None:
        with open(opt_model_json, 'r') as file:
            opt_model_data = json.load(file)
    # 1.1 run output 16
    # load from p_working directory, guarantee p_model_16 exists
    success_16, [snr_16, cfunc_time_16] = mbu.load_single_rst(p_model_16)
    if not success_16.value:
        gen_fx_model_params_16 = gen_fx_model_params.copy()
        # run output 16
        gen_fx_model(**gen_fx_model_params_16)
        # gen_fx_model writes to p_model; move the result into the 16b slot.
        rename_directory(p_model, p_model_16)
        success_16, [snr_16, cfunc_time_16] = mbu.load_single_rst(p_model_16)
    assert success_16.value, f"Failed to run output 16 for {m_name}"
    print('=====================================')
    print('success_16', success_16.name)
    print("snr_16", snr_16)
    print("cfunc_time_16", cfunc_time_16)
    print('=====================================')
    # # 1.2 run output mix light
    # Try the saved json first, then the on-disk cache, then a fresh run.
    success_light, [snr_light, cfunc_time_light] = mbu.load_json_rst(opt_model_data, 'mix light')
    if not success_light.value:
        success_light, [snr_light, cfunc_time_light] = mbu.load_single_rst(p_model_light)
        if not success_light.value:
            gen_fx_model_params_light = gen_fx_model_params.copy()
            gen_fx_model_params_light['datapath_bitwidth_mode']='mix light'
            gen_fx_model(**gen_fx_model_params_light)
            rename_directory(p_model, p_model_light)
            success_light, [snr_light, cfunc_time_light] = mbu.load_single_rst(p_model_light)
        assert success_light.value, f"Failed to run output light for {m_name}"
    print('=====================================')
    print('success_light', success_light.name)
    print("snr_light", snr_light)
    print("cfunc_time_light", cfunc_time_light)
    print('=====================================')
    # 2. get all conv value_info name via output_16
    p_single = p_working / f"{m_name}_single"
    conv_nodes = mbu.get_conv_nodes_skip_input(p_onnx_decomposed_16)
    snr_list_single, cfunc_time_list_single = [], []
    print("conv_nodes", [node.name for node in conv_nodes])
    print("conv_nodes ", len(conv_nodes))
    # 3. run single 8bit case
    # Reuse the json results only if they cover every conv node; otherwise
    # re-run each conv's single-node 8-bit case (with on-disk caching).
    success_single, [snr_single, cfunc_time_single] = mbu.load_json_rst(opt_model_data, 'single')
    if success_single.value and len(snr_single) == len(conv_nodes):
        snr_list_single = snr_single
        cfunc_time_list_single = cfunc_time_single
    else:
        # re-run single 8bit case
        for node in conv_nodes:
            # '/' is not filesystem-safe; flatten the node name for the dir.
            node_name = node.name.replace("/", "_")
            user_config_single = mbu.generate_user_config([node.input[0]])
            p_single_model = p_single / node_name
            success_single, [snr, cfunc_time] = mbu.load_single_rst(p_single_model)
            mbu.post_clean(p_single_model)
            if not success_single.value:
                gen_fx_model_params_single = gen_fx_model_params.copy()
                gen_fx_model_params_single['quan_config'] = user_config_single
                gen_fx_model(**gen_fx_model_params_single)
                rename_directory(p_model, p_single_model)
                success_single, [snr, cfunc_time] = mbu.load_single_rst(p_single_model)
                mbu.post_clean(p_single_model)
            assert success_single.value, f"Failed to run output single 8bit for {m_name}"
            snr_list_single.append(snr)
            cfunc_time_list_single.append(cfunc_time)
    print('=====================================')
    print('success_single', success_single.name)
    print("snr_list_single ", snr_list_single)
    print("cfunc_time_list_single ", cfunc_time_list_single)
    print('=====================================')
    # 4 run cumculative 8bit case
    # 4.1 get the dp_name_list in sorted order
    dp_name_list = [node.input[0] for node in conv_nodes]
    sorted_dp_idx_list = mbu.get_sorted_dp_idx_by_snr(dp_name_list, snr_list_single)
    print("sorted_dp_idx", sorted_dp_idx_list)
    p_cum = p_working / f"{m_name}_cum"
    snr_list_cum, cfunc_time_list_cum = [], []
    # step limits the sweep to ~40 cumulative runs regardless of model size.
    length, step = len(dp_name_list), max(len(dp_name_list)//40+1, 1)
    # 4.2 run cumulative 8bit case
    success_cum, [snr_cum, cfunc_time_cum] = mbu.load_json_rst(opt_model_data, 'cumulative')
    if success_cum.value and len(snr_cum) == len(range(0, length, step)):
        snr_list_cum = snr_cum
        cfunc_time_list_cum = cfunc_time_cum
    else:
        for i in range(0, length, step):
            # Each run switches the first `end` nodes (in sorted order) to 8-bit.
            begin, end = 0, min(i+step, length)
            user_config_cum = mbu.generate_user_config_by_idx(dp_name_list, sorted_dp_idx_list, begin, end)
            print(begin, end)
            p_cum_model = p_cum / str(i)
            success_cum, [snr, cfunc_time] = mbu.load_single_rst(p_cum_model)
            mbu.post_clean(p_cum_model)
            if not success_cum.value:
                gen_fx_model_params_cum = gen_fx_model_params.copy()
                gen_fx_model_params_cum['quan_config'] = user_config_cum
                gen_fx_model(**gen_fx_model_params_cum)
                rename_directory(p_model, p_cum_model)
                success_cum, [snr, cfunc_time] = mbu.load_single_rst(p_cum_model)
                mbu.post_clean(p_cum_model)
            assert success_cum.value, f"Failed to run output cumulative 8bit for {m_name}"
            snr_list_cum.append(snr)
            cfunc_time_list_cum.append(cfunc_time)
    print('=====================================')
    print("success_cum", success_cum.name)
    print("snr_list_cum ", snr_list_cum)
    print("cfunc_time_list_cum ", cfunc_time_list_cum)
    print('=====================================')
    # save gen_opt_json
    opt_model_json = mbu.dump_opt_json(snr_list_single, cfunc_time_list_single, snr_list_cum, cfunc_time_list_cum, snr_16, cfunc_time_16, snr_light, cfunc_time_light, sorted_dp_idx_list, m_name, p_working)
    mbu.plot_cum_snr_cfunc_time(opt_model_json, p_working)
    # 5. generate a new user_config.json and run -> target fx model
    if target_snr is not None:
        user_config_target = mbu.generate_user_config_by_target_snr(snr_16, snr_light, snr_list_cum, target_snr, dp_name_list, sorted_dp_idx_list, step)
        # None means no cumulative point reached target_snr — skip the run.
        if user_config_target is not None:
            # run with user_config_tagret_snr
            gen_fx_model_params_target = gen_fx_model_params.copy()
            gen_fx_model_params_target['quan_config'] = user_config_target
            gen_fx_model(**gen_fx_model_params_target)
            rename_directory(p_model, p_model_target)
            success_target, [snr_target, cfunc_time_target] = mbu.load_single_rst(p_model_target)
            assert success_target.value, f"Failed to run output target for {m_name}"
            print('=====================================')
            print('success_target', success_target.name)
            print("snr_target", snr_target)
            print("cfunc_time_target", cfunc_time_target)
            print('=====================================')
    return p_output
def gen_opt_model_v2(
    p_onnx,
    np_txt,
    data_analysis_threads=8,
    weight_bitwidth_mode="int16",
    mixbw_mode='data',  # ['data', 'weight', 'both']
    flops_ratio=0.5,
    p_output="/data1/kneron_flow",
    p_cache="/workspace/.tmp/models",
    clean_cache=False,  # whether to clean cache
    num_of_processors=16,
    snr_max_in_pair=3,  # max number pairs of input for dynasty+snr
):
    """Subgraph-based mixed-bitwidth search (v2, MixBitwidthUtilsFast).

    flops_ratio is the fraction of conv floating-point compute to run at
    15-bit: flops_ratio=0 -> all convs use 8-bit; flops_ratio=1.0 -> all
    convs use 15-bit.

    Pipeline: run a full 16-bit reference model, split it into subgraphs,
    batch-run fl/fx subcases through the regression flow, compute offsets
    and simulate SNR, then generate a user_config and build the final
    target fix-point model.

    Returns:
        (p_output, d_status): output directory and per-module status dict
        augmented with per-step wall-clock timings.
    """
    # Environment variables override the defaults only when the caller did
    # not pass explicit paths.
    env_output = os.environ.get("KTC_OUTPUT_DIR")
    env_workdir = os.environ.get("KTC_WORKDIR")
    if env_output and p_output == "/data1/kneron_flow":
        p_output = env_output
    if env_workdir and p_cache == "/workspace/.tmp/models":
        p_cache = env_workdir
    # Base parameter set for each gen_fx_model run; steps copy and override.
    gen_fx_model_params = {
        "p_onnx": p_onnx,
        "np_txt": np_txt,
        "platform": 730,
        "optimize": "o0",
        "data_analysis_threads": data_analysis_threads,
        "datapath_bitwidth_mode": "int16",
        "weight_bitwidth_mode": weight_bitwidth_mode,
        "model_in_bitwidth_mode": "int16",
        "model_out_bitwidth_mode": "int16",
        "cpu_node_bitwidth_mode": "int16",
        "lut_high_accuracy_mode": "2",
        "quan_config": None,
        "p_output": p_output,
        "p_cache": p_cache,
        "clean_cache": False,  # cache cleanup is handled by this function, at the end
        "mode": 55,  # internal snr-analysis mode
        "snr_max_in_pair": snr_max_in_pair,
        "snr_layer_control": 2  # dump all layers
    }
    p_onnx = Path(p_onnx)
    m_name = p_onnx.stem
    time_start = time.perf_counter()
    if m_name.endswith(".origin"):
        m_name = m_name.replace(".origin", "")
    if not p_onnx.exists():
        msg = f"Given onnx {p_onnx} does not exist!"
        # NOTE(review): FileNotFoundError would be the accurate type here;
        # kept as-is since callers may catch FileExistsError.
        raise FileExistsError(msg)
    # Cache-side layout (under p_cache) and release-side layout (under p_output).
    p_temp = Path(p_cache)
    p_subgraphs = Path(os.path.join(p_temp, f'ng5_subgraphs'))
    p_bm_txt = p_temp / f"bm_mixbw.txt"  # case list consumed by the regression flow
    p_model = p_temp / m_name / m_name
    p_model_16 = p_temp / f"{m_name}_16" / f"{m_name}_16"
    p_model_target = p_temp / f"{m_name}_target" / f"{m_name}_target"
    p_model_data_png = p_subgraphs / "data"
    p_model_weight_png = p_subgraphs / "weight"
    p_model_log = p_subgraphs / "snr_debug.txt"
    p_model_release = p_temp / "release"
    p_output_target = os.path.join(p_output, f"{m_name}_target")
    p_output_log = os.path.join(p_output, f"snr_debug.txt")
    p_output_data_png = os.path.join(p_output, f"data")
    p_output_weight_png = os.path.join(p_output, f"weight")
    # run output 16
    gen_fx_model_params_16 = gen_fx_model_params.copy()
    gen_fx_model_params_16['p_output'] = p_model_release
    gen_fx_model(**gen_fx_model_params_16)
    # gen_fx_model writes to p_model; move the result into the 16b slot.
    rename_directory(p_model, p_model_16)
    xbu_v2 = mbu_v2.MixBitwidthUtilsFast(workspace_dir=p_model_16,
                                         model_dir='.',
                                         subgraph_dir=p_temp,
                                         model_name=m_name,
                                         f_name_16='output',
                                         mixbw_mode=mixbw_mode,
                                         flops_ratio=flops_ratio
                                         )
    # step1: split the 16-bit model into subgraphs (fresh directory each run).
    # NOTE(review): path_subgraphs is unused below (p_subgraphs is already a Path).
    path_subgraphs = Path(p_subgraphs)
    if os.path.exists(p_subgraphs):
        shutil.rmtree(p_subgraphs)
    p_subgraphs.mkdir(parents=True, exist_ok=True)
    xbu_v2.run_subgraph_creation()
    time_step1_end = time.perf_counter()
    print("\n======= run subgraph creation success =======\n")
    # step2: create both variants of every subcase (fl = float input,
    # fx = fixed input).
    xbu_v2.run_sub_cases_creation(float_input=False, pre_clean=True)
    xbu_v2.run_sub_cases_creation(float_input=True, pre_clean=True)
    time_step2_end = time.perf_counter()
    print("\n======= run subcases creation success =======\n")
    # step3: run all subcases through the regression flow in chunks.
    # m_name_dict maps case name -> (model dir, ...); 'fx' suffix marks
    # the fixed-input variants we iterate over.
    m_name_dict = xbu_v2.get_m_name_dict()
    all_fx_m_names = [item for item in m_name_dict.keys() if item.endswith('fx')]
    def _process_parallel_models(config, task_mode):
        # Run the shared regression flow with task-specific module toggles:
        # "regression" runs knerex+compiler, "dynasty" runs dynasty only.
        valid_modes = ("regression", "dynasty")
        if task_mode not in valid_modes:
            raise ValueError(f"Invalid task_mode: {task_mode}. Must be one of {valid_modes}")
        mode_configurations = {
            "regression": {
                "module_run": {
                    "piano_knerex": True,
                    "compiler_piano": True
                },
                "pre_clean_up": {
                    "dynasty_output": False,
                    "all_output": True
                }
            },
            "dynasty": {
                "module_run": {
                    "piano_knerex": False,
                    "compiler_piano": False
                },
                "pre_clean_up": {
                    "dynasty_output": True,
                    "all_output": False
                }
            }
        }
        # Mutates the shared config in place (conf_mixbw is reused per chunk).
        for section, settings in mode_configurations[task_mode].items():
            config[section].update(settings)
        rfs, success_list, df_report, *_ = run_flow(config)
        n_good = len([a for a in success_list if a])
        n_all = len(success_list)
        assert n_good == n_all, f"Successed cases are {n_good}/{n_all}"
        d_status, d_time = df_row2status(df_report)
        return success_list, rfs[0], d_status, d_time
    # Shared regression config; the case list file (p_bm_txt) is rewritten
    # before each flow invocation.
    p_mixbw_template = p_script / "template" / "regression_mixbw.json"
    with open(p_mixbw_template, "r") as f:
        conf_mixbw = json.load(f)
    conf_mixbw["path"]["cases"] = str(p_temp)
    conf_mixbw["path"]["search"] = ["-f", str(p_bm_txt)]
    conf_mixbw["knerex"]["data_analysis_threads"] = data_analysis_threads
    conf_mixbw["dynasty"]["num_input_samples"] = snr_max_in_pair
    conf_mixbw["dynasty"]["n_parallel_model"] = num_of_processors
    def _chunker(seq, size):
        # Yield consecutive slices of `seq` of length `size` (last may be short).
        return (seq[pos:pos + size] for pos in range(0, len(seq), size))
    def _update_regression_case_name(bm_path, m_names):
        # Overwrite the benchmark list file with one case name per line.
        with open(bm_path, 'w') as f:
            for m_name in m_names:
                f.write(f'{m_name}\n')
    def _link_knerex_output(base_m_names):
        # Symlink every fl knerex output file into the fx case (renaming the
        # last 'fl' in the filename to 'fx') so fx dynasty can reuse it.
        for base_m_name in base_m_names:
            fx_m_name, fl_m_name = f'{base_m_name}fx', f'{base_m_name}fl'
            fl_knerex_output = os.path.join(p_subgraphs, fl_m_name, 'output', 'knerex_730')
            fx_knerex_output = os.path.join(p_subgraphs, fx_m_name, 'output', 'knerex_730')
            assert os.path.exists(fl_knerex_output), f'fl_knerex_output {fl_knerex_output} does not exist'
            os.makedirs(fx_knerex_output, exist_ok=True)
            assert os.path.exists(fx_knerex_output), f'fx_knerex_output {fx_knerex_output} does not exist'
            for f_name in os.listdir(fl_knerex_output):
                fl_f_name = os.path.join(fl_knerex_output, f_name)
                # Find the position of the last occurrence of 'fl'
                last_fl_index = f_name.rfind('fl')
                if last_fl_index != -1:
                    replaced_f_name = f_name[:last_fl_index] + 'fx' + f_name[last_fl_index + 2:]
                else:
                    replaced_f_name = f_name
                fx_f_name = os.path.join(fx_knerex_output, replaced_f_name)
                if os.path.exists(fx_f_name):
                    os.remove(fx_f_name)
                os.symlink(fl_f_name, fx_f_name)
    def _move_result(base_m_names):
        # For each result subdir, point the fx case's graphopt output at the
        # fl case's via symlink so it is not duplicated on disk.
        for base_m_name in base_m_names:
            fx_m_name, fl_m_name = f'{base_m_name}fx', f'{base_m_name}fl'
            fx_p_model, fl_p_model = m_name_dict[fx_m_name][0], m_name_dict[fl_m_name][0]
            p_source = os.path.join(fl_p_model, 'output', 'results')
            path_source = Path(p_source)
            for item in path_source.iterdir():
                if not item.is_dir():
                    continue
                # cp mode_730graphopt_piano to ori_p_model
                dir_src = os.path.join(fl_p_model, 'output', 'results', item.name, 'mode_730graphopt_piano')
                dir_dst = os.path.join(fx_p_model, 'output', 'results', item.name, 'mode_730graphopt_piano')
                # replace dir_dst (file/link/dir) with a symlink to dir_src
                if os.path.exists(dir_dst):
                    if os.path.islink(dir_dst):
                        os.remove(dir_dst)
                    elif os.path.isdir(dir_dst):
                        shutil.rmtree(dir_dst)
                os.symlink(dir_src, dir_dst)
    def _regression_snr(fx_m_names):
        # Run dynasty SNR over every result directory of each fx case and
        # write its SNR report.
        for fx_m_name in fx_m_names:
            fx_p_model = m_name_dict[fx_m_name][0]
            p_subgraph_output = os.path.join(fx_p_model, 'output')
            p_subgraph_results = os.path.join(p_subgraph_output, 'results')
            dir_output_list = [f.path for f in os.scandir(p_subgraph_results) if f.is_dir()]
            xbu_v2.run_dynasty_snr(dir_output_list)
            xbu_v2.generate_snr_report(p_subgraph_output)
    def _remove_subgraph_results(base_m_names):
        # Drop the per-chunk result directories to bound disk usage.
        for base_m_name in base_m_names:
            fx_m_name, fl_m_name = f'{base_m_name}fx', f'{base_m_name}fl'
            shutil.rmtree(os.path.join(p_subgraphs, fx_m_name, 'output', 'results'))
            shutil.rmtree(os.path.join(p_subgraphs, fl_m_name, 'output', 'results'))
    # Process subcases num_of_processors at a time to bound parallelism/disk.
    for fx_m_names in _chunker(all_fx_m_names, num_of_processors):
        print("fx_m_names", fx_m_names)
        base_m_names = [item[:-2] for item in fx_m_names]  # strip 'fx' suffix
        fl_m_names = [item + 'fl' for item in base_m_names]
        # 3.1 run fl regression
        _update_regression_case_name(p_bm_txt, fl_m_names)
        success, _, d_status, d_time = _process_parallel_models(conf_mixbw, "regression")
        # 3.2 link fl knerex_output to fx
        _link_knerex_output(base_m_names)
        # 3.3 run fx dynasty& snr
        _update_regression_case_name(p_bm_txt, fx_m_names)
        success, _, d_status, d_time = _process_parallel_models(conf_mixbw, "dynasty")
        # 3.4 move result
        _move_result(base_m_names)
        # 3.5 run snr
        _regression_snr(fx_m_names)
        # 3.6 post-process
        _remove_subgraph_results(base_m_names)
    time_step3_end = time.perf_counter()
    print("\n======= run subcases regression success =======\n")
    # step4
    xbu_v2.run_offset_calculation(dump=False)
    time_step4_end = time.perf_counter()
    print("\n======= run offset calculation success =======\n")
    # step5 run simulation
    xbu_v2.run_snr_simulation(dump_plot=True, dump_diff=False)
    time_step5_end = time.perf_counter()
    print("\n======= run snr simulation success =======\n")
    # step6 run target
    user_config_target = xbu_v2.run_user_config_generation(dump=False)
    time_step6_end = time.perf_counter()
    print("\n======= run user_config generation success =======\n")
    # step7: build the final target model with the generated quantization
    # config; full dynasty dump (snr_layer_control=0 -> output layer only,
    # but with up to 1000 input pairs).
    gen_fx_model_params_target = gen_fx_model_params.copy()
    gen_fx_model_params_target['quan_config'] = user_config_target
    gen_fx_model_params_target['snr_max_in_pair'] = 1000
    gen_fx_model_params_target['snr_layer_control'] = 0
    success, _, d_status, d_time = gen_fx_model(**gen_fx_model_params_target)
    rename_directory(p_model, p_model_target)
    time_step7_end = time.perf_counter()
    # release png, snr_debug.txt and target to p_output if MIXBW_DEBUG
    if MIXBW_DEBUG:
        def safe_copytree(src, dst):
            # copytree that tolerates a missing src and replaces an existing dst.
            if os.path.exists(dst):
                shutil.rmtree(dst)
            if os.path.exists(src):
                shutil.copytree(src, dst)
        def safe_copy(src, dst):
            # single-file copy with the same missing-src / existing-dst tolerance.
            if os.path.exists(dst):
                os.remove(dst)
            if os.path.exists(src):
                shutil.copy(src, dst)
        # Copy directories
        for src, dst in [
            (p_model_data_png, p_output_data_png),
            (p_model_weight_png, p_output_weight_png),
        ]:
            safe_copytree(src, dst)
        # Copy file
        safe_copy(p_model_log, p_output_log)
    # post-clean
    if clean_cache:
        time.sleep(10)  # waiting for test_case() to finish
        shutil.rmtree(p_temp)
    time_end = time.perf_counter()
    # Per-step timings merged into the status dict returned to the caller.
    # NOTE(review): key typos ("calculatio", "gneration") are left untouched —
    # they are runtime strings that downstream reporting may match on.
    release_time_dict = {
        'mixbw/subgraph creation:t': time_step1_end - time_start,
        'mixbw/subcases creation:t': time_step2_end - time_step1_end,
        'mixbw/subcases regression total:t': time_step3_end - time_step2_end,
        'mixbw/offset calculatio:t': time_step4_end - time_step3_end,
        'mixbw/snr simulation:t': time_step5_end - time_step4_end,
        'mixbw/user config gneration:t': time_step6_end - time_step5_end,
        'mixbw/target total:t': time_step7_end - time_step6_end,
        'mixbw/post process:t': time_end - time_step7_end,
        'mixbw/total:t': time_end - time_start
    }
    d_status.update(release_time_dict)
    return p_output, d_status
def gen_fx_model(
p_onnx,
np_txt,
platform, # choose "520" / "720" / "530" / "630"
optimize="o0", # choose from "o0", "o1", "o2"
limit_input_formats=False,
input_fmt=None, # 可以是None由编译器决定、字符串或字典类型。字符串表示统一格式字典用于指定每个输入节点的格式
datapath_range_method="percentage",
data_analysis_pct=0.999, # set to 1.0 if detection model
data_analysis_16b_pct=0.999999, # set to 1.0 if detection model
data_analysis_threads=8,
datapath_bitwidth_mode="int8",
weight_bitwidth_mode="int8",
model_in_bitwidth_mode="int8",
model_out_bitwidth_mode="int8",
cpu_node_bitwidth_mode="int8", # from 0.24.0
lut_high_accuracy_mode="2", # from 0.25.0
per_channel_radix=1,
percentile=0.001,
outlier_factor=1.0,
quantize_mode="default", # choose from "default", "post_sigmoid"
quan_config=None, # let user to set constraints for quantization.
qat_config=None, # optional config for qat
compiler_tiling="default", # changed from fm_cut, From 0.24.0
p_output="/data1/kneron_flow", # where to put result
p_cache="/workspace/.tmp/models", # where to put temp folder
clean_cache=False, # whether to clean cache
weight_bandwidth=None, # None will use default.
dma_bandwidth=None, # None will use default.
unlock_size_limit=False, # set to True if need to use huge onnx file.
mode=2, # choose from 0/1/2/3. See document for details.
snr_max_in_pair=1000, # max number pairs of input for dynasty+snr. From 0.26.0
snr_layer_control=None, # to control how dynsty dump results for snr check. From 0.26.0
need_copy_prepare_model=True
):
"""Generate fix-point model for kneron NPUs.
Entrypoint for toolchain. Suppose only 1 model per flow run.
Args:
p_onnx (pathlib / str): path to onnx file. it should have passed through onnx2onnx.py.
np_txt (dict): a dictionary of list of images in numpy format.
The keys are the names of input nodes of model.
e.g., `{"input1": [img1, img2]}`, here img1/img2 are two images -> preprocess -> numpy 3D array (HWC)
if set to None, will run ip evaluator only, ignore knerenx+dynasty+compiler+csim
platform:
- "520"
- "530"
- "540"
- "630"
- "720"
- "730"
- "1140"
mode:
- 0: run ip_evaluator only.
- 1: run knerex (for quantization) + compiler only.
- 2: run knerex + compiler + dynasty + csim + bit-true-match check.
dynasty will inference only 1 image and only check quantization accuracy of output layers.
- 3: run knerex + compiler + dynasty + csim + bit-true-match check + SNR calcuation.
dynasty will inference all images (or up to snr_max_in_pair) and dump results of all layers.
It will provide most detailed analysis but will take much longer time.
- 55: run knerex + compiler + dynasty + snr. For internal snr analysis.
- 56: run dynasty + snr.
snr_max_in_pair: max number of input to run dynasty+snr. Default is 1000. Change to smaller number to decrease time.
only valid for mode 3, 55, 56. from 0.26.0
snr_layer_control: dynasty will dump results for snr check. New from 0.26.0
- None: Each mode had a preset value for dump level. Will use the preset value if snr_layer_control is None. User can specify below to override (in some modes).
- 0: only dump output layer.
- 1: only dump cpu layer and output layer.
- 2: dump all layers.
optimize: choose "o0" / "o1" / "o2"
- "o0": the knerex generated quantization model.
- "o1": bias adjust parallel, without fm cut improve
- "o2": bias adjust parallel, with fm cut improve
- "o3": bias adjust sequential, no fm cut improve. SLOW! Not recommended.
- "o4": bias adjust sequential, w fm cut improve. SLOW! Not recommended.
limit_input_formats: Default False. If set to True, will force all
input nodes to have only one hardware format.
If a input node is connected to multiple computational nodes,
compiler may set different formats for each connection by default.
input_fmt: Default None.
- None: 由编译器自动决定输入格式
- str: 字符串类型,指定所有输入节点使用统一格式
- dict: 字典类型,用于分别指定每个输入节点的格式
可用格式请参考 `compiler_v2.get_support_formats(hw_mode)`
datapath_range_method:
- "percentage"
- "mmse"
data_analysis_pct: It is used to exclude extreme values for int8 mode.
The default setting is 0.999. It means 0.1% of absolute maximum value
will be removed among all data. set to 1.0 if detection model.
(Appliable when datapath_range_method set to "percentage").
data_analysis_16b_pct: It is used to exclude extreme values for int16 mode.
The default setting is `0.999999`. It means `0.0001%` of absolute
maximum value will be removed among all data.
set to `1.0` if `detection` model.
(Appliable when datapath_range_method set to "percentage").
data_analysis_threads: how many threads to use for data analsysis for
quantization. Default value is 8. Increase if more cpu cores / memory available.
datapath_bitwidth_mode:
- "int8", default value. (and only choice for `520`)
- "int16".
- "mix balance". A combined bitwidth of int8 and int16, with a preference for int16.
- "mix light". A combined bitwidth of int8 and int16, with a preference for int8.
weight_bitwidth_mode:
- "int8", default value. (and only choice for `520`)
- "int16".
- "int4". (not supported in `520`/`720`)
- "mix balance". A combined bitwidth of int8 and int16, with a preference for int16.
- "mix light". A combined bitwidth of int8 and int16, with a preference for int8.
model_in_bitwidth_mode:
- "int8", default value.
- "int16". (not supported in `520`).
model_out_bitwidth_mode:
- "int8", default value.
- "int16". (not supported in `520`).
cpu_node_bitwidth_mode:
- "int8", default value.
- "int16". (not supported in `520`).
lut_high_accuracy_mode:
- "0": only use one look-up table for Exp/Log.
- "1": use multiple LUTs to support high accuracy and a wide input range in Exp/Log with a 15-bit bitwidth.
- "2": (default) support Exp/Log for higher accuracy, but restrict the input andd output range.
per_channel_radix: default 1. 0 for per layer radix, 1 for per channel radix (better accuracy). Change to 0 for debug. From 0.26.0
percentile: default value 0.001. Appliable when datapath_range_method set to "mmse".
Increase this parameter will increase the search range for optimized range.
outlier_factor: default 1.0. Appliable when datapath_range_method set to "mmse".
Increase this parameter will give weight on outliers so the final range will increased. Vice vesa.
quantize_mode:
- "default": no extra tuning.
- "post_sigmoid": If a model's output nodes were ALL sigmoids and had been removed, choose "post_sigmoid" for better performance.
quan_config: Default: `None`. User can pass in a dictionary to
set constraints for quantization.
compiler_tiling: methods to search for best feature map cut. choose from:
- "default" (default)
- "deep_search" (slow when calling this function, but will improve inference speed when deployed on NPU.)
- "partial_graph_search" (search runtime optimization based on partial graph comparison. Less performance than deep_search) Available from 0.29.0.
p_output: where to save the generated fix models. Default: "/data1/kneron_flow",
weight_bandwidth: set the weight bandwidth. Set to `None` to use default value.
dma_bandwidth: set the dma bandwidth. Set to `None` to use default value.
unlock_size_limit:
- False (default), will raise exceptions if onnx is larger than 3G.
- True. the limitation of origin.onnx is 100G.
Returns:
- success: bool, whether the fix model generation is successful. The caller function should check this value before following actions.
- released: dict of released files. including the html report.
- d_status: dict of module status on each submodule.
- d_time: dict of time on each submodule.
"""
# check platforms
assert platform in fconsts.MODE_HW_LIMIT["inc_in_toolchain"]
# working directory
# NOTE: p_working must be same as specified in template/regression_tc.json/path/cases
env_workdir = os.environ.get("KTC_WORKDIR")
if env_workdir and p_cache == "/workspace/.tmp/models":
p_cache = env_workdir
p_working = Path(p_cache)
p_working.mkdir(parents=True, exist_ok=True)
# prepare working_model_folder
env_output = os.environ.get("KTC_OUTPUT_DIR")
if env_output and p_output == "/data1/kneron_flow":
p_output = env_output
p_export = Path(p_output)
p_export.mkdir(parents=True, exist_ok=True)
p_onnx = Path(p_onnx)
if not p_onnx.exists():
msg = f"Given onnx {p_onnx} does not exist!"
raise FileExistsError(msg)
m_name = futils.remove_appendix(futils.clean_file_name(p_onnx.name))
if DEBUG:
print(f"given onnx: {p_onnx.name}, use cleanup model name: {m_name}")
# check input shapes
if mode > 0:
# no need check npy if ip-eval only
assert np_txt is not None, f"mode {mode} need a valid np_input."
# Make sure input npy is dict
np_txt = futils.load_np_in(np_txt)
futils.verify_input_shape_onnx_npy(p_onnx, np_txt)
platform = int(platform) # platform must be like 520/720/... type: integers
opt_map = {
"o0": "scaled", # no bias adjust, no fmcut
"o1": "wqbi-p", # bias adjust parallel, no fmcut
"o2": "wqbi-p", # bias adjust parallel, w fmcut
"o3": "wqbi-s", # bias adjust sequential, no fmcut. slow. don't use.
"o4": "wqbi-s", # bias adjust sequential, w fmcut. slow. don't use.
}
if optimize not in opt_map:
msg = f"""Given optimize ({optimize}) not in {list(opt_map.keys())}. """
raise ValueError(msg)
# to keep same interface
user_config = quantize_mode
p_template = p_script / "template" / "regression_tc.json"
with open(p_template, "r") as f:
template = json.load(f)
template["path"]["cases"] = str(p_working)
# verify knerex parameters
# choose from mmse / percentage
valid_dp_range = ["percentage", "mmse"]
if datapath_range_method not in valid_dp_range:
raise ValueError(f"datapath_range_method should be {valid_dp_range}. But got: {datapath_range_method}")
# Percentage to keep data: 0.999 (default), 1.0 (Keep all data, e.g., for detection model)
# verify: 0.9 <= data_analysis_pct <= data_analysis_16b_pct <= 1.0
if not 0.9 <= data_analysis_pct <= 1.0:
raise ValueError(f"data_analysis_pct shoud be between 0.9 and 1.0. But got: {data_analysis_pct}")
if not 0.9 <= data_analysis_16b_pct <= 1.0:
raise ValueError(f"data_analysis_16b_pct shoud be between 0.9 and 1.0. But got: {data_analysis_16b_pct}")
if data_analysis_pct > data_analysis_16b_pct:
raise ValueError(f"data_analysis_pct should be less than or equal to data_analysis_16b_pct. But got: {data_analysis_pct} > {data_analysis_16b_pct}")
if not 0 <= percentile <= 0.2:
raise ValueError(f"percentile must be between 0 and 0.2. But got: {percentile}")
if (datapath_range_method == "percentage") and (percentile > 0):
# print(f"WARNING: using '{datapath_range_method}' datapath analysis. Percetile reset to 0.")
percentile = 0
if outlier_factor <= 0:
raise ValueError(f"outlier_factor must > 0. But got: {outlier_factor}")
# verify compiler parameters
valid_tiling = ["default", "deep_search", "partial_graph_search"]
if compiler_tiling not in valid_tiling:
raise ValueError(f"compiler_tiling should be in {valid_tiling}. But got {compiler_tiling}")
# possible override
if platform == 520:
# no compiler_tiling for 520
compiler_tiling = "default"
if optimize in ["o2", "o4"]:
# force to use deep_search
compiler_tiling = "deep_search"
# verify input_fmt
# did not check input node number here.
check_input_fmt(input_fmt, platform)
# create config for whole flow.
try:
conf_reg, _ = generate_conf(template,
platform,
optimize=opt_map[optimize],
mode=mode,
snr_max_in_pair=snr_max_in_pair,
snr_layer_control=snr_layer_control,
limit_input_formats=limit_input_formats,
input_fmt=input_fmt,
dp_bw=datapath_bitwidth_mode,
wt_bw=weight_bitwidth_mode,
in_bw=model_in_bitwidth_mode,
out_bw=model_out_bitwidth_mode,
cpu_bw=cpu_node_bitwidth_mode,
lut_high_accuracy_mode=lut_high_accuracy_mode,
per_channel_radix=per_channel_radix,
datapath_range_method=datapath_range_method,
data_analysis_pct=data_analysis_pct,
data_analysis_16b_pct=data_analysis_16b_pct,
data_analysis_threads=data_analysis_threads,
percentile=percentile,
outlier_factor=outlier_factor,
fm_cut=compiler_tiling
)
except Exception as e:
# probably bad configuration
pp(e)
raise ValueError("Wrong configuration for ktc.analysis().")
# save conf_reg to disk for debug. it will not be used in gen_fx_model later.
p_json = p_working / "regression_config.json"
futils.dict2json(conf_reg, p_json)
def update_config_ip_val(weight_bandwidth, dma_bandwidth, platform):
    """Write the IP-evaluator config JSON for *platform* into the script res dir.

    Overrides the ip_evaluator settings shipped with the toolchain;
    s1.json will be derived from this file when necessary.
    NOTE: if multiple platforms run at the same time, a single
    dma_bandwidth / weight_bandwidth setting may not be accurate.
    """
    config = gen_ip_config(platform, weight_bandwidth, dma_bandwidth)
    out_dir = os.environ.get("KTC_SCRIPT_RES", "/workspace/scripts/res")
    os.makedirs(out_dir, exist_ok=True)
    out_file = os.path.join(out_dir, f"ip_config_{platform}.json")
    futils.dict2json(config, out_file)
update_config_ip_val(weight_bandwidth, dma_bandwidth, platform)
# prepare model folder
def copy_onnx(p_onnx, p_to):
    """Copy an onnx file and, when present, its external-weight sidecar.

    The sidecar is expected next to the source as "<name>_data" and is
    written next to the destination as "<dest name>_data".
    """
    shutil.copy(p_onnx, p_to)
    sidecar = p_onnx.parent / f"{p_onnx.name}_data"
    if not sidecar.exists():
        return
    dst_sidecar = p_to.parent / f"{p_to.name}_data"
    if DEBUG:
        print(f"Found {sidecar}, copy to {dst_sidecar}")
    shutil.copy(sidecar, dst_sidecar)
def prepare_model(p_user_config=None, quan_config=None, qat_config=None, hw_mode=platform):
    """Lay out the model structure on disk: onnx / input txt / config jsons.

    Returns the model directory. The model-name convention requires a
    "cat/model" two-level folder; using m_name/m_name restricts the flow
    to a single category that contains a single model.
    """
    p_model = p_working / m_name / m_name
    if p_model.exists():
        shutil.rmtree(str(p_model))
    p_input = p_model / "input"
    p_input.mkdir(parents=True, exist_ok=False)
    # copy the onnx under its canonical ".origin" name
    dst_onnx = p_input / f"{m_name}.origin.onnx"
    copy_onnx(p_onnx, dst_onnx)
    # read the copied onnx for its input node names, then dump npy inputs as txt
    input_names, output_names, opset = get_ioinfo_onnx(str(dst_onnx))
    futils.npy2txt(np_txt, input_names, p_input)
    # user_config.json applies constraints for better performance
    if (p_user_config is not None) and p_user_config.exists():
        shutil.copy(p_user_config, p_input / "user_config.json")
    elif quan_config is not None:
        # BUG: need to merge with existing json (e.g., p_user_config from quantize_mode).
        futils.dict2json(quan_config, p_input / "user_config.json")
    # save qat_xxx_config.json
    if qat_config:
        futils.dict2json(qat_config, p_input / f"qat_{hw_mode}_config.json")
    return p_model
def prepare_model_only_ip_eval():
    """Minimal model layout for the ip-evaluator-only mode.

    Returns the model directory; no input txt files are generated.
    """
    # "cat/model" convention: m_name/m_name limits the flow to one model
    p_model = p_working / m_name / m_name
    if p_model.exists():
        shutil.rmtree(str(p_model))
    p_input = p_model / "input"  # the flow discovers models via this folder
    (p_input / "knerex_input").mkdir(parents=True, exist_ok=False)
    # TODO: only_ip_eval needs to support both onnx and bie formats,
    # but model discovery keys on the ".origin" file name.
    dst = p_input / f"{m_name}.origin{p_onnx.suffix}"
    # NOTE: only .onnx can have a *_data sidecar; bie embeds its data.
    copy_onnx(p_onnx, dst)
    return p_model
def run_ip_evaluator_only():
    """Mode 0: run the ip evaluator only.

    Returns a 3-tuple (success, released_files, df_report);
    (False, None, None) when anything in the flow raises.
    """
    try:
        prepare_model_only_ip_eval()
        rfs, ok_list, df_report, _, _ = run_flow(conf_reg, [m_name])
        # exactly one model in this mode
        result = (ok_list[0], rfs[0], df_report)
    except Exception:
        result = (False, None, None)
    return result
def copy_release_file(fn_to_release: dict, p_export):
    """Copy each release artifact into *p_export*.

    *fn_to_release* maps a key to a source Path; the returned dict maps
    the same keys to the destination Paths. An empty or None mapping
    yields {}. Symlinks are copied as symlinks (follow_symlinks=False).
    """
    released = {}
    for key, src in (fn_to_release or {}).items():
        dst = p_export / src.name
        shutil.copy(src, dst, follow_symlinks=False)
        released[key] = dst
    return released
def run_btm_and_release(need_copy_prepare_model):
    """Modes 1/2/3: generate fix-point models and collect release files.

    Returns (success, released_files, df_report) for the single model.
    TODO: init the model in the given folder and run regression there;
    currently we create it in a temp folder then copy to the given folder.
    """
    # check user_config
    assert user_config in ["default", "post_sigmoid"]
    known_configs = {
        "post_sigmoid": p_script / "template" / "user_config_post_sigmoid.json",
    }
    p_user_config = known_configs.get(user_config)
    if need_copy_prepare_model:
        prepare_model(p_user_config, quan_config, qat_config)
    # the model layout is ready; generate the fx models
    rfs, ok_list, df_report, _, _ = run_flow(conf_reg, [m_name])
    # TODO(release_time_report): dump a time report built from btm_report
    # (a dataframe) via df_only_time + df2pkl into rfs[0]["time_report"].
    # only one model
    return ok_list[0], rfs[0], df_report
# force to have same output
try:
if mode == 0:
success, fn_to_release, df_report = run_ip_evaluator_only()
else:
success, fn_to_release, df_report = run_btm_and_release(need_copy_prepare_model)
d_status, d_time = df_row2status(df_report)
except Exception as e:
pp(e)
success, fn_to_release, df_report, d_status, d_time = False, {}, None, {}, {}
released = copy_release_file(fn_to_release, p_export)
if clean_cache:
time.sleep(10) # waiting for test_case() to finish
shutil.rmtree(p_working)
return success, released, d_status, d_time
def run_flow(fn_json, keywords=None):
    """Core function for kneron regression flow.

    1. init regression config
    2. run regression on each model, using multi-processing if applicable
    3. generate compiled report on btm and snr

    Args:
        fn_json: configuration handed to regression(); callers in this
            file pass either a json path (__main__) or an already-built
            config dict (conf_reg) — regression() appears to accept both.
        keywords: optional keywords used to filter test cases; zero
            matches is fatal (process exits with status 1).

    Returns a 5-tuple:
        - released_files: per-model released-file dicts (None for a
          failed case in the sequential path)
        - success_list: per-model bool success flags
        - btm_report: dataframe of module status for each model
        - btm_summary: summary companion of btm_report
        - snr_reports: dict — key is platform (e.g. 520, 720) if turned
          on in this regression; value is a dataframe with snr of
          output nodes for each model.
    """
    r = regression(fn_json)
    time_start = time.perf_counter()
    selected_case = r.filter_cases(keywords)
    logger = futils.create_logger("flow", None, r.config["regression"]["logging_level"])
    # this object is to record status/timestamp of all models along the whole regression;
    # run_single_case reports back through these blinker signals
    rep = report()
    signal("time_sender").connect(rep.receive_time_usage)
    signal("data_sender").connect(rep.receive_info)
    if len(selected_case) == 0:
        logger.critical("Error: found 0 test case matching keywords ({}). ".format(keywords))
        exit(1)
    logger.info("total models are: {}".format(len(selected_case)))
    n_parallel_model = r.config["dynasty"]["n_parallel_model"]
    is_customer = not r.config["path"]["internal"]
    # TODO: this condition may be wrong
    is_big_model = any(["big_model" in str(test_case_path) for test_case_path in selected_case])
    # sequential customer / big-model runs compile their report after each model
    print_each_model = n_parallel_model == 1 and (is_customer or is_big_model)
    if n_parallel_model > 1:
        p = multiprocessing.Pool(n_parallel_model)
        ts_w_c = [(sc, r.config) for sc in selected_case]
        # NOTE: the run_single_case must be serializable. it should be on top level, not local function
        w = p.map_async(run_single_case, ts_w_c)  # , callback=rep.add_err_record
        w.wait()
        # collect reports and released files; each pool result is a
        # (error_record, released_files) pair
        success_list = []
        for e in [a[0] for a in w.get()]:
            rep.add_err_record([e])
            success_list.append(fconsts.is_success(e))
        released_files = [a[1] for a in w.get()]
        p.close()
    else:
        # only 1 model at 1 time
        # usually SNR regression & toolchain will be in this setting.
        released_files = []
        success_list = []
        for one_case in selected_case:
            e, rel_fn = run_single_case((one_case, r.config))
            # if run_single_case failed, rel_fn will be None
            released_files.append(rel_fn)
            rep.add_err_record([e])
            success_list.append(fconsts.is_success(e))
            if print_each_model:
                btm_report, btm_summary = rep.compile(r.report_csv)
                snr_reports = generate_snr_reports(r, rep, selected_case)
    # this run is finished.
    time_end = time.perf_counter()
    time_used_m = max(int((time_end - time_start) / 60), 1)
    r.commit_info.append(f"Duration for this run: {time_used_m} minutes\n")
    r.write_info()
    # generate reports for whole regression. not for only 1 test case.
    if not print_each_model:
        # final print of results. skip if print already.
        # compile report on errors
        btm_report, btm_summary = rep.compile(r.report_csv)
        # compile report on snr when piano_dynasty run
        snr_reports = generate_snr_reports(r, rep, selected_case)
    return released_files, success_list, btm_report, btm_summary, snr_reports
def check_reg_success_by_keys(d):
    """Quick check that a model flow executed successfully.

    Success means at least one released key ends with "/bie"; a failed
    run still releases report.html / report.json, so their presence
    alone does not count. MAYBE: use len(d) > 2 instead.
    """
    return any(key.endswith("/bie") for key in d)
if __name__ == "__main__":
    # CLI entry point: parse arguments, run the regression flow, and
    # print a short summary. Exit status matters for CI (see --all-pass).
    arguments = docopt(__doc__, version="run regression 1.2")
    # check commit folder
    fn_json = Path(arguments["<fn_json>"])
    if not fn_json.exists():
        print(f"Given config file: {fn_json} does not exist. quit...")
        exit(1)
    keywords = arguments["<keys>"]
    released_files, success_list, btm_report, btm_summary, snr_reports = run_flow(fn_json, keywords)
    n_good = len([a for a in success_list if a])
    n_all = len(success_list)
    print(f"Succeeded cases are {n_good}/{n_all} for {fn_json.name}.")
    # visual separator; the original `f"" * 140` multiplied an empty
    # string and printed nothing — a separator character was lost.
    print("=" * 140 + "\n\n\n")
    # check all cases success or not. needed in CI.
    if arguments["--all-pass"]:
        if not all(success_list):
            # non-zero status so CI marks the run as failed
            exit(99)
        # otherwise will always return 0 even if regression failed.
        exit(0)