#! /usr/bin/env python3
import os
import shutil
import copy
import tempfile
import pathlib
import json  # sometimes commentjson is too slow
import re
import random
from collections import OrderedDict, defaultdict
from dict_recursive_update import recursive_update
from blinker import signal
import subprocess
import pandas as pd
from jinja2 import Environment, FileSystemLoader
import sys_flow.flow_utils as futils
import sys_flow.util_lib as util_lib
import sys_flow.flow_constants as fconsts
import sys_flow.dynasty_v3 as dynasty
import sys_flow.compiler_v2 as compiler
import sys_flow.csim_utils as csim
from sys_flow.exceptions import RegressionError, MultiRegressionError, GeneralError, print_err, print_command, run_module
from sys_flow.onnx_op_stats import onnx_info
from sys_flow.snr_calculator_v2 import combine_snr, calculate_statistics, get_case_output, get_weight_bin_stats
import snoop
DEBUG = bool(os.environ.get("REGRESSION_DEBUG", False))
snoop.install(enabled=DEBUG)
def release_test_case(path_to_model, path_to_base, dump_dynasty=False):
"""a helper function to release generated model.
inputs:
- dump_dynasty: dump the dynasty output for debug purpose, in mode 2/3.
"""
files_selected = [
"input/*.origin.onnx",
"input/knerex_input*",
"input/simulator_input*",
# "*/*.json",
"output/knerex_*/*.onnx",
"output/knerex_*/*.bie",
"output/*.xlsx",
"output/compiler_*/*command.bin",
"output/compiler_*/*setup.bin",
"output/compiler_*/*weight.bin",
"output/compiler_*/apb.npu",
"output/compiler_*/*.nef",
"output/compiler_*/*.kne",
]
p_from = pathlib.Path(path_to_model)
p_to = pathlib.Path(path_to_base) / p_from.name
for pat in files_selected:
fns = p_from.glob(pat)
for fn in fns:
# copy to relative path to base.
fn_r = futils.relative_path(fn, p_from)
fn_to = p_to / fn_r
pp(f"{fn} -> {fn_to}") # noqa
if fn_to.exists():
pp(f"{fn_to} exists! skip") # noqa
continue
if not fn_to.parent.exists():
fn_to.parent.mkdir(exist_ok=True, parents=True)
if fn.is_symlink():
# fn_to.symlink_to(fn.readlink()) # TODO: after toolchain use py 3.9
                # NOTE: assume all symbolic links in released files are relative links
# NOTE: check symlink before check is_dir
fn_to.symlink_to(os.readlink(fn))
elif fn.is_dir():
shutil.copytree(fn, fn_to)
else:
shutil.copy(fn, fn_to, follow_symlinks=False)
return p_to
class test_case:
"""The class to provide unified interface for test_case.
input: model path, where model and files should be orgazed already.
output: model infomation.
* run_flow is the function to run all modules, with a `config` input
* the config will define which modules to run.
"""
def __init__(self, model_path, config=None):
"""
The `test_case` class wrap up the interface of model.
It support unprocessed model and load pre-existing fx model.
"""
# the model may be unprocessed or processed (with fx model)
# the config may be string or a path to a json saved for THIS model.
if config is None:
p_regression_config = pathlib.Path(model_path) / "output" / "regression_config.json"
if p_regression_config.exists():
# use existing config
config = p_regression_config
        if config and isinstance(config, (str, pathlib.Path)):
p_config = pathlib.Path(config)
if p_config.exists():
config = futils.load_regression_json(p_config)
# TODO: or should I skip some steps? where operate on self.config
self.initial_test_case(model_path, config)
if config:
            # NOTE: config will be deep-copied, so no lock objects in it.
self.prepare_flow(config)
self.check_this_case()
def initial_test_case(self, model_path, config=None):
"""initial test case. set up pre-defined path for this test case.
* set up name/path for onnx / input, etc
* verify input images for knerex / dynasty
* set up logger.
NOTE: do not use self.config in this function.
Suppose to be independant from regression/config
"""
try:
self.model_path = pathlib.Path(model_path)
self.model_name = self.model_path.name
self.cat_name = self.model_path.parent.name
self.model_id = "{}/{}".format(self.cat_name, self.model_name)
self.btm_txt = "test_input.txt" # default input text file.
# create logger. Try to keep this as early as possible
self.logger = futils.create_logger("model {}".format(self.model_name), None, "WARNING")
self.logger.info("run initial_test_case")
if not self.model_path.exists():
raise RegressionError("general/initial", self.model_id, msg="model does not exist.")
self.prepare_path(config)
# pre-defined onnx names
self.map_onnx, self.onnx_infos = self.get_onnx_name_map()
except Exception as e:
self.logger.error(e) # what if logger not ready yet?
raise RegressionError("general/initial", self.model_id)
@run_module(module_name="general/model oversize")
def check_onnx_size(self, p_origin):
"""Examine the file size of origin.onnx.
Internal regression will skip onnx too large.
"""
onnx_size = int(pathlib.Path(p_origin).resolve().stat().st_size / (1024 * 1024))
max_MB = self.config["compiler_piano"]["max_onnx_MB"]
signal("data_sender").send((self.model_id, "general/onnx size (MB)", onnx_size))
self.onnx_size = onnx_size
if onnx_size > max_MB:
raise RegressionError("general/model oversize", self.model_id, msg=f"onnx {onnx_size}Mb//max size {max_MB}Mb")
def check_this_case(self):
"""Some special check on this case."""
if pathlib.Path(self.map_onnx["origin"]).name.endswith(".bie"):
# NOTE: origin.bie is only supported in only_ip_evaluator.
assert self.config["module_run"]["only_ip_evaluator"], "origin.bie is only for only_ip_evaluator !!!"
def check_csim_error(self, cp, platform):
"""Find detail reason for csim crash.
CSIM will return 33 as exit code for some known errors.
TODO: move to csim_utils.py?
"""
cat1 = f"kdp{platform}"
if cp.returncode == 0:
# success
return
elif cp.returncode == 33:
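            # known compiler errors are wrapped as [[[ message ]]] in the log;
            # extract every such block as the failure message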
            pat = re.compile(r"\[\[\[(.*?)\]\]\]", re.MULTILINE | re.DOTALL)
log = "\n".join([cp.stdout, cp.stderr])
msg = "\n".join(pat.findall(log))
raise RegressionError(f"{cat1}/compiler error", self.model_id, msg=msg)
elif cp.returncode == 111:
# timeout
raise RegressionError(f"{cat1}/csim", self.model_id, msg=cp.stderr)
else:
raise RegressionError(f"{cat1}/csim", self.model_id)
def check_knerex_error(self, cp, platform):
"""Find detailed report for calling knerex.
There are some submodules in knerex, e.g., datapath analysis, may went wrong.
This step is to improve debug process by reporting specific reasons.
"""
cat1 = f"kdp{platform}"
log = "\n".join([str(cp.stdout), str(cp.stderr)])
fn_log = self.path[f"knerex_output_{platform}"] / "knerex_run.log"
if self.config["path"]["internal"]:
            # save the log when running internally
with open(fn_log, "w") as f:
f.write(f"knerex return with code {cp.returncode}\n\n")
f.writelines(log)
# check memory estimation for datapath analysis
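        # a matching log line looks like (illustrative numbers):
        #   "Datapath Analysis takes 2048KB=(1024KB for model buffer + 1024KB for results) per thread"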
re_mem_est = re.compile("Datapath Analysis takes (\d+)KB=\((\d+)KB for model buffer \+ (\d+)KB for results\) per thread")
try:
dpm_total, dpm_buf, dpm_rslt = re_mem_est.findall(log)[0]
# buffer related to thread number
# dpm_rslt related to image number
signal("data_sender").send((self.model_id, f"{cat1}/dp analysis total (KB)", dpm_total))
signal("data_sender").send((self.model_id, f"{cat1}/dp analysis buf (KB)", dpm_buf))
signal("data_sender").send((self.model_id, f"{cat1}/dp_analysis result (KB)", dpm_rslt))
        except Exception:
pass
# check memory estimation for sequential bias adjust
re_mem_est = re.compile("Sequential Bias Adjustment takes (\d+)KB memory to hold (\d+) samples of (\d+)KB each")
try:
spb_total, spb_n, spb_x1 = re_mem_est.findall(log)[0]
signal("data_sender").send((self.model_id, f"{cat1}/seq bias adjust total (KB)", spb_total))
signal("data_sender").send((self.model_id, f"{cat1}/seq bias adjust n", spb_n))
signal("data_sender").send((self.model_id, f"{cat1}/seq bias adjust mem x1 (KB)", spb_x1))
        except Exception:
pass
# check memory estimation for parallel bias adjust
re_mem_est = re.compile("Parallel Bias Adjustment takes (\d+)KB=\((\d+)KB for model buffer \+ (\d+)KB for results\) per thread")
try:
ppb_total, ppb_buf, ppb_rslt = re_mem_est.findall(log)[0]
signal("data_sender").send((self.model_id, f"{cat1}/prll bias adjust total (KB)", ppb_total))
signal("data_sender").send((self.model_id, f"{cat1}/prll bias adjust buf (KB)", ppb_buf))
signal("data_sender").send((self.model_id, f"{cat1}/prll bias adjust result (KB)", ppb_rslt))
        except Exception:
pass
        s1 = {
            "knerex": r"KnerexERROR:\s*(.*)",
            "HW not support": r"HW_NOT_SUPPORT:\s*(.*)",
            "unimplemented feature": r"UNIMPLEMENTED_FEATURE:\s*(.*)"
        }
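        # e.g. a log line such as "KnerexERROR: op not supported" (illustrative message)
        # raises RegressionError("kdp{platform}/knerex", ...) carrying that message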
for m1, p1 in s1.items():
p2 = re.compile(p1).findall(log)
if len(p2) > 0:
msg = p2[0]
self.model_fx_report[(f"{cat1}/ERROR")] = msg
raise RegressionError(f"{cat1}/{m1}", self.model_id, msg=msg)
if cp.returncode == 0:
return
elif cp.returncode == 111:
# stderr.startswith("TIMEOUT"):
raise RegressionError(f"{cat1}/knerex", self.model_id, msg=cp.stderr)
elif cp.returncode == 11:
# DELETE below
raise RegressionError(f"{cat1}/knerex", self.model_id, msg="datapath analysis failed")
elif cp.returncode == 30:
raise RegressionError(f"{cat1}/knerex", self.model_id, msg="KnerexMemoryInsufficient")
else:
# NOTE: check knerex log for specific errors
spec_err = {"deadloop": ["Deadloop", "Loop Maxed out"]}
for cat2, msgs in spec_err.items():
for msg in msgs:
if len(re.compile(msg).findall(log)) > 0:
raise RegressionError(f"{cat1}/knerex", self.model_id, msg=cat2)
# by default
raise RegressionError(f"{cat1}/knerex", self.model_id, msg=f"err: {cp.returncode}")
def get_onnx_name_map(self):
"""
There are a few onnx used/generated during the quantization process.
This step is to create map of possible onnx.
NOTE:
The keys here are widely used in this project. DO NOT change any.
Follow the name rules of "kdp{hw_mode}_{optimization}_{dev_v}_{fmt}"
Factors:
- dev_v: develop version. currently only "piano"
- hw_mode: float, kdp520/kdp720/etc
- optimization: origin / scaled / bias adjust / ...
- format: onnx / bie
"""
map_onnx = {}
onnx_infos = {}
        # there must be an origin.onnx (or origin.bie for only_ip_evaluator)
origin_onnx = f"{self.model_path}/input/{self.model_name}.origin.onnx"
p_origin = pathlib.Path(origin_onnx)
using_bie = False
if not p_origin.exists():
# second choice is origin.bie
origin_bie = f"{self.model_path}/input/{self.model_name}.origin.bie"
p_origin = pathlib.Path(origin_bie)
if not p_origin.exists():
raise RegressionError("general/Missing origin.onnx", self.model_id)
using_bie = True
map_onnx["origin"] = p_origin
        # read in the origin.onnx for later use
        # TODO: can we skip this to save time?
# TODO: make this block work on bie?
if not using_bie:
onnx_infos["origin"] = onnx_info(p_origin)
_, _, self.est_mac_kB = onnx_infos["origin"].get_mac_memory()
self.check_onnx_io(onnx_infos["origin"])
for hw_mode in fconsts.MODE_HARDWARE: # 520/720/530
for fmt in fconsts.MODEL_FORMAT: # piano, onnx / bie
# piano, normal. the only develop version for now. treat as constant
dev_v = "piano"
p_knerex_out = self.path[f"knerex_output_{hw_mode}"]
prefix = f"{self.model_name}.kdp{hw_mode}"
                # this is copied from the compiler frontend
map_onnx[f"kdp{hw_mode}_opt_{dev_v}_{fmt}"] = p_knerex_out / f"{prefix}.graph_opt.{fmt}"
# below generated by knerex
map_onnx[f"kdp{hw_mode}_scaled_{dev_v}_{fmt}"] = p_knerex_out / f"{prefix}.scaled.{fmt}"
map_onnx[f"kdp{hw_mode}_decomp_{dev_v}_{fmt}"] = p_knerex_out / f"{prefix}.decomposed.{fmt}"
map_onnx[f"kdp{hw_mode}_quan_{dev_v}_{fmt}"] = p_knerex_out / f"{prefix}.scaled.quan.{fmt}"
map_onnx[f"kdp{hw_mode}_release_{dev_v}_{fmt}"] = p_knerex_out / f"{prefix}.release.{fmt}"
# piano, bias_adjust
for bi_name in ["wqbi", "hwbi", "hwbi-mse"]:
map_onnx[f"kdp{hw_mode}_{bi_name}_{dev_v}_{fmt}"] = p_knerex_out / f"{prefix}.scaled.quan.{bi_name}.{fmt}"
# NOTE: the quantized model to release should have ".scaled" in it.
# example: kdp720.scaled.bie, kdp530.scaled.quan.wqbi.onnx
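        # Example entries generated above (illustrative, for hw_mode=720):
        #   map_onnx["kdp720_scaled_piano_bie"] -> output/knerex_720/<model>.kdp720.scaled.bie
        #   map_onnx["kdp720_wqbi_piano_onnx"]  -> output/knerex_720/<model>.kdp720.scaled.quan.wqbi.onnx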
return map_onnx, onnx_infos
def load_per_model_config(self, p_model_config):
"""A user-config json file (model_config.json) may be provide for fine-tune quantization process. """
if p_model_config.exists():
# deep copy of origin config
config_new = copy.deepcopy(self.config)
with open(p_model_config, "r") as f:
per_model_config = json.load(f)
recursive_update(config_new, per_model_config)
self.config = config_new
def get_nef_model_id(self):
"""As name implies.
HACK: get model_id for kneron solutions
may in pre-defined.
we should try best to assign one model id for internal cases.
"""
k = (self.cat_name, self.model_name)
if k in self.config["map_model_id"]:
return self.config["map_model_id"][k]
        s = re.compile(r"model_(\d+)")
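        # e.g. a model named "model_00123" yields nef model id 123 (illustrative name)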
try:
# come here if kneron app release
return int(s.findall(str(self.model_name))[0])
        except Exception:
if self.config["path"]["internal"]:
return random.randint(20000, 30000)
else:
# 32768 is default
return 32768
def prepare_flow(self, config):
"""Prepare for the quantization flow.
Check the per-model config.
"""
try:
self.config = copy.deepcopy(config)
# update config if this model has specific config to change
p_model_config = self.model_path / "input" / "model_config.json"
self.load_per_model_config(p_model_config)
# save status to local
# TODO: send this out to report instead of signal
self.module_status = {"general": {"Success": False}}
for hw_mode in self.config["hw_mode_on"]:
self.module_status[hw_mode] = {}
# some special model types. default settings.
self.is_big_model = True
self.is_single_layer = False # for debug
self.is_multi_layer = False # for debug
self.is_multi_core = False # for debug
if self.config["path"]["internal"]:
# if internal, some special settings
self.is_big_model = "big_model" == self.config["regression"]["model_type"]
self.is_single_layer = "single_layer" == self.config["regression"]["model_type"]
self.is_multi_layer = "multi_layer" == self.config["regression"]["model_type"]
self.is_multi_core = "multi_core" == self.config["regression"]["model_type"]
# nef_model_id is needed for calling batch-compiler
self.nef_model_id = self.get_nef_model_id()
self.logger.info(f"{self.cat_name}/{self.model_name} with nef model id: {self.nef_model_id}")
if self.is_big_model:
signal("data_sender").send((self.model_id, "general/nef_model_id", str(self.nef_model_id)))
if len(str(self.path["user_config_json"])) > 4:
with open(self.path["user_config_json"], "r") as f:
self.config["user_config"] = json.load(f)
            # need to check the validity of the onnx first
if self.config["module_run"]["validate_onnx"]:
self.check_onnx_valid()
if self.is_big_model:
self.check_onnx_size(self.map_onnx["origin"])
self.compiler_output = {}
            # use model_fx_report to save the results of generating this fx model,
            # then save it to "output/model_fx_report.json"
self.model_fx_report = OrderedDict()
self.model_fx_report["docker_version"] = self.config["path"]["toolchain"]["version"]
self.model_fx_report["comments"] = self.config["comments"]
self.model_fx_release = OrderedDict()
self.pre_clean_up()
# create configs for datapath analysis, csim ini, etc
            # initialize jinja2
file_loader = FileSystemLoader(str(self.config["path"]["template"]))
self.jinja_env = Environment(loader=file_loader)
if not self.config["module_run"]["only_ip_evaluator"]:
self.check_input_files()
if self.config["dynasty"]["regression_input"] == "all":
self.fn_report = "{}/output/snr_analysis/snr_analysis_report.csv".format(self.model_path)
else:
self.fn_report = "{}/output/results/{}/snr_analysis_report.csv".format(self.model_path, self.btm_txt)
self.save_regression_json()
# save cli commands for debug purpose
self.commands = []
except Exception as e:
self.logger.error(e)
if type(e) is RegressionError: # TODO: MultiRegressionError
raise
else:
raise RegressionError("general/prepare", self.model_id)
@run_module(module_name="general/clean_opt")
def clean_opt(self):
"""Clean up opt_compile generated by compiler submodules (fm-cut, etc)."""
        # clean up opt_compile, which comes from fm_cut but is sometimes not cleaned.
p_out = self.path["dir_output"]
p_opt_cmpls = list(p_out.glob("compiler_*/opt_compile"))
for p_opt in p_opt_cmpls:
cmd = f"pkill -f {self.model_name} ; sleep 1; rm -rf {p_opt}"
            cp2 = futils.run_bash_script(cmd, do_echo=False)
            # NOTE: cp2.returncode may be -15 (SIGTERM), likely because the pkill matches this command itself
@run_module(module_name="general/post_clean")
def post_clean_up(self):
"""To clean up before finish.
This used be `__del__` method but it may not be triggerd immediately
after the flow finihs. It has been renamed and put into run_flow.
The "run_flow" will not be called multiple times according to our experience.
If any submodule failed, this function will be called in `run_single_case`
"""
        # save commands to a file. dynasty-related commands are not included yet.
self.generate_bash_script()
if hasattr(self, "work_in_memory") and self.work_in_memory and hasattr(self, "path"):
            # per compiler team request, don't use zip, just copy back
d_from = self.path["dir_output_memory"].absolute()
d_to = self.path["dir_output"].absolute()
# if d_to.is_symlink():
# d_to.unlink()
command = f"if mountpoint -q {d_to}; then umount {d_to}; fi; pushd {d_from} > /dev/null; tar cf - . | (mkdir -p {d_to}; cd {d_to}; tar xvf -)"
if DEBUG:
print("recovering from work_in_memory")
print(command)
cp = futils.run_bash_script(command)
# TODO: check cp.returncode
shutil.rmtree(self.path["dir_output_memory"].parent.absolute())
self.set_permission_output()
for handler in self.logger.handlers[:]:
handler.close()
self.logger.removeHandler(handler)
if hasattr(self, "dir_output_list"):
self.clean_dynasty_output(self.dir_output_list)
def __repr__(self):
"""Provide brief info on the model."""
return "Model {}".format(self.model_path)
def prepare_path(self, config=None):
"""
Examine essential files/folders for model.
All essential paths are saved in a dictionary.
"""
self.path = {}
# input folder
# output folder. this will be used many times
dir_out = self.model_path / "output"
self.path["user_config_json"] = self.model_path / "input/user_config.json"
if not pathlib.Path(self.path["user_config_json"]).exists():
self.path["user_config_json"] = ""
for hw_mode in fconsts.MODE_HARDWARE: # 520/720/530/730/630
p_knerex_out = dir_out / f"knerex_{hw_mode}"
self.path[f"knerex_output_{hw_mode}"] = p_knerex_out
self.path[f"updater_{hw_mode}_json"] = p_knerex_out / f"updater_{hw_mode}.json"
self.path["fn_json_radix"] = self.model_path / "input/input_radix.json" # User defined json
        # NOTE: why use knerex_input instead of the node_input name?
        # 1. the node_input name may include "/", which would cause great trouble if used as a character in a directory name.
        # 2. the node_input name could be arbitrarily ANYTHING; we cannot guarantee safety or absence of conflicts with our other files.
        # NOTE: for multiple inputs, we assume each PAIR/GROUP of files is put into knerex_input/knerex_input_1/... with the SAME name.
        # Here we assume knerex_input is for the 1st input node given by ONNX, and knerex_input_1 is for the 2nd input node.
        # We also assume the input node order given by ONNX is the same as in the piano graph. Otherwise, BIG PROBLEM.
p_knerex_in = self.model_path / "input/knerex_input"
self.path["dir_knerex"] = p_knerex_in
if not p_knerex_in.exists():
raise RegressionError("general/Missing input", self.model_id, msg="Mising knerex_input folder.")
self.path["dir_simulator"] = self.model_path / "input/simulator_input"
if not self.path["dir_simulator"].exists():
# will use same as knerex_input
self.path["dir_simulator"] = p_knerex_in
        # if dir_out is a symlink, it is a leftover from a previous UNSUCCESSFUL run that was not cleaned up
if dir_out.is_symlink():
# NOTE: dir_out is a symlink but will not exist() if the target does not exist
dir_out.unlink()
        # HACK: work_in_memory puts the output folder in memory, to avoid disk-io blocking.
        # Especially useful for big models with feature-map cut, which need many writes to the compiler output.
try:
self.work_in_memory = config["regression"]["work_in_memory"]
        except (TypeError, KeyError):
self.work_in_memory = False
if self.work_in_memory:
            # if work_in_memory is needed, work in /dev/shm
            # the contents will be copied back to disk later.
            # the whole output folder lives in memory
d_temp = pathlib.Path(tempfile.mkdtemp(prefix="/dev/shm/wim_"))
dir_out_memory = d_temp / "output"
dir_out_memory.mkdir(parents=True, exist_ok=True)
dir_out.mkdir(parents=True, exist_ok=True)
            # NOTE: work_in_memory means old results are cleaned up.
            # it used to copy the datapath_analysis temp results, but that folder has been changed,
            # so we skip it now.
# TODELETE
# dir_out will be deleted if exists
# futils.safe_link(dir_out_memory, dir_out, relative=False, delete_exists=True)
# use mount
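            # bind-mounting the tmpfs folder onto dir_out makes every write to the output
            # folder transparently land in /dev/shm; post_clean_up() later umounts it and
            # tars the contents back to the on-disk folder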
command = f"mount --bind {dir_out_memory} {dir_out}"
cp = futils.run_bash_script(command)
# save for future usage
self.path["dir_output_memory"] = dir_out_memory
if DEBUG:
print(f"work_in_memory: {dir_out_memory} mount to output folder: {dir_out}")
print(command)
self.path["dir_input"] = self.model_path / "input"
self.path["dir_output"] = dir_out
dir_out.mkdir(mode=0o770, parents=True, exist_ok=True)
# selected one input (test_input.txt by default) for bit-true-match
p_btm_dump = dir_out / "results" / self.btm_txt
self.path["btm_dump"] = p_btm_dump
# TODO: remove platform variables
platform = "_piano" # only support piano platform now. no more renaissance
for hw_mode in fconsts.MODE_HARDWARE: # 520 / 720 / 530 / etc
p_knerex_out = dir_out / f"knerex_{hw_mode}"
            # knerex temporary analysis results
self.path[f"temp_dpa{platform}_{hw_mode}"] = p_knerex_out / f"analysis_datapath{platform}_{hw_mode}.tmp"
self.path[f"temp_wta{platform}_{hw_mode}"] = p_knerex_out / f"analysis_weight{platform}_{hw_mode}.tmp"
# compiler and nef output directory
compiler_out = dir_out / f"compiler_{hw_mode}"
nef_out = dir_out / f"nef_{hw_mode}"
self.path[f"compiler{platform}_{hw_mode}_out"] = compiler_out
# example: compiler_piano_output_530/compiler_piano.config.kdp530.json
self.path[f"compiler{platform}_{hw_mode}_json"] = compiler_out / f"compiler{platform}.config.kdp{hw_mode}.json"
self.path[f"nef_output_{hw_mode}"] = nef_out
# to fill in later after run compiler
self.path["ioinfo_json"] = {}
self.path["calculation_json"] = {}
# qat config json for knerex
self.path[f"qat_{hw_mode}_config_json"] = self.model_path / "input/qat_{}_config.json".format(hw_mode)
if not self.path[f"qat_{hw_mode}_config_json"].exists():
self.path[f"qat_{hw_mode}_config_json"] = ""
# snr file to check.
if config:
if config["dynasty"]["regression_input"] == "all":
self.path["snr_csv"] = dir_out / "snr_analysis" / "snr_analysis_per_layer.csv"
else:
self.path["snr_csv"] = dir_out / "results" / self.btm_txt / "snr_analysis_per_layer.csv"
self.path["snr_excel"] = dir_out / f"{self.model_name}_snr_report.xlsx"
# fx model report. for every run
self.path["model_fx_html"] = dir_out / "model_fx_report.html"
# for app release only
self.path["model_fx_json"] = dir_out / "model_fx_report.json"
# where to save self.config to this file for future reference.
self.path["export_regression_json"] = dir_out / "regression_config.json"
# back up bash commands
self.path["fn_cmd"] = self.model_path / "output/flow_commands.sh"
def set_permission_output(self):
"""Set permission for test cases so that other users can access.
If not using docker, One can only set permissions for file created by themselves.
If using docker, you can anything
Diretory set to 755, files set to 644.
Using pathlib.Path.chmod in docker will NOT work. so we use bash
"""
dir_out = self.path["dir_output"]
try:
futils.set_folder_public(dir_out)
except Exception as e:
self.logger.error(e)
def find_simulator_input_list(self, p_txt):
"""
Find the input images in simluator_input folder.
The `simulator_input` contains input for dynasty/csim/dongle inference.
Our regression are using the file name `test_input.txt` as default file name for bit-true-match. Users may limit the number of input groups for inference. The `test_input.txt` will be used at first by default.
# TODO: refactor this function
# TODO: if no test_input.txt exist, randomly pick it for bit-true-match
"""
if self.config["dynasty"]["regression_input"] == "default":
default_txt = list(p_txt.glob(self.btm_txt))[0]
sim_lists = [default_txt]
        else:  # otherwise run dynasty on all txt
sim_lists = list(p_txt.glob("*.txt"))
# sort input texts by names. but move "test_input.txt" to the 1st if exists
sim_lists = sorted(sim_lists, key=lambda x: "" if x.name == self.btm_txt else x.name)
if self.config["dynasty"]["sample_seed"] is not None and len(sim_lists) > 2:
# randomize
ram_list = sim_lists[1:]
random.seed(self.config["dynasty"]["sample_seed"])
random.shuffle(ram_list)
sim_lists = sim_lists[:1] + ram_list
list_input_simulator = [self.find_multiple_input(a) for a in sim_lists]
assert len(list_input_simulator) > 0, "NO input images in simulator_input folder."
# apply num_input_samples to limit number of images. // to save time in regression for quicker test.
n_max_input = self.config["dynasty"]["num_input_samples"]
list_input_simulator = list_input_simulator[:n_max_input]
return list_input_simulator
def check_input_files(self):
"""Examine the input text files in knerex_input / simlulator_input folder
There should be at least 1 input images in knerex_input for datapath analysis, which is essential for quantization.
There should be at least 1 input images in simulator_input folder, which is used for dynasty / csim / dongle inference. Our regression are using the file name `test_input.txt` as default file name for bit-true-match. If there is no file named "test_input.txt", a random file in the simulator_input folder will be picked and linked as test_input.txt.
For models with multiple input nodes, there should be SAME filename, e.g., `camera_002.txt` in
* knerex_input / simulator_input , for 1st input node
* knerex_input_1 / simulator_input_1, for 2nd input node
* knerex_input_2 / simulator_input_2, for 3rd input node
* ... if necessary
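        Example layout for a model with 2 input nodes (illustrative file name):
            input/knerex_input/camera_002.txt          (1st input node)
            input/knerex_input_1/camera_002.txt        (2nd input node)
            input/simulator_input/test_input.txt
            input/simulator_input_1/test_input.txt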
"""
# '**/*.txt' will find all txt files
# knerex will use all txt in knerex_input folder
self.list_input_knerex = [self.find_multiple_input(a) for a in list(pathlib.Path(self.path["dir_knerex"]).glob("*.txt"))]
assert len(self.list_input_knerex) > 0, "NO input images in knerex_input folder."
# dynasty will pick text from simulator_input folder
self.list_input_simulator = self.find_simulator_input_list(pathlib.Path(self.path["dir_simulator"]))
assert len(self.list_input_simulator) > 0, "NO input images in simulator_input folder."
# `test_input.txt` in `simulator_input` will be used for bit-true-match check by default
self.list_input_btm = [self.find_multiple_input(a) for a in list(pathlib.Path(self.path["dir_simulator"]).glob("test_input.txt"))]
assert len(self.list_input_btm) == 1, f"""NO test_input.txt in {self.path["dir_simulator"]} folder."""
# check input files
self.logger.info("Found {} input image for knerex".format(len(self.list_input_knerex)))
self.logger.info("Found {} input image for simulator".format(len(self.list_input_simulator)))
# HACK: Create noise input
if futils.get_switch_value(self.config["module_run"], "piano_dynasty_noise", False):
sigma_levels = self.config["dynasty"]["noise_sigma"]
p_input = self.model_path / "input"
self.list_input_simulator_noise = {}
for p_simu in p_input.glob("simulator_input*"):
if "_sigma" in p_simu.name: # don't repeat itself
continue
futils.create_noise_input_folder(p_simu, sigma_levels)
for sigma in sigma_levels:
p_simu = p_input / "simulator_input_sigma{}".format(sigma)
                assert p_simu.exists(), f"{p_simu} does not exist."
self.list_input_simulator_noise[sigma] = self.find_simulator_input_list(p_simu)
        # create a link for test_input.txt if necessary
        # since models may be linked from model_source, this may fail.
if self.config["dynasty"]["regression_input"] == "default":
self.fn_input_default = [self.find_multiple_input(self.path["dir_simulator"] / self.btm_txt, verify_exist=False)]
if not pathlib.Path(self.fn_input_default[0][0]).exists():
                self.logger.warning("missing simulator_input/{}. trying to link.".format(self.btm_txt))
for i_from, i_to in zip(self.list_input_simulator[0], self.fn_input_default[0]):
futils.safe_link(i_from, i_to)
def check_onnx_io(self, origin_info):
"""Get onnx ioinfo from onnx file. This will only get some simple information about input/output nodes. Example: .
Output:
* self.io_nodes["input"] will contain input nodes name and their order
* needed by knerex / dynasty before compiler
A more accurate way is to call load_compiler_ioinfo() which will update self.io_nodes with more information. However this must run after compiler generate ioinfo.csv
"""
self.io_nodes = {}
input_nodes, output_nodes, opset = origin_info.get_ioinfo()
        assert len(input_nodes) > 0, "Onnx: found no input nodes!"
        # NOTE: we assume the input nodes are in the same order for 520/720/etc.
        # otherwise the input_lots.json would differ between hardware platforms
self.io_nodes["input"] = input_nodes
def save_regression_json(self):
"""Dump this regression config for debug"""
if self.is_big_model:
with open(self.path["export_regression_json"], "w") as f:
# remove "snr_ref" from self.config before saving.
d = copy.deepcopy(self.config)
d.pop('snr_ref', None)
d.pop('map_model_id', None)
# d.pop('hw_mode_on', None)
json.dump(d, f, indent=4, sort_keys=False, default=str)
def get_scaled_onnx_source(self, hw_mode):
""" Find the targeted onnx file by config for btm.
- Format: onnx/bie
- Optimization: scaled/wqbi
"""
model_format = futils.get_switch_value(self.config["compiler_piano"], "model_format", "bie")
model_opt = futils.get_switch_value(self.config["compiler_piano"], "model_optimize", "wqbi")
model_key = "kdp{}_{}_piano_{}".format(hw_mode, model_opt, model_format)
fn_knerex = self.map_onnx[model_key]
fn_json = "{}.json".format(fn_knerex)
dynasty_mode = "{}{}".format(hw_mode, fconsts.MODEL_RELEASE[model_opt])
# need to release this in toolchain
decomp_onnx = pathlib.Path(self.map_onnx[f"kdp{hw_mode}_decomp_piano_onnx"])
return pathlib.Path(fn_knerex), pathlib.Path(fn_json), dynasty_mode, decomp_onnx
def get_input_folders(self, input_nodes, first_input_folder):
"""Generate dictionary of input folders for knerex."""
if not os.path.exists(first_input_folder):
raise RegressionError("general/Missing input", self.model_id)
input_folders = {}
# at least one input
input_folders[input_nodes[0]] = first_input_folder
# if multi inputs
for i_name, this_name in enumerate(input_nodes[1:]):
# NOTE: verify multi input node folder
self.logger.info("Check input folder {}/{}: \"{}\". ".format(i_name + 2, len(input_nodes), this_name))
this_dir = "{}_{}".format(first_input_folder, i_name + 1)
input_folders[this_name] = this_dir
if not os.path.exists(this_dir):
self.logger.critical(
"MISSING input folder {}/{}: node \"{}\", input folder expect at \"{}\". "
.format(i_name + 2, len(input_nodes), this_name, this_dir))
raise RegressionError("general/Missing input", self.model_id)
return input_folders
def generate_knerex_config(self, *, hw_mode):
"""
Generate config json for knerex using template.
Settings include per regression / per model.
Output file:
* `updater_NNN.json` for platform `NNN`.
"""
input_nodes = self.io_nodes["input"]
fn_json, dir_input_1st = self.path[f"updater_{hw_mode}_json"], self.path["dir_knerex"]
fn_json.parent.mkdir(parents=True, exist_ok=True)
input_folders = self.get_input_folders(input_nodes, dir_input_1st)
conf = {}
# TODO: remove t, use keys from config["knerex"]
t = [
"verbose",
"percentile",
"same_scale",
"per_channel_radix",
"output_scale",
"output_radix",
"cpu_scale",
"cpu_radix",
"fixed_scale_mode",
"max_scale",
"data_analysis_threads",
"datapath_range_method",
"outlier_factor",
"bn_weight_pct",
"conv_weight_pct",
"num_input_samples",
"dump_level",
"datapath_bitwidth_mode",
"weight_bitwidth_mode",
"model_in_bitwidth_mode",
"model_out_bitwidth_mode",
"cpu_bitwidth_mode",
"datapath_mix_percentile",
"weight_mix_percentile",
"data_analysis_pct", # outliers
"need_additional_data_analysis_pct",
"additional_data_analysis_pcts",
"dynamic_range_based_on_bitwidth"
]
# copy knerex configs from config
for k in t:
conf[k] = self.config["knerex"][k]
input_shape = self.config["dynasty"]["input_shape"]
convert = {"onnx_shape": "1", "channel_last": "0"}
conf["shape_order"] = convert.get(input_shape, "1")
conf["type"] = fconsts.KNEREX_UPDATER_TYPE[hw_mode]
# TODELETE
# def get_test_config():
# # test_config.json for stc, but with some exceptions.
# if self.is_big_model or hw_mode in [520]:
# test_config = ""
# else:
# # for stc / mtc / etc
# test_config = self.path[f"json_hack_{hw_mode}"]
# bw_dp = self.config["knerex"]["datapath_bitwidth_mode"]
# if hw_mode in [720, 730] and bw_dp in ["int16"]:
# test_config = ""
# return test_config
# per model settings.
# input files for knerex
        # from 0.24.0 on, will only use the decomposed bie from the compiler frontend
conf["fn_origin_onnx"] = self.map_onnx[f"kdp{hw_mode}_opt_piano_bie"]
conf["test_config"] = ""
conf["user_config_json"] = self.path["user_config_json"]
conf["qat_config"] = self.path[f"qat_{hw_mode}_config_json"]
# temp files.
conf["fn_dp_analysis_piano"] = self.path[f"temp_dpa_piano_{hw_mode}"]
conf["fn_wt_analysis_piano"] = self.path[f"temp_wta_piano_{hw_mode}"]
# output
conf["outmodel"] = self.map_onnx[f"kdp{hw_mode}_scaled_piano_bie"]
# render the json file
template = self.jinja_env.get_template(f"updater_{hw_mode}.json")
output = template.render(input_nodes=input_nodes, input_folders=input_folders, conf=conf)
with open(fn_json, "w") as f:
f.write(output)
# check before finish
assert pathlib.Path(fn_json).exists(), f"failed to create {fn_json}"
@run_module(module_name="auto/check compiler output")
def load_compiler_dump(self, *, hw_mode):
"""Check the output of compiler / batch compiler.
The command.bin/etc had a prefix if generate by batch compiler
"""
module_name = f"kdp{hw_mode}/load compiler dump"
self.logger.info(f"{module_name}")
dir_out = self.path["compiler_piano_{}_out".format(hw_mode)]
self.compiler_output[hw_mode] = compiler.locate_compiler_dump(dir_out, hw_mode)
def load_ioinfo_520(self):
"""Load ioinfo from radix.json.
Will use knerex generated radix.json and shape.json.
"""
hw_mode = 520
module_name = f"kdp{hw_mode}/load_ioinfo"
self.logger.info(f"check {module_name}")
_, fn_knerex_json, _, _ = self.get_scaled_onnx_source(hw_mode)
with open(fn_knerex_json, "r") as f:
d_radix = json.load(f)
t = list(self.path[f"knerex_output_{hw_mode}"].glob("*kdp520*SnrShapeInfo.json"))
fn_json_shape = t[0]
with open(fn_json_shape, "r") as f:
d_shape = json.load(f)
ioinfo = futils.get_ioinfo_from_knerex_json(d_radix, d_shape)
return ioinfo
@run_module(module_name="auto/parse_ioinfo")
def load_compiler_ioinfo(self, *, hw_mode):
"""Parse `ioinfo.csv` yielded by compiler to determine input nodes shapes.
NOTE:
this method requires compiler ouptut, so call it after compiler.
This function will load the ioinfo from compiler output,
- load `ioinfo.json` in compier output folder
- save to `self.io_nodes`, which include
- input nodes shapes / data format.
- output nodes shapes / data format.
- cpu nodes.
This function will also find corresponding the dynasty dump for golden.
It need to decide:
- which dynasty mode output folder (related to knerex optimization)
- which format (fx or fl)
"""
assert hw_mode in self.config["hw_mode_on"], "hw_mode is: {}, not in hw_mode_on {}".format(hw_mode, self.config["hw_mode_on"])
module_name = f"kdp{hw_mode}/parse_ioinfo"
self.logger.info(f"{module_name}")
if hw_mode in [520]:
ioinfo = self.load_ioinfo_520()
else:
fn_ioinfo = self.compiler_output[hw_mode]["ioinfo_json"]
ioinfo = compiler.load_ioinfo_json(fn_ioinfo)
# TODO: patch dp_in_names for later reference
input_nodes = [a["name"] for a in ioinfo["input"]]
output_nodes = [a["name"] for a in ioinfo["output"]]
cpu_nodes = [] # TODO
if len(input_nodes) == 0:
self.logger.critical("Input nodes cannot be found")
if len(output_nodes) == 0:
self.logger.critical("Output nodes cannot be found")
# find the golden in dynasty for btm
_, _, dynasty_mode, _ = self.get_scaled_onnx_source(hw_mode)
p_dump = self.path["btm_dump"]
p_dynasty_dump = p_dump / "mode_{}_piano".format(dynasty_mode)
p_csim_dump = p_dump / f"csim_{hw_mode}"
p_pld_report = p_dump / "pld_report"
# ini file for csim btm dump. default is test_input.txt
self.path[f"csim_{hw_mode}_ini"] = p_csim_dump / f"run_csim_{hw_mode}.ini"
self.path[f"csim_{hw_mode}_ini_pld"] = p_csim_dump / f"run_csim_{hw_mode}.pld.ini"
# prepare dynasty golden
if hw_mode in [720, 530]:
# could be fx.txt or fl.txt
golden_txt_fns = []
for i_dp, info_o in enumerate(ioinfo["output"]):
fmt = info_o["data_format"]
# TODO: confirm with Kai
if fmt == "RAW_FLOAT":
fn_output = "layer_output_{}_fl.txt".format(info_o["name"])
else:
fn_output = "layer_output_{}_fx.txt".format(info_o["name"])
golden_txt_fns.append(fn_output)
else: # only fx txt
golden_txt_fns = ["layer_output_{}_fx.txt".format(a["name"]) for a in ioinfo["output"]]
p_dynasty_golden = [p_dynasty_dump / fn for fn in golden_txt_fns]
# record information for bit-true-match. this is related to which text_input
self.io_nodes[("btm_text_input", hw_mode)] = self.btm_txt
self.io_nodes[("btm_dynasty_mode", hw_mode)] = dynasty_mode
self.io_nodes[("btm_dynasty_path", hw_mode)] = p_dynasty_dump
self.io_nodes[("btm_dynasty_golden_txt_fn", hw_mode)] = golden_txt_fns
self.io_nodes[("btm_dynasty_golden_txt_path", hw_mode)] = p_dynasty_golden
self.io_nodes[("btm_csim_path", hw_mode)] = p_csim_dump
# need for dynasty / csim btm debug
self.io_nodes[("pld_report", hw_mode)] = p_pld_report
# general info
self.io_nodes[("ioinfo", hw_mode)] = ioinfo
self.io_nodes[("input_node", hw_mode)] = input_nodes
self.io_nodes[("out_node", hw_mode)] = output_nodes
self.io_nodes[("cpu_node", hw_mode)] = cpu_nodes
# save for reference but only internal regression
if self.config["path"]["internal"]:
self.model_fx_report[(f"kdp{hw_mode}/btm_dynasty_path")] = p_dynasty_dump
for i in range(self.config["nef"]["inference_count"]):
p_nef_dump = p_dump / "nef_{}_output_{}".format(hw_mode, i)
self.io_nodes[("btm_nef_path", hw_mode, i)] = p_nef_dump
p_nef_kneron_plus_dump = p_dump / "nef_kneron_plus_{}_output_{}".format(hw_mode, i)
self.io_nodes[("btm_nef_kneron_plus_path", hw_mode, i)] = p_nef_kneron_plus_dump
@run_module("auto/gen_csim_ini")
def generate_csim_ini(self, *, hw_mode):
"""
create .ini config for csim using jinja2 template
per 520/720/530/730/630.
CSIM 520 will not use this .ini config
CSIM 720/530/730/630 will use this .ini file directly
Input files:
* ioinfo.csv from compiler output.
* model files for 520/720/530/530:
* weight.bin
* command.bin
* setup.bin
* apb.npu
* model files for 540/730:
* model_NNN.kne
* input file for inference
* dynasty dumped input file, prepared by `data_convert`
* `output/results/FN_INPUT/model_520-wqbi_piano/layer_input_*.bin`
Output files:
* run_csim_NNN.ini
"""
self.logger.info(f"generating csim ini for {hw_mode}")
assert hw_mode in self.config["hw_mode_on"], "hw_mode is: {}, not in hw_mode_on {}".format(hw_mode, self.config["hw_mode_on"])
# for piano compiler output
p_compiler = pathlib.Path(self.path["compiler_piano_{}_out".format(hw_mode)])
p_csim_dump = self.io_nodes[("btm_csim_path", hw_mode)]
bin_pair = self.io_nodes[("btm_csim_in_bin", hw_mode)]
golden_txt = self.io_nodes[("btm_dynasty_golden_txt_path", hw_mode)]
# RTL-release need to set this to 3
dump_core_opt = self.config["csim"]["dump_core_opt"]
# generate ini for normal csim
template = self.jinja_env.get_template(f"run_csim_{hw_mode}.ini")
fn_ini = self.path["csim_{}_ini".format(hw_mode)]
csim.gen_csim_ini(bin_pair, p_compiler, hw_mode,
template=template,
fn_ini=fn_ini,
golden_txts=golden_txt,
dump_core_opt=dump_core_opt)
# function output
self.io_nodes[("btm_csim_in", hw_mode)] = [[p_csim_dump, fn_ini]]
# generate ini for pld csim
template_pld_dump = self.jinja_env.get_template(f"run_csim_{hw_mode}.pld.ini")
fn_ini_pld = self.path["csim_{}_ini_pld".format(hw_mode)]
csim.gen_csim_ini(bin_pair, p_compiler, hw_mode,
template=template_pld_dump,
fn_ini=fn_ini_pld,
golden_txts=golden_txt)
# function output
self.io_nodes[("btm_csim_in_pld", hw_mode)] = [[p_csim_dump, fn_ini_pld]]
@run_module(module_name="kdp520/convert_rgba")
def data_convert_520(self, *, hw_mode):
"""Convert input.txt pair to csim.bin. """
module_name = "kdp520/data_convert"
self.logger.info(f"check {module_name}")
# Generate input bins for csim
        # previously this used self.io_nodes["input"], which has the same order as the onnx input nodes
p_csim_dump = self.io_nodes[("btm_csim_path", hw_mode)]
p_csim_dump.mkdir(exist_ok=True, parents=True)
info_in = self.io_nodes[("ioinfo", hw_mode)]["input"]
if self.is_big_model:
list_input_bin = csim.txt2bin_rgba(self.list_input_btm, info_in, p_csim_dump)
else: # only stc, no mtc
list_input_bin = csim.txt2bin_seq(self.list_input_btm, info_in, p_csim_dump)
# assert list_input_bin.keys() == [0]
# function output
self.io_nodes[("btm_csim_in_bin", hw_mode)] = list_input_bin[0]
# TODO: why we need list_input_bin_rtl?
# TODO: if compiler specify RAW_FLOAT, need to use dynasty/_fl.bin?
return
@run_module(module_name="auto/data_convert")
def data_convert(self, *, hw_mode):
"""Convert input.txt pair to csim.bin.
* no supporting 520.
Input files:
* dynasty input text files.
"""
module_name = f"kdp{hw_mode}/data_convert"
self.logger.info(f"check {module_name}")
# Get input bins for csim
        # previously this used self.io_nodes["input"], which has the same order as the onnx input nodes,
        # but the compiler may use a different order; refer to ioinfo.csv
        # NOTE: files referred to in the ini are written as paths relative to the ini (a.k.a. the output folder)
p_csim_dump = self.io_nodes[("btm_csim_path", hw_mode)]
info_in = self.io_nodes[("ioinfo", hw_mode)]["input"]
csim_bin_sqt = csim.txt2bin_seq(self.list_input_btm, info_in, p_csim_dump)
list_input_bin, cmds = csim.data_convert(csim_bin_sqt,
info_in,
p_out=p_csim_dump)
self.save_command(module_name, "\n".join(cmds))
# assert list_input_bin.keys() == [0]
# function output
self.io_nodes[("btm_csim_in_bin", hw_mode)] = list_input_bin[0]
# TODO: why we need list_input_bin_rtl?
# TODO: if compiler specify RAW_FLOAT, need to use dynasty/_fl.bin?
return
def find_multiple_input(self, fn_input0, verify_exist=True):
"""Look for (possible) multiple INPUT NODES for this MODEL.
give 1st input image name, give a list with whole input set (might be 1 or more.)
TODO: need refactor into utils
"""
fn_base = fn_input0.name
p_base = fn_input0.parent.parent
        # NOTE: str.rstrip strips a set of characters, not a suffix; remove a literal trailing "_0"
        path_prefix = re.sub(r"_0$", "", fn_input0.parent.name)
if verify_exist:
assert fn_input0.exists()
list_inputs = [str(fn_input0)]
input_nodes, _, _ = self.onnx_infos["origin"].get_ioinfo()
        # NOTE: currently determined by searching input folders.
# TODO: verify with onnx input number
for i_dir in range(1, len(input_nodes)):
next_input = p_base / f"{path_prefix}_{i_dir}" / fn_base
if verify_exist and not next_input.exists():
raise RegressionError("general/Missing input", self.model_id, msg="missing input: {}".format(next_input))
list_inputs.append(str(next_input))
return list_inputs
def est_memory_dynasty_fx(self):
"""
        Estimate how much memory is needed for dynasty-fx inference.
"""
# only some need to estimate
platforms_large_memory = [520, 720]
plts = [hw_mode for hw_mode in self.config["hw_mode_on"] if hw_mode in platforms_large_memory]
if len(plts) == 0:
return
est_avl_kB = futils.estimate_mem_available()
# TODO: what if multi-thread?
if self.est_mac_kB > est_avl_kB:
self.logger.error(f"WARNING: Estimated max memory need for dynasty fx {plts} is {self.est_mac_kB} kB.")
self.logger.error(f" Current available memory is {est_avl_kB} kB.")
@run_module(module_name="general/invalid_onnx")
def check_onnx_valid(self):
"""Report if this onnx is invalid
"""
if not self.onnx_infos["origin"].is_valid_onnx():
raise RegressionError("general/invalid_onnx", self.model_id)
def run_flow(self):
"""The main function for the kneron internal quantization flow.
Here it controls the sequence of module execution.
`config` defines which module to run.
For complicated process, e.g., bias adjust,
you can define multiple configs and call `run_flow(conf1)` and `run_flow(conf2)`, etc
"""
        # TODO: better flow control per platform, i.e., one platform failing will not affect the others
# some shortcuts
do_dynasty = self.config["module_run"]["piano_dynasty"]
do_csim = self.config["module_run"]["csim"]
do_dongle = self.config["module_run"]["run_nef_kneron_plus"]
self.logger.setLevel(self.config["regression"]["logging_level"])
# compiler frontend
if self.config["module_run"]["only_ip_evaluator"] or self.config["module_run"]["piano_knerex"]:
for hw_mode in self.config["hw_mode_on"]:
                # generate cpu node list and node mapping
self.run_compiler_frontend(hw_mode=hw_mode)
        # quantization
if self.config["module_run"]["piano_knerex"]:
for hw_mode in self.config["hw_mode_on"]:
# generate quantized model
self.generate_knerex_config(hw_mode=hw_mode)
self.run_knerex(hw_mode=hw_mode)
if self.config["compiler_piano"]["convert_enc"]:
self.convert_enc(hw_mode=hw_mode)
        # generate nef for hardware
if self.config["module_run"]["compiler_piano"]:
for hw_mode in self.config["hw_mode_on"]:
p_out = pathlib.Path(self.path["compiler_piano_{}_out".format(hw_mode)])
self.generate_nef(hw_mode=hw_mode, p_nef=p_out)
self.clean_opt()
if self.config["layer_statistics"]["weight_stats"]:
self.load_weight_bin_stats()
if do_dynasty:
if self.is_big_model:
# provide some early warning for dynasty memory usage
self.est_memory_dynasty_fx()
self.dir_output_list = self.run_dynasty_inference()
else:
            # if no dynasty run is scheduled, search the results folder for existing dynasty dumps.
dir_results = self.path["dir_output"] / "results"
self.dir_output_list = list(dir_results.glob("*.txt"))
if self.config["module_run"]["tflite"]:
self.run_tflite(self.list_input_simulator)
if self.config["module_run"]["onnxruntime"]:
self.run_onnxruntime(self.list_input_simulator)
if self.config["module_run"]["snr_calculation"]:
# for SNR of dynasty v2 calling.
self.run_dynasty_snr(self.dir_output_list)
if self.config["dynasty"]["regression_input"] == "all":
                # combine snr into the overall report
self.generate_snr_report()
self.clean_dynasty_output(self.dir_output_list)
# self.path["snr_csv"]
# snr collection to regression report
# redundant to verify_snr. TODELETE this function
# self.load_dynasty_snr_output()
if not self.config["path"]["internal"]:
# used by customer in toolchain
self.convert_snr_report()
for hw_mode in self.config["hw_mode_on"]:
self.verify_snr(hw_mode=hw_mode)
if self.config["module_run"]["verify_decomp_snr"]:
for hw_mode in self.config["hw_mode_on"]:
self.verify_decomp_snr(hw_mode=hw_mode)
if self.config["module_run"]["any_bi_enable"]:
self.verify_bias_adjust_performance()
if self.config["module_run"]["calculate_layer_statistics"]:
self.load_layer_statistics()
# PREPARE for csim/nef btm
if do_csim or do_dongle:
            # NOTE: load the ioinfo saved by the last run (which is supposed to exist)
for hw_mode in self.config["hw_mode_on"]:
self.load_compiler_dump(hw_mode=hw_mode)
self.load_compiler_ioinfo(hw_mode=hw_mode)
if hw_mode not in [520]:
# convert dynasty input for csim. no need for 520
# NOTE: in regression, we will only convert "test_input.txt" by default
self.data_convert(hw_mode=hw_mode)
else:
self.data_convert_520(hw_mode=hw_mode)
if do_csim:
for hw_mode in self.config["hw_mode_on"]:
if hw_mode == 520:
self.run_csim_520()
else:
self.generate_csim_ini(hw_mode=hw_mode)
self.run_csim(hw_mode=hw_mode)
self.btm_dyn_csim(hw_mode=hw_mode)
if self.config["module_run"]["csim_ci"] and hw_mode not in [520]:
self.run_csim_ci(hw_mode=hw_mode)
if self.config["module_run"]["rtl_cmd_check"] and hw_mode not in [520, 720]:
self.check_rtl_cmd(hw_mode=hw_mode)
if do_dongle:
inference_count = self.config["nef"]["inference_count"]
hw_dongle_available = [520, 720, 630] # 530
for hw_mode in hw_dongle_available:
if hw_mode in self.config["hw_mode_on"]:
self.run_nef_kneron_plus(hw_mode=hw_mode, number_try=inference_count)
for i in range(inference_count):
self.btm_csim_nef(hw_mode=hw_mode, number_try=i)
# self.btm_dyn_nef_kneron_plus(hw_mode=hw_mode, number_try=i)
self.module_status["general"]["Success"] = True
self.gen_fx_report()
self.post_clean_up()
        # model_fx_release is a list of files to be released after gen_fx_model
return self.model_fx_release
@staticmethod
def load_compiler_bie_json(fn_bie, hw_mode):
"""Load js_fns from compiler frontend generated bie. """
t1_j = util_lib.load_zip_jsons(fn_bie)
raw_reports = {}
raw_reports["fe2origin"] = t1_j["node_mapping_opt_fe_to_origin.json"]
raw_reports["fe2be"] = t1_j["node_mapping_opt_fe_to_opt_be.json"]
raw_reports["ori_node_type"] = t1_j["node_types_origin.json"]
if hw_mode not in [520]:
# not available for 520
raw_reports["fe_node_type"] = t1_j["node_types_opt_fe.json"]
raw_reports["be_node_format"] = t1_j["node_format_opt_be.json"]
return raw_reports
@staticmethod
def load_knerex_bie_json(bie_release):
"""Load the jsons from knerex bie2 for fx report."""
        # we assume a bie will always be generated; the bie could be scaled, wqbi, ... optimized
        # this step will not work if knerex did not run,
        # for example, in mode 0 (ip-eval-only)
        # TODELETE: temp check. this should be a bie.
assert not bie_release.name.endswith(".onnx"), f"should not release onnx: {bie_release}"
t2_j = util_lib.load_zip_jsons(bie_release)
d = {}
for k, v in {
"node_type": "model_info.json",
"node_shape": "shape_info.json",
"node_radix": "radix_info.json"
}.items():
d[k] = t2_j[v]
return d
def load_compiler_ip_eval_info(self, hw_mode):
"""Load json from compiler backend (w iip eval) info."""
d = {} # to save results
p_compiler_out = self.path["compiler_piano_{}_out".format(hw_mode)]
js_fns = {} # file list
js_fns["be_node_analysis"] = p_compiler_out / "BE_node_evaluator_result.json"
        # load all json report files into d:
for k, p in js_fns.items():
if p.exists():
with open(p, "r") as f:
d[k] = json.load(f)
if d[k] is None:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg=f"{p.name} is empty.")
return d
@staticmethod
def get_node_type(raw_reports, node_fe, nodes_origin):
"""Find the type (NPU/CPU/FUSED) for node_fe."""
try:
# get the info from knerex first
node_type = raw_reports["node_type"][node_fe]["Mode"]
        except Exception:
try:
node_type = raw_reports["fe_node_type"][node_fe]
            except Exception:
try:
                    # for 520, it falls back to origin_node_type
# BUG: just use the first origin node
node_type = raw_reports["ori_node_type"][nodes_origin[0]]
                except Exception:
# print(raw_reports.keys())
node_type = "FUSED"
if node_type == "NONE":
node_type = "FUSED"
return node_type
def load_snr_report(self, hw_mode, raw_reports):
"""Load snr report for hw_mode."""
try:
if not self.path["snr_csv"].exists():
return {}, []
ref_name = "mode_{}_piano".format(self.config["snr"]["ref"][hw_mode])
deg_name = "mode_{}_piano".format(self.config["snr"]["deg"][hw_mode])
snr_types = self.config["snr"]["report_snr_col"]
snr_result = get_case_output(self.path["snr_csv"], ref_mode=ref_name, deg_mode=deg_name, col_snr=snr_types, out_dp="all")
d_snr = snr_result.droplevel(["Category", "Model", "Mode_deg", "Mode_ref"], axis=0).to_dict("index")
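            # d_snr now maps each datapath name to a dict of snr columns,
            # e.g. {"conv1_out": {"snr": 42.0, ...}} (illustrative names/values)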
# HACK: special process for output node. extra copy for easier lookup
for dp_out in raw_reports["node_shape"]["dp_out"]:
# NOTE: dp_out in dynasty dump / snr need to be called with clean_name
dp_out = futils.clean_name(dp_out)
dpo2 = f"output_{dp_out}"
if (dp_out not in d_snr) and (dpo2 in d_snr):
d_snr[dp_out] = d_snr[dpo2]
return d_snr, snr_result.columns
        except Exception:
return {}, []
@staticmethod
def load_fe_nodes(raw_reports):
if "node_shape" in raw_reports:
nodes_decomp, _, node_decomp2dp, _, _, _, _, _, _, _ = futils.parse_shape_info(raw_reports["node_shape"])
sort_on_cmd_idx = False
else:
# detour for ip eval. no knerex results
sort_on_cmd_idx = True
nodes_decomp = list(raw_reports["fe2origin"].keys())
node_decomp2dp = {}
return nodes_decomp, node_decomp2dp, sort_on_cmd_idx
def load_raw_json_reports(self, hw_mode):
"""Collect raw json from compiler frontend / knerex / compiler ip eval."""
raw_reports = {}
# loaded json from compiler frontend bie
f_bie = self.map_onnx[f"kdp{hw_mode}_opt_piano_bie"]
d = self.load_compiler_bie_json(f_bie, hw_mode)
raw_reports.update(d)
        # load the jsons from the knerex-generated bie
        # we assume a bie will always be generated; it could be scaled, wqbi, ... optimized
        # this step will not work if knerex did not run,
        # for example, it is not available in mode 0 (ip-eval-only)
k = f"kdp{hw_mode}/bie"
if k in self.model_fx_release:
bie_release = self.model_fx_release[k]
d = self.load_knerex_bie_json(bie_release)
raw_reports.update(d)
        # load hw info per node (from the ip evaluator)
        # actually it is the backend node evaluation
d = self.load_compiler_ip_eval_info(hw_mode)
raw_reports.update(d)
return raw_reports
@staticmethod
def record2df_fx(temp_rec, sort_on_cmd_idx, snr_cols):
"""Convert records to dataframe for fx report."""
# some columns may have NaN, not possible to use .astype
rep_dtld = pd.DataFrame.from_records(temp_rec)
# clean up. remove columns which are all None, all 0, all N/A
cols_to_drop = [
col for col in rep_dtld.columns
if all(rep_dtld[col].isna()) or all(
rep_dtld[col] == 'N/A') or all(rep_dtld[col] == 0)
]
rep_dtld.drop(columns=cols_to_drop, inplace=True)
# in case ip-eval-only
if sort_on_cmd_idx and "CMD_node_idx" in rep_dtld.columns:
rep_dtld.loc[rep_dtld['CMD_node_idx'].isna(), 'type'] = 'FUSED'
rep_dtld['CMD_node_idx'] = pd.to_numeric(rep_dtld['CMD_node_idx'], errors='coerce').astype('Int64')
rep_dtld.sort_values(by='CMD_node_idx', na_position='last', inplace=True)
# move snr columns to front of df
for name_col in snr_cols:
if name_col in rep_dtld.columns:
t_column = rep_dtld.pop(name_col)
rep_dtld.insert(1, name_col, t_column)
return rep_dtld
@run_module(module_name="general/gen_fx_report")
def gen_fx_report(self):
"""Generate the fx report for quantization process.
The report will contain:
- ModelInfo.json from knerex dump.
- bitwidth info
- snr info
- hw info from ip_evaluator
"""
detailed_reports = OrderedDict()
for hw_mode in self.config["hw_mode_on"]:
###################################################################################
# collect report files
raw_reports = self.load_raw_json_reports(hw_mode)
fmt_col_cvrt = {"inputs": "in_fmt", "outputs": "out_fmt"}
d_snr, snr_cols = self.load_snr_report(hw_mode, raw_reports)
nodes_decomp, node_decomp2dp, sort_on_cmd_idx = self.load_fe_nodes(raw_reports)
###################################################################################
# now combine all into a detailed report
temp_rec = []
for node_fe in nodes_decomp:
# node frontend is the KEY for table
# find all nodes backend that include this node_fe
if node_fe not in raw_reports["fe2be"]:
nodes_be = [None]
else:
nodes_be = raw_reports["fe2be"][node_fe]
if len(nodes_be) == 0:
nodes_be = [None]
# find all nodes origin
nodes_origin = raw_reports["fe2origin"].get(node_fe, [None])
# find node type
node_type = self.get_node_type(raw_reports, node_fe, nodes_origin)
# snr info, if available. this is per dp
# TODO: currently we assume one fe -> one dp. but soon we need to support multiple output
try:
this_dp = futils.clean_name(node_decomp2dp.get(node_fe, [None])[0])
this_snr = d_snr.get(this_dp, None)
                except Exception:
this_snr = None
# get bitwidth info
try:
bw_in = raw_reports["node_radix"][node_fe].get("input_datapath_bitwidth", "N/A")
bw_out = raw_reports["node_radix"][node_fe].get("output_datapath_bitwidth", "N/A")
bw_wt = raw_reports["node_radix"][node_fe].get("weight_bitwidth", "N/A")
add_bw = True
                except Exception:
add_bw = False
for node_be in nodes_be:
# loop through backend nodes
for node_org in nodes_origin:
# first, node mapping
temp_d = OrderedDict()
temp_d["node"] = node_fe
temp_d["node origin"] = node_org
temp_d["type"] = node_type
if this_snr:
temp_d.update(this_snr)
# insert bw info
if add_bw:
temp_d["bw in"] = bw_in
temp_d["bw out"] = bw_out
temp_d["bw weight"] = bw_wt
# backend node ip evaluate
skip_be = False
if len(temp_rec) > 0 and "node backend" in temp_rec[-1]:
i = -1
last_node_be = ""
while last_node_be == "":
last_node_be = temp_rec[i]["node backend"]
i -= 1
if (not sort_on_cmd_idx) and node_be == last_node_be:
# if full run and
# if same as above, put empty or ↑
skip_be = True
# full run
temp_d["node backend"] = ""
if "be_node_analysis" in raw_reports and node_be in raw_reports["be_node_analysis"]:
for k in raw_reports["be_node_analysis"][node_be]:
temp_d[k] = ""
if "be_node_format" in raw_reports and node_be in raw_reports["be_node_format"]:
for k in raw_reports["be_node_format"][node_be]:
temp_d[fmt_col_cvrt[k]] = ""
if not skip_be:
temp_d["node backend"] = node_be
if "be_node_analysis" in raw_reports and node_be in raw_reports["be_node_analysis"]:
# NOTE: no node analysis for 520
temp_d.update(raw_reports["be_node_analysis"][node_be])
if "be_node_format" in raw_reports and node_be in raw_reports["be_node_format"]:
iofmt = raw_reports["be_node_format"][node_be]
for k1, v1 in iofmt.items():
temp_d[fmt_col_cvrt[k1]] = futils.pprint_dict(v1)
temp_rec.append(temp_d)
detailed_reports[hw_mode] = self.record2df_fx(temp_rec, sort_on_cmd_idx, snr_cols)
        # now collect the overall summary
self.model_fx_release["gen fx model report"] = self.path["model_fx_html"]
self.model_fx_release["gen fx model json"] = self.path["model_fx_json"]
for k, v in self.model_fx_release.items():
# those files will be moved to release folder. so just print file name
self.model_fx_report[k] = v.name
df_summary = pd.DataFrame.from_dict(self.model_fx_report, orient="index", columns=["info"])
# we need this file for app_release and gen_fx_model call
with open(self.path["model_fx_json"], "w") as f:
json.dump(self.model_fx_report, f, indent=4, sort_keys=False, default=str)
# write multi-dataframe to html
with open(self.path["model_fx_html"], 'w') as f:
f.write('<h1>Summary</h1><br><hr>')
f.write(f"{df_summary.to_html(border=2)}<br><hr>")
for k, df in detailed_reports.items():
f.write(f"<h2>kdp{k}</h2><br><hr>")
f.write(f"{df.to_html(border=1)}<br><hr>")
def save_summary(self):
"""Save summary html only, when submoudles failed.
NOTE: this method will be called in run_single_case.
Not supposed to call in run_flow here.
"""
        # now collect the overall summary
self.model_fx_release["gen fx model report"] = self.path["model_fx_html"]
self.model_fx_release["gen fx model json"] = self.path["model_fx_json"]
for k, v in self.model_fx_release.items():
# those files will be moved to release folder. so just print file name
self.model_fx_report[k] = v.name
# we need this file for app_release and gen_fx_model call
with open(self.path["model_fx_json"], "w") as f:
json.dump(self.model_fx_report, f, indent=4, sort_keys=False, default=str)
df_summary = pd.DataFrame.from_dict(self.model_fx_report, orient="index", columns=["info"])
# write multi-dataframe to html
with open(self.path["model_fx_html"], 'w') as f:
f.write('<h1>Summary</h1><br><hr>')
f.write(f"{df_summary.to_html(border=2)}<br><hr>")
        # even if the case failed, we still try to provide the summary report.
return self.model_fx_release
@run_module(module_name="auto/csim_ci")
def run_csim_ci(self, *, hw_mode):
"""
        Internal use only, for csim release.
        Only keeps the files needed by csim ci.
"""
model_dir = self.model_path
target_dir = pathlib.Path("{}/{}/{}".format(self.config["path"][f"csim_{hw_mode}_ci_dir"], model_dir.parent.name, model_dir.name))
target_output_dir = pathlib.Path("{}/{}/{}/output/".format(self.config["path"][f"csim_{hw_mode}_ci_dir"], model_dir.parent.name, model_dir.name))
compiler_dir = f"{self.model_path}/output/compiler_piano_output_{hw_mode}/"
target_compiler_dir = pathlib.Path("{}/{}/{}/output/compiler_piano_output_{}/".format(self.config["path"][f"csim_{hw_mode}_ci_dir"], model_dir.parent.name, model_dir.name, hw_mode))
dynasty_dump_dir = f"{self.model_path}/output/results/{self.btm_txt}/mode_{hw_mode}_piano/"
target_dynasty_dump_dir = pathlib.Path("{}/{}/{}/output/results/{}/mode_{}_piano/".format(self.config["path"][f"csim_{hw_mode}_ci_dir"], model_dir.parent.name, model_dir.name, self.btm_txt, hw_mode))
if os.path.exists(target_dir):
shutil.rmtree(target_dir)
shutil.copytree(dynasty_dump_dir, target_dynasty_dump_dir)
shutil.copytree(compiler_dir, target_compiler_dir)
combine_cmd = f"cp -r {model_dir}/output/run_csim_{hw_mode}.ini {target_output_dir}"
cp = futils.run_bash_script(combine_cmd)
if cp.returncode != 0:
raise RegressionError(f"kdp{hw_mode}/csim ci", self.model_id, msg=f"Err: {cp.returncode}")
@run_module(module_name="auto/rtl_cmd_check")
def check_rtl_cmd(self, *, hw_mode):
"""compare command.bin inst.hex
# Usage: python3 ./rtlCmdCmpBinTxt.py command.bin inst.hex.opt
# TODO: check who will use this.
"""
        # TODO: link_bin has been removed.
raise NotImplementedError()
rtl_cmd_cmp = self.config["path"]["binary"]["csim"]["rtl_cmd_cmp"]
link_bin = self.config["path"]["binary"]["compiler"]["link_bin"]
compile_and_gen_conv_all = self.config["path"]["binary"]["compiler"]["compile_and_gen_conv_all"]
dir_rtl = "{}/rtl".format(self.model_path)
dir_rtl_cmd_cmp = pathlib.Path("{}/rtl/cmd_cmp".format(self.model_path))
inst_hex_opt = "{}/output.rtl.{}.testcase/cmd_cmp/inst.hex.opt".format(dir_rtl_cmd_cmp, hw_mode)
model_output_dir = "{}/output/".format(self.model_path)
if dir_rtl_cmd_cmp.exists():
shutil.rmtree(dir_rtl_cmd_cmp)
pathlib.Path(dir_rtl_cmd_cmp).mkdir(mode=0o770, parents=True, exist_ok=True)
cp_case_for_rtl_gen = "cp -r {} {}".format(model_output_dir, dir_rtl_cmd_cmp)
subprocess.run(cp_case_for_rtl_gen, shell=True, executable="/bin/bash", check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
compiler_bin = self.config["path"]["binary"]["compiler"]["compiler"]
if self.is_big_model:
gen_rtl_case_command = "pushd {} > /dev/null && {} {}; {} {} {} model_opt && popd > /dev/null".format(dir_rtl_cmd_cmp, link_bin, compiler_bin, compile_and_gen_conv_all, dir_rtl, hw_mode)
elif self.is_multi_layer:
gen_rtl_case_command = "pushd {} > /dev/null && {} {}; {} {} {} multi && popd > /dev/null".format(dir_rtl_cmd_cmp, link_bin, compiler_bin, compile_and_gen_conv_all, dir_rtl, hw_mode)
elif self.is_single_layer:
gen_rtl_case_command = "pushd {} > /dev/null && {} {}; {} {} {} single && popd > /dev/null".format(dir_rtl_cmd_cmp, link_bin, compiler_bin, compile_and_gen_conv_all, dir_rtl, hw_mode)
subprocess.run(gen_rtl_case_command, shell=True, executable="/bin/bash", check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
cmd_cmp_command = "{} {}/output/compiler_piano_output_{}/command.bin {}".format(rtl_cmd_cmp, self.model_path, hw_mode, inst_hex_opt)
subprocess.run(cmd_cmp_command, shell=True, executable="/bin/bash", check=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
@run_module(module_name="auto/verify_decomp_snr")
def verify_decomp_snr(self, *, hw_mode):
"""
should this be combined into snr_calculate?
"""
        snr_min = 80  # SNR must be larger than 80dB
df = pd.read_csv(self.fn_report, index_col=["ref", "deg", "layer"])
out_layer_names = set(df.index.get_level_values("layer"))
deg_modes = set(df.index.get_level_values("deg"))
pairs = []
mode_ref = "mode_float_piano"
mode_deg = "mode_{}decomp_piano".format(self.config["snr"]["deg"][hw_mode])
if mode_deg in deg_modes:
            # check that corresponding SNR results exist
for out_name in out_layer_names:
pairs.append((mode_ref, mode_deg, out_name))
# pairs are SNR we want to verify
snr_name = "SNR_With_Mean"
# TODO: put this into columns. NOT using assert
        for i_deg in pairs:
            assert df.loc[i_deg, snr_name] > snr_min, f"decomp SNR {df.loc[i_deg, snr_name]:.1f}dB <= {snr_min}dB for {i_deg}"
@run_module(module_name="auto/verify_snr")
def verify_snr(self, *, hw_mode):
"""Quick check on model snr reach threshold
After snr_calculation, the snr_per_layer.csv is generated.
The snr_report.csv was extract from per_layer.csv which include output nodes only.
This function is to pick one or both snr columns from snr_report.csv
according to settings.
TODO:
- should this be combined into snr_calculate?
it used to work for multi platform/hw_mode at same time
removed to simplify
"""
        if self.is_big_model:
            snr_min = 10  # a big model must be larger than 10dB
        else:
            snr_min = 20  # a layer must be larger than 20dB
df = pd.read_csv(self.fn_report, index_col=["ref", "deg", "layer"])
out_layer_names = set(df.index.get_level_values("layer"))
deg_modes = set(df.index.get_level_values("deg"))
pairs = []
mode_ref = "mode_{}_piano".format(self.config["snr"]["ref"][hw_mode])
mode_deg = "mode_{}_piano".format(self.config["snr"]["deg"][hw_mode])
if mode_deg in deg_modes:
            # check that corresponding SNR results exist
for out_name in out_layer_names:
pairs.append((mode_ref, mode_deg, out_name))
# pairs are SNR we want to verify
snr_names = self.config["snr"]["report_snr_col"]
for snr_name in snr_names:
details = []
for i_deg in pairs:
# per output
this_snr = df.loc[i_deg, snr_name]
if this_snr < snr_min:
prefix = "⋖T:"
else:
prefix = "⋗T:"
msg = f"{prefix} {this_snr:5.1f}dB ({i_deg[2]})"
details.append(msg)
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/{snr_name} (T={snr_min:.0f}dB)", "//".join(sorted(details))))
@run_module(module_name="general/verify_bias_adjust")
def verify_bias_adjust_performance(self):
"""this verify step is to report on module success/fail in flow report.
bias adjust performance detailed compare report are generated in during regression.py:
snr_calculator.py/gather_all_bi_improve
"""
df = pd.read_csv(self.fn_report, index_col=["ref", "deg", "layer"])
out_layer_names = set(df.index.get_level_values("layer"))
ref_modes = set(df.index.get_level_values("ref"))
deg_modes = set(df.index.get_level_values("deg"))
pairs = []
for out_name in out_layer_names:
for comp, (ref, deg1, deg2) in fconsts.SNR_BI_IMPROVE.items():
mode_ref = "mode_{}_piano".format(ref)
mode_deg1 = "mode_{}_piano".format(deg1)
mode_deg2 = "mode_{}_piano".format(deg2)
if mode_deg1 in deg_modes and mode_deg2 in deg_modes and mode_ref in ref_modes:
# only if all three modes are running.
pairs.append(((mode_ref, mode_deg1, out_name), (mode_ref, mode_deg2, out_name)))
snr_name = "SNR_With_Mean"
for i_ref, i_deg in pairs:
improve = df.loc[i_deg, snr_name] - df.loc[i_ref, snr_name]
self.logger.info(
"Bias Adj improved = {} db = {} - {}. {}, {}".format(
improve, df.loc[i_deg, snr_name], df.loc[i_ref, snr_name],
i_deg, self.path["dir_output"]))
# TODO: just send the improve to some column. platform independent?
# TODO: remove run_module for this function
if improve < -0.5:
            # Don't use assert here; it would suppress the compiler/csim steps after it
            self.logger.error(f" ATTENTION: Bias adjust snr dropped by {improve}")
def load_weight_bin_stats(self):
        # NOTE: may run for only some of the modes in hw_mode_on
modes_on = self.config["hw_mode_on"]
for mode in modes_on:
compiler_output_path = self.path["dir_output"] / "compiler_{}".format(mode)
weight_bin_path = compiler_output_path / "weight.bin"
if os.path.exists(weight_bin_path):
get_weight_bin_stats(weight_bin_path, do_tile_analysis=self.config["layer_statistics"]["tile_analysis"])
else:
all_weight_bins = list(compiler_output_path.glob("**/*weight.bin"))
for subg_weight_bin in all_weight_bins:
subg_index = subg_weight_bin.parent.name
if subg_weight_bin.stat().st_size > 0:
get_weight_bin_stats(
str(subg_weight_bin),
subg_index,
do_tile_analysis=self.config["layer_statistics"]
["tile_analysis"])
return
@run_module("auto/convert_enc")
def convert_enc(self, *, hw_mode):
"""Encrypt select onnx of given platform and otimized level"""
model_convertor_bin = self.config["path"]["binary"]["compiler"]["model_converter"]
model_optized_type = self.config["compiler_piano"]["model_optimize"]
if model_optized_type == "scaled":
optimized_onnx = self.model_path / "output" / "knerex_{}".format(hw_mode) / "{}.kdp{}.{}.onnx".format(self.model_name, hw_mode, "scaled.quan")
assert optimized_onnx.exists(), "knerex opt onnx is scaled onnx, need to convert enc based on wq onnx, but wq onnx does not exist!!!"
elif model_optized_type == "wqbi":
optimized_onnx = self.model_path / "output" / "knerex_{}".format(hw_mode) / "{}.kdp{}.{}.onnx".format(self.model_name, hw_mode, "scaled.quan.wqbi")
assert optimized_onnx.exists(), "knerex opt onnx is wqbi onnx, but wqbi onnx does not exist!!!"
command = f"{model_convertor_bin} {optimized_onnx} {optimized_onnx}.enc > /dev/null"
cp = futils.run_bash_script(command, do_echo=True, fail_then_exit=True)
module_name = f"kdp{hw_mode}/convert_enc"
self.save_command(module_name, command)
return
def load_layer_statistics(self, base_dump="results"):
"""
collect some analysis/statistics on dynasty per layer dump/
"""
do_per_channel = self.config["layer_statistics"]["per_channel"]
do_difference_matrix = self.config["layer_statistics"]["do_difference_matrix"]
hw_code = self.config["hw_mode_on"][0]
dynasty_output_path = self.path["dir_output"] / base_dump
do_float = self.config["layer_statistics"]["do_float"]
stat_params = self.config["layer_statistics"]["params"]
no_plot = self.config["layer_statistics"]["no_plot"]
mode_list = self.config["layer_statistics"]["mode_on"]
self.logger.info("generating layer statistics, could be time consuming")
calculate_statistics(dynasty_output_path,
hw_code,
mode_list,
do_per_channel=do_per_channel,
do_diff_stat=do_difference_matrix,
do_float=do_float,
stat_params=stat_params,
no_plot=no_plot)
return
@run_module(module_name="general/tflite")
def run_tflite(self, input_list, base_dump="results"):
"""Inference with tflite and dump all layer float/fix result."""
module_name = "general/tflite"
tflite_dir = self.model_path / "input" / "{}.tflite".format(self.model_name)
tflite_dump_exec = self.config["path"]["binary"]["tflite"]["dump.py"]
        # TODO: multi-thread
# TODO: call python function?
# TODO: why called mode_tflite_float_noise?
for input_path in input_list:
            # DEBUG: input_path may now be a list of paths, for multi-input cases!!!
if "quant" in self.model_name:
out_dir = "{}/{}/{}/mode_tflite_fix_noise/".format(self.path["dir_output"], base_dump, input_path.name)
else:
out_dir = "{}/{}/{}/mode_tflite_float_noise/".format(self.path["dir_output"], base_dump, input_path.name)
pathlib.Path(out_dir).mkdir(mode=0o770, parents=True, exist_ok=True)
command = "python3 {} -o {} -i {} -t {} -l {}".format(tflite_dump_exec, out_dir, input_path, tflite_dir, "True")
self.save_command(module_name, command)
cp = futils.run_bash_script(command)
if cp.returncode != 0:
raise RegressionError("general/tflite", self.model_id, msg=f"Err: {cp.returncode}")
return
@run_module(module_name="general/onnxruntime")
def run_onnxruntime(self, input_list, base_dump="results"):
"""Inference with onnxruntime and dump final layer float result."""
module_name = "general/onnxruntime"
onnxruntime_dump_exec = self.config["path"]["binary"]["tflite"]["onnxruntime.py"]
onnx_dir = self.map_onnx["origin"]
        # TODO: multi-thread
# TODO: call python function?
# TODO: why called mode_onnxruntime_noise?
for input_path in input_list:
            # DEBUG: input_path may now be a list of paths, for multi-input cases!!!
out_dir = pathlib.Path("{}/{}/{}/mode_onnxruntime_noise/".format(self.path["dir_output"], base_dump, input_path.name))
out_dir.mkdir(parents=True, exist_ok=True)
command = "python3 {} -out {} -in {} -onnx {}".format(onnxruntime_dump_exec, out_dir, input_path, onnx_dir)
self.save_command(module_name, command)
cp = futils.run_bash_script(command)
if cp.returncode != 0:
raise RegressionError("general/onnxruntime", self.model_id, msg=f"Err: {cp.returncode}")
return
@run_module(module_name="general/snr cal")
def run_dynasty_snr(self, dir_output_list):
"""function to calculate snr for each input image
currently calculate when all input x mode done.
TODO: calculater per input file, after all modes done
"""
pc = "--pc" if self.config["snr"]["per_channel"] else ""
bin_snr = fconsts.P_FLOW / "snr_calculator_v2.py"
self.logger.info("calculating SNR for {} outputs.".format(len(dir_output_list)))
        # precaution against the bash argument-length limit:
        # with 1000 input txt files and ~50 chars per output path,
        # the command would be at least 50000 chars,
        # and the bash call will fail if it is too long.
        # Ref: https://stackoverflow.com/questions/19354870/bash-command-line-and-input-limit
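        # e.g. with 250 output dirs, chunker is assumed to yield batches of
        # 100/100/50, keeping each bash call well below the argument limit
        # (chunk size 100 is a conservative choice, not a hard limit).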
for dol in futils.chunker(dir_output_list, 100):
s_outs = " ".join([str(a) for a in dol])
command = f"python3 {bin_snr} single {pc} {s_outs}"
cp = futils.run_bash_script(command)
if cp.returncode != 0:
raise RegressionError("general/snr cal", self.model_id, msg=f"Err: {cp.returncode}")
def load_dynasty_snr_output(self):
"""Read dynasty snr report, keeps only the output layers.
Optional:
- (internal regression) add snr reference from previous.
"""
snr_types = self.config["snr"]["report_snr_col"]
for hw_mode in self.config["hw_mode_on"]:
try:
ref_name = "mode_{}_piano".format(self.config["snr"]["ref"][hw_mode])
deg_name = "mode_{}_piano".format(self.config["snr"]["deg"][hw_mode])
snr_result = get_case_output(self.path["snr_csv"], ref_mode=ref_name, deg_mode=deg_name, col_snr=snr_types)
            except Exception:
continue
for snr_type in snr_types:
snr_vals = snr_result[snr_type].values
snr_vals_string = ",".join(str(format(snr_val, '.0f')) for snr_val in snr_vals)
snr_k = f"kdp{hw_mode}/{snr_type}(dB)"
self.model_fx_report[snr_k] = snr_vals_string
# add snr reference if internal
if self.is_big_model and self.config["path"]["internal"]:
try:
# load reference.
# TODO: need to update when use new benchmark. try to use snr_k
snr_k_old = f"{snr_type}_{hw_mode}(dB)"
snr_ref = self.config["snr_ref"][futils.clean_case_name(self.model_name)][snr_k_old]
# use // to split snr and ref_snr
snr_vals_string += "//{}".format(snr_ref)
                    except Exception:
pass
signal("data_sender").send((self.model_id, snr_k, snr_vals_string))
def convert_snr_report(self):
"""
Read dynasty snr full report for release. will use "SNR_With_Mean" col
"""
if not self.path["snr_csv"].exists():
            # snr needs to be calculated but is sometimes not turned on, e.g., ip evaluator only
return None # will not export excel
# NOTE: customer will run only 1 mode per regression
df_snr = pd.read_csv(self.path["snr_csv"], index_col=["Model", "Mode_deg", "Mode_ref", "dump name"])
cols = [col for col in df_snr.columns if col in ["Input", "Layer_index", "SNR_With_Mean"]]
df_snr = df_snr[cols]
df_snr.rename(columns={"SNR_With_Mean": "SNR"}, inplace=True)
df_snr.to_excel(self.path["snr_excel"])
return self.path["snr_excel"]
@run_module(module_name="general/dynasty")
def run_dynasty_inference(self):
"""Run normal dynasty as configed for this test case."""
module_name = "general/dynasty"
self.logger.info(f"Run {module_name}")
mode_list = [k for k, v in self.config["mode_run"].items() if v]
input_list = self.list_input_simulator
dump_level = self.config["dynasty"]["do_dump"]
info_in = self.io_nodes["input"]
p_output = self.path["dir_output"] / "results"
dynasty_bin = self.config["path"]["binary"]["dynasty"]["binary"]
onnx_map = self.map_onnx
model_id = self.model_id
fn_dynasty_sh = self.path["dir_output"] / "run_dynasty.sh"
n_thread = self.config["dynasty"]["n_parallel_input"]
onnx_type = self.config["dynasty"]["piano_dynasty"]["onnx_source"]
shape_in = self.config["dynasty"]["input_shape"]
# ioinfo.json from compiler
# OBSOLETE / TODELETE
        # mainly to check the input_fmt and whether a conv is the first layer
ioinfo_map = self.path["ioinfo_json"]
# prepare dynasty list
mode_settings = [dynasty.gen_dynasty_mode_settings(mode_name,
onnx_map=onnx_map,
ioinfo_map=ioinfo_map,
which_onnx=onnx_type,
model_id=model_id)
for mode_name in mode_list]
d_list, dir_output_list = dynasty.gen_dynasty_list(mode_settings,
input_list,
info_in,
p_output,
dump_level=dump_level,
shape_in=shape_in)
# HACK: for noisy dynasty
if self.config["module_run"]["piano_dynasty_noise"]:
d_list_noise, d_out_list_noise = self.generate_dynasty_list_noise()
d_list.extend(d_list_noise)
dir_output_list.extend(d_out_list_noise)
# run all the dynasty inference
self.logger.info("Running dynasty with list of {}".format(len(d_list)))
cmds = dynasty.build_dynasty_cmd(d_list, dynasty_bin, fn_dynasty_sh)
fn_log = p_output / "dynasty.log"
dynasty.run_dynasty_command_parallel(self.model_id, fn_dynasty_sh, n_thread=n_thread, fn_err=fn_log)
# save commands with others
self.save_command(module_name, f"bash {fn_dynasty_sh}")
return dir_output_list
@run_module(module_name="general/dynasty noise")
def run_dynasty_inference_noise(self):
"""TODO. re-write generate_dynasty_list_noise below."""
raise NotImplementedError
# return dir_output_list
def generate_dynasty_list_noise(self):
"""Create dynasty noise list (expand mode+input) for regression.
HACK: use noise input for dynasty float
TODELETE
"""
raise NotImplementedError
# create mode and input_list
# NOTE: only noise input for float inference now.
noise_list = []
ref_modes = ["float"]
noise_levels = self.config["dynasty"]["noise_sigma"]
for ref_mode in ref_modes:
for nl in noise_levels:
noise_mode = "{}_noise{}".format(ref_mode, nl)
# copy from ref mode
i_mode = self.generate_dynasty_mode_setting(ref_mode)
i_mode["name_mode"] = noise_mode
i_mode["dir_out"] = "mode_{}".format(noise_mode)
input_list = self.list_input_simulator_noise[nl]
noise_list.append((i_mode, input_list))
# create detailed dynasty run list
dynasty_list = []
dynasty_out_list = []
for noise_setting, noise_input in noise_list:
d_list, d_out_list, _ = self.generate_dynasty_list(noise_setting, noise_input)
dynasty_list.extend(d_list)
dynasty_out_list.extend(d_out_list)
return dynasty_list, dynasty_out_list
@run_module(module_name="auto/dynasty btm dump2")
def run_dynasty_inference_btm_dump2(self, *, hw_mode, dry_run=True):
"""Run dynasty for pld with dump 2."""
# prepare dynasty run list for later
selected_mode = str(hw_mode)
input_list = self.list_input_btm
dump_level = 2
info_in = self.io_nodes["input"]
p_output = self.path["dir_output"] / "results"
dynasty_bin = self.config["path"]["binary"]["dynasty"]["binary"]
onnx_map = self.map_onnx
model_id = self.model_id
fn_dynasty_sh = self.path["dir_output"] / "run_dynasty_btm_dump2.sh"
onnx_type = self.config["dynasty"]["piano_dynasty"]["onnx_source"]
shape_in = self.config["dynasty"]["input_shape"]
# ioinfo.json from compiler
ioinfo_map = self.path["ioinfo_json"]
# prepare dynasty mode setting x1
selected_mode_setting = dynasty.gen_dynasty_mode_settings(
selected_mode,
onnx_map=onnx_map,
ioinfo_map=ioinfo_map,
which_onnx=onnx_type,
model_id=model_id)
d_list, dir_output_list = dynasty.gen_dynasty_list([selected_mode_setting],
input_list,
info_in,
p_output,
dump_level=dump_level,
shape_in=shape_in)
# run dynasty
cmds = dynasty.build_dynasty_cmd(d_list, dynasty_bin, fn_dynasty_sh)
if not dry_run:
dynasty.run_dynasty_command_parallel(self.model_id, fn_dynasty_sh)
return dir_output_list
@staticmethod
def compact_json(fn_json, fn_new=None):
"""
Helper function to make json more human-friendly.
"""
def compact_array(str_array):
a = str_array.group().replace("\n", "").replace("\t", "")
return a
with open(fn_json, "r") as f:
j = f.read()
j = re.sub(r"\[.*?\]", compact_array, j, flags=re.DOTALL)
j = re.sub(r":[ \n\t]*\[", ": [", j, flags=re.DOTALL)
if fn_new is None:
fn_new = fn_json
with open(fn_new, "w") as f:
f.write(j)
def postprocess_piano_knerex_json(self, hw_mode):
"""
        Helper function: prepare/link some knerex json files for compiler use.
"""
for appd in ["_scaled_piano_bie", "_scaled_piano_onnx", "_quan_piano_bie", "_quan_piano_onnx"]:
fn_json_scaled = "{}.json".format(self.map_onnx[f"kdp{hw_mode}{appd}"])
p = pathlib.Path(fn_json_scaled)
if p.exists() and not p.is_symlink():
self.compact_json(fn_json_scaled)
# HACK: for kai's script.
# TODO: confirm still needed?
fn_json_from = "{}.json".format(self.map_onnx[f"kdp{hw_mode}_scaled_piano_bie"])
fn_json_to = "{}.json".format(self.map_onnx[f"kdp{hw_mode}_scaled_piano_onnx"])
p_to = pathlib.Path(fn_json_to)
if p_to.exists():
p_to.unlink()
if os.path.exists(fn_json_from):
shutil.copy(fn_json_from, fn_json_to)
@run_module(module_name="auto/knerex")
def run_knerex(self, *, hw_mode):
"""run knerex piano (weight / data analysis, updater 520/720) for this model.
For knerex, no need for multi-processing.
        (datapath analysis runs multi-processing in C++ and will not affect the python flow).
input:
origin.onnx
compiler_xxx/graph_opt.onnx
        intermediate files:
* analysis_datapath_piano_NNN.bin
* analysis_weight_piano_NNN.tmp
"""
module_name = f"kdp{hw_mode}/knerex"
self.logger.info(f"Run {module_name}")
openblas_num_threads = self.config["knerex"]["openblas_num_threads"]
para_bin = self.config["path"]["binary"]["knerex"]["normal"]
para_updater_json = self.path[f"updater_{hw_mode}_json"]
command = f"export OPENBLAS_NUM_THREADS={openblas_num_threads}; {para_bin} -i {para_updater_json}"
self.save_command(module_name, command)
TOS = self.config["knerex"]["timeout"]
cp = futils.run_bash_script(command, timeout=TOS)
self.check_knerex_error(cp, hw_mode)
self.postprocess_piano_knerex_json(hw_mode)
# release this bie
release_bie, _, _, release_onnx = self.get_scaled_onnx_source(hw_mode)
p_out = pathlib.Path(self.path["dir_output"])
self.model_fx_release[f"kdp{hw_mode}/bie"] = p_out / release_bie
self.model_fx_release[f"kdp{hw_mode}/onnx"] = p_out / release_onnx
def check_compiler_HardwareNotSupport(self, hw_mode):
"""Find detailed failure from gen_config/compiler log."""
p_compiler_out = pathlib.Path(self.path[f"compiler_piano_{hw_mode}_out"])
# common file names: batch_compile.log / compile.log / opt.log / backtrace.log
p_logs = list(p_compiler_out.glob("*.log"))
t = ""
for p_log in p_logs:
with open(p_log, "r") as f:
t += "".join(f.readlines())
if len(t) == 0:
return None
# t is a long line with \n in it.
prefixes_1 = {
"ERROR: run sub-module \"image_cut_search\" failed": ("fm_cut", "compiler report"),
"Invalid program input: Memory region \[weight\] .*? overlapps \[dram\]": ("compiler", "datapath oversize"),
# 720 old setup
"CSim only support CPU node in the end of model and write data to output buffer": ("compiler", "cpu node in middle"),
}
for keyw, (col_name, msg) in prefixes_1.items():
pat1 = re.compile(keyw)
if len(pat1.findall(t)) > 0:
self.model_fx_report[(f"kdp{hw_mode}/ERROR")] = msg
raise RegressionError(f"kdp{hw_mode}/{col_name}", self.model_id, msg=msg)
prefixes = {
"Common": ("compiler", ""),
"InvalidProgramInput": ("compiler", ""),
"InvalidONNXAttribute": ("compiler", ""),
"HardwareNotSupport": ("HW not support", "compiler: "),
"Hardware not support": ("HW not support", "compiler: "),
"UnexpectedGraph": ("compiler", ""),
"UnimplementedFeature": ("unimplemented feature", "compiler: "),
"ValueNotReady": ("compiler", ""),
"KnerexError": ("knerex", "compiler: "),
"UnexpectedValue": ("compiler", ""),
"creating an EmptyNode instance for op_type:": ("compiler", "unsupported nodes: //"),
}
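        # e.g. a (hypothetical) log line "HardwareNotSupport: Upsample mode xyz"
        # would be reported under the "HW not support" column as
        # "compiler: Upsample mode xyz".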
for keyw, (col_name, prefix) in prefixes.items():
pat1 = re.compile(f"{keyw}[:\s]*(.*)")
if len(pat1.findall(t)) > 0:
msg = prefix + "//".join(pat1.findall(t))
self.model_fx_report[(f"kdp{hw_mode}/ERROR")] = msg
raise RegressionError(f"kdp{hw_mode}/{col_name}", self.model_id, msg=msg)
# otherwise will raise normal compiler error
return None
def get_compiler_config_helper1(self,
hw_mode,
p_out=None,
debug=False,
gen_nef_config=False,
skip_backend=False,
use_quan_model=True,
fmt_limit=None,
do_ip_eval=False):
"""Helper function to generate compiler config.
Args:
skip_backend (bool): True to run frontend only.
use_quan_model (bool): only valid when skip_backend is True.
set to True to use quantized model for accurate input bin format. (if needed.)
"""
if type(p_out) is not pathlib.PosixPath:
p_out = pathlib.Path(self.path[f"compiler_piano_{hw_mode}_out"])
p_out.mkdir(mode=0o770, parents=True, exist_ok=True)
# para_model_type for compiler
if self.is_multi_layer:
para_model_type = "-v multi"
if debug:
para_model_type = "-v model_dbg"
elif self.is_multi_core:
para_model_type = "-v multi"
elif self.is_single_layer:
para_model_type = "-v single"
elif self.is_big_model:
# big model
if gen_nef_config: # batch compile to generate nef
para_model_type = "-v model_rel"
else:
# normal compiler call
para_model_type = "-v model_opt"
# find corresponding onnx/bie/onnx+json
if self.config["module_run"]["only_ip_evaluator"] or (skip_backend and (not use_quan_model)):
# no scaled onnx yet. use origin.onnx or origin.bie
p_origin = pathlib.Path(self.map_onnx["origin"])
para_onnx = futils.relative_path(p_origin, p_out)
s_para_json = " " # no json
use_quan_model = False
else:
para_onnx, para_onnx_json, _, _ = self.get_scaled_onnx_source(hw_mode)
para_onnx = futils.relative_path(para_onnx, p_out)
use_quan_model = True
if para_onnx.name.endswith(".bie"):
# scaled.bie, no json
s_para_json = " "
else:
# scaled.onnx, need json
para_onnx_json = futils.relative_path(para_onnx_json, p_out)
s_para_json = f"-r {para_onnx_json}"
compiler_envs = ["echo"] # placeholder for bash
# extra config
extra_d = dict()
if hw_mode == 720:
extra_d["gen_setup_fbs"] = True
# TODO
if do_ip_eval:
env_ip_eval = "export RUN_IP_EVAL=1"
extra_d["ip_evaluator_cfg"] = self.config["compiler_piano"]["ip_evaluator_json"][hw_mode]
else:
env_ip_eval = "export RUN_IP_EVAL=0"
compiler_envs.append(env_ip_eval)
if self.config["module_run"]["only_ip_evaluator"]:
# NOTE: normal regression will have it as False,
# so batch compiler will fail at unsupported cpu nodes.
extra_d["skip_fw_cpu_op_impl_check"] = True
if hw_mode in [720, 730, 630, 540] and self.config["compiler_piano"]["weight_compress"]:
extra_d["weight_compress"] = True
if hw_mode in [720, 530, 730, 630, 540] and futils.need_compress_command_bin(self.cat_name, self.model_name):
extra_d["optimize"] = {"cmd_size": True}
if fmt_limit:
# should not be in ip_eval_only
extra_d["input_fmt"] = fmt_limit
if (not use_quan_model) and self.config["knerex"]["datapath_bitwidth_mode"] == "int16":
# run 16bit ip evaluator for ip_eval_only
extra_d["def_data_bitw"] = 16
extra_d["input_fmt"] = "8W1C16B"
extra_d["model_id"] = self.nef_model_id
if hw_mode == 720 and skip_backend:
# https://redmine.kneron.tw/issues/19020 for MO3
do_change = False
for case_end in ["1W16C8BHL_INTLV", "i15o15_INTLV", "1W16C8BHL_colAcc_INTLV"]:
if self.model_name.endswith(case_end):
do_change = True
break
if do_change:
extra_d["output_fmt"] = "1W16C8B_INTLV"
if skip_backend:
extra_d["skip_backend"] = True
env_gen_opt = "export KNERON_GEN_OPT_ONNX=1"
compiler_envs.append(env_gen_opt)
if self.config["compiler_piano"]["no_dummy_bn"] or (hw_mode in [520, 720] and self.is_single_layer):
# if configed
# HACK: for knerex only, stc, 520/720
compiler_envs.append("export KNERON_PIANO_OPT_NO_DUMMY_BN=1")
        # read per-model compiler extra settings and update them into extra_d;
        # currently only used for app_release; we need to prepare this json ourselves
p_extra_compiler_settings_config = self.path["dir_input"] / "extra_compiler_settings.json"
if p_extra_compiler_settings_config.exists():
with open(p_extra_compiler_settings_config, "r") as f:
extra_compiler_settings_config = json.load(f)
recursive_update(extra_d, extra_compiler_settings_config)
if len(extra_d) > 0:
extra_para = "-a '{}'".format(json.dumps(extra_d, default=str))
else:
extra_para = ""
# example: compiler_piano.config.kdp530.json
compiler_json_name = pathlib.Path(self.path[f"compiler_piano_{hw_mode}_json"]).name
# may save to different folder
p_compiler_json = p_out / compiler_json_name
p_img_cut_json = p_out / "image_cut_config.json"
para_compiler_json = "-o {}".format(compiler_json_name)
gen_py = self.config["path"]["binary"]["compiler"]["gen_py"]
# feature map cut
def get_fm_cut_parameter(skip_fm_cut):
if hw_mode == 520:
fm_cut_conf = ""
elif skip_fm_cut:
# no need for nef
fm_cut_conf = ""
else:
fm_cut_modes = {
"default": "",
"deep_search": f"-m {para_onnx}"
}
fm_cut_k = self.config["compiler_piano"]["node_schedule_mode"]
fm_cut_conf = fm_cut_modes[fm_cut_k]
return fm_cut_conf
fm_cut_conf = get_fm_cut_parameter(skip_backend)
# no need for get_cmd_gen_apb
env_compiler_lib = """export LD_LIBRARY_PATH="{}:$LD_LIBRARY_PATH" """.format(self.config["path"]["binary"]["compiler"]["lib_dir"])
env_compile_bin_path = "export COMPILER_BIN_DIR={}".format(self.config["path"]["binary"]["compiler"]["bin_dir"])
env_opt_bin_path = "export OPT_COMPILE_DIR={}".format(self.config["path"]["binary"]["compiler"]["opt_bin_dir"])
compiler_envs.extend([env_compiler_lib, env_compile_bin_path, env_opt_bin_path])
# HACK: stc compiler for 540/730, https://redmine.kneron.tw/issues/17275
if hw_mode in [540, 730] and self.is_single_layer:
compiler_envs.append("export KNERON_NMEM_FT_REORDER_OP=1")
# HACK: http://eip.kneron.com:8080/redmine/issues/16360#note-5
# for 720 16bit, knerex
if self.is_big_model and hw_mode in [720] and self.config["knerex"]["datapath_bitwidth_mode"] in ["int16"]:
compiler_envs.append("export KNERON_PIANO_OPT_ADD_DUMMY_BYPASS_NODE_FOR_PRELU_LRELU=1")
compiler_bin = "{} {}".format(self.config["path"]["binary"]["compiler"]["compiler"], hw_mode)
def get_gen_cfg_cmds():
cmd_gen_cfg = "{} -t {} {} {} {} {} {} 2>&1 > gen_config.log".format(
gen_py, hw_mode, para_model_type, s_para_json,
para_compiler_json, fm_cut_conf, extra_para)
# HACK: some hack files. may be used for some special models
p_input = self.model_path / "input"
p_in_compiler_customize = p_input / f"compiler_piano.config.kdp{hw_mode}.json"
p_in_img_cut_customize = p_input / "image_cut_config.json"
p_compiler_json_custom = None
cp_cmds = ["echo"] # echo is placeholder in bash
if p_in_compiler_customize.exists():
if gen_nef_config:
# for nef gen, p_compiler_json_custom is used
p_compiler_json_custom = p_out / "compiler_custom_config.json"
cp_1 = "cp {} {}".format(p_in_compiler_customize, p_compiler_json_custom)
# normal p_compiler_json will be generated anyway
else:
# for normal compiler
# normal p_compiler_json will be copied from input. not generated
cp_1 = "cp {} {}".format(p_in_compiler_customize, p_compiler_json)
cp_cmds.append(cp_1)
if p_in_img_cut_customize.exists(): # put inside above if?
cp_1 = "cp {} {}".format(p_in_img_cut_customize, p_img_cut_json)
cp_cmds.append(cp_1)
# has customized files?
cp_cmd = " && ".join(cp_cmds)
has_customized = len(cp_cmds) > 1
if gen_nef_config:
# for nef config. will run both
return cmd_gen_cfg, cp_cmd, p_compiler_json_custom
else:
# normal compiler calling
if has_customized:
return cp_cmd, "echo", p_compiler_json_custom
else:
return cmd_gen_cfg, "echo", p_compiler_json_custom
cmd_gen_cfg, cmd_gen_cfg_custom, p_compiler_json_custom = get_gen_cfg_cmds()
if self.config["path"]["internal"] and (not self.config["path"]["use_toolchain"]):
cmd_compiler = f"{compiler_bin} {para_onnx} {p_compiler_json.name} debug"
else:
cmd_compiler = f"{compiler_bin} {para_onnx} {p_compiler_json.name}"
# batch compiler json is generated by regression.
p_batch_config = self.generate_batch_compiler_json(hw_mode=hw_mode, p_out=p_out, p_compiler_json=p_compiler_json, p_config_to_custom=p_compiler_json_custom)
# batch compiler command
cmd_batch = self.generate_batch_compiler_cmd_v1(hw_mode=hw_mode, p_out=p_out, p_batch_config=p_batch_config)
return cmd_gen_cfg, cmd_compiler, cmd_batch, p_out, "; ".join(compiler_envs)
def generate_batch_compiler_cmd_v1(self, *, hw_mode, p_out, p_batch_config):
"""batch_compile to support ALL (+540/730) platforms since 0.21.1. """
compiler_commit = self.config["path"]["compiler_commit"]
bin_bc = self.config["path"]["binary"]["compiler"]["batch_compiler"]
command = f"pushd {p_out} > /dev/null && {bin_bc} {p_batch_config} -T {hw_mode} -t {compiler_commit} -o -D && popd > /dev/null"
return command
def generate_batch_compiler_json(self, *, hw_mode, p_out, p_compiler_json, p_config_to_custom):
""" Use template to generate batch_compile.json."""
# create batch_compile.json
if self.config["module_run"]["only_ip_evaluator"]:
# no scaled onnx yet. use origin.onnx
fn_knerex_onnx = futils.relative_path(self.map_onnx["origin"], p_out)
fn_knerex_json = ""
else:
# knerex should be ready now
fn_knerex_onnx, fn_knerex_json, _, _ = self.get_scaled_onnx_source(hw_mode)
c = {}
        # nef files are used to verify board output against csim.
c["flow_path"] = self.config["path"]["flow"]
c["hw_mode"] = hw_mode
c["model_id"] = self.nef_model_id
c["stamp"] = "1"
c["bie_path"] = str(fn_knerex_onnx)
if fn_knerex_onnx.name.endswith(".onnx"):
c["json"] = str(fn_knerex_json)
else:
# no json needed for bie files
c["json"] = ""
# TODO: make this relative path
c["gen_config_path"] = str(p_compiler_json)
# save using template
if p_config_to_custom and p_config_to_custom.exists():
template = self.jinja_env.get_template("batch_compile_bconfig_custom.json")
c["custom_config_path"] = str(p_config_to_custom)
else:
template = self.jinja_env.get_template("batch_compile_bconfig.json")
output = template.render(config=c)
fn_json_save = "{}/batch_compile.json".format(p_out)
with open(fn_json_save, "w") as f:
f.write(output)
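        # The rendered json fills the template with (illustrative values):
        #   hw_mode=730, model_id=<nef_model_id>, bie_path=<knerex bie/onnx>,
        #   gen_config_path=<compiler config json>, plus custom_config_path
        #   when a per-model custom config exists.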
return fn_json_save
def save_cp_log(self, p_log, cp):
with open(p_log, "w") as f:
f.write(f"bash run return code: {cp.returncode}")
f.write("\n".join([cp.stdout, cp.stderr]))
@run_module(module_name="auto/compiler_cfg")
def generate_compiler_config(self, *, hw_mode, command):
"""Generate config for compiler. may do feature-map cut which is time consuming.
Some optimize modules may be available.
- feature-map cut deep search.
- script will iterate compiler to find the best cut.
- script will copy opt_compile.log to compiler output folder (even if failed).
            - This is time-consuming and may be killed by the timeout; there will be no opt_compile.log in that case.
"""
module_name = f"kdp{hw_mode}/compiler_cfg"
self.save_command(module_name, command)
        # NOTE: usually generating the compiler config is very fast;
        # however, it may take too long if fm_cut is turned on (deep_search).
TOS = self.config["compiler_piano"]["timeout"]
cp = futils.run_bash_script(command, timeout=TOS)
self.check_compiler_log(hw_mode, cp)
self.clean_opt_compile(hw_mode)
if cp.returncode != 0:
self.check_bc_returncode(cp, hw_mode, module="compiler_cfg")
def check_compiler_log(self, hw_mode, cp):
p_json = pathlib.Path(self.path[f"compiler_piano_{hw_mode}_json"])
# save log for debug
p_log = p_json.parent / "compiler_gen_config.log"
# DEBUG: check size of config. if empty, save log for debug
if not p_json.exists():
self.save_cp_log(p_log, cp)
raise RegressionError(f"kdp{hw_mode}/compiler_cfg", self.model_id, msg="no config generated.")
elif p_json.stat().st_size == 0:
self.save_cp_log(p_log, cp)
raise RegressionError(f"kdp{hw_mode}/compiler_cfg", self.model_id, msg="config empty.")
elif cp.returncode != 0:
# save log first.
self.save_cp_log(p_log, cp)
# will do detailed check below
def clean_opt_compile(self, hw_mode):
"""Clean up opt_compile which is from fm_cut but sometime not cleaned. """
p_json = pathlib.Path(self.path[f"compiler_piano_{hw_mode}_json"])
p_opt_cmpl = p_json.parent / "opt_compile"
if p_opt_cmpl.exists():
cmd = f"pkill -f {self.model_name} ; sleep 1; rm -rf {p_opt_cmpl}"
cp2 = futils.run_bash_script(cmd, do_echo=True)
# TODO: examine cp2 return code
# cp2.returncode == -15:
def check_bc_returncode(self, cp, hw_mode, module="compiler"):
"""Examine the return code of batch-compiler.
Ref: https://redmine.kneron.tw/issues/18389
Compiler return code is between 1-30.
gen_config.py will return 31-50 if fm_cut failed.
TODO: what about normal compiler frontend?
"""
rc = cp.returncode
if rc == 0:
return # success
elif rc == 1:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg="compiler common")
elif rc == 2:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg="compiler invalid input")
elif rc == 3:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg="invlid onnx attribute")
elif rc == 4:
raise RegressionError(f"kdp{hw_mode}/HW not support", self.model_id, msg="Err: 4")
elif rc == 5:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg="unexpected graph")
elif rc == 6:
raise RegressionError(f"kdp{hw_mode}/unimplemented feature", self.model_id, msg=f"compiler: {rc}")
elif rc == 7:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg="value not ready")
elif rc == 8:
raise RegressionError(f"kdp{hw_mode}/knerex", self.model_id, msg="cmplr: knerex config error")
elif rc == 9:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg="unexpected value")
elif rc >= 1 and rc <= 30:
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg=f"Err: {rc}")
###################################################################################
elif rc == 111:
            # the compiler itself never times out; this is mostly the fm_cut search
raise RegressionError(f"kdp{hw_mode}/fm_cut", self.model_id, msg=cp.stderr)
elif rc == -15:
raise RegressionError(f"kdp{hw_mode}/fm_cut", self.model_id, msg="kille by SIGTERM")
###################################################################################
# gen_config.py will return 31-50 if fm_cut failed.
elif rc == 32:
msg = f"fm_cut does not support {hw_mode}."
raise RegressionError(f"kdp{hw_mode}/fm_cut", self.model_id, msg=msg)
elif rc == 33:
msg = "No info_cutting.log!"
raise RegressionError(f"kdp{hw_mode}/fm_cut", self.model_id, msg=msg)
elif rc >= 31 and rc <= 50:
# default report for fm_cut fail
msg = f"Err: {rc}"
raise RegressionError(f"kdp{hw_mode}/fm_cut", self.model_id, msg=msg)
###################################################################################
self.check_compiler_HardwareNotSupport(hw_mode)
###################################################################################
# default error
raise RegressionError(f"kdp{hw_mode}/{module}", self.model_id, msg=f"Err: {rc}")
@run_module(module_name="auto/compiler")
def run_batch_compile_command(self, *, hw_mode, command, dir_out):
module_name = f"kdp{hw_mode}/run batch compiler"
self.save_command(module_name, command)
cp = futils.run_bash_script(command, do_echo=False) # self.config["regression"]["print_error"]
self.check_bc_returncode(cp, hw_mode, module="compiler")
fn_outs = {}
if hw_mode in [540, 730]:
# for 730/540, no setup.bin, command.bin is optional if last one is cpu node
# and csim/firmware both use kne
fn_outs[f"kdp{hw_mode}/kne"] = f"{dir_out}/models_{hw_mode}.kne"
fn_outs[f"kdp{hw_mode}/nef"] = f"{dir_out}/models_{hw_mode}.nef"
else:
# old setup + nefv1, setup.bin+command.bin for csim
# nef for firmware
fn_outs[f"kdp{hw_mode}/nef"] = f"{dir_out}/models_{hw_mode}.nef"
if self.config["module_run"]["only_ip_evaluator"]:
# no need to release nef file which is useless
return
for k, fn_check in fn_outs.items():
p_check = pathlib.Path(fn_check)
if not p_check.exists():
raise RegressionError(f"kdp{hw_mode}/compiler", self.model_id, msg=f"{p_check.name} missing.")
self.model_fx_release[k] = p_check
@run_module("auto/compiler hw info")
def load_hw_stats(self, *, dir_out, hw_mode):
"""Collect FPS info / weight size / cpu nodes from compiler log."""
if hw_mode in self.config["hw_mode_on"]:
ip_eval_report = compiler.collect_FPS(dir_out, hw_mode)
if "fps" in ip_eval_report:
# this is a valid report
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/FPS", ip_eval_report["fps"]))
# Check cpu node info
            # TODO: simplify this. it must be compulsory
k = "cpu_node"
if k in ip_eval_report:
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/{k}", ip_eval_report[k]))
# patch up 520 using preset value
if hw_mode == 520:
try:
ip_eval_bw = self.config["compiler_piano"]["ip_evaluator_bw"][hw_mode]
preset_keys = {
"bw_weight": "GETW bandwidth GB/s",
"bw_rdma": "RDMA bandwidth GB/s",
"bw_wdma": "WDMA bandwidth GB/s"}
for k1, k2 in preset_keys.items():
if ip_eval_bw[k1] is not None:
ip_eval_report[k2] = ip_eval_bw[k1]
                except Exception:
pass
for k, v in ip_eval_report.items():
self.model_fx_report[f"kdp{hw_mode}/ip_eval/{k}"] = v
fps_improved = compiler.collect_fps_improve(dir_out)
if fps_improved:
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/FPS_improved", fps_improved))
# Collect command size and weight size info
if self.is_big_model:
cmd_size, weight_size = compiler.collect_command_weight_size(dir_out)
if cmd_size:
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/cmd_size(KB)", cmd_size))
if weight_size:
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/wt_size(MB)", weight_size))
                # TEMP: some temporary analysis on weight size: 8bit fx weight vs 32bit float
if self.onnx_size > 0:
wt_overhead = int(100 * (4 * weight_size / self.onnx_size - 1))
else:
wt_overhead = 0
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/wt_overhead (%)", wt_overhead))
# if self.config["module_run"]["filter_cpu_cases"]:
# if cpu_node_list_str not in ["None", "N/A"]:
# # there are cpu nodes
# raise RegressionError(f"kdp{hw_mode}/filter_cpu_node", self.model_id)
@run_module(module_name="auto/compiler frontend")
def run_compiler_frontend(self, *, hw_mode, use_quan_model=False):
"""Call compiler frontend to generate cpu node list and decomposed node mapping.
compiler has two steps:
* generate config: `generate_compiler_config`
* (optional) feature map search during gen_config, for better fps.
* actual compiler run: `run_batch_compiler_command`
Inputs:
- hw_mode: 520/530/... supported platform
        - use_quan_model (bool): True to use the knerex-generated scaled.bie/onnx.
          Set to False to run on the original (un-quantized) onnx.
Output files:
- decomposed.bie
- decomposed.onnx (for release)
"""
module_name = f"kdp{hw_mode}/compiler frontend"
(cmd_gen_cfg, cmd_compiler, cmd_batch_compiler, dir_out,
envs) = self.get_compiler_config_helper1(
hw_mode,
skip_backend=True,
use_quan_model=use_quan_model,
do_ip_eval=False)
command1 = f"pushd {dir_out} > /dev/null; {envs}; {cmd_gen_cfg}"
command2 = f"pushd {dir_out} > /dev/null; {envs}; {cmd_compiler}"
self.generate_compiler_config(command=command1, hw_mode=hw_mode)
self.save_command(module_name, command2)
cp = futils.run_bash_script(command2, do_echo=False)
self.check_bc_returncode(cp, hw_mode, module="compiler frontend")
# https://redmine.kneron.tw/issues/17758
# NOTE: old name is graph_opt.onnx
kvs = {
# name from compiler: new name in regression
"decomposed.onnx": self.map_onnx[f"kdp{hw_mode}_opt_piano_onnx"],
"decomposed.bie": self.map_onnx[f"kdp{hw_mode}_opt_piano_bie"],
}
# copy to knerex folder
p_knerex = self.path[f"knerex_output_{hw_mode}"]
p_knerex.mkdir(exist_ok=True)
for k, v in kvs.items():
fn_from = list(pathlib.Path(dir_out).glob(k))
if len(fn_from) == 0:
raise RegressionError(f"kdp{hw_mode}/compiler frontend", self.model_id, msg=f"NO {k} generated by frontend.")
shutil.copyfile(fn_from[0], v)
# load basic_info.json to check how many input bin formats for each input
if use_quan_model:
# load jsons from compiler frontend generated bie
jsons = util_lib.load_zip_jsons(self.map_onnx[f"kdp{hw_mode}_opt_piano_bie"])
basic_info = jsons["basic_info.json"]
self.io_nodes[("input_format", hw_mode)] = basic_info["input_fmt"]
bw_in = self.config["knerex"]["model_in_bitwidth_mode"]
bw_out = self.config["knerex"]["model_out_bitwidth_mode"]
bw_cpu = self.config["knerex"]["cpu_bitwidth_mode"]
bw_dp = self.config["knerex"]["datapath_bitwidth_mode"]
bw_wt = self.config["knerex"]["weight_bitwidth_mode"]
self.model_fx_report[f"kdp{hw_mode}/input bitwidth"] = bw_in
self.model_fx_report[f"kdp{hw_mode}/output bitwidth"] = bw_out
self.model_fx_report[f"kdp{hw_mode}/cpu bitwidth"] = bw_cpu
self.model_fx_report[f"kdp{hw_mode}/datapath bitwidth"] = bw_dp
self.model_fx_report[f"kdp{hw_mode}/weight bitwidth"] = bw_wt
# clean up folder
shutil.rmtree(dir_out)
@run_module(module_name="auto/pick bin format")
def pick_in_bin_format(self, *, hw_mode, limited_input):
"""Pick 1 format for each limited_input.
see https://redmine.kneron.tw/issues/18306
"""
k1 = ("input_format", hw_mode)
assert k1 in self.io_nodes, "Input formats are not generated with compiler frontend on quantized model. Check flow settings."
cmpl_fmts = self.io_nodes[k1]
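        # cmpl_fmts is assumed to look like (values hypothetical):
        #   {"input_1": ["4W4C8B", "1W16C8B"], "input_2": ["1W16C8B"]}
        # below we pick the first non-4W4C format for each constrained input.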
results = {}
for in_name in limited_input:
if in_name not in cmpl_fmts:
self.logger.critical(f"Constraint on input format not applied!!! Given {in_name} not in {list(cmpl_fmts.keys())} given by compiler.")
continue
if len(cmpl_fmts[in_name]) == 1:
self.logger.critical(f"Constraint on input format not applied!!! Given {in_name} has only 1 format: {cmpl_fmts[in_name][0]}.")
continue
fmts = [f for f in cmpl_fmts[in_name] if not f.startswith("4W4C")]
if len(fmts) == 0:
self.logger.critical(f"Constraint on input format not applied!!! Given {in_name} has no valid format to limit: {cmpl_fmts[in_name]} -> remove 4W4B* -> [].")
continue
results[in_name] = fmts[0]
return results
@run_module(module_name="auto/compiler")
def generate_nef(self, *, hw_mode, p_nef=None, fmt_limit=None):
"""call batch compiler to generate nef.
The last and full run of compiler.
Inputs:
* hw_mode supported.
Output files:
* model_NNN.nef
* model_NNN.kne
"""
module_name = f"kdp{hw_mode}/gen_nef"
self.logger.info(f"run {module_name}")
if p_nef is None: # default path
# TODO: move to compiler_piano_
# p_nef = pathlib.Path(self.path["compiler_piano_{}_out".format(hw_mode)])
p_nef = pathlib.Path(self.path["nef_output_{}".format(hw_mode)])
p_nef.mkdir(mode=0o770, parents=True, exist_ok=True)
# generate compiler nef configs
do_ip_eval = self.config["compiler_piano"]["ip_evaluator"]
cmd_gen_cfg, cmd_compiler, cmd_batch_compiler, dir_out, envs = self.get_compiler_config_helper1(hw_mode,
gen_nef_config=True,
p_out=p_nef,
fmt_limit=fmt_limit,
do_ip_eval=do_ip_eval)
command1 = f"pushd {dir_out} > /dev/null; {envs}; {cmd_gen_cfg}"
# command2 = f"pushd {dir_out} > /dev/null; {envs}; {cmd_compiler}"
command3 = f"pushd {dir_out} > /dev/null; {envs}; {cmd_batch_compiler}"
        # the functions below are decorated by run_module; they record elapsed time and report specific columns
self.generate_compiler_config(command=command1, hw_mode=hw_mode)
self.run_batch_compile_command(command=command3, dir_out=dir_out, hw_mode=hw_mode)
self.load_hw_stats(dir_out=dir_out, hw_mode=hw_mode)
fn_knerex_bie, _, _, _ = self.get_scaled_onnx_source(hw_mode)
# collect ioinfo.json for future usage
# needed for csim
# needed for dynasty (especially for rgba)
        # NOTE: ioinfo.json is obsolete; calculation_info.json is used instead
if fn_knerex_bie.name.endswith(".bie"):
js = [
# original name, key in regression, name in bie (for dynasty)
("ioinfo.json", "ioinfo_json", "ioinfo.json"),
("calculation.json", "calculation_json", "calculation_info.json"),
]
for n1, n2, n3 in js:
p_json = dir_out / n1
if p_json.exists():
self.path[n2][hw_mode] = p_json
# patch bie
util_lib.patch_bie_w_ioinfo_json(fn_knerex_bie, p_json, n3)
@run_module(module_name="auto/csim")
def run_csim(self, *, hw_mode):
"""
run csim for 720/530/730/630/540
Input files:
* run_csim_NNN.ini
* pointing to files needed for csim.
                * refer to `generate_csim_ini` for reference.
Output files:
* `output/results/FN_INPUT/csim_NNN_output`
if 520 given, will run `run_csim_520` instead.
"""
module_name = f"kdp{hw_mode}/csim"
self.logger.info(f"run {module_name}")
list_csim = self.io_nodes[("btm_csim_in", hw_mode)]
d_csim = {i: v for i, v in enumerate(list_csim)}
bin_csim = fconsts.BIN_SET["csim"][hw_mode]
fn_sh = self.path["btm_dump"] / f"csim_{hw_mode}" / f"run_csim_{hw_mode}.sh"
cmd, cp = csim.run_csim(d_csim, bin_csim, fn_sh)
self.check_csim_error(cp, hw_mode)
@run_module(module_name="kdp520/csim")
def run_csim_520(self):
"""run csim 520.
520 is our first platform. This is different from later platforms.
Input files:
* command.bin
* setup.bin
* weight.bin
* dynasty dumped input file at `output/results/FN_INPUT/model_520-wqbi_piano/layer_input_*.bin`
Output files:
* `output/results/FN_INPUT/csim_520_output`
"""
hw_mode = 520
module_name = f"kdp{hw_mode}/csim"
self.logger.info(f"run {module_name}")
p_csim_out = pathlib.Path(self.io_nodes[("btm_csim_path", hw_mode)])
p_compiler_output = pathlib.Path(self.path["compiler_piano_{}_out".format(hw_mode)])
p_rel_compiler = futils.relative_path(p_compiler_output, p_csim_out)
cs = {}
for fn_key in ["command_bin", "setup_bin", "weight_bin"]:
p_bin = self.compiler_output[hw_mode][fn_key].name
cs[fn_key] = f"{p_rel_compiler}/{p_bin}"
para_bin = self.config["path"]["binary"]["csim"][520]
p_csim_out.mkdir(mode=0o770, parents=True, exist_ok=True)
p_dynasty_so = pathlib.Path(self.config["path"]["binary"]["dynasty"]["lib.so"])
ENV_DYNASTY_LIB = f"""export LD_LIBRARY_PATH="{p_dynasty_so.parent}:$LD_LIBRARY_PATH" """
if self.is_big_model:
# NOTE: only 1 input for 520. no need for ","?
fn_input_rgba = ",".join([str(a) for a in self.io_nodes[("btm_csim_in_bin", hw_mode)]])
c = f"""{para_bin} -d 0 --thread 1 {cs["command_bin"]} {cs["weight_bin"]} {fn_input_rgba} --setup {cs["setup_bin"]}"""
else:
# NOTE: 520 stc to use sequential.bin.
# NOTE: v016 category will have TWO inputs!!!
fn_input_sqtl = " ".join([str(a) for a in self.io_nodes[("btm_csim_in_bin", hw_mode)]])
c = f"""{para_bin} -d 0 --thread 1 {cs["command_bin"]} {cs["weight_bin"]} -t {fn_input_sqtl}"""
command = f"{ENV_DYNASTY_LIB}; pushd {p_csim_out} > /dev/null && {c} && popd > /dev/null"
self.save_command(module_name, command)
cp = futils.run_bash_script(command, timeout=60*60*6)
self.check_csim_error(cp, hw_mode)
@run_module(module_name="kdp520/btm dyn_csim")
def btm_dyn_csim_520(self):
"""
run bit-true-match check between dynasty / csim fix point results.
Will raise RegressionError if mismatch.
"""
module_name = "kdp520/btm dyn_csim"
self.logger.info(f"check {module_name}")
hw_mode = 520
dir_csim_output = self.io_nodes[("btm_csim_path", hw_mode)]
if self.is_big_model:
# Multiple outputs possible
golden_list = self.io_nodes[("btm_dynasty_golden_txt_path", 520)]
for i in range(len(golden_list)):
fn_csim_out = "{}/node_{:04d}_final_output.txt".format(dir_csim_output, i)
fn_d520_out = golden_list[i]
assert os.path.exists(fn_d520_out), "dynasty 520 output ({}) does not exist!".format(fn_d520_out)
# TODO: use futils.md5sum for bit-true-match? faster?
with open(fn_csim_out, "r") as f_csim, open(fn_d520_out, "r") as f_dyn:
out_csim = [int(a) for a in f_csim]
out_dyna = [int(a) for a in f_dyn]
# do report
cond1 = len(out_csim) == len(out_dyna)
msg1 = "dynasty dump size ({len(out_dyna)}) != csim dump size ({len(out_csim)})"
cond2 = all(a == b for a, b in zip(out_csim, out_dyna))
msg2 = "dynasty-csim mismatch! "
for cond, msg in [(cond1, msg1), (cond2, msg2)]:
if not cond:
self.model_fx_report["btm_520"] = msg
assert cond, msg
else:
self.model_fx_report["kdp520/btm"] = "bit-true-match (520) verified between dynasty and csim."
else:
# single layer. BUG: we assume only one output.
fn_csim_out = "{}/Lastlayer_final_output.txt".format(dir_csim_output)
fn_d520_out = self.io_nodes[("btm_dynasty_golden_txt_path", 520)][0]
assert os.path.exists(fn_d520_out), "dynasty 520 output ({}) does not exist!".format(fn_d520_out)
with open(fn_csim_out, "r") as f_csim, open(fn_d520_out, "r") as f_dyn:
out_csim = [int(a) for a in f_csim]
out_dyna = [int(a) for a in f_dyn]
assert len(out_csim) == len(out_dyna), "dynasty dump size ({}) != csim dump size ({})".format(len(out_dyna), len(out_csim))
assert all(a == b for a, b in zip(out_csim, out_dyna)), "dynasty-csim mismatch! "
try:
if self.config["post_clean_up"]["csim_output"]:
shutil.rmtree(dir_csim_output)
        except Exception:
            self.logger.error("Failed to delete the csim 520 dump folder: {}".format(dir_csim_output))
@run_module(module_name="auto/btm dyn_csim")
def btm_dyn_csim(self, *, hw_mode):
"""
run bit-true-match check between dynasty / csim fix point results.
Will raise RegressionError if mismatch.
NOTE: platform 520 see btm_dyn_csim_520
"""
# detour for 520
if hw_mode == 520:
self.btm_dyn_csim_520()
return
self.logger.info(f"check kdp{hw_mode}/btm_dym_csim")
# dynasty golden
p_d = self.io_nodes[("btm_dynasty_golden_txt_path", hw_mode)]
        # the quick way: assume all the text files are EXACTLY the same,
        # i.e. they have the same futils.md5sum
p_csim_dump = self.io_nodes[("btm_csim_path", hw_mode)]
        # compare data from dma2seq; the easiest check.
p_c = pathlib.Path(p_csim_dump).glob("dma2seq_*.seq")
set_d = set(futils.md5sum(str(a)) for a in p_d)
set_c = set(futils.md5sum(str(a)) for a in p_c)
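        # comparing md5 sets makes the check order-independent: any content
        # difference between the dynasty and csim dumps shows up as a
        # non-empty set difference below.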
        # DEBUG: if internal regression, a mismatch will trigger the pld report automatically
if self.config["path"]["internal"]:
if set_d != set_c:
try:
self.generate_pld_report(hw_mode)
except Exception as e:
signal("data_sender").send((self.model_id, f"kdp{hw_mode}/pld dump", str(e)))
if set_d != set_c:
# do the report
msg = "mismatched: {}".format(set_d.difference(set_c))
self.model_fx_report[f"kdp{hw_mode}/btm"] = msg
self.module_status[hw_mode]["btm_dyn_csim"] = False
raise RegressionError(f"kdp{hw_mode}/btm dyn_csim", self.model_id, msg=msg)
else:
self.model_fx_report[f"kdp{hw_mode}/btm"] = f"bit-true-match ({hw_mode}) verified between dynasty and csim."
# NOTE: the hard way, for loop to compare
# self.io_nodes[("btm_dynasty_golden_txt_path", hw_mode)]
# dma2seq_*.seq
#################################################################################
@run_module(module_name="auto/kneron+")
def run_nef_kneron_plus(self, *, hw_mode, number_try=0):
"""run nef on kneron plus (dongle server).
NEF inference request send to kneron internal server,
which call hardware dongle to do the inference.
Dongle firmware may return either float or fix-point data on different request.
Current format: `BCHW`.
NOTE: the server will RESET dongle then sleep 15s !!!
Input files:
* For 520/720/530/630:
* model_NNN.nef
* For 540/730, dongle:
* model_NNN.kne
* dynasty dumped input bin at `output/results/FN_INPUT/model_NNN-wqbi_piano/layer_input_*.bin`
Output files:
* dongle inferenced results in BCHW, float or fix-point
"""
from nef_utils.dongle_inference import dongle_inference
module_name = f"kdp{hw_mode}/kneron+"
self.logger.info(f"run {module_name}")
dongle_server = self.config["nef"]["dongle_server"]
dir_rgba_list = ["{}".format(rgba_input) for rgba_input in self.io_nodes[("btm_csim_in_bin", hw_mode)]]
s_rgba = " ".join(dir_rgba_list)
dir_nef_model = "{}/models_{}.nef".format(self.path['compiler_piano_{}_out'.format(hw_mode)], hw_mode)
dir_nef_out_list = []
for i in range(number_try):
dir_nef_out_list.append(self.io_nodes[("btm_nef_kneron_plus_path", hw_mode, i)])
dir_nef_out_list[i].mkdir(parents=True, exist_ok=True)
dir_nef_out = str(self.io_nodes[("btm_nef_kneron_plus_path", hw_mode, 0)])[:-2]
if hw_mode == 520:
fn_ioinfo = "{}/ioinfo.csv".format(self.path["compiler_piano_{}_out".format(hw_mode)])
ioinfo = pd.read_csv(fn_ioinfo, header=None)
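            # assumed ioinfo.csv layout (no header): column 0 is "i"/"o"
            # (input/output), column 2 is the node name; we keep outputs only.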
output_order = []
for i in range(len(ioinfo)):
in_or_out = ioinfo[0][i]
if in_or_out == "o":
output_order.append(str(ioinfo[2][i]).replace("/", "_"))
else:
fn_ioinfo = "{}/ioinfo.json".format(self.path["compiler_piano_{}_out".format(hw_mode)])
with open(fn_ioinfo, "r") as f:
ioinfo = json.load(f)
output_order = []
for output_item in ioinfo["output"]:
output_order.append(output_item["name"].replace("/", "_"))
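            # Illustrative ioinfo.json shape assumed by the loop above (only the
            # "output"/"name" fields are used here; the values are hypothetical):
            #   {"output": [{"name": "conv1/out0"}, {"name": "fc/out1"}]}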
        # save the equivalent bash command for debugging; the regression actually calls the python function
# TODO: why no output folder specified?
dir_nef_script = self.config["path"]["binary"]["nef"]["nef_client.py"]
command = f"python3 {dir_nef_script} -i {s_rgba} -m {dir_nef_model} -p {hw_mode} -mid {self.nef_model_id} -g {dongle_server} -fix"
self.save_command(module_name, command)
        # actually call the dongle inference server from the python function
try:
fix_output_list, dongle_client_log = dongle_inference(
dir_nef_model,
dir_rgba_list,
model_id=self.nef_model_id,
platform=hw_mode,
group=dongle_server,
inference_times=number_try,
is_fixed_output=True,
output_path=dir_nef_out,
output_order=output_order)
except GeneralError as e:
self.logger.error(e.details)
raise RegressionError(f"kdp{hw_mode}/{e.msg}", self.model_id, msg=e.details)
fn_log = self.path["btm_dump"] / "dongle_client.log"
with open(fn_log, "w") as f:
            f.writelines([line + '\n' for line in dongle_client_log])

def generate_pld_report(self, hw_mode, dry_run=True):
"""
        Internal process for generating the pld report when dynasty/csim results mismatch.
        Inputs:
        - hw_mode: platform (520 not supported)
        - dry_run: True to only create the scripts; False to actually run them
Steps included:
* re-run dynasty per layer
* re-run csim per layer
* run pld.py to generate pld report
Output files:
* pld report
"""
if hw_mode == 520:
self.logger.error("PLD dump does not support 520")
raise NotImplementedError
module_name = f"kdp{hw_mode}/pld dump"
self.logger.info(f"run {module_name}")
        # re-run csim with a special config, already generated during the normal csim run
list_csim = self.io_nodes[("btm_csim_in_pld", hw_mode)]
d_csim = {i: v for i, v in enumerate(list_csim)}
bin_csim = self.config["path"]["binary"]["csim"][hw_mode]
fn_sh = self.path["dir_output"] / f"run_csim_{hw_mode}_pld.sh"
cmd, cp = csim.run_csim(d_csim, bin_csim, fn_sh, dry_run=dry_run)
# self.check_csim_error(cp, hw_mode)
# re-run dynasty on test_input.txt with dump 2
if self.config["dynasty"]["do_dump"] < 2:
            # it may be 730 or 730-wqbi or ...
_, _, btm_mode, _ = self.get_scaled_onnx_source(hw_mode)
# if dry_run, the dynasty script will be created without running.
self.run_dynasty_inference_btm_dump2(hw_mode=btm_mode, dry_run=dry_run)
# run pld.py for report
p_compiler = pathlib.Path(self.path["compiler_piano_{}_out".format(hw_mode)])
p_dynasty = self.io_nodes[("btm_dynasty_path", hw_mode)]
p_csim = self.io_nodes[("btm_csim_path", hw_mode)]
p_report = self.io_nodes[("pld_report", hw_mode)]
p_report.mkdir(parents=True, exist_ok=True)
bin_pld_report = "python3 {}".format(self.config["path"]["binary"]["pld"]["pld.py"])
command_pld_report = f"{bin_pld_report} {hw_mode} {p_compiler} {p_csim} {p_dynasty} {p_report}"
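        # e.g. "python3 .../pld.py 720 <compiler_out> <csim_dump> <dynasty_dump> <report_dir>"
        # (platform and paths illustrative)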
self.save_command(module_name, command_pld_report)
fn_cmd = self.path["dir_output"] / f"run_pld_report_{hw_mode}.sh"
with open(fn_cmd, "w") as f:
f.write(f"{command_pld_report}\n\n")
if not dry_run:
cp = futils.run_bash_script(command_pld_report, do_echo=False, timeout=60*60*6)
            # the pld report script failed; save the log file for debug
if cp.returncode != 0:
fn_log = self.path["dir_output"] / f"run_pld_report_{hw_mode}.log"
with open(fn_log, "w") as f:
f.write("\n".join([cp.stdout, cp.stderr]))
if cp.returncode == 111:
msg = cp.stderr
else:
msg = f"Err: {cp.returncode}"
signal("data_sender").send((self.model_id, "kdp{hw_mode}/pld dump", msg))
@run_module(module_name="auto/btm csim_vs_dongle")
def btm_csim_nef(self, *, hw_mode, number_try):
"""csim vs nef, 520/530/720
        # NOTE: we assume NEF will only run on big_model
        # if it needs to run on stc, the csim reference may need adjusting; refer to btm_dyn_csim
"""
try:
module_name = f"kdp{hw_mode}/btm_csim_nef/try{number_try}"
self.logger.info("check {}".format(module_name))
            # find all nef inference results
p_nef = pathlib.Path(self.io_nodes[("btm_nef_kneron_plus_path", hw_mode, number_try)]).glob("layer_*_fx.txt")
            # find all csim inference results
if hw_mode != 520:
if self.config["knerex"]["model_out_bitwidth_mode"] in ["int16"]:
# dongle output is 16B
str_search = "dma2seq_*.seq.16B"
else:
                    # 8B / 15B, can be compared with dynasty directly
str_search = "dma2seq_*.seq"
else:
str_search = "node_*_final_output.txt"
p_csim = pathlib.Path(self.io_nodes[("btm_csim_path", hw_mode)]).glob(str_search)
            # NOTE: no btm against dynasty here
# p_dynasty = self.io_nodes[("btm_dynasty_golden_txt_path", hw_mode)]
# set_dynasty = set(futils.md5sum(str(a)) for a in p_dynasty)
set_nef = set(futils.md5sum(str(a)) for a in p_nef)
set_csim = set(futils.md5sum(str(a)) for a in p_csim)
if set_nef != set_csim:
msg = f"mismatched: {set_nef.difference(set_csim)}"
self.model_fx_report[f"kdp{hw_mode}/btm"] = msg
raise RegressionError(f"kdp{hw_mode}/btm csim_vs_dongle", self.model_id, msg=msg)
        except RegressionError:
            # re-raise the mismatch error as-is, keeping its msg
            raise
        except Exception as e:
            print_err(e, self.config["regression"]["print_error"])
            raise RegressionError(f"kdp{hw_mode}/btm csim_vs_dongle", self.model_id)

@run_module(module_name="auto/btm_dyn_kneron+")
def btm_dyn_nef_kneron_plus(self, *, hw_mode, number_try):
"""dynasty vs nef, 520/530/720
        # NOTE: we assume NEF will only run on big_model
        # if it needs to run on stc, the csim reference may need adjusting; refer to btm_dyn_csim
"""
module_name = f"kdp{hw_mode}/btm dyn_vs_kneron+ ({number_try})"
self.logger.info("check {}".format(module_name))
try:
dir_kneron_plus_output = self.io_nodes[("btm_nef_kneron_plus_path", hw_mode, number_try)]
# Multiple outputs possible
golden_list = self.io_nodes[("btm_dynasty_golden_txt_path", hw_mode)]
for i in range(len(golden_list)):
fn_dyn_out = str(golden_list[i])
assert os.path.exists(fn_dyn_out), "dynasty {} output ({}) does not exist!".format(hw_mode, fn_dyn_out)
fn_kneron_plus = "{}/{}".format(dir_kneron_plus_output, str(golden_list[i]).split("/")[-1])
# TODO: @weijie we can use futils.md5sum for fx results now.
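                # Sketch of that md5 check (mirrors btm_dyn_csim; only valid once
                # both sides dump identical fixed-point text):
                #   assert futils.md5sum(fn_kneron_plus) == futils.md5sum(fn_dyn_out)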
with open(fn_kneron_plus, "r") as f_kneron_plus, open(fn_dyn_out, "r") as f_dyn:
out_kneron_plus = [int(float(a)) for a in f_kneron_plus]
out_dyna = [int(a) for a in f_dyn]
assert len(out_kneron_plus) == len(out_dyna), "dynasty dump size ({}) != kneron plus dump size ({})".format(len(out_dyna), len(out_kneron_plus))
                    assert all(a == b for a, b in zip(out_kneron_plus, out_dyna)), "dynasty-kneron plus mismatch!"
        except Exception as e:
            print_err(e, self.config["regression"]["print_error"])
            # keep the assertion message so the report shows what mismatched
            raise RegressionError(module_name, self.model_id, msg=str(e))

@run_module(module_name="general/combine_snr")
def generate_snr_report(self, base_dump="results"):
"""Generate an overall snr report from per-input-group snr reports.
"""
self.logger.info("generate snr report")
do_pc = self.config["snr"]["per_channel"]
do_plot_pc = self.config["snr"]["plot_snr_per_channel"]
        combine_snr("{}/{}".format(self.path["dir_output"], base_dump), do_per_channel=do_pc, do_plot_per_channel=do_plot_pc)

def save_command(self, module_name, command):
self.commands.append((module_name, command))
        print_command(command, self.config["regression"]["print_command"])

def generate_bash_script(self):
"""put all bash script called for this model in the flow into a bash script for future debug.
Scripts specified for this model:
- knerex: weight analysis, data analysis ...
- dynasty: multiple inputs, multiple modes ...
        Each command is saved to self.commands before being executed.
"""
if not hasattr(self, "commands") or len(self.commands) == 0:
return
with open(self.path["fn_cmd"], "w") as f:
for submodule, command in self.commands:
f.write(f"# {submodule}\n")
f.write(command)
f.write("\n\n")
def pre_clean_up(self, base_dump="results"):
"""delete temp files / outputs before flow actually start."""
try:
flags = self.config["pre_clean_up"]
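            # expected flag keys, matching the lookups below (all booleans):
            #   all_output, knerex_analysis, knerex_output, dynasty_output, compiler_output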
dir_o = pathlib.Path(self.path["dir_output"])
# self.logger.debug("pre clean up {}/{}".format(self.cat_name, self.model_name))
if flags["all_output"]:
command = f"rm -rf {dir_o}"
cp = futils.run_bash_script(command)
if cp.returncode > 0:
                    self.logger.warning(f"output folder ({dir_o}) cannot be deleted.")
dir_o.mkdir(mode=0o770, parents=True, exist_ok=True)
return
if flags["knerex_analysis"]:
for fn in dir_o.glob("analysis_*"):
fn.unlink()
if flags["knerex_output"]:
for fn in dir_o.glob("{}*scale*.onnx*".format(self.model_name)):
fn.unlink()
for fn in dir_o.glob("{}*scale*.bie*".format(self.model_name)):
fn.unlink()
if flags["dynasty_output"]:
for fn in dir_o.glob(base_dump):
shutil.rmtree(str(fn), ignore_errors=True)
if flags["compiler_output"]:
for fn in dir_o.glob("compiler_output_*"):
shutil.rmtree(str(fn), ignore_errors=True)
except (KeyError, TypeError):
self.logger.error("pre clean up not configured. skip ...")
def clean_knerex_output(self):
# TODO
        raise NotImplementedError

def clean_dynasty_output(self, dir_output_list):
try:
config_clean = self.config["post_clean_up"]["dynasty_output"]
clean_only_success = self.config["post_clean_up"]["clean_when_success"]
is_success = self.module_status["general"]["Success"]
do_clean = config_clean and clean_only_success and is_success
        except Exception:
do_clean = False
if do_clean:
            # in some cases skip the clean-up: keep dynasty output for debugging when btm mismatched
if self.config["path"]["internal"]:
k = "btm_dyn_csim"
for hw_mode, status in self.module_status.items():
if k in status and not status[k]:
pp(f"{k} mismatch! skip post-clean dynasty output.") # noqa
return
for dir_o in dir_output_list:
p_o = pathlib.Path(dir_o)
if not p_o.exists():
continue
for dir_dumps in p_o.glob("mode_*"):
shutil.rmtree(str(dir_dumps))