#! /usr/bin/env python3
|
|
|
|
import os
|
|
import pathlib
|
|
import struct
|
|
import tempfile
|
|
|
|
import pickle
|
|
from collections import OrderedDict, defaultdict
|
|
|
|
import numpy as np
|
|
|
|
# 4 is best in test, reduce to 1/3 time.
# NOTE(review): numpy is already imported above, so this presumably targets
# the OpenMP thread count of the csim/dynasty binaries launched below — confirm.
os.environ["OMP_NUM_THREADS"] = "4"

from sys_flow_v2.kneron_round import kneron_round_array
import sys_flow_v2.flow_constants as fconsts
import sys_flow_v2.flow_utils as futils
import sys_flow_v2.compiler_v2 as compiler

# Verbose tracing is opt-in via the REGRESSION_DEBUG environment variable.
DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False
import snoop

# snoop decorators become no-ops unless DEBUG is set.
snoop.install(enabled=DEBUG)


###################################################################################
# setup binary
###################################################################################

# Folder holding the dynasty shared library; exported onto LD_LIBRARY_PATH
# before each csim invocation (see run_csim()).
p_dynasty_so = pathlib.Path(fconsts.BIN_SET["dynasty"]["lib.so"])
ENV_DYNASTY_LIB = f"""export LD_LIBRARY_PATH="{p_dynasty_so.parent}:$LD_LIBRARY_PATH" """
# v2 data_converter binary; used to build commands in data_convert_1().
BIN_DC = fconsts.BIN_SET["data_converter"]["v2"]
|
|
|
|
|
|
###################################################################################
|
|
# convert input to bin file
|
|
###################################################################################
|
|
def array_fl2fx(np_arr, bitwidth, radix, scales=None, i_dim_is_ch=1, round_process="array"):
    """Convert (1 input) numpy float to integer.

    WARNING: ``np_arr`` is scaled and clipped IN-PLACE; pass a copy if the
    caller still needs the float data afterwards.

    .. parameters::

        - np_arr: float array to quantize (modified in place).
        - bitwidth: target bitwidth. 15 is promoted to 16B16R to comply with
          firmware. 15bit input: https://redmine.kneron.tw/issues/18689
        - radix / scales: scalar or per-channel values; expanded to the size
          of the channel dimension. ``scales`` defaults to 1.0.
        - i_dim_is_ch: index of the channel dimension in ``np_arr``.
        - round_process (from 0.27.0):
            - "array": send in flattened array, loop in cython with kneron_round. 0.54s for 12M points.
            - "no_btm": use np to do int32 convert. DONT use this. not for bit-true-match. for speed check only. 0.082 for 12M points. (0.05 if no convert)

    Returns:
        flattened integer numpy array.

    Raises:
        ValueError: on an unknown ``round_process``.

    .. seealso::

        - `dynasty clamp <https://gitlab.kneron.tw/SYS/dynasty_fx/-/blob/master/include/common/FixUtils.h#L317>`_
        - `dynasty quantization <https://gitlab.kneron.tw/SYS/dynasty_fx/-/blob/master/include/common/FixUtils.h#L763>`_
        - `kneron round <https://gitlab.kneron.tw/SYS/dynasty_fx/-/blob/master/src/dsp/common_utils.c#L414>`_
    """
    # TODO: apply per-channel-scale to this_np with scales + i_dim_is_ch
    # size should match

    # get per-channel radix
    n_dim = np_arr.shape[i_dim_is_ch]
    radix = futils.expand_array(radix, n_dim).astype('float32')

    if scales is None:
        scales = 1.0
    scales = futils.expand_array(scales, n_dim)
    # radix / scales must be list of n_dim size

    # NOTE: use 16B16R for bitwidth == 15 to comply with firmware
    # https://redmine.kneron.tw/issues/18689
    if bitwidth == 15:
        bitwidth = 15 + 1
        radix = radix + 1

    # clamp boundaries of the signed integer range
    v_max = (1 << (bitwidth - 1)) - 1
    v_min = -(1 << (bitwidth - 1))

    # expand per-channel value to matrix broadcastable against np_arr
    scale_expand_dim = list(range(len(np_arr.shape)))
    scale_expand_dim.remove(i_dim_is_ch)
    scales *= 2 ** radix  # in-place
    scl_matx = np.expand_dims(scales, scale_expand_dim)

    np_arr *= scl_matx  # scale in-place
    np.clip(np_arr, v_min, v_max, out=np_arr)  # clip in-place

    if round_process == "array":
        # this should be fast
        return kneron_round_array(np_arr.ravel(), bitwidth)
    elif round_process == "no_btm":
        # this is fastest! NOT calling kneron_round
        return np_arr.flatten().astype(np.int32)
    else:
        # BUGFIX: previously fell through and returned None silently.
        raise ValueError(f"unknown round_process: {round_process}")
|
|
|
|
|
|
def bw2fmt(bitwidth):
    """Map a bitwidth to its struct format character.

    Codes per https://docs.python.org/3/library/struct.html:
    "b" = int8, "h" = int16, "i" = int32, "q" = int64.
    """
    for limit, fmt in ((8, "b"), (16, "h"), (32, "i")):
        if bitwidth <= limit:
            return fmt
    return "q"  # anything wider than 32 bits
|
|
|
|
|
|
def fx2bin(arr_fx, fn_bin, bitwidth):
    """Write fx data to binary in sequential.

    Args:
        arr_fx: integer data (anything np.asarray accepts).
        fn_bin: output file path.
        bitwidth (int): 8, 15, 16 or 32.
            BUGFIX: 15 previously raised although the flow supports 15-bit
            inputs (https://redmine.kneron.tw/issues/18689); 15-bit values
            are stored in int16 containers, matching the 16B16R promotion
            done in array_fl2fx().

    Raises:
        ValueError: for any other bitwidth.
    """
    dtype_map = {8: np.int8, 15: np.int16, 16: np.int16, 32: np.int32}
    try:
        dtype = dtype_map[bitwidth]
    except KeyError:
        raise ValueError(f"Unsupported bitwidth: {bitwidth}") from None

    # must be C style that memory continues
    arr_fx = np.asarray(arr_fx, dtype=dtype, order="C")
    arr_fx.tofile(fn_bin)
|
|
|
|
|
|
def idx2onnx_order(info_in, n_sqt_bin):
    """Guess onnx input name order.

    Each pair of sqt_bin matches the onnx input nodes (say 2: A, B) while
    ioinfo.json may list more entries (say 5: A1, A2, B1, B2, B3).  The
    compiler guarantees that A-related nodes come before B-related ones.

    Returns:
        (dict, dict): name -> order, and order -> name.
    """
    names = [entry["name"] for entry in info_in]

    if len(names) == n_sqt_bin:
        # most of cases: a plain 1:1 mapping
        fwd, rev = {}, {}
        for idx, nm in enumerate(names):
            fwd[nm] = idx
            rev[idx] = nm
        return fwd, rev

    # now each sequential.bin may convert to multiple dram.bin
    # TODO: verify names should be continues (same name stays together)
    fwd, rev = {}, {}
    for nm in names:
        if nm in fwd:
            continue
        pos = len(fwd)
        fwd[nm] = pos
        rev[pos] = nm
    assert len(fwd) == n_sqt_bin

    return fwd, rev
|
|
|
|
|
|
def txt2bin_seq(in_pairs, ioinfo_in, p_out):
    """Convert float text files into sequential bin files.

    Kept for compatibility with the regression flow.
    """
    # NOTE:
    # in_pair(s) is specified by onnx, it may be [a.txt, b.txt, c.txt];
    # the compiler may specify dp_in to be [A, A, B, B, C] —
    # the same node may appear multiple times.
    # TODO: we assume same input node always have same quantization
    _, o_order2dp = idx2onnx_order(ioinfo_in, len(in_pairs[0]))

    # load float txt into np, grouped by dp name
    np_in = defaultdict(list)
    for in_pair in in_pairs:
        # each pair is independent of the others
        for i_txt, p_txt in enumerate(in_pair):
            dp_name = o_order2dp[i_txt]
            # use the first shape info given by compiler
            shape = [a["onnx_shape"] for a in ioinfo_in if a["name"] == dp_name][0]
            np_in[dp_name].append(futils.txt2np_fl(p_txt, shape))

    # there should be only 1 element
    return np2bin_seq(np_in, ioinfo_in, p_out)
|
|
|
|
|
|
def txt2bin_rgba(in_pairs: list, info_in: list, p_out):
    """Convert fl text to rgba bin file for 520.

    The info_in is from knerex json.

    Args:
        in_pairs (list): list of list, inside list is a pair of input.
        info_in (list): quantization info per input node.
        p_out (pathlib or str): where to save converted.bin files.

    Returns:
        list: bin_pair, [[in1_pair1, in1_pair2], [in2_pair1, in2_pair2]].

    Special process for 520 csim input:
    1. minimal 4 channels.
    2. columns (w) should be multiple of 16.
    3. channel last.
    """
    # quick check on inputs.
    l1 = len(in_pairs[0])
    l2 = len(info_in)
    assert l1 == l2, f"in_pairs len: {l1}, info_in len: {l2}"

    # docstring allows str: normalize so the "/" operator below works.
    p_out = pathlib.Path(p_out)

    # Load float txt into np
    bin_pair = [[] for _ in in_pairs[0]]
    for i_pair, in_pair in enumerate(in_pairs):
        for i_dp, p_txt in enumerate(in_pair):
            # NOTE: 520 only support single input models (except stc.)
            # so i_dp is always 0
            shape = info_in[i_dp]["onnx_shape"]
            bw = info_in[i_dp]["bitw"]
            radix = info_in[i_dp]["radix"]
            scale = info_in[i_dp]["scale"]

            # load the float data (path or already-loaded array)
            if isinstance(p_txt, (str, pathlib.Path)):
                fl = futils.txt2np_fl(p_txt, shape)
            elif isinstance(p_txt, np.ndarray):
                fl = p_txt
            else:
                raise NotImplementedError()

            # padding to >= 4 dimension
            n_dim = len(fl.shape)
            if n_dim < 4:
                fl = np.expand_dims(fl, list(range(4 - n_dim)))
                n_dim = len(fl.shape)
            # assert n_dim == 4
            # NOTE: will fail when n_dim > 4

            # channel last
            fl = np.moveaxis(fl, 1, 3)

            # pad channel to 4, columns to 16x
            d_padding = [[0, 0] for _ in range(n_dim)]
            do_pad = False
            if fl.shape[-1] < 4:
                # padding channel to 4
                n_pad = 4 - fl.shape[-1]
                d_padding[-1][-1] = n_pad
                arr_pad = np.zeros(n_pad)  # zero is fine.
                radix = np.append(radix, arr_pad)
                scale = np.append(scale, arr_pad)
                do_pad = True
            if fl.shape[-2] % 16 != 0:
                # padding columns
                d_padding[-2][-1] = 16 - fl.shape[-2] % 16
                do_pad = True
            if do_pad:
                fl = np.pad(fl, d_padding, mode="constant", constant_values=0)

            # quantization. for the 1st and only one input
            np_fx = array_fl2fx(fl, bw, radix, scale, n_dim - 1)

            # save to bin
            # BUGFIX: the input-index field used i_pair twice before; use
            # i_dp so the name matches the csim_p{pair}_i{input} scheme used
            # by data_convert().
            p_bin = p_out / f"csim_p{i_pair:06}_i{i_dp:03}_rgba.bin"
            fx2bin(np_fx, p_bin, bw)
            bin_pair[i_dp].append(p_bin)

    # bin_pair is [[in1_pair1, in1_pair2], [in2_pair1, in2_pair2]]
    return bin_pair
|
|
|
|
|
|
def np2bin_seq(np_in, ioinfo_in, p_out=None, round_process="array"):
    """Convert numpy array to sequential bin file.

    Args:
        np_in: dict of {dp_name: [np arrays, one per pair]}, or a path to a
            pickle of such a dict.
        ioinfo_in (list): quantization info per input node (names may repeat).
        p_out: output folder; a temp folder is created when None.
        round_process (str): forwarded to array_fl2fx().

    Returns:
        OrderedDict: {i_pair: [bin paths, one per dp, in dp order]}.
    """
    # TODO: internal process assume np_in as list of list
    # load np_in if not yet
    # BUGFIX: was `type(np_in) in [str, pathlib.PosixPath]`, which misses
    # WindowsPath and Path subclasses; isinstance on pathlib.Path covers all.
    if isinstance(np_in, (str, pathlib.Path)):
        # NOTE: pickle is only safe on trusted, locally produced files.
        with open(np_in, "rb") as f:
            np_in = pickle.load(f)

    # NOTE: ioinfo_in may have duplicated dp names: [A, A, B, B, C]
    # we assume same DP have SAME quantization.
    # use ioinfo_uni for now
    def keep_unique(items):
        """Keep the first occurrence of each dp name, preserving order."""
        seen = set()
        results = []
        for a in items:
            if a["name"] not in seen:
                seen.add(a["name"])
                results.append(a)
        return results

    ioinfo_uni = keep_unique(ioinfo_in)

    # unpack data
    dp_in = [a["name"] for a in ioinfo_uni]
    bitwidth = [a["bitw"] for a in ioinfo_uni]
    radix = [a["radix"] for a in ioinfo_uni]
    scales = [a["scale"] for a in ioinfo_uni]
    i_dim_is_ch = [a["ch_dim"] for a in ioinfo_uni]
    # shape not used. maybe to check with np_in array shape?

    # sanity check
    assert set(np_in.keys()) == set(
        dp_in
    ), f"dp_in \"{dp_in}\" should be same as np_in \"{list(np_in.keys())}\""

    temp = [len(v) for v in np_in.values()]
    assert len(set(temp)) == 1, f"np list should have same length, but got {temp}"
    N_pairs = temp[0]

    if p_out is None:
        p_out = tempfile.mkdtemp(prefix="csim_")
    p_out = pathlib.Path(p_out)
    p_out.mkdir(exist_ok=True, parents=True)

    # TODO: save the radix/ scale quantization info

    bin_in_pairs = OrderedDict()
    for i_pair in range(N_pairs):
        pair = []
        for i_dp, dp_name in enumerate(dp_in):
            p_bin = p_out / f"seq_p{i_pair:06}_node{i_dp:03}.bin"
            pair.append(p_bin)
            this_np = np_in[dp_name][i_pair]

            ## channel dimension is not in onnx shape, need to expand
            if i_dim_is_ch[i_dp] == -1:
                this_np = np.expand_dims(this_np, 1)
                # i_dim_is_ch[i_dp] = 0

            # the real work done here
            np_fx = array_fl2fx(this_np,
                                bitwidth[i_dp],
                                radix[i_dp],
                                scales[i_dp],
                                i_dim_is_ch[i_dp],
                                round_process=round_process)
            fx2bin(np_fx, p_bin, bitwidth[i_dp])

        # NOTE: the pair_name include input pair order.
        # will use this to return inferenced results
        bin_in_pairs[i_pair] = pair

    return bin_in_pairs
|
|
|
|
|
|
def data_convert_1(bin_seq, bin_rgba,
                   fmt_in: str, fmt_out: str,
                   stride_in: list, stride_out: list,
                   shape: list, ndim: int):
    """Build the shell command converting one sequential.bin to rgba.bin.

    Returns the command string only; nothing is executed here.
    """
    # TODO: what if fmt_out is RAW_FLOAT / RAW_8 / RAW_16?
    # seems dynasty dump it previously
    def _csv(values):
        """Render a list as the comma-separated form the converter expects."""
        return ",".join(str(v) for v in values)

    # always use --nhwc 1 for ng1 730
    # --is_ng1 is same as --nhwc
    cmd = (
        f"{BIN_DC} --fin {bin_seq} --fout {bin_rgba}"
        f" --dim {ndim} --shape {_csv(shape)}"
        f" --fmt_in {fmt_in} --fmt_out {fmt_out}"
        f" --stride_in {_csv(stride_in)} --stride_out {_csv(stride_out)}"
        f" --is_ng1 1"
    )
    return cmd
|
|
|
|
|
|
def data_convert(sqt_bin_list: dict, ioinfo_in: list, p_out=None, n_thread=4):
    """Convert list of sequential.bin to csim_input.bin, based on compiler info.

    * NOT for 520

    Args:
        sqt_bin_list (dict): {i_pair: [sequential bin paths]}.
        ioinfo_in (list): per-input compiler info (shape/stride/format/...).
        p_out: output folder; a temp folder is created when None.
        n_thread: --jobs for GNU parallel; None lets parallel decide.

    Returns:
        (dict, list): {i_pair: [csim bin paths]} and the commands that ran.

    Raises:
        RuntimeError: when the data converter returns non-zero.
    """
    # prepare for output folder
    if p_out is None:
        p_out = tempfile.mkdtemp(prefix="dc_")
    p_out = pathlib.Path(p_out)
    p_out.mkdir(exist_ok=True, parents=True)

    def bw2raw_fmt(bw):
        # the in.bin in regression is always raw format.
        if bw == 8:
            return "FMT_RAW8B"
        elif bw in [15, 16]:
            # https://redmine.kneron.tw/issues/18706
            return "FMT_RAW_NPU16B"
        else:
            raise NotImplementedError(f"unsupported bitwidth: {bw}.")

    # For inproc formats, convert before sending to the data converter.
    # Reference:
    # - https://redmine.kneron.tw/issues/23754
    # - compiler_v2/get_support_formats() function.
    # NOTE(review): HW4C8B_KEEP_A maps to 4C8B_DROP_A exactly like DROP_A —
    # preserved from the original; confirm against the references above.
    inproc_fmt_map = {
        "HW1C8B": "RAW8B",
        "HW4C8B_DROP_A": "4C8B_DROP_A",
        "HW4C8B_KEEP_A": "4C8B_DROP_A",
        "HW1C16B_LE": "RAW16B",
        "HW1C16B_BE": "RAW16B_BE",
    }

    def convert_inproc_fmt(fmt1):
        """Map an inproc format name; unknown names pass through unchanged."""
        return inproc_fmt_map.get(fmt1, fmt1)

    # ordered dictionary
    n_sqt_bin = len(sqt_bin_list[0])
    name2o_ord, _ = idx2onnx_order(ioinfo_in, n_sqt_bin)

    cmds = []  # save the commands for debug
    list_bin_csim = defaultdict(list)
    for i_in, i_info in enumerate(ioinfo_in):
        shape = i_info["shape"]
        ndim = len(shape)
        # stride in / out are same, per Tommy.
        # TEMP: use -1 for now.
        stride_in = [-1]
        # stride_aligned in .kne.no_binary.json but will convert to stride
        stride_out = i_info["stride"]

        fmt_in = bw2raw_fmt(i_info["bitw"])
        fmt_out = "FMT_{}".format(convert_inproc_fmt(i_info["data_format"]))
        sqt_idx = name2o_ord[i_info["name"]]
        for i_pair, bin_pair in sqt_bin_list.items():
            p_sqtl_bin = bin_pair[sqt_idx]
            p_csim_bin = p_out / f"csim_p{i_pair:06}_i{i_in:03}.bin"
            list_bin_csim[i_pair].append(p_csim_bin)
            cmd1 = data_convert_1(p_sqtl_bin, p_csim_bin,
                                  fmt_in, fmt_out, stride_in, stride_out,
                                  shape, ndim)
            cmds.append(cmd1)

    # save cmds then run
    fn_cmd = p_out / "data_convert.sh"
    with open(fn_cmd, "w") as f:
        f.writelines([f"{cmd}\n" for cmd in cmds])

    n_str = "" if n_thread is None else f"--jobs {n_thread}"
    command = f"parallel {n_str} --halt now,fail=1 < {fn_cmd}"

    cp = futils.run_bash_script(command)
    if cp.returncode != 0:
        raise RuntimeError(f"data converter failed with {cp.returncode}")

    return list_bin_csim, cmds
|
|
|
|
|
|
###################################################################################
|
|
# do real work here
|
|
###################################################################################
|
|
def gen_csim_settings(bin_pair: list,
                      cmpl_map,
                      p_cmpl_rel,
                      hw_mode,
                      dump_core_opt=0,
                      golden_txt=None):
    """Generate csim settings.

    golden_txt should be relative path.
    """
    # convert compiler dump files to paths relative to the ini
    settings = {key: p_cmpl_rel / full.name for key, full in cmpl_map.items()}

    # fill none for unused entries, depending on the hardware mode
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:
        for unused_key in ("command_bin", "weight_bin", "setup_bin", "apb_npu"):
            settings[unused_key] = "None"
        # in regression / unpack_nef, the kne will have only 1 model.
        # (it is possible to have multiple)
        # settings["model_index_in_kne"] = 0
    else:
        settings["kne"] = "None"

    # TODO: make this flow_constants
    # input_location: 0 for nmem, 1 for dram
    input_in_dram = hw_mode in fconsts.MODE_HW_LIMIT["input_in_dram"]
    settings["input_location"] = 1 if input_in_dram else 0

    settings["input_bin"] = ",".join(str(a) for a in bin_pair)
    settings["dump_core_opt"] = dump_core_opt

    # NOTE: if golden_text available, csim own regression may use it for quick check
    if golden_txt is None:
        settings["golden_txt"] = "NONE"
    else:
        # convert relative path
        settings["golden_txt"] = ",".join(str(a) for a in golden_txt)

    return settings
|
|
|
|
|
|
def gen_csim_ini(bin_pair,
                 p_compiler,
                 hw_mode,
                 template,
                 fn_ini,
                 dump_core_opt=0,
                 golden_txts=None):
    """Generate a ini for csim calling per input pair."""
    if hw_mode == 520:
        # 520 csim doesn't use .ini it's just for regression flow usage
        return

    # every path written into the ini is relative to the ini's folder
    p_ini = fn_ini.parent

    # input bin paths
    p_in_rel = futils.relative_path(bin_pair[0].parent, p_ini)
    bin_pair_rel = [p_in_rel / a.name for a in bin_pair]

    # compiler dump paths
    p_comp_rel = futils.relative_path(p_compiler, p_ini)
    compiler_map = compiler.locate_compiler_dump(p_compiler, hw_mode)

    # optional golden txt paths
    if golden_txts is None:
        p_golden_txt = None
    else:
        p_golden_rel = futils.relative_path(golden_txts[0].parent, p_ini)
        p_golden_txt = [p_golden_rel / a.name for a in golden_txts]

    csim_settings = gen_csim_settings(bin_pair_rel,
                                      compiler_map,
                                      p_comp_rel,
                                      hw_mode,
                                      dump_core_opt=dump_core_opt,
                                      golden_txt=p_golden_txt)

    # render template and save to run_csim.ini
    rendered = template.render(model=csim_settings)
    with open(fn_ini, "w") as f:
        f.write(rendered)
    assert pathlib.Path(fn_ini).exists(), f"failed to create {fn_ini}"

    return fn_ini
|
|
|
|
|
|
def run_csim(list_csim: dict, bin_csim, sh_run_csim=None, n_thread=None, dry_run=False, timeout=3600*6):
    """Run csim inference on given input.

    NOTE: we need bin_csim to pass in as it is platform-dependant
    so we cannot grab from fconsts.BIN_SET directly without platform info.

    Args:
        list_csim (dict): {i_csim: (p_out folder, ini path)} per pair.
        bin_csim: path to the csim binary.
        sh_run_csim: where to write the generated script; a unique temp
            file is created when None.
        n_thread: --jobs for GNU parallel; None lets parallel decide.
        dry_run (bool): build the script/command without executing.
        timeout: seconds, forwarded to run_bash_script.

    Returns:
        (str, CompletedProcess or None): the parallel command and its
        result (None when dry_run).
    """
    cmds = []
    for i_csim, (p_out, ini_csim) in list_csim.items():
        # prepare folders
        p_out.mkdir(mode=0o770, parents=True, exist_ok=True)
        cmds.append(f"{ENV_DYNASTY_LIB}; pushd {p_out} && {bin_csim} {ini_csim}")

    if sh_run_csim is None:
        # BUGFIX: was a predictable /tmp/run_csim_{np.random}.sh name, which
        # is collision-prone and a classic tmp-file race; mkstemp creates a
        # unique file with safe permissions.
        fd, sh_run_csim = tempfile.mkstemp(prefix="run_csim_", suffix=".sh")
        os.close(fd)
    with open(sh_run_csim, "w") as f:
        f.write("\n".join(cmds))

    n_str = "" if n_thread is None else f"--jobs {n_thread}"
    command = f"parallel {n_str} --halt now,fail=1 < {sh_run_csim}"

    if dry_run:
        cp = None  # placeholder
    else:
        cp = futils.run_bash_script(command, timeout=timeout)

    return command, cp
|
|
|
|
|
|
def txt2np(out_node_list, out_nodes_shape, output_list, is_520=False):
    """Convert the csim dumped results to np.

    - csim will only dump fx data. need extra info to convert back to float.
    - csim dump files as `dma2seq_0.seq`

    Args:
        out_node_list (list): list of output node names.
            As csim dump multiple output in same order,
            this list will be used as key for output dict.
        out_nodes_shape (dict): shape per output node name.
            The integer text will be reshaped to given shape in numpy.
        output_list: where to find csim dump text files.
            basically `list(p_dump.glob(f"*.txt/csim_{hw_mode}"))`.
        is_520 (bool): different name pattern for 520 csim dump.

    Returns:
        dict of list of numpy array.
        Each array is integer datatype and with onnx shape.

    Raises:
        FileNotFoundError: when an expected dump file is missing.
    """
    collect_txt_fx = defaultdict(list)
    for i_dp, dp_out in enumerate(out_node_list):
        shape_out = out_nodes_shape[dp_out]

        if is_520:
            fx_output = f"node_{i_dp:04d}_final_output.txt"
        else:
            # NOTE: .seq is only 8/15bit, no 16bit
            fx_output = f"dma2seq_{i_dp}.seq"

        for p_dump in output_list:
            p_fx = pathlib.Path(p_dump) / fx_output
            if not p_fx.exists():
                # BUGFIX: was FileExistsError, which signals the opposite
                # condition ("file already exists"); the file is MISSING here.
                raise FileNotFoundError(f"missing {p_fx}")
            collect_txt_fx[dp_out].append(futils.txt2np_fx(p_fx, shape_out))

    return collect_txt_fx
|