#! /usr/bin/env python3


import pathlib
from collections import OrderedDict
import os
import tempfile
import shutil

import pytz

from jinja2 import Environment, FileSystemLoader

import sys_flow_v2.flow_utils as futils
import sys_flow_v2.flow_constants as fconsts
import sys_flow_v2.dynasty_v3 as dynasty
import sys_flow_v2.csim_utils as csim

DEBUG = bool(os.environ.get("REGRESSION_DEBUG", False))
import snoop
snoop.install(enabled=DEBUG)

# find the binaries first
dynasty_bin = fconsts.BIN_SET["dynasty"]["binary"]
dynasty_so = fconsts.BIN_SET["dynasty"]["lib.so"]
dynasty_cuda_so = fconsts.BIN_SET["dynasty"]["lib_cuda.so"]
bin_dc = fconsts.BIN_SET["data_converter"]["v2"]

timezone = pytz.timezone("America/Los_Angeles")

P_TMP_MODEL = pathlib.Path("/tmp/working")


def inference_onnx_runtime(
        fn_onnx,
        input_np,
        device="gpu",
        p_working="/tmp",
        shape_in="onnx_shape",
        dump_level=0
):
    raise NotImplementedError()


def get_model_io(p_onnx, hw_mode=None):
    """Interface to get ioinfo from onnx/bie.

    dynasty / csim flatten data to one dimension and then dump it to text.
    The input node name list and the output node (onnx) shapes
    are needed when converting the flattened data back to onnx shape.

    Args:
        p_onnx
            onnx or bie file to examine.
        hw_mode: int
            520/720/...
            Required if the given p_onnx is a bie file.

    Returns:
        - list of `input_nodes`
        - list of `output_nodes`
        - shape of output nodes
        - dict of ioinfo json files per platform.

    TODO:
        move to flow_utils.

    NOTE:
        ioinfo includes radix info, which is needed by e2e.
    """
    # make sure it is pathlib
    p_onnx = pathlib.Path(p_onnx)
    # get input_nodes names
    if p_onnx.name.endswith(".onnx"):
        input_nodes, output_nodes, out_node_shape, _ = futils.get_ioinfo_from_onnx(p_onnx)
    elif p_onnx.name.endswith(".bie"):
        assert hw_mode is not None, "get_model_io: given bie file, hw_mode is needed."
        input_nodes, output_nodes, out_node_shape, _ = futils.get_ioinfo_from_bie(
            p_onnx, hw_mode, dynasty_bin)
        # dynasty dumps float results directly, no need to convert fx to fl
    else:
        raise NotImplementedError

    # 0.24.0 ioinfo is needed for dynasty.
    # keep it (d_ioinfo) here for backward compatibility
    d_ioinfo = {}

    return input_nodes, output_nodes, out_node_shape, d_ioinfo


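# Minimal usage sketch for get_model_io (the model paths below are placeholders,
# not part of the regression flow):
#
#     input_nodes, output_nodes, out_node_shape, _ = get_model_io("models/origin.onnx")
#     input_nodes, output_nodes, out_node_shape, _ = get_model_io("models/quantized.bie", hw_mode=720)
#
# out_node_shape is what the inference_* helpers below use to fold the flattened
# dynasty/csim dumps back into onnx-shaped numpy arrays.

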
def get_model_type(p_onnx):
    """Get model type from the file name.

    Why not put this in gen_dynasty_mode_settings?
    Because it would be complicated to support the many regression debug purposes there.
    """
    if p_onnx.name.endswith(".onnx"):
        return "piano_onnx"
    elif p_onnx.name.endswith(".bie"):
        return "piano_bie"
    else:
        raise NotImplementedError


def inference_dynasty_fl_so(
        fn_onnx,
        input_np,
        input_nodes=None,  # list of input nodes, to know the order of input nodes
        out_node_shape=None,  # need this to convert dynasty_fl dump to onnx shape
        device="dynasty_cpu",
        p_working=None,
        shape_in="onnx_shape",
        dump_level=0
):
    """
    Run inference on the given model (fn_onnx) with data `input_np`.

    Runs `dynasty float` mode on this model with the given input.
    Uses the dynasty shared library for the inference.

    Args:
        fn_onnx (pathlib / str): path to origin.onnx.
        input_np (dict): dict of lists of numpy arrays. See the beginning of the module for details.
        input_nodes (list): a list of input node names.
            This indicates the order of input nodes so the numpy arrays in the dict
            can be passed to dynasty in the correct order.
            If not set, this function will call `get_model_io` to
            get `input_nodes` and `out_node_shape`.
        out_node_shape (list): a list of shapes for the output nodes.
            This is necessary to convert flattened data back to onnx shape.
        device (str): choose which method for inference.

            - `dynasty_cpu` (default), using the kneron dynasty library.
              included in kneron/toolchain.
            - `dynasty_gpu`, using the kneron dynasty library.
              included in kneron/toolchain. Needs to run in a toolchain with cuda support.
            - `ort_gpu`, using onnx runtime with GPU. (not ready).
              Use this for acceleration, but only applicable when a GPU is available.
            - `ort_cpu`, using onnx runtime with CPU. (not ready).
              some kneron customized nodes may not be supported.

        p_working (pathlib / str): where to put cache files.
            The user needs to clean this folder to release disk space.
            e2e will give a different one for each image.
        shape_in (str): choose from `onnx_shape` (default) / `channel_last` (will be obsoleted).
        dump_level:

            - 0 (default): dump inference results for output nodes only.
            - 1: dump inference results for output nodes + cpu nodes.
            - 2: dump inference results for all nodes.

    Returns:
        dict of lists of numpy arrays as inference results. Each numpy array has the shape the onnx specifies.
    """
    # prepare folder and files
    if p_working is None:
        p_working = tempfile.mkdtemp("dyn_fl_")
    p_working = pathlib.Path(p_working)
    p_onnx = pathlib.Path(fn_onnx)
    assert p_onnx.exists(), f"{fn_onnx} does not exist!"

    assert device in ["dynasty_cpu", "dynasty_gpu", "ort_cpu", "ort_gpu"], \
        f"device should be dynasty_cpu/dynasty_gpu/ort_cpu/ort_gpu, but given {device}"

    model_name = futils.clean_name(p_onnx.name)
    model_id = f"tc/{model_name}"

    # setup for inference.
    np_id = futils.gen_random_string(8)
    # will run only float mode on the given onnx
    # NOTE: we assume input does not change!
    p_input = p_working / f"model_input_{np_id}"
    # NOTE: each inference will have a different results folder.
    p_output = p_working / f"res_{np_id}"

    # prepare text input
    if input_nodes is None or out_node_shape is None:
        # e2e needs to call this if it wants to run inference one by one
        input_nodes, _, out_node_shape, _ = get_model_io(p_onnx)
    ch_last = shape_in == "channel_last"
    _, grouped_input_list, _ = futils.npy2txt(input_np, input_nodes, p_input,
                                              exists_then_skip=True,
                                              compression="txt", ch_last=ch_last)

    if "gpu" in device:
        lib = dynasty_cuda_so
    else:
        lib = dynasty_so

    if "ort" in device:
        ort = True
    else:
        ort = False

    # NOTE: use grouped_input_list[0] since E2E does one input at a time
    dynasty.run_dynasty_so(lib,
                           fn_onnx,
                           len(input_nodes),
                           grouped_input_list[0],
                           input_nodes,
                           str(p_working),
                           ort)

    # convert dynasty dump text to np
    np_out = dynasty.txt2np_so(out_node_shape, p_working)

    return np_out


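# Minimal usage sketch for inference_dynasty_fl_so (hypothetical node name,
# shape, and paths; input_np maps each input node name to a list of arrays):
#
#     import numpy as np
#     input_np = {"input.1": [np.random.rand(1, 3, 224, 224).astype(np.float32)]}
#     np_out = inference_dynasty_fl_so("models/origin.onnx", input_np,
#                                      device="dynasty_cpu",
#                                      p_working="/tmp/fl_so_demo")
#     # np_out: {output_node_name: [np.ndarray in onnx shape], ...}

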
def inference_dynasty_fl(
        fn_onnx,
        input_np,
        input_nodes=None,  # list of input nodes, to know the order of input nodes
        out_node_shape=None,  # need this to convert dynasty_fl dump to onnx shape
        device="dynasty",
        p_working=None,
        shape_in="onnx_shape",
        timeout=60 * 60 * 6,
        dump_level=0
):
    """
    Run inference on the given model (fn_onnx) with data `input_np`.

    Runs `dynasty float` mode on this model with the given input.
    Currently calls the dynasty binary for the inference.
    Maybe: use dynasty.so for inference (see inference_dynasty_fl_so).

    Args:
        fn_onnx (pathlib / str): path to origin.onnx.
        input_np (dict): dict of lists of numpy arrays. See the beginning of the module for details.
        input_nodes (list): a list of input node names.
            This indicates the order of input nodes so the numpy arrays in the dict
            can be passed to the dynasty binary in the correct order.
            If not set, this function will call `get_model_io` to
            get `input_nodes` and `out_node_shape`.
        out_node_shape (list): a list of shapes for the output nodes.
            This is necessary to convert flattened data back to onnx shape.
        device (str): choose which method for inference.

            - `dynasty` (default), using the kneron dynasty binary. included in kneron/toolchain.
            - `ort_gpu`, using onnx runtime with GPU. (not ready).
              Use this for acceleration, but only applicable when a GPU is available.
            - `ort_cpu`, using onnx runtime with CPU. (not ready).
              some kneron customized nodes may not be supported.
        p_working (pathlib / str): where to put cache files.
            The user needs to clean this folder to release disk space.
            e2e will give a different one for each image.
        shape_in (str): choose from `onnx_shape` (default) / `channel_last` (will be obsoleted).
        timeout (int): time limit in seconds for the dynasty run (default 6 hours).
        dump_level:

            - 0 (default): dump inference results for output nodes only.
            - 1: dump inference results for output nodes + cpu nodes.
            - 2: dump inference results for all nodes.

    Returns:
        dict of lists of numpy arrays as inference results. Each numpy array has the shape the onnx specifies.
    """
    # prepare folder and files
    if p_working is None:
        p_working = tempfile.mkdtemp("dyn_fl_")
    p_working = pathlib.Path(p_working)
    p_onnx = pathlib.Path(fn_onnx)
    assert p_onnx.exists(), f"{fn_onnx} does not exist!"

    assert device in ["dynasty", "ort_cpu", "ort_gpu"], \
        f"device should be dynasty/ort_cpu/ort_gpu, but given {device}"
    # TODO: if device is "cpu"/"gpu", call libdynasty.so / libdynasty_gpu.so

    model_name = futils.clean_name(p_onnx.name)
    model_id = f"tc/{model_name}"

    np_id = futils.gen_random_string(8)
    # setup input for inference.
    # NOTE: we assume input does not change!
    p_input = p_working / f"model_input_{np_id}"
    # NOTE: each inference will have a different results folder.
    p_output = p_working / f"res_{np_id}"

    # prepare text input
    if input_nodes is None or out_node_shape is None:
        # e2e needs to call this if it wants to run inference one by one, to save time.
        input_nodes, _, out_node_shape, _ = get_model_io(p_onnx)
    _, grouped_input_list, _ = futils.npy2txt(input_np, input_nodes, p_input, exists_then_skip=True)

    # prepare dynasty list
    mode_float = dynasty.gen_dynasty_mode_settings(
        "float", fn_onnx=p_onnx, onnx_map=None, model_id=model_id
    )

    d_list, dir_output_list = dynasty.gen_dynasty_list(
        [mode_float],
        grouped_input_list,
        input_nodes,
        p_output,
        dump_level=dump_level,
        shape_in=shape_in,
    )

    # run dynasty
    fn_dynasty_sh = p_working / f"run_dynasty_{np_id}.sh"
    cmds = dynasty.build_dynasty_cmd(d_list, dynasty_bin, fn_dynasty_sh)
    # TODO: add error processing.
    dynasty.run_dynasty_command_parallel(model_id, fn_dynasty_sh, timeout=timeout)

    # convert dynasty dump text to np
    np_out, _ = dynasty.txt2np(
        out_node_shape,
        output_list=dir_output_list,
        dmode="float",
        load_fl=True,
        load_fx=False,
    )

    # TODO: remove dir_output_list

    return np_out


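# Usage mirrors inference_dynasty_fl_so above, except that this variant writes a
# run_dynasty_*.sh script under p_working and drives the dynasty binary
# (possibly several inputs in parallel) instead of calling the shared library.
# A hypothetical call with placeholder paths:
#
#     np_out = inference_dynasty_fl("models/origin.onnx", input_np,
#                                   p_working="/tmp/fl_demo", dump_level=1)

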
def inference_dynasty_fx(
        fn_onnx,
        hw_mode,
        input_np,
        input_nodes=None,
        out_node_shape=None,
        p_working=None,
        shape_in="onnx_shape",
        timeout=60 * 60 * 6,
        dump_level=0,
        d_ioinfo=None
):
    """
    Run inference for a kneron platform (hw_mode) on the given model (fn_onnx) with data `input_np`.

    This call uses the kneron `dynasty_fx` binary (included in the toolchain)
    for the inference. The inference simulates one of the kneron NPU chips
    (specified by hw_mode).

    Args:
        fn_onnx (pathlib / str): path to the QUANTIZED MODEL (.bie file).
            `fn_onnx` must be generated for `hw_mode`.
            Sometimes a quantized onnx file can be passed in, but the accompanying
            quantization json must be in the same folder as the onnx. (Not tested)
        hw_mode (int): specify which platform to run.
        input_np (dict): dict of lists of numpy arrays. See the beginning of the module for details.
        input_nodes (list): a list of input node names.
            This indicates the order of input nodes so the numpy arrays in the dict
            can be passed to the dynasty binary in the correct order.
            If not set, this function will call `get_model_io` to
            get `input_nodes` and `out_node_shape`.
        out_node_shape (list): a list of shapes for the output nodes.
            This is necessary to convert flattened data back to onnx shape.
        p_working (pathlib / str): where to put cache files.
            The user needs to clean this folder to release disk space.
            e2e will give a different one for each image.
        shape_in (str): choose from `onnx_shape` (default) / `channel_last`
            (will be obsoleted).
        timeout (int): time limit in seconds for the dynasty run (default 6 hours).
        dump_level:
            - 0 (default): dump inference results for output nodes only.
            - 1: dump inference results for output nodes + cpu nodes.
            - 2: dump inference results for all nodes.
        d_ioinfo: obsoleted parameter, kept for compatibility with 0.25.0. Not used.

    Returns:
        * dict of lists of numpy arrays as inference results in floating point,
          with the shape specified by the onnx.
        * dict of lists of numpy arrays as inference results in fixed point,
          with the shape specified by the onnx.

    NOTE:
        input node shape:
            included in the np_in arrays.
        output node order:
            not needed, because results are loaded into a dictionary by name.
            The dynasty dump is name based.
    """
    # check p_working.
    if p_working is None:
        p_working = tempfile.mkdtemp("dyn_fx_")
    p_working = pathlib.Path(p_working)
    # Prepare constants

    # check file / folders
    p_onnx = pathlib.Path(fn_onnx)
    assert p_onnx.exists(), f"Given fix-point model: {p_onnx} does not exist!"

    np_id = "np_" + futils.gen_random_string(8)
    # NOTE: we assume input does not change!
    p_input = p_working / f"model_input_{np_id}"
    # NOTE: each inference will have a different results folder.
    p_output = pathlib.Path(p_working) / f"res_kdp{hw_mode}_{np_id}"

    model_name = futils.remove_appendix(p_onnx.name)
    model_id = f"tc/{model_name}"

    if input_nodes is None or out_node_shape is None:
        # try to avoid this operation to save time.
        # pass hw_mode so that .bie files can be parsed (see get_model_io).
        input_nodes, _, out_node_shape, _ = get_model_io(p_onnx, hw_mode=hw_mode)
    fx_model_type = get_model_type(p_onnx)

    # prepare text input
    _, grouped_input_list, _ = futils.npy2txt(input_np, input_nodes, p_input, exists_then_skip=True)

    # prepare dynasty list
    hw_mode = str(hw_mode)
    mode_fx = dynasty.gen_dynasty_mode_settings(
        hw_mode,
        fn_onnx=p_onnx,
        which_onnx=fx_model_type,
        onnx_map=None,
        model_id=model_id,
    )

    d_list, dir_output_list = dynasty.gen_dynasty_list(
        [mode_fx],
        grouped_input_list,
        input_nodes,
        p_output,
        dump_level=dump_level,
        shape_in=shape_in,
    )

    # run dynasty
    fn_dynasty_sh = p_working / f"run_dynasty_{np_id}.sh"
    cmds = dynasty.build_dynasty_cmd(d_list, dynasty_bin, fn_dynasty_sh)
    # TODO: add error processing.
    dynasty.run_dynasty_command_parallel(model_id, fn_dynasty_sh, timeout=timeout)

    # convert dynasty dump text to np format
    np_out_fl, np_out_fx = dynasty.txt2np(
        out_node_shape,
        output_list=dir_output_list,
        dmode=hw_mode,
        load_fl=True,
        load_fx=True,
    )

    # TODO: remove dir_output_list

    return np_out_fl, np_out_fx


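# Hypothetical usage sketch for inference_dynasty_fx (placeholder paths; the
# .bie must have been quantized for the requested hw_mode):
#
#     np_fl, np_fx = inference_dynasty_fx("models/quantized_720.bie", 720, input_np,
#                                         p_working="/tmp/fx_demo")
#     # np_fl holds de-quantized float outputs, np_fx the raw fixed-point ones,
#     # both keyed by output node name and reshaped to the onnx shapes.

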
def inference_csim_v2(p_model,
                      ioinfo,
                      input_np: dict,
                      hw_mode: int,
                      out_fmt="fl",
                      cleanup=False,
                      p_working=None,
                      p_out=None):
    """Run csim inference.

    NOTE:
        Need to call `unpack_nefs()` first to break up the nef and get the model address
        and ioinfo for this model.

    Args:
        p_model (pathlib / str): path to the unpacked model.
        ioinfo (dict): dict of input/output node info, e.g., quantization, shape.
        input_np (dict): dict of lists of numpy arrays. See the beginning of the module for details.
        hw_mode (int): specify which platform to run. It must match the nef file,
            as the nef is platform dependent.
        out_fmt (str): choose from below:

            - fl: the function will return a dict of lists of numpy arrays.
              Each array holds inference results in float format,
              with the shape specified by the onnx.
            - fx: the function will return a dict of lists of numpy arrays.
              Each array holds inference results in fixed-point format,
              with the shape specified by the onnx.
            - sqtl.bin: results are flattened and saved in bin format,
              directly dumped by csim. For debug purposes. (not ready yet)
            - dram.bin: results are kept as in dram and saved in bin format,
              directly dumped by csim. For debug purposes. (not ready yet)

        cleanup (bool): remove the cache folder before returning.
            No cleanup by default; leave it to the users.
        p_working (pathlib / str): where to put cache files.
            Users may use the same folder for multiple calls on the same model.
            (e2e will give a different one for each inference call.)
            The user needs to clean this folder to release disk space.
        p_out (pathlib / str): path to the output folder. Only applicable when
            `out_fmt` is set to `sqtl.bin` or `dram.bin`. (Not ready yet)

    Returns:
        See the `out_fmt` explanation.
    """
    # detour for 520
    if hw_mode == 520:
        return inference_csim_520(p_model, ioinfo, input_np,
                                  out_fmt,
                                  cleanup, p_working, p_out)

    # check parameters
    assert out_fmt in ["fl", "fx", "sqtl.bin", "dram.bin"]
    if out_fmt.endswith(".bin"):
        assert p_out is not None, "Please set the p_out parameter to save .bin files."
        raise NotImplementedError()

    p_model = pathlib.Path(p_model)
    assert p_model.exists(), f"{p_model} does not exist!"

    if p_working is None:
        p_working = tempfile.mkdtemp(prefix="csim_")
    # actually work under a RANDOM folder under the given p_working
    np_id = "np_" + futils.gen_random_string(8)
    p_working = pathlib.Path(p_working) / np_id

    # step 3: prepare input.bin
    csim_bin_list, p_working, p_results = data_converter(input_np, ioinfo["input"],
                                                         p_working=p_working)

    # step 5: prepare run_csim.ini
    file_loader = FileSystemLoader(f"{fconsts.P_FLOW}/template")
    jinja_env = Environment(loader=file_loader)
    template = jinja_env.get_template(f"run_csim_{hw_mode}.ini")
    csim_in_list = OrderedDict()
    dir_output_list = []
    for i, bin_pair in csim_bin_list.items():
        p_csim_dump = p_results / f"in{i:06}"
        p_csim_dump.mkdir(parents=True, exist_ok=True)

        # p_model as model input; the folder contains setup.bin / etc.
        fn_ini = p_results / f"in{i:06}_csim.ini"
        csim.gen_csim_ini(bin_pair, p_model, hw_mode,
                          template=template,
                          fn_ini=fn_ini)
        csim_in_list[i] = [p_csim_dump, fn_ini]
        dir_output_list.append(p_csim_dump)

    # step 6: call run_csim with parallel
    sh_csim = p_working / "run_csim.sh"
    bin_csim = fconsts.BIN_SET["csim"][hw_mode]
    cmd, cp = csim.run_csim(csim_in_list, bin_csim, sh_csim)
    assert cp.returncode == 0, f"csim failed with return code {cp.returncode}"

    # step 7: load csim results and return float data
    # convert csim dump text to np format
    # there is no float dump in csim output
    info_out = ioinfo["output"]
    out_node_list = [a["name"] for a in info_out]
    out_node_shape = {a["name"]: a["onnx_shape"] for a in info_out}
    out_ch_dim = {a["name"]: a["ch_dim"] for a in info_out}
    out_scale = {a["name"]: a["scale"] for a in info_out}
    out_radix = {a["name"]: a["radix"] for a in info_out}

    if DEBUG:
        # NOTE: csim will only have 8 or 15 bit dumps in the .seq file.
        # nef -> ioinfo should have converted 16-bit radix to 15-bit.
        out_bw = {a["name"]: a["bitw"] for a in info_out}
        if 16 in set(out_bw.values()):
            raise ValueError(
                f"Got bitwidths {out_bw}, but the csim .seq dump only supports 8/15 bit. "
                "Check the ioinfo unpacked from the nef.")

    # TODO: return dram.bin / sqtl.bin to p_out

    # per channel support
    np_out_fx = csim.txt2np(out_node_list, out_node_shape, dir_output_list)
    if out_fmt == "fx":
        return np_out_fx

    # convert fx to fl
    np_out_fl = dynasty.np_fx2fl(np_out_fx, out_ch_dim, out_scale, out_radix)

    if cleanup:
        # off by default. e2e will clean up.
        shutil.rmtree(str(p_working))

    return np_out_fl


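# Hypothetical usage sketch for inference_csim_v2. `unpack_nefs()` is referenced
# by the docstring but lives outside this module, so the names below are
# placeholders for whatever that step actually returns:
#
#     p_model, ioinfo = ...  # from the nef-unpacking step
#     np_fl = inference_csim_v2(p_model, ioinfo, input_np, hw_mode=720)
#     np_fx = inference_csim_v2(p_model, ioinfo, input_np, hw_mode=720, out_fmt="fx")

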
def inference_dongle(p_combined_nefs,
                     model_id: int,
                     ioinfo,
                     input_np: dict,
                     hw_mode: int,
                     out_fmt="fl"):
    """Inference the model via a dongle server.

    TODO: will be available after 0.24.0.

    - The solution may prefer to use a combined nef,
      so assume p_combined_nefs may have multiple models included.
      ``model_id`` is used to pick the correct model.
    - Specify the dongle server info in the bash environment.
    """
    pass


def data_converter(input_np, info_in, p_working=None):
    """
    Convert input numpy data into the dram.bin format that the compiler specifies.

    Args:
        input_np (dict): dict of lists of numpy arrays. See the beginning of the module for details.
        info_in (dict): quantization info and dram format from the compiler.
        p_working (pathlib / str): where to put temp files.
            Same as the p_working of csim if called by csim.
            If not specified, a random folder will be used.
            If you give `p_working`, please make sure it is a different path for each call.

    Returns:
        A tuple of 3 elements.
        - output_bins (dict): dictionary of lists of paths to input bin files,
          fix-point data in dram format. Can be fed into csim/dongle.
        - p_working (pathlib): where the cache files are. The user needs to clean them later.
        - p_results (pathlib): where the csim results will be saved.
    """
    # setup folders for inference. random EACH TIME:
    # because this function may be called in a loop,
    # it should use a unique name per input_np.
    if p_working is None:
        p_working = pathlib.Path(tempfile.mkdtemp(prefix="dpc_"))
        p_working.mkdir(parents=True, exist_ok=True)
    else:
        p_working = pathlib.Path(p_working)
        # TODO: p_working should be empty
    p_bin = p_working / "sqtbin"
    p_csim_bin = p_working / "csimbin"
    p_results = p_working / "csim_out"
    for p in [p_bin, p_csim_bin, p_results]:
        p.mkdir(parents=True, exist_ok=True)
    # DEBUG: save ioinfo for debugging
    fn_ioinfo = p_bin / "ioinfo.json"
    futils.dict2json(info_in, fn_ioinfo)

    # step 3: prepare sequential.bin
    # NOTE: we assume the per-channel scale of the input is always 1.0.
    # Other scales are not supported for now.
    # (per-channel scale should be handled in pre-processing)

    sqt_bin_list = csim.np2bin_seq(input_np, info_in, p_out=p_bin)

    # step 4: convert to the compiler-specified format.bin
    csim_bin_list, cmds = csim.data_convert(
        sqt_bin_list, info_in, p_out=p_csim_bin
    )

    return csim_bin_list, p_working, p_results


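# Hypothetical sketch of how data_converter is used together with csim (it
# mirrors the steps inside inference_csim_v2; the names are placeholders):
#
#     csim_bin_list, p_working, p_results = data_converter(input_np, ioinfo["input"])
#     # csim_bin_list maps each input index to its converted dram-format bin file(s),
#     # which inference_csim_v2 then wires into a per-input run_csim .ini file.

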
#######################################################################################################
# special processing for csim 520.
#######################################################################################################

def fix_520_output_name(rslt_csim: dict, input_nodes: list):
    """Fix the node names in the csim/dongle output dict.

    The 520 nef/setup.bin does not include node names,
    so the collected text files only use "0", "1", "2", ...
    """
    assert len(input_nodes) == len(rslt_csim), \
        f"rslt_csim: {len(rslt_csim)}, input_nodes: {len(input_nodes)}"
    rst = {v: rslt_csim[str(i)] for i, v in enumerate(input_nodes)}
    return rst


def convert_520_output_list(rslt_csim: dict):
    """Get the csim/dongle output as a list (no node names included).

    The 520 nef/setup.bin does not include node names,
    so the collected text files only use "0", "1", "2", ...

    We can still get the inference results (without node names) as a list,
    in the same order as the onnx specifies.
    """
    return [rslt_csim[str(i)] for i in range(len(rslt_csim))]


def hack_np_520(input_np):
    """Update input_np for 520 nef csim inference.

    The 520 nef / setup.bin does not include the node names,
    so "0", "1", etc. are used as defaults.
    """
    assert len(input_np) == 1, f"input_np for 520 got {len(input_np)} inputs. keys: {input_np.keys()}"

    v1 = list(input_np.values())[0]
    return {"0": v1}


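# Small illustration of the 520 name handling above (values are placeholders):
#
#     hack_np_520({"input.1": [arr]})            # -> {"0": [arr]}
#     fix_520_output_name({"0": out}, ["out0"])  # -> {"out0": out}
#     convert_520_output_list({"0": a, "1": b})  # -> [a, b]

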
def inference_csim_520(p_model,
                       ioinfo,
                       input_np: dict,
                       out_fmt="fl",
                       cleanup=False,
                       p_working=None,
                       p_out=None):
    """Run csim inference for 520.

    NOTE:
        Need to call `unpack_nefs()` first to break up the nef and get the model address
        and ioinfo for this model.

    Args:
        p_model (pathlib / str): path to the unpacked model.
        ioinfo (dict): dict of input/output node info, e.g., quantization, shape.
        input_np (dict): dict of lists of numpy arrays. See the beginning of the module for details.
        out_fmt (str): choose from below:

            - fl: the function will return a dict of lists of numpy arrays.
              Each array holds inference results in float format,
              with the shape specified by the onnx.
            - fx: the function will return a dict of lists of numpy arrays.
              Each array holds inference results in fixed-point format,
              with the shape specified by the onnx.
            - sqtl.bin: results are flattened and saved in bin format,
              directly dumped by csim. For debug purposes. (not ready yet)
            - dram.bin: results are kept as in dram and saved in bin format,
              directly dumped by csim. For debug purposes. (not ready yet)

        cleanup (bool): remove the cache folder before returning.
            No cleanup by default; leave it to the users.
        p_working (pathlib / str): where to put cache files.
            Users may use the same folder for multiple calls on the same model.
            (e2e will give a different one for each inference call.)
            The user needs to clean this folder to release disk space.
        p_out (pathlib / str): path to the output folder. Only applicable when
            `out_fmt` is set to `sqtl.bin` or `dram.bin`. (Not ready yet)

    Returns:
        See the `out_fmt` explanation.
    """
    hw_mode = 520

    # check parameters
    assert out_fmt in ["fl", "fx", "sqtl.bin", "dram.bin"]
    if out_fmt.endswith(".bin"):
        assert p_out is not None, "Please set the p_out parameter to save .bin files."
        raise NotImplementedError()

    p_model = pathlib.Path(p_model)
    assert p_model.exists(), f"{p_model} does not exist!"

    if p_working is None:
        p_working = tempfile.mkdtemp(prefix="csim_")
    # actually work under a RANDOM folder under the given p_working
    np_id = "np_" + futils.gen_random_string(8)
    p_working = pathlib.Path(p_working) / np_id

    # step 3: prepare input_rgba.bin
    csim_bin_list, p_working, p_results = data_converter_520(input_np,
                                                             ioinfo["input"],
                                                             p_working=p_working)

    def find_model_bins(p_model):
        cs = {}
        for fn_key in ["command.bin", "setup.bin", "weight.bin"]:
            t = p_model.glob(f"*{fn_key}")
            cs[fn_key] = list(t)[0]
        return cs

    cs_abs = find_model_bins(p_model)
    bin_csim = fconsts.BIN_SET["csim"][520]

    def gen_csim_cmd_1(fn_input_rgba, p_csim_out):
        # get relative positions
        cs = {}
        for k, v in cs_abs.items():
            cs[k] = futils.relative_path(v, p_csim_out)

        # NOTE: only 1 input for 520, no need for ","
        c = f"""{bin_csim} -d 0 --thread 1 {cs["command.bin"]} {cs["weight.bin"]} {fn_input_rgba} --setup {cs["setup.bin"]}"""

        command = f"pushd {p_csim_out} > /dev/null && {c} && popd > /dev/null"

        return command

    # step 5: prepare the run_csim commands
    dir_output_list = []
    cmds = []
    # NOTE: there is only one input node for 520
    for i, bin_rgba in enumerate(csim_bin_list[0]):
        p_csim_dump = p_results / f"in{i:06}"
        p_csim_dump.mkdir(mode=0o770, parents=True, exist_ok=True)
        dir_output_list.append(p_csim_dump)

        # p_model as model input; the folder contains setup.bin / etc.
        cmd1 = gen_csim_cmd_1(bin_rgba, p_csim_dump)
        cmds.append(cmd1)

    # step 6: call run_csim, optionally in parallel
    if len(cmds) > 1:
        sh_csim = p_working / "run_csim.sh"
        with open(sh_csim, "w") as f:
            for cmd1 in cmds:
                f.write(f"{cmd1}\n")
        command = f"parallel --jobs 6 --halt now,fail=1 < {sh_csim}"
    else:
        command = cmds[0]

    cp = futils.run_bash_script(command)
    assert cp.returncode == 0, f"csim failed with return code {cp.returncode}"

    # step 7: load csim results and return float data
    # convert csim dump text to np format
    # there is no float dump in csim output
    info_out = ioinfo["output"]
    out_node_list = [a["name"] for a in info_out]
    out_node_shape = {a["name"]: a["onnx_shape"] for a in info_out}
    out_ch_dim = {a["name"]: a["ch_dim"] for a in info_out}
    out_scale = {a["name"]: a["scale"] for a in info_out}
    out_radix = {a["name"]: a["radix"] for a in info_out}
    # NOTE: a["name"] is "0", "1", ... since names are not available in nef_v0/setup.bin

    # TODO: return dram.bin / sqtl.bin to p_out

    # per channel support
    np_out_fx = csim.txt2np(out_node_list, out_node_shape, dir_output_list, is_520=True)
    if out_fmt == "fx":
        return np_out_fx

    # convert fx to fl
    np_out_fl = dynasty.np_fx2fl(np_out_fx, out_ch_dim, out_scale, out_radix)

    if cleanup:
        # off by default. e2e will clean up.
        shutil.rmtree(str(p_working))

    return np_out_fl


def data_converter_520(input_np, info_in, p_working=None):
    """
    Convert input numpy data into the dram.bin format that the compiler specifies.

    Note:
        - 520 only takes one RGBA.bin file, which is very different from the other platforms.

    Args:
        input_np (dict): dict of lists of numpy arrays. See the beginning of the module for details.
        info_in (dict): quantization info and dram format from the compiler.
        p_working (pathlib / str): where to put temp files.
            Same as the p_working of csim if called by csim.
            If not specified, a random folder will be used.
            If you give `p_working`, please make sure it is a different path for each call.

    Returns:
        A tuple of 3 elements.
        - output_bins (dict): dictionary of lists of paths to input bin files,
          fix-point data in dram format. Can be fed into csim/dongle.
        - p_working (pathlib): where the cache files are. The user needs to clean them later.
        - p_results (pathlib): where the csim results will be saved.
    """
    # setup folders for inference. random EACH TIME:
    # because this function may be called in a loop,
    # it should use a unique name per input_np.
    if p_working is None:
        p_working = pathlib.Path(tempfile.mkdtemp(prefix="dpc_"))
        p_working.mkdir(parents=True, exist_ok=True)
    else:
        p_working = pathlib.Path(p_working)

    # TODO: p_working should be empty
    p_csim_bin = p_working / "csimbin"
    p_results = p_working / "csim_out"
    for p in [p_csim_bin, p_results]:
        p.mkdir(parents=True, exist_ok=True)

    # step 3: prepare rgba.bin
    input_np = hack_np_520(input_np)
    # should be a list of lists.
    in_lst = [[a] for a in input_np["0"]]
    csim_bin_list = csim.txt2bin_rgba(in_lst, info_in, p_csim_bin)

    return csim_bin_list, p_working, p_results


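# Hypothetical end-to-end sketch for the 520 path (placeholder names; in the
# regression flow these values come from the nef-unpacking step):
#
#     np_fl = inference_csim_v2(p_model_520, ioinfo_520, {"input.1": [img]}, hw_mode=520)
#     # inference_csim_v2 detours to inference_csim_520, whose data_converter_520
#     # renames the single input via hack_np_520 and packs it into an RGBA .bin
#     # before invoking the 520 csim binary.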