#! /usr/bin/env python3 import pathlib from collections import OrderedDict import os import tempfile import shutil import pytz from jinja2 import Environment, FileSystemLoader import sys_flow.flow_utils as futils import sys_flow.flow_constants as fconsts import sys_flow.dynasty_v3 as dynasty import sys_flow.csim_utils as csim DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False import snoop snoop.install(enabled=DEBUG) # find the binaries first dynasty_bin = fconsts.BIN_SET["dynasty"]["binary"] dynasty_so = fconsts.BIN_SET["dynasty"]["lib.so"] dynasty_cuda_so = fconsts.BIN_SET["dynasty"]["lib_cuda.so"] bin_dc = fconsts.BIN_SET["data_converter"]["v2"] timezone = pytz.timezone("America/Los_Angeles") P_TMP_MODEL = pathlib.Path("/tmp/working") def inference_onnx_runtime( fn_onnx, input_np, device="gpu", p_working="/tmp", shape_in="onnx_shape", dump_level=0 ): raise NotImplementedError() def get_model_io(p_onnx, hw_mode=None): """Interface to get ioinfo from onnx/bie. dynasty / csim flatten data to one dimension then dump to text. Input node name list and output node (onnx) shape, are needed when convert flattened data back to onnx shape. Args: p_onnx onnx or bie file to examine. hw_mode: int 520/720/... If given p_onnx is a bie file, this is needed. Returns: - list of `input_nodes` - list of `output_nodes` - shape of output nodes - dict of ioinfo json files per platform. TODO: move to flow_utils. """ # make sure it is pathlib p_onnx = pathlib.Path(p_onnx) # get input_nodes names if p_onnx.name.endswith(".onnx"): input_nodes, output_nodes, out_node_shape, _ = futils.get_ioinfo_from_onnx(p_onnx) d_ioinfo = {} # not available in onnx elif p_onnx.name.endswith(".bie"): assert not (hw_mode is None), "get_model_io: given bie file, hw_mode is needed." input_nodes, output_nodes, out_node_shape, ioinfo = futils.get_ioinfo_from_bie( p_onnx, hw_mode, dynasty_bin) # dynasty dump float results directly, no need to convert fx to fl # sometime ioinfo is needed for dynasty # NOTE: no need from mo4.2. but keep it for back-compatible. if ioinfo: rnd = futils.gen_random_string(6) p_ioinfo = pathlib.Path("/tmp") / f"ioinfo_{rnd}.json" futils.dict2json(ioinfo, p_ioinfo) d_ioinfo = {hw_mode: p_ioinfo} else: d_ioinfo = {} else: raise NotImplementedError return input_nodes, output_nodes, out_node_shape, d_ioinfo def get_model_type(p_onnx): """Get model type from name. Why not put in gen_dynasty_mode_settings? Because it is complicated to support many regression debug purpose. """ if p_onnx.name.endswith(".onnx"): return "piano_onnx" elif p_onnx.name.endswith(".bie"): return "piano_bie" else: raise NotImplementedError def inference_dynasty_fl_so( fn_onnx, input_np, input_nodes=None, # list of input nodes, to know the order of input nodes out_node_shape=None, # need this to convert dynasty_fl dump to onnx shape device="dynasty_cpu", p_working=None, shape_in="onnx_shape", dump_level=0 ): """ Run inference on given model (fn_onnx) with data `input_np`. Will run `dynasty float` mode on this model with given input(). Currently call dynasty bin for the inference. Maybe: use dynasty.so for inference. Args: fn_onnx (pathlib / str): path to origin.onnx. input_np (dict): dict of list of numpy file. See begin of page for details. input_nodes (list): a list of input node names. This indicate the order of input nodes so the numpy array in dict can be passed to dynasty binary in correct order. If not set, this function will call `get_model_io` to get `input_nodes` and `out_node_shape`. out_node_shape (list): a list of shape for out nodes. This is necessary to convert flattened data to onnx shape. device (str): choose which method for inference. - `dynasty_cpu` (default), using kneron dynasty binary. included in kneron/toolchain. - `dynasty_gpu`, using kneron dynasty binary. included in kneron/toolchain. Need to run in toolchain with cuda support. - `ort_gpu`, using onnx runtime with GPU. (not ready). Use this for accelleration but only appliable when GPU available. - `ort_cpu`, using onnx runtime with CPU. (not ready). some kneron customized nodes not supported? p_working (pathlib / str): where to put cache file. The user need to clean this folder to release disk space. e2e will give different for each image. shape_in (str): choose from `onnx_shape` (default) / `channel_last` (will be obsoleted). dump_level: - 0 (default): dump inference results for only output nodes. - 1: dump inference results for output nodes + cpu nodes. - 2: dump inference results for all nodes. Returns: dict of list of numpy array as inference results. Each numpy array is same shape as onnx specified. """ # prepare folder and files if p_working is None: p_working = tempfile.mkdtemp("dyn_fl_") p_working = pathlib.Path(p_working) p_onnx = pathlib.Path(fn_onnx) assert p_onnx.exists(), f"{fn_onnx} does not exist!" assert device in ["dynasty_cpu", "dynasty_gpu", "ort_cpu", "ort_gpu"], \ f"device should be dynasty/dynasty_gpu/ort_cpu/ort_gpu, but given {device}" model_name = futils.clean_name(p_onnx.name) model_id = f"tc/{model_name}" # setup for inference. # will run only float mode on given onnx # np_id = "np_{}".format(futils.md5sum(input_np)[:6]) np_id = futils.gen_random_string(10) dump_prefix = f"infr_{np_id}" result_prefix = f"res_{np_id}" p_output = p_working / result_prefix # prepare text input if input_nodes is None or out_node_shape is None: # e2e need to call this if want to run infenrece one by one input_nodes, _, out_node_shape, _ = get_model_io(p_onnx) _, input_list, input_fns = dynasty.np2txt( input_np, input_nodes, p_working=p_working, dump_prefix=dump_prefix, ch_last=True ) if "gpu" in device: lib = dynasty_cuda_so else: lib = dynasty_so if "ort" in device: ort = True else: ort = False # use input_list[0] since E2E does one input at a time dynasty.run_dynasty_so(lib, fn_onnx, len(input_nodes), input_list[0], input_nodes, str(p_working), ort) # convert dynasty dump text to np np_out = dynasty.txt2np_so(out_node_shape, p_working) return np_out def inference_dynasty_fl( fn_onnx, input_np, input_nodes=None, # list of input nodes, to know the order of input nodes out_node_shape=None, # need this to convert dynasty_fl dump to onnx shape device="dynasty", p_working=None, shape_in="onnx_shape", dump_level=0 ): """ Run inference on given model (fn_onnx) with data `input_np`. Will run `dynasty float` mode on this model with given input(). Currently call dynasty bin for the inference. Maybe: use dynasty.so for inference. Args: fn_onnx (pathlib / str): path to origin.onnx. input_np (dict): dict of list of numpy file. See begin of page for details. input_nodes (list): a list of input node names. This indicate the order of input nodes so the numpy array in dict can be passed to dynasty binary in correct order. If not set, this function will call `get_model_io` to get `input_nodes` and `out_node_shape`. out_node_shape (list): a list of shape for out nodes. This is necessary to convert flattened data to onnx shape. device (str): choose which method for inference. - `dynasty` (default), using kneron dynasty binary. included in kneron/toolchain. - `ort_gpu`, using onnx runtime with GPU. (not ready). Use this for accelleration but only appliable when GPU available. - `ort_cpu`, using onnx runtime with CPU. (not ready). some kneron customized nodes not supported? p_working (pathlib / str): where to put cache file. The user need to clean this folder to release disk space. e2e will give different for each image. shape_in (str): choose from `onnx_shape` (default) / `channel_last` (will be obsoleted). dump_level: - 0 (default): dump inference results for only output nodes. - 1: dump inference results for output nodes + cpu nodes. - 2: dump inference results for all nodes. Returns: dict of list of numpy array as inference results. Each numpy array is same shape as onnx specified. """ # prepare folder and files if p_working is None: p_working = tempfile.mkdtemp("dyn_fl_") p_working = pathlib.Path(p_working) p_onnx = pathlib.Path(fn_onnx) assert p_onnx.exists(), f"{fn_onnx} does not exist!" assert device in ["dynasty", "ort_cpu", "ort_gpu"], \ f"device should be dynasty/ort_cpu/ort_gpu, but given {device}" # TODO: if device is "cpu"/"gpu", call libdynasty.so / libdynasty_gpu.so model_name = futils.clean_name(p_onnx.name) model_id = f"tc/{model_name}" # setup for inference. # will run only float mode on given onnx # np_id = "np_{}".format(futils.md5sum(input_np)[:6]) np_id = futils.gen_random_string(10) dump_prefix = f"infr_{np_id}" result_prefix = f"res_{np_id}" p_output = p_working / result_prefix # prepare text input if input_nodes is None or out_node_shape is None: # e2e need to call this if want to run infenrece one by one input_nodes, _, out_node_shape, _ = get_model_io(p_onnx) _, input_list, input_fns = dynasty.np2txt( input_np, input_nodes, p_working=p_working, dump_prefix=dump_prefix ) # prepare dynasty list mode_float = dynasty.gen_dynasty_mode_settings( "float", fn_onnx=p_onnx, onnx_map=None, model_id=model_id ) d_list, dir_output_list = dynasty.gen_dynasty_list( [mode_float], input_list, input_nodes, p_output, dump_level=dump_level, shape_in=shape_in, ) # run dynasty fn_dynasty_sh = p_working / f"run_dynasty_{np_id}.sh" cmds = dynasty.build_dynasty_cmd(d_list, dynasty_bin, fn_dynasty_sh) # TODO: add error process. dynasty.run_dynasty_command_parallel(model_id, fn_dynasty_sh) # convert dynasty dump text to np np_out, _ = dynasty.txt2np( out_node_shape, output_list=dir_output_list, dmode="float", load_fl=True, load_fx=False, ) # TODO: remove dir_output_list return np_out def inference_dynasty_fx( fn_onnx, hw_mode, input_np, input_nodes=None, out_node_shape=None, d_ioinfo={}, p_working=None, shape_in="onnx_shape", dump_level=0 ): """ Run inference of kneron platform (hw_mode) on given model (fn_onnx) with data `input_np`. This call will use kneron `dynasty_fx` binary (included in toolchain) for the inference. The inference is to simulate one of the kneron NPU chip (specified by hw_mode). Args: fn_onnx (pathlib / str): path to QUANTIZED MODEL (.bie file). `fn_onnx` must be generated for `hw_mode`. Sometime a quantized onnx file can be passed in, but the accompany quantization json must be in same folder with this onnx. (Not tested) hw_mode (int): specify which platform to run. input_np (dict): dict of list of numpy file. See begin of page for details. input_nodes (list): a list of input node names. This indicate the order of input nodes so the numpy array in dict can be passed to dynasty binary in correct order. If not set, this function will call `get_model_io` to get `input_nodes` and `out_node_shape`. out_node_shape (list): a list of shape for out nodes. This is necessary to convert flattened data to onnx shape. p_working (pathlib / str): where to put cache file. The user need to clean this folder to release disk space. e2e will give different for each image. shape_in (str): choose from `onnx_shape` (default) / `channel_last` (will be obsoleted). dump_level: - 0 (default): dump inference results for only output nodes. - 1: dump inference results for output nodes + cpu nodes. - 2: dump inference results for all nodes. Returns: * dict of list of numpy as inference results in float number, with shape specified by onnx. * dict of list of numpy as inference results in fix point number, with shape specified by onnx. NOTE: input node shape: included in np_in array. output node order: not needed, because results are loaded in dictionary by name. The dynasty dumps is name based. """ # check p_working. if p_working is None: p_working = tempfile.mkdtemp("dyn_fx_") p_working = pathlib.Path(p_working) # Prepare constants np_id = "np_" + futils.gen_random_string(8) dump_prefix = f"infr_{np_id}" result_prefix = f"res_kdp{hw_mode}_{np_id}" # check file / folders p_onnx = pathlib.Path(fn_onnx) assert p_onnx.exists(), f"Given fix-point-model: {p_onnx} does not exists!" model_name = futils.remove_appendix(p_onnx.name) model_id = f"tc/{model_name}" if input_nodes is None or out_node_shape is None: # try to avoid this operation to save time. input_nodes, _, out_node_shape, d_ioinfo = get_model_io(p_onnx) fx_model_type = get_model_type(p_onnx) # prepare text input _, input_list, input_fns = dynasty.np2txt( input_np, input_nodes, p_working=p_working, dump_prefix=dump_prefix ) # prepare dynasty list hw_mode = str(hw_mode) mode_fx = dynasty.gen_dynasty_mode_settings( hw_mode, fn_onnx=p_onnx, which_onnx=fx_model_type, onnx_map=None, ioinfo_map=d_ioinfo, model_id=model_id, ) p_output = pathlib.Path(p_working) / result_prefix d_list, dir_output_list = dynasty.gen_dynasty_list( [mode_fx], input_list, input_nodes, p_output, dump_level=dump_level, shape_in=shape_in, ) # run dynasty fn_dynasty_sh = p_working / f"run_dynasty_{np_id}.sh" cmds = dynasty.build_dynasty_cmd(d_list, dynasty_bin, fn_dynasty_sh) # TODO: add error process. dynasty.run_dynasty_command_parallel(model_id, fn_dynasty_sh) # convert dynasty dump text to np format np_out_fl, np_out_fx = dynasty.txt2np( out_node_shape, output_list=dir_output_list, dmode=hw_mode, load_fl=True, load_fx=True, ) # TODO: remove dir_output_list return np_out_fl, np_out_fx def inference_csim_v2(p_model, ioinfo, input_np: dict, hw_mode: int, out_fmt="fl", cleanup=False, p_working=None, p_out=None): """Run csim infenrence. NOTE: Need to call `unpack_nefs()` to break the nef then get model address and ioinfo for this model. Args: p_model (pathlib / str): path to unpacked model. ioinfo (dict): dict of input/output node info, e.g., quantization, shape. input_np (dict): dict of list of numpy file. See begin of page for details. hw_mode (int): specify which platform to run. It must match the nef file as the nef is platform dependant. out_fmt (str): choose from below: - fl: the function will return dict of list of numpy array. Each np array is inferenced results in float format, with shape specified by onnx. - fx: the function will return dict of list of numpy array. Each np array is inferenced results in fix-point format, with shape specified by onnx. - sqtl.bin: results is flattened, saved in bin format. directly dumped by csim. For debug purpose. (not ready yet) - dram.bin: results is as in dram, saved in bin format. directly dumped by csim. For debug purpose. (not ready yet) cleanup (bool): remove cache folder before finish. No cleanup by default, and leave it to users. p_working (pathlib / str): where to put cache file. User may use same folder when multiple call on same model. (e2e will give different for each inference call). The user need to clean this folder to release disk space. p_out (pathlib / str): path to output folder. Only appliable when `out_fmt` set to `sqtl.bin` or `dram.bin`. (Not ready yet) Returns: See `out_fmt` explanation. """ # detour for 520 if hw_mode == 520: return inference_csim_520(p_model, ioinfo, input_np, out_fmt, cleanup, p_working, p_out) # check parameters assert out_fmt in ["fl", "fx", "sqtl.bin", "dram.bin"] if out_fmt.endswith(".bin"): assert p_out is not None, "Please set p_out parameter to save .bin files." raise NotImplementedError() assert p_model.exists(), f"{p_model} does not exists!" if p_working is None: p_working = tempfile.mkdtemp(prefix="csim_") # actually working under a RANDOM folder under given p_working np_id = "np_" + futils.gen_random_string(8) p_working = pathlib.Path(p_working) / np_id # step 3: prepare input.bin csim_bin_list, p_working, p_results = data_converter(input_np, ioinfo["input"], p_working=p_working) # step 5: prepare run_csim.ini file_loader = FileSystemLoader(f"{fconsts.P_FLOW}/template") jinja_env = Environment(loader=file_loader) template = jinja_env.get_template(f"run_csim_{hw_mode}.ini") csim_in_list = OrderedDict() dir_output_list = [] for i, bin_pair in csim_bin_list.items(): p_csim_dump = p_results / f"in{i:06}" p_csim_dump.mkdir(parents=True, exist_ok=True) # p_model as model input, the folder contains setup.bin / etc fn_ini = p_results / f"in{i:06}_csim.ini" csim.gen_csim_ini(bin_pair, p_model, hw_mode, template=template, fn_ini=fn_ini) csim_in_list[i] = [p_csim_dump, fn_ini] dir_output_list.append(p_csim_dump) # step 6: call run_csim with parallel sh_csim = p_working / "run_csim.sh" bin_csim = fconsts.BIN_SET["csim"][hw_mode] cmd, cp = csim.run_csim(csim_in_list, bin_csim, sh_csim) assert cp.returncode == 0, f"csim failed with return code {cp.returncode}" # step 7: load csim results and return float data # convert csim dump text to np format # there is no float dump in csim output info_out = ioinfo["output"] out_node_list = [a["name"] for a in info_out] out_node_shape = {a["name"]: a["onnx_shape"] for a in info_out} out_ch_dim = {a["name"]: a["ch_dim"] for a in info_out} out_scale = {a["name"]: a["scale"] for a in info_out} out_radix = {a["name"]: a["radix"] for a in info_out} # TODO: return dram.bin / sqtl.bin to p_out # per channel support np_out_fx = csim.txt2np(out_node_list, out_node_shape, dir_output_list) if out_fmt == "fx": return np_out_fx # convert fx to fl np_out_fl = dynasty.np_fx2fl(np_out_fx, out_ch_dim, out_scale, out_radix) if cleanup: # off by default. e2e will clean up. shutil.rmtree(str(p_working)) return np_out_fl def inference_dongle(p_combined_nefs, model_id: int, ioinfo, input_np: dict, hw_mode: int, out_fmt="fl"): """Inference model via dongle server. TODO: will be available after 0.24.0. - solution may prefer use combined nef. So assume p_combined_nefs may have multiple models included. ``model_id`` is used to pick correct model. - specify dongle server info in bash environment. """ pass def data_converter(input_np, info_in, p_working=None): """ Convert input numpy data into dram.bin format input as compiler specified. Args: input_np (dict): dict of list of numpy file. See begin of page for details. info_in (dict): quantization info and dram format from compiler. p_working (pathlib / str): where to put temp files. same as p_working of csim if called by csim. If not specified, will use a random folder. If user give `p_working`, please make sure different path for each call. Returns: A tuple of 3 elements. - output_bins (dict): dictionary of list of path to input bin files, fix-point data in dram format. can be feed into csim/dongle. - p_working (pathlib): where the cache files are. The user need to clean it later. - p_results (pathlib): where the csim result will be saved. """ # setup folders for inference. random EACH TIME # because this function may be called in a loop, # it should use uniq name per input_np. if p_working is None: p_working = pathlib.Path(tempfile.mkdtemp(prefix="dpc_")) p_working.mkdir(parents=True, exist_ok=True) else: p_working = pathlib.Path(p_working) # TODO: p_working should be empty p_bin = p_working / "sqtbin" p_csim_bin = p_working / "csimbin" p_results = p_working / "csim_out" for p in [p_bin, p_csim_bin, p_results]: p.mkdir(parents=True, exist_ok=True) # step 3: prepare sequential.bin # NOTE: we assume the per-channel-scale of input are always 1.0. # Other scales not supported now. # (per-channel-scale should put into pre-process) sqt_bin_list = csim.np2bin_seq(input_np, info_in, p_out=p_bin) # step 4: convert to compiler specified format.bin csim_bin_list, cmds = csim.data_convert( sqt_bin_list, info_in, p_out=p_csim_bin ) return csim_bin_list, p_working, p_results ####################################################################################################### # special process for csim 520 related. ####################################################################################################### def fix_520_output_name(rslt_csim:dict, input_nodes:list): """Fix the csim/dongle output dict name. The 520 nef/setup.bin does not include node name, so the collected text files only use "0", "1", "2". """ assert len(input_nodes) == len(rslt_csim), \ f"rslt_csim: {len(rslt_csim)}, input_nodes: {len(input_nodes)}" rst = {v: rslt_csim[str(i)] for i, v in enumerate(input_nodes)} return rst def convert_520_output_list(rslt_csim:dict): """Get the csim/dongle output in list (not node name included). The 520 nef/setup.bin does not include node name, so the collected text files only use "0", "1", "2". We can get inference results (without node names) in list, same order as onnx specified. """ return [rslt_csim[str(i)] for i in range(len(rslt_csim))] def hack_np_520(input_np): """Update input_np for 520 nef csim inference. the 520 nef / setup.bin does not include the node names. so will use "0", "1", etc as default. """ assert len(input_np) == 1, f"input_np for 520 got {len(input_np)} inputs. keys: {input_np.keys()}" v1 = list(input_np.values())[0] return {"0":v1} def inference_csim_520(p_model, ioinfo, input_np: dict, out_fmt="fl", cleanup=False, p_working=None, p_out=None): """Run csim infenrence for 520. NOTE: Need to call `unpack_nefs()` to break the nef then get model address and ioinfo for this model. Args: p_model (pathlib / str): path to unpacked model. ioinfo (dict): dict of input/output node info, e.g., quantization, shape. input_np (dict): dict of list of numpy file. See begin of page for details. out_fmt (str): choose from below: - fl: the function will return dict of list of numpy array. Each np array is inferenced results in float format, with shape specified by onnx. - fx: the function will return dict of list of numpy array. Each np array is inferenced results in fix-point format, with shape specified by onnx. - sqtl.bin: results is flattened, saved in bin format. directly dumped by csim. For debug purpose. (not ready yet) - dram.bin: results is as in dram, saved in bin format. directly dumped by csim. For debug purpose. (not ready yet) cleanup (bool): remove cache folder before finish. No cleanup by default, and leave it to users. p_working (pathlib / str): where to put cache file. User may use same folder when multiple call on same model. (e2e will give different for each inference call). The user need to clean this folder to release disk space. p_out (pathlib / str): path to output folder. Only appliable when `out_fmt` set to `sqtl.bin` or `dram.bin`. (Not ready yet) Returns: See `out_fmt` explanation. """ hw_mode = 520 # check parameters assert out_fmt in ["fl", "fx", "sqtl.bin", "dram.bin"] if out_fmt.endswith(".bin"): assert p_out is not None, "Please set p_out parameter to save .bin files." raise NotImplementedError() assert p_model.exists(), f"{p_model} does not exists!" if p_working is None: p_working = tempfile.mkdtemp(prefix="csim_") # actually working under a RANDOM folder under given p_working np_id = "np_" + futils.gen_random_string(8) p_working = pathlib.Path(p_working) / np_id # step 3: prepare input_rgba.bin csim_bin_list, p_working, p_results = data_converter_520(input_np, ioinfo["input"], p_working=p_working) def find_model_bins(p_model): cs = {} for fn_key in ["command.bin", "setup.bin", "weight.bin"]: t = p_model.glob(f"*{fn_key}") cs[fn_key] = list(t)[0] return cs cs_abs = find_model_bins(p_model) bin_csim = fconsts.BIN_SET["csim"][520] def gen_csim_cmd_1(fn_input_rgba, p_csim_out): # get relative position cs = {} for k, v in cs_abs.items(): cs[k] = futils.relative_path(v, p_csim_out) # NOTE: only 1 input for 520. no need for "," c = f"""{bin_csim} -d 0 --thread 1 {cs["command.bin"]} {cs["weight.bin"]} {fn_input_rgba} --setup {cs["setup.bin"]}""" command = f"pushd {p_csim_out} > /dev/null && {c} && popd > /dev/null" return command # step 5: prepare run_csim command dir_output_list = [] cmds = [] # NOTE: there is only one input for 520 for i, bin_rgba in enumerate(csim_bin_list[0]): p_csim_dump = p_results / f"in{i:06}" p_csim_dump.mkdir(mode=0o770, parents=True, exist_ok=True) dir_output_list.append(p_csim_dump) # p_model as model input, the folder contains setup.bin / etc cmd1 = gen_csim_cmd_1(bin_rgba, p_csim_dump) cmds.append(cmd1) # step 6: call run_csim, optional with parallel if len(cmds) > 1: sh_csim = p_working / "run_csim.sh" with open(sh_csim, "w") as f: for cmd1 in cmds: f.write(f"{cmd1}\n") command = f"parallel --jobs 6 --halt now,fail=1 < {sh_csim}" else: command = cmds[0] cp = futils.run_bash_script(command) assert cp.returncode == 0, f"csim failed with return code {cp.returncode}" # step 7: load csim results and return float data # convert csim dump text to np format # there is no float dump in csim output info_out = ioinfo["output"] out_node_list = [a["name"] for a in info_out] out_node_shape = {a["name"]: a["onnx_shape"] for a in info_out} out_ch_dim = {a["name"]: a["ch_dim"] for a in info_out} out_scale = {a["name"]: a["scale"] for a in info_out} out_radix = {a["name"]: a["radix"] for a in info_out} # NOTE: the a["name"] is "0", "1". ... as it is not available in nef_v0/setup.bin # TODO: return dram.bin / sqtl.bin to p_out # per channel support np_out_fx = csim.txt2np(out_node_list, out_node_shape, dir_output_list, is_520=True) if out_fmt == "fx": return np_out_fx # convert fx to fl np_out_fl = dynasty.np_fx2fl(np_out_fx, out_ch_dim, out_scale, out_radix) if cleanup: # off by default. e2e will clean up. shutil.rmtree(str(p_working)) return np_out_fl def data_converter_520(input_np, info_in, p_working=None): """ Convert input numpy data into dram.bin format input as compiler specified. Note: - 520 only takes one RGBA.bin file, which is very different from other platforms. Args: input_np (dict): dict of list of numpy file. See begin of page for details. info_in (dict): quantization info and dram format from compiler. p_working (pathlib / str): where to put temp files. same as p_working of csim if called by csim. If not specified, will use a random folder. If user give `p_working`, please make sure different path for each call. Returns: A tuple of 3 elements. - output_bins (dict): dictionary of list of path to input bin files, fix-point data in dram format. can be feed into csim/dongle. - p_working (pathlib): where the cache files are. The user need to clean it later. - p_results (pathlib): where the csim result will be saved. """ # setup folders for inference. random EACH TIME # because this function may be called in a loop, # it should use uniq name per input_np. if p_working is None: p_working = pathlib.Path(tempfile.mkdtemp(prefix="dpc_")) p_working.mkdir(parents=True, exist_ok=True) else: p_working = pathlib.Path(p_working) # TODO: p_working should be empty p_csim_bin = p_working / "csimbin" p_results = p_working / "csim_out" for p in [p_csim_bin, p_results]: p.mkdir(parents=True, exist_ok=True) # step 3: prepare rgba.bin input_np = hack_np_520(input_np) # should be list of list. in_lst = [[a] for a in input_np["0"]] csim_bin_list = csim.txt2bin_rgba(in_lst, info_in, p_csim_bin) return csim_bin_list, p_working, p_results