#! /usr/bin/env python3 """Provide compiler related tools.""" import os import pathlib import tempfile import shutil import re import json from collections import OrderedDict from functools import lru_cache import numpy as np import sys_flow_v2.flow_constants as fconsts import sys_flow_v2.flow_utils as futils import snoop DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False snoop.install(enabled=DEBUG) # constants P_TMP_MODEL = pathlib.Path("/tmp/model_working") P_TMP_INPUT = pathlib.Path("/tmp/input_data") def get_nef_util_bins(): """Get binaries to use.""" bin_nef_util = fconsts.BIN_SET["compiler"]["kneron_nef_utils"] bin_kne_util = fconsts.BIN_SET["compiler"]["kneron_kne_utils"] pb_nef = pathlib.Path(bin_nef_util).parent pb_kne = pathlib.Path(bin_kne_util).parent ADD_NEF_UTIL_PATH = f"""export PATH={pb_nef}:{pb_kne}:$PATH""" return ADD_NEF_UTIL_PATH, bin_nef_util, bin_kne_util ADD_NEF_UTIL_PATH, bin_nef_util, bin_kne_util = get_nef_util_bins() ################################################################################### # get model info from nef + ioinfo.json ################################################################################### def clean_list_nef(list_nef): """Convert to str and make uniq.""" # convert to str. it may be pathlib obj l1 = [str(nef) for nef in list_nef] # make unique l2 = list(set(l1)) return " ".join(l2) def combine_nef(list_nef: list, hw_mode, d_out): """Combine multiple nef into one using nef utils. After combination, the combined.nef will run extra `unpack_nefs()` and re-organized with `ioinfo.json` per model. This side-effect is prepared a combined `ioinfo.json` for dongle inference. Args: list_nef (list): each element is path to nef file. hw_mode (int): specify platform. d_out (pathlib / str) : where to put `combined.nef` and `ioinfo.json` Returns: tuple: multiple info returned: - `p_out`: where is the out folder. usually same as specified. - `p_nef`: path of the combined nef - `p_ioinfo`: path of the (combined) ioinfo.json, prepared for dongle, not for normal process! - `fn_maps`: the combined.nef is unpacked and re-organized in `p_out/unpack`. Per-model file mapping is recorded in this. Same as `unpack_nefs()` returned. """ temp_dir = tempfile.mkdtemp() lst = clean_list_nef(list_nef) cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} --combine_nef \"{lst}\" -O {temp_dir}" # currently no -o option working. we need to cpy $temp_dir/models_xxx.nef to fn_out cp = futils.run_bash_script(cmd) assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}" # check output p_temp = pathlib.Path(temp_dir) nefs = list(p_temp.glob("models_*.nef")) assert len(nefs) == 1, f"combine nef but find {len(nefs)} created: {nefs}" # copy necessary files to p_out p_out = pathlib.Path(d_out) p_out.mkdir(parents=True, exist_ok=True) p_nef = p_out / "combined.nef" shutil.copyfile(nefs[0], p_nef) # prepare ioinfo (for convience of dongle) dongle_io = {} fn_maps, p_dump = unpack_nefs(p_nef, hw_mode) for model_id, (p_unpack, ioinfo) in fn_maps.items(): dongle_io[model_id] = {} dongle_io[model_id]["ioinfo_in"] = ioinfo["input"] dongle_io[model_id]["ioinfo_out"] = ioinfo["output"] p_ioinfo = p_out / "ioinfo.json" with open(p_ioinfo, "w") as f: json.dump(dongle_io, f, cls=NumpyEncoder) shutil.rmtree(temp_dir, ignore_errors=True) return p_out, p_nef, p_ioinfo, fn_maps def guess_available_model_id(p_dump, hw_mode): """Guess model_id from extracted filenames from NEF. NOTE: if the nef is from regression, it will have dfault model_id 32768. Args: p_dump (pathlib / str): where the nef was extracted to. hw_mode (int): specify the platform. Returns: tuple: list of model_id available in givem dump folder. """ if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]: # 540/730/1140 s1 = f"models_{hw_mode}_model_*.kne" s2 = rf"models_{hw_mode}_model_(\d+).kne" else: s1 = "NEF_*modelid_*" s2 = r'NEF_.*?_modelid_(\d+)_.*$' p_nefs = pathlib.Path(p_dump).glob(s1) p_names = [p.name for p in p_nefs] modelids = [] for name in p_names: modelids.extend(re.findall(s2, name)) ids = tuple(set([int(a) for a in modelids])) return ids def verify_ioinfo(ioinfo, nef_version): """Verify ioinfo got enought quantization info.""" missing = False for k1 in ["input", "output"]: if DEBUG: print(f"ioinfo got {len(ioinfo[k1])} {k1}(s).") for i_info, info in enumerate(ioinfo[k1]): for k2 in ["name", "shape", "onnx_shape", "ch_dim", "radix", "scale", "bitw", "data_format"]: if k2 not in info: print(f"Error: {k1}/{i_info} is missing {k2}") missing = True assert not missing def convert_ioinfo(p_sub, hw_mode): """Load ioinfo from io_raw, then save to ioinfo.json . This is a wrapper function to call correct parser according to hw_mode. """ if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]: # 520, or 720 pure bin (obsolete) # BUG: only per-layer quantization info # BUG: only sim shape. (no onnx shape. no dimension transpose in sim shape though.) fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0] ioinfo = parse_setup_json_v0(fn_json_raw) nef_version = 0 elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]: fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0] ioinfo = parse_setup_json_v1(fn_json_raw) nef_version = 1 elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]: # 540/730/1140 # .no_binary.json is from unpack_nefs (during toolchain/inference_csim*) # or from debug version of compiler running. # or kneron_kne_utils -j to extract from release version of compiler fn_json_raw = find_no_binary_json(p_sub) ioinfo = parse_setup_json_v2(fn_json_raw) nef_version = 2 verify_ioinfo(ioinfo, nef_version) fn_ioinfo = p_sub / "ioinfo.json" ioinfo["note"] = f"created by `convert_ioinfo()` from {fn_json_raw}" with open(fn_ioinfo, "w") as f: json.dump(ioinfo, f, cls=NumpyEncoder) return ioinfo def check_kne_util_err(cp, p_kne): """Examine kne_util reported error code to find error detaiils.""" if cp.returncode == 0: return elif cp.returncode == 100: raise FileNotFoundError(f"Given {p_kne} does not exist or file size is 0.") elif cp.returncode == 101: raise PermissionError(f"Failed to read {p_kne}. Please check this file.") elif cp.returncode == 102: raise ValueError(f"Given {p_kne} does not compatible with current schema.") else: raise ChildProcessError(f"kne_util failed with {cp.returncode} .") def find_no_binary_json(p_sub): """Better way to find .no_binary.json . TODO: - what if multiple kne is same folder? """ fn_json_raw = list(p_sub.glob("*.no_binary.json")) if len(fn_json_raw) == 0: # need to extrtact from kne # release version compiler will not create .no_binary.json . p_kne_s = list(p_sub.glob("models_*.kne")) if len(p_kne_s) == 0: raise FileExistsError(f"No models_*.kne and no .no_binary.json found in {p_sub} .") cmd = f"{ADD_NEF_UTIL_PATH}; pushd {p_sub} >> /dev/null && {bin_kne_util} -j {p_kne_s[0].name}" cp = futils.run_bash_script(cmd) check_kne_util_err(cp, p_kne_s[0]) fn_json_raw = list(p_sub.glob("*.no_binary.json")) if len(fn_json_raw) == 0: raise FileExistsError(f"Failed to extract .no_binary.json from {p_kne_s[0].name} .") return fn_json_raw[0] def kne2nef(p_kne, p_nef, hw_mode): """Convert given kne file to nef.""" hw_mode = int(hw_mode) hw_nef_v2 = fconsts.MODE_HW_LIMIT["nef_v2"] assert hw_mode in hw_nef_v2, f"hw_mode ({hw_mode}) must be in {hw_nef_v2}" # model_info using "test" for place holder. toolchain will use actual values. cmd = f"""{ADD_NEF_UTIL_PATH}; {bin_nef_util} --gen --kne {p_kne} --target {hw_mode} -O {p_nef.parent} -o {p_nef.stem} && {bin_nef_util} -U {p_nef} --model_info_version "test" --model_info_name "test" --replace_original """ cp = futils.run_bash_script(cmd) assert cp.returncode == 0, f"convert kne to nef failed with return code: {cp.returncode}." return cp def unpack_nefs(p_nef, hw_mode): """Parse nef to get compiler outputs for csim inference. Ref: `ticket #17762`_ Args: p_nef (pathlib or str): path to the nef file, which may include multiple models. hw_mode (int): specify the platform (520/530/540/630/720/730/1140/etc), because the way to call nef_utils are different. Returns: dict-type: example: `{model_id: (p_sub, ioinfo)}`. - The `model_id` is unique for each released model. - `p_sub` is where the model for `model_id` is unpacked, - the `ioinfo` includes the shape and quantization info of input/output nodes. It will be used to convert input data to bin file as csim/dongle input. .. _ticket #17762: https://redmine.kneron.tw/issues/17762 """ p_out = pathlib.Path(tempfile.mkdtemp(prefix="nef_unpack_")) if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]: # 520, or 720 pure bin (obsolete) nef_version = 0 cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -p {hw_mode} -O {p_out}" elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]: # 720/530/630 flatbuffer nef_version = 1 cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -O {p_out}" elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]: # 540/730/1140 # 1 nef -> 1 kne (incl multiple models) nef_version = 2 cmd = f"""set -e; {ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -O {p_out} && pushd {p_out} >> /dev/null && {bin_kne_util} -X NEF_0x*_models_{hw_mode}.kne && for k in `ls models_{hw_mode}_model_*.kne` do {bin_kne_util} -j ${{k}} done """ else: raise NotImplementedError # extract nef file cp = futils.run_bash_script(cmd) if DEBUG: print(f"unpack nef (version {nef_version}) to {p_out}") print(cp.stderr) assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}." # put each model into submodel # for 520/720/530/630 model_ids = guess_available_model_id(p_out, hw_mode) fn_maps = {} for mid in model_ids: p_sub = p_out / f"model_{mid}" p_sub.mkdir(parents=True, exist_ok=True) if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]: # 540/730/1140 cmd = f"mv {p_out}/models_{hw_mode}_model_{mid}.kne* {p_sub}" else: cmd = f"mv {p_out}/NEF_*_modelid_{mid}_* {p_sub}" cp = futils.run_bash_script(cmd) assert cp.returncode == 0, f"Failed to move model_{mid} bin files. Return code: {cp.returncode}" p_sub = p_out / f"model_{mid}" ioinfo = convert_ioinfo(p_sub, hw_mode) # fn_map = locate_compiler_dump(p_sub, hw_mode, parse_nef=True) fn_maps[mid] = (p_sub, ioinfo) return fn_maps, p_out class NumpyEncoder(json.JSONEncoder): """To save numpy array in json. From `numpy array is not json serializable`_ . .. _numpy array is not json serializable: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable """ def default(self, obj): """Set default way.""" if isinstance(obj, np.ndarray): return obj.tolist() return json.JSONEncoder.default(self, obj) def parse_setup_json_v0(fn_json): """Parse raw json generated from 520 setup.bin. Necessary info per io node (same for all platform), see `verify_ioinfo()`. NOTE: - we assume only 1 input for 520 models. """ with open(fn_json, "r") as f: raw = json.load(f) ioinfo = {} def get_in(h): v1 = {} # NOTE: for 520, the given dimension is always 1CHW # There will be no onnx shape in setup.bin. # example, [1, 10] will be [1, 10, 1, 1] v1["name"] = "0" v1["shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]] v1["onnx_shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]] v1["bitw"] = 8 # only support 8bit # 520 only support per layer v1["radix"] = [h["input_radix"] for i in range(h["input_channel"])] v1["scale"] = [1.0 for i in range(h["input_channel"])] v1["ch_dim"] = 1 v1["data_format"] = "RGBA_8BIT" # just guess. to keep same format return v1 def get_out(i, h): d = {} # no name saved in 520 setup.bin / nef. so we use index only d["name"] = str(i) d["shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]] d["onnx_shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]] d["bitw"] = 8 # only support 8bit # NOTE: 520 radix/scale are same for all channels d["radix"] = [h["output_radix"] for i in range(h["ch_length"])] per_channel_scales = futils.intle2flt(h["output_scale"]) d["scale"] = [per_channel_scales for i in range(h["ch_length"])] d["ch_dim"] = 1 d["data_format"], _ = parse_data_format(520, h["data_format"]) return d # input. assume only one. # sometime the json have headers or header if "headers" in raw: ioinfo["input"] = [get_in(a) for a in raw["headers"]] else: assert "header" in raw, "Extracted 520 setup.bin.json have no header nor headers." ioinfo["input"] = [get_in(raw["header"])] # output. maybe multiple. ioinfo["output"] = [get_out(i, d) for i, d in enumerate(raw["outputs"])] return ioinfo def check_input_fmt(input_fmt, platform): """Check the input format. Args: input_fmt: None, str或dict类型。如果是dict, key必须是'input_数字_value_info'格式 platform: 硬件平台 Raises: ValueError: 当格式不正确时抛出 Note: Compiler会进行额外的格式检查, 可能会抛出以下错误: * InvalidProgramInput: 当指定不正确的格式时(例如:指定input_fmt为HW5C8B) * UnimplementedFeature: 当指定模型不支持的inproc format时(会触发assert) * HardwareNotSupport: 当指定硬件不支持的格式时(例如: first layer不支持4W4C8B但指定了该格式) Todo: * Check if NUM in 'input_NUM_value_info' is within valid range (1 to max number of inputs) """ if input_fmt is not None: _, supported_formats = get_support_formats(platform) if isinstance(input_fmt, str): if input_fmt not in supported_formats: raise ValueError(f"input_fmt should be in {supported_formats}. But got {input_fmt} .") elif isinstance(input_fmt, dict): pattern = re.compile(r'^input_\d+_value_info$') for k, v in input_fmt.items(): if not pattern.match(k): raise ValueError(f"input_fmt's key should be in 'input_NUM_value_info' format, but got {k}") if v not in supported_formats: raise ValueError(f"input_fmt's value should be in {supported_formats}, but got {v}") def get_support_formats(hw_mode): """Get the list of supported formats for a given hw_mode.""" if hw_mode == 520: """refer to compiler/lib/target/mozart/basic/hw_define.h not using this info now. """ ref = { -1: ("UNKNOWN", 8), 8: ("16W1C8B", 8), 0: ("8W1C16B", 16), 9: ("BY_COL_8BIT", 8), 1: ("BY_COL_16BIT", 16), 10: ("BY_CHNL_8BIT", 8), 2: ("BY_CHNL_16BIT", 16), 15: ("CUSTOMIZE", 8), 16: ("RGBA_8BIT", 8), 17: ("RGBA_16BIT", 16), 18: ("SEQ_32BIT", 32), 100: ("RAW8", 8), 101: ("RAW16", 16), 102: ("RAW_FLOAT", 32), } elif hw_mode == 720: """refer to compiler/lib/target/beethoven/basic/hw_define.h""" ref = { -1: ("UNKNOWN", 8), 0: ("1W16C8B", 8), 1: ("1W16C8B_INTLV", 8), 2: ("1W16C8BHL", 16), 3: ("1W16C8BHL_INTLV", 16), 4: ("4W4C8B", 8), 5: ("16W1C8B", 8), 6: ("8W1C16B", 16), 7: ("PS_8W1C16B", 16), 8: ("PS_1W8C16B", 16), 9: ("PS_1W4C32B", 32), 11: ("PS_2W4C16B", 16), 12: ("PS_4W1C32B", 32), 13: ("PS_1W16C16B", 16), 14: ("PS_1W8C32B", 32), 15: ("PS_1W16C32B", 32), 16: ("PS_4W2C16B", 16), 17: ("PS_2W4C32B", 32), 18: ("PS_2W2C32B", 32), 100: ("RAW8", 8), 101: ("RAW16", 16), 102: ("RAW_FLOAT", 32), } elif hw_mode in [530, 540, 630]: """ 730/540/630 refer to compiler/lib/target/wagner/basic/hw_define.h 530 see refer to compiler/lib/target/bach/basic/hw_define.h but seems same for now UNKNOWN = (int)DATA_FORMAT_FMT_UNKNOWN, 1W16C8B, 1W16C8BHL, 4W4C8B, 4W4C8BHL, 16W1C8B, 16W1C8BHL, 8W1C16B, PS_1W16C24B, RAW_FLOAT = (int)DATA_FORMAT_FMT_RAW_FLOAT, """ ref = { -1: ("UNKNOWN", 8), 0: ("1W16C8B", 8), 1: ("1W16C8BHL", 16), 2: ("4W4C8B", 8), 3: ("4W4C8BHL", 16), 4: ("16W1C8B", 8), 5: ("16W1C8BHL", 16), 6: ("8W1C16B", 16), 7: ("PS_1W16C24B", 24), 100: ("RAW8", 8), 102: ("RAW16", 16), 103: ("RAW_FLOAT", 32), } elif hw_mode in [730]: """ 730/540/630 refer to compiler/lib/target/wagner/basic/hw_define.h UNKNOWN = (int)DATA_FORMAT_FMT_UNKNOWN, 1W16C8B, 1W16C8BHL, 4W4C8B, 4W4C8BHL, 16W1C8B, 16W1C8BHL, 8W1C16B, PS_1W16C24B, 1W16C8B_CH_COMPACT, // only used by fw 1W16C8BHL_CH_COMPACT, // only used by fw RAW_FLOAT = (int)DATA_FORMAT_FMT_RAW_FLOAT, """ ref = { -1: ("UNKNOWN", 8), 0: ("1W16C8B_CH_COMPACT", 8), 1: ("1W16C8BHL_CH_COMPACT", 16), 2: ("4W4C8B", 8), 3: ("4W4C8BHL", 16), 4: ("16W1C8B", 8), 5: ("16W1C8BHL", 16), 6: ("8W1C16B", 16), 7: ("PS_1W16C24B", 24), 8: ("1W16C8B", 8), 9: ("1W16C8BHL", 16), 10: ("HW4C8B_KEEP_A", 8), # inproc 11: ("HW4C8B_DROP_A", 8), # inproc 12: ("HW1C8B", 8), # inproc 13: ("HW1C16B_LE", 16), # inproc 14: ("HW1C16B_BE", 16), # inproc 100: ("RAW8", 8), 102: ("RAW16", 16), 103: ("RAW_FLOAT", 32), } elif hw_mode in [1140]: """ 1140 refer to compiler/lib/ravel/basic/hw_define.h 1W32C8B, 1W32C8BHL, 8W4C8B, 8W4C8BHL, 32W1C8B, 32W1C8BHL, 16W1C16B, PS_1W32C40B, RAW_FLOAT = (int)DATA_FORMAT_FMT_RAW_FLOAT, """ ref = { -1: ("UNKNOWN", 8), 0: ("1W32C8B", 8), 1: ("1W32C8BHL", 16), 2: ("8W4C8B", 8), 3: ("8W4C8BHL", 16), 4: ("32W1C8B", 8), 5: ("32W1C8BHL", 16), 6: ("16W1C16B", 16), 7: ("PS_1W32C40B", 40), 100: ("RAW8", 8), 102: ("RAW16", 16), 103: ("RAW_FLOAT", 32), } else: raise ValueError(f"Unsupported hw_mode: {hw_mode}") fmt_valid = [v[0] for k, v in ref.items() if k >= 0] return ref, fmt_valid def parse_data_format(hw_mode, fmt): """Convert fmt number to real format. The raw ioinfo from compiler use int to represent hardware data format. The data-converter require the input of format in "string". This function will take definition from compiler and hard-code here. Update if compiler changed. Ref: `ticket #17762`_ """ ref, _ = get_support_formats(hw_mode) if int(fmt) not in ref: raise ValueError(f"Unsupported fmt: {fmt} for hw_mode: {hw_mode}") # need format name and bitwidth return ref[int(fmt)] def parse_setup_json_v1(fn_json): """Parse raw json generated from setup.bin (v2 flatbuffer, 530/630/720). Necessary info per io node (same for all platform), please refer to `parse_setup_json_v0()`. """ with open(fn_json, "r") as f: raw = json.load(f) ioinfo = {} def get_platform(j): return int(j["header"]["target"].removeprefix("KL")) platform = get_platform(raw) def get_in(h): v1 = {} v1["name"] = h["name"] # from Jay # setup.bin 的 Tensor 定義: # raw_shape 是 onnx shape # shape 是 hw shape # ioinfo.json 的定義: # onnx_shape 是 onnx_shape # shape 是 hw shape v1["shape"] = np.array(h["shape"]) v1["onnx_shape"] = np.array(h["raw_shape"]) # TODO: is this true? always second? v1["ch_dim"] = 1 v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"]) # for per channel radix/scale n_ch = v1["onnx_shape"][v1["ch_dim"]] for k in ["radix", "scale"]: t = [a[k] for a in h["quantization"]["fxp_info"]] if len(t) == n_ch: # per channel given v1[k] = np.array(t) else: # per layer given. need expand assert ( len(t) == 1 ), f"channel {n_ch} but got {k} for {len(t)} channels: {t}" v1[k] = np.array([t[0] for i in range(n_ch)]) return v1 # input. maybe multiple ioinfo["input"] = [get_in(d) for d in raw["inputs"]] # output. maybe multiple. ioinfo["output"] = [get_in(d) for d in raw["outputs"]] return ioinfo def patch_16b_output(out_1): """CSIM will have only 8/15 bit dump of .seq files. Convert if compiler give 16bit radix. Output Only. """ if out_1["bitw"] == 16: out_1["radix"] = out_1["radix"] - 1 out_1["bitw"] = 15 return out_1 def parse_setup_json_v2(fn_json): """Parse raw json generated from kne (540/730). NOTE: we suppose only one model in a kne. Necessary info per io node (same for all platform), please refer to `parse_setup_json_v0()`. Ref: `ticket #17762`_ """ with open(fn_json, "r") as f: # MAYBE: .no_binary.json may have inf in it making the json invalid. json_str = futils.preprocess_json(f.read()) raw = json.loads(json_str) n_models = len(raw["models"]) if n_models > 1: print(f"WARNING: found {n_models} in {fn_json}. will only extract first model ioinfo for now.") def get_platform(j): return int(j["header"]["target"].removeprefix("KL")) platform = get_platform(raw) def parse_ch_dim(lst): """Input lst should be a list of 4 elements: [b, c, h, w].""" if lst[0] == -1: # when list is [-1. -1, -1, -1] return 1 else: # there should be no -1 in the list assert lst[1] != -1 return lst[1] def get_in(h): """Convert no_binary.json to ioinfo.json format.""" v1 = {} v1["name"] = h["name"] v1["ndim"] = h["ndim"] v1["shape"] = np.array(h["shape"]) # need to combine shape and inv_shape_intrp_dim to get real onnx_shape. # see #18456 v1["onnx_shape"] = np.array([v1["shape"][a] for a in h["inv_shape_intrp_dim"]]) # v1["ch_dim"] = parse_ch_dim(h["shape_intrp_dim"]) v1["ch_dim"] = h["ch_dim"] n_ch = v1["shape"][v1["ch_dim"]] v1["stride"] = np.array(h["stride_aligned"]) v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"]) # for per channel radix k = "radix" t = h["quantization"][k] if len(t) == n_ch: # per channel given v1[k] = np.array(t) else: # per layer given. need expand assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}" v1[k] = np.array([t[0] for i in range(n_ch)]) # scale k = "scale" scale_le = h["quantization"]["scale"] scale_le_n = h["quantization"]["scale_count"] t = futils.array_le2flt(scale_le, scale_le_n) if len(t) == n_ch: # per channel given v1[k] = np.array(t) else: # per layer given. need expand assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}" v1[k] = np.array([t[0] for i in range(n_ch)]) return v1 ioinfo = {} ioinfo["input"] = [get_in(d) for d in raw["models"][0]["header"]["inputs"]] ioinfo["output"] = [patch_16b_output(get_in(d)) for d in raw["models"][0]["header"]["outputs"]] return ioinfo @lru_cache(maxsize=128) def locate_compiler_dump(p_out, hw_mode, parse_nef=False): """Locate important files in compiler dump folder. Each platform has it's own required files to run csim. Some names may change, e.g., test.conf/apb.npu, but they serve same purpose. This function is to find correponding file and return organized as dict, so that each call will always get correct file independant of hw_mode. """ p_out = pathlib.Path(p_out) if hw_mode in [520]: patterns = { "setup_bin": "*setup.bin", "command_bin": "*command.bin", "weight_bin": "*weight.bin", "apb_npu": "*test.conf", # diff } if parse_nef: # HACK # unpack_nefs will genearte ioinfo.json for 520 patterns["ioinfo_json"] = "*ioinfo.json" # diff elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]: patterns = { "setup_bin": "*setup.bin", "command_bin": "*command.bin", "weight_bin": "*weight.bin", "ioinfo_json": "*ioinfo.json", "apb_npu": "*apb.npu", } if hw_mode in [720]: # diff for 520/720 patterns["apb_npu"] = "*test.conf" elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]: patterns = { "kne": f"*models_{hw_mode}*.kne", } else: raise NotImplementedError fn_map = {} for k, v in patterns.items(): ps = list(p_out.glob(v)) assert len(ps) >= 1, f"Looking for {k}, expect 1, but found {len(ps)}: {ps}" fn_map[k] = ps[0] return fn_map # TODELETE # def load_ioinfo_json(fn_ioinfo): # """Load compiler generated ioinfo.json then apply special process. # # Convert `shape`/`onnx_shape`/`radix`/`scale` to numpy array for # better process later. # # NOTE: # No ioinfo.json for 520. # """ # with open(fn_ioinfo, "r") as f: # ioinfo = json.load(f) # # for io in ["input", "output"]: # for a in ioinfo[io]: # a["name"] = futils.clean_name(a["name"]) # for k in ["onnx_shape", "scale", "radix", "shape"]: # a[k] = np.array(a[k]) # return ioinfo def collect_fps_improve(dir_out): """Load the fps improved from ip_evaluator reports. The reults will be compiled with other analysis and put in the final report. Args: `dir_out`: the output folder of compiler. """ p_f = pathlib.Path(dir_out) / "summary_image_cut_search.txt" if not p_f.exists(): return None with open(p_f, "r") as f: lines = f.readlines() prefix = "fps improve:" for line in lines: if line.startswith(prefix): return line.removeprefix(prefix).strip() return None def get_cpu_node_op_type(dir_out): """Extract cpu op related from setup.txt.""" cpu_node_list_str = set() setup_dir = f"{dir_out}/setup.txt" def extract_cpu_op_type(txt): s = re.compile('"(op_type|opcode_index)": *"(.*?)"') return s.findall(txt)[0][1] try: with open(setup_dir, "r") as f: lines = f.readlines() for line in lines: # new setup.txt(opcode_index) # old setup.txt(op_type) if "op_type" in line or "opcode_index" in line: cpu_node_str = extract_cpu_op_type(str(line)) if cpu_node_str == "CpuFusion": continue cpu_node_list_str.add(cpu_node_str) if len(cpu_node_list_str) == 0: return "None" else: return ",".join(cpu_node_list_str) except: # print("No setup.txt found.") return "N/A" def collect_command_weight_size(dir_out): """As name implies.""" cmd_size = None weight_size = None stats_dir = f"{dir_out}/dbg.stat.json" try: with open(stats_dir, "r") as f: stats = json.load(f) cmd_size = int(stats["general"]["cmd_size"] / (10**3)) weight_size = int(stats["general"]["wt_size"] / (10**6)) except: pass return cmd_size, weight_size def find_cpu_nodes(lines): """As name implies.""" nodes = [] found = False for line in lines: if line.startswith("***** Warning: CPU ops types"): found = True continue if found: clean = line.strip().strip(",") if len(clean) > 4: nodes.append(clean) else: found = False if len(nodes) > 0: return "//".join(nodes) else: return "N/A" def collect_FPS(dir_out, hw_mode): """Collect FPS info from compiler output folder. WARNING: - Tiefang will make report same for ALL platforms. - will all be named as `ProfileResult.txt` """ profile_dir = f"{dir_out}/ProfileResult.txt" d_profile = OrderedDict() def search_by_prefix(lines, k): for line in lines: if line.startswith(k): return line.removeprefix(k).strip() return None def gb2mb(line): return float(line.removesuffix("GB")) * 1000 def convert2int(s): if s == "inf" or s is None: return None return int(float(s)) try: with open(profile_dir, "r") as f: lines = f.readlines() # load fps if hw_mode == 520: d_profile["fps"] = search_by_prefix(lines, "output_fps =") d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =") d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =") d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =") d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =") d_profile["cpu_node"] = find_cpu_nodes(lines) # d_profile[f"RV(mb)"] = search_by_prefix(lines, "output_total_data_move_in_amount =") # d_profile[f"WV(mb)"] = search_by_prefix(lines, "output_total_data_move_out_amount =") else: d_profile["fps"] = search_by_prefix(lines, "output_fps =") d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =") d_profile["C(GOPs)"] = search_by_prefix(lines, "output_total_theory_mac =") d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =") d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =") d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =") d_profile["RV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_RDMA_amount =")) d_profile["WV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_WDMA_amount =")) d_profile["cpu_node"] = find_cpu_nodes(lines) except: # print("No {} found.".format(profile_dir)) pass # filter None items d_prof = OrderedDict() for k, v in d_profile.items(): if v: # not None d_prof[k] = v return d_prof def parse_fm_cut_summary(p_txt): """Parse the Summary.txt from compiler/fm_cut output for time and iteration records.""" time_total = None n_total = None n_fm_cut = None with open(p_txt, 'r') as file: for line in file: # 提取估计时间 if time_total is None and "Total search time:" in line: time_match = re.search(r"Total search time: *(\d+) mins?", line) if time_match: time_total = int(time_match.group(1)) else: # probally failed by timeout. # TODO: use the timeout value to replace it. time_total = "NA" # 提取计数器信息 if n_total is None and "Totally searched" in line: count_match = re.search(r"Totally searched (\d+) times; Image cut\(compiler\) succeeded (\d+) times!", line) if count_match: n_total = int(count_match.group(1)) n_fm_cut = int(count_match.group(2)) else: n_total = n_fm_cut = "NA" # 如果所有变量都已经找到,可以提前结束循环 if time_total and n_total and n_fm_cut: break if DEBUG: print(f"fm cut: time {time_total} min, total {n_total} iterations, include {n_fm_cut} succcessful fm_cut.") return time_total, n_total, n_fm_cut def lookup_compiler_error(cp, hw_mode, module="compiler"): """Find the detailed error from compiler return code. Ref: https://redmine.kneron.tw/issues/18389 Compiler return code is between 1-30. gen_config.py will return 31-50 if fm_cut failed. """ rc = cp.returncode status = { 1: ("compiler", "compiler common"), 2: ("compiler", "compiler invalid input"), 3: ("compiler", "invalid onnx attribute"), 4: ("HW not support", "Err: 4"), 5: ("compiler", "unexpected graph"), 6: ("unimplemented feature", f"compiler: {rc}"), 7: ("compiler", "value not ready"), 8: ("knerex", "compiler: knerex config error"), 9: ("compiler", "unexpected value"), 111: ("fm_cut", cp.stderr), -15: ("fm_cut", "killed by SIGTERM"), 32: ("fm_cut", f"{hw_mode} is not supported"), 33: ("fm_cut", "No info_cutting.log"), } if rc in status: # specific msgs return status[rc] elif rc >= 1 and rc <= 30: return ("compiler", f"Err: {rc}") elif rc >= 31 and rc <= 50: return ("fm_cut", f"Err: {rc}") else: return (module, f"Err: {rc}") def check_fm_cut_log(log_content): """Extract ret_code from fm_cut log.""" # 定义正则表达式,匹配特定的错误信息 pattern = r'ERROR: run sub-module "image_cut_search" failed[ !]*\[ret_code=(\d+)\. msg="(.*?)"\]' # 在日志内容中查找匹配项 match = re.search(pattern, log_content) if match: # 提取ret_code和msg ret_code = int(match.group(1)) # msg = match.group(2) # not used now if ret_code == 4: return ("HW not support", "reported by fm_cut") elif ret_code == 6: return ("unimplemented feature", "reported by fm_cut") else: return ("compiler", f"fm_cut reported: err {ret_code}") else: return None def parse_compiler_warning(p_compiler_out): """Need to extract warning from compiler logs. Those lines has `[error]` `[critical] [warning]` Extract each line and return a list. NOTE: - keyword in test_case.py: self.graph_warnings """ p_logs = list(p_compiler_out.rglob("batch_compile.log")) warning_lines = [] MARKS = ["[error]", "[critical]", "[warning]"] # 遍历所有日志文件 for p_log in p_logs: try: with open(p_log, "r", encoding="utf-8", errors="ignore") as f: for line_num, line in enumerate(f, 1): line = line.strip() # 检查是否包含警告、错误或关键信息标记 if any(marker in line.lower() for marker in MARKS): # 记录文件名、行号和内容 warning_lines.append({ "file": str(p_log.name), "line": line_num, "content": line }) except Exception as e: pass # 如果读取文件失败,记录错误信息 # warning_lines.append({ # "file": str(p_log.name), # "line": 0, # "content": f"Failed to read file: {str(e)}" # }) return warning_lines def parse_compiler_logs(p_compiler_out): """Extract detailed error from compiler logs.""" p_logs = list(p_compiler_out.rglob("*.log")) # load all the logs t = "" for p_log in p_logs: with open(p_log, "r") as f: t += "".join(f.readlines()) if len(t) == 0: return None # t is a long line with \n in it. results = check_fm_cut_log(t) if results: return results prefixes_1 = { # "ERROR: run sub-module \"image_cut_search\" failed": ("fm_cut", "compiler report"), "Invalid program input: Memory region \[weight\] .*? overlapps \[dram\]": ("compiler", "datapath oversize"), # 720 old setup "CSim only support CPU node in the end of model and write data to output buffer": ("compiler", "cpu node in middle"), } for keyw, (col_name, msg) in prefixes_1.items(): pat1 = re.compile(keyw) if len(pat1.findall(t)) > 0: return (col_name, msg) prefixes = { "Common": ("compiler", ""), "InvalidProgramInput": ("compiler", ""), "InvalidONNXAttribute": ("compiler", ""), "HardwareNotSupport": ("HW not support", "compiler: "), "Hardware not support": ("HW not support", "compiler: "), "UnexpectedGraph": ("compiler", ""), "UnimplementedFeature": ("unimplemented feature", "compiler: "), "ValueNotReady": ("compiler", ""), "KnerexError": ("knerex", "compiler: "), "UnexpectedValue": ("compiler", ""), "creating an EmptyNode instance for op_type:": ("compiler", "unsupported nodes: //"), } for keyw, (col_name, prefix) in prefixes.items(): pat1 = re.compile(f"{keyw}[:\s]*(.*)") if len(pat1.findall(t)) > 0: msg = prefix + "//".join(pat1.findall(t)) return (col_name, msg) # found no detailed error. return None