#! /usr/bin/env python3
|
|
|
|
import os
|
|
import pathlib
|
|
import tempfile
|
|
import shutil
|
|
import re
|
|
import json
|
|
from collections import OrderedDict
|
|
from functools import lru_cache
|
|
|
|
import numpy as np
|
|
|
|
import sys_flow.flow_constants as fconsts
|
|
import sys_flow.flow_utils as futils
|
|
|
|
import snoop
|
|
# Verbose tracing via `snoop` is enabled only when REGRESSION_DEBUG is set
# in the environment (any non-empty value turns it on).
DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False
snoop.install(enabled=DEBUG)


# constants

# Scratch locations used by the regression flow.
# NOTE(review): fixed /tmp paths assume one flow instance per machine — confirm.
P_TMP_MODEL = pathlib.Path("/tmp/model_working")
P_TMP_INPUT = pathlib.Path("/tmp/input_data")
|
|
|
|
|
|
def get_nef_util_bins():
    """Resolve the nef/kne utility binaries and build a PATH export snippet.

    Returns:
        tuple: ``(export_cmd, nef_util, kne_util)`` where ``export_cmd`` is a
        bash fragment prepending both tool folders to ``$PATH``, and the other
        two are the configured paths of the nef/kne utilities.
    """
    compiler_bins = fconsts.BIN_SET["compiler"]
    nef_util = compiler_bins["kneron_nef_utils"]
    kne_util = compiler_bins["kneron_kne_utils"]
    dir_nef = pathlib.Path(nef_util).parent
    dir_kne = pathlib.Path(kne_util).parent
    export_cmd = f"export PATH={dir_nef}:{dir_kne}:$PATH"
    return export_cmd, nef_util, kne_util
|
|
|
|
|
|
# Resolved once at import time; every shell command below reuses these.
ADD_NEF_UTIL_PATH, bin_nef_util, bin_kne_util = get_nef_util_bins()
|
|
|
|
|
|
###################################################################################
|
|
# get model info from nef + ioinfo.json
|
|
###################################################################################
|
|
def clean_list_nef(list_nef):
    """Normalize a list of nef paths into one space-separated string.

    Each element is converted to ``str`` (callers may pass ``pathlib.Path``
    objects) and duplicates are dropped.  Deduplication is order-preserving
    (``dict.fromkeys``) so the shell command built from the result is
    deterministic across runs, unlike the previous ``set()``-based version.

    Args:
        list_nef (list): nef file paths (str or pathlib.Path).

    Returns:
        str: unique paths, in first-seen order, joined by single spaces.
    """
    unique = dict.fromkeys(str(nef) for nef in list_nef)
    return " ".join(unique)
|
|
|
|
|
|
def combine_nef(list_nef: list, hw_mode, d_out):
    """Combine multiple nef into one using nef utils.

    After combination, the combined.nef will run an extra `unpack_nefs()` and
    be re-organized with an `ioinfo.json` per model.  This side-effect prepares
    a combined `ioinfo.json` for dongle inference.

    Args:
        list_nef (list): each element is path to nef file.
        hw_mode (int): specify platform.
        d_out (pathlib / str): where to put `combined.nef` and `ioinfo.json`

    Returns:
        tuple: multiple info returned:

        - `p_out`: where is the out folder. usually same as specified.
        - `p_nef`: path of the combined nef
        - `p_ioinfo`: path of the (combined) ioinfo.json, prepared for dongle, not for normal process!
        - `fn_maps`: the combined.nef is unpacked and re-organized in `p_out/unpack`.
          Per-model file mapping is recorded in this.
          Same as `unpack_nefs()` returned.

    """
    # work in a scratch dir: the tool names its output models_*.nef itself
    temp_dir = tempfile.mkdtemp()
    lst = clean_list_nef(list_nef)
    cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} --combine_nef \"{lst}\" -O {temp_dir}"
    # currently no -o option working. we need to cpy $temp_dir/models_xxx.nef to fn_out
    cp = futils.run_bash_script(cmd)
    assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}"

    # check output: exactly one combined nef is expected
    p_temp = pathlib.Path(temp_dir)
    nefs = list(p_temp.glob("models_*.nef"))
    assert len(nefs) == 1, f"combine nef but find {len(nefs)} created: {nefs}"

    # copy necessary files to p_out
    p_out = pathlib.Path(d_out)
    p_out.mkdir(parents=True, exist_ok=True)
    p_nef = p_out / "combined.nef"
    shutil.copyfile(nefs[0], p_nef)

    # prepare ioinfo (for convience of dongle):
    # one json keyed by model_id holding the per-node input/output quant info
    dongle_io = {}
    fn_maps, p_dump = unpack_nefs(p_nef, hw_mode)
    for model_id, (p_unpack, ioinfo) in fn_maps.items():
        dongle_io[model_id] = {}
        dongle_io[model_id]["ioinfo_in"] = ioinfo["input"]
        dongle_io[model_id]["ioinfo_out"] = ioinfo["output"]
    p_ioinfo = p_out / "ioinfo.json"
    with open(p_ioinfo, "w") as f:
        # NumpyEncoder: ioinfo values may be numpy arrays
        json.dump(dongle_io, f, cls=NumpyEncoder)

    shutil.rmtree(temp_dir, ignore_errors=True)

    return p_out, p_nef, p_ioinfo, fn_maps
|
|
|
|
|
|
def guess_available_model_id(p_dump, hw_mode):
    """Guess model_id values from filenames extracted out of a NEF.

    NOTE: if the nef is from regression, it will have default model_id 32768.

    Args:
        p_dump (pathlib / str): where the nef was extracted to.
        hw_mode (int): specify the platform.

    Returns:
        tuple: sorted, unique model ids found in the given dump folder.
    """
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        glob_pattern = f"models_{hw_mode}_model_*.kne"
        id_regex = rf"models_{hw_mode}_model_(\d+).kne"
    else:
        glob_pattern = "NEF_*modelid_*"
        id_regex = r'NEF_.*?_modelid_(\d+)_.*$'

    modelids = []
    for p in pathlib.Path(p_dump).glob(glob_pattern):
        modelids.extend(re.findall(id_regex, p.name))
    # sorted() (instead of bare set order) keeps the returned tuple
    # deterministic across runs, stabilizing downstream processing order.
    return tuple(sorted({int(a) for a in modelids}))
|
|
|
|
|
|
def verify_ioinfo(ioinfo, nef_version):
    """Verify ioinfo carries enough quantization info for every io node.

    Prints an error line for each missing key, then asserts none were missing.
    `nef_version` is accepted for interface compatibility but not consulted.
    """
    required = ("name", "shape", "onnx_shape", "ch_dim",
                "radix", "scale", "bitw", "data_format")
    missing = False
    for direction in ("input", "output"):
        if DEBUG:
            print(f"ioinfo got {len(ioinfo[direction])} of {direction}.")
        for idx, node in enumerate(ioinfo[direction]):
            for key in required:
                if key not in node:
                    print(f"Error: {direction}/{idx} is missing {key}")
                    missing = True
    assert not missing
|
|
|
|
|
|
def convert_ioinfo(p_sub, hw_mode):
    """Load raw io info from an unpacked model and save it as ioinfo.json.

    This is a wrapper that dispatches to the parser matching ``hw_mode``.

    Args:
        p_sub (pathlib.Path): folder holding one unpacked model.
        hw_mode (int): platform id.

    Returns:
        dict: the parsed ioinfo (also written to ``p_sub / "ioinfo.json"``).

    Raises:
        NotImplementedError: for a hw_mode with no known nef version
            (previously this fell through and crashed with a NameError).
    """
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]:
        # 520, or 720 pure bin (obsolete)
        # BUG: only per-layer quantization info
        # BUG: only sim shape. (no onnx shape. no dimension transpose in sim shape though.)
        fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0]
        ioinfo = parse_setup_json_v0(fn_json_raw)
        nef_version = 0
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:
        fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0]
        ioinfo = parse_setup_json_v1(fn_json_raw)
        nef_version = 1
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        fn_json_raw = list(p_sub.glob("*.kne.no_binary.json"))[0]
        ioinfo = parse_setup_json_v2(fn_json_raw)
        nef_version = 2
    else:
        # consistent with unpack_nefs(): fail loudly on unknown platforms
        raise NotImplementedError(f"Unsupported hw_mode: {hw_mode}")

    verify_ioinfo(ioinfo, nef_version)

    fn_ioinfo = p_sub / "ioinfo.json"
    with open(fn_ioinfo, "w") as f:
        json.dump(ioinfo, f, cls=NumpyEncoder)
    return ioinfo
|
|
|
|
|
|
def unpack_nefs(p_nef, hw_mode):
    """Parse nef to get compiler outputs for csim inference.

    Ref: `ticket #17762`_

    Args:
        p_nef (pathlib or str): path to the nef file, which may include
            multiple models.
        hw_mode (int): specify the platform (520/530/540/630/720/730/etc),
            because the way to call nef_utils are different.

    Returns:
        dict-type: example: `{model_id: (p_sub, ioinfo)}`.

        - The `model_id` is unique for each released model.
        - `p_sub` is where the model for `model_id` is unpacked,
        - the `ioinfo` includes the shape and quantization info of input/output nodes.
          It will be used to convert input data to
          bin file as csim/dongle input.

        A second value `p_out` (the scratch folder everything was unpacked
        into) is also returned.

    Raises:
        NotImplementedError: hw_mode matches no known nef version.

    .. _ticket #17762: https://redmine.kneron.tw/issues/17762
    """
    # fresh scratch dir per unpack; handed back to the caller as p_out
    p_out = pathlib.Path(tempfile.mkdtemp(prefix="nef_unpack_"))
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]:  # 520, or 720 pure bin (obsolete)
        nef_version = 0
        cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -p {hw_mode} -O {p_out}"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:  # 720/530/630 flatbuffer
        nef_version = 1
        cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -O {p_out}"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        # 1 nef -> 1 kne (incl multiple models)
        # nef -> kne, then split the kne and emit one json per model
        nef_version = 2
        cmd = f"""set -e; {ADD_NEF_UTIL_PATH};
    {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -O {p_out} &&
    pushd {p_out} >> /dev/null &&
    {bin_kne_util} -X NEF_0x*_models_{hw_mode}.kne &&
    for k in `ls models_{hw_mode}_model_*.kne`
    do
    {bin_kne_util} -j ${{k}}
    done
    """
    else:
        raise NotImplementedError
    # extract nef file
    cp = futils.run_bash_script(cmd)
    if DEBUG:
        print(f"unpack nef (version {nef_version}) to {p_out}")
        print(cp.stderr)
    assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}."

    # put each model into submodel
    # for 520/720/530/630
    model_ids = guess_available_model_id(p_out, hw_mode)
    fn_maps = {}
    for mid in model_ids:
        p_sub = p_out / f"model_{mid}"
        p_sub.mkdir(parents=True, exist_ok=True)
        # move this model's extracted files into its own subfolder
        if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
            cmd = f"mv {p_out}/models_{hw_mode}_model_{mid}.kne* {p_sub}"
        else:
            cmd = f"mv {p_out}/NEF_*_modelid_{mid}_* {p_sub}"
        cp = futils.run_bash_script(cmd)
        assert cp.returncode == 0, f"Failed to move model_{mid} bin files. Return code: {cp.returncode}"

        p_sub = p_out / f"model_{mid}"
        ioinfo = convert_ioinfo(p_sub, hw_mode)

        # fn_map = locate_compiler_dump(p_sub, hw_mode, parse_nef=True)
        fn_maps[mid] = (p_sub, ioinfo)

    return fn_maps, p_out
|
|
|
|
|
|
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that understands numpy arrays and numpy scalars.

    From `numpy array is not json serializable`_ .

    .. _numpy array is not json serializable: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable
    """

    def default(self, obj):
        """Convert numpy objects to plain Python equivalents."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # numpy scalar types (np.int64, np.float32, ...) leak out of
        # indexing operations on the arrays stored in ioinfo; handle them
        # too instead of raising TypeError.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return json.JSONEncoder.default(self, obj)
|
|
|
|
|
|
def parse_setup_json_v0(fn_json):
    """Parse raw json generated from 520 setup.bin.

    Necessary info per io node (same for all platform),
    see `verify_ioinfo()`.

    NOTE:
        - we assume only 1 input for 520 models.

    Related to flow_utils/get_ioinfo_from_knerex_json

    Args:
        fn_json (pathlib / str): path to the extracted *_setup.bin.json.

    Returns:
        dict: ``{"input": [...], "output": [...]}`` with one dict per node.
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)

    ioinfo = {}

    def get_in(h):
        # Build the io-info dict for the (single) input header `h`.
        v1 = {}
        # NOTE: for 520, the given dimension is always 1CHW
        # There will be no onnx shape in setup.bin.
        # example, [1, 10] will be [1, 10, 1, 1]
        v1["name"] = "0"
        v1["shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]]
        v1["onnx_shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]]
        v1["bitw"] = 8  # only support 8bit
        # 520 only support per layer; replicate the single value per channel
        v1["radix"] = [h["input_radix"] for i in range(h["input_channel"])]
        v1["scale"] = [1.0 for i in range(h["input_channel"])]
        v1["ch_dim"] = 1
        v1["data_format"] = "RGBA_8BIT"  # just guess. to keep same format
        return v1

    def get_out(i, h):
        # Build the io-info dict for output index `i` with header `h`.
        d = {}
        # no name saved in 520 setup.bin / nef. so we use index only
        d["name"] = str(i)
        d["shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]]
        d["onnx_shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]]
        d["bitw"] = 8  # only support 8bit
        # NOTE: 520 radix/scale are same for all channels
        d["radix"] = [h["output_radix"] for i in range(h["ch_length"])]
        # presumably decodes a little-endian int bit-pattern into a float
        # (see futils.intle2flt); a single layer-wide value, despite the name
        per_channel_scales = futils.intle2flt(h["output_scale"])
        d["scale"] = [per_channel_scales for i in range(h["ch_length"])]
        d["ch_dim"] = 1
        d["data_format"], _ = parse_data_format(520, h["data_format"])
        return d

    # input. assume only one.
    # sometime the json have headers or header
    if "headers" in raw:
        ioinfo["input"] = [get_in(a) for a in raw["headers"]]
    else:
        assert "header" in raw, "Extracted 520 setup.bin.json have no header nor headers."
        ioinfo["input"] = [get_in(raw["header"])]
    # output. maybe multiple.
    ioinfo["output"] = [get_out(i, d) for i, d in enumerate(raw["outputs"])]

    return ioinfo
|
|
|
|
|
|
def parse_data_format(hw_mode, fmt):
    """Convert fmt number to real format.

    The raw ioinfo from compiler use int to represent hardware data format.
    The data-converter require the input of format in "string".
    This function will take definition from compiler and hard-code here.
    Update if compiler changed.

    Ref: `ticket #17762`_

    Args:
        hw_mode (int): platform id (520 / 720 / 530 / 730 / 540 / 630).
        fmt (int or str): the compiler's integer format code.

    Returns:
        tuple: ``(format_name, bit_width)``.

    Raises:
        NotImplementedError: unknown hw_mode (previously fell through and
            implicitly returned None, crashing the caller on unpack).
        KeyError: fmt code not present in the table for this hw_mode.
    """
    if hw_mode == 520:
        # refer to compiler/lib/target/mozart/basic/hw_define.h
        # not using this info now.
        d = {
            -1: ("UNKNOWN", 8),
            8: ("16W1C8B", 8),
            0: ("8W1C16B", 16),
            9: ("BY_COL_8BIT", 8),
            1: ("BY_COL_16BIT", 16),
            10: ("BY_CHNL_8BIT", 8),
            2: ("BY_CHNL_16BIT", 16),
            15: ("CUSTOMIZE", 8),
            16: ("RGBA_8BIT", 8),
            17: ("RGBA_16BIT", 16),
            18: ("SEQ_32BIT", 32),
            100: ("RAW8", 8),
            101: ("RAW16", 16),
            102: ("RAW_FLOAT", 32),
        }
        return d[int(fmt)]
    elif hw_mode == 720:
        # refer to compiler/lib/target/beethoven/basic/hw_define.h
        ref = {
            -1: ("UNKNOWN", 8),
            0: ("1W16C8B", 8),
            1: ("1W16C8B_INTLV", 8),
            2: ("1W16C8BHL", 16),
            3: ("1W16C8BHL_INTLV", 16),
            4: ("4W4C8B", 8),
            5: ("16W1C8B", 8),
            6: ("8W1C16B", 16),
            7: ("PS_8W1C16B", 16),
            8: ("PS_1W8C16B", 16),
            9: ("PS_1W4C32B", 32),
            11: ("PS_2W4C16B", 16),
            12: ("PS_4W1C32B", 32),
            13: ("PS_1W16C16B", 16),
            14: ("PS_1W8C32B", 32),
            15: ("PS_1W16C32B", 32),
            16: ("PS_4W2C16B", 16),
            17: ("PS_2W4C32B", 32),
            18: ("PS_2W2C32B", 32),
            100: ("RAW8", 8),
            101: ("RAW16", 16),
            102: ("RAW_FLOAT", 32),
        }
        return ref[int(fmt)]
    elif hw_mode in [530, 730, 540, 630]:
        # 730/540/630 refer to compiler/lib/target/wagner/basic/hw_define.h
        # 530 refer to compiler/lib/target/bach/basic/hw_define.h
        # but seems same for now
        # NOTE(review): HL formats listed as 15-bit here (16 in the 720
        # table) and RAW16 is code 102 (101 elsewhere) — copied verbatim
        # from the compiler header; confirm against hw_define.h on update.
        ref = {
            -1: ("UNKNOWN", 8),
            0: ("1W16C8B", 8),
            1: ("1W16C8BHL", 15),
            2: ("4W4C8B", 8),
            3: ("4W4C8BHL", 15),
            4: ("16W1C8B", 8),
            5: ("16W1C8BHL", 15),
            6: ("8W1C16B", 16),
            7: ("PS_1W16C24B", 24),
            100: ("RAW8", 8),
            102: ("RAW16", 16),
            103: ("RAW_FLOAT", 32),
        }
        return ref[int(fmt)]
    else:
        raise NotImplementedError(f"Unknown hw_mode: {hw_mode}")
|
|
|
|
|
|
def parse_setup_json_v1(fn_json):
    """Parse raw json generated from setup.bin (v2 flatbuffer, 530/630/720).

    Necessary info per io node (same for all platform), please refer to `parse_setup_json_v0()`.

    Related to flow_utils/get_ioinfo_from_knerex_json

    Args:
        fn_json (pathlib / str): path to the extracted *_setup.bin.json.

    Returns:
        dict: ``{"input": [...], "output": [...]}`` with one dict per node.
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)

    ioinfo = {}

    def get_platform(j):
        # strips the leading K/L characters, e.g. "KL720" -> 720
        return int(j["header"]["target"].strip("KL"))

    platform = get_platform(raw)

    def get_in(h):
        # Build the io-info dict for one tensor entry `h` (used for both
        # inputs and outputs — the flatbuffer layout is identical).
        v1 = {}
        v1["name"] = h["name"]
        # from Jay
        # Tensor definitions in setup.bin:
        #   raw_shape is the onnx shape
        #   shape is the hw shape
        # Definitions in ioinfo.json:
        #   onnx_shape is the onnx shape
        #   shape is the hw shape
        v1["shape"] = np.array(h["shape"])
        v1["onnx_shape"] = np.array(h["raw_shape"])
        # TODO: is this true? always second?
        v1["ch_dim"] = 1

        v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"])

        # for per channel radix/scale
        n_ch = v1["onnx_shape"][v1["ch_dim"]]
        for k in ["radix", "scale"]:
            t = [a[k] for a in h["quantization"]["fxp_info"]]
            if len(t) == n_ch:  # per channel given
                v1[k] = np.array(t)
            else:  # per layer given. need expand
                assert (
                    len(t) == 1
                ), f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
                v1[k] = np.array([t[0] for i in range(n_ch)])
        return v1

    # input. maybe multiple
    ioinfo["input"] = [get_in(d) for d in raw["inputs"]]
    # output. maybe multiple.
    ioinfo["output"] = [get_in(d) for d in raw["outputs"]]

    return ioinfo
|
|
|
|
|
|
def parse_setup_json_v2(fn_json):
    """Parse raw json generated from kne (540/730).

    Necessary info per io node (same for all platform), please refer to `parse_setup_json_v0()`.

    Ref: `ticket #17762`_

    Related to flow_utils/get_ioinfo_from_knerex_json

    Args:
        fn_json (pathlib / str): path to the *.kne.no_binary.json dump.

    Returns:
        dict: ``{"input": [...], "output": [...]}`` with one dict per node.
        Only the first model in the kne (``raw["models"][0]``) is read.
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)

    ioinfo = {}

    def get_platform(j):
        # strips the leading K/L characters, e.g. "KL730" -> 730
        return int(j["header"]["target"].strip("KL"))

    platform = get_platform(raw)

    def parse_ch_dim(lst):
        """Input lst should be a list of 4 elements: [b, c, h, w]."""
        if lst[0] == -1:
            # when list is [-1. -1, -1, -1]
            return 1
        else:
            # there should be no -1 in the list
            assert lst[1] != -1
            return lst[1]

    def get_in(h):
        # Build the io-info dict for one tensor entry `h` of the kne
        # header (used for both inputs and outputs).
        v1 = {}
        v1["name"] = h["name"]
        v1["ndim"] = h["ndim"]
        v1["shape"] = np.array(h["shape"])
        # need to combine shape and inv_shape_intrp_dim to get real onnx_shape.
        # see #18456
        v1["onnx_shape"] = np.array([v1["shape"][a] for a in h["inv_shape_intrp_dim"]])
        v1["ch_dim"] = parse_ch_dim(h["shape_intrp_dim"])

        v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"])

        # for per channel radix
        # NOTE(review): channel count taken from hw shape dim 1 here, while
        # v1 uses onnx_shape[ch_dim] — confirm this difference is intended.
        n_ch = v1["shape"][1]
        k = "radix"
        t = h["quantization"][k]
        if len(t) == n_ch:  # per channel given
            v1[k] = np.array(t)
        else:  # per layer given. need expand
            assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
            v1[k] = np.array([t[0] for i in range(n_ch)])

        # scale: stored little-endian; decoded via futils.array_le2flt
        k = "scale"
        scale_le = h["quantization"]["scale"]
        scale_le_n = h["quantization"]["scale_count"]
        t = futils.array_le2flt(scale_le, scale_le_n)
        if len(t) == n_ch:  # per channel given
            v1[k] = np.array(t)
        else:  # per layer given. need expand
            assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
            v1[k] = np.array([t[0] for i in range(n_ch)])

        return v1

    ioinfo["input"] = [get_in(d) for d in raw["models"][0]["header"]["inputs"]]
    ioinfo["output"] = [get_in(d) for d in raw["models"][0]["header"]["outputs"]]

    return ioinfo
|
|
|
|
|
|
@lru_cache(maxsize=128)
def locate_compiler_dump(p_out, hw_mode, parse_nef=False):
    """Locate important files in compiler dump folder.

    Each platform has it's own required files to run csim.
    Some names may change, e.g., test.conf/apb.npu,
    but they serve same purpose.

    This function is to find correponding file and return
    organized as dict, so that each call will always get correct file
    independant of hw_mode.

    Args:
        p_out (pathlib / str): compiler dump folder (must be hashable for
            the lru_cache, so pass the same type consistently).
        hw_mode (int): platform id.
        parse_nef (bool): 520 only — also expect an ioinfo.json generated
            by `unpack_nefs()`.

    Returns:
        dict: logical name -> pathlib.Path of the first matching file.
        NOTE(review): the cached dict is mutable — callers must not modify
        the returned mapping.

    Raises:
        NotImplementedError: unsupported hw_mode.
        AssertionError: a required file is absent.
    """
    p_out = pathlib.Path(p_out)
    if hw_mode in [520]:
        patterns = {
            "setup_bin": "*setup.bin",
            "command_bin": "*command.bin",
            "weight_bin": "*weight.bin",
            "apb_npu": "*test.conf",  # diff
        }
        if parse_nef:
            # HACK
            # unpack_nefs will genearte ioinfo.json for 520
            patterns["ioinfo_json"] = "*ioinfo.json"  # diff
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:
        patterns = {
            "setup_bin": "*setup.bin",
            "command_bin": "*command.bin",
            "weight_bin": "*weight.bin",
            "ioinfo_json": "*ioinfo.json",
            "apb_npu": "*apb.npu",
        }
        if hw_mode in [720]:  # diff for 520/720
            patterns["apb_npu"] = "*test.conf"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:
        patterns = {
            "kne": f"*models_{hw_mode}*.kne",
            "ioinfo_json": "*ioinfo.json",
        }
    else:
        raise NotImplementedError

    fn_map = {}
    for k, v in patterns.items():
        ps = list(p_out.glob(v))
        # BUGFIX: the message used to claim "expect 1" although the check
        # accepts one-or-more matches (the first one is used).
        assert len(ps) >= 1, f"Looking for {k}, expect at least 1, but found {len(ps)}: {ps}"
        fn_map[k] = ps[0]

    return fn_map
|
|
|
|
|
|
def load_ioinfo_json(fn_ioinfo):
    """Load a compiler-generated ioinfo.json and post-process it.

    `shape`/`onnx_shape`/`radix`/`scale` are converted to numpy arrays
    for easier handling later, and node names are cleaned.

    NOTE:
        No ioinfo.json for 520.
    """
    with open(fn_ioinfo, "r") as f:
        ioinfo = json.load(f)

    array_keys = ("onnx_shape", "scale", "radix", "shape")
    for direction in ("input", "output"):
        for node in ioinfo[direction]:
            node["name"] = futils.clean_name(node["name"])
            for key in array_keys:
                node[key] = np.array(node[key])
    return ioinfo
|
|
|
|
|
|
def collect_fps_improve(dir_out):
    """Load the fps improvement figure from ip_evaluator reports.

    The result is compiled with other analysis and put in the final report.

    Args:
        `dir_out`: the output folder of compiler.

    Returns:
        str or None: the value following "fps improve: " on its line, or
        None when the summary file or the line is absent.
    """
    prefix = "fps improve: "
    report = pathlib.Path(dir_out) / "summary_image_cut_search.txt"
    if not report.exists():
        return None

    with open(report, "r") as f:
        for line in f:
            if line.startswith(prefix):
                return line.strip().replace(prefix, "")
    return None
|
|
|
|
|
|
def get_cpu_node_op_type(dir_out):
    """Collect the CPU-op type names recorded in ``dir_out/setup.txt``.

    Both the new key ("opcode_index") and the old key ("op_type") are
    recognized; "CpuFusion" entries are skipped.

    Args:
        dir_out: compiler output folder.

    Returns:
        str: comma-joined unique op types; "None" when the file holds no
        cpu node; "N/A" when setup.txt is absent or unreadable.
    """
    setup_path = pathlib.Path(dir_out) / "setup.txt"
    # hoisted: compiled once instead of per matching line
    pattern = re.compile('"(op_type|opcode_index)": *"(.*?)"')

    cpu_ops = set()
    try:
        with open(setup_path, "r") as f:
            for line in f:
                # new setup.txt uses "opcode_index", old one "op_type"
                if "op_type" in line or "opcode_index" in line:
                    match = pattern.search(line)
                    if match is None:
                        # key mentioned but not in the expected layout; skip
                        # (the old code raised IndexError into a bare except,
                        # silently discarding everything found so far)
                        continue
                    op = match.group(2)
                    if op == "CpuFusion":
                        continue
                    cpu_ops.add(op)
    except OSError:
        # BUGFIX: was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; only a missing/unreadable
        # setup.txt should yield "N/A".
        return "N/A"

    if not cpu_ops:
        return "None"
    return ",".join(cpu_ops)
|
|
|
|
|
|
def collect_command_weight_size(dir_out):
    """Read command/weight sizes from dbg.stat.json in the compiler output.

    Args:
        dir_out: compiler output folder.

    Returns:
        tuple: ``(cmd_size_kb, weight_size_mb)`` as ints, with ``None`` for
        any value that could not be read (missing/malformed stats file).
    """
    cmd_size = None
    weight_size = None
    stats_path = pathlib.Path(dir_out) / "dbg.stat.json"
    try:
        with open(stats_path, "r") as f:
            stats = json.load(f)
        # cmd size is reported in KB, weight size in MB
        cmd_size = int(stats["general"]["cmd_size"] / (10**3))
        weight_size = int(stats["general"]["wt_size"] / (10**6))
    except (OSError, ValueError, KeyError, TypeError):
        # BUGFIX: was a bare `except:`. ValueError covers
        # json.JSONDecodeError; a missing file or missing keys simply
        # yields (None, None) as before.
        pass
    return cmd_size, weight_size
|
|
|
|
|
|
def find_cpu_nodes(lines):
    """Extract CPU op names listed after the compiler's CPU-ops warning.

    Args:
        lines: iterable of text lines (e.g. from ProfileResult.txt).

    Returns:
        str: op names joined by "//", or "N/A" when none are found.
    """
    collected = []
    in_section = False
    for line in lines:
        if line.startswith("***** Warning: CPU ops types"):
            in_section = True
        elif in_section:
            entry = line.strip().strip(",")
            # short leftovers (<= 4 chars) terminate the listing section
            if len(entry) > 4:
                collected.append(entry)
            else:
                in_section = False
    return "//".join(collected) if collected else "N/A"
|
|
|
|
|
|
|
|
def collect_FPS(dir_out, hw_mode):
    """Collect FPS/profiling info from the compiler output folder.

    WARNING:
        - Tiefang will make report same for ALL platforms.
        - will all be named as `ProfileResult.txt`

    Args:
        dir_out: compiler output folder containing ProfileResult.txt.
        hw_mode (int): platform id; 520 gets a reduced field set.

    Returns:
        OrderedDict: profiling fields found (None/empty values dropped);
        empty when ProfileResult.txt is absent or unreadable.
    """
    profile_dir = f"{dir_out}/ProfileResult.txt"

    d_profile = OrderedDict()

    def search_by_prefix(lines, k):
        # Return the text after prefix `k` on the first matching line.
        for line in lines:
            if line.startswith(k):
                # BUGFIX: was `line.lstrip(k)`, which strips any leading
                # characters in the *set* k and could also eat the start of
                # the value; slicing removes exactly the prefix.
                return line[len(k):].strip()
        return None

    def gb2mb(line):
        # Values like "1.5GB" -> 1500.0 (MB).
        return float(line.strip("GB")) * 1000

    def convert2int(s):
        # NOTE(review): currently unused; kept for interface parity.
        if s == "inf" or s is None:
            return None
        return int(float(s))

    try:
        with open(profile_dir, "r") as f:
            lines = f.readlines()

        # load fps
        if hw_mode == 520:
            d_profile["fps"] = search_by_prefix(lines, "output_fps =")
            d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =")
            d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =")
            d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =")
            d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =")
            d_profile["cpu_node"] = find_cpu_nodes(lines)
            # d_profile[f"RV(mb)"] = search_by_prefix(lines, "output_total_data_move_in_amount =")
            # d_profile[f"WV(mb)"] = search_by_prefix(lines, "output_total_data_move_out_amount =")
        else:
            d_profile["fps"] = search_by_prefix(lines, "output_fps =")
            d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =")
            d_profile["C(GOPs)"] = search_by_prefix(lines, "output_total_theory_mac =")
            d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =")
            d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =")
            d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =")
            d_profile["RV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_RDMA_amount ="))
            d_profile["WV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_WDMA_amount ="))
            d_profile["cpu_node"] = find_cpu_nodes(lines)

    except Exception:
        # best-effort: a missing or partial report just yields fewer fields.
        # (Was a bare `except:`; Exception no longer swallows SystemExit/
        # KeyboardInterrupt.)
        pass

    # filter None items so the report only shows what was actually found
    d_prof = OrderedDict()
    for k, v in d_profile.items():
        if v:  # not None
            d_prof[k] = v
    return d_prof
|