2026-01-28 06:16:04 +00:00

755 lines
25 KiB
Python

#! /usr/bin/env python3
import os
import pathlib
import tempfile
import shutil
import re
import json
from collections import OrderedDict
from functools import lru_cache
import numpy as np
import sys_flow.flow_constants as fconsts
import sys_flow.flow_utils as futils
import snoop
DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False
snoop.install(enabled=DEBUG)
# constants
P_TMP_MODEL = pathlib.Path("/tmp/model_working")
P_TMP_INPUT = pathlib.Path("/tmp/input_data")
def get_nef_util_bins():
    """Resolve the nef/kne utility binaries and a PATH-export shell prefix.

    Returns:
        tuple: (shell snippet exporting both tool dirs onto PATH,
                path of kneron_nef_utils, path of kneron_kne_utils).
    """
    nef_utils = fconsts.BIN_SET["compiler"]["kneron_nef_utils"]
    kne_utils = fconsts.BIN_SET["compiler"]["kneron_kne_utils"]
    dir_nef = pathlib.Path(nef_utils).parent
    dir_kne = pathlib.Path(kne_utils).parent
    # prefix for shell commands so both utils resolve by name
    export_snippet = f"export PATH={dir_nef}:{dir_kne}:$PATH"
    return export_snippet, nef_utils, kne_utils
ADD_NEF_UTIL_PATH, bin_nef_util, bin_kne_util = get_nef_util_bins()
###################################################################################
# get model info from nef + ioinfo.json
###################################################################################
def clean_list_nef(list_nef):
    """Normalize a list of nef paths into a single space-joined string.

    Each element is converted to `str` (it may be a pathlib object) and
    duplicates are dropped.

    Args:
        list_nef (list): nef paths (str or pathlib).

    Returns:
        str: unique paths joined by single spaces, in first-seen order.
    """
    as_str = [str(nef) for nef in list_nef]
    # dict.fromkeys dedups while KEEPING input order; the previous set()
    # version produced a run-to-run random order, which made the downstream
    # --combine_nef command line nondeterministic.
    unique = list(dict.fromkeys(as_str))
    return " ".join(unique)
def combine_nef(list_nef: list, hw_mode, d_out):
    """Combine multiple nef into one using nef utils.

    After combination, the combined.nef is run through `unpack_nefs()` and
    re-organized with a per-model `ioinfo.json`. This side effect prepares a
    combined `ioinfo.json` for dongle inference.

    Args:
        list_nef (list): each element is a path to a nef file.
        hw_mode (int): specify platform.
        d_out (pathlib / str): where to put `combined.nef` and `ioinfo.json`.

    Returns:
        tuple: multiple info returned:
            - `p_out`: the output folder, usually same as `d_out`.
            - `p_nef`: path of the combined nef.
            - `p_ioinfo`: path of the (combined) ioinfo.json, prepared for
              dongle, not for the normal process!
            - `fn_maps`: per-model mapping `{model_id: (p_sub, ioinfo)}`,
              same as `unpack_nefs()` returns. NOTE(review): the paths in
              `fn_maps` point into the temp dir `unpack_nefs()` created
              (`p_dump`), not into `p_out`, and that temp dir is never
              removed here — confirm callers clean it up.
    """
    temp_dir = tempfile.mkdtemp()
    lst = clean_list_nef(list_nef)
    cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} --combine_nef \"{lst}\" -O {temp_dir}"
    # currently no -o option working. we need to copy $temp_dir/models_xxx.nef to fn_out
    cp = futils.run_bash_script(cmd)
    assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}"
    # check output: the tool must have produced exactly one combined nef
    p_temp = pathlib.Path(temp_dir)
    nefs = list(p_temp.glob("models_*.nef"))
    assert len(nefs) == 1, f"combine nef but find {len(nefs)} created: {nefs}"
    # copy necessary files to p_out
    p_out = pathlib.Path(d_out)
    p_out.mkdir(parents=True, exist_ok=True)
    p_nef = p_out / "combined.nef"
    shutil.copyfile(nefs[0], p_nef)
    # prepare ioinfo (for convenience of dongle): unpack the combined nef
    # and collect each model's input/output node info into one json
    dongle_io = {}
    fn_maps, p_dump = unpack_nefs(p_nef, hw_mode)
    for model_id, (p_unpack, ioinfo) in fn_maps.items():
        dongle_io[model_id] = {}
        dongle_io[model_id]["ioinfo_in"] = ioinfo["input"]
        dongle_io[model_id]["ioinfo_out"] = ioinfo["output"]
    p_ioinfo = p_out / "ioinfo.json"
    with open(p_ioinfo, "w") as f:
        # NumpyEncoder: ioinfo values may be numpy arrays
        json.dump(dongle_io, f, cls=NumpyEncoder)
    shutil.rmtree(temp_dir, ignore_errors=True)
    return p_out, p_nef, p_ioinfo, fn_maps
def guess_available_model_id(p_dump, hw_mode):
    """Infer model ids from filenames extracted out of a NEF.

    NOTE: a nef produced by regression carries the default model_id 32768.

    Args:
        p_dump (pathlib / str): where the nef was extracted to.
        hw_mode (int): specify the platform.

    Returns:
        tuple: unique model ids (int) found in the given dump folder.
    """
    # pick the filename layout for this platform family
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        glob_pat = f"models_{hw_mode}_model_*.kne"
        id_regex = rf"models_{hw_mode}_model_(\d+).kne"
    else:
        glob_pat = "NEF_*modelid_*"
        id_regex = r'NEF_.*?_modelid_(\d+)_.*$'
    found = []
    for entry in pathlib.Path(p_dump).glob(glob_pat):
        found.extend(re.findall(id_regex, entry.name))
    return tuple({int(m) for m in found})
def verify_ioinfo(ioinfo, nef_version):
    """Verify ioinfo carries enough quantization info for every io node.

    Prints one line per missing key, then asserts nothing was missing.
    (`nef_version` is accepted for signature compatibility but unused.)
    """
    required = ("name",
                "shape",
                "onnx_shape",
                "ch_dim",
                "radix",
                "scale",
                "bitw",
                "data_format")
    missing = False
    for direction in ("input", "output"):
        if DEBUG:
            print(f"ioinfo got {len(ioinfo[direction])} of {direction}.")
        for idx, node in enumerate(ioinfo[direction]):
            for key in required:
                if key not in node:
                    print(f"Error: {direction}/{idx} is missing {key}")
                    missing = True
    assert not missing
def convert_ioinfo(p_sub, hw_mode):
    """Load raw io info from the dump folder, then save it as ioinfo.json.

    This is a wrapper that dispatches to the parser matching `hw_mode`'s
    NEF version, verifies the result, and persists it next to the dump.

    Args:
        p_sub (pathlib.Path): folder holding the per-model dump files.
        hw_mode (int): platform id, used to pick the NEF version.

    Returns:
        dict: the parsed ioinfo (also written to `p_sub/ioinfo.json`).

    Raises:
        NotImplementedError: if `hw_mode` maps to no known NEF version.
        IndexError: if the expected raw json file is missing in `p_sub`.
    """
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]:
        # 520, or 720 pure bin (obsolete)
        # BUG: only per-layer quantization info
        # BUG: only sim shape. (no onnx shape. no dimension transpose in sim shape though.)
        fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0]
        ioinfo = parse_setup_json_v0(fn_json_raw)
        nef_version = 0
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:
        fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0]
        ioinfo = parse_setup_json_v1(fn_json_raw)
        nef_version = 1
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        fn_json_raw = list(p_sub.glob("*.kne.no_binary.json"))[0]
        ioinfo = parse_setup_json_v2(fn_json_raw)
        nef_version = 2
    else:
        # Previously an unknown hw_mode fell through to an
        # UnboundLocalError on `ioinfo`; fail fast instead, consistent
        # with unpack_nefs().
        raise NotImplementedError(f"Unknown hw_mode: {hw_mode}")
    verify_ioinfo(ioinfo, nef_version)
    fn_ioinfo = p_sub / "ioinfo.json"
    with open(fn_ioinfo, "w") as f:
        # NumpyEncoder: parsed values may be numpy arrays
        json.dump(ioinfo, f, cls=NumpyEncoder)
    return ioinfo
def unpack_nefs(p_nef, hw_mode):
    """Parse nef to get compiler outputs for csim inference.

    Ref: `ticket #17762`_

    Args:
        p_nef (pathlib or str): path to the nef file, which may include
            multiple models.
        hw_mode (int): specify the platform (520/530/540/630/720/730/etc),
            because the way to call nef_utils is different per platform.

    Returns:
        tuple: `(fn_maps, p_out)` where
            - `fn_maps` is a dict `{model_id: (p_sub, ioinfo)}`:
                - `model_id` is unique for each released model.
                - `p_sub` is where the model for `model_id` is unpacked.
                - `ioinfo` includes the shape and quantization info of
                  input/output nodes. It is used to convert input data to
                  bin files as csim/dongle input.
            - `p_out` is the temporary folder the nef was extracted into.
              NOTE(review): never removed by this function — callers own
              the cleanup.

    Raises:
        NotImplementedError: if `hw_mode` matches no known NEF version.

    .. _ticket #17762: https://redmine.kneron.tw/issues/17762
    """
    # fresh temp dir per call so concurrent unpacks do not collide
    p_out = pathlib.Path(tempfile.mkdtemp(prefix="nef_unpack_"))
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]:  # 520, or 720 pure bin (obsolete)
        nef_version = 0
        cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -p {hw_mode} -O {p_out}"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:  # 720/530/630 flatbuffer
        nef_version = 1
        cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -O {p_out}"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        # 1 nef -> 1 kne (incl multiple models)
        # two stages: extract the .kne from the nef, then split it into
        # per-model .kne files and dump each model's json header.
        nef_version = 2
        cmd = f"""set -e; {ADD_NEF_UTIL_PATH};
{bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -O {p_out} &&
pushd {p_out} >> /dev/null &&
{bin_kne_util} -X NEF_0x*_models_{hw_mode}.kne &&
for k in `ls models_{hw_mode}_model_*.kne`
do
{bin_kne_util} -j ${{k}}
done
"""
    else:
        raise NotImplementedError
    # extract nef file
    cp = futils.run_bash_script(cmd)
    if DEBUG:
        print(f"unpack nef (version {nef_version}) to {p_out}")
        print(cp.stderr)
    assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}."
    # put each model into its own sub-folder
    # for 520/720/530/630
    model_ids = guess_available_model_id(p_out, hw_mode)
    fn_maps = {}
    for mid in model_ids:
        p_sub = p_out / f"model_{mid}"
        p_sub.mkdir(parents=True, exist_ok=True)
        # move this model's extracted files into its sub-folder
        if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
            cmd = f"mv {p_out}/models_{hw_mode}_model_{mid}.kne* {p_sub}"
        else:
            cmd = f"mv {p_out}/NEF_*_modelid_{mid}_* {p_sub}"
        cp = futils.run_bash_script(cmd)
        assert cp.returncode == 0, f"Failed to move model_{mid} bin files. Return code: {cp.returncode}"
        p_sub = p_out / f"model_{mid}"  # NOTE(review): redundant re-assignment
        ioinfo = convert_ioinfo(p_sub, hw_mode)
        # fn_map = locate_compiler_dump(p_sub, hw_mode, parse_nef=True)
        fn_maps[mid] = (p_sub, ioinfo)
    return fn_maps, p_out
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that serializes numpy arrays as nested lists.

    From `numpy array is not json serializable`_ .

    .. _numpy array is not json serializable: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable
    """

    def default(self, obj):
        """Fall back to list form for ndarrays; defer everything else."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super().default(obj)
def parse_setup_json_v0(fn_json):
    """Parse raw json generated from 520 setup.bin.

    Necessary info per io node (same for all platforms),
    see `verify_ioinfo()`.

    NOTE:
        - we assume only 1 input for 520 models.

    Related to flow_utils/get_ioinfo_from_knerex_json

    Returns:
        dict: {"input": [node_info, ...], "output": [node_info, ...]}
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)
    ioinfo = {}
    def get_in(h):
        # Build the ioinfo entry for one input node header `h`.
        v1 = {}
        # NOTE: for 520, the given dimension is always 1CHW
        # There will be no onnx shape in setup.bin.
        # example, [1, 10] will be [1, 10, 1, 1]
        v1["name"] = "0"
        v1["shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]]
        v1["onnx_shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]]
        v1["bitw"] = 8  # only support 8bit
        # 520 only support per layer quantization; replicate per channel
        v1["radix"] = [h["input_radix"] for i in range(h["input_channel"])]
        v1["scale"] = [1.0 for i in range(h["input_channel"])]
        v1["ch_dim"] = 1
        v1["data_format"] = "RGBA_8BIT"  # just guess. to keep same format
        return v1
    def get_out(i, h):
        # Build the ioinfo entry for output node index `i` with header `h`.
        d = {}
        # no name saved in 520 setup.bin / nef. so we use index only
        d["name"] = str(i)
        d["shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]]
        d["onnx_shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]]
        d["bitw"] = 8  # only support 8bit
        # NOTE: 520 radix/scale are same for all channels
        d["radix"] = [h["output_radix"] for i in range(h["ch_length"])]
        per_channel_scales = futils.intle2flt(h["output_scale"])
        # NOTE(review): the same value is replicated for every channel —
        # presumably intle2flt returns a single float here; confirm.
        d["scale"] = [per_channel_scales for i in range(h["ch_length"])]
        d["ch_dim"] = 1
        d["data_format"], _ = parse_data_format(520, h["data_format"])
        return d
    # input. assume only one.
    # sometimes the json has "headers" or "header"
    if "headers" in raw:
        ioinfo["input"] = [get_in(a) for a in raw["headers"]]
    else:
        assert "header" in raw, "Extracted 520 setup.bin.json have no header nor headers."
        ioinfo["input"] = [get_in(raw["header"])]
    # output. maybe multiple.
    ioinfo["output"] = [get_out(i, d) for i, d in enumerate(raw["outputs"])]
    return ioinfo
def parse_data_format(hw_mode, fmt):
    """Convert a raw format number into (format_name, bit_width).

    The raw ioinfo from compiler uses an int to represent the hardware data
    format, while the data-converter requires the format as a string. The
    mapping is taken from compiler headers and hard-coded here; update if
    the compiler changes.

    Ref: `ticket #17762`_

    Args:
        hw_mode (int): platform (520 / 720 / 530 / 730 / 540 / 630).
        fmt (int or str): raw format id from the compiler output.

    Returns:
        tuple: (format_name (str), bit_width (int)).

    Raises:
        NotImplementedError: if `hw_mode` is not a known platform.
        KeyError: if `fmt` is unknown for the given platform.
    """
    if hw_mode == 520:
        # refer to compiler/lib/target/mozart/basic/hw_define.h
        # not using this info now.
        d = {
            -1: ("UNKNOWN", 8),
            8: ("16W1C8B", 8),
            0: ("8W1C16B", 16),
            9: ("BY_COL_8BIT", 8),
            1: ("BY_COL_16BIT", 16),
            10: ("BY_CHNL_8BIT", 8),
            2: ("BY_CHNL_16BIT", 16),
            15: ("CUSTOMIZE", 8),
            16: ("RGBA_8BIT", 8),
            17: ("RGBA_16BIT", 16),
            18: ("SEQ_32BIT", 32),
            100: ("RAW8", 8),
            101: ("RAW16", 16),
            102: ("RAW_FLOAT", 32),
        }
        return d[int(fmt)]
    elif hw_mode == 720:
        # refer to compiler/lib/target/beethoven/basic/hw_define.h
        ref = {
            -1: ("UNKNOWN", 8),
            0: ("1W16C8B", 8),
            1: ("1W16C8B_INTLV", 8),
            2: ("1W16C8BHL", 16),
            3: ("1W16C8BHL_INTLV", 16),
            4: ("4W4C8B", 8),
            5: ("16W1C8B", 8),
            6: ("8W1C16B", 16),
            7: ("PS_8W1C16B", 16),
            8: ("PS_1W8C16B", 16),
            9: ("PS_1W4C32B", 32),
            11: ("PS_2W4C16B", 16),
            12: ("PS_4W1C32B", 32),
            13: ("PS_1W16C16B", 16),
            14: ("PS_1W8C32B", 32),
            15: ("PS_1W16C32B", 32),
            16: ("PS_4W2C16B", 16),
            17: ("PS_2W4C32B", 32),
            18: ("PS_2W2C32B", 32),
            100: ("RAW8", 8),
            101: ("RAW16", 16),
            102: ("RAW_FLOAT", 32),
        }
        return ref[int(fmt)]
    elif hw_mode in [530, 730, 540, 630]:
        # 730/540/630 refer to compiler/lib/target/wagner/basic/hw_define.h
        # 530 refer to compiler/lib/target/bach/basic/hw_define.h
        # but seems same for now
        # NOTE(review): the 15-bit "...8BHL" entries differ from 720's 16 —
        # confirm against hw_define.h before "fixing".
        ref = {
            -1: ("UNKNOWN", 8),
            0: ("1W16C8B", 8),
            1: ("1W16C8BHL", 15),
            2: ("4W4C8B", 8),
            3: ("4W4C8BHL", 15),
            4: ("16W1C8B", 8),
            5: ("16W1C8BHL", 15),
            6: ("8W1C16B", 16),
            7: ("PS_1W16C24B", 24),
            100: ("RAW8", 8),
            102: ("RAW16", 16),
            103: ("RAW_FLOAT", 32),
        }
        return ref[int(fmt)]
    else:
        # Previously fell off the end and returned None, which made callers
        # crash later on tuple-unpack; fail fast with a clear message.
        raise NotImplementedError(f"Unknown hw_mode: {hw_mode}")
def parse_setup_json_v1(fn_json):
    """Parse raw json generated from setup.bin (v2 flatbuffer, 530/630/720).

    Necessary info per io node (same for all platforms), please refer to
    `parse_setup_json_v0()`.

    Related to flow_utils/get_ioinfo_from_knerex_json

    Returns:
        dict: {"input": [node_info, ...], "output": [node_info, ...]}
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)
    ioinfo = {}
    def get_platform(j):
        # e.g. target "KL720" -> 720
        return int(j["header"]["target"].strip("KL"))
    platform = get_platform(raw)
    def get_in(h):
        # Build one ioinfo entry from a node description `h`
        # (used for both inputs and outputs).
        v1 = {}
        v1["name"] = h["name"]
        # from Jay
        # setup.bin Tensor definition:
        #   raw_shape is the onnx shape
        #   shape is the hw shape
        # ioinfo.json definition:
        #   onnx_shape is the onnx shape
        #   shape is the hw shape
        v1["shape"] = np.array(h["shape"])
        v1["onnx_shape"] = np.array(h["raw_shape"])
        # TODO: is this true? always second?
        v1["ch_dim"] = 1
        v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"])
        # for per channel radix/scale
        n_ch = v1["onnx_shape"][v1["ch_dim"]]
        for k in ["radix", "scale"]:
            t = [a[k] for a in h["quantization"]["fxp_info"]]
            if len(t) == n_ch:  # per channel given
                v1[k] = np.array(t)
            else:  # per layer given. need expand
                assert (
                    len(t) == 1
                ), f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
                v1[k] = np.array([t[0] for i in range(n_ch)])
        return v1
    # input. maybe multiple
    ioinfo["input"] = [get_in(d) for d in raw["inputs"]]
    # output. maybe multiple.
    ioinfo["output"] = [get_in(d) for d in raw["outputs"]]
    return ioinfo
def parse_setup_json_v2(fn_json):
    """Parse raw json generated from kne (540/730).

    Necessary info per io node (same for all platforms), please refer to
    `parse_setup_json_v0()`.

    Ref: `ticket #17762`_

    Related to flow_utils/get_ioinfo_from_knerex_json

    Returns:
        dict: {"input": [node_info, ...], "output": [node_info, ...]}
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)
    ioinfo = {}
    def get_platform(j):
        # e.g. target "KL730" -> 730
        return int(j["header"]["target"].strip("KL"))
    platform = get_platform(raw)
    def parse_ch_dim(lst):
        """Input lst should be a list of 4 elements: [b, c, h, w]."""
        if lst[0] == -1:
            # when list is [-1. -1, -1, -1]
            return 1
        else:
            # there should be no -1 in the list
            assert lst[1] != -1
            return lst[1]
    def get_in(h):
        # Build one ioinfo entry from a node description `h`
        # (used for both inputs and outputs).
        v1 = {}
        v1["name"] = h["name"]
        v1["ndim"] = h["ndim"]
        v1["shape"] = np.array(h["shape"])
        # need to combine shape and inv_shape_intrp_dim to get real onnx_shape.
        # see #18456
        v1["onnx_shape"] = np.array([v1["shape"][a] for a in h["inv_shape_intrp_dim"]])
        v1["ch_dim"] = parse_ch_dim(h["shape_intrp_dim"])
        v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"])
        # for per channel radix
        # NOTE(review): channel count is taken from hw-shape dim 1 while
        # ch_dim above comes from shape_intrp_dim — confirm these agree.
        n_ch = v1["shape"][1]
        k = "radix"
        t = h["quantization"][k]
        if len(t) == n_ch:  # per channel given
            v1[k] = np.array(t)
        else:  # per layer given. need expand
            assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
            v1[k] = np.array([t[0] for i in range(n_ch)])
        # scale: stored as little-endian ints, decode to floats first
        k = "scale"
        scale_le = h["quantization"]["scale"]
        scale_le_n = h["quantization"]["scale_count"]
        t = futils.array_le2flt(scale_le, scale_le_n)
        if len(t) == n_ch:  # per channel given
            v1[k] = np.array(t)
        else:  # per layer given. need expand
            assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
            v1[k] = np.array([t[0] for i in range(n_ch)])
        return v1
    ioinfo["input"] = [get_in(d) for d in raw["models"][0]["header"]["inputs"]]
    ioinfo["output"] = [get_in(d) for d in raw["models"][0]["header"]["outputs"]]
    return ioinfo
@lru_cache(maxsize=128)
def locate_compiler_dump(p_out, hw_mode, parse_nef=False):
    """Locate important files in a compiler dump folder.

    Each platform has its own required files to run csim. Some names may
    change, e.g., test.conf/apb.npu, but they serve the same purpose. This
    function finds the corresponding file for each role and returns them
    organized as a dict, so every call gets the correct file independent
    of hw_mode.

    Args:
        p_out (str / pathlib): compiler dump folder. NOTE: results are
            lru-cached per (p_out, hw_mode, parse_nef); str and Path
            arguments cache separately, and callers must not mutate the
            returned dict.
        hw_mode (int): platform id.
        parse_nef (bool): 520 only — also expect the ioinfo.json that
            `unpack_nefs()` generates.

    Returns:
        dict: role name -> pathlib.Path of the first matching file.

    Raises:
        NotImplementedError: for an unknown hw_mode.
        AssertionError: when a required file is missing.
    """
    p_out = pathlib.Path(p_out)
    if hw_mode in [520]:
        patterns = {
            "setup_bin": "*setup.bin",
            "command_bin": "*command.bin",
            "weight_bin": "*weight.bin",
            "apb_npu": "*test.conf",  # diff
        }
        if parse_nef:
            # HACK: unpack_nefs will generate ioinfo.json for 520
            patterns["ioinfo_json"] = "*ioinfo.json"  # diff
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:
        patterns = {
            "setup_bin": "*setup.bin",
            "command_bin": "*command.bin",
            "weight_bin": "*weight.bin",
            "ioinfo_json": "*ioinfo.json",
            "apb_npu": "*apb.npu",
        }
        if hw_mode in [720]:  # diff for 520/720
            patterns["apb_npu"] = "*test.conf"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:
        patterns = {
            "kne": f"*models_{hw_mode}*.kne",
            "ioinfo_json": "*ioinfo.json",
        }
    else:
        raise NotImplementedError
    fn_map = {}
    for role, pattern in patterns.items():
        matches = list(p_out.glob(pattern))
        # BUG FIX: the message used to claim "expect 1" although the check
        # accepts one-or-more (the first match wins).
        assert len(matches) >= 1, f"Looking for {role}, expect >= 1, but found {len(matches)}: {matches}"
        fn_map[role] = matches[0]
    return fn_map
def load_ioinfo_json(fn_ioinfo):
    """Load a compiler-generated ioinfo.json and post-process it.

    Node names are cleaned via `futils.clean_name`, and the
    `shape`/`onnx_shape`/`radix`/`scale` entries are converted to numpy
    arrays for easier processing later.

    NOTE:
        No ioinfo.json for 520.
    """
    with open(fn_ioinfo, "r") as f:
        ioinfo = json.load(f)
    for direction in ("input", "output"):
        for node in ioinfo[direction]:
            node["name"] = futils.clean_name(node["name"])
            for key in ("onnx_shape", "scale", "radix", "shape"):
                node[key] = np.array(node[key])
    return ioinfo
def collect_fps_improve(dir_out):
    """Load the fps improvement reported by ip_evaluator.

    The result is compiled with other analysis into the final report.

    Args:
        dir_out: the output folder of the compiler.

    Returns:
        str or None: the value after "fps improve: " on the first matching
        line, or None if the report or the line is absent.
    """
    report = pathlib.Path(dir_out) / "summary_image_cut_search.txt"
    if not report.exists():
        return None
    prefix = "fps improve: "
    with open(report, "r") as f:
        for line in f:
            if line.startswith(prefix):
                return line.strip().replace(prefix, "")
    return None
def get_cpu_node_op_type(dir_out):
    """Collect the set of CPU-node op types recorded in `dir_out`/setup.txt.

    Returns:
        str: comma-joined op types (sorted for determinism); "None" when
        the file lists no CPU ops; "N/A" when setup.txt is missing or
        unparsable.
    """
    cpu_op_types = set()
    setup_path = "{}/{}".format(dir_out, "setup.txt")
    # new setup.txt uses "opcode_index", old setup.txt uses "op_type";
    # compiled once instead of per line (was inside a per-line helper)
    pattern = re.compile('"(op_type|opcode_index)": *"(.*?)"')
    try:
        with open(setup_path, "r") as f:
            for line in f:
                if "op_type" in line or "opcode_index" in line:
                    # IndexError on a malformed line is treated as "N/A",
                    # same as the original behavior
                    op_type = pattern.findall(line)[0][1]
                    # "CpuFusion" is skipped, as before
                    if op_type == "CpuFusion":
                        continue
                    cpu_op_types.add(op_type)
    except Exception:
        # was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # propagate. No setup.txt found (or unreadable).
        return "N/A"
    if not cpu_op_types:
        return "None"
    # BUG FIX: joining a set directly produced a run-to-run random order
    # (hash randomization); sort for deterministic reports.
    return ",".join(sorted(cpu_op_types))
def collect_command_weight_size(dir_out):
    """Read command/weight sizes from `dir_out`/dbg.stat.json.

    Returns:
        tuple: (cmd_size, weight_size) as ints — cmd_size divided by 10^3
        and weight_size by 10^6 (presumably bytes -> KB / MB; confirm) —
        or None for each value that could not be read (best-effort).
    """
    cmd_size = None
    weight_size = None
    stats_path = "{}/{}".format(dir_out, "dbg.stat.json")
    try:
        with open(stats_path, "r") as f:
            stats = json.load(f)
        cmd_size = int(stats["general"]["cmd_size"] / (10**3))
        weight_size = int(stats["general"]["wt_size"] / (10**6))
    except Exception:
        # was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # propagate. Missing/malformed stats simply yield None values.
        pass
    return cmd_size, weight_size
def find_cpu_nodes(lines):
    """Scan compiler log lines for the CPU-ops warning block.

    Lines after a "***** Warning: CPU ops types" header are collected
    (stripped of whitespace and trailing commas) until a short line ends
    the block.

    Returns:
        str: op names joined by "//", or "N/A" when none were found.
    """
    collected = []
    in_block = False
    for raw in lines:
        if raw.startswith("***** Warning: CPU ops types"):
            in_block = True
            continue
        if not in_block:
            continue
        entry = raw.strip().strip(",")
        # short lines terminate the warning block (length heuristic)
        if len(entry) > 4:
            collected.append(entry)
        else:
            in_block = False
    return "//".join(collected) if collected else "N/A"
def collect_FPS(dir_out, hw_mode):
    """Collect FPS / bandwidth info from the compiler output folder.

    WARNING:
        - Tiefang will make report same for ALL platforms.
        - will all be named as `ProfileResult.txt`

    Args:
        dir_out (str / pathlib): compiler output folder.
        hw_mode (int): platform id; 520 has a reduced set of metrics.

    Returns:
        OrderedDict: metric name -> value with falsy entries dropped;
        best-effort, so a missing/partial report yields fewer entries.
    """
    profile_dir = f"{dir_out}/ProfileResult.txt"
    d_profile = OrderedDict()

    def search_by_prefix(lines, k):
        # Return the text after prefix `k` on the first matching line.
        for line in lines:
            if line.startswith(k):
                # BUG FIX: was `line.lstrip(k)` — lstrip treats `k` as a
                # CHARACTER SET, not a prefix, and could eat leading
                # characters of the value itself.
                return line[len(k):].strip()
        return None

    def gb2mb(line):
        return float(line.strip("GB")) * 1000

    try:
        with open(profile_dir, "r") as f:
            lines = f.readlines()
        # load fps and related metrics
        if hw_mode == 520:
            d_profile["fps"] = search_by_prefix(lines, "output_fps =")
            d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =")
            d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =")
            d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =")
            d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =")
            d_profile["cpu_node"] = find_cpu_nodes(lines)
            # d_profile[f"RV(mb)"] = search_by_prefix(lines, "output_total_data_move_in_amount =")
            # d_profile[f"WV(mb)"] = search_by_prefix(lines, "output_total_data_move_out_amount =")
        else:
            d_profile["fps"] = search_by_prefix(lines, "output_fps =")
            d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =")
            d_profile["C(GOPs)"] = search_by_prefix(lines, "output_total_theory_mac =")
            d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =")
            d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =")
            d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =")
            d_profile["RV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_RDMA_amount ="))
            d_profile["WV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_WDMA_amount ="))
            d_profile["cpu_node"] = find_cpu_nodes(lines)
    except Exception:
        # was a bare `except:`; narrowed. No ProfileResult.txt found, or a
        # partially-filled report — keep whatever was collected so far.
        pass
    # filter falsy items (None / empty string)
    d_prof = OrderedDict((k, v) for k, v in d_profile.items() if v)
    return d_prof