#! /usr/bin/env python3
import os
import sys
import subprocess
import logging
import pathlib
import json
import contextlib
import re
import zipfile
import lzma
import pickle
import math
import struct
import hashlib
import shutil
import collections.abc
from collections import defaultdict
import string
import secrets
import tempfile
import itertools
import numpy as np
import pandas as pd
from sys_flow.flow_constants import MODE_HARDWARE
from sys_flow.onnx_op_stats import onnx_info
from sys_flow.util_lib import load_zip_jsons
DEBUG = bool(os.environ.get("REGRESSION_DEBUG", False))
import snoop
snoop.install(enabled=DEBUG)
if DEBUG:
from IPython.terminal import embed as emb
terminal = emb.InteractiveShellEmbed()
terminal.extension_manager.load_extension("autoreload")
terminal.run_line_magic("autoreload", "2")
embed = terminal.mainloop
else:
embed = lambda: None
# functions on loading text file
def twos_comp(val, bits):
    """Interpret the unsigned integer `val` as a `bits`-wide two's-complement value."""
    # if the sign bit is set, e.g. for 8 bit: 128-255
    if (val & (1 << (bits - 1))) != 0:
        # compute the negative value
        val = val - (1 << bits)
    # a positive value is returned as is
    return val
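# Illustrative behaviour of twos_comp (values picked for this comment only):
#   >>> twos_comp(0xFF, 8)   # sign bit set -> negative value
#   -1
#   >>> twos_comp(0x7F, 8)   # sign bit clear -> returned as is
#   127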
def array_le2flt(arr, n_flt: int, n_byte: int = 4):
    """Convert an array of unsigned integer bytes (little endian) to floats.
    INPUT:
        * n_flt: how many float numbers to extract
        * n_byte: how many bytes make up one float.
    Used to convert scales to float.
    """
    return struct.unpack(f"<{n_flt}f", struct.pack(f"<{n_flt*n_byte}B", *arr))
def intle2flt(i):
packed = struct.pack('<I', i)
return struct.unpack('<f', packed)[0]
array_intle2flt = np.vectorize(intle2flt)
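# Illustrative conversion (hypothetical values): 0x3F800000 is the little-endian
# IEEE-754 encoding of 1.0, so both helpers should recover it:
#   >>> intle2flt(0x3F800000)
#   1.0
#   >>> array_le2flt([0x00, 0x00, 0x80, 0x3F], n_flt=1)
#   (1.0,)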
def load_txt(filename):
"""Load dynasty dump text (float data) as flattened data."""
return np.array(pd.read_csv(filename, names=["fx"])["fx"], dtype=np.float32)
def txt2np_fl(filename, shape):
"""Load dynasty dumped text (float data) into numpy with given shape."""
return np.array(
pd.read_csv(filename, names=["data"])["data"], dtype=np.float32
).reshape(shape)
def txt2np_fx(filename, shape):
"""Load dynasty dumped text (fix point data) into numpy with given shape."""
return np.array(
pd.read_csv(filename, names=["data"])["data"], dtype=np.int32
).reshape(shape)
def df2pkl(df, fn):
"""Dump python object to a lzma compressed pickle file.
fn is suggested to end with .pkl.xz
"""
with lzma.open(fn, 'wb') as f:
pickle.dump(df, f)
def pkl2df(fn):
"""Load python object from a lzma compressed pickle file."""
with lzma.open(fn, 'rb') as f:
df = pickle.load(f)
return df
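# Illustrative round-trip through the compressed pickle helpers (file name is arbitrary):
#   >>> df2pkl({"a": [1, 2, 3]}, "example.pkl.xz")
#   >>> pkl2df("example.pkl.xz")
#   {'a': [1, 2, 3]}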
def dir2pkl(p_dir):
    """Collect all *_inputs/*.xz dumps under p_dir into one compressed pickle."""
    p_input = pathlib.Path(p_dir)
p_xz_s = list(p_input.glob("*_inputs/*.xz"))
print(f"Found {len(p_xz_s)} xz files")
if len(p_xz_s) == 0:
print(f"ERROR: found 0 xz file in {p_dir}")
return
d_xz = [pkl2df(a) for a in p_xz_s]
    dp_ins = set(d_xz[0].keys())
    assert all(set(d.keys()) == dp_ins for d in d_xz), "xz files have different input keys."
    np_in = {}
    for k_in in dp_ins:
        # each solution-dumped xz file holds exactly one entry per input
        np_in[k_in] = [a[k_in][0] for a in d_xz]
# save to working directory
fn_pkl = f"{p_input.name}.pkl.xz"
df2pkl(np_in, fn_pkl)
print(f" np_in dumped to {fn_pkl}")
return fn_pkl
# get ioinfo from onnx or bie
def get_ioinfo_from_onnx(p_onnx):
"""Get input/output nodes info from onnx.
Info includes:
* input node name with order
* output node name and shape
"""
oinfo = onnx_info(p_onnx)
input_nodes, output_nodes, _ = oinfo.get_ioinfo()
out_node_shape = {dp_out: oinfo.dp_shape[dp_out]["dims"] for dp_out in output_nodes}
# Note: keep same interface for get_ioinfo_from_bie / get_ioinfo_from_bie2
ioinfo = None
return input_nodes, output_nodes, out_node_shape, ioinfo
def dp2dyn_dump(dp, graph_in, graph_out, i_loop=None):
    """Return the dynasty dump name for the given dp.
    Model input / output nodes get a special prefix.
    Nodes inside a loop get a special suffix.
    """
fn_dump = clean_name(dp)
if dp in graph_in:
# if graph input/output, add special prefix
fn_dump = f"input_{fn_dump}"
elif dp in graph_out:
fn_dump = f"output_{fn_dump}"
    else:
        # normal datapath, neither a graph input nor a graph output
        pass
if i_loop is not None:
fn_dump = f"{fn_dump}_iteration_{i_loop}"
return fn_dump
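# Illustrative dump names (datapath names are made up):
#   dp2dyn_dump("conv1_o0", graph_in=[], graph_out=[])          -> "conv1_o0"
#   dp2dyn_dump("data_o0", graph_in=["data_o0"], graph_out=[])  -> "input_data_o0"
#   dp2dyn_dump("prob_o0", [], ["prob_o0"], i_loop=2)           -> "output_prob_o0_iteration_2"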
def parse_shape_info(j):
"""Parse op/dp info from knerex shapeinfo.json."""
if "op2dps" in j:
# knerex from 0.23.0 will give op2dps and ops
nodes_w_dp = list(j["op2dps"].keys()) # no order.
# TODO: use this make sure correct
# nodes_outnode = [a for a in nodes_w_dp if a.startswith("OutputNode_")]
# j["ops"] with order, but need to remove OutputNode
nodes = [node for node in j["ops"] if node in nodes_w_dp]
node2dp = j["op2dps"]
dp2node = {dp: op for op, dps in node2dp.items() for dp in dps}
dp2idx_in_node = {dp: i_dp
for op, dps in node2dp.items()
for i_dp, dp in enumerate(dps)}
elif "ops" in j:
# old way. TODELETE. knerex will only give ops. no op2dps
nodes = j["ops"]
dp2node = {}
node2dp = {}
for dp in j["dp_info"]:
nd = j["dp_info"][dp]["node_name"][0]
dp2node[dp] = nd
# multi-output not supported without "op2dps"
# we assume each node will have one datapath
assert nd not in node2dp, (
f"node ({nd}) has dp ({node2dp[nd]}) already. "
f"Trying to append dp ({dp})."
"Old knerex format without op2dps info, does not support multi-output."
)
            # one node may have multiple dp outputs, but without op2dps we
            # cannot recover the dp order, so only single-output is supported here
            node2dp[nd] = [dp]
        # each dp is always the 0th dp of its node
dp2idx_in_node = {}
# remove OutputNode_*, they are dummy nodes without output datapath
def is_outputNode(node, node2dp):
return (node not in node2dp) and node.startswith("OutputNode_")
nodes = [node for node in nodes if not is_outputNode(node, node2dp)]
else:
raise NotImplementedError("Missing op2dps / ops in shapeinfo.json")
# onnx_shape
dp_shape = {k: tuple(j["dp_info"][k]["onnx_shape"]) for k in dp2node.keys()}
dp_hw_c = {k: j["dp_info"][k]["hw_c_in_onnx"][0] for k in dp2node.keys()}
# get graph in/output dp
graph_dp_out = j["dp_out"]
graph_dp_in = j["dp_in"]
# get dump and index in graph. (support subgraph)
dp2dump = dict()
# index is used to sort snr report
dp2index = dict()
subgraph = j.get("subgraph", None)
for i_op, op in enumerate(nodes):
# NOTE: we can get OPs in straightened order.
# each OP may have multiple DPs
dps = node2dp[op]
for i_dp, dp in enumerate(dps):
# some dp may run in loops so dp2dump and dp2index has different index
# here is main graph, so use "-" placeholder
dp_index = (dp, "-")
dp2dump[dp_index] = dp2dyn_dump(dp, graph_dp_in, graph_dp_out)
idx_2nd = f"o{i_dp}" if len(dps) > 1 else "-"
dp2index[dp_index] = (i_op, idx_2nd)
# check subgraph. some OP may be loop node.
if subgraph and op in subgraph:
this_sub = subgraph[op]
if "op_outs" in this_sub:
# back compatible for old single-output
# turn it into tuple
op_outs = [[t] for t in this_sub["op_outs"]]
elif "op2dps" in this_sub:
d2 = this_sub["op2dps"]
sub_ops = this_sub["ops"]
op_outs = [d2[sub_op] for sub_op in sub_ops if sub_op in d2]
else:
raise NotImplementedError("Missing op2dps / op_outs for "
f"subgraph {op} in shapeinfo.json")
# op_outs is list of list flatten into dps
sub_outs = list(itertools.chain(*op_outs))
N_dp = len(sub_outs)
            n_loop = this_sub["max_count"][0]  # why does knerex give a list here?
for i_loop in range(n_loop):
for i_dp, dp_name in enumerate(sub_outs):
dp_index = (dp_name, i_loop)
dp2index[dp_index] = (i_op, N_dp*i_loop+i_dp)
dp2dump[dp_index] = dp2dyn_dump(dp_name,
graph_dp_in,
graph_dp_out,
i_loop=i_loop)
return (nodes, dp2node, node2dp, dp2idx_in_node, dp_shape, dp_hw_c,
dp2index, dp2dump, graph_dp_in, graph_dp_out)
def get_ioinfo_from_knerex_json(j_fx, j_shape):
"""Get ioinfo from knerex dumped json.
This function returns json which is compatiable with
* *_ioinfo.json dumped by compiler
* then loaded by compiler_v2.load_ioinfo_json.
Specification of ioinfo.json
* key of "input" / "output"
* values are list of dict.
* each dict has keys:
* "bitw": integer
* "radix": list/array, per channel
* "scale": list/array, per channel
* "ch_dim": integer, index to onnx_shape
* "onnx_shape": list/array, onnx shape
* "shape": list/array, sim shape
* "data_format": string, used by data_converter
* "stride": list/array, used by data_converter
TODO:
only graph in/out fx_info are send out. we could send out every dp fx_info
"""
# helper function
def get_fx_info(d_radix, d_shape, dp_name, i_dp):
"""Extract fx info of one datapath.
Args:
d_radix (dict): quantization info for this dp
d_shape (dict): shape info for this dp
NOTE: missing "data_format" / "stride"
"""
        # supposed to be an integer
        conv11 = {
            "output_datapath_bitwidth": "bitw",
        }
        # supposed to be lists
        conv12 = {
            "output_datapath_radix": "radix",
            "output_scale": "scale",
        }
        # info in SnrShapeInfo; supposed to be lists
        conv22 = {
            "onnx_shape": "onnx_shape",
            "hw_shape": "shape",
        }
fx_info = {}
# d_radix is per op, it may include multi-dp, use i_dp to get it
try:
for k, v in conv11.items():
fx_info[v] = d_radix[k][i_dp]
for k, v in conv12.items():
fx_info[v] = np.array(d_radix[k][i_dp])
except:
# back-compatible. not multi-output format. toolchain version < 0.23.0
# assert i_dp == 0
# TODELETE.
for k, v in conv11.items():
fx_info[v] = d_radix[k]
for k, v in conv12.items():
fx_info[v] = np.array(d_radix[k])
dim = len(fx_info[v].shape)
assert dim == 1, f"Expect {v} to have 1 dimension, but got {dim} shape: {fx_info[v].shape}"
# NOTE: take the 0 element for hw_c_in_onnx
# knerex should give it a int not list
fx_info["ch_dim"] = d_shape["hw_c_in_onnx"][0]
for k, v in conv22.items():
fx_info[v] = np.array(d_shape[k])
fx_info["name"] = clean_name(dp_name)
fx_info["ndim"] = len(fx_info["shape"])
return fx_info
# extract shape info
_, dp2node, _, dp2idx_in_node, _, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
ioinfo = {}
ioinfo["input"] = [get_fx_info(j_fx[dp2node[dp]],
j_shape["dp_info"][dp],
dp,
dp2idx_in_node.get(dp, 0))
for dp in dp_in]
ioinfo["output"] = [get_fx_info(j_fx[dp2node[dp]],
j_shape["dp_info"][dp],
dp,
dp2idx_in_node.get(dp, 0))
for dp in dp_out]
return ioinfo
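# Illustrative shape of the returned ioinfo (all field values are made up):
#   {"input":  [{"name": "data_o0", "bitw": 8, "radix": array([...]), "scale": array([...]),
#                "ch_dim": 1, "onnx_shape": array([1, 3, 224, 224]), "shape": array([...]),
#                "ndim": 4}],
#    "output": [{...}]}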
def is_zip_file(file_path):
"""Judget a zip or not using magic number."""
with open(file_path, 'rb') as f:
return f.read(4) == b'PK\x03\x04'
def get_ioinfo_from_bie(
p_bie, hw_mode, dyn_bin="/workspace/libs/dynasty/run_fix_inference"
):
"""Get input/output nodes info from bie.
Info includes:
* input node name with order
* output node name and shape
Not included:
* (NOT) datapath (in+out) fx info: bw, radix, scales per channel
"""
# detour for bie2
if is_zip_file(p_bie):
return get_ioinfo_from_bie2(p_bie)
p_working = pathlib.Path(tempfile.mkdtemp(prefix="unpack_bie_"))
cmd = f"{dyn_bin} -m {p_bie} -t 1 -p kl{hw_mode} -e -o {p_working}"
cp = run_bash_script(cmd)
assert (
cp.returncode == 0
), f"Failed to extract fx info from bie. Return code {cp.returncode}"
p_j = p_working / "SnrShapeInfo.json"
assert p_j.exists(), f"output missing: {p_j}"
with open(p_j, "r") as f:
j_shape = json.load(f)
_, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
# just need graph out datapath shape
dp_out_shape = {k: shape_info[k] for k in dp_out}
# TODO: delete folder p_working
# the last one is optional ioinfo.json
return dp_in, dp_out, dp_out_shape, None
def get_ioinfo_from_bie2(p_bie2):
"""Parse ioinfo from bie2 format.
NOTE:
should be same output as get_ioinfo_from_bie.
"""
js = load_zip_jsons(p_bie2)
k = "shape_info.json" # from 0.23.0
assert k in js, f"NO {k} found in bie {p_bie2}. Found: {js.keys()}"
j_shape = js[k]
_, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
# just need graph out datapath shape
dp_out_shape = {k: shape_info[k] for k in dp_out}
# need ioinfo.json for dynasty
# but optional
k = "ioinfo.json"
if k in js:
ioinfo = js[k]
else:
ioinfo = None
return dp_in, dp_out, dp_out_shape, ioinfo
def find_input_txt_folder(p_model, pref="knerex_input"):
"""Find all input folders.
The input folders should be:
- knerex_input / knerex_input_1 / knerex_input_2 ... (for models with multiple inputs), or
- simulator_input / simulator_input_1 / simulator_input_2 ... (for models with multiple inputs)
"""
lst = []
p_in = p_model / "input" / pref
if p_in.exists():
lst.append(p_in)
else:
return None
for i in range(1, 100):
p_in = p_model / "input" / f"{pref}_{i}"
if p_in.exists():
lst.append(p_in)
else:
return lst
else:
print("should not arrive here")
return lst
def get_input_txt_list(p_in):
"""List input txt names in given folder.
test_input.txt will be 1st one if exist.
"""
fns = [fn.name for fn in list(p_in.glob("*.txt"))]
fn_default = "test_input.txt"
if fn_default in fns:
# move fn_default to first one
fns.remove(fn_default)
return [fn_default] + fns
else:
return fns
def need_compress_command_bin(tc_cat, tc_name):
"""Special mark for some special case."""
if tc_cat.startswith("m"):
big_kernels = [
"bk23x23",
"bk25x25",
"bk27x27",
"bk29x29",
"bk31x31",
"bk33x33",
"bk35x35",
]
return any([a in tc_name for a in big_kernels])
return False
def guess_model_id(s):
    sr = re.compile(r"model_(\d+)")
    try:
        return int(sr.findall(s)[0])
    except (IndexError, ValueError):
        return 32768
def clean_case_name(x):
"""Normalize the case names.
The case name in final report, may have extra info:
- xxx (known bug) // remove space and after
- model_ddd_xxxxxx_append // remove _xxxxxxx
"""
    def remove_append(x):
        """Keep only the part before the first space."""
        return x.split(" ")[0]
    def remove_model_share_commit(x):
        """Strip the commit number from a model_share case name, if present."""
        s = re.compile(r"(model_\d{3})_[\da-f]{6}(_.*)")
        finds = s.findall(x)
        if len(finds) == 1:
            # found the pattern
            return "".join(finds[0])
        else:
            # no change
            return x
return remove_model_share_commit(remove_append(x))
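# Illustrative clean-ups (case names are made up):
#   clean_case_name("model_123_ab12cd_mobilenet (known bug)") -> "model_123_mobilenet"
#   clean_case_name("model_456_resnet")                       -> "model_456_resnet"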
def relative_path(target, origin):
"""Return path of target relative to origin.
NOTE: .resolve() will make symlink to its target. not working in our case
"""
# copy from https://stackoverflow.com/questions/38083555/using-pathlibs-relative-to-for-directories-on-the-same-level
p_t = pathlib.Path(target)
p_o = pathlib.Path(origin)
try:
return p_t.absolute().relative_to(p_o.absolute())
except ValueError as e: # target does not start with origin
# recursion with origin (eventually origin is root so try will succeed)
return pathlib.Path("..").joinpath(relative_path(target, p_o.parent))
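# Illustrative example (paths are made up):
#   relative_path("/data/models/m1", "/data/reports") -> PosixPath("../models/m1")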
class CustomCP:
"""A customized cp to return if timeout."""
def __init__(self, returncode, stdout=None, stderr=None):
"""Init it this cp."""
self.returncode = returncode
self.stdout = stdout
self.stderr = stderr
def __str__(self):
"""Make this cp printable."""
return f'Return Code: {self.returncode}, Error Message: {self.stderr}'
def run_bash_script(command, do_echo=False, fail_then_exit=False, timeout=60*60*6):
"""Kneron wrap on bash commands.
Inputs:
- timeout: how many seconds to run
"""
    if isinstance(command, list):
        command = " ".join(command)
try:
cp = subprocess.run(
command,
shell=True,
executable="/bin/bash",
check=False,
capture_output=True,
text=True,
timeout=timeout
)
except subprocess.TimeoutExpired:
cp = CustomCP(111, stdout=f"Command: {command}", stderr=f"TIMEOUT ({timeout}s)")
if do_echo or (cp.returncode != 0 and DEBUG):
print("-------------------------------------------------------------")
print(f"running command: {command}")
if cp.returncode != 0:
print(f"Failed at {cp.returncode}")
print("-------------------------------------------------------------")
print(cp.stdout)
print(cp.stderr)
print("-------------------------------------------------------------")
if cp.returncode != 0 and fail_then_exit:
print(f"Failed to run {command}. Exit code: {cp.returncode}")
print(f"Will not continue. exit.")
sys.exit(1)
return cp
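# Illustrative usage (the command here is arbitrary):
#   cp = run_bash_script("echo hello", do_echo=True)
#   assert cp.returncode == 0 and cp.stdout.strip() == "hello"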
def check_parallel_log(fn_log):
    """Check the parallel log file.
    NOTE: sometimes dynasty is killed by a segmentation fault while the exit value is still 0.
    We need to make sure both the exit value and the signal are zero.
    """
    exitvals = []
    signals = []
    with open(fn_log, 'r') as file:
        for i, line in enumerate(file):
            if i == 0:
                # skip the header line
                continue
            parts = line.split()
            if len(parts) > 6:  # ensure the line has enough columns
                try:
                    exitvals.append(int(parts[6]))
                    signals.append(int(parts[7]))
                except (ValueError, IndexError):
                    pass
    e1 = tuple(a for a in exitvals if a != 0)
    e2 = tuple(a for a in signals if a != 0)
    msg = []
    if len(e1) > 0:
        # usually does not happen
        msg.append(f"error: {e1}")
    if len(e2) > 0:
        msg.append(f"signal: {e2}")
    return "//".join(msg), e1, e2
def set_folder_public(p_out):
"""Make output path readable for basic group."""
dir_out = pathlib.Path(p_out)
    if not dir_out.exists():
        # in case it was already deleted before test_case.__del__ triggered
        return
    script = f"""
    chgrp -R 50000 {dir_out}
    chmod 755 {dir_out}
    find {dir_out} -type d -exec chmod 755 {{}} \\;
    find {dir_out} -type f -exec chmod 644 {{}} \\;
    """
run_bash_script(script, do_echo=False)
def assert_nodes_exists(fn_onnx, node_list):
"""Kneron solutions may cast some special process on certain nodes, specified by name.
use this script to make sure onnx not changed."""
import onnx
o = onnx.load_model(fn_onnx)
nodes_all = [a.name for a in o.graph.node]
missing = False
print("check {} for special nodes {}".format(fn_onnx, node_list))
for node in node_list:
if node not in nodes_all:
print("ERROR: node {} does not exist. check with ALG team".format(node))
missing = True
if missing:
raise FileNotFoundError
def detect_valid_model(dir_in):
"""Give a path, detect valid models under it"""
p_base = pathlib.Path(dir_in).resolve()
onnx = p_base.glob("**/*.origin.onnx")
for o in onnx:
p_model = o.parent.parent
if is_valid_case(p_model):
yield p_model
def is_valid_case(dir_case):
"""Is this a valid test case?
XXXXX/input/XXXXX.origin.onnx (or bie)
XXXXX must be same
XXXXX/input/knerex_input must exist
Not checking the txt in it.
"""
p_case = pathlib.Path(dir_case)
if not p_case.is_dir():
# print(f"{p_case} is not dir.")
# TODO: return extra string
return False
p_origin = p_case / f"input/{p_case.name}.origin.onnx"
if not p_origin.exists():
p_origin = p_case / f"input/{p_case.name}.origin.bie"
if not p_origin.exists():
return False
p_knerex_input = p_case / "input" / "knerex_input"
if not p_knerex_input.exists():
return False
return True
def filter_cases(dir_base, keywords=[]):
"""Find all test cases in dir_base.
filter out bad test cases, e.g., missing input, origin.onnx
select by only test case with keywords
output: list of path to test cases
"""
excludekeywords = []
if "-e" in keywords:
excludekeywords = keywords[keywords.index("-e") + 1 :]
keywords = keywords[0 : keywords.index("-e")]
whitelist = []
if "-f" in keywords:
whitefile = keywords[keywords.index("-f") + 1]
keywords = keywords[0 : keywords.index("-f")]
with open(whitefile, "r") as f:
lineList = f.readlines()
whitelist[:] = [x.strip() for x in lineList if x.strip()]
# print("linelist=", lineList)
# find all second level sub folder
case_all = list(pathlib.Path(dir_base).glob("*/*"))
case_all.sort()
# ONLY keep cases including ALL keywords.
# self.logger.debug("search cases using keywords: {}".format(keywords))
    case_selected = [
        a for a in case_all
        if all(k in str(a.absolute()) for k in keywords)
    ]
    if len(excludekeywords):
        case_selected = [
            a for a in case_selected
            if not any(k in str(a.absolute()) for k in excludekeywords)
        ]
    if len(whitelist):
        case_selected = [
            a for a in case_selected
            if any(
                all(s in str(a.absolute()) for s in k.split())
                for k in whitelist
            )
        ]
case_selected = [a for a in case_selected if is_valid_case(a)]
return case_selected, case_all
def filter_failed_cases(case_selected, fn_config, p_report):
p_config = pathlib.Path(fn_config)
command = f"pushd {p_report} >> /dev/null && grep {p_config.name} *.info | sort | tail -n 1 | awk -F: '{{ print $1 }}'"
cp = subprocess.run(
command,
shell=True,
executable="/bin/bash",
check=False,
capture_output=True,
text=True,
)
fn_info = cp.stdout.strip()
if len(fn_info) == 0:
# found nothing, do nothing
return case_selected
# fn_info should be `run_YYYYMMDD_hhmmss_TAG_regression.info`
fn_status = fn_info.replace("_regression.info", "_status.csv")
p_status = pathlib.Path(p_report) / fn_status
if not p_status.exists():
# no status found! something wrong. should I look for another one before?
return case_selected
# load status.csv
try:
df = pd.read_csv(str(p_status), header=[0, 1], index_col=[0, 1])
index_success = set(df.loc[df["general"]["Success"] == ""].index)
cases_failed = [
a for a in case_selected if (a.parent.name, a.name) not in index_success
]
return cases_failed
    except Exception as e:
        print(e)
return case_selected
def md5sum(filePath):
"""Check md5sum of a file/folder.
Does not support input as python object.
pikcle the file if need to.
we use string() to get object representation,
but it will not work properly for large list / numpy matrix.
because python will only print some part of data.
"""
assert type(filePath) in [str, pathlib.PosixPath], f"md5sum works on file only, but got {type(filePath)}"
def do_exclude(p_f):
ignore_patterns = ["__pycache__"]
for ig in ignore_patterns:
if ig in str(p_f):
return True
return False
def md5_update(m, fp):
# TODO: maybe use OO is better
with open(fp, "rb") as fh:
while True:
data = fh.read(8192)
if not data:
break
m.update(data)
return m
def md5sum_folder(p_folder):
l1 = list(p_folder.iterdir())
l2 = [t for t in l1 if not (do_exclude(t)) and not t.is_dir()]
# sorting is crucial for md5 calculation
l2.sort(key=lambda a: str(a))
m = hashlib.md5()
for tf in l2:
m = md5_update(m, tf)
return m.hexdigest()
p = pathlib.Path(filePath)
if p.is_file():
m = hashlib.md5()
m = md5_update(m, p)
return m.hexdigest()
elif p.is_dir():
return md5sum_folder(p)
    else:
        raise NotImplementedError(f"{p} is neither a file nor a folder. Check that it exists!")
def list2chunks(lst, k):
"""Yield successive k chunks from lst."""
n = math.ceil(len(lst) / k)
for i in range(0, len(lst), n):
yield lst[i : i + n]
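# Illustrative chunking (any list works):
#   >>> list(list2chunks([1, 2, 3, 4, 5], 2))
#   [[1, 2, 3], [4, 5]]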
def let_user_pick(options, create_new=False):
if len(options) == 0:
if create_new:
return input("\nInput new message: ")
else:
raise AttributeError
# if options available, pick one
if create_new:
# option to create new one
options.append("Create new?")
while True:
print("Please choose:")
for idx, element in enumerate(options):
print("{}) {}".format(idx + 1, element))
i = input("Enter number: ")
try:
ii = int(i) - 1
if 0 <= ii < len(options):
if create_new and ii == len(options) - 1:
# create new
return input("\nInput new message: ")
else:
return options[ii]
except:
pass
def create_zip(fn_zip, fns, p_base=None):
"""Create a zip with give files in base folder.
BUG: if diff files with same name in one folder,
only the last one will be kept.
"""
if isinstance(fns, list):
# no name change. for list of fn come in, just use the original name
# but need to turn into dict
fns = [pathlib.Path(fn) for fn in fns if fn]
fns = {p.name: p for p in fns}
assert isinstance(fns, dict), f"parameter fns must be list or dict. but got {type(fns)}"
with zipfile.ZipFile(fn_zip, "w", zipfile.ZIP_DEFLATED) as zf:
for new_name, fn in fns.items():
pf = pathlib.Path(fn)
if not pf.exists():
continue
if p_base is None:
arcname = new_name
else:
pf2 = pf.parent / new_name
arcname = str(pf2.relative_to(p_base))
zf.write(filename=str(pf), arcname=arcname)
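# Illustrative usage (file names are arbitrary and must exist to be archived):
#   create_zip("bundle.zip", ["report.csv", "log.txt"])                  # archived under their base names
#   create_zip("bundle.zip", {"renamed.csv": "out/report.csv"}, "out")   # archived under a new name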
def dict2json(d, fn_json):
"""Customize function to save dict to json file.
TODO:
merge similar functions.
"""
with open(fn_json, "w") as f:
json.dump(d, f, indent=4, sort_keys=True)
def load_regression_json(fn_json):
def convert_key(k):
d = {str(plt): plt for plt in MODE_HARDWARE}
return d.get(k, k)
    def jsonKeys2int(x):
        # despite the name, this converts string keys back to MODE_HARDWARE entries;
        # refer to https://stackoverflow.com/questions/1450957/pythons-json-module-converts-int-dictionary-keys-to-strings
if isinstance(x, dict):
return {convert_key(k): v for k, v in x.items()}
return x
with open(fn_json, "r") as f:
j = json.load(f, object_hook=jsonKeys2int)
return j
def clean_name(this_name):
"""remove special charaters from given string.
Some node names with slash, example: batch_normalization_9/gamma:0_o0
Make this a function that will all conversion will be same.
"""
return this_name.replace("/", "_")
def remove_appendix(this_name):
    """Remove known model-file suffixes (e.g. .onnx, .bie, .origin) from the end of a name."""
    # NOTE: str.strip() removes characters, not suffixes, so it must not be used here.
    suffixes = (".onnx", ".bie", ".origin", ".decomposed", ".wqbi", ".quan", ".scaled")
    changed = True
    while changed:
        changed = False
        for suf in suffixes:
            if this_name.endswith(suf):
                this_name = this_name[: -len(suf)]
                changed = True
    return this_name
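# Illustrative suffix removal (file names are made up):
#   remove_appendix("model_32.origin.onnx") -> "model_32"
#   remove_appendix("model_32.wqbi.bie")    -> "model_32"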
def get_switch_value(this_map, this_key, default):
    """Return this_map[this_key] if present, otherwise default (same as dict.get)."""
    return this_map.get(this_key, default)
def set_default(this_map, this_key, this_value):
    """Set this_map[this_key] to this_value only if the key is missing (same as dict.setdefault)."""
    if this_key not in this_map:
        this_map[this_key] = this_value
def create_logger(module_name, fn_log=None, level="WARNING"):
logger = logging.getLogger(module_name)
levels = {
"CRITICAL": logging.CRITICAL,
"ERROR": logging.ERROR,
"WARNING": logging.WARNING,
"INFO": logging.INFO,
"DEBUG": logging.DEBUG,
}
logger.setLevel(levels.get(level.upper(), logging.WARNING))
if logger.hasHandlers():
return logger
# create formatter
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
# create console handler and set level to debug
ch = logging.StreamHandler()
ch.setLevel(logging.DEBUG)
# add formatter to ch
ch.setFormatter(formatter)
logger.addHandler(ch)
if fn_log:
fh = logging.FileHandler(fn_log)
fh.setFormatter(formatter)
logger.addHandler(fh)
return logger
def patch_batch_column(cols):
    """Patch batch-info columns for NG.
    The old/r2 format has 8 columns, the NG format has 9.
    """
    if len(cols) == 9:
        """for ng
        i,0,input_1_o0,1,1,28,28,16W1C8B,4,16W1C8B,4
        o,0,conv2d_4_o0,1,1,1,1,16W1C8B,4,,
        """
        # NOTE: bchw.
        # convert to integer?
        return cols
    elif len(cols) == 4:
        # for 520, the very old format
        """
        i,0,input.1,3,112,112
        o,0,806,256,1,1
        o,1,847,256,1,1
        """
        return cols
    elif len(cols) == 8:
        cols.insert(1, "")
        return cols
    else:
        print(f"info columns must be 4, 8 or 9, but got {len(cols)} ({cols})")
        raise AttributeError
def get_git_info(git_path):
"""get git info out of a single git repo
NOTE: not working submodule
"""
info = {}
# sanity check.
p_git = pathlib.Path(git_path)
if not p_git.exists():
info["error"] = "directory not exists"
return info
if not (p_git / ".git").exists():
        # BUG: git itself searches upward from this path until it finds a .git,
        # but here we expect to be given the exact repo path, not a sub-path;
        # this check may be removed later.
info["error"] = "{} is not a git repo. `.git` not found.".format(p_git)
return info
with working_directory(git_path):
b = subprocess.run(
["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, text=True
)
assert b.returncode == 0, "git command failed."
info["commit"] = b.stdout.strip()
b = subprocess.run(
["git", "rev-parse", "--abbrev-ref", "HEAD"],
stdout=subprocess.PIPE,
text=True,
)
assert b.returncode == 0, "git command failed."
info["branch"] = b.stdout.strip()
b = subprocess.run(
["git", "config", "--get", "remote.origin.url"],
stdout=subprocess.PIPE,
text=True,
)
if b.returncode == 0:
info["remote_url"] = b.stdout.strip()
else:
info["remote_url"] = "N/A"
info["path"] = str(git_path)
return info
def find_branch(model_id, commit, dir_base="/opt/data/e2e_simulator/app"):
"""get branch info from local repo folder
for fx model release
"""
p_base = pathlib.Path(dir_base)
    assert p_base.exists(), f"{p_base} does not exist."
print("check model_{}".format(model_id))
print(
" * commit graph: http://192.168.200.1:8088/modelshare/model_{}/-/network/master".format(
model_id
)
)
    models = list(p_base.glob("*/models/model_{}".format(model_id)))
    if len(models) == 0:
        print("ERROR: cannot find model_{} in any app.".format(model_id))
    p_model = models[0]
with working_directory(p_model):
cmd = ["git", "branch", "-r", "--contains", commit]
b = subprocess.run(cmd, stdout=subprocess.PIPE, text=True)
if b.returncode == 0:
o = b.stdout
print(o)
else:
o = None
return o, models[0]
def get_model_info(git_path):
"""model are submodules in app"""
info = get_git_info(git_path)
onnxs = list(git_path.glob("alg/*.onnx"))
if len(onnxs) != 1:
print("ERROR: There should only be ONE onnx. but found {}".format(len(onnxs)))
for o in onnxs:
print(" * {}".format(o.name))
print(" path: {}".format(git_path))
        assert False, "Expected exactly ONE onnx, but found {}".format(len(onnxs))
info["onnx"] = str(onnxs[0].relative_to(git_path))
return info
def get_app_info(app_path, fn_json=None):
"""get git info for kneron released apps.
The apps are listed here: http://192.168.200.1:8088/modelshare?utf8=%E2%9C%93&filter=solution_
"""
ginfo = {}
ginfo["app"] = get_git_info(app_path)
ginfo["models"] = {}
p_app = pathlib.Path(app_path)
models = p_app.glob("models/*")
for m in models:
ginfo["models"][m.name] = get_model_info(m)
# if given file name to save
if fn_json is not None:
with open(fn_json, "w") as f:
json.dump(ginfo, f, indent=4, sort_keys=True)
return ginfo
def create_noise_input_folder(dir_from, sigma_levels, override=False):
# currently only add noise to simulator_input / simulator_input_1 / simulator_input_2
assert "simulator_input" in str(dir_from), "No input/simulator_input folder."
p_from = pathlib.Path(dir_from)
fn_froms = p_from.glob("*.txt")
ori_s = {f.name: load_txt(str(f)) for f in fn_froms}
con = np.concatenate(list(ori_s.values()))
    assert len(con.shape) == 1, "noise inputs must be flattened (1-D)."
i_range = np.max(con) - np.min(con)
# NOTE: sigma_level = 3 => sigma 1, 1 pixel
for sigma_level in sigma_levels:
dir_name = p_from.name.replace(
"simulator_input", "simulator_input_sigma{}".format(sigma_level)
)
p_to = p_from.parent / dir_name
if p_to.exists():
if override:
shutil.rmtree(str(p_to))
else:
continue
p_to.mkdir(mode=0o770, parents=True, exist_ok=True)
print("Create noise input for sigma {}".format(sigma_level))
sigma_control = 3
bit_range = 256 # for 8bit
n_pixel = list(ori_s.values())[0].shape
sigma = i_range * sigma_level / (bit_range * sigma_control)
for k1, v1 in ori_s.items():
k2 = p_to / k1
noise = np.random.normal(loc=0, scale=sigma, size=n_pixel)
v2 = v1 + noise
np.savetxt(str(k2), v2, fmt="%.10f")
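# Worked example of the sigma formula above (numbers assumed for illustration):
#   with an input range of 255, sigma_level = 3, bit_range = 256 and sigma_control = 3,
#   sigma = 255 * 3 / (256 * 3) ~= 1.0, i.e. roughly one 8-bit pixel step of noise,
#   matching the note that sigma_level = 3 corresponds to a 1-pixel sigma.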
def npy2txt(np_txt: dict, input_names: list, p_input):
"""Save numpy file to txt files
np_txt is map, key is input node names,
value: numpy array of input, 3D (HWC) or 4D (BHWC)
input_names are input node names, get from onnx. it should be same as np_txt.key()
p_input is where to save the knerex_input+simulator_input folders
"""
# save texts.
n_inputs = len(input_names)
knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)]
knerex_inputs[0] = "knerex_input"
simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)]
simulator_inputs[0] = "simulator_input"
list_inputs = []
for i_in in range(len(input_names)):
# for multiple inputs
np_in_s = np_txt[input_names[i_in]]
dir_in = p_input / knerex_inputs[i_in]
dir_simu = p_input / simulator_inputs[i_in]
list_inputs.append(dir_simu)
dir_in.mkdir(parents=True, exist_ok=True)
safe_link(dir_in, dir_simu, relative=True)
dim_in = np_in_s[0].shape
total_size = np.prod(dim_in)
for i_image, np_image in enumerate(np_in_s):
dim_this = np_image.shape
assert (
dim_in == dim_this
), f"No. {i_image} input size {dim_this} is different from 1st input size {dim_in}"
p_txt = dir_in / "in_{:04d}.txt".format(i_image)
# NOTE: we assume the np_images is onnx shaped.
# WARNING: previous version we require channel last
np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f")
# link a "test_input.txt"
link_test_input_txt(list_inputs)
def solution_npy2txt(np_txt, input_names, p_input, file_name, as_test_input = False):
"""save numpy file to txt files
np_txt is map, key is input node names,
value: numpy array of input, 3D (HWC) or 4D (BHWC)
input_names are input node names, get from onnx. it should be same as np_txt.key()
p_input is where to save the knerex_input+simulator_input folders
"""
# save texts.
n_inputs = len(input_names)
knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)]
knerex_inputs[0] = "knerex_input"
simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)]
simulator_inputs[0] = "simulator_input"
list_inputs = []
for i_in in range(len(input_names)):
# for multiple inputs
np_in_s = np_txt[input_names[i_in]]
dir_in = p_input / knerex_inputs[i_in]
dir_simu = p_input / simulator_inputs[i_in]
list_inputs.append(dir_simu)
dir_in.mkdir(parents=True, exist_ok=True)
safe_link(dir_in, dir_simu, relative=True)
dim_in = np_in_s[0].shape
total_size = np.prod(dim_in)
for i_image, np_image in enumerate(np_in_s):
dim_this = np_image.shape
assert (
dim_in == dim_this
), f"No. {i_image} input size {dim_this} is different from 1st input size {dim_in}"
p_txt = dir_in / "{}.txt".format(file_name)
# NOTE: we assume the np_images is onnx shaped.
# WARNING: previous version we require channel last
np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f")
# link a "test_input.txt"
if as_test_input:
link_test_input_txt(list_inputs)
def link_test_input_txt(list_inputs, fn_txt="test_input.txt"):
list_p = [pathlib.Path(p) for p in list_inputs]
list_test_input = [p / fn_txt for p in list_p]
list_test_exist = [p for p in list_test_input if p.exists()]
if len(list_test_exist) == len(list_inputs):
# print("{} exists for all input folders.".format(fn_txt))
pass
elif len(list_test_exist) == 0:
# print("link {} in {}".format(fn_txt, list_p))
fn_all = sorted(list(list_p[0].glob("*.txt")))
fn_pick = fn_all[0].name
        # the same file name must exist in every input folder
        assert all(
            (p / fn_pick).exists() for p in list_p
        ), f"{fn_pick} is missing from some input folders."
for p in list_p:
p_from = p / fn_pick
safe_link(p_from, p / fn_txt)
else:
n_missing = len(list_inputs) - len(list_test_exist)
print(
"ERROR: Found input folders: {}, but {}/{} missing {}. {}".format(
list_inputs, n_missing, len(list_inputs), fn_txt, list_test_exist
)
)
raise FileNotFoundError
def safe_link(fn_from, fn_to, relative=True, delete_exists=True):
    """Create a symlink from `fn_from` to `fn_to`.
    * if the target already exists, delete the target first, then link.
    """
    f_from = pathlib.Path(fn_from)
    f_to = pathlib.Path(fn_to)
    assert f_from.exists(), f"source file/dir {f_from} does not exist."
    if f_to.is_symlink():
        # TODO: Path.unlink(missing_ok=...) is available from Python 3.8
        f_to.unlink()
    if f_to.exists() and delete_exists:
        shutil.rmtree(f_to)
    if relative:
        f_to.symlink_to(os.path.relpath(f_from, f_to.parent))
    else:
        f_to.symlink_to(f_from.absolute())
def estimate_mem_available():
    """Estimate available memory in kB (MemAvailable + SwapFree) from /proc/meminfo."""
    p_info = pathlib.Path("/proc/meminfo")
def parse_entry(s):
a, b = s.strip().split(":")
return a.strip(), b.strip(" kB").strip()
with open(p_info, "r") as f:
lines = f.readlines()
meminfo = {}
for line in lines:
k, v = parse_entry(line)
meminfo[k] = v
mems_kB = [int(meminfo[k]) for k in ["MemAvailable", "SwapFree"]]
return sum(mems_kB)
def expand_array(v, n):
"""Expand scalar to array. """
if isinstance(v, (collections.abc.Sequence, np.ndarray)):
# is vector / array, make sure correct length
assert len(v) == n, f"Expect {v} to have length {n} but got {len(v)}"
return np.array(v)
else:
# is scalar, expand it to array
return np.ones(n)*v
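# Illustrative behaviour (plain numbers, nothing model-specific):
#   >>> expand_array(2.0, 3)
#   array([2., 2., 2.])
#   >>> expand_array([1, 2, 3], 3)
#   array([1, 2, 3])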
def gen_random_string(length):
"""Generate random string use less resource."""
alphabet = string.ascii_letters + string.digits
random_string = ''.join(secrets.choice(alphabet) for _ in range(length))
return random_string
def chunker(seq, size):
"""Cut long list into small lists.
from https://stackoverflow.com/questions/434287/how-to-iterate-over-a-list-in-chunks
"""
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
@contextlib.contextmanager
def working_directory(path):
"""
Changes working directory and returns to previous on exit.
link: https://stackoverflow.com/questions/41742317/how-can-i-change-directory-with-python-pathlib
"""
prev_cwd = pathlib.Path.cwd()
# create if not exist
p = pathlib.Path(path)
p.mkdir(mode=0o770, parents=True, exist_ok=True)
os.chdir(str(p))
try:
yield
finally:
os.chdir(prev_cwd)
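# Illustrative usage (the directory name is arbitrary; it is created if missing):
#   with working_directory("/tmp/regression_scratch"):
#       run_bash_script("touch marker.txt")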
def pprint_dict(ld):
"""Convert dict to string then put into report."""
d = defaultdict(set)
for d1 in ld:
for k, v in d1.items():
d[v].add(k)
if len(d) == 0:
return ""
elif len(d) == 1:
return list(d.keys())[0]
else:
s2 = [f"""{k}:{",".join(v)}""" for k, v in d.items()]
return " \\ ".join(s2)
if __name__ == "__main__":
pass