#! /usr/bin/env python3
import os
import sys
import subprocess
import logging
import pathlib
import json
import contextlib
import re
import zipfile
import lzma
import pickle
import math
import struct
import hashlib
import shutil
import collections.abc
from collections import defaultdict
import string
import secrets
import tempfile
import itertools

import numpy as np
import pandas as pd

from sys_flow.flow_constants import MODE_HARDWARE
from sys_flow.onnx_op_stats import onnx_info
from sys_flow.util_lib import load_zip_jsons

DEBUG = bool(os.environ.get("REGRESSION_DEBUG", False))

import snoop

snoop.install(enabled=DEBUG)

if DEBUG:
    from IPython.terminal import embed as emb

    terminal = emb.InteractiveShellEmbed()
    terminal.extension_manager.load_extension("autoreload")
    terminal.run_line_magic("autoreload", "2")
    embed = terminal.mainloop
else:
    embed = lambda: None


# functions on loading text file
def twos_comp(val, bits):
    """Compute the 2's complement of int value val."""
    # if sign bit is set e.g., 8bit: 128-255
    if (val & (1 << (bits - 1))) != 0:
        # compute negative value
        val = val - (1 << bits)
    # return positive value as is
    return val
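
# A quick sanity check of the mapping above (illustrative values, not from the original source):
#   twos_comp(0xFF, 8) -> -1      (sign bit set, wraps to negative)
#   twos_comp(0x7F, 8) -> 127     (sign bit clear, returned unchanged)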


def array_le2flt(arr, n_flt: int, n_byte: int = 4):
    """Convert an array of unsigned integer bytes (little endian, 4 bytes per value) to floats.

    INPUT:
    * n_flt: how many float numbers to extract
    * n_byte: how many bytes to convert into 1 float.

    Used to convert scale to float.
    """
    return struct.unpack(f"<{n_flt}f", struct.pack(f"<{n_flt*n_byte}B", *arr))


def intle2flt(i):
    packed = struct.pack('<I', i)
    return struct.unpack('<f', packed)[0]


array_intle2flt = np.vectorize(intle2flt)
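
# Illustrative conversions (assumed values, not from the original source):
#   intle2flt(0x3F800000)            -> 1.0
#   array_le2flt([0, 0, 128, 63], 1) -> (1.0,)   # bytes 00 00 80 3F, little endian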


def load_txt(filename):
    """Load dynasty dump text (float data) as flattened data."""
    return np.array(pd.read_csv(filename, names=["fx"])["fx"], dtype=np.float32)


def txt2np_fl(filename, shape):
    """Load dynasty dumped text (float data) into numpy with given shape."""
    return np.array(
        pd.read_csv(filename, names=["data"])["data"], dtype=np.float32
    ).reshape(shape)


def txt2np_fx(filename, shape):
    """Load dynasty dumped text (fix point data) into numpy with given shape."""
    return np.array(
        pd.read_csv(filename, names=["data"])["data"], dtype=np.int32
    ).reshape(shape)


def df2pkl(df, fn):
    """Dump a python object to a lzma compressed pickle file.

    fn is suggested to end with .pkl.xz
    """
    with lzma.open(fn, 'wb') as f:
        pickle.dump(df, f)


def pkl2df(fn):
    """Load a python object from a lzma compressed pickle file."""
    with lzma.open(fn, 'rb') as f:
        df = pickle.load(f)
    return df


def dir2pkl(p_dir):
    p_input = pathlib.Path(p_dir)
    p_xz_s = list(p_input.glob("*_inputs/*.xz"))
    print(f"Found {len(p_xz_s)} xz files")
    if len(p_xz_s) == 0:
        print(f"ERROR: found 0 xz file in {p_dir}")
        return

    d_xz = [pkl2df(a) for a in p_xz_s]

    dp_ins = set(d_xz[0].keys())

    assert all(set(d.keys()) == dp_ins for d in d_xz), "xz files have different input keys."

    np_in = {}
    for k_in in dp_ins:
        # there is only one entry in each solution-dumped xz file.
        np_in[k_in] = [a[k_in][0] for a in d_xz]

    # save to working directory
    fn_pkl = f"{p_input.name}.pkl.xz"
    df2pkl(np_in, fn_pkl)
    print(f" np_in dumped to {fn_pkl}")

    return fn_pkl


# get ioinfo from onnx or bie
def get_ioinfo_from_onnx(p_onnx):
    """Get input/output nodes info from onnx.

    Info includes:
    * input node name with order
    * output node name and shape
    """
    oinfo = onnx_info(p_onnx)
    input_nodes, output_nodes, _ = oinfo.get_ioinfo()
    out_node_shape = {dp_out: oinfo.dp_shape[dp_out]["dims"] for dp_out in output_nodes}
    # Note: keep same interface for get_ioinfo_from_bie / get_ioinfo_from_bie2
    ioinfo = None
    return input_nodes, output_nodes, out_node_shape, ioinfo


def dp2dyn_dump(dp, graph_in, graph_out, i_loop=None):
    """Give the dynasty dump name for a given dp.

    A special prefix is used for model input / output nodes.

    A special suffix is appended for nodes inside a loop.
    """
    fn_dump = clean_name(dp)

    if dp in graph_in:
        # if graph input/output, add special prefix
        fn_dump = f"input_{fn_dump}"
    elif dp in graph_out:
        fn_dump = f"output_{fn_dump}"
    else:
        # normal datapath, neither graph input nor output
        pass

    if i_loop is not None:
        fn_dump = f"{fn_dump}_iteration_{i_loop}"

    return fn_dump
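
# Illustrative names produced by dp2dyn_dump (hypothetical datapath names):
#   dp2dyn_dump("act/relu_o0", ["images_o0"], ["out_o0"])          -> "act_relu_o0"
#   dp2dyn_dump("images_o0",   ["images_o0"], ["out_o0"])          -> "input_images_o0"
#   dp2dyn_dump("out_o0",      ["images_o0"], ["out_o0"], i_loop=2) -> "output_out_o0_iteration_2"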


def parse_shape_info(j):
    """Parse op/dp info from knerex shapeinfo.json."""
    if "op2dps" in j:
        # knerex from 0.23.0 will give op2dps and ops
        nodes_w_dp = list(j["op2dps"].keys())  # no order.

        # TODO: use this to make sure it is correct
        # nodes_outnode = [a for a in nodes_w_dp if a.startswith("OutputNode_")]

        # j["ops"] is ordered, but we need to remove OutputNode
        nodes = [node for node in j["ops"] if node in nodes_w_dp]
        node2dp = j["op2dps"]
        dp2node = {dp: op for op, dps in node2dp.items() for dp in dps}
        dp2idx_in_node = {dp: i_dp
                          for op, dps in node2dp.items()
                          for i_dp, dp in enumerate(dps)}

    elif "ops" in j:
        # old way. TODELETE. knerex will only give ops. no op2dps
        nodes = j["ops"]

        dp2node = {}
        node2dp = {}
        for dp in j["dp_info"]:
            nd = j["dp_info"][dp]["node_name"][0]
            dp2node[dp] = nd
            # multi-output not supported without "op2dps"
            # we assume each node will have one datapath
            assert nd not in node2dp, (
                f"node ({nd}) has dp ({node2dp[nd]}) already. "
                f"Trying to append dp ({dp})."
                "Old knerex format without op2dps info does not support multi-output."
            )
            # one node may have multiple dp outputs
            # but we don't have the correct order of dps!!!
            node2dp[nd] = [dp]
        # each dp is always the 0th dp of its node.
        dp2idx_in_node = {}

        # remove OutputNode_*, they are dummy nodes without output datapath
        def is_outputNode(node, node2dp):
            return (node not in node2dp) and node.startswith("OutputNode_")
        nodes = [node for node in nodes if not is_outputNode(node, node2dp)]

    else:
        raise NotImplementedError("Missing op2dps / ops in shapeinfo.json")

    # onnx_shape
    dp_shape = {k: tuple(j["dp_info"][k]["onnx_shape"]) for k in dp2node.keys()}
    dp_hw_c = {k: j["dp_info"][k]["hw_c_in_onnx"][0] for k in dp2node.keys()}

    # get graph in/output dp
    graph_dp_out = j["dp_out"]
    graph_dp_in = j["dp_in"]

    # get dump name and index in graph. (support subgraph)
    dp2dump = dict()
    # index is used to sort the snr report
    dp2index = dict()
    subgraph = j.get("subgraph", None)
    for i_op, op in enumerate(nodes):
        # NOTE: we can get OPs in straightened order.
        # each OP may have multiple DPs
        dps = node2dp[op]

        for i_dp, dp in enumerate(dps):
            # some dp may run in loops, so dp2dump and dp2index have different indices
            # here is the main graph, so use the "-" placeholder
            dp_index = (dp, "-")
            dp2dump[dp_index] = dp2dyn_dump(dp, graph_dp_in, graph_dp_out)
            idx_2nd = f"o{i_dp}" if len(dps) > 1 else "-"
            dp2index[dp_index] = (i_op, idx_2nd)

        # check subgraph. some OP may be a loop node.
        if subgraph and op in subgraph:
            this_sub = subgraph[op]
            if "op_outs" in this_sub:
                # backward compatible with the old single-output format
                # turn it into a list of lists
                op_outs = [[t] for t in this_sub["op_outs"]]
            elif "op2dps" in this_sub:
                d2 = this_sub["op2dps"]
                sub_ops = this_sub["ops"]
                op_outs = [d2[sub_op] for sub_op in sub_ops if sub_op in d2]
            else:
                raise NotImplementedError("Missing op2dps / op_outs for "
                                          f"subgraph {op} in shapeinfo.json")

            # op_outs is a list of lists; flatten it into dps
            sub_outs = list(itertools.chain(*op_outs))
            N_dp = len(sub_outs)

            n_loop = this_sub["max_count"][0]  # why does knerex give a list here?
            for i_loop in range(n_loop):
                for i_dp, dp_name in enumerate(sub_outs):
                    dp_index = (dp_name, i_loop)
                    dp2index[dp_index] = (i_op, N_dp*i_loop+i_dp)
                    dp2dump[dp_index] = dp2dyn_dump(dp_name,
                                                    graph_dp_in,
                                                    graph_dp_out,
                                                    i_loop=i_loop)

    return (nodes, dp2node, node2dp, dp2idx_in_node, dp_shape, dp_hw_c,
            dp2index, dp2dump, graph_dp_in, graph_dp_out)
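
# A minimal sketch of the shapeinfo.json fields read above (hypothetical values, shown only
# to illustrate the structure the parser expects; real files carry more keys):
#   {
#     "ops":      ["conv1", "OutputNode_0"],
#     "op2dps":   {"conv1": ["conv1_o0"]},
#     "dp_info":  {"conv1_o0": {"onnx_shape": [1, 16, 8, 8], "hw_c_in_onnx": [1]}},
#     "dp_in":    ["images_o0"],
#     "dp_out":   ["conv1_o0"],
#     "subgraph": {}    # optional; loop nodes carry "ops"/"op2dps"/"max_count" (or legacy "op_outs")
#   }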


def get_ioinfo_from_knerex_json(j_fx, j_shape):
    """Get ioinfo from knerex dumped json.

    This function returns json which is compatible with
    * *_ioinfo.json dumped by compiler
    * then loaded by compiler_v2.load_ioinfo_json.

    Specification of ioinfo.json
    * keys "input" / "output"
    * values are lists of dicts.
    * each dict has keys:
        * "bitw": integer
        * "radix": list/array, per channel
        * "scale": list/array, per channel
        * "ch_dim": integer, index into onnx_shape
        * "onnx_shape": list/array, onnx shape
        * "shape": list/array, sim shape
        * "data_format": string, used by data_converter
        * "stride": list/array, used by data_converter

    See the illustrative entry after this function.

    TODO:
        only graph in/out fx_info are sent out. we could send out fx_info for every dp
    """
    # helper function
    def get_fx_info(d_radix, d_shape, dp_name, i_dp):
        """Extract fx info of one datapath.

        Args:
            d_radix (dict): quantization info for this dp
            d_shape (dict): shape info for this dp

        NOTE: missing "data_format" / "stride"
        """
        # supposed to be an integer
        conv11 = {
            "output_datapath_bitwidth": "bitw",
        }
        # supposed to be a list
        conv12 = {
            "output_datapath_radix": "radix",
            "output_scale": "scale",
        }
        # info in SnrShapeInfo. supposed to be a list
        conv22 = {
            "onnx_shape": "onnx_shape",
            "hw_shape": "shape",
        }

        fx_info = {}

        # d_radix is per op; it may include multiple dps, use i_dp to pick one
        try:
            for k, v in conv11.items():
                fx_info[v] = d_radix[k][i_dp]
            for k, v in conv12.items():
                fx_info[v] = np.array(d_radix[k][i_dp])
        except:
            # back-compatible. not multi-output format. toolchain version < 0.23.0
            # assert i_dp == 0
            # TODELETE.
            for k, v in conv11.items():
                fx_info[v] = d_radix[k]
            for k, v in conv12.items():
                fx_info[v] = np.array(d_radix[k])
                dim = len(fx_info[v].shape)
                assert dim == 1, f"Expect {v} to have 1 dimension, but got {dim} shape: {fx_info[v].shape}"

        # NOTE: take the 0th element of hw_c_in_onnx
        # knerex should give it as an int, not a list
        fx_info["ch_dim"] = d_shape["hw_c_in_onnx"][0]

        for k, v in conv22.items():
            fx_info[v] = np.array(d_shape[k])

        fx_info["name"] = clean_name(dp_name)
        fx_info["ndim"] = len(fx_info["shape"])

        return fx_info

    # extract shape info
    _, dp2node, _, dp2idx_in_node, _, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)

    ioinfo = {}
    ioinfo["input"] = [get_fx_info(j_fx[dp2node[dp]],
                                   j_shape["dp_info"][dp],
                                   dp,
                                   dp2idx_in_node.get(dp, 0))
                       for dp in dp_in]
    ioinfo["output"] = [get_fx_info(j_fx[dp2node[dp]],
                                    j_shape["dp_info"][dp],
                                    dp,
                                    dp2idx_in_node.get(dp, 0))
                        for dp in dp_out]

    return ioinfo
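
# A hypothetical single entry of ioinfo["input"] / ioinfo["output"], matching the spec in the
# docstring above (values are made up; "data_format"/"stride" only appear when the info comes
# from the compiler-dumped *_ioinfo.json, not from get_fx_info):
#   {
#     "name": "images_o0", "bitw": 8, "ch_dim": 1, "ndim": 4,
#     "radix": array([7, 7, 7]), "scale": array([1.0, 1.0, 1.0]),
#     "onnx_shape": array([1, 3, 32, 32]), "shape": array([1, 3, 32, 32]),
#   }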


def is_zip_file(file_path):
    """Judge whether a file is a zip using the magic number."""
    with open(file_path, 'rb') as f:
        return f.read(4) == b'PK\x03\x04'


def get_ioinfo_from_bie(
    p_bie, hw_mode, dyn_bin="/workspace/libs/dynasty/run_fix_inference"
):
    """Get input/output nodes info from bie.

    Info includes:
    * input node name with order
    * output node name and shape
    Not included:
    * (NOT) datapath (in+out) fx info: bw, radix, scales per channel
    """
    # detour for bie2
    if is_zip_file(p_bie):
        return get_ioinfo_from_bie2(p_bie)

    p_working = pathlib.Path(tempfile.mkdtemp(prefix="unpack_bie_"))

    cmd = f"{dyn_bin} -m {p_bie} -t 1 -p kl{hw_mode} -e -o {p_working}"
    cp = run_bash_script(cmd)
    assert (
        cp.returncode == 0
    ), f"Failed to extract fx info from bie. Return code {cp.returncode}"

    p_j = p_working / "SnrShapeInfo.json"
    assert p_j.exists(), f"output missing: {p_j}"
    with open(p_j, "r") as f:
        j_shape = json.load(f)

    _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
    # just need the graph output datapath shapes
    dp_out_shape = {k: shape_info[k] for k in dp_out}

    # TODO: delete folder p_working

    # the last one is the optional ioinfo.json
    return dp_in, dp_out, dp_out_shape, None


def get_ioinfo_from_bie2(p_bie2):
    """Parse ioinfo from the bie2 format.

    NOTE:
        should produce the same output as get_ioinfo_from_bie.
    """
    js = load_zip_jsons(p_bie2)

    k = "shape_info.json"  # from 0.23.0
    assert k in js, f"NO {k} found in bie {p_bie2}. Found: {js.keys()}"
    j_shape = js[k]

    _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
    # just need the graph output datapath shapes
    dp_out_shape = {k: shape_info[k] for k in dp_out}

    # dynasty needs ioinfo.json, but it is optional
    k = "ioinfo.json"
    if k in js:
        ioinfo = js[k]
    else:
        ioinfo = None

    return dp_in, dp_out, dp_out_shape, ioinfo


def find_input_txt_folder(p_model, pref="knerex_input"):
    """Find all input folders.

    The input folders should be:

    - knerex_input / knerex_input_1 / knerex_input_2 ... (for models with multiple inputs), or
    - simulator_input / simulator_input_1 / simulator_input_2 ... (for models with multiple inputs)
    """
    lst = []

    p_in = p_model / "input" / pref
    if p_in.exists():
        lst.append(p_in)
    else:
        return None

    for i in range(1, 100):
        p_in = p_model / "input" / f"{pref}_{i}"
        if p_in.exists():
            lst.append(p_in)
        else:
            return lst
    else:
        print("should not arrive here")
        return lst


def get_input_txt_list(p_in):
    """List input txt names in the given folder.

    test_input.txt will be the 1st one if it exists.
    """
    fns = [fn.name for fn in list(p_in.glob("*.txt"))]
    fn_default = "test_input.txt"
    if fn_default in fns:
        # move fn_default to the front
        fns.remove(fn_default)
        return [fn_default] + fns
    else:
        return fns


def need_compress_command_bin(tc_cat, tc_name):
    """Special mark for some special cases."""
    if tc_cat.startswith("m"):
        big_kernels = [
            "bk23x23",
            "bk25x25",
            "bk27x27",
            "bk29x29",
            "bk31x31",
            "bk33x33",
            "bk35x35",
        ]
        return any(a in tc_name for a in big_kernels)
    return False


def guess_model_id(s):
    sr = re.compile(r"model_(\d+)")
    try:
        return int(sr.findall(s)[0])
    except:
        return 32768
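
# Illustrative guesses (hypothetical names):
#   guess_model_id("model_123_resnet") -> 123
#   guess_model_id("no_id_here")       -> 32768   (fallback value)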


def clean_case_name(x):
    """Normalize the case names.

    The case name in the final report may carry extra info:

    - xxx (known bug) // remove the space and everything after it
    - model_ddd_xxxxxx_append // remove the _xxxxxx commit part
    """
    def remove_append(x):
        """Works when there is no space inside the name itself."""
        return x.split(" ")[0]

    def remove_model_share_commit(x):
        """If this is a model_share case with a commit number in it."""
        s = re.compile(r"(model_\d{3})_[\da-f]{6}(_.*)")
        finds = s.findall(x)
        if len(finds) == 1:
            # found the pattern
            return "".join(finds[0])
        else:
            # no change.
            return x

    return remove_model_share_commit(remove_append(x))
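
# Illustrative normalization (hypothetical case names):
#   clean_case_name("model_123_ab12cd_resnet (known bug)") -> "model_123_resnet"
#   clean_case_name("model_045_mobilenet")                 -> "model_045_mobilenet"   (unchanged)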


def relative_path(target, origin):
    """Return the path of target relative to origin.

    NOTE: .resolve() would replace a symlink with its target, which does not work in our case
    """
    # copied from https://stackoverflow.com/questions/38083555/using-pathlibs-relative-to-for-directories-on-the-same-level
    p_t = pathlib.Path(target)
    p_o = pathlib.Path(origin)
    try:
        return p_t.absolute().relative_to(p_o.absolute())
    except ValueError:  # target does not start with origin
        # recurse with the parent of origin (eventually origin is root, so the try will succeed)
        return pathlib.Path("..").joinpath(relative_path(target, p_o.parent))


class CustomCP:
    """A minimal CompletedProcess-like object returned on timeout."""

    def __init__(self, returncode, stdout=None, stderr=None):
        """Initialize this cp."""
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Make this cp printable."""
        return f'Return Code: {self.returncode}, Error Message: {self.stderr}'


def run_bash_script(command, do_echo=False, fail_then_exit=False, timeout=60*60*6):
    """Kneron wrapper for bash commands.

    Inputs:
    - timeout: how many seconds the command is allowed to run
    """
    if isinstance(command, list):
        command = " ".join(command)

    try:
        cp = subprocess.run(
            command,
            shell=True,
            executable="/bin/bash",
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout
        )
    except subprocess.TimeoutExpired:
        cp = CustomCP(111, stdout=f"Command: {command}", stderr=f"TIMEOUT ({timeout}s)")

    if do_echo or (cp.returncode != 0 and DEBUG):
        print("-------------------------------------------------------------")
        print(f"running command: {command}")
        if cp.returncode != 0:
            print(f"Failed at {cp.returncode}")
        print("-------------------------------------------------------------")
        print(cp.stdout)
        print(cp.stderr)
        print("-------------------------------------------------------------")

    if cp.returncode != 0 and fail_then_exit:
        print(f"Failed to run {command}. Exit code: {cp.returncode}")
        print("Will not continue. exit.")
        sys.exit(1)
    return cp
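
# Illustrative usage (hypothetical command):
#   cp = run_bash_script("ls /tmp", do_echo=True, timeout=30)
#   if cp.returncode != 0:
#       print(cp.stderr)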


def check_parallel_log(fn_log):
    """Check the parallel log file.

    NOTE: sometimes dynasty is killed by a segmentation fault while the return code is still 0.
    We need to make sure both columns are zero.
    """
    exitvals = []
    signals = []

    with open(fn_log, 'r') as file:
        for i, line in enumerate(file):
            if i == 0:
                # skip the header line
                continue
            parts = line.split()
            if len(parts) > 7:  # ensure the line has enough columns
                try:
                    exitvals.append(int(parts[6]))
                    signals.append(int(parts[7]))
                except:
                    pass

    e1 = tuple(a for a in exitvals if a != 0)
    e2 = tuple(a for a in signals if a != 0)
    msg = []
    if len(e1) > 0:
        # usually does not happen here
        msg.append(f"error: {e1}")
    if len(e2) > 0:
        msg.append(f"signal: {e2}")
    return "//".join(msg), e1, e2


def set_folder_public(p_out):
    """Make the output path readable for the basic group."""
    dir_out = pathlib.Path(p_out)
    if not dir_out.exists():
        # in case it had been deleted before test_case.__del__ is triggered
        return
    script = f"""
    chgrp -R 50000 {dir_out}
    chmod 755 {dir_out}
    find {dir_out} -type d -exec chmod 755 {{}} \\;
    find {dir_out} -type f -exec chmod 644 {{}} \\;
    """

    run_bash_script(script, do_echo=False)


def assert_nodes_exists(fn_onnx, node_list):
    """Check that nodes with the given names still exist in the onnx.

    Kneron solutions may apply special processing to certain nodes, specified by name.
    Use this to make sure the onnx has not changed.
    """
    import onnx

    o = onnx.load_model(fn_onnx)
    nodes_all = [a.name for a in o.graph.node]
    missing = False
    print("check {} for special nodes {}".format(fn_onnx, node_list))
    for node in node_list:
        if node not in nodes_all:
            print("ERROR: node {} does not exist. check with ALG team".format(node))
            missing = True
    if missing:
        raise FileNotFoundError


def detect_valid_model(dir_in):
    """Given a path, detect valid models under it."""
    p_base = pathlib.Path(dir_in).resolve()
    onnx = p_base.glob("**/*.origin.onnx")

    for o in onnx:
        p_model = o.parent.parent
        if is_valid_case(p_model):
            yield p_model


def is_valid_case(dir_case):
    """Is this a valid test case?

    XXXXX/input/XXXXX.origin.onnx (or bie)
    XXXXX must be the same

    XXXXX/input/knerex_input must exist
    The txt files inside are not checked.
    """
    p_case = pathlib.Path(dir_case)

    if not p_case.is_dir():
        # print(f"{p_case} is not dir.")
        # TODO: return an extra string
        return False

    p_origin = p_case / f"input/{p_case.name}.origin.onnx"
    if not p_origin.exists():
        p_origin = p_case / f"input/{p_case.name}.origin.bie"
        if not p_origin.exists():
            return False

    p_knerex_input = p_case / "input" / "knerex_input"
    if not p_knerex_input.exists():
        return False

    return True


def filter_cases(dir_base, keywords=[]):
    """Find all test cases in dir_base.

    filter out bad test cases, e.g., missing input or origin.onnx
    select only the test cases matching the keywords
    output: list of paths to test cases
    """
    excludekeywords = []
    if "-e" in keywords:
        excludekeywords = keywords[keywords.index("-e") + 1 :]
        keywords = keywords[0 : keywords.index("-e")]

    whitelist = []
    if "-f" in keywords:
        whitefile = keywords[keywords.index("-f") + 1]
        keywords = keywords[0 : keywords.index("-f")]
        with open(whitefile, "r") as f:
            lineList = f.readlines()
            whitelist[:] = [x.strip() for x in lineList if x.strip()]
            # print("linelist=", lineList)

    # find all second level sub folders
    case_all = list(pathlib.Path(dir_base).glob("*/*"))
    case_all.sort()

    # ONLY keep cases including ALL keywords.
    # self.logger.debug("search cases using keywords: {}".format(keywords))
    case_selected = [
        a
        for a in case_all
        if all(k in str(a.absolute()) for k in keywords)
    ]

    if len(excludekeywords):
        case_selected = [
            a
            for a in case_selected
            if all(k not in str(a.absolute()) for k in excludekeywords)
        ]

    if len(whitelist):
        case_selected = [
            a
            for a in case_selected
            if any(
                all(s in str(a.absolute()) for s in k.split())
                for k in whitelist
            )
        ]

    case_selected = [a for a in case_selected if is_valid_case(a)]

    return case_selected, case_all


def filter_failed_cases(case_selected, fn_config, p_report):
    p_config = pathlib.Path(fn_config)
    command = f"pushd {p_report} >> /dev/null && grep {p_config.name} *.info | sort | tail -n 1 | awk -F: '{{ print $1 }}'"
    cp = subprocess.run(
        command,
        shell=True,
        executable="/bin/bash",
        check=False,
        capture_output=True,
        text=True,
    )
    fn_info = cp.stdout.strip()
    if len(fn_info) == 0:
        # found nothing, do nothing
        return case_selected

    # fn_info should be `run_YYYYMMDD_hhmmss_TAG_regression.info`
    fn_status = fn_info.replace("_regression.info", "_status.csv")
    p_status = pathlib.Path(p_report) / fn_status
    if not p_status.exists():
        # no status found! something is wrong. should I look for an earlier one?
        return case_selected

    # load status.csv
    try:
        df = pd.read_csv(str(p_status), header=[0, 1], index_col=[0, 1])
        index_success = set(df.loc[df["general"]["Success"] == "✓"].index)
        cases_failed = [
            a for a in case_selected if (a.parent.name, a.name) not in index_success
        ]
        return cases_failed
    except Exception as e:
        pp(e)
        return case_selected


def md5sum(filePath):
    """Compute the md5sum of a file or folder.

    Does not support a python object as input;
    pickle the object to a file first if needed.

    (Using str() to get an object representation would not work properly for
    large lists / numpy matrices, because python only prints part of the data.)
    """
    assert type(filePath) in [str, pathlib.PosixPath], f"md5sum works on file only, but got {type(filePath)}"

    def do_exclude(p_f):
        ignore_patterns = ["__pycache__"]
        for ig in ignore_patterns:
            if ig in str(p_f):
                return True

        return False

    def md5_update(m, fp):
        # TODO: maybe an OO style is better
        with open(fp, "rb") as fh:
            while True:
                data = fh.read(8192)
                if not data:
                    break
                m.update(data)
        return m

    def md5sum_folder(p_folder):
        l1 = list(p_folder.iterdir())
        l2 = [t for t in l1 if not do_exclude(t) and not t.is_dir()]
        # sorting is crucial for md5 calculation
        l2.sort(key=lambda a: str(a))

        m = hashlib.md5()
        for tf in l2:
            m = md5_update(m, tf)

        return m.hexdigest()

    p = pathlib.Path(filePath)
    if p.is_file():
        m = hashlib.md5()
        m = md5_update(m, p)
        return m.hexdigest()
    elif p.is_dir():
        return md5sum_folder(p)
    else:
        raise NotImplementedError(f"{p} is neither a file nor a folder. Check existence!")


def list2chunks(lst, k):
    """Yield k successive chunks from lst."""
    n = math.ceil(len(lst) / k)
    for i in range(0, len(lst), n):
        yield lst[i : i + n]
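
# Illustrative chunking (assumed values):
#   list(list2chunks(list(range(10)), 3)) -> [[0, 1, 2, 3], [4, 5, 6, 7], [8, 9]]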


def let_user_pick(options, create_new=False):
    if len(options) == 0:
        if create_new:
            return input("\nInput new message: ")
        else:
            raise AttributeError

    # if options are available, pick one
    if create_new:
        # offer an option to create a new one
        options.append("Create new?")
    while True:
        print("Please choose:")
        for idx, element in enumerate(options):
            print("{}) {}".format(idx + 1, element))
        i = input("Enter number: ")
        try:
            ii = int(i) - 1
            if 0 <= ii < len(options):
                if create_new and ii == len(options) - 1:
                    # create new
                    return input("\nInput new message: ")
                else:
                    return options[ii]
        except:
            pass


def create_zip(fn_zip, fns, p_base=None):
    """Create a zip with the given files in a base folder.

    BUG: if different files share the same name in one folder,
    only the last one will be kept.
    """
    if isinstance(fns, list):
        # no name change: for a list of fn, just use the original names,
        # but turn it into a dict
        fns = [pathlib.Path(fn) for fn in fns if fn]
        fns = {p.name: p for p in fns}
    assert isinstance(fns, dict), f"parameter fns must be list or dict. but got {type(fns)}"

    with zipfile.ZipFile(fn_zip, "w", zipfile.ZIP_DEFLATED) as zf:
        for new_name, fn in fns.items():
            pf = pathlib.Path(fn)
            if not pf.exists():
                continue

            if p_base is None:
                arcname = new_name
            else:
                pf2 = pf.parent / new_name
                arcname = str(pf2.relative_to(p_base))
            zf.write(filename=str(pf), arcname=arcname)
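
# Illustrative usage (hypothetical paths):
#   create_zip("report.zip", {"log_renamed.txt": "/tmp/run/log.txt"})
#       -> stores /tmp/run/log.txt under the archive name "log_renamed.txt"
#   create_zip("inputs.zip", ["a/in.txt", "b/in.txt"])
#       -> both map to the key "in.txt", so only the last one survives (the BUG above)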


def dict2json(d, fn_json):
    """Customized function to save a dict to a json file.

    TODO:
        merge similar functions.
    """
    with open(fn_json, "w") as f:
        json.dump(d, f, indent=4, sort_keys=True)


def load_regression_json(fn_json):
    def convert_key(k):
        d = {str(plt): plt for plt in MODE_HARDWARE}
        return d.get(k, k)

    def jsonKeys2int(x):
        # refer to https://stackoverflow.com/questions/1450957/pythons-json-module-converts-int-dictionary-keys-to-strings
        if isinstance(x, dict):
            return {convert_key(k): v for k, v in x.items()}
        return x

    with open(fn_json, "r") as f:
        j = json.load(f, object_hook=jsonKeys2int)

    return j


def clean_name(this_name):
    """Remove special characters from the given string.

    Some node names contain a slash, for example: batch_normalization_9/gamma:0_o0
    Keeping this as a function ensures all conversions are consistent.
    """
    return this_name.replace("/", "_")


def remove_appendix(this_name):
    """Strip the known model-file suffixes from a name.

    NOTE: str.strip(".onnx") removes *characters*, not the suffix,
    so the suffixes are removed explicitly here.
    """
    suffixes = (".onnx", ".bie", ".origin", ".decomposed", ".wqbi", ".quan", ".scaled")
    stripped = True
    while stripped:
        stripped = False
        for s in suffixes:
            if this_name.endswith(s):
                this_name = this_name[: -len(s)]
                stripped = True
    return this_name
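
# Illustrative stripping (hypothetical names):
#   remove_appendix("model_123.scaled.onnx") -> "model_123"
#   remove_appendix("model_123.bie")         -> "model_123"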


def get_switch_value(this_map, this_key, default):
    if this_key in this_map:
        return this_map[this_key]
    else:
        return default


def set_default(this_map, this_key, this_value):
    if this_key not in this_map:
        this_map[this_key] = this_value


def create_logger(module_name, fn_log=None, level="WARNING"):
    logger = logging.getLogger(module_name)
    levels = {
        "CRITICAL": logging.CRITICAL,
        "ERROR": logging.ERROR,
        "WARNING": logging.WARNING,
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
    }
    logger.setLevel(levels.get(level.upper(), logging.WARNING))

    if logger.hasHandlers():
        return logger

    # create formatter
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # create console handler and set level to debug
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    # add formatter to ch
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    if fn_log:
        fh = logging.FileHandler(fn_log)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    return logger


def patch_batch_column(cols):
    """Patch r2 columns for NG.

    old/r2 has 8 columns
    NG has 9 columns
    """
    if len(cols) == 9:
        """for ng
        i,0,input_1_o0,1,1,28,28,16W1C8B,4,16W1C8B,4
        o,0,conv2d_4_o0,1,1,1,1,16W1C8B,4,,
        """

        # NOTE: bchw.
        # convert to integer?
        return cols

    elif len(cols) == 4:
        # for 520, the very old format
        """
        i,0,input.1,3,112,112
        o,0,806,256,1,1
        o,1,847,256,1,1
        """
        return cols

    elif len(cols) == 8:
        cols.insert(1, "")
        return cols

    else:
        pp(f"info columns must be 4, 8 or 9. but got {len(cols)} ({cols})")
        raise AttributeError


def get_git_info(git_path):
    """Get git info out of a single git repo.

    NOTE: does not work for submodules
    """
    info = {}

    # sanity check.
    p_git = pathlib.Path(git_path)
    if not p_git.exists():
        info["error"] = "directory not exists"
        return info
    if not (p_git / ".git").exists():
        # BUG: git searches from this path up to / until .git is found.
        # currently I expect to be given exactly the path of the repo, i.e., NONE of the subpaths.
        # this part may be removed.
        info["error"] = "{} is not a git repo. `.git` not found.".format(p_git)
        return info

    with working_directory(git_path):
        b = subprocess.run(
            ["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, text=True
        )
        assert b.returncode == 0, "git command failed."
        info["commit"] = b.stdout.strip()

        b = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            stdout=subprocess.PIPE,
            text=True,
        )
        assert b.returncode == 0, "git command failed."
        info["branch"] = b.stdout.strip()

        b = subprocess.run(
            ["git", "config", "--get", "remote.origin.url"],
            stdout=subprocess.PIPE,
            text=True,
        )
        if b.returncode == 0:
            info["remote_url"] = b.stdout.strip()
        else:
            info["remote_url"] = "N/A"

    info["path"] = str(git_path)

    return info


def find_branch(model_id, commit, dir_base="/opt/data/e2e_simulator/app"):
    """Get branch info from the local repo folders.

    For fx model release.
    """
    p_base = pathlib.Path(dir_base)
    assert p_base.exists(), f"{p_base} does not exist."

    print("check model_{}".format(model_id))
    print(
        " * commit graph: http://192.168.200.1:8088/modelshare/model_{}/-/network/master".format(
            model_id
        )
    )

    models = list(p_base.glob("*/models/model_{}".format(model_id)))
    if len(models) == 0:
        print("ERROR: cannot find model_{} from any app.".format(model_id))

    p_model = models[0]
    with working_directory(p_model):
        cmd = ["git", "branch", "-r", "--contains", commit]
        b = subprocess.run(cmd, stdout=subprocess.PIPE, text=True)
        if b.returncode == 0:
            o = b.stdout
            print(o)
        else:
            o = None

    return o, models[0]


def get_model_info(git_path):
    """Models are submodules in the app."""

    info = get_git_info(git_path)

    onnxs = list(git_path.glob("alg/*.onnx"))
    if len(onnxs) != 1:
        print("ERROR: There should only be ONE onnx. but found {}".format(len(onnxs)))
        for o in onnxs:
            print(" * {}".format(o.name))
        print(" path: {}".format(git_path))
        assert False, "Found too many onnx"

    info["onnx"] = str(onnxs[0].relative_to(git_path))
    return info


def get_app_info(app_path, fn_json=None):
    """Get git info for kneron released apps.

    The apps are listed here: http://192.168.200.1:8088/modelshare?utf8=%E2%9C%93&filter=solution_
    """
    ginfo = {}
    ginfo["app"] = get_git_info(app_path)
    ginfo["models"] = {}

    p_app = pathlib.Path(app_path)

    models = p_app.glob("models/*")
    for m in models:
        ginfo["models"][m.name] = get_model_info(m)

    # if given a file name, save to it
    if fn_json is not None:
        with open(fn_json, "w") as f:
            json.dump(ginfo, f, indent=4, sort_keys=True)

    return ginfo


def create_noise_input_folder(dir_from, sigma_levels, override=False):
    # currently only add noise to simulator_input / simulator_input_1 / simulator_input_2
    assert "simulator_input" in str(dir_from), "No input/simulator_input folder."

    p_from = pathlib.Path(dir_from)

    fn_froms = p_from.glob("*.txt")
    ori_s = {f.name: load_txt(str(f)) for f in fn_froms}

    con = np.concatenate(list(ori_s.values()))
    assert len(con.shape) == 1, "concatenated noise input is not 1-D."
    i_range = np.max(con) - np.min(con)

    # NOTE: sigma_level = 3 => sigma 1, 1 pixel
    for sigma_level in sigma_levels:
        dir_name = p_from.name.replace(
            "simulator_input", "simulator_input_sigma{}".format(sigma_level)
        )
        p_to = p_from.parent / dir_name
        if p_to.exists():
            if override:
                shutil.rmtree(str(p_to))
            else:
                continue
        p_to.mkdir(mode=0o770, parents=True, exist_ok=True)

        print("Create noise input for sigma {}".format(sigma_level))
        sigma_control = 3
        bit_range = 256  # for 8bit
        n_pixel = list(ori_s.values())[0].shape
        sigma = i_range * sigma_level / (bit_range * sigma_control)

        for k1, v1 in ori_s.items():
            k2 = p_to / k1
            noise = np.random.normal(loc=0, scale=sigma, size=n_pixel)
            v2 = v1 + noise
            np.savetxt(str(k2), v2, fmt="%.10f")


def npy2txt(np_txt: dict, input_names: list, p_input):
    """Save numpy inputs to txt files.

    np_txt is a map; keys are the input node names,
    values are numpy arrays of inputs, 3D (HWC) or 4D (BHWC)

    input_names are the input node names taken from the onnx; they should match np_txt.keys()

    p_input is where to save the knerex_input + simulator_input folders
    """
    # save texts.
    n_inputs = len(input_names)
    knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)]
    knerex_inputs[0] = "knerex_input"
    simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)]
    simulator_inputs[0] = "simulator_input"

    list_inputs = []
    for i_in in range(len(input_names)):
        # for multiple inputs
        np_in_s = np_txt[input_names[i_in]]
        dir_in = p_input / knerex_inputs[i_in]
        dir_simu = p_input / simulator_inputs[i_in]
        list_inputs.append(dir_simu)
        dir_in.mkdir(parents=True, exist_ok=True)
        safe_link(dir_in, dir_simu, relative=True)

        dim_in = np_in_s[0].shape
        total_size = np.prod(dim_in)

        for i_image, np_image in enumerate(np_in_s):
            dim_this = np_image.shape
            assert (
                dim_in == dim_this
            ), f"No. {i_image} input size {dim_this} is different from 1st input size {dim_in}"
            p_txt = dir_in / "in_{:04d}.txt".format(i_image)
            # NOTE: we assume the np_images are onnx shaped.
            # WARNING: previous versions required channel last
            np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f")

    # link a "test_input.txt"
    link_test_input_txt(list_inputs)


def solution_npy2txt(np_txt, input_names, p_input, file_name, as_test_input=False):
    """Save numpy inputs to txt files (solution flavor, one file name per call).

    np_txt is a map; keys are the input node names,
    values are numpy arrays of inputs, 3D (HWC) or 4D (BHWC)

    input_names are the input node names taken from the onnx; they should match np_txt.keys()

    p_input is where to save the knerex_input + simulator_input folders
    """
    # save texts.
    n_inputs = len(input_names)
    knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)]
    knerex_inputs[0] = "knerex_input"
    simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)]
    simulator_inputs[0] = "simulator_input"

    list_inputs = []
    for i_in in range(len(input_names)):
        # for multiple inputs
        np_in_s = np_txt[input_names[i_in]]
        dir_in = p_input / knerex_inputs[i_in]
        dir_simu = p_input / simulator_inputs[i_in]
        list_inputs.append(dir_simu)
        dir_in.mkdir(parents=True, exist_ok=True)
        safe_link(dir_in, dir_simu, relative=True)

        dim_in = np_in_s[0].shape
        total_size = np.prod(dim_in)

        for i_image, np_image in enumerate(np_in_s):
            dim_this = np_image.shape
            assert (
                dim_in == dim_this
            ), f"No. {i_image} input size {dim_this} is different from 1st input size {dim_in}"
            p_txt = dir_in / "{}.txt".format(file_name)
            # NOTE: we assume the np_images are onnx shaped.
            # WARNING: previous versions required channel last
            np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f")

    # link a "test_input.txt"
    if as_test_input:
        link_test_input_txt(list_inputs)


def link_test_input_txt(list_inputs, fn_txt="test_input.txt"):
    list_p = [pathlib.Path(p) for p in list_inputs]
    list_test_input = [p / fn_txt for p in list_p]
    list_test_exist = [p for p in list_test_input if p.exists()]
    if len(list_test_exist) == len(list_inputs):
        # print("{} exists for all input folders.".format(fn_txt))
        pass
    elif len(list_test_exist) == 0:
        # print("link {} in {}".format(fn_txt, list_p))
        fn_all = sorted(list(list_p[0].glob("*.txt")))
        fn_pick = fn_all[0].name
        # the same file name must exist in all input folders
        assert all(
            [(p / fn_pick).exists() for p in list_p]
        ), "Not all input folders linked."
        for p in list_p:
            p_from = p / fn_pick
            safe_link(p_from, p / fn_txt)
    else:
        n_missing = len(list_inputs) - len(list_test_exist)
        print(
            "ERROR: Found input folders: {}, but {}/{} missing {}. {}".format(
                list_inputs, n_missing, len(list_inputs), fn_txt, list_test_exist
            )
        )
        raise FileNotFoundError


def safe_link(fn_from, fn_to, relative=True, delete_exists=True):
    """Create a link at `fn_to` pointing to `fn_from`.

    * if the target exists already, delete the target then link.
    """
    f_from = pathlib.Path(fn_from)
    f_to = pathlib.Path(fn_to)

    assert f_from.exists(), f"source file/dir {f_from} does not exist."

    if f_to.is_symlink():
        # TODO: missing_ok=False is available from 3.8
        f_to.unlink()
    if f_to.exists() and delete_exists:
        shutil.rmtree(f_to)
    if relative:
        f_to.symlink_to(os.path.relpath(f_from, f_to.parent))
    else:
        f_to.symlink_to(f_from.absolute())


def estimate_mem_available():
    """Estimate available memory (MemAvailable + SwapFree, in kB) from /proc/meminfo."""
    p_info = pathlib.Path("/proc/meminfo")

    def parse_entry(s):
        a, b = s.strip().split(":")
        return a.strip(), b.strip(" kB").strip()

    with open(p_info, "r") as f:
        lines = f.readlines()
    meminfo = {}
    for line in lines:
        k, v = parse_entry(line)
        meminfo[k] = v

    mems_kB = [int(meminfo[k]) for k in ["MemAvailable", "SwapFree"]]
    return sum(mems_kB)


def expand_array(v, n):
    """Expand a scalar to an array of length n."""
    if isinstance(v, (collections.abc.Sequence, np.ndarray)):
        # already a vector / array, make sure it has the correct length
        assert len(v) == n, f"Expect {v} to have length {n} but got {len(v)}"
        return np.array(v)
    else:
        # is a scalar, expand it to an array
        return np.ones(n)*v
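
# Illustrative expansion (assumed values):
#   expand_array(0.5, 3)       -> array([0.5, 0.5, 0.5])
#   expand_array([1, 2, 3], 3) -> array([1, 2, 3])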


def gen_random_string(length):
    """Generate a random string using few resources."""
    alphabet = string.ascii_letters + string.digits
    random_string = ''.join(secrets.choice(alphabet) for _ in range(length))
    return random_string


def chunker(seq, size):
    """Cut a long sequence into small slices of the given size.

    from https://stackoverflow.com/questions/434287/how-to-iterate-over-a-list-in-chunks
    """
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


@contextlib.contextmanager
def working_directory(path):
    """
    Change the working directory and return to the previous one on exit.

    link: https://stackoverflow.com/questions/41742317/how-can-i-change-directory-with-python-pathlib
    """
    prev_cwd = pathlib.Path.cwd()

    # create if it does not exist
    p = pathlib.Path(path)
    p.mkdir(mode=0o770, parents=True, exist_ok=True)
    os.chdir(str(p))

    try:
        yield
    finally:
        os.chdir(prev_cwd)
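
# Illustrative usage (hypothetical directory):
#   with working_directory("/tmp/scratch"):
#       run_bash_script("ls")   # runs inside /tmp/scratch; cwd is restored afterwards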


def pprint_dict(ld):
    """Convert a list of dicts to a compact string for the report."""
    d = defaultdict(set)
    for d1 in ld:
        for k, v in d1.items():
            d[v].add(k)

    if len(d) == 0:
        return ""
    elif len(d) == 1:
        return list(d.keys())[0]
    else:
        s2 = [f"""{k}:{",".join(v)}""" for k, v in d.items()]
        return " \\ ".join(s2)


if __name__ == "__main__":
    pass