#! /usr/bin/env python3

import os
import sys
import subprocess
import logging
from pathlib import Path, PosixPath
import json
import contextlib
import re

import zipfile
import lzma
import gzip

import pickle
import math
import struct
import hashlib
import shutil
import collections.abc
from collections import defaultdict
import string
import secrets
import tempfile
import itertools
from datetime import datetime
from bs4 import BeautifulSoup

import numpy as np
import pandas as pd

from concurrent.futures import ProcessPoolExecutor

from sys_flow_v2.flow_constants import MODE_HARDWARE
from sys_flow_v2.onnx_op_stats import onnx_info
from sys_flow_v2.util_lib import load_zip_jsons

import snoop
DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False
snoop.install(enabled=DEBUG)

if DEBUG:
    from IPython.terminal import embed as emb

    terminal = emb.InteractiveShellEmbed()
    terminal.extension_manager.load_extension("autoreload")
    terminal.run_line_magic("autoreload", "2")
    embed = terminal.mainloop
else:
    embed = lambda: None


# functions on loading text file
def twos_comp(val, bits):
    """Compute the 2's complement of int value val."""
    # if the sign bit is set (i.e., val is negative for the given bit width)
    if (val & (1 << (bits - 1))) != 0:
        # convert to the unsigned two's-complement representation
        val = (1 << bits) + val
    # otherwise return the positive value as is
    return val


def array_le2flt(arr, n_flt: int, n_byte: int = 4):
    """Convert an array of unsigned integer bytes (little endian) to floats.

    INPUT:
    * n_flt: how many float numbers to extract
    * n_byte: how many bytes make up one float.

    Used to convert scale to float.
    """
    return struct.unpack(f"{n_flt}f", struct.pack(f"<{n_flt*n_byte}B", *arr))


def intle2flt(i):
    packed = struct.pack('<I', i)
    return struct.unpack('<f', packed)[0]


array_intle2flt = np.vectorize(intle2flt)


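# Illustrative examples of the bit-level conversions above (values assumed,
# assuming a little-endian host for array_le2flt):
#   twos_comp(-1, 8)                        -> 255   (unsigned 8-bit encoding)
#   intle2flt(1065353216)                   -> 1.0   (0x3F800000 is IEEE-754 for 1.0)
#   array_le2flt([0, 0, 128, 63], n_flt=1)  -> (1.0,)

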
def load_txt(filename):
    """Load dynasty dump text (float data) as flattened data."""
    return np.array(pd.read_csv(filename, names=["fx"])["fx"], dtype=np.float32)


def verify_txt_suffix(fn):
    """Verify text file by suffix."""
    if DEBUG:
        suffix = [".txt", ".seq"]
        is_txt = any([fn.endswith(a) for a in suffix])
        if not is_txt:
            raise ValueError(f"only support `.npy`/`.txt`/`.seq`. but got {fn}")


def txt2np_fl(filename, shape):
    """Load dynasty dumped text (float data) into numpy with given shape."""
    fn = Path(filename).name
    if fn.endswith(".npy"):
        # TODO: maybe check shape
        return np.load(filename).reshape(shape)
    else:
        # treat as txt
        verify_txt_suffix(fn)
        return np.array(
            pd.read_csv(filename, names=["data"])["data"], dtype=np.float32
        ).reshape(shape)


def txt2np_fx(filename, shape):
    """Load dynasty dumped text (fixed-point data) into numpy with given shape."""
    fn = Path(filename).name
    if fn.endswith(".npy"):
        # TODO: maybe check shape
        return np.load(filename)
    else:
        verify_txt_suffix(fn)
        return np.array(
            pd.read_csv(filename, names=["data"])["data"], dtype=np.int32
        ).reshape(shape)


def df2pkl(df, fn):
    """Dump a python object to an lzma compressed pickle file.

    fn is suggested to end with .pkl.xz
    """
    with lzma.open(fn, 'wb') as f:
        pickle.dump(df, f)


def pkl2df(fn):
    """Load a python object from an lzma compressed pickle file."""
    with lzma.open(fn, 'rb') as f:
        df = pickle.load(f)
    return df


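# Example round trip (illustrative; the file name is an assumption):
#   df2pkl({"a": np.zeros(4)}, "inputs.pkl.xz")
#   d = pkl2df("inputs.pkl.xz")   # -> {"a": array([0., 0., 0., 0.])}

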
def dir2pkl(p_dir):
    p_input = Path(p_dir)
    p_xz_s = list(p_input.glob("*_inputs/*.xz"))
    print(f"Found {len(p_xz_s)} xz files")
    if len(p_xz_s) == 0:
        print(f"ERROR: found 0 xz file in {p_dir}")
        return

    d_xz = [pkl2df(a) for a in p_xz_s]

    dp_ins = set(d_xz[0].keys())

    assert all(set(d.keys()) == dp_ins for d in d_xz), "xz files have different input keys."

    np_in = {}
    for k_in in dp_ins:
        # there is only one entry per key in a solution-dumped xz file.
        np_in[k_in] = [a[k_in][0] for a in d_xz]

    # save to the working directory
    fn_pkl = f"{p_input.name}.pkl.xz"
    df2pkl(np_in, fn_pkl)
    print(f" np_in dumped to {fn_pkl}")

    return fn_pkl


# get ioinfo from onnx or bie
def get_ioinfo_from_onnx(p_onnx):
    """Get input/output nodes info from onnx.

    Info includes:
    * input node name with order
    * output node name and shape
    """
    oinfo = onnx_info(p_onnx)
    input_nodes, output_nodes, _ = oinfo.get_ioinfo()
    out_node_shape = {dp_out: oinfo.dp_shape[dp_out]["dims"] for dp_out in output_nodes}
    # Note: keep same interface for get_ioinfo_from_bie / get_ioinfo_from_bie2
    ioinfo = None
    return input_nodes, output_nodes, out_node_shape, ioinfo


def dp2dyn_dump(dp, graph_in, graph_out, i_loop=None):
    """Give the dynasty dump name for a given dp.

    A special prefix is used for model input / output nodes.

    A special suffix is used for nodes inside a loop.
    """
    fn_dump = clean_name(dp)

    if dp in graph_in:
        # if graph input/output, add a special prefix
        fn_dump = f"input_{fn_dump}"
    elif dp in graph_out:
        fn_dump = f"output_{fn_dump}"
    else:
        # normal datapath; not in dp_in
        pass

    if i_loop is not None:
        fn_dump = f"{fn_dump}_iteration_{i_loop}"

    return fn_dump


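# Illustrative naming (datapath names are assumptions):
#   dp2dyn_dump("conv1_o0", graph_in=[], graph_out=[])                    -> "conv1_o0"
#   dp2dyn_dump("data_o0", graph_in=["data_o0"], graph_out=[])            -> "input_data_o0"
#   dp2dyn_dump("out_o0", graph_in=[], graph_out=["out_o0"], i_loop=2)    -> "output_out_o0_iteration_2"

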
def parse_shape_info(j):
    """Parse op/dp info from knerex shapeinfo.json.

    - 0.23.0 name is shape_info.json
    - 0.26.0 name changed to snr_shape_info.json
      (the compiler provides another shape_info.json)
    """
    if "op2dps" in j:
        # knerex from 0.23.0 will give op2dps and ops
        nodes_w_dp = list(j["op2dps"].keys())  # no order.

        # TODO: use this to make sure it is correct
        # nodes_outnode = [a for a in nodes_w_dp if a.startswith("OutputNode_")]

        # j["ops"] with order, but need to remove OutputNode
        nodes = [node for node in j["ops"] if node in nodes_w_dp]
        node2dp = j["op2dps"]
        dp2node = {dp: op for op, dps in node2dp.items() for dp in dps}
        dp2idx_in_node = {dp: i_dp
                          for op, dps in node2dp.items()
                          for i_dp, dp in enumerate(dps)}

    elif "ops" in j:
        # old way. TODELETE. knerex will only give ops. no op2dps
        nodes = j["ops"]

        dp2node = {}
        node2dp = {}
        for dp in j["dp_info"]:
            nd = j["dp_info"][dp]["node_name"][0]
            dp2node[dp] = nd
            # multi-output is not supported without "op2dps";
            # we assume each node has exactly one datapath
            assert nd not in node2dp, (
                f"node ({nd}) has dp ({node2dp[nd]}) already. "
                f"Trying to append dp ({dp})."
                "Old knerex format without op2dps info does not support multi-output."
            )
            # one node may have multiple dp outputs,
            # but we don't have the correct order of the dps!
            node2dp[nd] = [dp]
        # each dp is always the 0th dp of its node.
        dp2idx_in_node = {}

        # remove OutputNode_*, they are dummy nodes without output datapath
        def is_outputNode(node, node2dp):
            return (node not in node2dp) and node.startswith("OutputNode_")
        nodes = [node for node in nodes if not is_outputNode(node, node2dp)]

    else:
        raise NotImplementedError("Missing op2dps / ops in shapeinfo.json")

    # onnx_shape
    dp_shape = {k: tuple(j["dp_info"][k]["onnx_shape"]) for k in dp2node.keys()}
    dp_hw_c = {k: j["dp_info"][k]["hw_c_in_onnx"][0] for k in dp2node.keys()}

    # get graph in/output dp
    graph_dp_out = j["dp_out"]
    graph_dp_in = j["dp_in"]

    # get dump and index in graph. (support subgraph)
    dp2dump = dict()
    # index is used to sort the snr report
    dp2index = dict()
    subgraph = j.get("subgraph", None)
    for i_op, op in enumerate(nodes):
        # NOTE: we can get OPs in straightened order.
        # each OP may have multiple DPs
        dps = node2dp[op]

        for i_dp, dp in enumerate(dps):
            # some dps may run in loops, so dp2dump and dp2index have different indices;
            # this is the main graph, so use a "-" placeholder
            dp_index = (dp, "-")
            dp2dump[dp_index] = dp2dyn_dump(dp, graph_dp_in, graph_dp_out)
            idx_2nd = f"o{i_dp}" if len(dps) > 1 else "-"
            dp2index[dp_index] = (i_op, idx_2nd)

        # check subgraph. some OP may be a loop node.
        if subgraph and op in subgraph:
            this_sub = subgraph[op]
            if "op_outs" in this_sub:
                # backward compatible with the old single-output format:
                # wrap each output into a one-element list
                op_outs = [[t] for t in this_sub["op_outs"]]
            elif "op2dps" in this_sub:
                d2 = this_sub["op2dps"]
                sub_ops = this_sub["ops"]
                op_outs = [d2[sub_op] for sub_op in sub_ops if sub_op in d2]
            else:
                raise NotImplementedError("Missing op2dps / op_outs for "
                                          f"subgraph {op} in shapeinfo.json")

            # op_outs is a list of lists; flatten it into dps
            sub_outs = list(itertools.chain(*op_outs))
            N_dp = len(sub_outs)

            n_loop = this_sub["max_count"][0]  # why does knerex give a list here?
            for i_loop in range(n_loop):
                for i_dp, dp_name in enumerate(sub_outs):
                    dp_index = (dp_name, i_loop)
                    dp2index[dp_index] = (i_op, N_dp * i_loop + i_dp)
                    dp2dump[dp_index] = dp2dyn_dump(dp_name,
                                                    graph_dp_in,
                                                    graph_dp_out,
                                                    i_loop=i_loop)

    return (nodes, dp2node, node2dp, dp2idx_in_node, dp_shape, dp_hw_c,
            dp2index, dp2dump, graph_dp_in, graph_dp_out)


def is_zip_file(file_path):
    """Judge whether a file is a zip by its magic number."""
    with open(file_path, 'rb') as f:
        return f.read(4) == b'PK\x03\x04'


def get_ioinfo_from_bie(
    p_bie, hw_mode, dyn_bin="/workspace/libs_V2/dynasty/run_fix_inference"
):
    """Get input/output nodes info from bie.

    Info includes:
    * input node name with order
    * output node name and shape
    Not included:
    * (NOT) datapath (in+out) fx info: bw, radix, scales per channel
    """
    # detour for bie2
    if is_zip_file(p_bie):
        return get_ioinfo_from_bie2(p_bie)

    p_working = Path(tempfile.mkdtemp(prefix="unpack_bie_"))

    cmd = f"{dyn_bin} -m {p_bie} -t 1 -p kl{hw_mode} -e -o {p_working}"
    cp = run_bash_script(cmd)
    assert (
        cp.returncode == 0
    ), f"Failed to extract fx info from bie. Return code {cp.returncode}"

    p_j = p_working / "SnrShapeInfo.json"
    assert p_j.exists(), f"output missing: {p_j}"
    with open(p_j, "r") as f:
        j_shape = json.load(f)

    _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
    # just need the graph output datapath shapes
    dp_out_shape = {k: shape_info[k] for k in dp_out}

    # TODO: delete folder p_working

    # the last one is the optional ioinfo.json;
    # only available in 0.24.0 for bit-true-match
    return dp_in, dp_out, dp_out_shape, None


def load_ioinfo_json_and_verify(js, dp_in, dp_out):
    """Load ioinfo.json (usually for debug).

    - dynasty needs ioinfo.json/calculation_info.json for btm in some SPECIAL cases. optional
    - ioinfo.json available in 0.24.0 (regression converts it from the compiler dump then inserts it.)
    - calculation_info.json available in 0.26.0 (inserted by the compiler)

    DIFFERENT FORMAT!
    """
    k = "ioinfo.json"
    if k in js:
        ioinfo = js[k]

        if DEBUG:
            # do some verification
            set_dp_in2 = set([a["name"] for a in ioinfo["input"]])
            if set_dp_in2 != set(dp_in):
                msg = f"knerex {k} info does not match. given dp_in: {dp_in}, but ioinfo gives: {set_dp_in2}"
                raise ValueError(msg)

            set_dp_out2 = set([a["name"] for a in ioinfo["output"]])
            if set_dp_out2 != set(dp_out):
                msg = f"knerex {k} info does not match. given dp_out: {dp_out}, but ioinfo gives: {set_dp_out2}"
                raise ValueError(msg)
    else:
        ioinfo = None
    return ioinfo


def extract_outputnode(js):
    js_k = [k for k in js.keys() if k.startswith("OutputNode_")]
    return js_k


def extract_dp2node(js):
    dp2node = {}
    tk = "input_tensor_name"  # specified by knerex
    for k in extract_outputnode(js):
        if tk in js[k]:
            nodename = k
            # for now, js[k][tk] is a list of one tensor.
            dpname = "OutputNode_" + js[k][tk][0]
            if nodename != dpname:
                dp2node[dpname] = nodename
    return dp2node


def get_ioinfo_from_radix_info(js, dp_in, dp_out):
    """Simulate ioinfo from knerex's radix_info.json.

    TODO: make it compatible with ioinfo.json. refer to parse_setup_json_v2
    """
    ioinfo = {}
    ioinfo["note"] = "created by get_ioinfo_from_radix_info."

    k1 = "radix_info.json"
    if k1 in js:
        # NOTE: radix_info.json/OutputNode_NODE_NAME may be different from the datapath name.
        # here is the fix
        dp2node = extract_dp2node(js[k1])
        for dp, node in dp2node.items():
            if dp not in js[k1]:
                # knerex keys radix_info.json by node name, but regression looks up
                # by datapath name, so copy the entry.
                js[k1][dp] = js[k1][node]

        ioinfo["input"] = {k: js[k1][k] for k in dp_in}
        try:
            ioinfo["output"] = {k: js[k1][f"OutputNode_{k}"] for k in dp_out}
        except KeyError:
            lst_output_nodes = extract_outputnode(js[k1])
            raise KeyError(f"""
            output tensor name different from node name. Please use the latest toolchain.
            radix_info.json provided output nodes: {lst_output_nodes}
            SnrShapeInfo.json provided output nodes: {list(dp_out)}
            """)

    k2 = "calculation_info.json"
    if k2 in js:
        ioinfo["calculation_info"] = js[k2]

    if len(ioinfo) == 1:
        return None
    return ioinfo


def get_ioinfo_from_bie2(p_bie2):
    """Parse ioinfo from the bie2 format.

    NOTE:
        should give the same output as get_ioinfo_from_bie.
    """
    js = load_zip_jsons(p_bie2)

    k1 = "shape_info.json"  # from 0.23.0
    k2 = "snr_shape_info.json"  # from 0.25.0
    if k2 not in js and k1 not in js:
        msg = f"NO {k2} or {k1} found in bie {p_bie2}. Only found: {list(js.keys())}"
        raise FileNotFoundError(msg)
    k = k2 if k2 in js else k1
    _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(js[k])
    # just need the graph output datapath shapes
    dp_out_shape = {k: shape_info[k] for k in dp_out}

    ioinfo = load_ioinfo_json_and_verify(js, dp_in, dp_out)

    if ioinfo is None:
        ioinfo = get_ioinfo_from_radix_info(js, dp_in, dp_out)

    return dp_in, dp_out, dp_out_shape, ioinfo


def find_input_txt_folder(p_model, pref="knerex_input"):
    """Find all input folders.

    The input folders should be:

    - knerex_input / knerex_input_1 / knerex_input_2 ... (for models with multiple inputs), or
    - simulator_input / simulator_input_1 / simulator_input_2 ... (for models with multiple inputs)
    """
    lst = []

    p_in = p_model / "input" / pref
    if p_in.exists():
        lst.append(p_in)
    else:
        return None

    for i in range(1, 100):
        p_in = p_model / "input" / f"{pref}_{i}"
        if p_in.exists():
            lst.append(p_in)
        else:
            return lst
    else:
        print("should not arrive here")
        return lst


def get_input_txt_list(p_in):
    """List input txt names in the given folder.

    test_input.txt will be the 1st one if it exists.
    """
    fns = [fn.name for fn in list(p_in.glob("*.txt"))]
    fn_default = "test_input.txt"
    if fn_default in fns:
        # move fn_default to the first position
        fns.remove(fn_default)
        return [fn_default] + fns
    else:
        return fns


def need_compress_command_bin(tc_cat, tc_name):
    """Special mark for some special cases."""
    if tc_cat.startswith("m"):
        big_kernels = [
            "bk23x23",
            "bk25x25",
            "bk27x27",
            "bk29x29",
            "bk31x31",
            "bk33x33",
            "bk35x35",
        ]
        return any([a in tc_name for a in big_kernels])
    return False


def guess_model_id(s):
    sr = re.compile(r"model_(\d+)*")
    try:
        return int(sr.findall(s)[0])
    except:
        return 32768


def clean_case_name(x):
    """Normalize the case names.

    The case name in the final report may have extra info:

    - xxx (known bug) // remove the space and everything after it
    - model_ddd_xxxxxx_append // remove the _xxxxxx commit part
    """
    def remove_append(x):
        """works when there is no space in the name"""
        return x.split(" ")[0]

    def remove_model_share_commit(x):
        """if this is a model_share case with a commit number in it"""
        s = re.compile(r"(model_\d{3})_[\da-f]{6}(_.*)")
        finds = s.findall(x)
        if len(finds) == 1:
            # found the pattern
            return "".join(finds[0])
        else:
            # no change.
            return x

    return remove_model_share_commit(remove_append(x))


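# Example (illustrative name):
#   clean_case_name("model_123_abc123_mobilenet (known bug)") -> "model_123_mobilenet"

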
def relative_path(target, origin):
    """Return the path of target relative to origin.

    NOTE: .resolve() would follow symlinks to their targets, which does not work in our case.
    """
    # copied from https://stackoverflow.com/questions/38083555/using-pathlibs-relative-to-for-directories-on-the-same-level
    p_t = Path(target)
    p_o = Path(origin)
    try:
        return p_t.absolute().relative_to(p_o.absolute())
    except ValueError:  # target does not start with origin
        # recurse with the parent of origin (eventually origin is root, so the try will succeed)
        return Path("..").joinpath(relative_path(target, p_o.parent))


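# Example (illustrative paths):
#   relative_path("/data/models/a", "/data/reports/x") -> Path("../../models/a")

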
class CustomCP:
    """A customized cp (completed-process object) to return on timeout."""
    def __init__(self, returncode, stdout=None, stderr=None):
        """Init this cp."""
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Make this cp printable."""
        return f'Return Code: {self.returncode}, Error Message: {self.stderr}'


def run_bash_script(command, do_echo=False, fail_then_exit=False, timeout=60*60*2):
    """Kneron wrapper around bash commands.

    Inputs:
    - timeout: how many seconds the command may run
    """
    if type(command) is list:
        command = " ".join(command)

    try:
        cp = subprocess.run(
            command,
            shell=True,
            executable="/bin/bash",
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout
        )
    except subprocess.TimeoutExpired:
        cp = CustomCP(111, stdout=f"Command: {command}", stderr=f"TIMEOUT ({timeout}s)")

    if do_echo or (cp.returncode != 0 and DEBUG):
        print("-------------------------------------------------------------")
        print(f"running command: {command}")
        if cp.returncode != 0:
            print(f"Failed at {cp.returncode}")
        print("-------------------------------------------------------------")
        print(cp.stdout)
        print(cp.stderr)
        print("-------------------------------------------------------------")

    if cp.returncode != 0 and fail_then_exit:
        print(f"Failed to run {command}. Exit code: {cp.returncode}")
        print("Will not continue. exit.")
        sys.exit(1)
    return cp


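# Example usage (illustrative command):
#   cp = run_bash_script("ls -l /tmp", do_echo=True, timeout=30)
#   if cp.returncode != 0:
#       print(cp.stderr)

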
def check_parallel_log(fn_log):
    """Check the parallel log file.

    NOTE: sometimes dynasty is killed by a segmentation fault but the return code is 0.
    We need to make sure both the exit value and the signal are zero.
    """
    exitvals = []
    signals = []

    with open(fn_log, 'r') as file:
        for i, line in enumerate(file):
            if i == 0:
                # skip the headline
                continue
            parts = line.split()
            if len(parts) > 7:  # ensuring the line has enough parts (exit value and signal)
                try:
                    exitvals.append(int(parts[6]))
                    signals.append(int(parts[7]))
                except:
                    pass

    e1 = tuple(a for a in exitvals if a != 0)
    e2 = tuple(a for a in signals if a != 0)
    msg = []
    if len(e1) > 0:
        # usually does not happen here
        msg.append(f"error: {e1}")
    if len(e2) > 0:
        msg.append(f"signal: {e2}")
    return "//".join(msg), e1, e2


def set_folder_public(p_out):
    """Make the output path readable for the basic group."""
    dir_out = Path(p_out)
    if not dir_out.exists():
        # in case it had been deleted before test_case.__del__ was triggered
        return
    script = f"""
    chgrp -R 50000 {dir_out}
    chmod 755 {dir_out}
    find {dir_out} -type d -exec chmod 755 {{}} \\;
    find {dir_out} -type f -exec chmod 644 {{}} \\;
    """

    run_bash_script(script, do_echo=False)


def assert_nodes_exists(fn_onnx, node_list):
    """Kneron solutions may apply special processing to certain nodes, specified by name.

    Use this check to make sure the onnx has not changed."""
    import onnx

    o = onnx.load_model(fn_onnx, load_external_data=False)
    nodes_all = [a.name for a in o.graph.node]
    missing = False
    print("check {} for special nodes {}".format(fn_onnx, node_list))
    for node in node_list:
        if node not in nodes_all:
            print("ERROR: node {} does not exist. check with ALG team".format(node))
            missing = True
    if missing:
        raise FileNotFoundError


def detect_valid_model(dir_in):
    """Given a path, detect valid models under it."""
    p_base = Path(dir_in).resolve()
    onnx = p_base.glob("**/*.origin.onnx")

    for o in onnx:
        p_model = o.parent.parent
        if is_valid_case(p_model):
            yield p_model


def is_valid_case(dir_case):
    """Is this a valid test case?

    XXXXX/input/XXXXX.origin.onnx (or bie)
    XXXXX must be the same

    XXXXX/input/knerex_input must exist
    The txt files inside are not checked.
    """
    p_case = Path(dir_case)

    if not p_case.is_dir():
        # print(f"{p_case} is not dir.")
        # TODO: return extra string
        return False

    p_origin = p_case / f"input/{p_case.name}.origin.onnx"
    if not p_origin.exists():
        p_origin = p_case / f"input/{p_case.name}.origin.bie"
        if not p_origin.exists():
            return False

    p_knerex_input = p_case / "input" / "knerex_input"
    if not p_knerex_input.exists():
        return False

    return True


def is_success_case(p_case):
    """Use output/success as a quick judgement.

    NOTE: currently only checks output/success,
    not output_dongle / etc.
    """
    p_sign = p_case / "output/success"
    return p_sign.exists()


def filter_cases(dir_base, keywords=[]):
    """Find all test cases in dir_base.

    filter out bad test cases, e.g., missing input or origin.onnx
    select only the test cases matching the keywords
    output: list of paths to test cases
    """
    excludekeywords = []
    if "-e" in keywords:
        excludekeywords = keywords[keywords.index("-e") + 1:]
        keywords = keywords[0: keywords.index("-e")]

    whitelist = []
    if "-f" in keywords:
        whitefile = keywords[keywords.index("-f") + 1]
        keywords = keywords[0: keywords.index("-f")]
        with open(whitefile, "r") as f:
            lineList = f.readlines()
            whitelist[:] = [x.strip() for x in lineList if x.strip()]

    # find all second-level sub folders
    case_all = list(Path(dir_base).glob("*/*"))
    case_all.sort()

    # ONLY keep cases including ALL keywords.
    # self.logger.debug("search cases using keywords: {}".format(keywords))
    case_selected = [
        a
        for a in case_all
        if all(k in a.name or k in a.parent.name for k in keywords)
    ]

    if len(excludekeywords):
        case_selected = [
            a
            for a in case_selected
            if not any(k in a.name or k in a.parent.name for k in excludekeywords)
        ]

    if len(whitelist):
        case_selected = [
            a
            for a in case_selected
            if any(
                all((s in a.name or s in a.parent.name) for s in k.split())
                for k in whitelist
            )
        ]
    case_selected = [a for a in case_selected if is_valid_case(a)]

    case_success = [a for a in case_selected if is_success_case(a)]
    case_not_success = [a for a in case_selected if not is_success_case(a)]

    return case_selected, case_all, case_success, case_not_success


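# Example (illustrative paths/keywords):
#   selected, _, ok, failed = filter_cases("/data/cases", ["mobilenet", "-e", "int16"])
#   # keeps cases whose case or category folder name contains "mobilenet",
#   # excluding those containing "int16"

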
# TODELETE
# def filter_failed_cases(case_selected, fn_config, p_report):
#     p_config = Path(fn_config)
#     command = f"pushd {p_report} >> /dev/null && grep {p_config.name} *.info | sort | tail -n 1 | awk -F: '{{ print $1 }}'"
#     cp = subprocess.run(
#         command,
#         shell=True,
#         executable="/bin/bash",
#         check=False,
#         capture_output=True,
#         text=True,
#     )
#     fn_info = cp.stdout.strip()
#     if len(fn_info) == 0:
#         # found nothing, do nothing
#         return case_selected
#
#     # fn_info should be `run_YYYYMMDD_hhmmss_TAG_regression.info`
#     fn_status = fn_info.replace("_regression.info", "_status.csv")
#     p_status = Path(p_report) / fn_status
#     if not p_status.exists():
#         # no status found! something wrong. should I look for another one before?
#         return case_selected
#
#     # load status.csv
#     try:
#         df = pd.read_csv(str(p_status), header=[0, 1], index_col=[0, 1])
#         index_success = set(df.loc[df["general"]["Success"] == "✓"].index)
#         cases_failed = [
#             a for a in case_selected if (a.parent.name, a.name) not in index_success
#         ]
#         return cases_failed
#     except Exception as e:
#         pp(e)
#         return case_selected


def md5sum(filePath):
    """Compute the md5sum of a file/folder.

    Does not support a python object as input;
    pickle the object to a file first if needed.
    (Using str() to get an object representation would not work properly for
    large lists / numpy matrices, because python only prints part of the data.)
    """
    assert type(filePath) in [str, PosixPath], f"md5sum works on file only, but got {type(filePath)}"

    def do_exclude(p_f):
        ignore_patterns = ["__pycache__"]
        for ig in ignore_patterns:
            if ig in str(p_f):
                return True

        return False

    def md5_update(m, fp):
        # TODO: maybe an OO approach would be better
        with open(fp, "rb") as fh:
            while True:
                data = fh.read(8192)
                if not data:
                    break
                m.update(data)
        return m

    def md5sum_folder(p_folder):
        l1 = list(p_folder.iterdir())
        l2 = [t for t in l1 if not (do_exclude(t)) and not t.is_dir()]
        # sorting is crucial for the md5 calculation
        l2.sort(key=lambda a: str(a))

        m = hashlib.md5()
        for tf in l2:
            m = md5_update(m, tf)

        return m.hexdigest()

    p = Path(filePath)
    if p.is_file():
        m = hashlib.md5()
        m = md5_update(m, p)
        return m.hexdigest()
    elif p.is_dir():
        return md5sum_folder(p)
    else:
        raise NotImplementedError(f"{p} is neither a file nor a folder. Check existence!")


def list2chunks(lst, k):
    """Yield successive k chunks from lst."""
    n = math.ceil(len(lst) / k)
    for i in range(0, len(lst), n):
        yield lst[i : i + n]


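# Example: list(list2chunks([1, 2, 3, 4, 5], 2)) -> [[1, 2, 3], [4, 5]]

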
def let_user_pick(options, create_new=False):
    if len(options) == 0:
        if create_new:
            return input("\nInput new message: ")
        else:
            raise AttributeError

    # if options are available, pick one
    if create_new:
        # option to create a new one
        options.append("Create new?")
    while True:
        print("Please choose:")
        for idx, element in enumerate(options):
            print("{}) {}".format(idx + 1, element))
        i = input("Enter number: ")
        try:
            ii = int(i) - 1
            if 0 <= ii < len(options):
                if create_new and ii == len(options) - 1:
                    # create new
                    return input("\nInput new message: ")
                else:
                    return options[ii]
        except:
            pass


def create_zip(fn_zip, fns, p_base=None):
    """Create a zip with the given files in a base folder.

    BUG: if different files share the same name in one folder,
    only the last one will be kept.
    """
    if isinstance(fns, list):
        # no name change: for a list of filenames, just use the original names,
        # but they need to be turned into a dict
        fns = [Path(fn) for fn in fns if fn]
        fns = {p.name: p for p in fns}
    assert isinstance(fns, dict), f"parameter fns must be list or dict. but got {type(fns)}"

    with zipfile.ZipFile(fn_zip, "w", zipfile.ZIP_DEFLATED) as zf:
        for new_name, fn in fns.items():
            pf = Path(fn)
            if not pf.exists():
                continue

            if p_base is None:
                arcname = new_name
            else:
                pf2 = pf.parent / new_name
                arcname = str(pf2.relative_to(p_base))
            zf.write(filename=str(pf), arcname=arcname)


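# Example usage (illustrative paths):
#   create_zip("report.zip", ["out/snr.csv", "out/log.txt"])                     # archive by file name
#   create_zip("report.zip", {"renamed.csv": "out/snr.csv"}, p_base="out")       # archive name relative to p_base

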
def preprocess_json(json_str):
    # replace inf with the string "Infinity" (supported by the JSON5 standard)
    json_str = re.sub(r':\s*inf\b', ': "Infinity"', json_str, flags=re.IGNORECASE)
    json_str = re.sub(r':\s*-inf\b', ': "-Infinity"', json_str, flags=re.IGNORECASE)
    return json_str


class NumpyEncoder(json.JSONEncoder):
    """To save numpy arrays in json.

    From `numpy array is not json serializable`_ .

    .. _numpy array is not json serializable: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable
    """

    def default(self, obj):
        """Set the default conversion."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return json.JSONEncoder.default(self, obj)


def dict2json(d, fn_json):
    """Customized function to save a dict to a json file.

    TODO:
        merge similar functions.
    """
    with open(fn_json, "w") as f:
        json.dump(d, f, indent=4, sort_keys=True, cls=NumpyEncoder)


def load_regression_json(fn_json):
    def convert_key(k):
        d = {str(plt): plt for plt in MODE_HARDWARE}
        return d.get(k, k)

    def jsonKeys2int(x):
        # refer to https://stackoverflow.com/questions/1450957/pythons-json-module-converts-int-dictionary-keys-to-strings
        if isinstance(x, dict):
            return {convert_key(k): v for k, v in x.items()}
        return x

    with open(fn_json, "r") as f:
        j = json.load(f, object_hook=jsonKeys2int)

    return j


def clean_name(this_name):
    """Remove special characters from the given string.

    Some node names contain a slash, e.g., batch_normalization_9/gamma:0_o0.
    Keep this as a function so that all conversions stay consistent.
    """
    return this_name.replace("/", "_")


def clean_file_name(this_name):
    """Clean up the given name so it can be used as a file name.

    The input `this_name` is given by customers.
    It may contain " ", "()", etc.

    Returns:
        str: the cleaned file name, with spaces, parentheses and other special characters removed
    """
    if not this_name:
        return "model"

    # get the file extension
    p_file = Path(this_name)
    suffix = p_file.suffix
    name_without_ext = p_file.stem

    # remove or replace characters that may cause problems:
    # replace spaces with underscores
    cleaned_name = name_without_ext.replace(" ", "_")

    # for UTF-8 names, use a more permissive regular expression:
    # keep alphanumerics (\w includes Unicode letters and digits),
    # underscores, hyphens and dots
    cleaned_name = re.sub(r'[^\w._-]', '', cleaned_name, flags=re.UNICODE)

    # collapse consecutive underscores
    cleaned_name = re.sub(r'_+', '_', cleaned_name)

    # strip leading/trailing underscores and dots
    cleaned_name = cleaned_name.strip('_.')

    # if nothing is left after cleaning, fall back to the default name
    if not cleaned_name:
        cleaned_name = "model"

    # if cleaned_name is too long, raise an error
    if len(cleaned_name) > 200:
        raise ValueError(f"File name {cleaned_name} is too long (limit: 200 characters).")

    # re-attach the file extension
    return cleaned_name + suffix


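# Example: clean_file_name("my model (v2).onnx") -> "my_model_v2.onnx"

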
def remove_appendix(this_name):
    """Kneron toolchain generated onnx may carry multiple appended suffixes.
    Remove all of them.
    """
    return (
        this_name.removesuffix(".onnx")
        .removesuffix(".bie")
        .removesuffix(".origin")
        .removesuffix(".decomposed")
        .removesuffix(".wqbi")
        .removesuffix(".quan")
        .removesuffix(".scaled")
    )


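# Example: remove_appendix("model_123.wqbi.onnx") -> "model_123"

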
def load_np_in(np_in):
    """Load and validate the input data.

    Args:
        np_in: one of the following:
            - dict: returned as is
            - str or Path: path to a pickle file; the dict inside is loaded and returned
              - .pkl: loaded directly with pickle
              - .pkl.xz: loaded with pkl2df

    Returns:
        dict: the dictionary containing the input data

    Raises:
        FileNotFoundError: when the input file does not exist
        TypeError: when the input type is wrong or the loaded data is not a dict
        RuntimeError: when loading the file fails
    """
    if isinstance(np_in, dict):
        return np_in

    # Convert to a Path object if given a string
    if isinstance(np_in, (str, Path)):
        p_in = Path(np_in)
        if not p_in.exists():
            raise FileNotFoundError(f"Input file does not exist: {p_in}")

        try:
            if p_in.name.endswith('.pkl.xz'):
                result = pkl2df(p_in)
            elif p_in.name.endswith('.pkl'):
                with open(p_in, 'rb') as f:
                    result = pickle.load(f)
            else:
                raise ValueError(f"Unsupported file format: {p_in.name}")

            if not isinstance(result, dict):
                raise TypeError(f"Loaded object is not a dictionary: {type(result)}")
            return result
        except Exception as e:
            raise RuntimeError(f"Failed to load dictionary from {p_in}: {str(e)}")

    raise TypeError(f"Input must be a dictionary or file path, got: {type(np_in)}")


def verify_input_shape_onnx_npy(p_onnx, np_txt):
    """Verify that np_txt has the same shapes as the p_onnx inputs."""
    o = onnx_info(p_onnx)
    d_in_shape = o.get_onnx_input_size()

    # check keys
    k1 = set(d_in_shape.keys())
    k2 = set(np_txt.keys())
    assert k1 == k2, f"Onnx specified input nodes: {list(k1)}, but the numpy passed in is {list(k2)}. Please check input numpy data."

    # check that np_txt elements are lists and have the same length
    all_list = [isinstance(v, list) for k, v in np_txt.items()]
    assert all(all_list), """Not all npy inputs are lists. The format should be like: {"in1": [np1_1, np1_2], "in2": [np2_1, np2_2]}"""

    all_len = [len(v) for k, v in np_txt.items()]
    assert len(set(all_len)) == 1, f"""
    The input lists per input node should have the same length.
    But the given lists have different lengths: {all_len}."""
    assert all_len[0] > 0, """np_txt got an EMPTY list!
    Please check your script, especially the image path."""

    for k in list(k2):
        o_shape = tuple(d_in_shape[k])
        diff_shape = [tuple(v.shape) for v in np_txt[k] if v.shape != o_shape]
        assert len(diff_shape) == 0, f"""
        Input node ({k}) has shape ({o_shape}),
        but the numpy list has different shapes of: {list(set(diff_shape))}.
        Please check the numpy input.
        """


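# Expected np_txt format (illustrative; node names, shapes and the onnx path are assumptions):
#   np_txt = {
#       "input_1": [np.zeros((1, 3, 224, 224), dtype=np.float32)],
#       "input_2": [np.zeros((1, 10), dtype=np.float32)],
#   }
#   verify_input_shape_onnx_npy("model.onnx", np_txt)

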
# TODELETE: just use dict.get(x, y)
def get_switch_value(this_map, this_key, default):
    if this_key in this_map:
        return this_map[this_key]
    else:
        return default


def set_default(this_map, this_key, this_value):
    if this_key not in this_map:
        this_map[this_key] = this_value


def create_logger(module_name, fn_log=None, level="WARNING"):
    logger = logging.getLogger(module_name)
    levels = {
        "CRITICAL": logging.CRITICAL,
        "ERROR": logging.ERROR,
        "WARNING": logging.WARNING,
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
    }
    logger.setLevel(levels.get(level.upper(), logging.WARNING))

    if logger.hasHandlers():
        return logger

    # create formatter
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )

    # create a console handler and set its level to debug
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    # add the formatter to ch
    ch.setFormatter(formatter)
    logger.addHandler(ch)

    if fn_log:
        fh = logging.FileHandler(fn_log)
        fh.setFormatter(formatter)
        logger.addHandler(fh)

    return logger


def patch_batch_column(cols):
    """Patch r2 columns for NG.

    old/r2 has 8 columns;
    NG has 9 columns.
    """
    if len(cols) == 9:
        """for ng
        i,0,input_1_o0,1,1,28,28,16W1C8B,4,16W1C8B,4
        o,0,conv2d_4_o0,1,1,1,1,16W1C8B,4,,
        """

        # NOTE: bchw.
        # convert to integer?
        return cols

    elif len(cols) == 4:
        # for 520, the very old format
        """
        i,0,input.1,3,112,112
        o,0,806,256,1,1
        o,1,847,256,1,1
        """
        return cols

    elif len(cols) == 8:
        cols.insert(1, "")
        return cols

    else:
        pp(f"info columns must be 4, 8 or 9. but got {len(cols)} ({cols})")
        raise AttributeError


def get_git_info(git_path):
    """Get git info out of a single git repo.

    NOTE: does not work for submodules
    """
    info = {}

    # sanity check.
    p_git = Path(git_path)
    if not p_git.exists():
        info["error"] = "directory does not exist"
        return info
    if not (p_git / ".git").exists():
        # BUG: git searches from this path up to / until it finds .git .
        # currently I expect to receive exactly the repo path, i.e., none of its subpaths.
        # this part may be removed.
        info["error"] = "{} is not a git repo. `.git` not found.".format(p_git)
        return info

    with working_directory(git_path):
        b = subprocess.run(
            ["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, text=True
        )
        assert b.returncode == 0, "git command failed."
        info["commit"] = b.stdout.strip()

        b = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            stdout=subprocess.PIPE,
            text=True,
        )
        assert b.returncode == 0, "git command failed."
        info["branch"] = b.stdout.strip()

        b = subprocess.run(
            ["git", "config", "--get", "remote.origin.url"],
            stdout=subprocess.PIPE,
            text=True,
        )
        if b.returncode == 0:
            info["remote_url"] = b.stdout.strip()
        else:
            info["remote_url"] = "N/A"

    info["path"] = str(git_path)

    return info


def find_branch(model_id, commit, dir_base="/opt/data/e2e_simulator/app"):
    """Get branch info from the local repo folders, for fx model release."""
    p_base = Path(dir_base)
    assert p_base.exists(), f"{p_base} does not exist."

    print("check model_{}".format(model_id))
    print(
        " * commit graph: http://192.168.200.1:8088/modelshare/model_{}/-/network/master".format(
            model_id
        )
    )

    models = list(p_base.glob("*/models/model_{}".format(model_id)))
    if len(models) == 0:
        print("ERROR: cannot find model_{} from any app.".format(model_id))

    p_model = models[0]
    with working_directory(p_model):
        cmd = ["git", "branch", "-r", "--contains", commit]
        b = subprocess.run(cmd, stdout=subprocess.PIPE, text=True)
        if b.returncode == 0:
            o = b.stdout
            print(o)
        else:
            o = None

    return o, models[0]


def get_model_info(git_path):
    """Models are submodules in an app."""

    info = get_git_info(git_path)

    onnxs = list(git_path.glob("alg/*.onnx"))
    if len(onnxs) != 1:
        print("ERROR: There should only be ONE onnx. but found {}".format(len(onnxs)))
        for o in onnxs:
            print(" * {}".format(o.name))
        print(" path: {}".format(git_path))
        assert False, "Found too many onnx"

    info["onnx"] = str(onnxs[0].relative_to(git_path))
    return info


def get_app_info(app_path, fn_json=None):
    """Get git info for Kneron released apps.

    The apps are listed here: http://192.168.200.1:8088/modelshare?utf8=%E2%9C%93&filter=solution_
    """
    ginfo = {}
    ginfo["app"] = get_git_info(app_path)
    ginfo["models"] = {}

    p_app = Path(app_path)

    models = p_app.glob("models/*")
    for m in models:
        ginfo["models"][m.name] = get_model_info(m)

    # if given a file name, also save to json
    if fn_json is not None:
        with open(fn_json, "w") as f:
            json.dump(ginfo, f, indent=4, sort_keys=True)

    return ginfo


def create_noise_input_folder(dir_from, sigma_levels, override=False):
    """Create noise inputs to test model robustness.

    currently only adds noise to simulator_input / simulator_input_1 / simulator_input_2
    """
    assert "simulator_input" in str(dir_from), "No input/simulator_input folder."

    p_from = Path(dir_from)

    fn_froms = p_from.glob("*.txt")
    ori_s = {f.name: load_txt(str(f)) for f in fn_froms}

    con = np.concatenate(list(ori_s.values()))
    assert len(con.shape) == 1, "found more than 1 shape for noise input."
    i_range = np.max(con) - np.min(con)

    # NOTE: sigma_level = 3 => sigma 1, 1 pixel
    for sigma_level in sigma_levels:
        dir_name = p_from.name.replace(
            "simulator_input", "simulator_input_sigma{}".format(sigma_level)
        )
        p_to = p_from.parent / dir_name
        if p_to.exists():
            if override:
                shutil.rmtree(str(p_to))
            else:
                continue
        p_to.mkdir(mode=0o770, parents=True, exist_ok=True)

        print("Create noise input for sigma {}".format(sigma_level))
        sigma_control = 3
        bit_range = 256  # for 8bit
        n_pixel = list(ori_s.values())[0].shape
        sigma = i_range * sigma_level / (bit_range * sigma_control)

        for k1, v1 in ori_s.items():
            k2 = p_to / k1
            noise = np.random.normal(loc=0, scale=sigma, size=n_pixel)
            v2 = v1 + noise
            np.savetxt(str(k2), v2, fmt="%.10f")


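# Worked example of the sigma formula above (illustrative): for an input whose
# value range i_range is 255 and sigma_level = 3,
#   sigma = 255 * 3 / (256 * 3) ≈ 1.0
# i.e. the added Gaussian noise has a standard deviation of roughly one 8-bit step.

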
def save_array(file_path, array, fmt, compression="npy"):
    """Save an array to disk as txt/npy, with optional compression."""
    # NOTE: we assume the array is onnx shaped.
    # WARNING: previous versions required channel-last
    if compression == 'gzip':
        with gzip.open(file_path.with_suffix('.txt.gz'), 'wt', compresslevel=4) as f:
            np.savetxt(f, array.ravel(), fmt=fmt)
    elif compression == "npy":
        # NOTE: dynasty float only takes float32 numpy input.
        # it does not support integers, but integer arrays are not changed here.
        if np.issubdtype(array.dtype, np.floating):
            array = array.astype(np.float32)
        # NOTE: this numpy array is saved to disk to be read by dynasty-float, which
        # expects C-style order, but sometimes the numpy array uses Fortran order.
        # np.ravel(order="C") is different from np.ravel(order="F");
        # np.ascontiguousarray forces the array to be contiguous in memory (similar to order="C")
        np.save(file_path.with_suffix('.npy'), np.ascontiguousarray(array))
    else:
        np.savetxt(file_path, array.ravel(), fmt=fmt)


def sanity_check_npy(np_txt, input_names, filename=None, compression="npy", ch_last=False):
    """Sanity check the inputs; make sure they match each other."""
    # sanity check on node names.
    names1 = set(input_names)
    names2 = set(np_txt.keys())
    if names1 != names2:
        raise ValueError(f"ERROR: input name does not match: onnx input ({list(names1)}) vs given np ({list(names2)})")

    # sanity check np_txt, which is a dict of lists of numpy arrays
    n_pairs = {k: len(v) for k, v in np_txt.items()}
    if len(set(n_pairs.values())) > 1:
        raise ValueError(f"np_txt input nodes have DIFFERENT lengths: {n_pairs}. They must be the SAME.")

    # if filename is not None, check that it is a list of strings whose length
    # matches the list length in np_txt
    tl2 = list(n_pairs.values())[0]
    if filename is not None:
        if not isinstance(filename, list) or not all(isinstance(f, str) for f in filename):
            raise ValueError("filename should be a list of strings.")
        tl1 = len(filename)
        if tl1 != tl2:
            raise ValueError(f"The length of `filename` ({tl1}) should be the same as the pair length ({tl2}) in np_txt.")
        # NOTE: the filename suffix is not changed.

    # check that the numpy arrays within the same list have the same shape
    for key, lst in np_txt.items():
        shapes = [arr.shape for arr in lst]
        if len(set(shapes)) != 1:
            raise ValueError(f"All npy arr in list '{key}' should have the same shape. Please check `np_txt`.")

    # NOTE: the shapes are not compared against the onnx-specified shapes.

    # ch_last is for dynasty-float-so inference, which only takes txt for now.
    if ch_last and (compression != "txt"):
        raise ValueError(f"dump to channel-last must be `txt` file, but got {compression}.")
    return


def preset_pairname(np_txt, compression="npy"):
    """Create default filenames for the input pairs."""
    suffix = "npy" if compression == "npy" else "txt"
    tl2 = [len(v) for k, v in np_txt.items()][0]  # lengths should be the same
    pairname = [f"in_{i:04d}.{suffix}" for i in range(tl2)]
    pairname[0] = f"test_input.{suffix}"
    return pairname


def get_paired_inputs(p_txt_inputs, pair_names=None, suffix="npy", verify_exist=True):
    """Check multiple INPUT NODES for this MODEL.

    Given the 1st input image name, return a list with the whole input set (might be 1 or more).

    TODO:
        needs refactoring into flow_utils

    Args:
        p_txt_inputs: where the txt files exist.
        pair_names: the txt filenames in the first input folder.
            (should be the same in the other folders.)
    """
    # if txt files are given then use them, otherwise search for them
    fns = [str(p) for p in pair_names] if pair_names else sorted([fn.name for fn in list(p_txt_inputs[0].glob(f"*.{suffix}"))])

    paired_inputs = []
    for fn in fns:
        # find a pair of inputs
        pair = [p / fn for p in p_txt_inputs]
        if verify_exist:
            assert all([f.exists() for f in pair])
        paired_inputs.append(pair)
    return paired_inputs


def convert_to_channel_last(np_1):
    """Convert a numpy array to channel-last layout.

    Only for the dynasty float .so call.

    dynasty float / fx only accept onnx-shaped input.
    """
    input_shape = np_1.shape
    if len(input_shape) > 3:
        # for e2e / app_release: only do channel-last txt if the dimension >= 4
        axes = range(len(input_shape))
        axes = [axes[0], *axes[2:], axes[1]]
        np_1 = np.transpose(np_1, axes)
    return np_1


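# Example: an NCHW tensor of shape (1, 3, 224, 224) becomes NHWC (1, 224, 224, 3):
#   convert_to_channel_last(np.zeros((1, 3, 224, 224))).shape -> (1, 224, 224, 3)

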
def npy2txt(np_txt: dict, input_names: list, p_input,
            exists_then_skip=False,
            pairname=None,
            compression="npy",  # txt or npy
            as_test_input=True,
            ch_last=False,
            n_writer=4,
            default_btm_name="test_input",
            knerex_prefix="knerex_input",
            sim_prefix="simulator_input",
            do_sanity_check=False):
    """Save numpy data to txt/npy files.

    np_txt is a map; keys are input node names,
    values are lists of numpy input arrays, 3D (HWC) or 4D (BHWC).

    input_names are the input node names obtained from onnx. They should be the same as
    np_txt.keys(), but with the order specified by onnx. The order matters!

    p_input is where to save the knerex_input+simulator_input folders,
    usually `model/input/`.
    If used for inference, it can be any path.

    pairname: optional, specifies the npy/txt file names to dump. Otherwise the
    `in_0000.npy` naming scheme is used.

    ch_last: default False. The knerex dynasty float .so call needs channel-last text
    files. Keep this option for toolchain/inference_dynasty_so().

    as_test_input: keep a `test_input.txt` in the input folder.
    """
    if do_sanity_check or DEBUG:
        sanity_check_npy(np_txt, input_names, pairname, compression=compression, ch_last=ch_last)
    if pairname is None:
        pairname = preset_pairname(np_txt, compression)

    # prepare the text folders.
    # NOTE: the folder names are fixed. always dump to knerex_input,
    # then link simulator_input to it.
    # So it is OK to call this in the inference_* series.
    n_inputs = len(input_names)  # number of input nodes for this model
    names_knerex_inputs = [f"{knerex_prefix}_{i}" for i in range(n_inputs)]
    names_knerex_inputs[0] = knerex_prefix
    names_simulator_inputs = [f"{sim_prefix}_{i}" for i in range(n_inputs)]
    names_simulator_inputs[0] = sim_prefix

    n_pairs = [len(v) for k, v in np_txt.items()][0]  # number of input pairs; each pair has one input per input node

    p_input = Path(p_input)
    p_knerex_inputs = [p_input / names_knerex_inputs[i] for i in range(len(input_names))]
    p_simu_inputs = [p_input / names_simulator_inputs[i] for i in range(len(input_names))]

    do_dump = True
    if exists_then_skip:
        all_knerex_input_exist = all([p.exists() for p in p_knerex_inputs])
        if all_knerex_input_exist:
            print(f"\n\nWARNING: knerex inputs exist already! Skipping the dump to disk. If you need to dump, please remove: {p_knerex_inputs}, or set `exists_then_skip` to False.\n\n")
            do_dump = False

    if do_dump:
        if n_pairs > 1 and n_writer > 1:
            # write to disk with multiple processes
            with ProcessPoolExecutor(max_workers=n_writer) as executor:
                futures = []
                for i_in, name in enumerate(input_names):
                    # prepare the folder for this input node
                    dir_in = p_knerex_inputs[i_in]
                    dir_in.mkdir(parents=True, exist_ok=True)
                    # link the corresponding simulator input
                    safe_link(dir_in, p_simu_inputs[i_in], relative=True)

                    np_in_s = np_txt[name]
                    for i_image, np_image in enumerate(np_in_s):
                        p_txt = dir_in / pairname[i_image]

                        # backward compatible for dynasty-float-so inference.
                        if ch_last:
                            # if compression != "txt":
                            #     raise ValueError(f"dump to channel-last must be `txt` file, but got {compression}.")
                            np_image = convert_to_channel_last(np_image)

                        # Schedule the save operation
                        # TODO: precision may increase from 6 to 15 digits for some solutions when using txt, but npy is preferred for now
                        future = executor.submit(save_array, p_txt, np_image, '%.6f', compression)
                        futures.append(future)

                # Optionally, handle the results as they complete (or just wait as below)
                for future in futures:
                    future.result()  # wait for all files to be processed
        else:
            for i_in, name in enumerate(input_names):
                # prepare the folder for this input node
                dir_in = p_knerex_inputs[i_in]
                dir_in.mkdir(parents=True, exist_ok=True)
                # link the corresponding simulator input
                safe_link(dir_in, p_simu_inputs[i_in], relative=True)

                np_in_s = np_txt[name]
                for i_image, np_image in enumerate(np_in_s):
                    p_txt = dir_in / pairname[i_image]

                    # backward compatible for dynasty-float-so inference.
                    if ch_last:
                        np_image = convert_to_channel_last(np_image)

                    save_array(p_txt, np_image, '%.6f', compression)

    suffix = "npy" if compression == "npy" else "txt"
    fn_default = f"{default_btm_name}.{suffix}"
    if as_test_input:
        link_test_input_txt(p_simu_inputs, fn_default=fn_default)

    # prepare for dynasty inference
    simulator_paired_inputs = get_paired_inputs(p_simu_inputs, pairname, suffix=suffix, verify_exist=False)
    return p_knerex_inputs, simulator_paired_inputs, pairname


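# Example usage (illustrative; node name, shapes and paths are assumptions):
#   np_txt = {"input_1": [np.zeros((1, 3, 224, 224), dtype=np.float32)]}
#   knerex_dirs, paired, names = npy2txt(np_txt, ["input_1"], "model/input")
#   # -> writes model/input/knerex_input/test_input.npy and links simulator_input to it

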
def solution_npy2txt(np_txt, input_names, p_input, file_name, as_test_input=False):
    """Save numpy data to txt files for a solution.

    np_txt is a map; keys are input node names,
    values are numpy input arrays, 3D (HWC) or 4D (BHWC).

    input_names are the input node names obtained from onnx; they should be the same as np_txt.keys().

    p_input is where to save the knerex_input+simulator_input folders.
    """
    # save texts.
    n_inputs = len(input_names)
    knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)]
    knerex_inputs[0] = "knerex_input"
    simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)]
    simulator_inputs[0] = "simulator_input"

    list_inputs = []
    for i_in in range(len(input_names)):
        # for multiple inputs
        np_in_s = np_txt[input_names[i_in]]
        dir_in = p_input / knerex_inputs[i_in]
        dir_simu = p_input / simulator_inputs[i_in]
        list_inputs.append(dir_simu)
        dir_in.mkdir(parents=True, exist_ok=True)
        safe_link(dir_in, dir_simu, relative=True)

        dim_in = np_in_s[0].shape
        total_size = np.prod(dim_in)

        for i_image, np_image in enumerate(np_in_s):
            dim_this = np_image.shape
            assert (
                dim_in == dim_this
            ), f"No. {i_image} input size {dim_this} is different from 1st input size {dim_in}"
            p_txt = dir_in / "{}.txt".format(file_name)
            # NOTE: we assume np_image is onnx shaped.
            # WARNING: previous versions required channel-last
            np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f")

    # link a "test_input.txt"
    if as_test_input:
        link_test_input_txt(list_inputs)


def link_test_input_txt(list_in_nodes, fn_default="test_input.npy"):
    """Make sure test_input.{npy,txt} exists for btm in the regression flow."""
    list_p = [Path(p) for p in list_in_nodes]
    list_test_input = [p / fn_default for p in list_p]
    list_test_exist = [p for p in list_test_input if p.exists()]
    if len(list_test_exist) == len(list_in_nodes):
        # print("{} exists for all input folders.".format(fn_default))
        return
    elif len(list_test_exist) == 0:
        # no test input file exists at all
        # print("link {} in {}".format(fn_default, list_p))
        suffix = Path(fn_default).suffix
        fn_pick = sorted(list(list_p[0].glob(f"*{suffix}")))[0].name
        # the same file name must exist in all input folders
        if not all([(p / fn_pick).exists() for p in list_p]):
            raise FileNotFoundError(f"Not all input folders have {fn_pick}. Please check input folders: {list_in_nodes}")
        [safe_link(p / fn_pick, p / fn_default) for p in list_p]
    else:
        # error: only some folders have the test input file; there must be a misalignment.
        n_missing = len(list_in_nodes) - len(list_test_exist)
        raise FileNotFoundError(f"ERROR: Found input folders: {list_in_nodes}, but {n_missing}/{len(list_in_nodes)} missing {fn_default}. we have only {list_test_exist}")


def safe_link(fn_from, fn_to, relative=True, delete_exists=True):
    """Create a link at `fn_to` pointing to `fn_from`.

    * if the target exists already, delete the target, then link.
    """
    f_from = Path(fn_from)
    f_to = Path(fn_to)

    assert f_from.exists(), f"source file/dir {f_from} does not exist."

    if f_to.is_symlink():
        # TODO: missing_ok is available from 3.8
        f_to.unlink()
    if f_to.exists() and delete_exists:
        shutil.rmtree(f_to)
    if relative:
        f_to.symlink_to(os.path.relpath(f_from, f_to.parent))
    else:
        f_to.symlink_to(f_from.absolute())


def estimate_mem_available():
    p_info = Path("/proc/meminfo")

    def parse_entry(s):
        a, b = s.strip().split(":")
        return a.strip(), b.removesuffix("kB").strip()

    with open(p_info, "r") as f:
        lines = f.readlines()
    meminfo = {}
    for line in lines:
        k, v = parse_entry(line)
        meminfo[k] = v

    mems_kB = [int(meminfo[k]) for k in ["MemAvailable", "SwapFree"]]
    return sum(mems_kB)


def expand_array(v, n):
    """Expand a scalar to an array."""
    if isinstance(v, (collections.abc.Sequence, np.ndarray)):
        # it is a vector / array; make sure it has the correct length
        assert len(v) == n, f"Expect {v} to have length {n} but got {len(v)}"
        return np.array(v)
    else:
        # it is a scalar; expand it to an array
        return np.ones(n) * v


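# Example: expand_array(0.5, 3) -> array([0.5, 0.5, 0.5]); expand_array([1, 2, 3], 3) -> array([1, 2, 3])

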
def gen_random_string(length):
|
||
"""Generate random string use less resource."""
|
||
alphabet = string.ascii_letters + string.digits
|
||
random_string = ''.join(secrets.choice(alphabet) for _ in range(length))
|
||
return random_string
|
||
|
||
|
||
def chunker(seq, size):
|
||
"""Cut long list into small lists.
|
||
|
||
from https://stackoverflow.com/questions/434287/how-to-iterate-over-a-list-in-chunks
|
||
"""
|
||
return (seq[pos:pos + size] for pos in range(0, len(seq), size))
|
||
|
||
|
||
@contextlib.contextmanager
|
||
def working_directory(path):
|
||
"""
|
||
Changes working directory and returns to previous on exit.
|
||
|
||
link: https://stackoverflow.com/questions/41742317/how-can-i-change-directory-with-python-pathlib
|
||
"""
|
||
prev_cwd = Path.cwd()
|
||
|
||
# create if not exist
|
||
p = Path(path)
|
||
p.mkdir(mode=0o770, parents=True, exist_ok=True)
|
||
os.chdir(str(p))
|
||
|
||
try:
|
||
yield
|
||
finally:
|
||
os.chdir(prev_cwd)
|
||
|
||
|
||
def pprint_dict(ld):
    """Summarize a list of dicts into a short string for the report (keys grouped by value)."""
    d = defaultdict(set)
    for d1 in ld:
        for k, v in d1.items():
            d[v].add(k)

    if len(d) == 0:
        return ""
    elif len(d) == 1:
        return list(d.keys())[0]
    else:
        s2 = [f"""{k}:{",".join(v)}""" for k, v in d.items()]
        return " \\ ".join(s2)


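# Behaviour sketch: keys are grouped by their values; a single shared value collapses to
# just that value (group order follows insertion order).
#
#     pprint_dict([{"conv1": "pass"}, {"conv2": "pass"}])  # -> "pass"
#     pprint_dict([{"conv1": "pass"}, {"conv2": "fail"}])  # -> "pass:conv1 \ fail:conv2"
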
def get_timestamp():
    """Get the timestamp from the bash environment (`regression_timestamp`)."""
    timestamp = os.environ.get('regression_timestamp', None)
    # if it does not exist, create a new timestamp
    if timestamp is None:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        print(f"Created new timestamp: {timestamp}")
    else:
        print(f"Using existing TIMESTAMP: {timestamp}")
    return timestamp


def report_dict2df(d, p_status=None):
    """Convert regression results to a dataframe for the report."""
    # check format: two-level dictionary, each key is a 2-tuple.
    # TODO

    # convert the nested dict to a Pandas DataFrame
    df = pd.DataFrame.from_dict(d, orient='index')

    # turn index and columns into a MultiIndex
    df.index = pd.MultiIndex.from_tuples(df.index, names=['category', 'case'])
    df.columns = pd.MultiIndex.from_tuples(df.columns)  # , names=['platform', 'stage']
    if DEBUG:
        print(df)

    if p_status is not None:
        p_status.parent.mkdir(parents=True, exist_ok=True)
        df.to_pickle(p_status, compression='xz')

    return df


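# Input sketch (hypothetical names): the keys of `d` are (category, case) tuples and the
# inner keys are (platform, stage)-style tuples, so both axes become a MultiIndex.
#
#     d = {
#         ("cnn", "resnet50"):  {("npu", "compile"): "pass", ("npu", "infer"): "pass"},
#         ("cnn", "mobilenet"): {("npu", "compile"): "fail", ("npu", "infer"): "skip"},
#     }
#     df = report_dict2df(d)  # optionally pass p_status=Path(...) to persist an xz-compressed pickle
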
def html_merge_cell(html_string):
    """Merge cells in html string.

    If a cell is "↑", merge it into the cell above.
    The "↑" cells in each column are processed from bottom to top and merged into the cell directly above.
    First all rowspan attributes are added, then the table is rescanned and every cell containing "↑" is removed, from bottom-right to top-left.
    """

    # parse the HTML string
    soup = BeautifulSoup(html_string, 'html.parser')

    # get all table rows
    rows = soup.find_all('tr')

    # skip the header row, only process data rows
    data_rows = rows[1:] if len(rows) > 0 else []

    # nothing to do if there are no data rows
    if not data_rows:
        return html_string

    # number of columns in the table
    max_cols = max([len(row.find_all(['td', 'th'])) for row in rows]) if rows else 0

    # step 1: walk the rows from last to first and add rowspan attributes for every column
    for row_idx in range(len(data_rows) - 1, -1, -1):
        row = data_rows[row_idx]
        cells = row.find_all('td')

        # walk the columns of the current row from last to first
        for col_idx in range(min(len(cells), max_cols) - 1, -1, -1):
            cell = cells[col_idx]

            # check whether the cell content is "↑"
            if cell.get_text().strip() == "↑":
                # get the rowspan value if it already exists
                span_n_under = 1
                if 'rowspan' in cell.attrs:
                    span_n_under = int(cell['rowspan'])

                # find the cell directly above
                if row_idx > 0:  # make sure this is not the first data row
                    above_row = data_rows[row_idx - 1]
                    above_cells = above_row.find_all('td')

                    # make sure the row above has enough cells
                    if col_idx < len(above_cells):
                        above_cell = above_cells[col_idx]

                        # add or update the rowspan attribute of the cell above
                        if 'rowspan' in above_cell.attrs:
                            # should never reach here.
                            above_cell['rowspan'] = str(int(above_cell['rowspan']) + 1 + span_n_under)
                        else:
                            above_cell['rowspan'] = str(1 + span_n_under)  # current cell + cells below

    # step 2: rescan the whole table and remove all cells containing "↑", from bottom-right to top-left
    # re-fetch all rows (the previous step may have modified the DOM)
    rows = soup.find_all('tr')
    data_rows = rows[1:] if len(rows) > 0 else []

    # walk the rows from last to first
    for row_idx in range(len(data_rows) - 1, -1, -1):
        row = data_rows[row_idx]
        cells = row.find_all('td')

        # walk the columns from last to first
        for col_idx in range(len(cells) - 1, -1, -1):
            cell = cells[col_idx]

            # remove the cell if its content is "↑"
            if cell.get_text().strip() == "↑":
                cell.decompose()

    # return the modified HTML string
    return str(soup)


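# Behaviour sketch: a column that reads ["A", "↑", "↑"] in consecutive data rows becomes a
# single <td rowspan="3">A</td>; the "↑" cells themselves are dropped in the second pass.
#
#     html_out = html_merge_cell(df.to_html())  # e.g. on a report DataFrame rendered to HTML
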
def html_highlight_node_backend(html_string):
    """Highlight the different node types in the backend node column.

    - (weight) nodes: small font, gray
    - (op) nodes: bold
    - (model_input) nodes: italic
    """
    # parse the HTML string
    soup = BeautifulSoup(html_string, 'html.parser')

    # get all table rows
    rows = soup.find_all('tr')

    if not rows:
        return html_string

    # process every cell in the data rows (skip the header)
    data_rows = rows[1:] if len(rows) > 1 else []

    for row in data_rows:
        cells = row.find_all(['td', 'th'])

        for cell in cells:
            cell_text = cell.get_text().strip()

            # skip empty cells and the merge marker
            if not cell_text or cell_text == '↑':
                continue

            # apply a style depending on the prefix
            if cell_text.startswith('(weight) '):
                # small font, gray
                cell['style'] = 'font-size: 0.75em; color: #666666;'
            elif cell_text.startswith('(op) '):
                # bold
                cell['style'] = 'font-weight: bold;'
            elif cell_text.startswith('(model_input) '):
                # italic
                cell['style'] = 'font-style: italic;'

    # return the modified HTML string
    return str(soup)


def html_add_footnote(html_string):
    """Add tooltip to certain column name in table in html string."""
    tooltips = {
        "node": (None, "decomposed node for optimization stage 1"),
        "node origin": ("origin node", "node in optimized onnx from original model."),
        "node backend": ("opt stage 2 node", "decomposed node for optimization stage 2"),
        "CMD_node_idx": (None, "Index of command node."),
        "runtime(ms)": (None, "NPU runtime of all units without sync."),
        "CFUNC_runtime(ms)": (None, "runtime of main computing unit (includes conv and post conv) in ms."),
        "PFUNC_runtime(ms)": (None, "runtime of auxiliary computing unit (includes pool and format convertor) in ms."),
        "SYNC_runtime(ms)": (None, "NPU runtime with sync (e.g., CONV and DMA run at same time)."),
    }

    soup = BeautifulSoup(html_string, 'html.parser')

    # add custom CSS to enhance the tooltip display
    head = soup.find('head')
    if not head:
        head = soup.new_tag('head')
        if soup.html:
            soup.html.insert(0, head)
        else:
            soup.insert(0, head)

    # add a CSS style block to enlarge the tooltip font
    style_tag = soup.new_tag('style')
    style_tag.string = """
    /* enhance tooltip rendering */
    [title] {
        position: relative;
    }

    /* custom tooltip style (only effective in supporting browsers) */
    [title]:hover::after {
        content: attr(title);
        position: absolute;
        bottom: 100%;
        left: 50%;
        transform: translateX(-50%);
        background-color: #333;
        color: white;
        padding: 8px 12px;
        border-radius: 6px;
        font-size: 14px;
        font-weight: normal;
        white-space: nowrap;
        z-index: 1000;
        box-shadow: 0 2px 8px rgba(0,0,0,0.2);
        margin-bottom: 5px;
    }

    [title]:hover::before {
        content: '';
        position: absolute;
        bottom: 100%;
        left: 50%;
        transform: translateX(-50%);
        border: 5px solid transparent;
        border-top-color: #333;
        z-index: 1000;
    }

    /* hide the browser's default title tooltip
       (note: `title` is not a real CSS property, so this rule is effectively a no-op) */
    [title]:hover {
        title: '';
    }
    """
    head.append(style_tag)

    # 1. find all tables
    tables = soup.find_all('table')

    for table in tables:
        # 2. for each table, look at the column names (first row)
        rows = table.find_all('tr')
        if not rows:
            continue

        header_row = rows[0]
        headers = header_row.find_all(['th', 'td'])

        # 3. add a tooltip to each known column name
        for header in headers:
            header_text = header.get_text().strip()

            if header_text in tooltips:
                new_name, description = tooltips[header_text]

                # 3.1 if new_name is not None, rename the column
                if new_name is not None:
                    display_name = new_name
                else:
                    display_name = header_text

                # 3.2 add the tooltip attribute and a more visible style
                header.string = display_name
                header['title'] = description
                # visual hint: light blue background, dotted underline, help cursor
                current_style = header.get('style', '')
                tooltip_style = (
                    ' cursor: help;'
                    ' border-bottom: 2px dotted #0066cc;'
                    ' background-color: #f0f8ff;'
                    ' padding: 2px 4px;'
                    ' border-radius: 3px;'
                    ' position: relative;'
                )
                header['style'] = current_style + tooltip_style

    return str(soup)


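# Typical post-processing order for a report table (a sketch only; the actual call order
# may differ in the report-generation code): merge duplicated cells first, then style the
# backend node names, then attach the column tooltips.
#
#     html = df.to_html()
#     html = html_merge_cell(html)
#     html = html_highlight_node_backend(html)
#     html = html_add_footnote(html)
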
def test_html_merge():
    # set up file paths
    input_path = '/home/wenliang/workflow_v3/regression_ng5/test_v2/test_report_cell_merge/'

    # the file pairs to process
    file_pairs = [
        ('model_littlenet_original_v2.html', 'model_littlenet_merged_v2.html'),
        ('model_littlenet_original.html', 'model_littlenet_merged.html'),
        ('model_resnet_origin.html', 'model_resnet_merged.html'),
        ('model_resnet_origin_v2.html', 'model_resnet_merged_v2.html')
    ]

    # process each file pair
    for input_filename, output_filename in file_pairs:
        input_file = input_path + input_filename
        output_file = input_path + output_filename

        print(f"\n{'-'*50}")
        print(f"Processing file pair: {input_filename} -> {output_filename}")

        try:
            # read the original HTML file
            print(f"Reading file: {input_file}")
            with open(input_file, 'r', encoding='utf-8') as f:
                html_content = f.read()

            # call html_merge_cell to process the HTML
            print("Merging cells in the HTML...")
            merged_html = html_merge_cell(html_content)

            # save the processed HTML
            print(f"Saving result to: {output_file}")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(merged_html)

            print("Processed successfully!")
        except Exception as e:
            print(f"Error while processing the file: {e}")

    print(f"\n{'-'*50}")
    print("All files processed!")


def gen_backend_node_graph(p_json, p_svg, skip_dot=False):
    """Generate backend node graph from node_format_opt_be.json file in SVG format."""
    # handle both cases: p_json may be a file path or already-loaded data
    if isinstance(p_json, (str, Path)):
        # p_json is a file path
        with open(p_json, 'r') as f:
            data = json.load(f)
    else:
        # p_json is already loaded data
        data = p_json

    p_svg = Path(p_svg)

    # build the GraphViz DOT file content
    dot_content = []
    dot_content.append('digraph G {')
    dot_content.append(' rankdir=TB;')  # top-to-bottom layout
    dot_content.append(' label="based on opt stage 2 node info.";')  # graph title
    dot_content.append(' labelloc="t";')  # title at the top
    dot_content.append(' labeljust="r";')  # title right-aligned
    dot_content.append(' fontsize=14;')  # title font size
    dot_content.append(' node [shape=box, style=filled, fillcolor=lightblue];')  # op node style
    dot_content.append(' edge [color=black];')

    # data node style
    dot_content.append(' node [shape=ellipse, style=filled, fillcolor=lightgreen] data;')

    # define all nodes and edges
    data_nodes = set()  # data nodes added so far
    op_nodes = set()  # op nodes added so far

    # walk all op nodes
    for op_name, op_info in data.items():
        # add the op node
        dot_content.append(f' "{op_name}" [shape=box, style=filled, fillcolor=lightblue];')
        op_nodes.add(op_name)

        # handle input data
        if 'inputs' in op_info:
            for input_data in op_info['inputs']:
                for data_name, data_format in input_data.items():
                    # add the data node (if not added yet)
                    if data_name not in data_nodes:
                        dot_content.append(f' "{data_name}" [shape=ellipse, style=filled, fillcolor=lightgreen, label="{data_name}\n[{data_format}]"];')
                        data_nodes.add(data_name)

                    # add edge: input data -> op
                    dot_content.append(f' "{data_name}" -> "{op_name}";')

        # handle output data
        if 'outputs' in op_info:
            for output_data in op_info['outputs']:
                for data_name, data_format in output_data.items():
                    # add the data node (if not added yet)
                    if data_name not in data_nodes:
                        dot_content.append(f' "{data_name}" [shape=ellipse, style=filled, fillcolor=lightgreen, label="{data_name}\n[{data_format}]"];')
                        data_nodes.add(data_name)

                    # add edge: op -> output data
                    dot_content.append(f' "{op_name}" -> "{data_name}";')

    # close the DOT graph
    dot_content.append('}')

    if skip_dot:
        return data_nodes, op_nodes

    # write the DOT content to a file
    p_dot = p_svg.with_suffix('.dot')
    with open(p_dot, 'w') as f:
        f.write('\n'.join(dot_content))
    logging.debug(f"backend node graph generated and saved to: {p_dot}")

    # use the dot command to generate the SVG file
    try:
        # check that graphviz/dot is installed
        check_result = run_bash_script('dot -V', timeout=10)
        if check_result.returncode != 0:
            raise Exception("Graphviz/dot command not found, please install graphviz")

        # generate the SVG file with a 600 s timeout
        dot_cmd = f'dot -Tsvg "{p_dot}" -o "{p_svg}"'
        result = run_bash_script(dot_cmd, timeout=600)

        if result.returncode == 0:
            logging.debug(f"SVG image generated and saved to: {p_svg}")
        else:
            logging.error(f"failed to generate SVG file: {result.stderr}")
    except Exception as e:
        logging.error(f"exception while generating SVG file: {e}")

    # if dot not found, still return the info
    return data_nodes, op_nodes


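# Expected JSON shape (inferred from the loops above; names and formats are illustrative):
#
#     {
#         "conv_0": {
#             "inputs":  [{"image": "NHWC"}, {"conv_0_weight": "HWCN"}],
#             "outputs": [{"conv_0_out": "NHWC"}]
#         }
#     }
#
#     data_nodes, op_nodes = gen_backend_node_graph("node_format_opt_be.json",
#                                                   "node_format_opt_be.svg")
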
def be_node_name_add_prefix(records, op_nodes, model_ins):
    """Add a prefix (op, weight, model_input) to each backend node name.

    records is a list of dicts with the raw data collected.
    op_nodes is a set of op node names.
    model_ins is a set of model input node names.

    the leftover nodes are weights, which should not have other properties, e.g. CFUNC_runtime(ms).
    """
    k_be = "node backend"
    k_1 = "CFUNC_runtime(ms)"
    for d in records:
        if k_be not in d:
            continue
        if d[k_be] == "↑":
            continue

        if d[k_be] in model_ins:
            d[k_be] = f"(model_input) {d[k_be]}"
        elif d[k_be] in op_nodes:
            d[k_be] = f"(op) {d[k_be]}"
        elif k_1 not in d:
            # make sure it is a weight
            d[k_be] = f"(weight) {d[k_be]}"
        else:
            # this is an op with other properties but not in op_nodes.
            # should not reach here.
            logging.error(f"undetermined type for node: {d[k_be]}")
    return records


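# Behaviour sketch (hypothetical records): nodes found in `op_nodes` / `model_ins` get an
# "(op)" / "(model_input)" prefix; anything else without runtime fields is treated as a weight.
#
#     records = [{"node backend": "conv_0"}, {"node backend": "conv_0_weight"}]
#     be_node_name_add_prefix(records, op_nodes={"conv_0"}, model_ins=set())
#     # -> [{"node backend": "(op) conv_0"}, {"node backend": "(weight) conv_0_weight"}]
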
def test_backend_node_graph():
    p_regression = Path("/home/wenliang/workflow_v3/regression_ng5/test_v2/test_utils/")
    p_json = p_regression / "node_format_opt_be.json"
    p_svg = p_regression / "node_format_opt_be.svg"  # pass the SVG path, not the DOT path
    gen_backend_node_graph(p_json, p_svg)


if __name__ == "__main__":
    pass