#!/usr/bin/env python3
import os
import sys
import subprocess
import logging
import pathlib
import json
import contextlib
import re
import zipfile
import lzma
import pickle
import math
import struct
import hashlib
import shutil
import collections.abc
from collections import defaultdict
from pprint import pprint as pp  # pp is used below; its import did not survive in the source (assumption)
import string
import secrets
import tempfile
import itertools

import numpy as np
import pandas as pd

from sys_flow.flow_constants import MODE_HARDWARE
from sys_flow.onnx_op_stats import onnx_info
from sys_flow.util_lib import load_zip_jsons

DEBUG = bool(os.environ.get("REGRESSION_DEBUG", False))

import snoop

snoop.install(enabled=DEBUG)

if DEBUG:
    from IPython.terminal import embed as emb

    terminal = emb.InteractiveShellEmbed()
    terminal.extension_manager.load_extension("autoreload")
    terminal.run_line_magic("autoreload", "2")
    embed = terminal.mainloop
else:
    embed = lambda: None


# functions for loading text files
def twos_comp(val, bits):
    """Compute the 2's complement of int value val."""
    # if the sign bit is set, e.g., 8 bit: 128-255
    if (val & (1 << (bits - 1))) != 0:
        # compute the negative value (the original added 1 << bits,
        # which contradicts its own comment; subtract instead)
        val -= 1 << bits
    # return positive values as-is
    return val


def array_le2flt(arr, n_flt: int, n_byte: int = 4):
    """Convert an array of unsigned bytes (little endian) to floats.

    INPUT:
    * n_flt: how many float numbers to extract
    * n_byte: how many bytes make up 1 float. Used to convert scale to float.
    """
    return struct.unpack(f"<{n_flt}f", struct.pack(f"<{n_flt*n_byte}B", *arr))


def intle2flt(i):
    """Reinterpret a 32-bit unsigned integer (little endian) as a float.

    NOTE: the body of this function was damaged in the source; this is the
    standard pack/unpack reconstruction of the documented behavior.
    """
    packed = struct.pack("<I", i)
    return struct.unpack("<f", packed)[0]
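
# Illustrative checks for the converters above (assuming the reconstructed
# intle2flt body and the sign-fold fix in twos_comp); 0x3F800000 is the
# IEEE-754 encoding of 1.0f:
def _demo_bit_conversions():
    assert twos_comp(0xFF, 8) == -1                       # sign bit set -> negative
    assert intle2flt(0x3F800000) == 1.0                   # uint32 bits -> float
    assert array_le2flt([0x00, 0x00, 0x80, 0x3F], n_flt=1) == (1.0,)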

# NOTE: a span of the original file was lost here (it contained at least
# load_txt, used by create_noise_input_folder below, likely dp2dyn_dump,
# and the opening of parse_shape_info); only the tail of parse_shape_info
# survives.
def parse_shape_info(j_shape):
    """Parse shape-info json into per-datapath lookup tables.

    Returns (nodes, dp2node, node2dp, dp2idx_in_node, dp_shape, dp_hw_c,
    dp2index, dp2dump, graph_dp_in, graph_dp_out). The code that builds
    these tables ahead of the per-op loop below is missing from the source;
    the loop header is a reconstruction (assumption).
    """
    for i_op, op in enumerate(nodes):
        # [missing code: the per-dp loop defining dp_index and idx_2nd;
        #  the surviving fragment shows idx_2nd falls back to "-" for
        #  single-output ops]
        dp2index[dp_index] = (i_op, idx_2nd)

        # check subgraph: some OP may be a loop node.
        if subgraph and op in subgraph:
            this_sub = subgraph[op]
            if "op_outs" in this_sub:
                # back-compatible with the old single-output format:
                # turn it into a list of one-element lists
                op_outs = [[t] for t in this_sub["op_outs"]]
            elif "op2dps" in this_sub:
                d2 = this_sub["op2dps"]
                sub_ops = this_sub["ops"]
                op_outs = [d2[sub_op] for sub_op in sub_ops if sub_op in d2]
            else:
                raise NotImplementedError("Missing op2dps / op_outs for "
                                          f"subgraph {op} in shapeinfo.json")
            # op_outs is a list of lists; flatten into dps
            sub_outs = list(itertools.chain(*op_outs))
            N_dp = len(sub_outs)
            n_loop = this_sub["max_count"][0]  # why does knerex give a list here?
            for i_loop in range(n_loop):
                for i_dp, dp_name in enumerate(sub_outs):
                    dp_index = (dp_name, i_loop)
                    dp2index[dp_index] = (i_op, N_dp * i_loop + i_dp)
                    dp2dump[dp_index] = dp2dyn_dump(
                        dp_name, graph_dp_in, graph_dp_out, i_loop=i_loop)

    return (nodes, dp2node, node2dp, dp2idx_in_node, dp_shape, dp_hw_c,
            dp2index, dp2dump, graph_dp_in, graph_dp_out)


def get_ioinfo_from_knerex_json(j_fx, j_shape):
    """Get ioinfo from knerex-dumped json.

    This function returns json which is compatible with
    * *_ioinfo.json dumped by the compiler
    * then loaded by compiler_v2.load_ioinfo_json.

    Specification of ioinfo.json
    * keys of "input" / "output"
    * values are lists of dicts
    * each dict has keys:
        * "bitw": integer
        * "radix": list/array, per channel
        * "scale": list/array, per channel
        * "ch_dim": integer, index into onnx_shape
        * "onnx_shape": list/array, onnx shape
        * "shape": list/array, sim shape
        * "data_format": string, used by data_converter
        * "stride": list/array, used by data_converter

    TODO: only graph in/out fx_info are sent out. We could send out the
    fx_info of every dp.
    """

    # helper function
    def get_fx_info(d_radix, d_shape, dp_name, i_dp):
        """Extract fx info of one datapath.

        Args:
            d_radix (dict): quantization info for this dp
            d_shape (dict): shape info for this dp

        NOTE: missing "data_format" / "stride"
        """
        # supposed to be an integer
        conv11 = {
            "output_datapath_bitwidth": "bitw",
        }
        # supposed to be a list
        conv12 = {
            "output_datapath_radix": "radix",
            "output_scale": "scale",
        }
        # info in SnrShapeInfo; supposed to be a list
        conv22 = {
            "onnx_shape": "onnx_shape",
            "hw_shape": "shape",
        }
        fx_info = {}
        # d_radix is per op; it may cover multiple dps, use i_dp to index
        try:
            for k, v in conv11.items():
                fx_info[v] = d_radix[k][i_dp]
            for k, v in conv12.items():
                fx_info[v] = np.array(d_radix[k][i_dp])
        except (KeyError, IndexError, TypeError):
            # back-compatible: not the multi-output format, toolchain < 0.23.0
            # assert i_dp == 0  # TODELETE.
            for k, v in conv11.items():
                fx_info[v] = d_radix[k]
            for k, v in conv12.items():
                fx_info[v] = np.array(d_radix[k])
        dim = len(fx_info[v].shape)
        assert dim == 1, (
            f"Expect {v} to have 1 dimension, but got {dim} "
            f"shape: {fx_info[v].shape}")
        # NOTE: take the 0th element for hw_c_in_onnx;
        # knerex should give an int, not a list
        fx_info["ch_dim"] = d_shape["hw_c_in_onnx"][0]
        for k, v in conv22.items():
            fx_info[v] = np.array(d_shape[k])
        fx_info["name"] = clean_name(dp_name)
        fx_info["ndim"] = len(fx_info["shape"])
        return fx_info

    # extract shape info
    (_, dp2node, _, dp2idx_in_node, _, _, _, _,
     dp_in, dp_out) = parse_shape_info(j_shape)
    ioinfo = {}
    ioinfo["input"] = [
        get_fx_info(j_fx[dp2node[dp]], j_shape["dp_info"][dp], dp,
                    dp2idx_in_node.get(dp, 0))
        for dp in dp_in]
    ioinfo["output"] = [
        get_fx_info(j_fx[dp2node[dp]], j_shape["dp_info"][dp], dp,
                    dp2idx_in_node.get(dp, 0))
        for dp in dp_out]
    return ioinfo


def is_zip_file(file_path):
    """Judge whether a file is a zip using the magic number."""
    with open(file_path, "rb") as f:
        return f.read(4) == b"PK\x03\x04"


def get_ioinfo_from_bie(
    p_bie, hw_mode, dyn_bin="/workspace/libs/dynasty/run_fix_inference"
):
    """Get input/output node info from a bie.

    Info includes:
    * input node names with order
    * output node names and shapes

    Not included:
    * (NOT) datapath (in+out) fx info: bw, radix, scales per channel
    """
    # detour for bie2
    if is_zip_file(p_bie):
        return get_ioinfo_from_bie2(p_bie)
    p_working = pathlib.Path(tempfile.mkdtemp(prefix="unpack_bie_"))
    cmd = f"{dyn_bin} -m {p_bie} -t 1 -p kl{hw_mode} -e -o {p_working}"
    cp = run_bash_script(cmd)
    assert (
        cp.returncode == 0
    ), f"Failed to extract fx info from bie. Return code {cp.returncode}"
    p_j = p_working / "SnrShapeInfo.json"
    assert p_j.exists(), f"output missing: {p_j}"
    with open(p_j, "r") as f:
        j_shape = json.load(f)
    _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
    # we only need the graph-out datapath shapes
    dp_out_shape = {k: shape_info[k] for k in dp_out}
    # TODO: delete folder p_working
    # the last element is the optional ioinfo.json
    return dp_in, dp_out, dp_out_shape, None


def get_ioinfo_from_bie2(p_bie2):
    """Parse ioinfo from the bie2 format.

    NOTE: should produce the same output as get_ioinfo_from_bie.
    """
    js = load_zip_jsons(p_bie2)
    k = "shape_info.json"  # from 0.23.0
    assert k in js, f"NO {k} found in bie {p_bie2}. Found: {js.keys()}"
    j_shape = js[k]
    _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape)
    # we only need the graph-out datapath shapes
    dp_out_shape = {k: shape_info[k] for k in dp_out}
    # ioinfo.json is needed for dynasty, but optional
    k = "ioinfo.json"
    ioinfo = js[k] if k in js else None
    return dp_in, dp_out, dp_out_shape, ioinfo
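
# For reference, a made-up sample of the ioinfo layout produced by
# get_ioinfo_from_knerex_json above. Values are illustrative only; the code
# stores radix/scale/onnx_shape/shape as np.array (shown here as lists), and
# "data_format" / "stride" are not filled in by get_fx_info:
_IOINFO_SAMPLE = {
    "input": [{
        "name": "input_1_o0",          # cleaned datapath name
        "bitw": 8,
        "radix": [7, 7, 7],            # per channel
        "scale": [1.0, 1.0, 1.0],      # per channel
        "ch_dim": 1,                   # index into onnx_shape
        "onnx_shape": [1, 3, 224, 224],
        "shape": [1, 224, 224, 3],     # sim/hw shape
        "ndim": 4,
    }],
    "output": [],                      # same per-dp dict layout
}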

def find_input_txt_folder(p_model, pref="knerex_input"):
    """Find all input folders.

    The input folders should be:
    - knerex_input / knerex_input_1 / knerex_input_2 ...
      (for models with multiple inputs), or
    - simulator_input / simulator_input_1 / simulator_input_2 ...
      (for models with multiple inputs)
    """
    lst = []
    p_in = p_model / "input" / pref
    if p_in.exists():
        lst.append(p_in)
    else:
        return None
    for i in range(1, 100):
        p_in = p_model / "input" / f"{pref}_{i}"
        if p_in.exists():
            lst.append(p_in)
        else:
            return lst
    # more than 99 numbered input folders is not expected
    print("should not arrive here")
    return lst


def get_input_txt_list(p_in):
    """List input txt names in the given folder.

    test_input.txt will be the 1st one if it exists.
    """
    fns = [fn.name for fn in p_in.glob("*.txt")]
    fn_default = "test_input.txt"
    if fn_default in fns:
        # move fn_default to the front
        fns.remove(fn_default)
        return [fn_default] + fns
    return fns


def need_compress_command_bin(tc_cat, tc_name):
    """Special mark for some special cases (big-kernel m* test cases)."""
    if tc_cat.startswith("m"):
        big_kernels = [
            "bk23x23",
            "bk25x25",
            "bk27x27",
            "bk29x29",
            "bk31x31",
            "bk33x33",
            "bk35x35",
        ]
        return any(a in tc_name for a in big_kernels)
    return False


def guess_model_id(s):
    """Guess the numeric model id from a string like 'model_123_...'."""
    sr = re.compile(r"model_(\d+)")
    try:
        return int(sr.findall(s)[0])
    except (IndexError, ValueError):
        # no model id found; use the fallback id
        return 32768


def clean_case_name(x):
    """Normalize the case names.

    The case name in the final report may carry extra info:
    - xxx (known bug)          // remove the space and everything after it
    - model_ddd_xxxxxx_append  // remove _xxxxxx
    """

    def remove_append(x):
        """Works only when the kept part of the name has no space."""
        return x.split(" ")[0]

    def remove_model_share_commit(x):
        """Handle a model_share case with a commit number in the name."""
        s = re.compile(r"(model_\d{3})_[\da-f]{6}(_.*)")
        finds = s.findall(x)
        if len(finds) == 1:
            # found the pattern
            return "".join(finds[0])
        # no change
        return x

    return remove_model_share_commit(remove_append(x))


def relative_path(target, origin):
    """Return the path of target relative to origin.

    NOTE: .resolve() would replace a symlink with its target, which does not
    work in our case.
    """
    # from https://stackoverflow.com/questions/38083555/using-pathlibs-relative-to-for-directories-on-the-same-level
    p_t = pathlib.Path(target)
    p_o = pathlib.Path(origin)
    try:
        return p_t.absolute().relative_to(p_o.absolute())
    except ValueError:
        # target does not start with origin; recurse with origin's parent
        # (eventually origin is root, so relative_to will succeed)
        return pathlib.Path("..").joinpath(relative_path(target, p_o.parent))


class CustomCP:
    """A customized CompletedProcess-like object returned on timeout."""

    def __init__(self, returncode, stdout=None, stderr=None):
        """Init this cp."""
        self.returncode = returncode
        self.stdout = stdout
        self.stderr = stderr

    def __str__(self):
        """Make this cp printable."""
        return f"Return Code: {self.returncode}, Error Message: {self.stderr}"


def run_bash_script(command, do_echo=False, fail_then_exit=False,
                    timeout=60 * 60 * 6):
    """Kneron wrapper over bash commands.

    Inputs:
    - timeout: how many seconds the command may run
    """
    if isinstance(command, list):
        command = " ".join(command)
    try:
        cp = subprocess.run(
            command,
            shell=True,
            executable="/bin/bash",
            check=False,
            capture_output=True,
            text=True,
            timeout=timeout,
        )
    except subprocess.TimeoutExpired:
        cp = CustomCP(111, stdout=f"Command: {command}",
                      stderr=f"TIMEOUT ({timeout}s)")
    if do_echo or (cp.returncode != 0 and DEBUG):
        print("-------------------------------------------------------------")
        print(f"running command: {command}")
        if cp.returncode != 0:
            print(f"Failed at {cp.returncode}")
        print("-------------------------------------------------------------")
        print(cp.stdout)
        print(cp.stderr)
        print("-------------------------------------------------------------")
    if cp.returncode != 0 and fail_then_exit:
        print(f"Failed to run {command}. Exit code: {cp.returncode}")
        print("Will not continue. Exiting.")
        sys.exit(1)
    return cp
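
# A minimal usage sketch for run_bash_script (the command is illustrative):
def _demo_run_bash_script():
    cp = run_bash_script("echo hello", timeout=10)
    assert cp.returncode == 0
    assert cp.stdout.strip() == "hello"
    # on timeout, cp would instead be a CustomCP with returncode 111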

def check_parallel_log(fn_log):
    """Check the parallel log file.

    NOTE: sometimes dynasty is killed by a segmentation fault while the
    return code is still 0, so we require both exit value and signal to be
    zero.
    """
    exitvals = []
    signals = []
    with open(fn_log, "r") as file:
        for i, line in enumerate(file):
            if i == 0:
                # skip the header line
                continue
            parts = line.split()
            if len(parts) > 6:  # ensure the line has enough columns
                try:
                    exitvals.append(int(parts[6]))
                    signals.append(int(parts[7]))
                except (ValueError, IndexError):
                    pass
    e1 = tuple(a for a in exitvals if a != 0)
    e2 = tuple(a for a in signals if a != 0)
    msg = []
    if len(e1) > 0:
        # usually does not happen here
        msg.append(f"error: {e1}")
    if len(e2) > 0:
        msg.append(f"signal: {e2}")
    return "//".join(msg), e1, e2


def set_folder_public(p_out):
    """Make the output path readable for the basic group."""
    dir_out = pathlib.Path(p_out)
    if not dir_out.exists():
        # in case it was deleted before test_case.__del__ triggers
        return
    script = f"""
    chgrp -R 50000 {dir_out}
    chmod 755 {dir_out}
    find {dir_out} -type d -exec chmod 755 {{}} \\;
    find {dir_out} -type f -exec chmod 644 {{}} \\;
    """
    run_bash_script(script, do_echo=False)


def assert_nodes_exists(fn_onnx, node_list):
    """Assert that the named nodes exist in the onnx.

    Kneron solutions may apply special processing to certain nodes,
    specified by name; use this to make sure the onnx has not changed.
    """
    import onnx

    o = onnx.load_model(fn_onnx)
    nodes_all = [a.name for a in o.graph.node]
    missing = False
    print("check {} for special nodes {}".format(fn_onnx, node_list))
    for node in node_list:
        if node not in nodes_all:
            print("ERROR: node {} does not exist. check with ALG team".format(node))
            missing = True
    if missing:
        raise FileNotFoundError


def detect_valid_model(dir_in):
    """Given a path, detect valid models under it."""
    p_base = pathlib.Path(dir_in).resolve()
    onnxs = p_base.glob("**/*.origin.onnx")
    for o in onnxs:
        p_model = o.parent.parent
        if is_valid_case(p_model):
            yield p_model


def is_valid_case(dir_case):
    """Is this a valid test case?

    XXXXX/input/XXXXX.origin.onnx (or bie)  # the two XXXXX must match
    XXXXX/input/knerex_input must exist.
    The txt files inside are not checked.
    """
    p_case = pathlib.Path(dir_case)
    if not p_case.is_dir():
        # print(f"{p_case} is not dir.")
        # TODO: return an extra string explaining the failure
        return False
    p_origin = p_case / f"input/{p_case.name}.origin.onnx"
    if not p_origin.exists():
        p_origin = p_case / f"input/{p_case.name}.origin.bie"
        if not p_origin.exists():
            return False
    p_knerex_input = p_case / "input" / "knerex_input"
    if not p_knerex_input.exists():
        return False
    return True
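
# For reference, the on-disk layout that is_valid_case above accepts
# (the case name "model_001_demo" is hypothetical):
#
#   model_001_demo/
#       input/
#           model_001_demo.origin.onnx   # or model_001_demo.origin.bie
#           knerex_input/                # txt contents are not checked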

def filter_cases(dir_base, keywords=[]):
    """Find all test cases in dir_base.

    * filter out bad test cases, e.g., missing input or origin.onnx
    * select only test cases matching the keywords

    output: list of paths to test cases
    """
    excludekeywords = []
    if "-e" in keywords:
        excludekeywords = keywords[keywords.index("-e") + 1:]
        keywords = keywords[0:keywords.index("-e")]
    whitelist = []
    if "-f" in keywords:
        whitefile = keywords[keywords.index("-f") + 1]
        keywords = keywords[0:keywords.index("-f")]
        with open(whitefile, "r") as f:
            lineList = f.readlines()
        whitelist[:] = [x.strip() for x in lineList if x.strip()]
        # print("linelist=", lineList)
    # find all second-level sub folders
    case_all = list(pathlib.Path(dir_base).glob("*/*"))
    case_all.sort()
    # ONLY keep cases including ALL keywords.
    # self.logger.debug("search cases using keywords: {}".format(keywords))
    case_selected = [
        a for a in case_all
        if all(k in str(a.absolute()) for k in keywords)
    ]
    if len(excludekeywords):
        case_selected = [
            a for a in case_selected
            if all(k not in str(a.absolute()) for k in excludekeywords)
        ]
    if len(whitelist):
        case_selected = [
            a for a in case_selected
            if any(
                all(s in str(a.absolute()) for s in k.split())
                for k in whitelist
            )
        ]
    case_selected = [a for a in case_selected if is_valid_case(a)]
    return case_selected, case_all


def filter_failed_cases(case_selected, fn_config, p_report):
    """Keep only the cases that failed in the most recent matching run."""
    p_config = pathlib.Path(fn_config)
    command = (
        f"pushd {p_report} >> /dev/null && "
        f"grep {p_config.name} *.info | sort | tail -n 1 | "
        "awk -F: '{ print $1 }'"
    )
    cp = subprocess.run(
        command,
        shell=True,
        executable="/bin/bash",
        check=False,
        capture_output=True,
        text=True,
    )
    fn_info = cp.stdout.strip()
    if len(fn_info) == 0:
        # found nothing, do nothing
        return case_selected
    # fn_info should be `run_YYYYMMDD_hhmmss_TAG_regression.info`
    fn_status = fn_info.replace("_regression.info", "_status.csv")
    p_status = pathlib.Path(p_report) / fn_status
    if not p_status.exists():
        # no status found! something is wrong; should we look for an earlier one?
        return case_selected
    # load status.csv
    try:
        df = pd.read_csv(str(p_status), header=[0, 1], index_col=[0, 1])
        index_success = set(df.loc[df["general"]["Success"] == "✓"].index)
        cases_failed = [
            a for a in case_selected
            if (a.parent.name, a.name) not in index_success
        ]
        return cases_failed
    except Exception as e:
        pp(e)
        return case_selected


def md5sum(filePath):
    """Compute the md5sum of a file or folder.

    Does not support a python object as input; pickle it to a file if needed.
    (Using str() to get an object representation does not work for large
    lists / numpy matrices, because python elides part of the data.)
    """
    assert type(filePath) in [str, pathlib.PosixPath], \
        f"md5sum works on files only, but got {type(filePath)}"

    def do_exclude(p_f):
        ignore_patterns = ["__pycache__"]
        return any(ig in str(p_f) for ig in ignore_patterns)

    def md5_update(m, fp):
        # TODO: maybe an OO interface would be better
        with open(fp, "rb") as fh:
            while True:
                data = fh.read(8192)
                if not data:
                    break
                m.update(data)
        return m

    def md5sum_folder(p_folder):
        l1 = list(p_folder.iterdir())
        l2 = [t for t in l1 if not do_exclude(t) and not t.is_dir()]
        # sorting is crucial for a stable md5
        l2.sort(key=lambda a: str(a))
        m = hashlib.md5()
        for tf in l2:
            m = md5_update(m, tf)
        return m.hexdigest()

    p = pathlib.Path(filePath)
    if p.is_file():
        m = hashlib.md5()
        m = md5_update(m, p)
        return m.hexdigest()
    elif p.is_dir():
        return md5sum_folder(p)
    else:
        raise NotImplementedError(
            f"{p} is neither file nor folder. Check existence!")
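
# Sketch of md5sum usage (illustrative): hashing a folder walks its sorted,
# non-excluded top-level files, so a folder holding one file hashes the same
# as that file by itself.
def _demo_md5sum():
    with tempfile.TemporaryDirectory() as d:
        p = pathlib.Path(d) / "a.txt"
        p.write_text("hello")
        assert md5sum(str(p)) == md5sum(d)  # single file == its folder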

def list2chunks(lst, k):
    """Yield k successive chunks from lst."""
    n = math.ceil(len(lst) / k)
    for i in range(0, len(lst), n):
        yield lst[i:i + n]


def let_user_pick(options, create_new=False):
    """Let the user pick one option interactively (optionally adding a new one)."""
    if len(options) == 0:
        if create_new:
            return input("\nInput new message: ")
        raise AttributeError
    # options available, pick one
    if create_new:
        # offer the option to create a new one
        options.append("Create new?")
    while True:
        print("Please choose:")
        for idx, element in enumerate(options):
            print("{}) {}".format(idx + 1, element))
        i = input("Enter number: ")
        try:
            ii = int(i) - 1
            if 0 <= ii < len(options):
                if create_new and ii == len(options) - 1:
                    # create a new one
                    return input("\nInput new message: ")
                return options[ii]
        except ValueError:
            pass


def create_zip(fn_zip, fns, p_base=None):
    """Create a zip with the given files in a base folder.

    BUG: if different files share the same name in one folder, only the
    last one is kept.
    """
    if isinstance(fns, list):
        # no name change: for a list of fns, keep the original names,
        # but turn the list into a dict
        fns = [pathlib.Path(fn) for fn in fns if fn]
        fns = {p.name: p for p in fns}
    assert isinstance(fns, dict), \
        f"parameter fns must be list or dict, but got {type(fns)}"
    with zipfile.ZipFile(fn_zip, "w", zipfile.ZIP_DEFLATED) as zf:
        for new_name, fn in fns.items():
            pf = pathlib.Path(fn)
            if not pf.exists():
                continue
            if p_base is None:
                arcname = new_name
            else:
                pf2 = pf.parent / new_name
                arcname = str(pf2.relative_to(p_base))
            zf.write(filename=str(pf), arcname=arcname)


def dict2json(d, fn_json):
    """Customized function to save a dict to a json file.

    TODO: merge similar functions.
    """
    with open(fn_json, "w") as f:
        json.dump(d, f, indent=4, sort_keys=True)


def load_regression_json(fn_json):
    """Load a regression json, mapping hardware-mode keys back to constants."""

    def convert_key(k):
        d = {str(plt): plt for plt in MODE_HARDWARE}
        return d.get(k, k)

    def jsonKeys2int(x):
        # https://stackoverflow.com/questions/1450957/pythons-json-module-converts-int-dictionary-keys-to-strings
        if isinstance(x, dict):
            return {convert_key(k): v for k, v in x.items()}
        return x

    with open(fn_json, "r") as f:
        return json.load(f, object_hook=jsonKeys2int)


def clean_name(this_name):
    """Remove special characters from the given string.

    Some node names contain slashes, e.g.: batch_normalization_9/gamma:0_o0
    Keeping this as a function ensures every conversion is the same.
    """
    return this_name.replace("/", "_")


def remove_appendix(this_name):
    """Strip known model-file suffixes from the name.

    NOTE: the original chained str.strip(".onnx")... calls, which remove
    *characters* rather than suffixes and can eat legitimate leading or
    trailing letters; endswith-based removal matches the intent.
    """
    for suffix in (".onnx", ".bie", ".origin", ".decomposed",
                   ".wqbi", ".quan", ".scaled"):
        if this_name.endswith(suffix):
            this_name = this_name[:-len(suffix)]
    return this_name


def get_switch_value(this_map, this_key, default):
    """Return this_map[this_key], or default if the key is missing."""
    if this_key in this_map:
        return this_map[this_key]
    return default


def set_default(this_map, this_key, this_value):
    """Set this_map[this_key] only if the key is not present yet."""
    if this_key not in this_map:
        this_map[this_key] = this_value


def create_logger(module_name, fn_log=None, level="WARNING"):
    """Create a logger with console output and an optional log file."""
    logger = logging.getLogger(module_name)
    levels = {
        "CRITICAL": logging.CRITICAL,
        "ERROR": logging.ERROR,
        "WARNING": logging.WARNING,
        "INFO": logging.INFO,
        "DEBUG": logging.DEBUG,
    }
    logger.setLevel(levels.get(level.upper(), logging.WARNING))
    if logger.hasHandlers():
        return logger
    # create formatter
    formatter = logging.Formatter(
        "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
    )
    # create console handler and set level to debug
    ch = logging.StreamHandler()
    ch.setLevel(logging.DEBUG)
    # add formatter to ch
    ch.setFormatter(formatter)
    logger.addHandler(ch)
    if fn_log:
        fh = logging.FileHandler(fn_log)
        fh.setFormatter(formatter)
        logger.addHandler(fh)
    return logger
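
# A minimal usage sketch for create_logger (the logger name is arbitrary):
def _demo_create_logger():
    log = create_logger("regression_demo", level="INFO")
    log.info("shown: INFO passes the configured level")
    log.debug("hidden: DEBUG is below the configured level")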

def patch_batch_column(cols):
    """Patch r2 for NG.

    old/r2 has 8 columns; NG has 9 columns.
    """
    if len(cols) == 9:
        # for ng:
        #   i,0,input_1_o0,1,1,28,28,16W1C8B,4,16W1C8B,4
        #   o,0,conv2d_4_o0,1,1,1,1,16W1C8B,4,,
        # NOTE: bchw. convert to integer?
        return cols
    elif len(cols) == 4:
        # for 520, the very old format:
        #   i,0,input.1,3,112,112
        #   o,0,806,256,1,1
        #   o,1,847,256,1,1
        return cols
    elif len(cols) == 8:
        cols.insert(1, "")
        return cols
    else:
        pp(f"info columns must be 4, 8 or 9, but got {len(cols)} ({cols})")
        raise AttributeError


def get_git_info(git_path):
    """Get git info out of a single git repo.

    NOTE: does not work for submodules.
    """
    info = {}
    # sanity checks
    p_git = pathlib.Path(git_path)
    if not p_git.exists():
        info["error"] = "directory not exists"
        return info
    if not (p_git / ".git").exists():
        # BUG: git searches from this path up to / until .git is found.
        # We currently expect the exact repo path (none of its subpaths),
        # so this check may be removed.
        info["error"] = "{} is not a git repo. `.git` not found.".format(p_git)
        return info
    with working_directory(git_path):
        b = subprocess.run(
            ["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, text=True
        )
        assert b.returncode == 0, "git command failed."
        info["commit"] = b.stdout.strip()
        b = subprocess.run(
            ["git", "rev-parse", "--abbrev-ref", "HEAD"],
            stdout=subprocess.PIPE,
            text=True,
        )
        assert b.returncode == 0, "git command failed."
        info["branch"] = b.stdout.strip()
        b = subprocess.run(
            ["git", "config", "--get", "remote.origin.url"],
            stdout=subprocess.PIPE,
            text=True,
        )
        if b.returncode == 0:
            info["remote_url"] = b.stdout.strip()
        else:
            info["remote_url"] = "N/A"
    info["path"] = str(git_path)
    return info


def find_branch(model_id, commit, dir_base="/opt/data/e2e_simulator/app"):
    """Get branch info from a local repo folder, for fx model release."""
    p_base = pathlib.Path(dir_base)
    assert p_base.exists(), f"{p_base} does not exist."
    print("check model_{}".format(model_id))
    print(
        " * commit graph: http://192.168.200.1:8088/modelshare/model_{}/-/network/master".format(
            model_id
        )
    )
    models = list(p_base.glob("*/models/model_{}".format(model_id)))
    if len(models) == 0:
        # NOTE: models[0] below raises IndexError when nothing was found
        print("ERROR: cannot find model_{} from any app.".format(model_id))
    p_model = models[0]
    with working_directory(p_model):
        cmd = ["git", "branch", "-r", "--contains", commit]
        b = subprocess.run(cmd, stdout=subprocess.PIPE, text=True)
        if b.returncode == 0:
            o = b.stdout
            print(o)
        else:
            o = None
    return o, models[0]


def get_model_info(git_path):
    """Models are submodules in an app."""
    info = get_git_info(git_path)
    onnxs = list(git_path.glob("alg/*.onnx"))
    if len(onnxs) != 1:
        print("ERROR: there should be exactly ONE onnx, but found {}".format(len(onnxs)))
        for o in onnxs:
            print(" * {}".format(o.name))
        print(" path: {}".format(git_path))
        assert False, "Found too many onnx"
    info["onnx"] = str(onnxs[0].relative_to(git_path))
    return info
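
# Shape of the dict returned by get_git_info above (values illustrative):
#   {"commit": "0123abcd...", "branch": "master",
#    "remote_url": "git@.../repo.git", "path": "/path/to/repo"}
# On failure, only {"error": "..."} is returned.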

def get_app_info(app_path, fn_json=None):
    """Get git info for kneron released apps.

    The apps are listed here:
    http://192.168.200.1:8088/modelshare?utf8=%E2%9C%93&filter=solution_
    """
    ginfo = {}
    ginfo["app"] = get_git_info(app_path)
    ginfo["models"] = {}
    p_app = pathlib.Path(app_path)
    models = p_app.glob("models/*")
    for m in models:
        ginfo["models"][m.name] = get_model_info(m)
    # if given a file name, also save to it
    if fn_json is not None:
        with open(fn_json, "w") as f:
            json.dump(ginfo, f, indent=4, sort_keys=True)
    return ginfo


def create_noise_input_folder(dir_from, sigma_levels, override=False):
    """Create noisy copies of the simulator input folder.

    Currently we only add noise to simulator_input / simulator_input_1 /
    simulator_input_2 ...
    """
    assert "simulator_input" in str(dir_from), "No input/simulator_input folder."
    p_from = pathlib.Path(dir_from)
    fn_froms = p_from.glob("*.txt")
    ori_s = {f.name: load_txt(str(f)) for f in fn_froms}
    con = np.concatenate(list(ori_s.values()))
    assert len(con.shape) == 1, "found more than 1 shape for noise input."
    i_range = np.max(con) - np.min(con)
    # NOTE: sigma_level = 3 => sigma 1, 1 pixel
    for sigma_level in sigma_levels:
        dir_name = p_from.name.replace(
            "simulator_input", "simulator_input_sigma{}".format(sigma_level)
        )
        p_to = p_from.parent / dir_name
        if p_to.exists():
            if override:
                shutil.rmtree(str(p_to))
            else:
                continue
        p_to.mkdir(mode=0o770, parents=True, exist_ok=True)
        print("Create noise input for sigma {}".format(sigma_level))
        sigma_control = 3
        bit_range = 256  # for 8 bit
        n_pixel = list(ori_s.values())[0].shape
        sigma = i_range * sigma_level / (bit_range * sigma_control)
        for k1, v1 in ori_s.items():
            k2 = p_to / k1
            noise = np.random.normal(loc=0, scale=sigma, size=n_pixel)
            v2 = v1 + noise
            np.savetxt(str(k2), v2, fmt="%.10f")


def npy2txt(np_txt: dict, input_names: list, p_input):
    """Save numpy arrays to txt files.

    * np_txt: a map; keys are input node names, values are numpy arrays of
      inputs, 3D (HWC) or 4D (BHWC) per image
    * input_names: input node names from the onnx; should match np_txt.keys()
    * p_input: where to save the knerex_input + simulator_input folders
    """
    # save texts.
    n_inputs = len(input_names)
    knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)]
    knerex_inputs[0] = "knerex_input"
    simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)]
    simulator_inputs[0] = "simulator_input"
    list_inputs = []
    for i_in in range(len(input_names)):
        # for multiple inputs
        np_in_s = np_txt[input_names[i_in]]
        dir_in = p_input / knerex_inputs[i_in]
        dir_simu = p_input / simulator_inputs[i_in]
        list_inputs.append(dir_simu)
        dir_in.mkdir(parents=True, exist_ok=True)
        safe_link(dir_in, dir_simu, relative=True)
        dim_in = np_in_s[0].shape
        total_size = np.prod(dim_in)
        for i_image, np_image in enumerate(np_in_s):
            dim_this = np_image.shape
            assert dim_in == dim_this, (
                f"No. {i_image} input size {dim_this} is different "
                f"from 1st input size {dim_in}")
            p_txt = dir_in / "in_{:04d}.txt".format(i_image)
            # NOTE: we assume the np_images are onnx shaped.
            # WARNING: previous versions required channel-last.
            np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f")
    # link a "test_input.txt"
    link_test_input_txt(list_inputs)
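
# A usage sketch for npy2txt (input names and shapes are hypothetical):
def _demo_npy2txt():
    np_txt = {
        "input_a": np.zeros((2, 4, 4, 3)),  # 2 images of HWC shape (4, 4, 3)
        "input_b": np.zeros((2, 8)),        # 2 images of a flat input
    }
    with tempfile.TemporaryDirectory() as d:
        npy2txt(np_txt, ["input_a", "input_b"], pathlib.Path(d))
        # writes knerex_input/in_0000.txt, in_0001.txt and knerex_input_1/...,
        # links simulator_input{,_1} to them, and links test_input.txt to the
        # first txt in each folder
        assert (pathlib.Path(d) / "simulator_input" / "test_input.txt").exists()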

def solution_npy2txt(np_txt, input_names, p_input, file_name, as_test_input=False):
    """Save numpy arrays to txt files, one named file per input folder.

    Same arguments as npy2txt, plus:
    * file_name: name of the txt file to write (without extension)
    * as_test_input: if True, also link test_input.txt
    """
    # save texts.
    n_inputs = len(input_names)
    knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)]
    knerex_inputs[0] = "knerex_input"
    simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)]
    simulator_inputs[0] = "simulator_input"
    list_inputs = []
    for i_in in range(len(input_names)):
        # for multiple inputs
        np_in_s = np_txt[input_names[i_in]]
        dir_in = p_input / knerex_inputs[i_in]
        dir_simu = p_input / simulator_inputs[i_in]
        list_inputs.append(dir_simu)
        dir_in.mkdir(parents=True, exist_ok=True)
        safe_link(dir_in, dir_simu, relative=True)
        dim_in = np_in_s[0].shape
        total_size = np.prod(dim_in)
        for i_image, np_image in enumerate(np_in_s):
            dim_this = np_image.shape
            assert dim_in == dim_this, (
                f"No. {i_image} input size {dim_this} is different "
                f"from 1st input size {dim_in}")
            p_txt = dir_in / "{}.txt".format(file_name)
            # NOTE: we assume the np_images are onnx shaped.
            # WARNING: previous versions required channel-last.
            np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f")
    # link a "test_input.txt"
    if as_test_input:
        link_test_input_txt(list_inputs)


def link_test_input_txt(list_inputs, fn_txt="test_input.txt"):
    """Make sure every input folder has fn_txt, linking it if needed."""
    list_p = [pathlib.Path(p) for p in list_inputs]
    list_test_input = [p / fn_txt for p in list_p]
    list_test_exist = [p for p in list_test_input if p.exists()]
    if len(list_test_exist) == len(list_inputs):
        # print("{} exists for all input folders.".format(fn_txt))
        pass
    elif len(list_test_exist) == 0:
        # print("link {} in {}".format(fn_txt, list_p))
        fn_all = sorted(list(list_p[0].glob("*.txt")))
        fn_pick = fn_all[0].name
        # the same file name must exist in all input folders
        assert all(
            (p / fn_pick).exists() for p in list_p
        ), "Not all input folders linked."
        for p in list_p:
            p_from = p / fn_pick
            safe_link(p_from, p / fn_txt)
    else:
        n_missing = len(list_inputs) - len(list_test_exist)
        print(
            "ERROR: Found input folders: {}, but {}/{} missing {}. {}".format(
                list_inputs, n_missing, len(list_inputs), fn_txt, list_test_exist
            )
        )
        raise FileNotFoundError


def safe_link(fn_from, fn_to, relative=True, delete_exists=True):
    """Create a link from `fn_from` to `fn_to`.

    * if the target exists already, delete the target, then link.
    """
    f_from = pathlib.Path(fn_from)
    f_to = pathlib.Path(fn_to)
    assert f_from.exists(), f"source file/dir {f_from} does not exist."
    if f_to.is_symlink():
        # TODO: missing_ok=False from 3.8
        f_to.unlink()
    if f_to.exists() and delete_exists:
        shutil.rmtree(f_to)
    if relative:
        f_to.symlink_to(os.path.relpath(f_from, f_to.parent))
    else:
        f_to.symlink_to(f_from.absolute())


def estimate_mem_available():
    """Estimate available memory (kB) from /proc/meminfo: RAM + swap."""
    p_info = pathlib.Path("/proc/meminfo")

    def parse_entry(s):
        a, b = s.strip().split(":")
        return a.strip(), b.strip(" kB").strip()

    with open(p_info, "r") as f:
        lines = f.readlines()
    meminfo = {}
    for line in lines:
        k, v = parse_entry(line)
        meminfo[k] = v
    mems_kB = [int(meminfo[k]) for k in ["MemAvailable", "SwapFree"]]
    return sum(mems_kB)


def expand_array(v, n):
    """Expand a scalar to an array of length n; validate given arrays."""
    if isinstance(v, (collections.abc.Sequence, np.ndarray)):
        # already a vector/array: just check the length
        assert len(v) == n, f"Expect {v} to have length {n} but got {len(v)}"
        return np.array(v)
    # a scalar: broadcast it to an array
    return np.ones(n) * v


def gen_random_string(length):
    """Generate a random string using few resources."""
    alphabet = string.ascii_letters + string.digits
    return "".join(secrets.choice(alphabet) for _ in range(length))
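
# expand_array usage sketch: scalars broadcast, vectors are length-checked.
def _demo_expand_array():
    assert np.array_equal(expand_array(2, 3), np.array([2.0, 2.0, 2.0]))
    assert np.array_equal(expand_array([1, 2, 3], 3), np.array([1, 2, 3]))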

def chunker(seq, size):
    """Cut long list into small lists.

    from https://stackoverflow.com/questions/434287/how-to-iterate-over-a-list-in-chunks
    """
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))


@contextlib.contextmanager
def working_directory(path):
    """Changes working directory and returns to previous on exit.

    link: https://stackoverflow.com/questions/41742317/how-can-i-change-directory-with-python-pathlib
    """
    prev_cwd = pathlib.Path.cwd()
    # create if not exist
    p = pathlib.Path(path)
    p.mkdir(mode=0o770, parents=True, exist_ok=True)
    os.chdir(str(p))
    try:
        yield
    finally:
        os.chdir(prev_cwd)


def pprint_dict(ld):
    """Convert a list of dicts to a string for the report, grouping keys by value."""
    d = defaultdict(set)
    for d1 in ld:
        for k, v in d1.items():
            d[v].add(k)
    if len(d) == 0:
        return ""
    elif len(d) == 1:
        return list(d.keys())[0]
    else:
        s2 = [f"""{k}:{",".join(v)}""" for k, v in d.items()]
        return " \\ ".join(s2)


if __name__ == "__main__":
    pass