#! /usr/bin/env python3 import os import sys import subprocess import logging from pathlib import Path, PosixPath import json import contextlib import re import zipfile import lzma import gzip import pickle import math import struct import hashlib import shutil import collections.abc from collections import defaultdict import string import secrets import tempfile import itertools from datetime import datetime from bs4 import BeautifulSoup import numpy as np import pandas as pd from concurrent.futures import ProcessPoolExecutor from sys_flow_v2.flow_constants import MODE_HARDWARE from sys_flow_v2.onnx_op_stats import onnx_info from sys_flow_v2.util_lib import load_zip_jsons import snoop DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False snoop.install(enabled=DEBUG) if DEBUG: from IPython.terminal import embed as emb terminal = emb.InteractiveShellEmbed() terminal.extension_manager.load_extension("autoreload") terminal.run_line_magic("autoreload", "2") embed = terminal.mainloop else: embed = lambda: None # functions on loading text file def twos_comp(val, bits): """Compute the 2's complement of int value val.""" # if sign bit is set e.g., 8bit: 128-255 if (val & (1 << (bits - 1))) != 0: # compute negative value val = (1 << bits) + val # return positive value as is return val def array_le2flt(arr, n_flt: int, n_byte: int = 4): """Convert arry of 4 elements of unsigned integer (little endian) to float. INPUT: * n_flt: how many float number to extract * n_byte: how many byte to convert to 1 float. Used to convert scale to float. """ return struct.unpack(f"{n_flt}f", struct.pack(f"<{n_flt*n_byte}B", *arr)) def intle2flt(i): packed = struct.pack(' 1 else "-" dp2index[dp_index] = (i_op, idx_2nd) # check subgraph. some OP may be loop node. if subgraph and op in subgraph: this_sub = subgraph[op] if "op_outs" in this_sub: # back compatible for old single-output # turn it into tuple op_outs = [[t] for t in this_sub["op_outs"]] elif "op2dps" in this_sub: d2 = this_sub["op2dps"] sub_ops = this_sub["ops"] op_outs = [d2[sub_op] for sub_op in sub_ops if sub_op in d2] else: raise NotImplementedError("Missing op2dps / op_outs for " f"subgraph {op} in shapeinfo.json") # op_outs is list of list flatten into dps sub_outs = list(itertools.chain(*op_outs)) N_dp = len(sub_outs) n_loop = this_sub["max_count"][0] # why knerx given list here? for i_loop in range(n_loop): for i_dp, dp_name in enumerate(sub_outs): dp_index = (dp_name, i_loop) dp2index[dp_index] = (i_op, N_dp * i_loop + i_dp) dp2dump[dp_index] = dp2dyn_dump(dp_name, graph_dp_in, graph_dp_out, i_loop=i_loop) return (nodes, dp2node, node2dp, dp2idx_in_node, dp_shape, dp_hw_c, dp2index, dp2dump, graph_dp_in, graph_dp_out) def is_zip_file(file_path): """Judget a zip or not using magic number.""" with open(file_path, 'rb') as f: return f.read(4) == b'PK\x03\x04' def get_ioinfo_from_bie( p_bie, hw_mode, dyn_bin="/workspace/libs_V2/dynasty/run_fix_inference" ): """Get input/output nodes info from bie. Info includes: * input node name with order * output node name and shape Not included: * (NOT) datapath (in+out) fx info: bw, radix, scales per channel """ # detour for bie2 if is_zip_file(p_bie): return get_ioinfo_from_bie2(p_bie) p_working = Path(tempfile.mkdtemp(prefix="unpack_bie_")) cmd = f"{dyn_bin} -m {p_bie} -t 1 -p kl{hw_mode} -e -o {p_working}" cp = run_bash_script(cmd) assert ( cp.returncode == 0 ), f"Failed to extract fx info from bie. 
Return code {cp.returncode}" p_j = p_working / "SnrShapeInfo.json" assert p_j.exists(), f"output missing: {p_j}" with open(p_j, "r") as f: j_shape = json.load(f) _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(j_shape) # just need graph out datapath shape dp_out_shape = {k: shape_info[k] for k in dp_out} # TODO: delete folder p_working # the last one is optional ioinfo.json # only available in 0.24.0 for bit-true-match return dp_in, dp_out, dp_out_shape, None def load_ioinfo_json_and_verify(js, dp_in, dp_out): """Load ioinfo.json (usualy for debug). - dynasty need ioinfo.json/calculation_info.json for dynasty for btm in some SPECIAL case. optional - ioinfo.json available in 0.24.0 (regression convert from compiler dump then insert.) - calculation_info.json available in 0.26.0 (inserted by compiler) DIFFERENT FORMAT! """ k = "ioinfo.json" if k in js: ioinfo = js[k] if DEBUG: # do some verify set_dp_in2 = set([a["name"] for a in ioinfo["input"]]) if set_dp_in2 != set(dp_in): msg = f"knerex {k} info does not match. given dp_in: {dp_in}, but ioinfo gives: {set_dp_in2}" raise ValueError(msg) set_dp_out2 = set([a["name"] for a in ioinfo["output"]]) if set_dp_out2 != set(dp_out): msg = f"knerex {k} info does not match. given dp_out: {dp_out}, but ioinfo gives: {set_dp_out2}" raise ValueError(msg) else: ioinfo = None return ioinfo def extract_outputnode(js): js_k = [k for k in js.keys() if k.startswith("OutputNode_")] return js_k def extract_dp2node(js): dp2node = {} tk = "input_tensor_name" # specified by knerex for k in extract_outputnode(js): if tk in js[k]: nodename = k # for now, js[k][tk] is a list of one tensor. dpname = "OutputNode_" + js[k][tk][0] if nodename != dpname: dp2node[dpname] = nodename return dp2node def get_ioinfo_from_radix_info(js, dp_in, dp_out): """Simulate ioinfo from radix_info.json from knerex. TODO: make it compatible with ioinfo.json. refer to parse_setup_json_v2 """ ioinfo = {} ioinfo["note"] = "created by get_ioinfo_from_radix_info." k1 = "radix_info.json" if k1 in js: # NOTE: radix_info.json/OutputNode_NODE_NAME may be different from datapath name. # here is the fix dp2node = extract_dp2node(js[k1]) for dp, node in dp2node.items(): if dp not in js[k1]: # knerex give radix_info.json on node name. but regression is looking for datapath name. # so copy it. js[k1][dp] = js[k1][node] ioinfo["input"] = {k: js[k1][k] for k in dp_in} try: ioinfo["output"] = {k: js[k1][f"OutputNode_{k}"] for k in dp_out} except KeyError: lst_output_nodes = extract_outputnode(js[k1]) raise KeyError(f""" output tensor name different from node name. Please use latest toolchain. radix_info.json provided output nodes: {lst_output_nodes} SnrShapeInfo.json provided output nodes: {list(dp_out)} """) k2 = "calculation_info.json" if k2 in js: ioinfo["calculation_info"] = js[k2] if len(ioinfo) == 1: return None return ioinfo def get_ioinfo_from_bie2(p_bie2): """Parse ioinfo from bie2 format. NOTE: should be same output as get_ioinfo_from_bie. """ js = load_zip_jsons(p_bie2) k1 = "shape_info.json" # from 0.23.0 k2 = "snr_shape_info.json" # from 0.25.0 if k2 not in js and k1 not in js: msg = f"NO {k2} or {k1} found in bie {p_bie2}. 
Only found: {list(js.keys())}" raise FileNotFoundError(msg) k = k2 if k2 in js else k1 _, _, _, _, shape_info, _, _, _, dp_in, dp_out = parse_shape_info(js[k]) # just need graph out datapath shape dp_out_shape = {k: shape_info[k] for k in dp_out} ioinfo = load_ioinfo_json_and_verify(js, dp_in, dp_out) if ioinfo is None: ioinfo = get_ioinfo_from_radix_info(js, dp_in, dp_out) return dp_in, dp_out, dp_out_shape, ioinfo def find_input_txt_folder(p_model, pref="knerex_input"): """Find all input folders. The input folders should be: - knerex_input / knerex_input_1 / knerex_input_2 ... (for models with multiple inputs), or - simulator_input / simulator_input_1 / simulator_input_2 ... (for models with multiple inputs) """ lst = [] p_in = p_model / "input" / pref if p_in.exists(): lst.append(p_in) else: return None for i in range(1, 100): p_in = p_model / "input" / f"{pref}_{i}" if p_in.exists(): lst.append(p_in) else: return lst else: print("should not arrive here") return lst def get_input_txt_list(p_in): """List input txt names in given folder. test_input.txt will be 1st one if exist. """ fns = [fn.name for fn in list(p_in.glob("*.txt"))] fn_default = "test_input.txt" if fn_default in fns: # move fn_default to first one fns.remove(fn_default) return [fn_default] + fns else: return fns def need_compress_command_bin(tc_cat, tc_name): """Special mark for some special case.""" if tc_cat.startswith("m"): big_kernels = [ "bk23x23", "bk25x25", "bk27x27", "bk29x29", "bk31x31", "bk33x33", "bk35x35", ] return any([a in tc_name for a in big_kernels]) return False def guess_model_id(s): sr = re.compile("model_(\d+)*") try: return int(sr.findall(s)[0]) except: return 32768 def clean_case_name(x): """Normalize the case names. The case name in final report, may have extra info: - xxx (known bug) // remove space and after - model_ddd_xxxxxx_append // remove _xxxxxxx """ def remove_append(x): """works without space in the name""" return x.split(" ")[0] def remove_model_share_commit(x): """if this is a case of model_share with commit number in it""" s = re.compile("(model_\d{3})_[\da-f]{6}(_.*)") finds = s.findall(x) if len(finds) == 1: # foud pattern return "".join(finds[0]) else: # no change. return x return remove_model_share_commit(remove_append(x)) def relative_path(target, origin): """Return path of target relative to origin. NOTE: .resolve() will make symlink to its target. not working in our case """ # copy from https://stackoverflow.com/questions/38083555/using-pathlibs-relative-to-for-directories-on-the-same-level p_t = Path(target) p_o = Path(origin) try: return p_t.absolute().relative_to(p_o.absolute()) except ValueError as e: # target does not start with origin # recursion with origin (eventually origin is root so try will succeed) return Path("..").joinpath(relative_path(target, p_o.parent)) class CustomCP: """A customized cp to return if timeout.""" def __init__(self, returncode, stdout=None, stderr=None): """Init it this cp.""" self.returncode = returncode self.stdout = stdout self.stderr = stderr def __str__(self): """Make this cp printable.""" return f'Return Code: {self.returncode}, Error Message: {self.stderr}' def run_bash_script(command, do_echo=False, fail_then_exit=False, timeout=60*60*2): """Kneron wrap on bash commands. 
Inputs: - timeout: how many seconds to run """ if type(command) is list: command = " ".join(command) try: cp = subprocess.run( command, shell=True, executable="/bin/bash", check=False, capture_output=True, text=True, timeout=timeout ) except subprocess.TimeoutExpired: cp = CustomCP(111, stdout=f"Command: {command}", stderr=f"TIMEOUT ({timeout}s)") if do_echo or (cp.returncode != 0 and DEBUG): print("-------------------------------------------------------------") print(f"running command: {command}") if cp.returncode != 0: print(f"Failed at {cp.returncode}") print("-------------------------------------------------------------") print(cp.stdout) print(cp.stderr) print("-------------------------------------------------------------") if cp.returncode != 0 and fail_then_exit: print(f"Failed to run {command}. Exit code: {cp.returncode}") print("Will not continue. exit.") sys.exit(1) return cp def check_parallel_log(fn_log): """Check the parallel log file. NOTE: sometime dynasty killed by segment fault however return code is 0. We need to make both are zero. """ exitvals = [] signals = [] with open(fn_log, 'r') as file: for i, line in enumerate(file): if i == 0: # skiip the headline continue parts = line.split() if len(parts) > 6: # Ensuring the line has enough parts try: exitvals.append(int(parts[6])) signals.append(int(parts[7])) except: pass e1 = tuple(a for a in exitvals if a != 0) e2 = tuple(a for a in signals if a != 0) msg = [] if len(e1) > 0: # usually not happen here msg.append(f"error: {e1}") if len(e2) > 0: msg.append(f"signal: {e2}") return "//".join(msg), e1, e2 def set_folder_public(p_out): """Make output path readable for basic group.""" dir_out = Path(p_out) if not dir_out.exists(): # in case it had been delete before trigger test_case.__del__ return script = f""" chgrp -R 50000 {dir_out} chmod 755 {dir_out} find {dir_out} -type d -exec chmod 755 {{}} \; find {dir_out} -type f -exec chmod 644 {{}} \; """ run_bash_script(script, do_echo=False) def assert_nodes_exists(fn_onnx, node_list): """Kneron solutions may cast some special process on certain nodes, specified by name. use this script to make sure onnx not changed.""" import onnx o = onnx.load_model(fn_onnx, load_external_data=False) nodes_all = [a.name for a in o.graph.node] missing = False print("check {} for special nodes {}".format(fn_onnx, node_list)) for node in node_list: if node not in nodes_all: print("ERROR: node {} does not exist. check with ALG team".format(node)) missing = True if missing: raise FileNotFoundError def detect_valid_model(dir_in): """Give a path, detect valid models under it""" p_base = Path(dir_in).resolve() onnx = p_base.glob("**/*.origin.onnx") for o in onnx: p_model = o.parent.parent if is_valid_case(p_model): yield p_model def is_valid_case(dir_case): """Is this a valid test case? XXXXX/input/XXXXX.origin.onnx (or bie) XXXXX must be same XXXXX/input/knerex_input must exist Not checking the txt in it. """ p_case = Path(dir_case) if not p_case.is_dir(): # print(f"{p_case} is not dir.") # TODO: return extra string return False p_origin = p_case / f"input/{p_case.name}.origin.onnx" if not p_origin.exists(): p_origin = p_case / f"input/{p_case.name}.origin.bie" if not p_origin.exists(): return False p_knerex_input = p_case / "input" / "knerex_input" if not p_knerex_input.exists(): return False return True def is_success_case(p_case): """Use output/success for quick judge. NOTE: currently only check output/success. not output_dongle / etc. 
""" p_sign = p_case / "output/success" return p_sign.exists() def filter_cases(dir_base, keywords=[]): """Find all test cases in dir_base. filter out bad test cases, e.g., missing input, origin.onnx select by only test case with keywords output: list of path to test cases """ excludekeywords = [] if "-e" in keywords: excludekeywords = keywords[keywords.index("-e") + 1:] keywords = keywords[0: keywords.index("-e")] whitelist = [] if "-f" in keywords: whitefile = keywords[keywords.index("-f") + 1] keywords = keywords[0: keywords.index("-f")] with open(whitefile, "r") as f: lineList = f.readlines() whitelist[:] = [x.strip() for x in lineList if x.strip()] # find all second level sub folder case_all = list(Path(dir_base).glob("*/*")) case_all.sort() # ONLY keep cases including ALL keywords. # self.logger.debug("search cases using keywords: {}".format(keywords)) case_selected = [ a for a in case_all if all([True if (k in a.name or k in a.parent.name) else False for k in keywords]) ] if len(excludekeywords): case_selected = [ a for a in case_selected if all([False if (k in a.name or k in a.parent.name) else True for k in excludekeywords]) ] if len(whitelist): case_selected = [ a for a in case_selected if any( [ True if all((s in a.name or s in a.parent.name) for s in k.split()) else False for k in whitelist ] ) ] case_selected = [a for a in case_selected if is_valid_case(a)] case_success = [a for a in case_selected if is_success_case(a)] case_not_success = [a for a in case_selected if not is_success_case(a)] return case_selected, case_all, case_success, case_not_success # TODELETE # def filter_failed_cases(case_selected, fn_config, p_report): # p_config = Path(fn_config) # command = f"pushd {p_report} >> /dev/null && grep {p_config.name} *.info | sort | tail -n 1 | awk -F: '{{ print $1 }}'" # cp = subprocess.run( # command, # shell=True, # executable="/bin/bash", # check=False, # capture_output=True, # text=True, # ) # fn_info = cp.stdout.strip() # if len(fn_info) == 0: # # found nothing, do nothing # return case_selected # # # fn_info should be `run_YYYYMMDD_hhmmss_TAG_regression.info` # fn_status = fn_info.replace("_regression.info", "_status.csv") # p_status = Path(p_report) / fn_status # if not p_status.exists(): # # no status found! something wrong. should I look for another one before? # return case_selected # # # load status.csv # try: # df = pd.read_csv(str(p_status), header=[0, 1], index_col=[0, 1]) # index_success = set(df.loc[df["general"]["Success"] == "✓"].index) # cases_failed = [ # a for a in case_selected if (a.parent.name, a.name) not in index_success # ] # return cases_failed # except Exception as e: # pp(e) # return case_selected def md5sum(filePath): """Check md5sum of a file/folder. Does not support input as python object. pikcle the file if need to. we use string() to get object representation, but it will not work properly for large list / numpy matrix. because python will only print some part of data. 
""" assert type(filePath) in [str, PosixPath], f"md5sum works on file only, but got {type(filePath)}" def do_exclude(p_f): ignore_patterns = ["__pycache__"] for ig in ignore_patterns: if ig in str(p_f): return True return False def md5_update(m, fp): # TODO: maybe use OO is better with open(fp, "rb") as fh: while True: data = fh.read(8192) if not data: break m.update(data) return m def md5sum_folder(p_folder): l1 = list(p_folder.iterdir()) l2 = [t for t in l1 if not (do_exclude(t)) and not t.is_dir()] # sorting is crucial for md5 calculation l2.sort(key=lambda a: str(a)) m = hashlib.md5() for tf in l2: m = md5_update(m, tf) return m.hexdigest() p = Path(filePath) if p.is_file(): m = hashlib.md5() m = md5_update(m, p) return m.hexdigest() elif p.is_dir(): return md5sum_folder(p) else: raise NotImplementedError(f"{p} is nor file neither folder. Check existence!") def list2chunks(lst, k): """Yield successive k chunks from lst.""" n = math.ceil(len(lst) / k) for i in range(0, len(lst), n): yield lst[i : i + n] def let_user_pick(options, create_new=False): if len(options) == 0: if create_new: return input("\nInput new message: ") else: raise AttributeError # if options available, pick one if create_new: # option to create new one options.append("Create new?") while True: print("Please choose:") for idx, element in enumerate(options): print("{}) {}".format(idx + 1, element)) i = input("Enter number: ") try: ii = int(i) - 1 if 0 <= ii < len(options): if create_new and ii == len(options) - 1: # create new return input("\nInput new message: ") else: return options[ii] except: pass def create_zip(fn_zip, fns, p_base=None): """Create a zip with give files in base folder. BUG: if diff files with same name in one folder, only the last one will be kept. """ if isinstance(fns, list): # no name change. for list of fn come in, just use the original name # but need to turn into dict fns = [Path(fn) for fn in fns if fn] fns = {p.name: p for p in fns} assert isinstance(fns, dict), f"parameter fns must be list or dict. but got {type(fns)}" with zipfile.ZipFile(fn_zip, "w", zipfile.ZIP_DEFLATED) as zf: for new_name, fn in fns.items(): pf = Path(fn) if not pf.exists(): continue if p_base is None: arcname = new_name else: pf2 = pf.parent / new_name arcname = str(pf2.relative_to(p_base)) zf.write(filename=str(pf), arcname=arcname) def preprocess_json(json_str): # 将 inf 替换为字符串 "Infinity"(JSON5标准支持) json_str = re.sub(r':\s*inf\b', ': "Infinity"', json_str, flags=re.IGNORECASE) json_str = re.sub(r':\s*-inf\b', ': "-Infinity"', json_str, flags=re.IGNORECASE) return json_str class NumpyEncoder(json.JSONEncoder): """To save numpy array in json. From `numpy array is not json serializable`_ . .. _numpy array is not json serializable: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable """ def default(self, obj): """Set default way.""" if isinstance(obj, np.ndarray): return obj.tolist() return json.JSONEncoder.default(self, obj) def dict2json(d, fn_json): """Customize function to save dict to json file. TODO: merge similar functions. 
""" with open(fn_json, "w") as f: json.dump(d, f, indent=4, sort_keys=True, cls=NumpyEncoder) def load_regression_json(fn_json): def convert_key(k): d = {str(plt): plt for plt in MODE_HARDWARE} return d.get(k, k) def jsonKeys2int(x): # refer to https://stackoverflow.com/questions/1450957/pythons-json-module-converts-int-dictionary-keys-to-strings if isinstance(x, dict): return {convert_key(k): v for k, v in x.items()} return x with open(fn_json, "r") as f: j = json.load(f, object_hook=jsonKeys2int) return j def clean_name(this_name): """Remove special charaters from given string. Some node names with slash, example: batch_normalization_9/gamma:0_o0 Make this a function that will all conversion will be same. """ return this_name.replace("/", "_") def clean_file_name(this_name): """Clean up given name that can be used as file name. input `this_name` is given by customers. It may contains " ", "()", etc. Returns: str: 清理后的文件名,移除了空格、括号等特殊字符 """ if not this_name: return "model" # 获取文件扩展名 p_file = Path(this_name) suffix = p_file.suffix name_without_ext = p_file.stem # 移除或替换可能造成问题的字符 # 空格替换为下划线 cleaned_name = name_without_ext.replace(" ", "_") # 对于UTF-8字符,使用更宽松的正则表达式 # 保留字母数字(包括Unicode字母)、下划线、连字符和点号 # \w 包含Unicode字母和数字 cleaned_name = re.sub(r'[^\w._-]', '', cleaned_name, flags=re.UNICODE) # 移除连续的下划线 cleaned_name = re.sub(r'_+', '_', cleaned_name) # 移除开头和结尾的下划线或点号 cleaned_name = cleaned_name.strip('_.') # 如果清理后为空,返回默认名称 if not cleaned_name: cleaned_name = "model" # if cleaned_name is too long, give warning then raise error if len(cleaned_name) > 200: raise ValueError(f"File name {cleaned_name} is too long, truncated to 200 characters.") # 重新组合文件名和扩展名 return cleaned_name + suffix def remove_appendix(this_name): """Kneron toolchain generated onnx may have multiple appendix. Remove all of them. """ return ( this_name.removesuffix(".onnx") .removesuffix(".bie") .removesuffix(".origin") .removesuffix(".decomposed") .removesuffix(".wqbi") .removesuffix(".quan") .removesuffix(".scaled") ) def load_np_in(np_in): """加载并验证输入数据。 Args: np_in: 可以是以下类型之一: - dict: 直接返回该字典 - str或Path: 指向pickle文件的路径,将加载并返回其中的字典数据 - .pkl: 直接用 pickle 加载 - .pkl.xz: 用 pkl2df 加载 Returns: dict: 包含输入数据的字典 Raises: FileNotFoundError: 当输入文件不存在时 TypeError: 当输入类型不正确或加载的数据不是字典时 RuntimeError: 当加载文件失败时 """ if isinstance(np_in, dict): return np_in # Convert to Path object if string if isinstance(np_in, (str, Path)): p_in = Path(np_in) if not p_in.exists(): raise FileNotFoundError(f"Input file does not exist: {p_in}") try: if p_in.name.endswith('.pkl.xz'): result = pkl2df(p_in) elif p_in.name.endswith('.pkl'): with open(p_in, 'rb') as f: result = pickle.load(f) else: raise ValueError(f"Unsupported file format: {p_in.name}") if not isinstance(result, dict): raise TypeError(f"Loaded object is not a dictionary: {type(result)}") return result except Exception as e: raise RuntimeError(f"Failed to load dictionary from {p_in}: {str(e)}") raise TypeError(f"Input must be a dictionary or file path, got: {type(np_in)}") def verify_input_shape_onnx_npy(p_onnx, np_txt): """Verify the np_txt to be same shape as p_onnx input.""" o = onnx_info(p_onnx) d_in_shape = o.get_onnx_input_size() # check keys () k1 = set(d_in_shape.keys()) k2 = set(np_txt.keys()) assert k1 == k2, f"Onnx specified input nodes: {list(k1)}, but the numpy passed in is {list(k2)}. Please check input numpy data." # check np_txt elements are list and have same length all_list = [isinstance(v, list) for k, v in np_txt.items()] assert all(all_list), """Not all npy input are lists. 
The format should be like: {"in1":[np1_1, np1_2], "in2:[np2_1, np2_2]}""" all_len = [len(v) for k, v in np_txt.items()] assert len(set(all_len)) == 1, f""" The list of input per input node should be same. But given list have different lengths: {all_len}.""" assert all_len[0] > 0, """np_txt got EMPTY list! Please check your script, especially the image path.""" for k in list(k2): o_shape = tuple(d_in_shape[k]) diff_shape = [tuple(v.shape) for v in np_txt[k] if v.shape != o_shape] assert len(diff_shape) == 0, f""" Input node ({k}) has shape ({o_shape}), but the numpy list has different shapes of: {list(set(diff_shape))}. Please check the numpy input. """ # TODELETE: just just dict.get(x, y) def get_switch_value(this_map, this_key, default): if this_key in this_map: return this_map[this_key] else: return default def set_default(this_map, this_key, this_value): if this_key not in this_map: this_map[this_key] = this_value def create_logger(module_name, fn_log=None, level="WARNING"): logger = logging.getLogger(module_name) levels = { "CRITICAL": logging.CRITICAL, "ERROR": logging.ERROR, "WARNING": logging.WARNING, "INFO": logging.INFO, "DEBUG": logging.DEBUG, } logger.setLevel(levels.get(level.upper(), logging.WARNING)) if logger.hasHandlers(): return logger # create formatter formatter = logging.Formatter( "%(asctime)s - %(name)s - %(levelname)s - %(message)s" ) # create console handler and set level to debug ch = logging.StreamHandler() ch.setLevel(logging.DEBUG) # add formatter to ch ch.setFormatter(formatter) logger.addHandler(ch) if fn_log: fh = logging.FileHandler(fn_log) fh.setFormatter(formatter) logger.addHandler(fh) return logger def patch_batch_column(cols): """patch r2 for NG. old/r2 has columns of 8 NG has columns of 9 """ if len(cols) == 9: """for ng i,0,input_1_o0,1,1,28,28,16W1C8B,4,16W1C8B,4 o,0,conv2d_4_o0,1,1,1,1,16W1C8B,4,, """ # NOTE: bchw. # convert to interger? return cols elif len(cols) == 4: # for 520, the very old format """ i,0,input.1,3,112,112 o,0,806,256,1,1 o,1,847,256,1,1 """ return cols elif len(cols) == 8: cols.insert(1, "") return cols else: pp(f"info columns must be 4, 8 or 9. but got {len(cols)} ({cols})") raise AttributeError def get_git_info(git_path): """get git info out of a single git repo NOTE: not working submodule """ info = {} # sanity check. p_git = Path(git_path) if not p_git.exists(): info["error"] = "directory not exists" return info if not (p_git / ".git").exists(): # BUG: the git will search from this path up to / until found .git . # currently I expect to send exactly path of repo, aka, NONE of the subpath # this part may be removed. info["error"] = "{} is not a git repo. `.git` not found.".format(p_git) return info with working_directory(git_path): b = subprocess.run( ["git", "rev-parse", "HEAD"], stdout=subprocess.PIPE, text=True ) assert b.returncode == 0, "git command failed." info["commit"] = b.stdout.strip() b = subprocess.run( ["git", "rev-parse", "--abbrev-ref", "HEAD"], stdout=subprocess.PIPE, text=True, ) assert b.returncode == 0, "git command failed." 
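        # `git rev-parse --abbrev-ref HEAD` yields the current branch name;
        # on a detached HEAD it prints the literal string "HEAD".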
info["branch"] = b.stdout.strip() b = subprocess.run( ["git", "config", "--get", "remote.origin.url"], stdout=subprocess.PIPE, text=True, ) if b.returncode == 0: info["remote_url"] = b.stdout.strip() else: info["remote_url"] = "N/A" info["path"] = str(git_path) return info def find_branch(model_id, commit, dir_base="/opt/data/e2e_simulator/app"): """get branch info from local repo folder for fx model release """ p_base = Path(dir_base) assert p_base.exists(), f"{p_base} does not exists." print("check model_{}".format(model_id)) print( " * commit graph: http://192.168.200.1:8088/modelshare/model_{}/-/network/master".format( model_id ) ) models = list(p_base.glob("*/models/model_{}".format(model_id))) if len(models) == 0: print("ERROR: cannot found model_{} from any app.".format(model_id)) p_model = models[0] with working_directory(p_model): cmd = ["git", "branch", "-r", "--contains", commit] b = subprocess.run(cmd, stdout=subprocess.PIPE, text=True) if b.returncode == 0: o = b.stdout print(o) else: o = None return o, models[0] def get_model_info(git_path): """model are submodules in app""" info = get_git_info(git_path) onnxs = list(git_path.glob("alg/*.onnx")) if len(onnxs) != 1: print("ERROR: There should only be ONE onnx. but found {}".format(len(onnxs))) for o in onnxs: print(" * {}".format(o.name)) print(" path: {}".format(git_path)) assert False, "Found too many onnx" info["onnx"] = str(onnxs[0].relative_to(git_path)) return info def get_app_info(app_path, fn_json=None): """get git info for kneron released apps. The apps are listed here: http://192.168.200.1:8088/modelshare?utf8=%E2%9C%93&filter=solution_ """ ginfo = {} ginfo["app"] = get_git_info(app_path) ginfo["models"] = {} p_app = Path(app_path) models = p_app.glob("models/*") for m in models: ginfo["models"][m.name] = get_model_info(m) # if given file name to save if fn_json is not None: with open(fn_json, "w") as f: json.dump(ginfo, f, indent=4, sort_keys=True) return ginfo def create_noise_input_folder(dir_from, sigma_levels, override=False): """Create noise input to test model robustness. currently only add noise to simulator_input / simulator_input_1 / simulator_input_2 """ assert "simulator_input" in str(dir_from), "No input/simulator_input folder." p_from = Path(dir_from) fn_froms = p_from.glob("*.txt") ori_s = {f.name: load_txt(str(f)) for f in fn_froms} con = np.concatenate(list(ori_s.values())) assert len(con.shape) == 1, "found more than 1 shape for noise input." i_range = np.max(con) - np.min(con) # NOTE: sigma_level = 3 => sigma 1, 1 pixel for sigma_level in sigma_levels: dir_name = p_from.name.replace( "simulator_input", "simulator_input_sigma{}".format(sigma_level) ) p_to = p_from.parent / dir_name if p_to.exists(): if override: shutil.rmtree(str(p_to)) else: continue p_to.mkdir(mode=0o770, parents=True, exist_ok=True) print("Create noise input for sigma {}".format(sigma_level)) sigma_control = 3 bit_range = 256 # for 8bit n_pixel = list(ori_s.values())[0].shape sigma = i_range * sigma_level / (bit_range * sigma_control) for k1, v1 in ori_s.items(): k2 = p_to / k1 noise = np.random.normal(loc=0, scale=sigma, size=n_pixel) v2 = v1 + noise np.savetxt(str(k2), v2, fmt="%.10f") def save_array(file_path, array, fmt, compression="npy"): """Save array to txt file, meanwhile with optional compression.""" # NOTE: we assume the np_images is onnx shaped. 
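    # compression: "gzip" writes a flattened .txt.gz text dump; "npy" (default) writes a
    # C-contiguous .npy (floats cast to float32); anything else falls back to plain np.savetxt.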
# WARNING: previous version we require channel last if compression == 'gzip': with gzip.open(file_path.with_suffix('.txt.gz'), 'wt', compresslevel=4) as f: np.savetxt(f, array.ravel(), fmt=fmt) elif compression == "npy": # NOTE: dynasty float only takes float32 numpy input. # does not support integer. but they are note changed here. if np.issubdtype(array.dtype, np.floating): array = array.astype(np.float32) # NOTE: this numpy save to disk to be read by dynasty-float. which expect c-style. # but sometime the numpy use fortrune style. # np.ravel(order="C") is different from np.ravel(order="F") # np.ascontiguousarray is force numpy array to be continues in memory (similar to order="C") np.save(file_path.with_suffix('.npy'), np.ascontiguousarray(array)) else: np.savetxt(file_path, array.ravel(), fmt=fmt) def sanity_check_npy(np_txt, input_names, filename=None, compression="npy", ch_last=False): """Sanity check the inputs. make sure they match each other.""" # santity check on node names. names1 = set(input_names) names2 = set(np_txt.keys()) if names1 != names2: raise ValueError(f"ERROR: input name does not match: onnx input ({list(names1)}) vs given np ({list(names2)})") # sanity check np_txt, which is dict of list of numpy array n_pairs = {k: len(v) for k, v in np_txt.items()} if len(set(n_pairs.values())) > 1: raise ValueError(f"np_txt input nodes have DIFFERENT length: {n_pairs}. They must be SAME.") # 如果filename不为None,检查其是否为字符串列表且长度与np_txt中的列表长度相同 tl2 = list(n_pairs.values())[0] if filename is not None: if not isinstance(filename, list) or not all(isinstance(f, str) for f in filename): raise ValueError("filename should be a list of strings.") tl1 = len(filename) if tl1 != tl2: raise ValueError(f"The length of `filename` ({tl1}) should be the same as the pari length ({tl2}) in np_txt.") # NOTE: the filename suffix is not changed. # 检查同一列表中的numpy数组是否具有相同的形状 for key, lst in np_txt.items(): shapes = [arr.shape for arr in lst] if len(set(shapes)) != 1: raise ValueError(f"All npy arr in list '{key}' should have the same shape. Please check `np_txt`.") # NOTE: did not compare the shape against onnx specified shape. # ch_last is for dynasty-float-so inference. which take only txt for now. if ch_last and (compression != "txt"): raise ValueError(f"dump to channel-last must be `txt` file, but got {compression}.") return def preset_pairname(np_txt, compression="npy"): """Create default filename for input pairs.""" suffix = "npy" if compression == "npy" else "txt" tl2 = [len(v) for k, v in np_txt.items()][0] # lens should be same pairname = [f"in_{i:04d}.{suffix}" for i in range(tl2)] pairname[0] = f"test_input.{suffix}" return pairname def get_paired_inputs(p_txt_inputs, pair_names=None, suffix="npy", verify_exist=True): """Check multiple INPUT NODES for this MODEL. Give 1st input image name, give a list with whole input set (might be 1 or more.) TODO: need refactor into flow_utils Args: p_txt_inputs: where txt files exists. paire_names: the txt filenames in the first input folder. (should be same in other folder.) """ # if given txt files then use it otherwise search for it fns = [str(p) for p in pair_names] if pair_names else sorted([fn.name for fn in list(p_txt_inputs[0].glob(f"*.{suffix}"))]) paired_inputs = [] for fn in fns: # find a pair of inputs pair = [p / fn for p in p_txt_inputs] if verify_exist: assert all([f.exists() for f in pair]) paired_inputs.append(pair) return paired_inputs def convert_to_channel_last(np_1): """Convert a numpy array to channel last. 
For the call of dynasty float so only. dynasty float / fx will only accept onnx-shape input. """ input_shape = np_1.shape if len(input_shape) > 3: # for e2e / app_release. only do channel-last txt if dimension >= 4 axes = range(len(input_shape)) axes = [axes[0], *axes[2:], axes[1]] np_1 = np.transpose(np_1, axes) return np_1 def npy2txt(np_txt: dict, input_names: list, p_input, exists_then_skip=False, pairname=None, compression="npy", # txt or npy as_test_input=True, ch_last=False, n_writer=4, default_btm_name="test_input", knerex_prefix="knerex_input", sim_prefix="simulator_input", do_sanity_check=False): """Save numpy file to txt files. np_txt is map, key is input node names, value: numpy array of input, 3D (HWC) or 4D (BHWC) input_names are input node names, get from onnx. it should be same as np_txt.key(), but with order specified by onnx. The order matters! p_input is where to save the knerex_input+simulator_input folders. usually be `model/input/`. If for inference, it could be any path. pairname: optional, to specify what the npy/txt files to dump. Otherwise will use `in_0000.npy` format. ch_last: default False. knerex dynasty float so call need channel-last text file. Keep this option for toolchain/inference_dynasty_so() as_test_input: to keep a `test_input.txt` in input folder. """ if do_sanity_check or DEBUG: sanity_check_npy(np_txt, input_names, pairname, compression=compression, ch_last=ch_last) if pairname is None: pairname = preset_pairname(np_txt, compression) # prepare texts folders. # NOTE: the folder names are fixed. always dump to knerex_input, # then link simulator_input to it. # So it is ok to call in inference_* series. n_inputs = len(input_names) # number of input nodes for this model names_knerex_inputs = [f"{knerex_prefix}_{i}" for i in range(n_inputs)] names_knerex_inputs[0] = knerex_prefix names_simulator_inputs = [f"{sim_prefix}_{i}" for i in range(n_inputs)] names_simulator_inputs[0] = sim_prefix n_pairs = [len(v) for k, v in np_txt.items()][0] # number of pair of inputs, each pair has one input per input node p_input = Path(p_input) p_knerex_inputs = [p_input / names_knerex_inputs[i] for i in range(len(input_names))] p_simu_inputs = [p_input / names_simulator_inputs[i] for i in range(len(input_names))] do_dump = True if exists_then_skip: all_knerex_input_exist = all([p.exists() for p in p_knerex_inputs]) if all_knerex_input_exist: print(f"\n\nWARNING: knerex inputs exists alreay! skip dump to disk. If you need to dump, please remove: {p_knerex_inputs} . or set `exsits_then_skip` to False.\n\n") do_dump = False if do_dump: if n_pairs > 1 and n_writer > 1: # write to disk at with multi-process with ProcessPoolExecutor(max_workers=n_writer) as executor: futures = [] for i_in, name in enumerate(input_names): # prepare folder for this input node dir_in = p_knerex_inputs[i_in] dir_in.mkdir(parents=True, exist_ok=True) # for corresponding simulator input safe_link(dir_in, p_simu_inputs[i_in], relative=True) np_in_s = np_txt[name] for i_image, np_image in enumerate(np_in_s): p_txt = dir_in / pairname[i_image] # back-compatible for dynasty-float-so inference. if ch_last: # if compression != "txt": # raise ValueError(f"dump to channel-last must be `txt` file, but got {compression}.") np_image = convert_to_channel_last(np_image) # Schedule the save operation # TODO: precision may increase from 6 to 15 for some solutions if using txt. 
but now prefer npy future = executor.submit(save_array, p_txt, np_image, '%.6f', compression) futures.append(future) # Optionally, handle the results as they complete (or just use as below) for future in futures: future.result() # waiting for all files to be processed else: for i_in, name in enumerate(input_names): # prepare folder for this input node dir_in = p_knerex_inputs[i_in] dir_in.mkdir(parents=True, exist_ok=True) # for corresponding simulator input safe_link(dir_in, p_simu_inputs[i_in], relative=True) np_in_s = np_txt[name] for i_image, np_image in enumerate(np_in_s): p_txt = dir_in / pairname[i_image] # back-compatible for dynasty-float-so inference. if ch_last: np_image = convert_to_channel_last(np_image) save_array(p_txt, np_image, '%.6f', compression) suffix = "npy" if compression == "npy" else "txt" fn_default = f"{default_btm_name}.{suffix}" if as_test_input: link_test_input_txt(p_simu_inputs, fn_default=fn_default) # prepare for dynasty inference simulator_paired_inputs = get_paired_inputs(p_simu_inputs, pairname, suffix=suffix, verify_exist=False) return p_knerex_inputs, simulator_paired_inputs, pairname def solution_npy2txt(np_txt, input_names, p_input, file_name, as_test_input = False): """Save numpy file to txt files for solution. np_txt is map, key is input node names, value: numpy array of input, 3D (HWC) or 4D (BHWC) input_names are input node names, get from onnx. it should be same as np_txt.key() p_input is where to save the knerex_input+simulator_input folders """ # save texts. n_inputs = len(input_names) knerex_inputs = ["knerex_input_{}".format(i) for i in range(n_inputs)] knerex_inputs[0] = "knerex_input" simulator_inputs = ["simulator_input_{}".format(i) for i in range(n_inputs)] simulator_inputs[0] = "simulator_input" list_inputs = [] for i_in in range(len(input_names)): # for multiple inputs np_in_s = np_txt[input_names[i_in]] dir_in = p_input / knerex_inputs[i_in] dir_simu = p_input / simulator_inputs[i_in] list_inputs.append(dir_simu) dir_in.mkdir(parents=True, exist_ok=True) safe_link(dir_in, dir_simu, relative=True) dim_in = np_in_s[0].shape total_size = np.prod(dim_in) for i_image, np_image in enumerate(np_in_s): dim_this = np_image.shape assert ( dim_in == dim_this ), f"No. {i_image} input size {dim_this} is different from 1st input size {dim_in}" p_txt = dir_in / "{}.txt".format(file_name) # NOTE: we assume the np_images is onnx shaped. # WARNING: previous version we require channel last np.savetxt(str(p_txt), np_image.reshape([total_size]), fmt="%.6f") # link a "test_input.txt" if as_test_input: link_test_input_txt(list_inputs) def link_test_input_txt(list_in_nodes, fn_default="test_input.npy"): """Make sure test_input.{npy,txt} exists for btm in regression flow.""" list_p = [Path(p) for p in list_in_nodes] list_test_input = [p / fn_default for p in list_p] list_test_exist = [p for p in list_test_input if p.exists()] if len(list_test_exist) == len(list_in_nodes): # print("{} exists for all input folders.".format(fn_default)) return elif len(list_test_exist) == 0: # so no input_test.txt exists at all # print("link {} in {}".format(fn_default, list_p)) suffix = Path(fn_default).suffix fn_pick = sorted(list(list_p[0].glob(f"*{suffix}")))[0].name # same file name must exist in all input folder if not all([(p / fn_pick).exists() for p in list_p]): raise FileNotFoundError(f"Not all input folder has {fn_pick}. 
Please check input folders: {list_in_nodes}") [safe_link(p / fn_pick, p / fn_default) for p in list_p] else: # error: some has test_input.txt, there must be mis-alignment. n_missing = len(list_in_nodes) - len(list_test_exist) raise FileNotFoundError(f"ERROR: Found input folders: {list_in_nodes}, but {n_missing}/{len(list_in_nodes)} missing {fn_default}. we have only {list_test_exist}") def safe_link(fn_from, fn_to, relative=True, delete_exists=True): """Create a link from `fn_from` to `fn_to`. * if the target exist already, delete the target then link. """ f_from = Path(fn_from) f_to = Path(fn_to) assert f_from.exists(), f"source file/dir {f_from} does not exists." if f_to.is_symlink(): # TODO: missing_ok=False from 3.8 f_to.unlink() if f_to.exists() and delete_exists: shutil.rmtree(f_to) if relative: f_to.symlink_to(os.path.relpath(f_from, f_to.parent)) else: f_to.symlink_to(f_from.absolute()) def estimate_mem_available(): p_info = Path("/proc/meminfo") def parse_entry(s): a, b = s.strip().split(":") return a.strip(), b.removesuffix("kB").strip() with open(p_info, "r") as f: lines = f.readlines() meminfo = {} for line in lines: k, v = parse_entry(line) meminfo[k] = v mems_kB = [int(meminfo[k]) for k in ["MemAvailable", "SwapFree"]] return sum(mems_kB) def expand_array(v, n): """Expand scalar to array. """ if isinstance(v, (collections.abc.Sequence, np.ndarray)): # is vector / array, make sure correct length assert len(v) == n, f"Expect {v} to have length {n} but got {len(v)}" return np.array(v) else: # is scalar, expand it to array return np.ones(n)*v def gen_random_string(length): """Generate random string use less resource.""" alphabet = string.ascii_letters + string.digits random_string = ''.join(secrets.choice(alphabet) for _ in range(length)) return random_string def chunker(seq, size): """Cut long list into small lists. from https://stackoverflow.com/questions/434287/how-to-iterate-over-a-list-in-chunks """ return (seq[pos:pos + size] for pos in range(0, len(seq), size)) @contextlib.contextmanager def working_directory(path): """ Changes working directory and returns to previous on exit. link: https://stackoverflow.com/questions/41742317/how-can-i-change-directory-with-python-pathlib """ prev_cwd = Path.cwd() # create if not exist p = Path(path) p.mkdir(mode=0o770, parents=True, exist_ok=True) os.chdir(str(p)) try: yield finally: os.chdir(prev_cwd) def pprint_dict(ld): """Convert dict to string then put into report.""" d = defaultdict(set) for d1 in ld: for k, v in d1.items(): d[v].add(k) if len(d) == 0: return "" elif len(d) == 1: return list(d.keys())[0] else: s2 = [f"""{k}:{",".join(v)}""" for k, v in d.items()] return " \\ ".join(s2) def get_timestamp(): """Get timestamp from bash env.""" timestamp = os.environ.get('regression_timestamp', None) # 如果不存在,则创建一个新的时间戳 if timestamp is None: timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') print(f"Created new timestamp: {timestamp}") else: print(f"Using existing TIMESTAMP: {timestamp}") return timestamp def report_dict2df(d, p_status=None): """Convert regression results to dataframe for report.""" # check format. two level dictionary, each key is two level. 
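    # expected layout: d[(category, case)][(platform, stage)] = value, so both the
    # index and the columns become a two-level MultiIndex below.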
    # TODO
    # convert the nested dict to a pandas DataFrame
    df = pd.DataFrame.from_dict(d, orient='index')
    # turn index and columns into MultiIndex
    df.index = pd.MultiIndex.from_tuples(df.index, names=['category', 'case'])
    df.columns = pd.MultiIndex.from_tuples(df.columns)  # , names=['platform', 'stage']
    if DEBUG:
        print(df)
    if p_status is not None:
        p_status.parent.mkdir(parents=True, exist_ok=True)
        df.to_pickle(p_status, compression='xz')
    return df


def html_merge_cell(html_string):
    """Merge cells in html string.

    If a cell is "↑", merge it to above cell.
    Process the "↑" cells of each column from bottom to top and merge each one
    into the cell directly above it: first add all rowspan attributes, then
    rescan the table and remove every cell containing the "↑" marker, walking
    from bottom-right to top-left.
    """
    # parse the HTML string
    soup = BeautifulSoup(html_string, 'html.parser')

    # get all table rows
    rows = soup.find_all('tr')

    # skip the header row; only process data rows
    data_rows = rows[1:] if len(rows) > 0 else []

    # nothing to do without data rows
    if not data_rows:
        return html_string

    # number of columns in the table
    max_cols = max([len(row.find_all(['td', 'th'])) for row in rows]) if rows else 0

    # step 1: walk upward from the last row and add rowspan attributes column by column
    for row_idx in range(len(data_rows) - 1, -1, -1):
        row = data_rows[row_idx]
        cells = row.find_all('td')

        # process every column of the current row (last column to first)
        for col_idx in range(min(len(cells), max_cols) - 1, -1, -1):
            cell = cells[col_idx]

            # check whether the cell content is "↑"
            if cell.get_text().strip() == "↑":
                # get the existing rowspan value, if any
                span_n_under = 1
                if 'rowspan' in cell.attrs:
                    span_n_under = int(cell['rowspan'])

                # find the cell directly above
                if row_idx > 0:  # make sure this is not the first row
                    above_row = data_rows[row_idx - 1]
                    above_cells = above_row.find_all('td')

                    # make sure the row above has enough cells
                    if col_idx < len(above_cells):
                        above_cell = above_cells[col_idx]

                        # add or update the rowspan attribute of the cell above
                        if 'rowspan' in above_cell.attrs:
                            # should never reach here.
                            above_cell['rowspan'] = str(int(above_cell['rowspan']) + 1 + span_n_under)
                        else:
                            above_cell['rowspan'] = str(1 + span_n_under)  # current cell + cells below

    # step 2: rescan the whole table and remove every "↑" cell, bottom-right to top-left
    # re-fetch all rows (the previous step may have changed the DOM)
    rows = soup.find_all('tr')
    data_rows = rows[1:] if len(rows) > 0 else []

    # walk upward from the last row
    for row_idx in range(len(data_rows) - 1, -1, -1):
        row = data_rows[row_idx]
        cells = row.find_all('td')

        # walk from the last column to the first
        for col_idx in range(len(cells) - 1, -1, -1):
            cell = cells[col_idx]

            # if the cell content is "↑", remove it
            if cell.get_text().strip() == "↑":
                cell.decompose()

    # return the modified HTML string
    return str(soup)


def html_highlight_node_backend(html_string):
    """Highlight the different node types in the backend-node column.

    - (weight) nodes: small font, gray
    - (op) nodes: bold
    - (model_input) nodes: italic
    """
    # parse the HTML string
    soup = BeautifulSoup(html_string, 'html.parser')

    # get all table rows
    rows = soup.find_all('tr')
    if not rows:
        return html_string

    # process every cell in every data row (skip the header)
    data_rows = rows[1:] if len(rows) > 1 else []

    for row in data_rows:
        cells = row.find_all(['td', 'th'])
        for cell in cells:
            cell_text = cell.get_text().strip()

            # skip empty cells and merge markers
            if not cell_text or cell_text == '↑':
                continue

            # apply a style according to the prefix
            if cell_text.startswith('(weight) '):
                # small font, gray
                cell['style'] = 'font-size: 0.75em; color: #666666;'
            elif cell_text.startswith('(op) '):
                # bold
                cell['style'] = 'font-weight: bold;'
            elif cell_text.startswith('(model_input) '):
                # italic
                cell['style'] = 'font-style: italic;'

    # return the modified HTML string
    return str(soup)


def html_add_footnote(html_string):
    """Add tooltip to certain column name in table in html string."""
    tooltips = {
        "node": (None, "decomposed node for optimization stage 1"),
        "node origin": ("origin node", "node in optimized onnx from original model."),
        "node backend": ("opt stage 2 node", "decomposed node for optimization stage 2"),
        "CMD_node_idx": (None, "Index of command node."),
        "runtime(ms)": (None, "NPU runtime of all units without sync."),
        "CFUNC_runtime(ms)": (None, "runtime of main computing unit (include conv and post conv) in ms."),
        "PFUNC_runtime(ms)": (None, "runtime of auxiliary computing unit (include pool and format convertor) in ms."),
        "SYNC_runtime(ms)": (None, "NPU runtime with sync (e.g., CONV and DMA run at same time)."),
    }

    soup = BeautifulSoup(html_string, 'html.parser')

    # add custom CSS to improve the tooltip display
    head = soup.find('head')
    if not head:
        head = soup.new_tag('head')
        if soup.html:
            soup.html.insert(0, head)
        else:
            soup.insert(0, head)

    # add CSS to enlarge the tooltip font
    style_tag = soup.new_tag('style')
    style_tag.string = """
    /* enhance the tooltip display */
    [title] {
        position: relative;
    }

    /* custom tooltip style (only effective in supporting browsers) */
    [title]:hover::after {
        content: attr(title);
        position: absolute;
        bottom: 100%;
        left: 50%;
        transform: translateX(-50%);
        background-color: #333;
        color: white;
        padding: 8px 12px;
        border-radius: 6px;
        font-size: 14px;
        font-weight: normal;
        white-space: nowrap;
        z-index: 1000;
        box-shadow: 0 2px 8px rgba(0,0,0,0.2);
        margin-bottom: 5px;
    }

    [title]:hover::before {
        content: '';
        position: absolute;
        bottom: 100%;
        left: 50%;
        transform: translateX(-50%);
        border: 5px solid transparent;
        border-top-color: #333;
        z-index: 1000;
    }

    /* hide the default title-attribute tooltip */
    [title]:hover {
        title: '';
    }
    """
    head.append(style_tag)

    # 1. find all tables
    tables = soup.find_all('table')

    for table in tables:
        # 2. for each table, find the column names (first row)
        rows = table.find_all('tr')
        if not rows:
            continue

        header_row = rows[0]
        headers = header_row.find_all(['th', 'td'])

        # 3. add a tooltip to each column name
        for header in headers:
            header_text = header.get_text().strip()
            if header_text in tooltips:
                new_name, description = tooltips[header_text]

                # 3.1 if new_name is not None, rename the column
                if new_name is not None:
                    display_name = new_name
                else:
                    display_name = header_text

                # 3.2 add the tooltip attribute and a more visible style
                header.string = display_name
                header['title'] = description

                # clearer visual hint: blue background, dotted underline, help cursor
                current_style = header.get('style', '')
                tooltip_style = (
                    ' cursor: help;'
                    ' border-bottom: 2px dotted #0066cc;'
                    ' background-color: #f0f8ff;'
                    ' padding: 2px 4px;'
                    ' border-radius: 3px;'
                    ' position: relative;'
                )
                header['style'] = current_style + tooltip_style

    return str(soup)


def test_html_merge():
    # set the base path
    input_path = '/home/wenliang/workflow_v3/regression_ng5/test_v2/test_report_cell_merge/'

    # define the file pairs to process
    file_pairs = [
        ('model_littlenet_original_v2.html', 'model_littlenet_merged_v2.html'),
        ('model_littlenet_original.html', 'model_littlenet_merged.html'),
        ('model_resnet_origin.html', 'model_resnet_merged.html'),
        ('model_resnet_origin_v2.html', 'model_resnet_merged_v2.html')
    ]

    # process each file pair
    for input_filename, output_filename in file_pairs:
        input_file = input_path + input_filename
        output_file = input_path + output_filename

        print(f"\n{'-'*50}")
        print(f"Processing file pair: {input_filename} -> {output_filename}")

        try:
            # read the original HTML file
            print(f"Reading file: {input_file}")
            with open(input_file, 'r', encoding='utf-8') as f:
                html_content = f.read()

            # call html_merge_cell to process the HTML
            print("Merging cells in the HTML...")
            merged_html = html_merge_cell(html_content)

            # save the processed HTML
            print(f"Saving result to: {output_file}")
            with open(output_file, 'w', encoding='utf-8') as f:
                f.write(merged_html)

            print("Success!")

        except Exception as e:
            print(f"Error while processing file: {e}")

    print(f"\n{'-'*50}")
    print("All files processed!")


def gen_backend_node_graph(p_json, p_svg, skip_dot=False):
    """Generate backend node graph from node_format_opt_be.json file in SVG format."""
    # handle both cases: p_json may be a file path or already-loaded data
    if isinstance(p_json, (str, Path)):
        # p_json is a file path
        with open(p_json, 'r') as f:
            data = json.load(f)
    else:
        # p_json is already the loaded data
        data = p_json

    p_svg = Path(p_svg)

    # build the GraphViz DOT file content
    dot_content = []
    dot_content.append('digraph G {')
    dot_content.append(' rankdir=TB;')  # top-to-bottom layout
    dot_content.append(' label="based on opt stage 2 node info.";')  # graph title
    dot_content.append(' labelloc="t";')  # title at the top
    dot_content.append(' labeljust="r";')  # right-align the title
    dot_content.append(' fontsize=14;')  # title font size
    dot_content.append(' node [shape=box, style=filled, fillcolor=lightblue];')  # op node style
    dot_content.append(' edge [color=black];')  # edge style
    dot_content.append(' node [shape=ellipse, style=filled, fillcolor=lightgreen] data;')  # data node style

    # define all nodes and edges
    data_nodes = set()  # track data nodes already added
    op_nodes = set()  # track op nodes already added

    # walk through all op nodes
    for op_name, op_info in data.items():
        # add the op node
        dot_content.append(f' "{op_name}" [shape=box, style=filled, fillcolor=lightblue];')
        op_nodes.add(op_name)

        # handle input data
        if 'inputs' in op_info:
            for input_data in op_info['inputs']:
                for data_name, data_format in input_data.items():
                    # add the data node (if not added yet)
                    if data_name not in data_nodes:
                        dot_content.append(f' "{data_name}" [shape=ellipse, style=filled, fillcolor=lightgreen, label="{data_name}\n[{data_format}]"];')
                        data_nodes.add(data_name)

                    # add edge: input data -> op
                    dot_content.append(f' "{data_name}" -> "{op_name}";')

        # handle output data
        if 'outputs' in op_info:
            for output_data in op_info['outputs']:
                for data_name, data_format in output_data.items():
                    # add the data node (if not added yet)
                    if data_name not in data_nodes:
                        dot_content.append(f' "{data_name}" [shape=ellipse, style=filled, fillcolor=lightgreen, label="{data_name}\n[{data_format}]"];')
                        data_nodes.add(data_name)

                    # add edge: op -> output data
                    dot_content.append(f' "{op_name}" -> "{data_name}";')

    # close the DOT graph
    dot_content.append('}')

    if skip_dot:
        return data_nodes, op_nodes

    # write the DOT content to a file
    p_dot = p_svg.with_suffix('.dot')
    with open(p_dot, 'w') as f:
        f.write('\n'.join(dot_content))
    logging.debug(f"Backend node graph written to: {p_dot}")

    # use the dot command to generate the SVG file
    try:
        # check that graphviz/dot is installed
        check_result = run_bash_script('dot -V', timeout=10)
        if check_result.returncode != 0:
            raise Exception("Graphviz/dot command not found. Please install graphviz.")

        # generate the SVG with a 600-second timeout
        dot_cmd = f'dot -Tsvg "{p_dot}" -o "{p_svg}"'
        result = run_bash_script(dot_cmd, timeout=600)

        if result.returncode == 0:
            logging.debug(f"SVG image written to: {p_svg}")
        else:
            logging.error(f"Failed to generate SVG file: {result.stderr}")
    except Exception as e:
        logging.error(f"Exception while generating SVG file: {e}")

    # if dot is not found, still return the info
    return data_nodes, op_nodes


def be_node_name_add_prefix(records, op_nodes, model_ins):
    """Add prefix (op, weight, model_in) to be node name.

    records is list of dict with raw data collected.
    op_nodes is set of op node names.
    model_ins is set of model input node names.
    the leftover node is weight, which should not have other properties,
    e.g. CFUNC_runtime(ms).
    """
    k_be = "node backend"
    k_1 = "CFUNC_runtime(ms)"
    for d in records:
        if k_be not in d:
            continue
        if d[k_be] == "↑":
            continue
        if d[k_be] in model_ins:
            d[k_be] = f"(model_input) {d[k_be]}"
        elif d[k_be] in op_nodes:
            d[k_be] = f"(op) {d[k_be]}"
        elif k_1 not in d:
            # make sure it is weight
            d[k_be] = f"(weight) {d[k_be]}"
        else:
            # this is an op with extra properties but not in op_nodes.
            # should not reach here.
            logging.error(f"undetermined type for node: {d[k_be]}")
    return records


def test_backend_node_graph():
    p_regression = Path("/home/wenliang/workflow_v3/regression_ng5/test_v2/test_utils/")
    p_json = p_regression / "node_format_opt_be.json"
    p_svg = p_regression / "node_format_opt_be.svg"
    # pass the SVG path rather than the DOT path
    gen_backend_node_graph(p_json, p_svg)


if __name__ == "__main__":
    pass
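

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the npy2txt() docstring above describes the
# expected `np_txt` layout -- a dict mapping each onnx input-node name to a
# list of numpy arrays, one entry per input pair, with `input_names` giving the
# node order declared by the onnx model.  The helper below is a minimal,
# hypothetical example of that layout; the node names, shapes, and output path
# are made up and are not part of the regression flow.
# ---------------------------------------------------------------------------
def _example_dump_npy_inputs(p_out="/tmp/example_model/input"):
    """Sketch: dump two random input pairs for a hypothetical two-input model."""
    rng = np.random.default_rng(0)
    input_names = ["in1", "in2"]  # order as the onnx model declares its inputs
    np_txt = {
        "in1": [rng.random((1, 3, 224, 224), dtype=np.float32) for _ in range(2)],
        "in2": [rng.random((1, 1, 16, 16), dtype=np.float32) for _ in range(2)],
    }
    # writes knerex_input / knerex_input_1 (plus simulator_input links) under p_out
    p_knerex_inputs, simulator_paired_inputs, pairname = npy2txt(
        np_txt,
        input_names,
        p_out,
        compression="npy",     # arrays end up as test_input.npy / in_0001.npy
        n_writer=1,            # keep the example single-process
        do_sanity_check=True,  # runs sanity_check_npy() on np_txt first
    )
    return p_knerex_inputs, simulator_paired_inputs, pairname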