#! /usr/bin/env python3
|
|
|
|
import os
|
|
import pathlib
|
|
import tempfile
|
|
import shutil
|
|
import re
|
|
import json
|
|
from collections import OrderedDict
|
|
from functools import lru_cache
|
|
|
|
import numpy as np
|
|
|
|
import sys_flow.flow_constants as fconsts
|
|
import sys_flow.flow_utils as futils
|
|
|
|
import snoop
|
|
# Verbose tracing via `snoop` is enabled only when REGRESSION_DEBUG is set
# in the environment (any non-empty value turns it on).
DEBUG = True if os.environ.get("REGRESSION_DEBUG", False) else False
snoop.install(enabled=DEBUG)


# constants

# Scratch locations used by the regression flow.
# NOTE(review): fixed /tmp paths assume one flow instance per machine — confirm.
P_TMP_MODEL = pathlib.Path("/tmp/model_working")
P_TMP_INPUT = pathlib.Path("/tmp/input_data")
|
|
|
|
|
|
def get_nef_util_bins():
    """Resolve the nef/kne utility binaries and build a PATH export snippet.

    Returns:
        tuple: ``(export_cmd, nef_util, kne_util)`` where ``export_cmd`` is a
        bash fragment prepending both tool folders to ``$PATH``, and the other
        two are the configured paths of the nef/kne utilities.
    """
    compiler_bins = fconsts.BIN_SET["compiler"]
    nef_util = compiler_bins["kneron_nef_utils"]
    kne_util = compiler_bins["kneron_kne_utils"]
    dir_nef = pathlib.Path(nef_util).parent
    dir_kne = pathlib.Path(kne_util).parent
    export_cmd = f"export PATH={dir_nef}:{dir_kne}:$PATH"
    return export_cmd, nef_util, kne_util
|
|
|
|
|
|
# Resolved once at import time; every shell command below reuses these.
ADD_NEF_UTIL_PATH, bin_nef_util, bin_kne_util = get_nef_util_bins()
|
|
|
|
|
|
###################################################################################
|
|
# get model info from nef + ioinfo.json
|
|
###################################################################################
|
|
def clean_list_nef(list_nef):
    """Normalize a list of nef paths into one space-separated string.

    Each element is converted to ``str`` (callers may pass ``pathlib.Path``
    objects) and duplicates are dropped.  Deduplication is order-preserving
    (``dict.fromkeys``) so the shell command built from the result is
    deterministic across runs, unlike the previous ``set()``-based version.

    Args:
        list_nef (list): nef file paths (str or pathlib.Path).

    Returns:
        str: unique paths, in first-seen order, joined by single spaces.
    """
    unique = dict.fromkeys(str(nef) for nef in list_nef)
    return " ".join(unique)
|
|
|
|
|
|
def combine_nef(list_nef: list, hw_mode, d_out):
    """Combine multiple nef into one using nef utils.

    After combination, the combined.nef will run an extra `unpack_nefs()` and
    be re-organized with an `ioinfo.json` per model.  This side-effect prepares
    a combined `ioinfo.json` for dongle inference.

    Args:
        list_nef (list): each element is path to nef file.
        hw_mode (int): specify platform.
        d_out (pathlib / str): where to put `combined.nef` and `ioinfo.json`

    Returns:
        tuple: multiple info returned:

        - `p_out`: where is the out folder. usually same as specified.
        - `p_nef`: path of the combined nef
        - `p_ioinfo`: path of the (combined) ioinfo.json, prepared for dongle, not for normal process!
        - `fn_maps`: the combined.nef is unpacked and re-organized in `p_out/unpack`.
          Per-model file mapping is recorded in this.
          Same as `unpack_nefs()` returned.

    """
    # work in a scratch dir: the tool names its output models_*.nef itself
    temp_dir = tempfile.mkdtemp()
    lst = clean_list_nef(list_nef)
    cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} --combine_nef \"{lst}\" -O {temp_dir}"
    # currently no -o option working. we need to cpy $temp_dir/models_xxx.nef to fn_out
    cp = futils.run_bash_script(cmd)
    assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}"

    # check output: exactly one combined nef is expected
    p_temp = pathlib.Path(temp_dir)
    nefs = list(p_temp.glob("models_*.nef"))
    assert len(nefs) == 1, f"combine nef but find {len(nefs)} created: {nefs}"

    # copy necessary files to p_out
    p_out = pathlib.Path(d_out)
    p_out.mkdir(parents=True, exist_ok=True)
    p_nef = p_out / "combined.nef"
    shutil.copyfile(nefs[0], p_nef)

    # prepare ioinfo (for convience of dongle):
    # one json keyed by model_id holding the per-node input/output quant info
    dongle_io = {}
    fn_maps, p_dump = unpack_nefs(p_nef, hw_mode)
    for model_id, (p_unpack, ioinfo) in fn_maps.items():
        dongle_io[model_id] = {}
        dongle_io[model_id]["ioinfo_in"] = ioinfo["input"]
        dongle_io[model_id]["ioinfo_out"] = ioinfo["output"]
    p_ioinfo = p_out / "ioinfo.json"
    with open(p_ioinfo, "w") as f:
        # NumpyEncoder: ioinfo values may be numpy arrays
        json.dump(dongle_io, f, cls=NumpyEncoder)

    shutil.rmtree(temp_dir, ignore_errors=True)

    return p_out, p_nef, p_ioinfo, fn_maps
|
|
|
|
|
|
def guess_available_model_id(p_dump, hw_mode):
    """Guess model_id values from filenames extracted out of a NEF.

    NOTE: if the nef is from regression, it will have default model_id 32768.

    Args:
        p_dump (pathlib / str): where the nef was extracted to.
        hw_mode (int): specify the platform.

    Returns:
        tuple: sorted, unique model ids found in the given dump folder.
    """
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        glob_pattern = f"models_{hw_mode}_model_*.kne"
        id_regex = rf"models_{hw_mode}_model_(\d+).kne"
    else:
        glob_pattern = "NEF_*modelid_*"
        id_regex = r'NEF_.*?_modelid_(\d+)_.*$'

    modelids = []
    for p in pathlib.Path(p_dump).glob(glob_pattern):
        modelids.extend(re.findall(id_regex, p.name))
    # sorted() (instead of bare set order) keeps the returned tuple
    # deterministic across runs, stabilizing downstream processing order.
    return tuple(sorted({int(a) for a in modelids}))
|
|
|
|
|
|
def verify_ioinfo(ioinfo, nef_version):
    """Verify ioinfo carries enough quantization info for every io node.

    Prints an error line for each missing key, then asserts none were missing.
    `nef_version` is accepted for interface compatibility but not consulted.
    """
    required = ("name", "shape", "onnx_shape", "ch_dim",
                "radix", "scale", "bitw", "data_format")
    missing = False
    for direction in ("input", "output"):
        if DEBUG:
            print(f"ioinfo got {len(ioinfo[direction])} of {direction}.")
        for idx, node in enumerate(ioinfo[direction]):
            for key in required:
                if key not in node:
                    print(f"Error: {direction}/{idx} is missing {key}")
                    missing = True
    assert not missing
|
|
|
|
|
|
def convert_ioinfo(p_sub, hw_mode):
    """Load raw io info from an unpacked model and save it as ioinfo.json.

    This is a wrapper that dispatches to the parser matching ``hw_mode``.

    Args:
        p_sub (pathlib.Path): folder holding one unpacked model.
        hw_mode (int): platform id.

    Returns:
        dict: the parsed ioinfo (also written to ``p_sub / "ioinfo.json"``).

    Raises:
        NotImplementedError: for a hw_mode with no known nef version
            (previously this fell through and crashed with a NameError).
    """
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]:
        # 520, or 720 pure bin (obsolete)
        # BUG: only per-layer quantization info
        # BUG: only sim shape. (no onnx shape. no dimension transpose in sim shape though.)
        fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0]
        ioinfo = parse_setup_json_v0(fn_json_raw)
        nef_version = 0
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:
        fn_json_raw = list(p_sub.glob("*_setup.bin.json"))[0]
        ioinfo = parse_setup_json_v1(fn_json_raw)
        nef_version = 1
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        fn_json_raw = list(p_sub.glob("*.kne.no_binary.json"))[0]
        ioinfo = parse_setup_json_v2(fn_json_raw)
        nef_version = 2
    else:
        # consistent with unpack_nefs(): fail loudly on unknown platforms
        raise NotImplementedError(f"Unsupported hw_mode: {hw_mode}")

    verify_ioinfo(ioinfo, nef_version)

    fn_ioinfo = p_sub / "ioinfo.json"
    with open(fn_ioinfo, "w") as f:
        json.dump(ioinfo, f, cls=NumpyEncoder)
    return ioinfo
|
|
|
|
|
|
def unpack_nefs(p_nef, hw_mode):
    """Parse nef to get compiler outputs for csim inference.

    Ref: `ticket #17762`_

    Args:
        p_nef (pathlib or str): path to the nef file, which may include
            multiple models.
        hw_mode (int): specify the platform (520/530/540/630/720/730/etc),
            because the way to call nef_utils are different.

    Returns:
        dict-type: example: `{model_id: (p_sub, ioinfo)}`.

        - The `model_id` is unique for each released model.
        - `p_sub` is where the model for `model_id` is unpacked,
        - the `ioinfo` includes the shape and quantization info of input/output nodes.
          It will be used to convert input data to
          bin file as csim/dongle input.

        A second value `p_out` (the scratch folder everything was unpacked
        into) is also returned.

    Raises:
        NotImplementedError: hw_mode matches no known nef version.

    .. _ticket #17762: https://redmine.kneron.tw/issues/17762
    """
    # fresh scratch dir per unpack; handed back to the caller as p_out
    p_out = pathlib.Path(tempfile.mkdtemp(prefix="nef_unpack_"))
    if hw_mode in fconsts.MODE_HW_LIMIT["nef_v0"]:  # 520, or 720 pure bin (obsolete)
        nef_version = 0
        cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -p {hw_mode} -O {p_out}"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:  # 720/530/630 flatbuffer
        nef_version = 1
        cmd = f"{ADD_NEF_UTIL_PATH}; {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -s -O {p_out}"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
        # 1 nef -> 1 kne (incl multiple models)
        # nef -> kne, then split the kne and emit one json per model
        nef_version = 2
        cmd = f"""set -e; {ADD_NEF_UTIL_PATH};
    {bin_nef_util} -X {pathlib.Path(p_nef).absolute()} --keep_all -O {p_out} &&
    pushd {p_out} >> /dev/null &&
    {bin_kne_util} -X NEF_0x*_models_{hw_mode}.kne &&
    for k in `ls models_{hw_mode}_model_*.kne`
    do
    {bin_kne_util} -j ${{k}}
    done
    """
    else:
        raise NotImplementedError
    # extract nef file
    cp = futils.run_bash_script(cmd)
    if DEBUG:
        print(f"unpack nef (version {nef_version}) to {p_out}")
        print(cp.stderr)
    assert cp.returncode == 0, f"extract nef failed with return code: {cp.returncode}."

    # put each model into submodel
    # for 520/720/530/630
    model_ids = guess_available_model_id(p_out, hw_mode)
    fn_maps = {}
    for mid in model_ids:
        p_sub = p_out / f"model_{mid}"
        p_sub.mkdir(parents=True, exist_ok=True)
        # move this model's extracted files into its own subfolder
        if hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:  # 540/730
            cmd = f"mv {p_out}/models_{hw_mode}_model_{mid}.kne* {p_sub}"
        else:
            cmd = f"mv {p_out}/NEF_*_modelid_{mid}_* {p_sub}"
        cp = futils.run_bash_script(cmd)
        assert cp.returncode == 0, f"Failed to move model_{mid} bin files. Return code: {cp.returncode}"

        p_sub = p_out / f"model_{mid}"
        ioinfo = convert_ioinfo(p_sub, hw_mode)

        # fn_map = locate_compiler_dump(p_sub, hw_mode, parse_nef=True)
        fn_maps[mid] = (p_sub, ioinfo)

    return fn_maps, p_out
|
|
|
|
|
|
class NumpyEncoder(json.JSONEncoder):
    """JSON encoder that understands numpy arrays and numpy scalars.

    From `numpy array is not json serializable`_ .

    .. _numpy array is not json serializable: https://stackoverflow.com/questions/26646362/numpy-array-is-not-json-serializable
    """

    def default(self, obj):
        """Convert numpy objects to plain Python equivalents."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # numpy scalar types (np.int64, np.float32, ...) leak out of
        # indexing operations on the arrays stored in ioinfo; handle them
        # too instead of raising TypeError.
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return json.JSONEncoder.default(self, obj)
|
|
|
|
|
|
def parse_setup_json_v0(fn_json):
    """Parse raw json generated from 520 setup.bin.

    Necessary info per io node (same for all platform),
    see `verify_ioinfo()`.

    NOTE:
        - we assume only 1 input for 520 models.

    Related to flow_utils/get_ioinfo_from_knerex_json

    Args:
        fn_json (pathlib / str): path to the extracted *_setup.bin.json.

    Returns:
        dict: ``{"input": [...], "output": [...]}`` with one dict per node.
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)

    ioinfo = {}

    def get_in(h):
        # Build the io-info dict for the (single) input header `h`.
        v1 = {}
        # NOTE: for 520, the given dimension is always 1CHW
        # There will be no onnx shape in setup.bin.
        # example, [1, 10] will be [1, 10, 1, 1]
        v1["name"] = "0"
        v1["shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]]
        v1["onnx_shape"] = [1, h["input_channel"], h["input_row"], h["input_col"]]
        v1["bitw"] = 8  # only support 8bit
        # 520 only support per layer; replicate the single value per channel
        v1["radix"] = [h["input_radix"] for i in range(h["input_channel"])]
        v1["scale"] = [1.0 for i in range(h["input_channel"])]
        v1["ch_dim"] = 1
        v1["data_format"] = "RGBA_8BIT"  # just guess. to keep same format
        return v1

    def get_out(i, h):
        # Build the io-info dict for output index `i` with header `h`.
        d = {}
        # no name saved in 520 setup.bin / nef. so we use index only
        d["name"] = str(i)
        d["shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]]
        d["onnx_shape"] = [1, h["ch_length"], h["row_length"], h["col_length"]]
        d["bitw"] = 8  # only support 8bit
        # NOTE: 520 radix/scale are same for all channels
        d["radix"] = [h["output_radix"] for i in range(h["ch_length"])]
        # presumably decodes a little-endian int bit-pattern into a float
        # (see futils.intle2flt); a single layer-wide value, despite the name
        per_channel_scales = futils.intle2flt(h["output_scale"])
        d["scale"] = [per_channel_scales for i in range(h["ch_length"])]
        d["ch_dim"] = 1
        d["data_format"], _ = parse_data_format(520, h["data_format"])
        return d

    # input. assume only one.
    # sometime the json have headers or header
    if "headers" in raw:
        ioinfo["input"] = [get_in(a) for a in raw["headers"]]
    else:
        assert "header" in raw, "Extracted 520 setup.bin.json have no header nor headers."
        ioinfo["input"] = [get_in(raw["header"])]
    # output. maybe multiple.
    ioinfo["output"] = [get_out(i, d) for i, d in enumerate(raw["outputs"])]

    return ioinfo
|
|
|
|
|
|
def parse_data_format(hw_mode, fmt):
    """Convert fmt number to real format.

    The raw ioinfo from compiler use int to represent hardware data format.
    The data-converter require the input of format in "string".
    This function will take definition from compiler and hard-code here.
    Update if compiler changed.

    Ref: `ticket #17762`_

    Args:
        hw_mode (int): platform id (520 / 720 / 530 / 730 / 540 / 630).
        fmt (int or str): the compiler's integer format code.

    Returns:
        tuple: ``(format_name, bit_width)``.

    Raises:
        NotImplementedError: unknown hw_mode (previously fell through and
            implicitly returned None, crashing the caller on unpack).
        KeyError: fmt code not present in the table for this hw_mode.
    """
    if hw_mode == 520:
        # refer to compiler/lib/target/mozart/basic/hw_define.h
        # not using this info now.
        d = {
            -1: ("UNKNOWN", 8),
            8: ("16W1C8B", 8),
            0: ("8W1C16B", 16),
            9: ("BY_COL_8BIT", 8),
            1: ("BY_COL_16BIT", 16),
            10: ("BY_CHNL_8BIT", 8),
            2: ("BY_CHNL_16BIT", 16),
            15: ("CUSTOMIZE", 8),
            16: ("RGBA_8BIT", 8),
            17: ("RGBA_16BIT", 16),
            18: ("SEQ_32BIT", 32),
            100: ("RAW8", 8),
            101: ("RAW16", 16),
            102: ("RAW_FLOAT", 32),
        }
        return d[int(fmt)]
    elif hw_mode == 720:
        # refer to compiler/lib/target/beethoven/basic/hw_define.h
        ref = {
            -1: ("UNKNOWN", 8),
            0: ("1W16C8B", 8),
            1: ("1W16C8B_INTLV", 8),
            2: ("1W16C8BHL", 16),
            3: ("1W16C8BHL_INTLV", 16),
            4: ("4W4C8B", 8),
            5: ("16W1C8B", 8),
            6: ("8W1C16B", 16),
            7: ("PS_8W1C16B", 16),
            8: ("PS_1W8C16B", 16),
            9: ("PS_1W4C32B", 32),
            11: ("PS_2W4C16B", 16),
            12: ("PS_4W1C32B", 32),
            13: ("PS_1W16C16B", 16),
            14: ("PS_1W8C32B", 32),
            15: ("PS_1W16C32B", 32),
            16: ("PS_4W2C16B", 16),
            17: ("PS_2W4C32B", 32),
            18: ("PS_2W2C32B", 32),
            100: ("RAW8", 8),
            101: ("RAW16", 16),
            102: ("RAW_FLOAT", 32),
        }
        return ref[int(fmt)]
    elif hw_mode in [530, 730, 540, 630]:
        # 730/540/630 refer to compiler/lib/target/wagner/basic/hw_define.h
        # 530 refer to compiler/lib/target/bach/basic/hw_define.h
        # but seems same for now
        # NOTE(review): HL formats listed as 15-bit here (16 in the 720
        # table) and RAW16 is code 102 (101 elsewhere) — copied verbatim
        # from the compiler header; confirm against hw_define.h on update.
        ref = {
            -1: ("UNKNOWN", 8),
            0: ("1W16C8B", 8),
            1: ("1W16C8BHL", 15),
            2: ("4W4C8B", 8),
            3: ("4W4C8BHL", 15),
            4: ("16W1C8B", 8),
            5: ("16W1C8BHL", 15),
            6: ("8W1C16B", 16),
            7: ("PS_1W16C24B", 24),
            100: ("RAW8", 8),
            102: ("RAW16", 16),
            103: ("RAW_FLOAT", 32),
        }
        return ref[int(fmt)]
    else:
        raise NotImplementedError(f"Unknown hw_mode: {hw_mode}")
|
|
|
|
|
|
def parse_setup_json_v1(fn_json):
    """Parse raw json generated from setup.bin (v2 flatbuffer, 530/630/720).

    Necessary info per io node (same for all platform), please refer to `parse_setup_json_v0()`.

    Related to flow_utils/get_ioinfo_from_knerex_json

    Args:
        fn_json (pathlib / str): path to the extracted *_setup.bin.json.

    Returns:
        dict: ``{"input": [...], "output": [...]}`` with one dict per node.
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)

    ioinfo = {}

    def get_platform(j):
        # strips the leading K/L characters, e.g. "KL720" -> 720
        return int(j["header"]["target"].strip("KL"))

    platform = get_platform(raw)

    def get_in(h):
        # Build the io-info dict for one tensor entry `h` (used for both
        # inputs and outputs — the flatbuffer layout is identical).
        v1 = {}
        v1["name"] = h["name"]
        # from Jay
        # Tensor definitions in setup.bin:
        #   raw_shape is the onnx shape
        #   shape is the hw shape
        # Definitions in ioinfo.json:
        #   onnx_shape is the onnx shape
        #   shape is the hw shape
        v1["shape"] = np.array(h["shape"])
        v1["onnx_shape"] = np.array(h["raw_shape"])
        # TODO: is this true? always second?
        v1["ch_dim"] = 1

        v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"])

        # for per channel radix/scale
        n_ch = v1["onnx_shape"][v1["ch_dim"]]
        for k in ["radix", "scale"]:
            t = [a[k] for a in h["quantization"]["fxp_info"]]
            if len(t) == n_ch:  # per channel given
                v1[k] = np.array(t)
            else:  # per layer given. need expand
                assert (
                    len(t) == 1
                ), f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
                v1[k] = np.array([t[0] for i in range(n_ch)])
        return v1

    # input. maybe multiple
    ioinfo["input"] = [get_in(d) for d in raw["inputs"]]
    # output. maybe multiple.
    ioinfo["output"] = [get_in(d) for d in raw["outputs"]]

    return ioinfo
|
|
|
|
|
|
def parse_setup_json_v2(fn_json):
    """Parse raw json generated from kne (540/730).

    Necessary info per io node (same for all platform), please refer to `parse_setup_json_v0()`.

    Ref: `ticket #17762`_

    Related to flow_utils/get_ioinfo_from_knerex_json

    Args:
        fn_json (pathlib / str): path to the *.kne.no_binary.json dump.

    Returns:
        dict: ``{"input": [...], "output": [...]}`` with one dict per node.
        Only the first model in the kne (``raw["models"][0]``) is read.
    """
    with open(fn_json, "r") as f:
        raw = json.load(f)

    ioinfo = {}

    def get_platform(j):
        # strips the leading K/L characters, e.g. "KL730" -> 730
        return int(j["header"]["target"].strip("KL"))

    platform = get_platform(raw)

    def parse_ch_dim(lst):
        """Input lst should be a list of 4 elements: [b, c, h, w]."""
        if lst[0] == -1:
            # when list is [-1. -1, -1, -1]
            return 1
        else:
            # there should be no -1 in the list
            assert lst[1] != -1
            return lst[1]

    def get_in(h):
        # Build the io-info dict for one tensor entry `h` of the kne
        # header (used for both inputs and outputs).
        v1 = {}
        v1["name"] = h["name"]
        v1["ndim"] = h["ndim"]
        v1["shape"] = np.array(h["shape"])
        # need to combine shape and inv_shape_intrp_dim to get real onnx_shape.
        # see #18456
        v1["onnx_shape"] = np.array([v1["shape"][a] for a in h["inv_shape_intrp_dim"]])
        v1["ch_dim"] = parse_ch_dim(h["shape_intrp_dim"])

        v1["data_format"], v1["bitw"] = parse_data_format(platform, h["format"])

        # for per channel radix
        # NOTE(review): channel count taken from hw shape dim 1 here, while
        # v1 uses onnx_shape[ch_dim] — confirm this difference is intended.
        n_ch = v1["shape"][1]
        k = "radix"
        t = h["quantization"][k]
        if len(t) == n_ch:  # per channel given
            v1[k] = np.array(t)
        else:  # per layer given. need expand
            assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
            v1[k] = np.array([t[0] for i in range(n_ch)])

        # scale: stored little-endian; decoded via futils.array_le2flt
        k = "scale"
        scale_le = h["quantization"]["scale"]
        scale_le_n = h["quantization"]["scale_count"]
        t = futils.array_le2flt(scale_le, scale_le_n)
        if len(t) == n_ch:  # per channel given
            v1[k] = np.array(t)
        else:  # per layer given. need expand
            assert len(t) == 1, f"channel {n_ch} but got {k} for {len(t)} channels: {t}"
            v1[k] = np.array([t[0] for i in range(n_ch)])

        return v1

    ioinfo["input"] = [get_in(d) for d in raw["models"][0]["header"]["inputs"]]
    ioinfo["output"] = [get_in(d) for d in raw["models"][0]["header"]["outputs"]]

    return ioinfo
|
|
|
|
|
|
@lru_cache(maxsize=128)
def locate_compiler_dump(p_out, hw_mode, parse_nef=False):
    """Locate important files in compiler dump folder.

    Each platform has it's own required files to run csim.
    Some names may change, e.g., test.conf/apb.npu,
    but they serve same purpose.

    This function is to find correponding file and return
    organized as dict, so that each call will always get correct file
    independant of hw_mode.

    Args:
        p_out (pathlib / str): compiler dump folder (must be hashable for
            the lru_cache, so pass the same type consistently).
        hw_mode (int): platform id.
        parse_nef (bool): 520 only — also expect an ioinfo.json generated
            by `unpack_nefs()`.

    Returns:
        dict: logical name -> pathlib.Path of the first matching file.
        NOTE(review): the cached dict is mutable — callers must not modify
        the returned mapping.

    Raises:
        NotImplementedError: unsupported hw_mode.
        AssertionError: a required file is absent.
    """
    p_out = pathlib.Path(p_out)
    if hw_mode in [520]:
        patterns = {
            "setup_bin": "*setup.bin",
            "command_bin": "*command.bin",
            "weight_bin": "*weight.bin",
            "apb_npu": "*test.conf",  # diff
        }
        if parse_nef:
            # HACK
            # unpack_nefs will genearte ioinfo.json for 520
            patterns["ioinfo_json"] = "*ioinfo.json"  # diff
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v1"]:
        patterns = {
            "setup_bin": "*setup.bin",
            "command_bin": "*command.bin",
            "weight_bin": "*weight.bin",
            "ioinfo_json": "*ioinfo.json",
            "apb_npu": "*apb.npu",
        }
        if hw_mode in [720]:  # diff for 520/720
            patterns["apb_npu"] = "*test.conf"
    elif hw_mode in fconsts.MODE_HW_LIMIT["nef_v2"]:
        patterns = {
            "kne": f"*models_{hw_mode}*.kne",
            "ioinfo_json": "*ioinfo.json",
        }
    else:
        raise NotImplementedError

    fn_map = {}
    for k, v in patterns.items():
        ps = list(p_out.glob(v))
        # BUGFIX: the message used to claim "expect 1" although the check
        # accepts one-or-more matches (the first one is used).
        assert len(ps) >= 1, f"Looking for {k}, expect at least 1, but found {len(ps)}: {ps}"
        fn_map[k] = ps[0]

    return fn_map
|
|
|
|
|
|
def load_ioinfo_json(fn_ioinfo):
    """Load a compiler-generated ioinfo.json and post-process it.

    `shape`/`onnx_shape`/`radix`/`scale` are converted to numpy arrays
    for easier handling later, and node names are cleaned.

    NOTE:
        No ioinfo.json for 520.
    """
    with open(fn_ioinfo, "r") as f:
        ioinfo = json.load(f)

    array_keys = ("onnx_shape", "scale", "radix", "shape")
    for direction in ("input", "output"):
        for node in ioinfo[direction]:
            node["name"] = futils.clean_name(node["name"])
            for key in array_keys:
                node[key] = np.array(node[key])
    return ioinfo
|
|
|
|
|
|
def collect_fps_improve(dir_out):
    """Load the fps improvement figure from ip_evaluator reports.

    The result is compiled with other analysis and put in the final report.

    Args:
        `dir_out`: the output folder of compiler.

    Returns:
        str or None: the value following "fps improve: " on its line, or
        None when the summary file or the line is absent.
    """
    prefix = "fps improve: "
    report = pathlib.Path(dir_out) / "summary_image_cut_search.txt"
    if not report.exists():
        return None

    with open(report, "r") as f:
        for line in f:
            if line.startswith(prefix):
                return line.strip().replace(prefix, "")
    return None
|
|
|
|
|
|
def get_cpu_node_op_type(dir_out):
    """Collect the CPU-op type names recorded in ``dir_out/setup.txt``.

    Both the new key ("opcode_index") and the old key ("op_type") are
    recognized; "CpuFusion" entries are skipped.

    Args:
        dir_out: compiler output folder.

    Returns:
        str: comma-joined unique op types; "None" when the file holds no
        cpu node; "N/A" when setup.txt is absent or unreadable.
    """
    setup_path = pathlib.Path(dir_out) / "setup.txt"
    # hoisted: compiled once instead of per matching line
    pattern = re.compile('"(op_type|opcode_index)": *"(.*?)"')

    cpu_ops = set()
    try:
        with open(setup_path, "r") as f:
            for line in f:
                # new setup.txt uses "opcode_index", old one "op_type"
                if "op_type" in line or "opcode_index" in line:
                    match = pattern.search(line)
                    if match is None:
                        # key mentioned but not in the expected layout; skip
                        # (the old code raised IndexError into a bare except,
                        # silently discarding everything found so far)
                        continue
                    op = match.group(2)
                    if op == "CpuFusion":
                        continue
                    cpu_ops.add(op)
    except OSError:
        # BUGFIX: was a bare `except:` which also swallowed
        # KeyboardInterrupt/SystemExit; only a missing/unreadable
        # setup.txt should yield "N/A".
        return "N/A"

    if not cpu_ops:
        return "None"
    return ",".join(cpu_ops)
|
|
|
|
|
|
def collect_command_weight_size(dir_out):
    """Read command/weight sizes from dbg.stat.json in the compiler output.

    Args:
        dir_out: compiler output folder.

    Returns:
        tuple: ``(cmd_size_kb, weight_size_mb)`` as ints, with ``None`` for
        any value that could not be read (missing/malformed stats file).
    """
    cmd_size = None
    weight_size = None
    stats_path = pathlib.Path(dir_out) / "dbg.stat.json"
    try:
        with open(stats_path, "r") as f:
            stats = json.load(f)
        # cmd size is reported in KB, weight size in MB
        cmd_size = int(stats["general"]["cmd_size"] / (10**3))
        weight_size = int(stats["general"]["wt_size"] / (10**6))
    except (OSError, ValueError, KeyError, TypeError):
        # BUGFIX: was a bare `except:`. ValueError covers
        # json.JSONDecodeError; a missing file or missing keys simply
        # yields (None, None) as before.
        pass
    return cmd_size, weight_size
|
|
|
|
|
|
def find_cpu_nodes(lines):
    """Extract CPU op names listed after the compiler's CPU-ops warning.

    Args:
        lines: iterable of text lines (e.g. from ProfileResult.txt).

    Returns:
        str: op names joined by "//", or "N/A" when none are found.
    """
    collected = []
    in_section = False
    for line in lines:
        if line.startswith("***** Warning: CPU ops types"):
            in_section = True
        elif in_section:
            entry = line.strip().strip(",")
            # short leftovers (<= 4 chars) terminate the listing section
            if len(entry) > 4:
                collected.append(entry)
            else:
                in_section = False
    return "//".join(collected) if collected else "N/A"
|
|
|
|
|
|
|
|
def collect_FPS(dir_out, hw_mode):
    """Collect FPS/profiling info from the compiler output folder.

    WARNING:
        - Tiefang will make report same for ALL platforms.
        - will all be named as `ProfileResult.txt`

    Args:
        dir_out: compiler output folder containing ProfileResult.txt.
        hw_mode (int): platform id; 520 gets a reduced field set.

    Returns:
        OrderedDict: profiling fields found (None/empty values dropped);
        empty when ProfileResult.txt is absent or unreadable.
    """
    profile_dir = f"{dir_out}/ProfileResult.txt"

    d_profile = OrderedDict()

    def search_by_prefix(lines, k):
        # Return the text after prefix `k` on the first matching line.
        for line in lines:
            if line.startswith(k):
                # BUGFIX: was `line.lstrip(k)`, which strips any leading
                # characters in the *set* k and could also eat the start of
                # the value; slicing removes exactly the prefix.
                return line[len(k):].strip()
        return None

    def gb2mb(line):
        # Values like "1.5GB" -> 1500.0 (MB).
        return float(line.strip("GB")) * 1000

    def convert2int(s):
        # NOTE(review): currently unused; kept for interface parity.
        if s == "inf" or s is None:
            return None
        return int(float(s))

    try:
        with open(profile_dir, "r") as f:
            lines = f.readlines()

        # load fps
        if hw_mode == 520:
            d_profile["fps"] = search_by_prefix(lines, "output_fps =")
            d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =")
            d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =")
            d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =")
            d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =")
            d_profile["cpu_node"] = find_cpu_nodes(lines)
            # d_profile[f"RV(mb)"] = search_by_prefix(lines, "output_total_data_move_in_amount =")
            # d_profile[f"WV(mb)"] = search_by_prefix(lines, "output_total_data_move_out_amount =")
        else:
            d_profile["fps"] = search_by_prefix(lines, "output_fps =")
            d_profile["ITC(ms)"] = search_by_prefix(lines, "output_total_time =")
            d_profile["C(GOPs)"] = search_by_prefix(lines, "output_total_theory_mac =")
            d_profile["RDMA bandwidth GB/s"] = search_by_prefix(lines, "RDMA_bandwidth_GBPs =")
            d_profile["WDMA bandwidth GB/s"] = search_by_prefix(lines, "WDMA_bandwidth_GBPs =")
            d_profile["GETW bandwidth GB/s"] = search_by_prefix(lines, "GETW_bandwidth_GBPs =")
            d_profile["RV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_RDMA_amount ="))
            d_profile["WV(mb)"] = gb2mb(search_by_prefix(lines, "output_total_WDMA_amount ="))
            d_profile["cpu_node"] = find_cpu_nodes(lines)

    except Exception:
        # best-effort: a missing or partial report just yields fewer fields.
        # (Was a bare `except:`; Exception no longer swallows SystemExit/
        # KeyboardInterrupt.)
        pass

    # filter None items so the report only shows what was actually found
    d_prof = OrderedDict()
    for k, v in d_profile.items():
        if v:  # not None
            d_prof[k] = v
    return d_prof
|