# -*- coding: utf-8 -*-
|
|
|
|
# This file mainly contains the ModelConfig class and the functions for running knerex and the compiler.
|
|
from typing import Dict, List
|
|
import glob
|
|
import subprocess
|
|
import shutil
|
|
import json
|
|
import os
|
|
import onnx
|
|
import logging
|
|
|
|
from .utils import (
|
|
get_toolchain_version,
|
|
RESULT_FOLDER,
|
|
LIBS_V1_FOLDER,
|
|
LIBS_V2_FOLDER,
|
|
SCRIPT_FOLDER,
|
|
clean_up,
|
|
evalutor_result_html_parse,
|
|
check_filename_validity
|
|
)
|
|
|
|
|
|
from sys_flow.run import gen_fx_model as gen_fx_model_v1
|
|
from sys_flow_v2.run import gen_fx_model as gen_fx_model_v2
|
|
from sys_flow_v2.run import gen_opt_model_v2
|
|
|
|
# Platform availability is gated on the installed onnx release: the "730"
# target is excluded only for onnx 1.7.0 (NOTE(review): presumably 730 needs
# a newer onnx — confirm the intended version boundary).
SUPPORTED_PLATFORMS = (
    ["520", "720", "530", "630"]
    if onnx.__version__ == "1.7.0"
    else ["520", "720", "530", "630", "730"]
)
|
|
|
|
|
|
class ModelConfig:
    """Kneron model configuration.

    Holds a single model's ID/version/platform and the paths to its artifacts
    (onnx / bie / radix json / compiler config), and drives quantization
    (:meth:`analysis`) and the IP evaluator (:meth:`evaluate`) for that model.
    """

    def __init__(
        self,
        id: int,
        version: str,
        platform: str,
        onnx_model=None,
        onnx_path=None,
        bie_path=None,
        radix_json_path=None,
        compiler_config_path=None,
        input_fmt = None,
        debug=False,
    ):
        """Create a Kneron model config object. One of these three parameters is required: onnx_model, onnx_path, bie_path.

        Args:
            id (int): model ID
            version (str): version number which should be a four digit hex, e.g. "0a2f"
            platform (str): hardware platform; must be one of SUPPORTED_PLATFORMS, e.g. "520" or "720"
            onnx_model (ModelProto, optional): loaded onnx model. Defaults to None.
            onnx_path (str, optional): onnx file path. Defaults to None.
            bie_path (str, optional): bie file path. Defaults to None.
            radix_json_path (str, optional): radix json path. Defaults to None.
            compiler_config_path (str, optional): compiler config json path. Defaults to None.
            input_fmt (str or Dict, optional): input format. If None, it will be automatically decided. If str, all inputs will be set to the same format. If Dict, each input can have its own format. Defaults to None. Only available for kdp730.
                e.g. input_fmt="4W4C8B" or input_fmt={"input1": "4W4C8B", "input2": "16W1C8B"}.
                available formats: <id: (name, bitwidth)>
                    0: ("1W16C8B_CH_COMPACT", 8),
                    1: ("1W16C8BHL_CH_COMPACT", 16),
                    2: ("4W4C8B", 8),
                    3: ("4W4C8BHL", 16),
                    4: ("16W1C8B", 8),
                    5: ("16W1C8BHL", 16),
                    6: ("8W1C16B", 16),
                    7: ("PS_1W16C24B", 24),
                    8: ("1W16C8B", 8),
                    9: ("1W16C8BHL", 16),
                    10: ("HW4C8B_KEEP_A", 8),  # inproc
                    11: ("HW4C8B_DROP_A", 8),  # inproc
                    12: ("HW1C8B", 8),  # inproc
                    13: ("HW1C16B_LE", 16),  # inproc
                    14: ("HW1C16B_BE", 16),  # inproc
                    100: ("RAW8", 8),
                    102: ("RAW16", 16),
                    103: ("RAW_FLOAT", 32),
            debug (bool, optional): debug mode. When True, intermediate files are kept after analysis. Defaults to False.

        Raises:
            ValueError: if RESULT_FOLDER is shadowed by a regular file, or the
                platform is not in SUPPORTED_PLATFORMS.
        """
        # Coerce id/platform so callers may also pass e.g. id="32" or platform=520.
        if type(id) is not int:
            self.id = int(id)
        else:
            self.id = id
        self.version = version
        if type(platform) is not str:
            self.platform = str(platform)
        else:
            self.platform = platform
        self.onnx_path = onnx_path
        self.bie_path = bie_path
        self.radix_json_path = radix_json_path
        self.compiler_config_path = compiler_config_path
        self.debug = debug
        self.input_fmt = input_fmt
        # A loaded onnx model takes precedence over onnx_path: serialize it
        # into RESULT_FOLDER and point onnx_path at the saved copy.
        if onnx_model is not None:
            self.onnx_path = os.path.join(RESULT_FOLDER, "input.onnx")
            if os.path.isfile(RESULT_FOLDER):
                logging.error(
                    f"Folder {RESULT_FOLDER} cannot be created. File with same name exists."
                )
                raise ValueError(
                    f"Folder {RESULT_FOLDER} cannot be created. File with same name exists."
                )
            elif not os.path.isdir(RESULT_FOLDER):
                os.makedirs(RESULT_FOLDER)
            onnx.save(onnx_model, self.onnx_path)
        if self.onnx_path is None and self.bie_path is None:
            # NOTE(review): this only logs and does not raise — later calls
            # (analysis/evaluate) will fail when both paths are missing.
            logging.error(
                "These three parameter cannot be None at the same time: onnx_model, onnx_path, bie_path."
            )
        # Check if it is relative path.
        if self.onnx_path is not None and self.onnx_path[0] != "/":
            self.onnx_path = os.path.abspath(self.onnx_path)
            check_filename_validity(self.onnx_path)
        if self.bie_path is not None and self.bie_path[0] != "/":
            self.bie_path = os.path.abspath(self.bie_path)
            check_filename_validity(self.bie_path)
        # Check platform
        if self.platform not in SUPPORTED_PLATFORMS:
            logging.error(f"Platform {self.platform} is not supported in the current environment.")
            raise ValueError(f"Platform {self.platform} is not supported in the current environment.")

    def analysis(
        self,
        input_mapping: Dict,
        output_dir: str = "/data1/kneron_flow",
        threads: int = 4,
        quantize_mode: str = "default",
        datapath_range_method: str = "percentage",
        percentage: float = 0.999,
        percentage_16b: float = 0.999999,
        percentile: float = 0.001,
        outlier_factor: float = 1.0,
        datapath_bitwidth_mode="int8",
        weight_bitwidth_mode="int8",
        model_in_bitwidth_mode="int8",
        model_out_bitwidth_mode="int8",
        cpu_node_bitwidth_mode="int8",
        flops_ratio: float = 0.2,
        compiler_tiling="default",
        mode: int = 1,
        optimize: int = 0,
        lut_high_accuracy_mode="0",
        quan_config=None,
    ) -> str:
        """Fix point analysis for the model. If the object is initialized with an onnx. This step is required before compile.

        Args:
            input_mapping (Dict): Dictionary of mapping input data to a specific input. Input data should be a list of numpy array.
            output_dir (str, optional): path to the output directory. Defaults to /data1/kneron_flow.
            threads (int, optional): multithread setting. Defaults to 4.
            quantize_mode (str, optional): quantize_mode setting. Currently support default and post_sigmoid. Defaults to "default".
            datapath_range_method (str, optional): could be 'mmse' or 'percentage'. mmse: use snr-based-range method. percentage: use arbitary percentage. Default to 'percentage'.
            percentage (float, optional): used to determine the range of 8-bit data under 'percentage' mode. Suggest to set value between 0.999 and 1.0. Use 1.0 for detection models. Defaults to 0.999.
            percentage_16b (float, optional): used to determine the range of 16-bit data under 'percentage' mode. Suggest to set value between 0.999 and 1.0. Use 1.0 for detection models. Defaults to 0.999999. percentage_16b >= percentage.
            percentile (float, optional): used under 'mmse' mode. The range to search. The larger the value, the larger the search range, the better the performance but the longer the simulation time. Defaults to 0.001.
            outlier_factor (float, optional): used under 'mmse' mode. The factor applied on outliers. For example, if clamping data is sensitive to your model, set outlier_factor to 2 or higher. Higher outlier_factor will reduce outlier removal by increasing range. Defaults to 1.0.
            datapath_bitwidth_mode: choose from "int8"/"int16"/"mix balance"/"mix light"/"mixbw". ("int16" is not supported in kdp520. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8. "mixbw" automatically selects the best bitwidth mode based but takes longer time.)
            weight_bitwidth_mode: choose from "int8"/"int16"/"int4"/"mix balance"/"mix light"/"mixbw". ("int16" is not supported in kdp520. "int4" is not supported in kdp720. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8. "mixbw" automatically selects the best bitwidth mode based but takes longer time.)
            model_in_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            model_out_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            cpu_node_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            flops_ratio (float, optional): the floating-point computation relative to a fully INT16-quantized model, normalized to 1. (default as 0.2)
            compiler_tiling (str, optional): `fm_cut` option before. could be "default" or "deep_search". Deep search mode optimizes the performance but takes longer. Defaults to "default".
            mode (int, optional): running mode for the analysis.
                0: run ip_evaluator only.
                1: run knerex (for quantization) only.
                2: run knerex + dynasty + compiler + csim + bit-true-match check. dynasty will inference only 1 image and only check quantization accuracy of output layers.
                3: run knerex + dynasty + compiler + csim + bit-true-match check. dynasty will inference all images and dump results of all layers. It will provide most detailed analysis but will take much longer time.
                Defaults to 1.
            optimize (int, optional): level of optimization. 0-2, the larger number, the better model performance, but takes longer. Defaults to 0.
            lut_high_accuracy_mode (str, optional): forwarded to the v2 flow only. Defaults to "0".
            quan_config (optional): extra quantization config forwarded to the underlying flow. Defaults to None.

        Returns:
            str: path to the output bie file. On validation failure or mode 0,
            the output directory path is returned instead.
        """
        # Check input params
        if self.onnx_path is None:
            logging.error("onnx model is required before fix point analysis")
        if self.bie_path is not None:
            logging.warning("bie file is provided. It will be overwritten.")

        # Percentage parameters must be sane before launching the long run.
        if percentage < 0.9 or percentage > 1.0:
            logging.error("percentage should be between 0.9 and 1.0")
            return output_dir
        if percentage_16b < 0.9 or percentage_16b > 1.0:
            logging.error("percentage_16b should be between 0.9 and 1.0")
            return output_dir
        if percentage_16b < percentage:
            logging.error("percentage_16b should be larger than or equal to percentage")
            return output_dir

        if output_dir is None:
            output_dir = "/data1/kneron_flow"
        # mixbw is only valid when the partner mode is mixbw or int16.
        if datapath_bitwidth_mode == 'mixbw' and weight_bitwidth_mode not in ['mixbw', 'int16']:
            logging.error("mixbw mode is only supported when datapath_bitwidth_mode and weight_bitwidth_mode are both mixbw or int16.")
            return output_dir
        if weight_bitwidth_mode == 'mixbw' and datapath_bitwidth_mode not in ['mixbw', 'int16']:
            logging.error("mixbw mode is only supported when datapath_bitwidth_mode and weight_bitwidth_mode are both mixbw or int16.")
            return output_dir

        # Run knerex
        # Dispatch: 730 + mixbw uses the v2 optimizing flow, plain 730 uses
        # the v2 fx-model flow, everything else uses the v1 flow.
        if self.platform == "730" and (datapath_bitwidth_mode == "mixbw" or weight_bitwidth_mode == "mixbw"):
            # Encode which side(s) requested automatic bitwidth search.
            if datapath_bitwidth_mode == "mixbw" and weight_bitwidth_mode == "mixbw":
                mixbw_mode = "both"
            elif datapath_bitwidth_mode == "mixbw":
                mixbw_mode = "data"
            else:
                mixbw_mode = "weight"
            ret = gen_opt_model_v2(
                self.onnx_path,
                input_mapping,
                data_analysis_threads=threads,
                weight_bitwidth_mode='int16',
                mixbw_mode=mixbw_mode,
                flops_ratio=flops_ratio,
                p_output=output_dir,
                clean_cache=True)
        elif self.platform == "730":
            ret = gen_fx_model_v2(
                self.onnx_path,
                input_mapping,
                int(self.platform),
                datapath_range_method=datapath_range_method,
                data_analysis_pct=percentage,
                data_analysis_16b_pct=percentage_16b,
                data_analysis_threads=threads,
                datapath_bitwidth_mode=datapath_bitwidth_mode,
                weight_bitwidth_mode=weight_bitwidth_mode,
                model_in_bitwidth_mode=model_in_bitwidth_mode,
                model_out_bitwidth_mode=model_out_bitwidth_mode,
                cpu_node_bitwidth_mode=cpu_node_bitwidth_mode,
                percentile=percentile,
                outlier_factor=outlier_factor,
                quantize_mode=quantize_mode,
                quan_config=quan_config,
                p_output=output_dir,
                compiler_tiling=compiler_tiling,
                mode=mode,
                optimize=f"o{optimize}",
                lut_high_accuracy_mode=lut_high_accuracy_mode,
            )
        else:
            ret = gen_fx_model_v1(
                self.onnx_path,
                input_mapping,
                int(self.platform),
                datapath_range_method=datapath_range_method,
                data_analysis_pct=percentage,
                data_analysis_16b_pct=percentage_16b,
                data_analysis_threads=threads,
                datapath_bitwidth_mode=datapath_bitwidth_mode,
                weight_bitwidth_mode=weight_bitwidth_mode,
                model_in_bitwidth_mode=model_in_bitwidth_mode,
                model_out_bitwidth_mode=model_out_bitwidth_mode,
                cpu_node_bitwidth_mode=cpu_node_bitwidth_mode,
                percentile=percentile,
                outlier_factor=outlier_factor,
                quantize_mode=quantize_mode,
                p_output=output_dir,
                compiler_tiling=compiler_tiling,
                mode=mode,
                optimize=f"o{optimize}",
            )
        # Check outputs
        if len(ret) == 0:
            logging.error("Fixed-point analysis failed.")
            return output_dir
        if mode == 0:
            logging.warning(
                "Anaysis is running under mode 0, which calls evaluator only. The bie file is not generated. Return folder path instead."
            )
            return output_dir
        # Locate the generated bie: <onnx basename>*kdp<platform>*.bie under output_dir.
        files = glob.glob(
            os.path.join(
                output_dir,
                f"{os.path.basename(self.onnx_path).split('.')[0]}*kdp{self.platform}*.bie",
            )
        )
        if len(files) == 0:
            logging.error("Cannot find bie file.")
            return output_dir
        elif len(files) > 1:
            logging.warning("Multiple bie files are found. Use the first one.")
        self.bie_path = files[0]

        # Do clean up
        if not self.debug:
            files_to_remove = [f"models_{self.platform}.nef"]
            clean_up(output_dir, files_to_remove)
        return self.bie_path

    def evaluate(
        self,
        output_dir: str = "/data1/kneron_flow",
        datapath_bitwidth_mode="int8",
        weight_bitwidth_mode="int8",
        model_in_bitwidth_mode="int8",
        model_out_bitwidth_mode="int8",
        cpu_node_bitwidth_mode="int8",
        weight_bandwidth=None,
        dma_bandwidth=None,
        compiler_tiling="default",
    ) -> str:
        """Run IP evaluator.

        Args:
            output_dir (str, optional): path to the output directory. Defaults to /data1/kneron_flow.
            datapath_bitwidth_mode: choose from "int8"/"int16"/"mix balance"/"mix light". ("int16" is not supported in kdp520. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8.)
            weight_bitwidth_mode: choose from "int8"/"int16"/"int4"/"mix balance"/"mix light". ("int16" is not supported in kdp520. "int4" is not supported in kdp720. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8.)
            model_in_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            model_out_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            cpu_node_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            weight_bandwidth: weight bandwidth in gbps. Defaults to None to use the default value for the specific hardware.
            dma_bandwidth: dma bandwidth in gbps. Defaults to None to use the default value for the specific hardware.
            compiler_tiling (str, optional): `fm_cut` option before. could be "default" or "deep_search". Deep search mode optimizes the performance but takes longer. Defaults to "default".

        Returns:
            str: report of the IP evaluator (parsed from the generated html),
            or an "[ERROR] ..." string / the output directory on failure.
        """
        # Check the input file path: prefer the quantized bie over the raw onnx.
        if self.bie_path is not None:
            input_model_path = self.bie_path
        elif self.onnx_path is not None:
            input_model_path = self.onnx_path
        else:
            logging.error(
                "Onnx model path or bie model path is required for ip evaluator."
            )
            return "[ERROR] Cannot find input file."
        if output_dir is None:
            output_dir = "/data1/kneron_flow"
        # Run ip evaluator — this is mode=0 of the fx-model flow; the v2 flow
        # is used for 730, the v1 flow otherwise.
        if self.platform == "730":
            gen_fx_model = gen_fx_model_v2
        else:
            gen_fx_model = gen_fx_model_v1
        ret = gen_fx_model(
            input_model_path,
            None,
            int(self.platform),
            p_output=output_dir,
            mode=0,
            datapath_bitwidth_mode=datapath_bitwidth_mode,
            weight_bitwidth_mode=weight_bitwidth_mode,
            model_in_bitwidth_mode=model_in_bitwidth_mode,
            model_out_bitwidth_mode=model_out_bitwidth_mode,
            cpu_node_bitwidth_mode=cpu_node_bitwidth_mode,
            weight_bandwidth=weight_bandwidth,
            dma_bandwidth=dma_bandwidth,
            compiler_tiling=compiler_tiling,
        )
        # Check outputs
        if len(ret) == 0:
            logging.error("Fixed-point analysis failed.")
            return output_dir
        # Get the result: parse the html report written by the evaluator.
        with open(output_dir + "/model_fx_report.html", "r") as f:
            result = f.read()
        return evalutor_result_html_parse(result)
|
|
|
|
|
|
# Compiler helper functions. Since compiler might involve multiple models, we seperate it into ModelConfig.
|
|
def gen_config_for_single_model(platform: str, model: str, id: str, hw_cut_opt=False, template: str = None, input_fmt=None):
    """Generate a per-model compiler config json via the compiler's gen_config.py.

    Args:
        platform (str): hardware platform string, e.g. "520"/"720"/"730".
        model (str): model file path, passed to gen_config.py with -m when hw_cut_opt is enabled.
        id (str): model ID, used to name the generated /tmp/<id>.json file.
        hw_cut_opt (bool, optional): enable hardware cut optimization (not supported on 520). Defaults to False.
        template (str, optional): template config passed to gen_config.py with -f. Defaults to None.
        input_fmt (str or dict, optional): input format override; only applied for kdp730. Defaults to None.

    Returns:
        str: path of the generated config json (/tmp/<id>.json).

    Raises:
        SystemExit: if hw_cut_opt is requested on an unsupported platform, or
            input_fmt is neither str nor dict.
        subprocess.CalledProcessError: if gen_config.py fails.
    """
    additional_config = {}
    # hw_cut_opt: hardware cut optimization based on json config (520 unsupported).
    if hw_cut_opt and platform != "520":
        additional_config["ip_evaluator_cfg"] = '"/workspace/scripts/res/ip_config_' + platform + '.json"'
    elif hw_cut_opt:
        logging.error(
            f"Platform {platform} does not support hardware cut optimization yet."
        )
        # raise SystemExit(1) instead of exit(1): the exit() builtin is
        # injected by the site module and is not guaranteed to exist.
        raise SystemExit(1)
    # input_fmt: input format for kdp730. If None, the compiler decides automatically.
    if input_fmt is not None and platform == "730":
        if isinstance(input_fmt, (str, dict)):
            additional_config["input_fmt"] = input_fmt
        else:
            logging.error("input_fmt should be str or dict.")
            raise SystemExit(1)
    # Select libs folder based on platform (v2 toolchain for 730).
    LIBS_FOLDER = LIBS_V2_FOLDER if platform == "730" else LIBS_V1_FOLDER
    os.environ["OPT_COMPILE_DIR"] = LIBS_FOLDER + "/compiler/opt_compile"
    additional_json = json.dumps(additional_config)
    commands = [
        "python",
        LIBS_FOLDER + "/compiler/gen_config.py",
        "-t",
        platform,
        "-v",
        "model_rel",
        "-a",
        additional_json,
        "-o",
        f"/tmp/{id}.json",
    ]
    if hw_cut_opt:
        commands.append("-m")
        commands.append(model)
    if template is not None:
        commands.append("-f")
        commands.append(template)
    # check=True so a failing gen_config.py aborts instead of silently leaving
    # a stale/missing config (consistent with the other subprocess.run calls
    # in this module).
    subprocess.run(commands, check=True)
    return f"/tmp/{id}.json"
|
|
|
|
|
|
def generate_batch_conf(
    encryption_config: Dict,
    platform: str,
    output_path: str,
    weight_compress=False,
    flatbuffer=True,
):
    """Generate the shared batch-compile config json via the compiler's gen_config.py.

    Args:
        encryption_config (Dict): must contain "whether_encryption" (bool). When
            True, "encryption mode" selects mode 1 (default; needs
            "encryption_key" and "encryption_file", optional
            "encryption_efuse_key") or mode 2 (needs "encryption_efuse_key").
        platform (str): hardware platform; must be in SUPPORTED_PLATFORMS.
        output_path (str): where gen_config.py writes the config json.
        weight_compress (bool, optional): enable weight compression. Defaults to False.
        flatbuffer (bool, optional): when False, sets gen_setup_fbs to False. Defaults to True.

    Raises:
        ValueError: on an unknown encryption mode or unsupported platform.
        subprocess.CalledProcessError: if gen_config.py fails.
    """
    additional_config = {}
    if encryption_config["whether_encryption"]:
        additional_config["encryption_flag"] = True
        # A missing "encryption mode" key defaults to mode 1.
        enc_mode = encryption_config.get("encryption mode", 1)
        if enc_mode == 1:
            additional_config["encryption_key"] = encryption_config["encryption_key"]
            additional_config["encryption_file"] = encryption_config["encryption_file"]
            # The efuse key is optional in mode 1.
            if "encryption_efuse_key" in encryption_config:
                additional_config["encryption_efuse_key"] = encryption_config[
                    "encryption_efuse_key"
                ]
        elif enc_mode == 2:
            additional_config["encryption_efuse_key"] = encryption_config[
                "encryption_efuse_key"
            ]
        else:
            raise ValueError("encryption mode can only be 1 or 2.")
    else:
        additional_config["encryption_flag"] = False
    additional_config["weight_compress"] = weight_compress
    if not flatbuffer:
        additional_config["gen_setup_fbs"] = False

    additional_json = json.dumps(additional_config)

    if platform not in SUPPORTED_PLATFORMS:
        raise ValueError("Invalid version for batch compiler: " + platform)

    # Generate config file using compiler gen_config.py script (v2 libs for 730).
    LIBS_FOLDER = LIBS_V2_FOLDER if platform == "730" else LIBS_V1_FOLDER
    subprocess.run(
        [
            "python",
            LIBS_FOLDER + "/compiler/gen_config.py",
            "-t",
            platform,
            "-v",
            "model_rel",
            "-o",
            output_path,
            "-a",
            additional_json,
        ],
        # check=True so a failing gen_config.py aborts the batch compile
        # instead of continuing with a missing config file.
        check=True,
    )
|
|
|
|
|
|
def generate_batch_bconfig(
    models, batch_conf_path: str, output_path: str, hw_cut_opt=False
):
    """Generate the batch-compile bconfig json describing every model to compile.

    Each model entry points at its bie (preferred) or at its onnx plus radix
    json; a per-model compiler config is attached when the model carries one,
    or generated on the fly when hw_cut_opt or a per-model input_fmt is set.

    Args:
        models: iterable of ModelConfig-like objects (id, version, platform,
            bie_path, onnx_path, radix_json_path, compiler_config_path, input_fmt).
        batch_conf_path (str): path of the shared batch compile config json.
        output_path (str): where to write the bconfig json.
        hw_cut_opt (bool, optional): enable hardware cut optimization for
            models without their own compiler config. Defaults to False.
    """
    model_list = []
    for batch_model in models:
        if batch_model.bie_path is None and batch_model.radix_json_path is None:
            # NOTE(review): only logs, does not skip the model — the entry is
            # still emitted and the compiler will fail on it later.
            logging.error("Analysis is required before compile.")
        if batch_model.bie_path is not None:
            model_dict = {
                "id": batch_model.id,
                "version": batch_model.version,
                "path": batch_model.bie_path,
            }
        else:
            # Fall back to onnx + radix json when no bie is available.
            model_dict = {
                "id": batch_model.id,
                "version": batch_model.version,
                "path": batch_model.onnx_path,
                "radix_json": batch_model.radix_json_path,
            }
        if batch_model.compiler_config_path is not None:
            model_dict["compile_cfg"] = batch_model.compiler_config_path
        elif hw_cut_opt or batch_model.input_fmt is not None:
            model_dict["compile_cfg"] = gen_config_for_single_model(
                batch_model.platform,
                batch_model.bie_path,
                str(batch_model.id),
                hw_cut_opt=hw_cut_opt,
                template=batch_conf_path,
                input_fmt=batch_model.input_fmt,
            )
        model_list.append(model_dict)
    bconf_dict = {"compile_cfg": batch_conf_path, "models": model_list}

    # Context manager instead of bare open/close: the handle is closed even
    # if json.dump raises.
    with open(output_path, "w") as fp:
        json.dump(bconf_dict, fp)
|
|
|
|
|
|
def compile(
    model_list: List[ModelConfig],
    output_dir: str = "/data1/kneron_flow",
    dedicated_output_buffer: bool = True,
    weight_compress: bool = False,
    hardware_cut_opt: bool = False,
    flatbuffer: bool = True,
    debug: bool = False,
) -> str:
    """Compile the models and generate the nef file.

    Thin convenience wrapper around :func:`encrypt_compile` with encryption
    disabled (mode=None).

    Args:
        model_list (List[ModelConfig]): a list of models need to be compile. Models with onnx should run analysis() before compilation.
        output_dir (str, optional): output directory. Defaults to "/data1/kneron_flow".
        dedicated_output_buffer (bool, optional): dedicated output buffer. Defaults to True.
        weight_compress (bool, optional): compress weight to slightly reduce the binary file size. Defaults to False.
        hardware_cut_opt (bool, optional): optimize the hardware memory usage while processing large inputs. This option might cause the compiling time increase. Currently, only available for 720. Defaults to False.
        flatbuffer (bool, optional): enable new flatbuffer mode for 720. Defaults to True.
        debug (bool, optional): keep intermediate files. Defaults to False.

    Returns:
        str: path to the nef file
    """
    options = dict(
        mode=None,  # no encryption
        weight_compress=weight_compress,
        hardware_cut_opt=hardware_cut_opt,
        flatbuffer=flatbuffer,
        debug=debug,
    )
    return encrypt_compile(model_list, output_dir, dedicated_output_buffer, **options)
|
|
|
|
|
|
def encrypt_compile(
    model_list: List[ModelConfig],
    output_dir: str = "/data1/kneron_flow",
    dedicated_output_buffer: bool = True,
    mode: int = None,
    key: str = "",
    key_file: str = "",
    encryption_efuse_key: str = "",
    weight_compress: bool = False,
    hardware_cut_opt=False,
    flatbuffer=True,
    debug=False,
) -> str:
    """Compile the models, generate an encrypted nef file.

    Args:
        model_list (List[ModelConfig]): a list of models need to be compile. Models with onnx should run analysis() before compilation.
        output_dir (str, optional): output directory. Defaults to "/data1/kneron_flow".
        dedicated_output_buffer (bool, optional): dedicated output buffer. Defaults to True.
        mode (int, optional): There are two modes: 1, 2. Defaults to None, which is no encryption.
        key (str, optional): a hex code. Required in mode 1. Defaults to "".
        key_file (str, optional): key file path. Required in mode 1. Defaults to "".
        encryption_efuse_key (str, optional): a hex code. Required in mode 2 and optional in mode 1. Defaults to "".
        weight_compress (bool, optional): compress weight to slightly reduce the binary file size. Defaults to False.
        hardware_cut_opt (bool, optional): optimize the hardware memory usage while processing large inputs. This option might cause the compiling time increase. This option is ignored if a model has specified its compiler config json. Currently, only available for 720. Defaults to False.
        flatbuffer (bool, optional): enable new flatbuffer mode for 720. Defaults to True.
        debug (bool, optional): keep intermediate files after compiling. Defaults to False.

    Returns:
        str: path to the nef file, or an "[ERROR] ..." string on validation failure.
    """
    # Check model platform: all models in one batch must target the same
    # platform; collect comma-separated version/id strings for the nef metadata.
    platform = model_list[0].platform
    version_str = ""
    id_str = ""
    for model in model_list:
        if model.platform != platform:
            logging.error("Batch compile models should belongs to the same platform.")
            return "[ERROR] Batch compile models should belongs to the same platform."
        version_str += model.version + ","
        id_str += str(model.id) + ","
    if str(platform) not in SUPPORTED_PLATFORMS:
        logging.error(f"{platform} batch compiler is not ready yet.")
        return f"[ERROR] {platform} batch compiler is not ready yet."
    # Check encryption: build the encryption_config dict consumed by
    # generate_batch_conf.
    if mode == 1:
        enc_config = {
            "whether_encryption": True,
            "encryption mode": 1,
            "encryption_key": key,
            "encryption_file": key_file,
        }
        # efuse key is optional in mode 1.
        if len(encryption_efuse_key) > 0:
            enc_config["encryption_efuse_key"] = encryption_efuse_key
    elif mode == 2:
        enc_config = {
            "whether_encryption": True,
            "encryption mode": 2,
            "encryption_efuse_key": encryption_efuse_key,
        }
    else:
        enc_config = {"whether_encryption": False}
    # Check output dir
    if output_dir is None:
        output_dir = "/data1/kneron_flow"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Before batch compiler. Backup ip evaluator result so a previous
    # evaluate() report is not lost if the compiler overwrites/removes it.
    profile_result = None
    if os.path.isfile(output_dir + "/ProfileResult.txt"):
        with open(output_dir + "/ProfileResult.txt", "r") as f:
            profile_result = f.read()

    # Generate config and run
    if platform == "520":
        # 520 path: v1 toolchain, no flatbuffer flag, no hardware cut opt.
        generate_batch_conf(
            enc_config,
            "520",
            output_dir + "/batch_compile_config.json",
            weight_compress=weight_compress,
        )
        if hardware_cut_opt:
            logging.warning(
                "KDP520 currently does not support hardware cut optimization. Skipped."
            )
        generate_batch_bconfig(
            model_list,
            output_dir + "/batch_compile_config.json",
            output_dir + "/batch_compile_bconfig.json",
        )
        # batch_compile writes its outputs into the current working directory.
        os.chdir(output_dir)
        commands = [
            LIBS_V1_FOLDER + "/compiler/batch_compile",
            "-T",
            "520",
            output_dir + "/batch_compile_bconfig.json",
            "-t",
            get_toolchain_version(),
        ]
        if dedicated_output_buffer:
            commands.append("-o")
        subprocess.run(commands, check=True)
    else:
        generate_batch_conf(
            enc_config,
            platform,
            output_dir + "/batch_compile_config.json",
            weight_compress=weight_compress,
            flatbuffer=flatbuffer,
        )
        generate_batch_bconfig(
            model_list,
            output_dir + "/batch_compile_config.json",
            output_dir + "/batch_compile_bconfig.json",
            hw_cut_opt=hardware_cut_opt,
        )
        # batch_compile writes its outputs into the current working directory.
        os.chdir(output_dir)
        if platform == "730":
            LIBS_FOLDER = LIBS_V2_FOLDER
        else:
            LIBS_FOLDER = LIBS_V1_FOLDER
        commands = [
            LIBS_FOLDER + "/compiler/batch_compile",
            "-T",
            platform,
            output_dir + "/batch_compile_bconfig.json",
            "-t",
            get_toolchain_version(),
        ]
        if dedicated_output_buffer:
            commands.append("-o")
        subprocess.run(commands, check=True)
        if platform == "730":
            # Use kne to generate nef file: the 730 compiler emits a .kne,
            # which kneron_nef_utils converts into models_730.nef.
            subprocess.run(
                [
                    LIBS_V2_FOLDER + "/compiler/kneron_nef_utils",
                    "-G",
                    "--kne",
                    output_dir + "/models_730.kne",
                    "-t",
                    '730',
                    "-O",
                    output_dir,
                ],
                check=True,
            )
            # Update model info: stamp the collected version/id strings
            # (comma-joined, trailing comma stripped) into the nef in place.
            subprocess.run(
                [
                    LIBS_V2_FOLDER + "/compiler/kneron_nef_utils",
                    "-U",
                    output_dir + "/models_730.nef",
                    "--model_info_version",
                    version_str[:-1],
                    "--model_info_name",
                    id_str[:-1],
                    "--replace_original"
                ],
                check=True,
            )

    # Restore profile result.
    if (
        not os.path.isfile(output_dir + "/ProfileResult.txt")
        and profile_result is not None
    ):
        with open(output_dir + "/ProfileResult.txt", "w") as f:
            f.write(profile_result)
    # Clean up files: remove intermediate binaries/configs unless debugging.
    if not debug:
        files_to_remove = [
            "models_730.kne",
            "all_models.bin",
            "batch_compile_bconfig.json",
            "batch_compile_config.json",
            "fw_info.bin",
        ]
        dirs_to_remove = []
        for model in model_list:
            files_to_remove.append(f"model_{model.id}_command.bin")
            files_to_remove.append(f"model_{model.id}_setup.bin")
            files_to_remove.append(f"model_{model.id}_weight.bin")
            # Preserve each model's ioinfo.json as <folder>_ioinfo.json before
            # its per-model working folder is removed.
            if model.bie_path is not None:
                folder_path = model.bie_path.split(".")[0] + "_modelid_" + str(model.id)
                dirs_to_remove.append(folder_path)
                if os.path.isfile(folder_path + "/ioinfo.json"):
                    shutil.copyfile(
                        folder_path + "/ioinfo.json", folder_path + "_ioinfo.json"
                    )
            elif model.onnx_path is not None:
                folder_path = (
                    model.onnx_path.split(".")[0] + "_modelid_" + str(model.id)
                )
                dirs_to_remove.append(folder_path)
                if os.path.isfile(folder_path + "/ioinfo.json"):
                    shutil.copyfile(
                        folder_path + "/ioinfo.json", folder_path + "_ioinfo.json"
                    )
        clean_up(output_dir, files_to_remove, dirs_to_remove)
    return f"{output_dir}/models_{platform}.nef"
|
|
|
|
|
|
def combine_nef(
    nef_list: List[str], output_path: str = "/data1/combined", platform: int = 720
) -> str:
    """Merge multiple nef files into one.

    Args:
        nef_list (List[str]): a list of nef file paths.
        output_path (str, optional): output folder name. Defaults to /data1/combined. The nef path would be /data1/combined/models_<target>.nef.
        platform (int, optional): kept for backward compatibility; currently
            unused by this function (kneron_nef_utils infers the target).
            Defaults to 720.

    Returns:
        str: output folder name. Should be same as the `output_path` argument.
        Returns "" when nef_list is empty.

    Raises:
        subprocess.CalledProcessError: if kneron_nef_utils fails.
    """
    if not nef_list:
        logging.error("nef_list should not be empty")
        return ""
    # kneron_nef_utils expects the nef paths as one space-separated argument.
    nef_list_str = " ".join(nef_list)
    subprocess.run(
        [
            LIBS_V2_FOLDER + "/compiler/kneron_nef_utils",
            "-c",
            nef_list_str,
            "-O",
            output_path,
        ],
        check=True,
    )
    return output_path
|