# -*- coding: utf-8 -*-
|
|
|
|
# This file mainly contains the ModelConfig class and the functions for running knerex and the compiler.
|
|
from typing import Dict, List
|
|
import glob
|
|
import subprocess
|
|
import shutil
|
|
import json
|
|
import os
|
|
import onnx
|
|
import logging
|
|
|
|
from .utils import (
|
|
get_toolchain_version,
|
|
RESULT_FOLDER,
|
|
LIBS_V1_FOLDER,
|
|
LIBS_V2_FOLDER,
|
|
SCRIPT_FOLDER,
|
|
clean_up,
|
|
evalutor_result_html_parse,
|
|
check_filename_validity
|
|
)
|
|
|
|
|
|
from sys_flow.run import gen_fx_model as gen_fx_model_v1
|
|
from sys_flow_v2.run import gen_fx_model as gen_fx_model_v2
|
|
from sys_flow_v2.run import gen_opt_model_v2
|
|
|
|
# Platform availability is gated on the installed onnx release: the "730"
# target is excluded only for onnx 1.7.0 (NOTE(review): presumably 730 needs
# a newer onnx — confirm the intended version boundary).
SUPPORTED_PLATFORMS = (
    ["520", "720", "530", "630"]
    if onnx.__version__ == "1.7.0"
    else ["520", "720", "530", "630", "730"]
)
|
|
|
|
|
|
class ModelConfig:
    """Kneron model configuration.

    Holds a single model's ID/version/platform and the paths to its artifacts
    (onnx / bie / radix json / compiler config), and drives quantization
    (:meth:`analysis`) and the IP evaluator (:meth:`evaluate`) for that model.
    """

    def __init__(
        self,
        id: int,
        version: str,
        platform: str,
        onnx_model=None,
        onnx_path=None,
        bie_path=None,
        radix_json_path=None,
        compiler_config_path=None,
        input_fmt = None,
        debug=False,
    ):
        """Create a Kneron model config object. One of these three parameters is required: onnx_model, onnx_path, bie_path.

        Args:
            id (int): model ID
            version (str): version number which should be a four digit hex, e.g. "0a2f"
            platform (str): hardware platform; must be one of SUPPORTED_PLATFORMS, e.g. "520" or "720"
            onnx_model (ModelProto, optional): loaded onnx model. Defaults to None.
            onnx_path (str, optional): onnx file path. Defaults to None.
            bie_path (str, optional): bie file path. Defaults to None.
            radix_json_path (str, optional): radix json path. Defaults to None.
            compiler_config_path (str, optional): compiler config json path. Defaults to None.
            input_fmt (str or Dict, optional): input format. If None, it will be automatically decided. If str, all inputs will be set to the same format. If Dict, each input can have its own format. Defaults to None. Only available for kdp730.
                e.g. input_fmt="4W4C8B" or input_fmt={"input1": "4W4C8B", "input2": "16W1C8B"}.
                available formats: <id: (name, bitwidth)>
                    0: ("1W16C8B_CH_COMPACT", 8),
                    1: ("1W16C8BHL_CH_COMPACT", 16),
                    2: ("4W4C8B", 8),
                    3: ("4W4C8BHL", 16),
                    4: ("16W1C8B", 8),
                    5: ("16W1C8BHL", 16),
                    6: ("8W1C16B", 16),
                    7: ("PS_1W16C24B", 24),
                    8: ("1W16C8B", 8),
                    9: ("1W16C8BHL", 16),
                    10: ("HW4C8B_KEEP_A", 8),  # inproc
                    11: ("HW4C8B_DROP_A", 8),  # inproc
                    12: ("HW1C8B", 8),  # inproc
                    13: ("HW1C16B_LE", 16),  # inproc
                    14: ("HW1C16B_BE", 16),  # inproc
                    100: ("RAW8", 8),
                    102: ("RAW16", 16),
                    103: ("RAW_FLOAT", 32),
            debug (bool, optional): debug mode. When True, intermediate files are kept after analysis. Defaults to False.

        Raises:
            ValueError: if RESULT_FOLDER is shadowed by a regular file, or the
                platform is not in SUPPORTED_PLATFORMS.
        """
        # Coerce id/platform so callers may also pass e.g. id="32" or platform=520.
        if type(id) is not int:
            self.id = int(id)
        else:
            self.id = id
        self.version = version
        if type(platform) is not str:
            self.platform = str(platform)
        else:
            self.platform = platform
        self.onnx_path = onnx_path
        self.bie_path = bie_path
        self.radix_json_path = radix_json_path
        self.compiler_config_path = compiler_config_path
        self.debug = debug
        self.input_fmt = input_fmt
        # A loaded onnx model takes precedence over onnx_path: serialize it
        # into RESULT_FOLDER and point onnx_path at the saved copy.
        if onnx_model is not None:
            self.onnx_path = os.path.join(RESULT_FOLDER, "input.onnx")
            if os.path.isfile(RESULT_FOLDER):
                logging.error(
                    f"Folder {RESULT_FOLDER} cannot be created. File with same name exists."
                )
                raise ValueError(
                    f"Folder {RESULT_FOLDER} cannot be created. File with same name exists."
                )
            elif not os.path.isdir(RESULT_FOLDER):
                os.makedirs(RESULT_FOLDER)
            onnx.save(onnx_model, self.onnx_path)
        if self.onnx_path is None and self.bie_path is None:
            # NOTE(review): this only logs and does not raise — later calls
            # (analysis/evaluate) will fail when both paths are missing.
            logging.error(
                "These three parameter cannot be None at the same time: onnx_model, onnx_path, bie_path."
            )
        # Check if it is relative path.
        if self.onnx_path is not None and self.onnx_path[0] != "/":
            self.onnx_path = os.path.abspath(self.onnx_path)
            check_filename_validity(self.onnx_path)
        if self.bie_path is not None and self.bie_path[0] != "/":
            self.bie_path = os.path.abspath(self.bie_path)
            check_filename_validity(self.bie_path)
        # Check platform
        if self.platform not in SUPPORTED_PLATFORMS:
            logging.error(f"Platform {self.platform} is not supported in the current environment.")
            raise ValueError(f"Platform {self.platform} is not supported in the current environment.")

    def analysis(
        self,
        input_mapping: Dict,
        output_dir: str = "/data1/kneron_flow",
        threads: int = 4,
        quantize_mode: str = "default",
        datapath_range_method: str = "percentage",
        percentage: float = 0.999,
        percentage_16b: float = 0.999999,
        percentile: float = 0.001,
        outlier_factor: float = 1.0,
        datapath_bitwidth_mode="int8",
        weight_bitwidth_mode="int8",
        model_in_bitwidth_mode="int8",
        model_out_bitwidth_mode="int8",
        cpu_node_bitwidth_mode="int8",
        flops_ratio: float = 0.2,
        compiler_tiling="default",
        mode: int = 1,
        optimize: int = 0,
        lut_high_accuracy_mode="0",
        quan_config=None,
    ) -> str:
        """Fix point analysis for the model. If the object is initialized with an onnx. This step is required before compile.

        Args:
            input_mapping (Dict): Dictionary of mapping input data to a specific input. Input data should be a list of numpy array.
            output_dir (str, optional): path to the output directory. Defaults to /data1/kneron_flow.
            threads (int, optional): multithread setting. Defaults to 4.
            quantize_mode (str, optional): quantize_mode setting. Currently support default and post_sigmoid. Defaults to "default".
            datapath_range_method (str, optional): could be 'mmse' or 'percentage'. mmse: use snr-based-range method. percentage: use arbitary percentage. Default to 'percentage'.
            percentage (float, optional): used to determine the range of 8-bit data under 'percentage' mode. Suggest to set value between 0.999 and 1.0. Use 1.0 for detection models. Defaults to 0.999.
            percentage_16b (float, optional): used to determine the range of 16-bit data under 'percentage' mode. Suggest to set value between 0.999 and 1.0. Use 1.0 for detection models. Defaults to 0.999999. percentage_16b >= percentage.
            percentile (float, optional): used under 'mmse' mode. The range to search. The larger the value, the larger the search range, the better the performance but the longer the simulation time. Defaults to 0.001.
            outlier_factor (float, optional): used under 'mmse' mode. The factor applied on outliers. For example, if clamping data is sensitive to your model, set outlier_factor to 2 or higher. Higher outlier_factor will reduce outlier removal by increasing range. Defaults to 1.0.
            datapath_bitwidth_mode: choose from "int8"/"int16"/"mix balance"/"mix light"/"mixbw". ("int16" is not supported in kdp520. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8. "mixbw" automatically selects the best bitwidth mode based but takes longer time.)
            weight_bitwidth_mode: choose from "int8"/"int16"/"int4"/"mix balance"/"mix light"/"mixbw". ("int16" is not supported in kdp520. "int4" is not supported in kdp720. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8. "mixbw" automatically selects the best bitwidth mode based but takes longer time.)
            model_in_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            model_out_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            cpu_node_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            flops_ratio (float, optional): the floating-point computation relative to a fully INT16-quantized model, normalized to 1. (default as 0.2)
            compiler_tiling (str, optional): `fm_cut` option before. could be "default" or "deep_search". Deep search mode optimizes the performance but takes longer. Defaults to "default".
            mode (int, optional): running mode for the analysis.
                0: run ip_evaluator only.
                1: run knerex (for quantization) only.
                2: run knerex + dynasty + compiler + csim + bit-true-match check. dynasty will inference only 1 image and only check quantization accuracy of output layers.
                3: run knerex + dynasty + compiler + csim + bit-true-match check. dynasty will inference all images and dump results of all layers. It will provide most detailed analysis but will take much longer time.
                Defaults to 1.
            optimize (int, optional): level of optimization. 0-2, the larger number, the better model performance, but takes longer. Defaults to 0.
            lut_high_accuracy_mode (str, optional): forwarded to the v2 flow only. Defaults to "0".
            quan_config (optional): extra quantization config forwarded to the underlying flow. Defaults to None.

        Returns:
            str: path to the output bie file. On validation failure or mode 0,
            the output directory path is returned instead.
        """
        # Check input params
        if self.onnx_path is None:
            logging.error("onnx model is required before fix point analysis")
        if self.bie_path is not None:
            logging.warning("bie file is provided. It will be overwritten.")

        # Percentage parameters must be sane before launching the long run.
        if percentage < 0.9 or percentage > 1.0:
            logging.error("percentage should be between 0.9 and 1.0")
            return output_dir
        if percentage_16b < 0.9 or percentage_16b > 1.0:
            logging.error("percentage_16b should be between 0.9 and 1.0")
            return output_dir
        if percentage_16b < percentage:
            logging.error("percentage_16b should be larger than or equal to percentage")
            return output_dir

        if output_dir is None:
            output_dir = "/data1/kneron_flow"
        # mixbw is only valid when the partner mode is mixbw or int16.
        if datapath_bitwidth_mode == 'mixbw' and weight_bitwidth_mode not in ['mixbw', 'int16']:
            logging.error("mixbw mode is only supported when datapath_bitwidth_mode and weight_bitwidth_mode are both mixbw or int16.")
            return output_dir
        if weight_bitwidth_mode == 'mixbw' and datapath_bitwidth_mode not in ['mixbw', 'int16']:
            logging.error("mixbw mode is only supported when datapath_bitwidth_mode and weight_bitwidth_mode are both mixbw or int16.")
            return output_dir

        # Run knerex
        # Dispatch: 730 + mixbw uses the v2 optimizing flow, plain 730 uses
        # the v2 fx-model flow, everything else uses the v1 flow.
        if self.platform == "730" and (datapath_bitwidth_mode == "mixbw" or weight_bitwidth_mode == "mixbw"):
            # Encode which side(s) requested automatic bitwidth search.
            if datapath_bitwidth_mode == "mixbw" and weight_bitwidth_mode == "mixbw":
                mixbw_mode = "both"
            elif datapath_bitwidth_mode == "mixbw":
                mixbw_mode = "data"
            else:
                mixbw_mode = "weight"
            ret = gen_opt_model_v2(
                self.onnx_path,
                input_mapping,
                data_analysis_threads=threads,
                weight_bitwidth_mode='int16',
                mixbw_mode=mixbw_mode,
                flops_ratio=flops_ratio,
                p_output=output_dir,
                clean_cache=True)
        elif self.platform == "730":
            ret = gen_fx_model_v2(
                self.onnx_path,
                input_mapping,
                int(self.platform),
                datapath_range_method=datapath_range_method,
                data_analysis_pct=percentage,
                data_analysis_16b_pct=percentage_16b,
                data_analysis_threads=threads,
                datapath_bitwidth_mode=datapath_bitwidth_mode,
                weight_bitwidth_mode=weight_bitwidth_mode,
                model_in_bitwidth_mode=model_in_bitwidth_mode,
                model_out_bitwidth_mode=model_out_bitwidth_mode,
                cpu_node_bitwidth_mode=cpu_node_bitwidth_mode,
                percentile=percentile,
                outlier_factor=outlier_factor,
                quantize_mode=quantize_mode,
                quan_config=quan_config,
                p_output=output_dir,
                compiler_tiling=compiler_tiling,
                mode=mode,
                optimize=f"o{optimize}",
                lut_high_accuracy_mode=lut_high_accuracy_mode,
            )
        else:
            ret = gen_fx_model_v1(
                self.onnx_path,
                input_mapping,
                int(self.platform),
                datapath_range_method=datapath_range_method,
                data_analysis_pct=percentage,
                data_analysis_16b_pct=percentage_16b,
                data_analysis_threads=threads,
                datapath_bitwidth_mode=datapath_bitwidth_mode,
                weight_bitwidth_mode=weight_bitwidth_mode,
                model_in_bitwidth_mode=model_in_bitwidth_mode,
                model_out_bitwidth_mode=model_out_bitwidth_mode,
                cpu_node_bitwidth_mode=cpu_node_bitwidth_mode,
                percentile=percentile,
                outlier_factor=outlier_factor,
                quantize_mode=quantize_mode,
                p_output=output_dir,
                compiler_tiling=compiler_tiling,
                mode=mode,
                optimize=f"o{optimize}",
            )
        # Check outputs
        if len(ret) == 0:
            logging.error("Fixed-point analysis failed.")
            return output_dir
        if mode == 0:
            logging.warning(
                "Anaysis is running under mode 0, which calls evaluator only. The bie file is not generated. Return folder path instead."
            )
            return output_dir
        # Locate the generated bie: <onnx basename>*kdp<platform>*.bie under output_dir.
        files = glob.glob(
            os.path.join(
                output_dir,
                f"{os.path.basename(self.onnx_path).split('.')[0]}*kdp{self.platform}*.bie",
            )
        )
        if len(files) == 0:
            logging.error("Cannot find bie file.")
            return output_dir
        elif len(files) > 1:
            logging.warning("Multiple bie files are found. Use the first one.")
        self.bie_path = files[0]

        # Do clean up
        if not self.debug:
            files_to_remove = [f"models_{self.platform}.nef"]
            clean_up(output_dir, files_to_remove)
        return self.bie_path

    def evaluate(
        self,
        output_dir: str = "/data1/kneron_flow",
        datapath_bitwidth_mode="int8",
        weight_bitwidth_mode="int8",
        model_in_bitwidth_mode="int8",
        model_out_bitwidth_mode="int8",
        cpu_node_bitwidth_mode="int8",
        weight_bandwidth=None,
        dma_bandwidth=None,
        compiler_tiling="default",
    ) -> str:
        """Run IP evaluator.

        Args:
            output_dir (str, optional): path to the output directory. Defaults to /data1/kneron_flow.
            datapath_bitwidth_mode: choose from "int8"/"int16"/"mix balance"/"mix light". ("int16" is not supported in kdp520. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8.)
            weight_bitwidth_mode: choose from "int8"/"int16"/"int4"/"mix balance"/"mix light". ("int16" is not supported in kdp520. "int4" is not supported in kdp720. "mix balance" and "mix light" are combines of int8 and int16 mode. "mix balance" prefers int16 while "mix light" prefers int8.)
            model_in_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            model_out_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            cpu_node_bitwidth_mode: choose from "int8"/"int16". ("int16" is not supported in kdp520.)
            weight_bandwidth: weight bandwidth in gbps. Defaults to None to use the default value for the specific hardware.
            dma_bandwidth: dma bandwidth in gbps. Defaults to None to use the default value for the specific hardware.
            compiler_tiling (str, optional): `fm_cut` option before. could be "default" or "deep_search". Deep search mode optimizes the performance but takes longer. Defaults to "default".

        Returns:
            str: report of the IP evaluator (parsed from the generated html),
            or an "[ERROR] ..." string / the output directory on failure.
        """
        # Check the input file path: prefer the quantized bie over the raw onnx.
        if self.bie_path is not None:
            input_model_path = self.bie_path
        elif self.onnx_path is not None:
            input_model_path = self.onnx_path
        else:
            logging.error(
                "Onnx model path or bie model path is required for ip evaluator."
            )
            return "[ERROR] Cannot find input file."
        if output_dir is None:
            output_dir = "/data1/kneron_flow"
        # Run ip evaluator — this is mode=0 of the fx-model flow; the v2 flow
        # is used for 730, the v1 flow otherwise.
        if self.platform == "730":
            gen_fx_model = gen_fx_model_v2
        else:
            gen_fx_model = gen_fx_model_v1
        ret = gen_fx_model(
            input_model_path,
            None,
            int(self.platform),
            p_output=output_dir,
            mode=0,
            datapath_bitwidth_mode=datapath_bitwidth_mode,
            weight_bitwidth_mode=weight_bitwidth_mode,
            model_in_bitwidth_mode=model_in_bitwidth_mode,
            model_out_bitwidth_mode=model_out_bitwidth_mode,
            cpu_node_bitwidth_mode=cpu_node_bitwidth_mode,
            weight_bandwidth=weight_bandwidth,
            dma_bandwidth=dma_bandwidth,
            compiler_tiling=compiler_tiling,
        )
        # Check outputs
        if len(ret) == 0:
            logging.error("Fixed-point analysis failed.")
            return output_dir
        # Get the result: parse the html report written by the evaluator.
        with open(output_dir + "/model_fx_report.html", "r") as f:
            result = f.read()
        return evalutor_result_html_parse(result)
|
|
|
|
|
|
# Compiler helper functions. Since compiler might involve multiple models, we seperate it into ModelConfig.
|
|
def gen_config_for_single_model(platform: str, model: str, id: str, hw_cut_opt=False, template: str = None, input_fmt=None):
    """Generate a per-model compiler config json via the compiler's gen_config.py.

    Args:
        platform (str): hardware platform string, e.g. "520"/"720"/"730".
        model (str): model file path, passed to gen_config.py with -m when hw_cut_opt is enabled.
        id (str): model ID, used to name the generated /tmp/<id>.json file.
        hw_cut_opt (bool, optional): enable hardware cut optimization (not supported on 520). Defaults to False.
        template (str, optional): template config passed to gen_config.py with -f. Defaults to None.
        input_fmt (str or dict, optional): input format override; only applied for kdp730. Defaults to None.

    Returns:
        str: path of the generated config json (/tmp/<id>.json).

    Raises:
        SystemExit: if hw_cut_opt is requested on an unsupported platform, or
            input_fmt is neither str nor dict.
        subprocess.CalledProcessError: if gen_config.py fails.
    """
    additional_config = {}
    # hw_cut_opt: hardware cut optimization based on json config (520 unsupported).
    if hw_cut_opt and platform != "520":
        additional_config["ip_evaluator_cfg"] = '"/workspace/scripts/res/ip_config_' + platform + '.json"'
    elif hw_cut_opt:
        logging.error(
            f"Platform {platform} does not support hardware cut optimization yet."
        )
        # raise SystemExit(1) instead of exit(1): the exit() builtin is
        # injected by the site module and is not guaranteed to exist.
        raise SystemExit(1)
    # input_fmt: input format for kdp730. If None, the compiler decides automatically.
    if input_fmt is not None and platform == "730":
        if isinstance(input_fmt, (str, dict)):
            additional_config["input_fmt"] = input_fmt
        else:
            logging.error("input_fmt should be str or dict.")
            raise SystemExit(1)
    # Select libs folder based on platform (v2 toolchain for 730).
    LIBS_FOLDER = LIBS_V2_FOLDER if platform == "730" else LIBS_V1_FOLDER
    os.environ["OPT_COMPILE_DIR"] = LIBS_FOLDER + "/compiler/opt_compile"
    additional_json = json.dumps(additional_config)
    commands = [
        "python",
        LIBS_FOLDER + "/compiler/gen_config.py",
        "-t",
        platform,
        "-v",
        "model_rel",
        "-a",
        additional_json,
        "-o",
        f"/tmp/{id}.json",
    ]
    if hw_cut_opt:
        commands.append("-m")
        commands.append(model)
    if template is not None:
        commands.append("-f")
        commands.append(template)
    # check=True so a failing gen_config.py aborts instead of silently leaving
    # a stale/missing config (consistent with the other subprocess.run calls
    # in this module).
    subprocess.run(commands, check=True)
    return f"/tmp/{id}.json"
|
|
|
|
|
|
def generate_batch_conf(
    encryption_config: Dict,
    platform: str,
    output_path: str,
    weight_compress=False,
    flatbuffer=True,
):
    """Generate the shared batch-compile config json via the compiler's gen_config.py.

    Args:
        encryption_config (Dict): must contain "whether_encryption" (bool). When
            True, "encryption mode" selects mode 1 (default; needs
            "encryption_key" and "encryption_file", optional
            "encryption_efuse_key") or mode 2 (needs "encryption_efuse_key").
        platform (str): hardware platform; must be in SUPPORTED_PLATFORMS.
        output_path (str): where gen_config.py writes the config json.
        weight_compress (bool, optional): enable weight compression. Defaults to False.
        flatbuffer (bool, optional): when False, sets gen_setup_fbs to False. Defaults to True.

    Raises:
        ValueError: on an unknown encryption mode or unsupported platform.
        subprocess.CalledProcessError: if gen_config.py fails.
    """
    additional_config = {}
    if encryption_config["whether_encryption"]:
        additional_config["encryption_flag"] = True
        # A missing "encryption mode" key defaults to mode 1.
        enc_mode = encryption_config.get("encryption mode", 1)
        if enc_mode == 1:
            additional_config["encryption_key"] = encryption_config["encryption_key"]
            additional_config["encryption_file"] = encryption_config["encryption_file"]
            # The efuse key is optional in mode 1.
            if "encryption_efuse_key" in encryption_config:
                additional_config["encryption_efuse_key"] = encryption_config[
                    "encryption_efuse_key"
                ]
        elif enc_mode == 2:
            additional_config["encryption_efuse_key"] = encryption_config[
                "encryption_efuse_key"
            ]
        else:
            raise ValueError("encryption mode can only be 1 or 2.")
    else:
        additional_config["encryption_flag"] = False
    additional_config["weight_compress"] = weight_compress
    if not flatbuffer:
        additional_config["gen_setup_fbs"] = False

    additional_json = json.dumps(additional_config)

    if platform not in SUPPORTED_PLATFORMS:
        raise ValueError("Invalid version for batch compiler: " + platform)

    # Generate config file using compiler gen_config.py script (v2 libs for 730).
    LIBS_FOLDER = LIBS_V2_FOLDER if platform == "730" else LIBS_V1_FOLDER
    subprocess.run(
        [
            "python",
            LIBS_FOLDER + "/compiler/gen_config.py",
            "-t",
            platform,
            "-v",
            "model_rel",
            "-o",
            output_path,
            "-a",
            additional_json,
        ],
        # check=True so a failing gen_config.py aborts the batch compile
        # instead of continuing with a missing config file.
        check=True,
    )
|
|
|
|
|
|
def generate_batch_bconfig(
    models, batch_conf_path: str, output_path: str, hw_cut_opt=False
):
    """Generate the batch-compile bconfig json describing every model to compile.

    Each model entry points at its bie (preferred) or at its onnx plus radix
    json; a per-model compiler config is attached when the model carries one,
    or generated on the fly when hw_cut_opt or a per-model input_fmt is set.

    Args:
        models: iterable of ModelConfig-like objects (id, version, platform,
            bie_path, onnx_path, radix_json_path, compiler_config_path, input_fmt).
        batch_conf_path (str): path of the shared batch compile config json.
        output_path (str): where to write the bconfig json.
        hw_cut_opt (bool, optional): enable hardware cut optimization for
            models without their own compiler config. Defaults to False.
    """
    model_list = []
    for batch_model in models:
        if batch_model.bie_path is None and batch_model.radix_json_path is None:
            # NOTE(review): only logs, does not skip the model — the entry is
            # still emitted and the compiler will fail on it later.
            logging.error("Analysis is required before compile.")
        if batch_model.bie_path is not None:
            model_dict = {
                "id": batch_model.id,
                "version": batch_model.version,
                "path": batch_model.bie_path,
            }
        else:
            # Fall back to onnx + radix json when no bie is available.
            model_dict = {
                "id": batch_model.id,
                "version": batch_model.version,
                "path": batch_model.onnx_path,
                "radix_json": batch_model.radix_json_path,
            }
        if batch_model.compiler_config_path is not None:
            model_dict["compile_cfg"] = batch_model.compiler_config_path
        elif hw_cut_opt or batch_model.input_fmt is not None:
            model_dict["compile_cfg"] = gen_config_for_single_model(
                batch_model.platform,
                batch_model.bie_path,
                str(batch_model.id),
                hw_cut_opt=hw_cut_opt,
                template=batch_conf_path,
                input_fmt=batch_model.input_fmt,
            )
        model_list.append(model_dict)
    bconf_dict = {"compile_cfg": batch_conf_path, "models": model_list}

    # Context manager instead of bare open/close: the handle is closed even
    # if json.dump raises.
    with open(output_path, "w") as fp:
        json.dump(bconf_dict, fp)
|
|
|
|
|
|
def compile(
    model_list: List[ModelConfig],
    output_dir: str = "/data1/kneron_flow",
    dedicated_output_buffer: bool = True,
    weight_compress: bool = False,
    hardware_cut_opt: bool = False,
    flatbuffer: bool = True,
    debug: bool = False,
) -> str:
    """Compile the models and generate the nef file.

    Thin convenience wrapper around :func:`encrypt_compile` with encryption
    disabled (mode=None).

    Args:
        model_list (List[ModelConfig]): a list of models need to be compile. Models with onnx should run analysis() before compilation.
        output_dir (str, optional): output directory. Defaults to "/data1/kneron_flow".
        dedicated_output_buffer (bool, optional): dedicated output buffer. Defaults to True.
        weight_compress (bool, optional): compress weight to slightly reduce the binary file size. Defaults to False.
        hardware_cut_opt (bool, optional): optimize the hardware memory usage while processing large inputs. This option might cause the compiling time increase. Currently, only available for 720. Defaults to False.
        flatbuffer (bool, optional): enable new flatbuffer mode for 720. Defaults to True.
        debug (bool, optional): keep intermediate files. Defaults to False.

    Returns:
        str: path to the nef file
    """
    options = dict(
        mode=None,  # no encryption
        weight_compress=weight_compress,
        hardware_cut_opt=hardware_cut_opt,
        flatbuffer=flatbuffer,
        debug=debug,
    )
    return encrypt_compile(model_list, output_dir, dedicated_output_buffer, **options)
|
|
|
|
|
|
def encrypt_compile(
    model_list: List[ModelConfig],
    output_dir: str = "/data1/kneron_flow",
    dedicated_output_buffer: bool = True,
    mode: int = None,
    key: str = "",
    key_file: str = "",
    encryption_efuse_key: str = "",
    weight_compress: bool = False,
    hardware_cut_opt=False,
    flatbuffer=True,
    debug=False,
) -> str:
    """Compile the models, generate an encrypted nef file.

    Args:
        model_list (List[ModelConfig]): a list of models need to be compile. Models with onnx should run analysis() before compilation.
        output_dir (str, optional): output directory. Defaults to "/data1/kneron_flow".
        dedicated_output_buffer (bool, optional): dedicated output buffer. Defaults to True.
        mode (int, optional): There are two modes: 1, 2. Defaults to None, which is no encryption.
        key (str, optional): a hex code. Required in mode 1. Defaults to "".
        key_file (str, optional): key file path. Required in mode 1. Defaults to "".
        encryption_efuse_key (str, optional): a hex code. Required in mode 2 and optional in mode 1. Defaults to "".
        weight_compress (bool, optional): compress weight to slightly reduce the binary file size. Defaults to False.
        hardware_cut_opt (bool, optional): optimize the hardware memory usage while processing large inputs. This option might cause the compiling time increase. This option is ignored if a model has specified its compiler config json. Currently, only available for 720. Defaults to False.
        flatbuffer (bool, optional): enable new flatbuffer mode for 720. Defaults to True.
        debug (bool, optional): keep intermediate files after compiling. Defaults to False.

    Returns:
        str: path to the nef file, or an "[ERROR] ..." string on validation failure.
    """
    # Check model platform: all models in one batch must target the same
    # platform; collect comma-separated version/id strings for the nef metadata.
    platform = model_list[0].platform
    version_str = ""
    id_str = ""
    for model in model_list:
        if model.platform != platform:
            logging.error("Batch compile models should belongs to the same platform.")
            return "[ERROR] Batch compile models should belongs to the same platform."
        version_str += model.version + ","
        id_str += str(model.id) + ","
    if str(platform) not in SUPPORTED_PLATFORMS:
        logging.error(f"{platform} batch compiler is not ready yet.")
        return f"[ERROR] {platform} batch compiler is not ready yet."
    # Check encryption: build the encryption_config dict consumed by
    # generate_batch_conf.
    if mode == 1:
        enc_config = {
            "whether_encryption": True,
            "encryption mode": 1,
            "encryption_key": key,
            "encryption_file": key_file,
        }
        # efuse key is optional in mode 1.
        if len(encryption_efuse_key) > 0:
            enc_config["encryption_efuse_key"] = encryption_efuse_key
    elif mode == 2:
        enc_config = {
            "whether_encryption": True,
            "encryption mode": 2,
            "encryption_efuse_key": encryption_efuse_key,
        }
    else:
        enc_config = {"whether_encryption": False}
    # Check output dir
    if output_dir is None:
        output_dir = "/data1/kneron_flow"
    if not os.path.exists(output_dir):
        os.mkdir(output_dir)
    # Before batch compiler. Backup ip evaluator result so a previous
    # evaluate() report is not lost if the compiler overwrites/removes it.
    profile_result = None
    if os.path.isfile(output_dir + "/ProfileResult.txt"):
        with open(output_dir + "/ProfileResult.txt", "r") as f:
            profile_result = f.read()

    # Generate config and run
    if platform == "520":
        # 520 path: v1 toolchain, no flatbuffer flag, no hardware cut opt.
        generate_batch_conf(
            enc_config,
            "520",
            output_dir + "/batch_compile_config.json",
            weight_compress=weight_compress,
        )
        if hardware_cut_opt:
            logging.warning(
                "KDP520 currently does not support hardware cut optimization. Skipped."
            )
        generate_batch_bconfig(
            model_list,
            output_dir + "/batch_compile_config.json",
            output_dir + "/batch_compile_bconfig.json",
        )
        # batch_compile writes its outputs into the current working directory.
        os.chdir(output_dir)
        commands = [
            LIBS_V1_FOLDER + "/compiler/batch_compile",
            "-T",
            "520",
            output_dir + "/batch_compile_bconfig.json",
            "-t",
            get_toolchain_version(),
        ]
        if dedicated_output_buffer:
            commands.append("-o")
        subprocess.run(commands, check=True)
    else:
        generate_batch_conf(
            enc_config,
            platform,
            output_dir + "/batch_compile_config.json",
            weight_compress=weight_compress,
            flatbuffer=flatbuffer,
        )
        generate_batch_bconfig(
            model_list,
            output_dir + "/batch_compile_config.json",
            output_dir + "/batch_compile_bconfig.json",
            hw_cut_opt=hardware_cut_opt,
        )
        # batch_compile writes its outputs into the current working directory.
        os.chdir(output_dir)
        if platform == "730":
            LIBS_FOLDER = LIBS_V2_FOLDER
        else:
            LIBS_FOLDER = LIBS_V1_FOLDER
        commands = [
            LIBS_FOLDER + "/compiler/batch_compile",
            "-T",
            platform,
            output_dir + "/batch_compile_bconfig.json",
            "-t",
            get_toolchain_version(),
        ]
        if dedicated_output_buffer:
            commands.append("-o")
        subprocess.run(commands, check=True)
        if platform == "730":
            # Use kne to generate nef file: the 730 compiler emits a .kne,
            # which kneron_nef_utils converts into models_730.nef.
            subprocess.run(
                [
                    LIBS_V2_FOLDER + "/compiler/kneron_nef_utils",
                    "-G",
                    "--kne",
                    output_dir + "/models_730.kne",
                    "-t",
                    '730',
                    "-O",
                    output_dir,
                ],
                check=True,
            )
            # Update model info: stamp the collected version/id strings
            # (comma-joined, trailing comma stripped) into the nef in place.
            subprocess.run(
                [
                    LIBS_V2_FOLDER + "/compiler/kneron_nef_utils",
                    "-U",
                    output_dir + "/models_730.nef",
                    "--model_info_version",
                    version_str[:-1],
                    "--model_info_name",
                    id_str[:-1],
                    "--replace_original"
                ],
                check=True,
            )

    # Restore profile result.
    if (
        not os.path.isfile(output_dir + "/ProfileResult.txt")
        and profile_result is not None
    ):
        with open(output_dir + "/ProfileResult.txt", "w") as f:
            f.write(profile_result)
    # Clean up files: remove intermediate binaries/configs unless debugging.
    if not debug:
        files_to_remove = [
            "models_730.kne",
            "all_models.bin",
            "batch_compile_bconfig.json",
            "batch_compile_config.json",
            "fw_info.bin",
        ]
        dirs_to_remove = []
        for model in model_list:
            files_to_remove.append(f"model_{model.id}_command.bin")
            files_to_remove.append(f"model_{model.id}_setup.bin")
            files_to_remove.append(f"model_{model.id}_weight.bin")
            # Preserve each model's ioinfo.json as <folder>_ioinfo.json before
            # its per-model working folder is removed.
            if model.bie_path is not None:
                folder_path = model.bie_path.split(".")[0] + "_modelid_" + str(model.id)
                dirs_to_remove.append(folder_path)
                if os.path.isfile(folder_path + "/ioinfo.json"):
                    shutil.copyfile(
                        folder_path + "/ioinfo.json", folder_path + "_ioinfo.json"
                    )
            elif model.onnx_path is not None:
                folder_path = (
                    model.onnx_path.split(".")[0] + "_modelid_" + str(model.id)
                )
                dirs_to_remove.append(folder_path)
                if os.path.isfile(folder_path + "/ioinfo.json"):
                    shutil.copyfile(
                        folder_path + "/ioinfo.json", folder_path + "_ioinfo.json"
                    )
        clean_up(output_dir, files_to_remove, dirs_to_remove)
    return f"{output_dir}/models_{platform}.nef"
|
|
|
|
|
|
def combine_nef(
    nef_list: List[str], output_path: str = "/data1/combined", platform: int = 720
) -> str:
    """Merge multiple nef files into one.

    Args:
        nef_list (List[str]): a list of nef file paths.
        output_path (str, optional): output folder name. Defaults to /data1/combined. The nef path would be /data1/combined/models_<target>.nef.
        platform (int, optional): kept for backward compatibility; currently
            unused by this function (kneron_nef_utils infers the target).
            Defaults to 720.

    Returns:
        str: output folder name. Should be same as the `output_path` argument.
        Returns "" when nef_list is empty.

    Raises:
        subprocess.CalledProcessError: if kneron_nef_utils fails.
    """
    if not nef_list:
        logging.error("nef_list should not be empty")
        return ""
    # kneron_nef_utils expects the nef paths as one space-separated argument.
    nef_list_str = " ".join(nef_list)
    subprocess.run(
        [
            LIBS_V2_FOLDER + "/compiler/kneron_nef_utils",
            "-c",
            nef_list_str,
            "-O",
            output_path,
        ],
        check=True,
    )
    return output_path
|