#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <system_error>
#include <unordered_map>
#include <utility>
#include <vector>
#include "cpu/InferencerPImpl.h"
#include "msft/MSFTInferencer.h"
#ifdef CUDA_FOUND
#include "msft-gpu/MSFTInferencer.h"
#endif
#include "kl730/InferencerPImpl.h"
#include "main.h"
using std::pair;
using std::set;
using std::string;
using std::vector;
using std::unordered_map;
using namespace dynasty::inferencer;
using namespace dynasty::inferencer::fix_point;
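/**
 * Writes each element of a 1-D vector to a text file, one value per line
 * with 8 significant digits, exiting the process if the file cannot be opened.
 */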
template <typename T>
static void Save1dVectorToFile(std::vector<T> const &input_vector, std::string const &file_location) {
  std::ofstream f_out;
  // Throw exceptions on failure so open errors can be caught below.
  f_out.exceptions(std::ofstream::failbit | std::ofstream::badbit);
  try {
    f_out.open(file_location);
  } catch (std::system_error const &e) {
    std::cerr << "Failed to open file " << file_location << ": " << e.code().message() << std::endl;
    exit(1);
  }
  for (auto const &value : input_vector) {
    f_out << std::setprecision(8) << value << std::endl;
  }
}
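/**
 * Writes each dimension of a layer's output shape to a CSV file, one value per row.
 */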
void dump_layer_output_size_fl(const string &out_file, const vector<int32_t> &output_dim) {
  FILE *csv_file = fopen(out_file.c_str(), "w+");
  if (csv_file == NULL) {
    std::cerr << "Failed to open file " << out_file << std::endl;
    exit(1);
  }
  for (int32_t dim : output_dim) {
    fprintf(csv_file, "%d\n", dim);
  }
  fclose(csv_file);
}
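/**
 * Saves every tensor in `output` under `output_folder`. Nodes listed in `dims`
 * (the graph outputs) are written as "layer_output_*" files; all other tensors
 * become "layer_intermediate_*" files. When `dump_csv` is set, each graph
 * output's shape is also dumped as a CSV.
 */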
template <typename T>
void SaveOutputTensors(const unordered_map<string, vector<T>> &output, const string &output_folder,
                       const vector<pair<string, vector<int32_t>>> &dims, bool dump_csv, const string &suffix) {
  set<string> out_nodes;
  for (auto const &node : dims) {
    std::string node_name = node.first;
    std::replace(node_name.begin(), node_name.end(), '/', '_');  // replace '/' so file paths are valid
    if (dump_csv) {
      string csv_file = output_folder + "/layer_output_" + node_name + "_output_size.csv";
      dump_layer_output_size_fl(csv_file, node.second);
    }
    out_nodes.insert(node_name);
  }
  for (auto const &node : output) {
    std::string node_name = node.first;
    std::replace(node_name.begin(), node_name.end(), '/', '_');  // replace '/' so file paths are valid
    // Nodes recorded above are graph outputs; everything else is an intermediate tensor.
    string prefix = "/layer_intermediate_";
    if (out_nodes.count(node_name)) {
      prefix = "/layer_output_";
    }
    string data_file = output_folder + prefix + node_name + suffix + ".txt";
    Save1dVectorToFile(node.second, data_file);
  }
}
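/**
 * Runs Dynasty inference and writes every produced tensor to output_folder.
 * `platform` selects the backend ("Float" for floating point, "730" for the
 * KL730 fixed-point path); encrypt == "True" means the model is a BIE file.
 * Float results are saved with an "_fl" suffix, fixed-point results with "_fx".
 */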
extern "C" void inference(string platform, string encrypt, string shape_order,
int dump, string model, string radix_file, int num_inputs,
std::vector<string> input_files, std::vector<string> input_names,
string output_folder, string debug, string cuda, string ort) {
bool bie = encrypt == "True";
bool cuda_enable = cuda == "True";
bool use_ort = ort == "True";
unordered_map<string, vector<float>> inputs;
unordered_map<string, vector<float>> outputs;
unordered_map<string, vector<int>> fix_outputs;
unordered_map<string, string> inference_input;
// if (debug == "True") {
// printf("--------------------------------------------------------------\n");
// printf("Running Dynasty inference...\n");
// std::cout << "dump: " << dump_level << ", mode: " << mode << ", is bie: " << bie << std::endl;
// std::cout << "model: " << model_file << "\nradix file: " << radix << "\noutput folder: " << output << std::endl;
// }
for (int i = 0; i < num_inputs; i++) {
inference_input[input_names[i]] = input_files[i];
// if (debug == "True") {
// std::cout << "in file[" << i << "]: " << string(input_files[i]) << std::endl;
// std::cout << "node name[" << i << "]: " << string(input_names[i]) << std::endl;
// }
}
if (platform == "Float") {
if (use_ort) {
#ifdef CUDA_FOUND
auto builder = dynasty::inferencer::msftgpu::Inferencer<float>::GetBuilder()->
WithDeviceID(0) -> WithParallelLevel(1)->WithGraphOptimization(1)->WithONNXModel(model);
#else
auto builder = dynasty::inferencer::msft::Inferencer<float>::GetBuilder()->
WithParallelLevel(1)->WithGraphOptimization(1)->WithONNXModel(model);
#endif
auto inferencer = builder->Build();
outputs = inferencer->Inference(inference_input, dump == 0);
SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
} else {
auto builder = dynasty::inferencer::cpu::Inferencer<float>::GetBuilder()->
WithGraphOptimization(1)->WithONNXModel(model);
auto inferencer = builder->Build();
outputs = inferencer->Inference(inference_input, dump == 0);
SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
}
} else {
InferencerUniquePtr<float> inferencer;
std::unique_ptr<typename PianoInferencer<float>::Builder> builder;
if (platform == "730") {
if (bie) { // BIE file
builder = dynasty::inferencer::fix_point::kl730::Inferencer::GetBuilder();
inferencer = kl730::createInferencer(builder.get(), model, cuda_enable, 0);
} else { // ONNX file
throw std::runtime_error("only 730 bie mode is supported, the given one is onnx model");
}
}
outputs = inferencer->Inference(inference_input, dump == 0);
fix_outputs = inferencer->ConvertFloatToInt(outputs, dump == 0);
SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
SaveOutputTensors(fix_outputs, output_folder, builder->GetOutputDimensions(), false, "_fx");
}
// if (debug == "True") {
// printf("Done inference!\n");
// }
}
extern "C" void inference_wrapper(const char *platform, const char *encrypt, const char *shape_order,
int dump, const char *model, const char *radix_file, int num_inputs,
const char **input_files, const char **input_names,
const char *output_folder, const char *debug, const char *cuda,
const char *ort) {
// This wrapper to call Dynasty inference will be called by Python
std::vector<string> input_node_files;
std::vector<string> input_node_names;
for (int i = 0; i < num_inputs; i++) {
input_node_files.push_back(string(input_files[i]));
input_node_names.push_back(string(input_names[i]));
}
inference(string(platform), string(encrypt), string(shape_order), dump, string(model),
string(radix_file), num_inputs, input_node_files, input_node_names,
string(output_folder), string(debug), string(cuda), string(ort));
}
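// A minimal usage sketch (hypothetical file and node names; assumes a
// single-input float ONNX model and an existing output directory):
//
//   const char *files[] = {"input_0.txt"};
//   const char *names[] = {"input_node"};
//   inference_wrapper("Float", "False", "1chw", 0, "model.onnx", "", 1,
//                     files, names, "./out", "False", "False", "False");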
/**
 * Check if the provided input dimensions match the input dimensions of the model. Input dimensions
 * should be in 1chw format to match that of the model.
 *
 * Returns the 1-based index of the first input node whose dimensions do not match.
 * Returns 0 if all dimensions match.
 */
extern "C" int check_input_dims(const char *platform, const char *encrypt, const char *model,
const char *radix, int *dims) {
bool bie = string(encrypt) == "True";
bool cuda_enable = false;
string mode = string(platform);
string model_file = string(model);
string radix_file = string(radix);
int current_index = 0;
if (mode == "Float") {
auto builder = dynasty::inferencer::cpu::Inferencer<float>::GetBuilder()->
WithGraphOptimization(1)->WithONNXModel(model_file);
auto input_dim = builder->GetInputDimensions();
for(auto &item : input_dim)
for (auto num : item.second)
if (num != dims[current_index++])
// return mismatch node index, incremented by 1 so 0 is no error
return (current_index - 1) / 3 + 1;
} else {
InferencerUniquePtr<float> inferencer;
std::unique_ptr<typename PianoInferencer<float>::Builder> builder;
if (mode == "730") {
builder = kl730::Inferencer::GetBuilder();
auto input_dim = builder->GetInputDimensions();
for(auto &item : input_dim)
for (auto num : item.second)
if (num != dims[current_index++])
// return mismatch node index, incremented by 1 so 0 is no error
return (current_index - 1) / 3 + 1;
}else{
throw std::runtime_error("only model 730 is supported");
}
}
return 0;
}
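// A minimal usage sketch (hypothetical model path; assumes one input node whose
// dimensions are given as three values, c/h/w, matching the division by 3 above):
//
//   int dims[] = {3, 224, 224};
//   int mismatch = check_input_dims("Float", "False", "model.onnx", "", dims);
//   // mismatch == 0 when the model's input is 3x224x224; otherwise it is the
//   // 1-based index of the first mismatching input node.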
/**
 * Dumps radix JSON from a BIE file.
 */
extern "C" void dump_radix_json(int platform, const char *model, const char *out_json_folder) {
  InferencerUniquePtr<float> inferencer;
  if (platform == 730) {
    auto builder = kl730::Inferencer::GetBuilder();
    inferencer = kl730::createInferencer(builder.get(), model, false, 0);
  } else {
    // Guard against dereferencing a null inferencer for unsupported platforms.
    throw std::runtime_error("only platform 730 is supported");
  }
  inferencer->dumpRadixJson(model, out_json_folder);
}
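/**
 * Splits a comma-separated list of names into a vector,
 * e.g. "a,b,c" -> {"a", "b", "c"}.
 */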
std::vector<string> get_names(string input_names) {
  std::vector<string> split_names;
  std::istringstream iss(input_names);
  string item;
  while (std::getline(iss, item, ','))
    split_names.push_back(item);
  return split_names;
}
// Thin entry point for standalone binary use.
int main(int argc, char *argv[]) {
  if (argc < 12) {
    std::cerr << "Usage: " << argv[0] << " <platform> <encrypt> <shape_order> <dump> <model>"
              << " <radix_file> <num_inputs> <input_files> <input_names> <output_folder> <cuda>"
              << std::endl;
    return 1;
  }
  string platform = argv[1];
  string encrypt = argv[2];
  string shape_order = argv[3];
  int dump = std::stoi(argv[4]);
  string model = argv[5];
  string radix_file = argv[6];
  int num_inputs = std::stoi(argv[7]);
  // Input file and node-name lists arrive as single comma-separated strings.
  std::vector<string> input_files = get_names(argv[8]);
  std::vector<string> input_names = get_names(argv[9]);
  string output_folder = argv[10];
  string cuda = argv[11];
  string ort = "False";
  inference(platform, encrypt, shape_order, dump, model, radix_file,
            num_inputs, input_files, input_names, output_folder, "False", cuda, ort);
  return 0;
}