#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include "cpu/InferencerPImpl.h"
#include "msft/MSFTInferencer.h"
#ifdef CUDA_FOUND
#include "msft-gpu/MSFTInferencer.h"
#endif

#include "kl730/InferencerPImpl.h"
#include "main.h"

using std::pair;
using std::set;
using std::string;
using std::vector;
using std::unordered_map;

using namespace dynasty::inferencer;
using namespace dynasty::inferencer::fix_point;

template <typename T>
static void Save1dVectorToFile(std::vector<T> const &input_vector, std::string const &file_location) {
  std::ofstream f_out;
  // Set exceptions to be thrown on open/write failure.
  f_out.exceptions(std::ofstream::failbit | std::ofstream::badbit);
  try {
    f_out.open(file_location);
  } catch (std::system_error &e) {
    std::cout << "Failed to open file " << file_location << ": " << e.code().message() << std::endl;
    exit(1);
  }
  for (auto const &i : input_vector) {
    f_out << std::setprecision(8) << i << std::endl;
  }
}

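// Usage sketch (illustrative only; the path below is a placeholder): dump a small
// float tensor, one value per line at 8 significant digits.
//
//   std::vector<float> logits = {0.125f, 3.1415927f, -2.0f};
//   Save1dVectorToFile(logits, "/tmp/layer_output_example.txt");
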
void dump_layer_output_size_fl(const string &out_file, const vector<int32_t> &output_dim) {
  FILE *csv_file = fopen(out_file.c_str(), "w+");
  if (csv_file == NULL) {
    std::cout << "Failed to open file " << out_file << std::endl;
    exit(1);
  }

  for (int32_t dim : output_dim) {
    fprintf(csv_file, "%d\n", dim);
  }
  fclose(csv_file);
}

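// Usage sketch (illustrative only; path and shape are placeholders): write one
// dimension per line to a CSV.
//
//   dump_layer_output_size_fl("/tmp/layer_output_conv1_output_size.csv", {1, 3, 224, 224});
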
template <typename T>
void SaveOutputTensors(unordered_map<string, vector<T>> const &output, string const &output_folder,
                       vector<pair<string, vector<int32_t>>> const &dims, bool dump_csv,
                       string const &suffix) {
  set<string> out_nodes;
  for (auto const &node : dims) {
    std::string node_name = node.first;
    std::replace(node_name.begin(), node_name.end(), '/', '_');  // replace '/' so file paths are valid
    if (dump_csv) {
      string csv_file = output_folder + "/layer_output_" + node_name + "_output_size.csv";
      dump_layer_output_size_fl(csv_file, node.second);
    }
    out_nodes.insert(node_name);
  }

  for (auto const &node : output) {
    std::string node_name = node.first;
    std::replace(node_name.begin(), node_name.end(), '/', '_');  // replace '/' so file paths are valid

    // Nodes listed in `dims` are model outputs; everything else is an intermediate tensor.
    string prefix = "/layer_intermediate_";
    if (out_nodes.count(node_name)) {
      prefix = "/layer_output_";
    }
    string data_file = output_folder + prefix + node_name + suffix + ".txt";
    Save1dVectorToFile(node.second, data_file);
  }
}

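// Usage sketch (illustrative only; names and paths are hypothetical): a node
// listed in `dims` is saved as layer_output_*, any other tensor as
// layer_intermediate_*, with '/' in node names rewritten to '_'.
//
//   unordered_map<string, vector<float>> out = {{"conv1/relu", {0.0f, 1.0f}}};
//   vector<pair<string, vector<int32_t>>> out_dims = {{"conv1/relu", {1, 2, 1, 1}}};
//   SaveOutputTensors(out, "/tmp/run0", out_dims, /*dump_csv=*/true, "_fl");
//   // -> /tmp/run0/layer_output_conv1_relu_fl.txt (+ *_output_size.csv)
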
extern "C" void inference(string platform, string encrypt, string shape_order,
                          int dump, string model, string radix_file, int num_inputs,
                          std::vector<string> input_files, std::vector<string> input_names,
                          string output_folder, string debug, string cuda, string ort) {
  bool bie = encrypt == "True";
  bool cuda_enable = cuda == "True";
  bool use_ort = ort == "True";

  unordered_map<string, vector<float>> inputs;
  unordered_map<string, vector<float>> outputs;
  unordered_map<string, vector<int>> fix_outputs;
  unordered_map<string, string> inference_input;

  // if (debug == "True") {
  //   printf("--------------------------------------------------------------\n");
  //   printf("Running Dynasty inference...\n");
  //   std::cout << "dump: " << dump_level << ", mode: " << mode << ", is bie: " << bie << std::endl;
  //   std::cout << "model: " << model_file << "\nradix file: " << radix << "\noutput folder: " << output << std::endl;
  // }

  for (int i = 0; i < num_inputs; i++) {
    inference_input[input_names[i]] = input_files[i];

    // if (debug == "True") {
    //   std::cout << "in file[" << i << "]: " << string(input_files[i]) << std::endl;
    //   std::cout << "node name[" << i << "]: " << string(input_names[i]) << std::endl;
    // }
  }

  if (platform == "Float") {
    if (use_ort) {
#ifdef CUDA_FOUND
      auto builder = dynasty::inferencer::msftgpu::Inferencer<float>::GetBuilder()->
          WithDeviceID(0)->WithParallelLevel(1)->WithGraphOptimization(1)->WithONNXModel(model);
#else
      auto builder = dynasty::inferencer::msft::Inferencer<float>::GetBuilder()->
          WithParallelLevel(1)->WithGraphOptimization(1)->WithONNXModel(model);
#endif
      auto inferencer = builder->Build();

      outputs = inferencer->Inference(inference_input, dump == 0);
      SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
    } else {
      auto builder = dynasty::inferencer::cpu::Inferencer<float>::GetBuilder()->
          WithGraphOptimization(1)->WithONNXModel(model);
      auto inferencer = builder->Build();

      outputs = inferencer->Inference(inference_input, dump == 0);
      SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
    }
  } else {
    InferencerUniquePtr<float> inferencer;
    std::unique_ptr<PianoInferencer<float>::Builder> builder;

    if (platform == "730") {
      if (bie) {  // BIE file
        builder = dynasty::inferencer::fix_point::kl730::Inferencer::GetBuilder();
        inferencer = kl730::createInferencer(builder.get(), model, cuda_enable, 0);
      } else {  // ONNX file
        throw std::runtime_error("only 730 BIE mode is supported; the given model is an ONNX model");
      }
    } else {
      // Fail fast rather than dereference a null inferencer below.
      throw std::runtime_error("only platform 730 is supported");
    }

    outputs = inferencer->Inference(inference_input, dump == 0);
    fix_outputs = inferencer->ConvertFloatToInt(outputs, dump == 0);

    SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
    SaveOutputTensors(fix_outputs, output_folder, builder->GetOutputDimensions(), false, "_fx");
  }

  // if (debug == "True") {
  //   printf("Done inference!\n");
  // }
}

extern "C" void inference_wrapper(const char *platform, const char *encrypt, const char *shape_order,
                                  int dump, const char *model, const char *radix_file, int num_inputs,
                                  const char **input_files, const char **input_names,
                                  const char *output_folder, const char *debug, const char *cuda,
                                  const char *ort) {
  // This wrapper around Dynasty inference is the entry point called from Python.
  std::vector<string> input_node_files;
  std::vector<string> input_node_names;

  for (int i = 0; i < num_inputs; i++) {
    input_node_files.push_back(string(input_files[i]));
    input_node_names.push_back(string(input_names[i]));
  }

  inference(string(platform), string(encrypt), string(shape_order), dump, string(model),
            string(radix_file), num_inputs, input_node_files, input_node_names,
            string(output_folder), string(debug), string(cuda), string(ort));
}

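// Usage sketch (illustrative only; every path and node name is a placeholder):
// a single-input float-mode call such as a Python ctypes caller would make.
//
//   const char *files[] = {"/tmp/input0.txt"};
//   const char *names[] = {"input_1"};
//   inference_wrapper("Float", "False", "1chw", /*dump=*/0, "/tmp/model.onnx",
//                     /*radix_file=*/"", /*num_inputs=*/1, files, names,
//                     "/tmp/out", /*debug=*/"False", /*cuda=*/"False", /*ort=*/"False");
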
/**
 * Check if the provided input dimensions match the input dimensions of the model. Input dimensions
 * should be in 1chw format to match those of the model.
 *
 * Returns the 1-based index of the first input node whose dimensions mismatch, so 0 means all
 * dimensions match.
 */
extern "C" int check_input_dims(const char *platform, const char *encrypt, const char *model,
                                const char *radix, int *dims) {
  bool bie = string(encrypt) == "True";
  bool cuda_enable = false;

  string mode = string(platform);
  string model_file = string(model);
  string radix_file = string(radix);

  int current_index = 0;

  if (mode == "Float") {
    auto builder = dynasty::inferencer::cpu::Inferencer<float>::GetBuilder()->
        WithGraphOptimization(1)->WithONNXModel(model_file);
    auto input_dim = builder->GetInputDimensions();

    for (auto &item : input_dim)
      for (auto num : item.second)
        if (num != dims[current_index++])
          // return the mismatched node index, incremented by 1 so 0 means no error
          return (current_index - 1) / 3 + 1;
  } else {
    InferencerUniquePtr<float> inferencer;
    std::unique_ptr<PianoInferencer<float>::Builder> builder;

    if (mode == "730") {
      builder = kl730::Inferencer::GetBuilder();
      auto input_dim = builder->GetInputDimensions();
      for (auto &item : input_dim)
        for (auto num : item.second)
          if (num != dims[current_index++])
            // return the mismatched node index, incremented by 1 so 0 means no error
            return (current_index - 1) / 3 + 1;
    } else {
      throw std::runtime_error("only model 730 is supported");
    }
  }

  return 0;
}

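// Usage sketch (illustrative only; path and shape are placeholders): `dims` is a
// flat array of expected dimensions for every input node, in declaration order.
//
//   int expected[] = {1, 3, 224, 224};
//   int bad_node = check_input_dims("Float", "False", "/tmp/model.onnx", "", expected);
//   if (bad_node != 0) std::cout << "input node " << bad_node << " mismatches" << std::endl;
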
/**
 * Dumps radix JSON from a BIE file.
 */
extern "C" void dump_radix_json(int platform, const char *model, const char *out_json_folder) {
  InferencerUniquePtr<float> inferencer;

  if (platform == 730) {
    auto builder = kl730::Inferencer::GetBuilder();
    inferencer = kl730::createInferencer(builder.get(), model, false, 0);
  } else {
    // Fail fast rather than dereference a null inferencer below.
    throw std::runtime_error("only model 730 is supported");
  }
  inferencer->dumpRadixJson(model, out_json_folder);
}

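// Usage sketch (illustrative only; paths are placeholders): extract the radix
// JSON from a KL730 BIE file.
//
//   dump_radix_json(730, "/tmp/model.bie", "/tmp/radix_out");
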
std::vector<string> get_names(string input_names) {
  // input_names holds names separated by commas
  std::vector<string> split_names;
  std::istringstream iss(input_names);
  string item;

  while (std::getline(iss, item, ','))
    split_names.push_back(item);

  return split_names;
}

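// Usage sketch (illustrative only): split a comma-separated command-line list.
//
//   std::vector<string> names = get_names("input_1,input_2");
//   // names == {"input_1", "input_2"}
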
// Dummy entry point for running as a standalone binary.
int main(int argc, char *argv[]) {
  if (argc < 12) {
    std::cout << "Usage: " << argv[0]
              << " <platform> <encrypt> <shape_order> <dump> <model> <radix_file>"
                 " <num_inputs> <input_files> <input_names> <output_folder> <cuda>"
              << std::endl;
    return 1;
  }

  string platform = argv[1];
  string encrypt = argv[2];
  string shape_order = argv[3];
  int dump = std::stoi(argv[4]);
  string model = argv[5];
  string radix_file = argv[6];
  int num_inputs = std::stoi(argv[7]);
  string input_files_list = argv[8];
  std::vector<string> input_files = get_names(input_files_list);
  string input_names_list = argv[9];
  std::vector<string> input_names = get_names(input_names_list);
  string output_folder = argv[10];
  string cuda = argv[11];
  string ort = "False";

  inference(platform, encrypt, shape_order, dump, model, radix_file,
            num_inputs, input_files, input_names, output_folder, "False", cuda, ort);
  return 0;
}