#include <algorithm>
#include <cstdio>
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <memory>
#include <set>
#include <sstream>
#include <stdexcept>
#include <string>
#include <system_error>
#include <unordered_map>
#include <utility>
#include <vector>

#include "cpu/InferencerPImpl.h"
#include "msft/MSFTInferencer.h"
#ifdef CUDA_FOUND
#include "msft-gpu/MSFTInferencer.h"
#endif
#include "kl730/InferencerPImpl.h"
#include "main.h"

using std::pair;
using std::set;
using std::string;
using std::vector;
using std::unordered_map;

using namespace dynasty::inferencer;
using namespace dynasty::inferencer::fix_point;

template <typename T>
static void Save1dVectorToFile(std::vector<T> const &input_vector, std::string const &file_location) {
    std::ofstream f_out;
    // Set exceptions to be thrown on failure
    f_out.exceptions(std::ofstream::failbit | std::ofstream::badbit);
    try {
        f_out.open(file_location);
    } catch (std::system_error &e) {
        std::cout << "Failed to open file " << file_location << ": " << e.code().message() << std::endl;
        exit(1);
    }
    for (auto const &i : input_vector) {
        f_out << std::setprecision(8) << i << std::endl;
    }
}

void dump_layer_output_size_fl(const string out_file, const vector<int> output_dim) {
    FILE *csv_file = fopen(out_file.c_str(), "w+");
    if (csv_file == NULL) {
        std::cout << "Failed to open file " << out_file << std::endl;
        exit(1);
    }
    for (auto it = output_dim.begin(); it != output_dim.end(); it++) {
        fprintf(csv_file, "%d\n", *it);
    }
    fclose(csv_file);
}

template <typename T>
void SaveOutputTensors(unordered_map<string, vector<T>> output, string output_folder,
                       vector<pair<string, vector<int>>> dims, bool dump_csv, string suffix) {
    set<string> out_nodes;
    for (auto node : dims) {
        std::string node_name = node.first;
        std::replace(node_name.begin(), node_name.end(), '/', '_');  // replace / so file paths are valid
        if (dump_csv) {
            string csv_file = output_folder + "/layer_output_" + node_name + "_output_size.csv";
            dump_layer_output_size_fl(csv_file, node.second);
        }
        out_nodes.insert(node_name);
    }
    for (auto node : output) {
        std::string node_name = node.first;
        std::replace(node_name.begin(), node_name.end(), '/', '_');  // replace / so file paths are valid
        string prefix = "/layer_intermediate_";
        if (out_nodes.count(node_name)) {
            prefix = "/layer_output_";
        }
        string data_file = output_folder + prefix + node_name + suffix + ".txt";
        Save1dVectorToFile(node.second, data_file);
    }
}
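// Illustrative usage of SaveOutputTensors (a sketch; the tensor values, node
// names, and paths below are made up for demonstration). Nodes listed in
// `dims` are graph outputs and get the "layer_output_" file prefix; all other
// dumped tensors get "layer_intermediate_":
//
//     std::unordered_map<std::string, std::vector<float>> tensors = {
//         {"conv1/out", {0.1f, 0.2f}}};
//     std::vector<std::pair<std::string, std::vector<int>>> dims = {
//         {"conv1/out", {2, 1, 1}}};
//     SaveOutputTensors(tensors, "/tmp/out", dims, /*dump_csv=*/true, "_fl");
//     // writes /tmp/out/layer_output_conv1_out_output_size.csv
//     //    and /tmp/out/layer_output_conv1_out_fl.txt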
extern "C" void inference(string platform, string encrypt, string shape_order, int dump,
                          string model, string radix_file, int num_inputs,
                          std::vector<string> input_files, std::vector<string> input_names,
                          string output_folder, string debug, string cuda, string ort) {
    bool bie = encrypt == "True";
    bool cuda_enable = cuda == "True";
    bool use_ort = ort == "True";

    unordered_map<string, vector<float>> inputs;
    unordered_map<string, vector<float>> outputs;
    unordered_map<string, vector<int>> fix_outputs;
    unordered_map<string, string> inference_input;  // node name -> input file path

    // if (debug == "True") {
    //     printf("--------------------------------------------------------------\n");
    //     printf("Running Dynasty inference...\n");
    //     std::cout << "dump: " << dump_level << ", mode: " << mode << ", is bie: " << bie << std::endl;
    //     std::cout << "model: " << model_file << "\nradix file: " << radix << "\noutput folder: " << output << std::endl;
    // }

    for (int i = 0; i < num_inputs; i++) {
        inference_input[input_names[i]] = input_files[i];
        // if (debug == "True") {
        //     std::cout << "in file[" << i << "]: " << string(input_files[i]) << std::endl;
        //     std::cout << "node name[" << i << "]: " << string(input_names[i]) << std::endl;
        // }
    }

    if (platform == "Float") {
        if (use_ort) {
#ifdef CUDA_FOUND
            auto builder = dynasty::inferencer::msftgpu::Inferencer::GetBuilder()
                               ->WithDeviceID(0)
                               ->WithParallelLevel(1)
                               ->WithGraphOptimization(1)
                               ->WithONNXModel(model);
#else
            auto builder = dynasty::inferencer::msft::Inferencer::GetBuilder()
                               ->WithParallelLevel(1)
                               ->WithGraphOptimization(1)
                               ->WithONNXModel(model);
#endif
            auto inferencer = builder->Build();
            outputs = inferencer->Inference(inference_input, dump == 0);
            SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
        } else {
            auto builder = dynasty::inferencer::cpu::Inferencer::GetBuilder()
                               ->WithGraphOptimization(1)
                               ->WithONNXModel(model);
            auto inferencer = builder->Build();
            outputs = inferencer->Inference(inference_input, dump == 0);
            SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
        }
    } else {
        InferencerUniquePtr inferencer;
        std::unique_ptr<kl730::Inferencer::Builder> builder;
        if (platform == "730") {
            if (bie) {  // BIE file
                builder = dynasty::inferencer::fix_point::kl730::Inferencer::GetBuilder();
                inferencer = kl730::createInferencer(builder.get(), model, cuda_enable, 0);
            } else {  // ONNX file
                throw std::runtime_error("only 730 bie mode is supported, the given one is onnx model");
            }
        }
        outputs = inferencer->Inference(inference_input, dump == 0);
        fix_outputs = inferencer->ConvertFloatToInt(outputs, dump == 0);
        SaveOutputTensors(outputs, output_folder, builder->GetOutputDimensions(), true, "_fl");
        SaveOutputTensors(fix_outputs, output_folder, builder->GetOutputDimensions(), false, "_fx");
    }

    // if (debug == "True") {
    //     printf("Done inference!\n");
    // }
}

extern "C" void inference_wrapper(const char *platform, const char *encrypt, const char *shape_order,
                                  int dump, const char *model, const char *radix_file, int num_inputs,
                                  const char **input_files, const char **input_names,
                                  const char *output_folder, const char *debug, const char *cuda,
                                  const char *ort) {
    // This wrapper to call Dynasty inference is called from Python
    std::vector<string> input_node_files;
    std::vector<string> input_node_names;
    for (int i = 0; i < num_inputs; i++) {
        input_node_files.push_back(string(input_files[i]));
        input_node_names.push_back(string(input_names[i]));
    }
    inference(string(platform), string(encrypt), string(shape_order), dump, string(model),
              string(radix_file), num_inputs, input_node_files, input_node_names,
              string(output_folder), string(debug), string(cuda), string(ort));
}
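// Illustrative call site for the C ABI wrapper (a sketch; the model, file, and
// node names are placeholders). This mirrors what the Python side is expected
// to pass through a foreign-function interface such as ctypes:
//
//     const char *files[] = {"input0.txt"};
//     const char *names[] = {"input_node"};
//     inference_wrapper("Float", /*encrypt=*/"False", /*shape_order=*/"1chw",
//                       /*dump=*/0, "model.onnx", /*radix_file=*/"",
//                       /*num_inputs=*/1, files, names, "/tmp/out",
//                       /*debug=*/"False", /*cuda=*/"False", /*ort=*/"True");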
/**
 * Check if the provided input dimensions match the input dimensions of the model. Input dimensions
 * should be in 1chw format to match that of the model.
 *
 * Returns the 1-based index of the first mismatching input node. Returns 0 if all dimensions match.
 */
extern "C" int check_input_dims(const char *platform, const char *encrypt, const char *model,
                                const char *radix, int *dims) {
    bool bie = string(encrypt) == "True";
    bool cuda_enable = false;
    string mode = string(platform);
    string model_file = string(model);
    string radix_file = string(radix);
    int current_index = 0;
    if (mode == "Float") {
        auto builder = dynasty::inferencer::cpu::Inferencer::GetBuilder()
                           ->WithGraphOptimization(1)
                           ->WithONNXModel(model_file);
        auto input_dim = builder->GetInputDimensions();
        for (auto &item : input_dim)
            for (auto num : item.second)
                if (num != dims[current_index++])
                    // return mismatch node index, incremented by 1 so 0 is no error
                    return (current_index - 1) / 3 + 1;
    } else {
        InferencerUniquePtr inferencer;
        std::unique_ptr<kl730::Inferencer::Builder> builder;
        if (mode == "730") {
            builder = kl730::Inferencer::GetBuilder();
            auto input_dim = builder->GetInputDimensions();
            for (auto &item : input_dim)
                for (auto num : item.second)
                    if (num != dims[current_index++])
                        // return mismatch node index, incremented by 1 so 0 is no error
                        return (current_index - 1) / 3 + 1;
        } else {
            throw std::runtime_error("only model 730 is supported");
        }
    }
    return 0;
}

/**
 * Dumps radix JSON from a BIE file.
 */
extern "C" void dump_radix_json(int platform, const char *model, const char *out_json_folder) {
    InferencerUniquePtr inferencer;
    if (platform == 730) {
        auto builder = kl730::Inferencer::GetBuilder();
        inferencer = kl730::createInferencer(builder.get(), model, false, 0);
    }
    inferencer->dumpRadixJson(model, out_json_folder);
}

std::vector<string> get_names(string input_names) {
    // input_names will have names separated by commas
    std::vector<string> split_names;
    std::istringstream iss(input_names);
    string item;
    while (std::getline(iss, item, ','))
        split_names.push_back(item);
    return split_names;
}

// Entry point for standalone binary executable use
int main(int argc, char *argv[]) {
    string platform = argv[1];
    string encrypt = argv[2];
    string shape_order = argv[3];
    int dump = std::stoi(argv[4]);
    string model = argv[5];
    string radix_file = argv[6];
    int num_inputs = std::stoi(argv[7]);
    string input_files_list = argv[8];
    std::vector<string> input_files = get_names(input_files_list);
    string input_names_list = argv[9];
    std::vector<string> input_names = get_names(input_names_list);
    string output_folder = argv[10];
    string cuda = argv[11];
    string ort = "False";
    inference(platform, encrypt, shape_order, dump, model, radix_file, num_inputs,
              input_files, input_names, output_folder, "False", cuda, ort);
    return 0;
}
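// Example invocation of the standalone binary (the binary name and argument
// values are placeholders): argv[1..11] are platform, encrypt, shape_order,
// dump, model, radix_file, num_inputs, comma-separated input file list,
// comma-separated input node name list, output folder, and cuda.
//
//     ./dynasty_inference Float False 1chw 0 model.onnx radix.json 1 \
//         input0.txt input_node /tmp/out False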