#!/usr/bin/env python3

"""This is a standalone script for converting Onnx model files into TensorRT model files

   Author: Leo Gordon (dividiti)
"""

import argparse
import tensorrt as trt


def convert_onnx_model_to_trt(onnx_model_filename, trt_model_filename,
                              input_tensor_name, output_tensor_name,
                              output_data_type, max_workspace_size, max_batch_size):
    "Convert an onnx_model_filename into a trt_model_filename using the given parameters"

    TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
    TRT_VERSION_MAJOR = int(trt.__version__.split('.')[0])

    with trt.Builder(TRT_LOGGER) as builder:

        if TRT_VERSION_MAJOR >= 7:
            # TensorRT 7+ requires an explicit-batch network definition for ONNX models.
            flag = (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_PRECISION)) | \
                   (1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH))
            network = builder.create_network(flag)
        else:
            network = builder.create_network()

        parser = trt.OnnxParser(network, TRT_LOGGER)

        if output_data_type == 'fp32':
            print('Converting into fp32 (default), max_batch_size={}'.format(max_batch_size))
            builder.fp16_mode = False
        else:
            if not builder.platform_has_fast_fp16:
                print('Warning: This platform is not optimized for fast fp16 mode')
            builder.fp16_mode = True
            print('Converting into fp16, max_batch_size={}'.format(max_batch_size))

        builder.max_workspace_size = max_workspace_size
        builder.max_batch_size = max_batch_size

        with open(onnx_model_filename, 'rb') as onnx_model_file:
            onnx_model = onnx_model_file.read()

        if not parser.parse(onnx_model):
            raise RuntimeError("Onnx model parsing from {} failed. Error: {}".format(
                onnx_model_filename, parser.get_error(0).desc()))

        if TRT_VERSION_MAJOR >= 7:
            # Create an optimization profile (see Section 7.2 of
            # https://docs.nvidia.com/deeplearning/sdk/pdf/TensorRT-Developer-Guide.pdf).
            profile = builder.create_optimization_profile()
            # FIXME: Hardcoded for ImageNet. The minimum/optimum/maximum dimensions of
            # a dynamic input tensor are the same.
            profile.set_shape(input_tensor_name,
                              (1, 3, 224, 224),
                              (max_batch_size, 3, 224, 224),
                              (max_batch_size, 3, 224, 224))

            # The config-based build path reads the workspace size and precision flags
            # from the builder config, not from the (deprecated) builder attributes set above.
            config = builder.create_builder_config()
            config.max_workspace_size = max_workspace_size
            if output_data_type != 'fp32':
                config.set_flag(trt.BuilderFlag.FP16)
            config.add_optimization_profile(profile)

            trt_model_object = builder.build_engine(network, config)
        else:
            trt_model_object = builder.build_cuda_engine(network)

        if trt_model_object is None:
            raise RuntimeError('Building a TensorRT engine from {} failed.'.format(onnx_model_filename))

        try:
            serialized_trt_model = trt_model_object.serialize()
            with open(trt_model_filename, "wb") as trt_model_file:
                trt_model_file.write(serialized_trt_model)
        except Exception:
            raise RuntimeError('Cannot serialize or write TensorRT engine to file {}.'.format(trt_model_filename))


def main():
    "Parse command line and feed the conversion function"

    arg_parser = argparse.ArgumentParser()

    arg_parser.add_argument('onnx_model_file', type=str, help='Onnx model file')
    arg_parser.add_argument('trt_model_filename', type=str, help='TensorRT model file')
    arg_parser.add_argument('--input_tensor_name', type=str, default='input_tensor:0', help='Input tensor name')
    arg_parser.add_argument('--output_tensor_name', type=str, default='prob', help='Output tensor name')
    arg_parser.add_argument('--output_data_type', type=str, default='fp32', help='Model data type (fp32 or fp16)')
    arg_parser.add_argument('--max_workspace_size', type=int, default=(1 << 30), help='Builder workspace size in bytes')
    arg_parser.add_argument('--max_batch_size', type=int, default=1, help='Builder batch size')

    args = arg_parser.parse_args()

    convert_onnx_model_to_trt(
        args.onnx_model_file, args.trt_model_filename,
        args.input_tensor_name, args.output_tensor_name,
        args.output_data_type, args.max_workspace_size, args.max_batch_size
    )


if __name__ == '__main__':
    main()
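
# The helper below is not used by the script itself; it is a minimal sketch showing how
# the engine file written above could be deserialized for inference. The function name is
# illustrative (not part of the original script); only standard TensorRT runtime calls are
# used, assuming a TensorRT 7.x installation and a compatible GPU.
def load_trt_engine_sketch(trt_model_filename):
    "Deserialize a TensorRT engine file previously produced by convert_onnx_model_to_trt()"

    logger = trt.Logger(trt.Logger.WARNING)
    with open(trt_model_filename, 'rb') as trt_model_file, trt.Runtime(logger) as runtime:
        # deserialize_cuda_engine() returns None if the serialized engine is incompatible
        # with the installed TensorRT version or the current GPU.
        engine = runtime.deserialize_cuda_engine(trt_model_file.read())
    return engine

# Example invocation (hypothetical file names; the default --input_tensor_name matches
# the hard-coded 1x3x224x224 ImageNet optimization profile above):
#
#   python3 onnx_to_trt.py resnet50.onnx resnet50.trt --output_data_type fp16 --max_batch_size 8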