Open jiangchengchengark opened 3 months ago
Can you export an ONNX model and then use trtexec to build the engine?
+1 if you can share an ONNX model / reproduction steps.
notebookbdffc79b63.zip — this is my notebook on Kaggle. The engine is too large, so I can't upload it, but if you run the notebook you will get the same result.
# Notebook setup step (run once in a shell / notebook cell, not as Python):
#   pip install tensorrt
import tensorrt
import tensorrt as trt  # alias must exist before the version print below

# Report the installed TensorRT version.
print(trt.__version__)

import torch
import torchvision.models as models

# Load ResNet-50 through the `pretrained=True` flag. This is the variant the
# report is about, so the call is kept exactly as written.
# NOTE(review): recent torchvision releases deprecate `pretrained` in favour
# of the `weights=` argument — confirm against the installed version.
model= models.resnet50(pretrained=True)
import tensorrt as trt
import torch
import torchvision.models as models
from torchvision.models import ResNet50_Weights
# Put the network in evaluation mode before tracing it for export.
model.eval()

# Random tensor with the (1, 3, 224, 224) layout; it only drives the trace.
x = torch.randn(1, 3, 224, 224)

# Export settings gathered in one place, then passed to the exporter.
export_options = dict(
    opset_version=11,
    training=torch.onnx.TrainingMode.EVAL,
    do_constant_folding=True,
    input_names=["input"],
    output_names=["output"],
    verbose=False,
)
torch.onnx.export(model, x, "resnet50.onnx", **export_options)
import onnx

# Load the exported model and validate it before handing it to TensorRT.
onnx_model = onnx.load("/kaggle/working/resnet50.onnx")  # load onnx model
onnx.checker.check_model(onnx_model)  # check onnx model

# One logger is shared by the builder, the parser and the runtime.
logger = trt.Logger(trt.Logger.WARNING)
builder = trt.Builder(logger)
network = builder.create_network(
    1 << int(trt.NetworkDefinitionCreationFlag.EXPLICIT_BATCH)
)
parser = trt.OnnxParser(network, logger)

# parse_from_file reports failure through its return value, not an exception,
# so surface the parser's own error list instead of building a broken network.
success = parser.parse_from_file("/kaggle/working/resnet50.onnx")
if not success:
    parse_errors = "\n".join(
        str(parser.get_error(i)) for i in range(parser.num_errors)
    )
    raise RuntimeError(f"Failed to parse ONNX model:\n{parse_errors}")

config = builder.create_builder_config()
config.set_memory_pool_limit(trt.MemoryPoolType.WORKSPACE, 5 << 30)  # 5 GiB

# A failed build yields None rather than raising; stop here so the failure is
# not deferred to the save/deserialize step.
serialized_engine = builder.build_serialized_network(network, config)
if serialized_engine is None:
    raise RuntimeError("TensorRT failed to build the serialized engine")

runtime = trt.Runtime(logger)
def save_engine(engine, path):
    """Write a serialized engine to ``path``, creating parent directories.

    Args:
        engine: Bytes-like object holding the serialized engine.
        path: Destination file, given as a string or ``Path``.
    """
    # Imported locally because no pathlib import is visible in this script.
    from pathlib import Path

    path = Path(path)
    path.parent.mkdir(parents=True, exist_ok=True)
    with open(path, 'wb') as f:
        f.write(engine)
def load_engine(path):
    """Read a serialized engine from ``path`` and deserialize it.

    Uses the module-level ``runtime`` object.

    Args:
        path: File containing the serialized engine.

    Returns:
        The deserialized engine.

    Raises:
        RuntimeError: If the runtime could not deserialize the file contents.
    """
    with open(path, 'rb') as f:
        engine = runtime.deserialize_cuda_engine(f.read())
    # Deserialization signals failure by returning None; fail here with a
    # clear message instead of an AttributeError at the first use.
    if engine is None:
        raise RuntimeError(f"Failed to deserialize TensorRT engine from {path!r}")
    return engine


# Round-trip the freshly built engine through disk, then create the context
# used for inference.
path="resnet_engine"
save_engine(serialized_engine,path)
engine=load_engine(path)
context=engine.create_execution_context()
import ctypes
import numpy as np
from PIL import Image


def prerocess_image(image_path,input_shape):
    """Load an image file and convert it to a float32 NCHW batch of one.

    Args:
        image_path: Path of the image file to open.
        input_shape: 4-element shape ``(N, C, H, W)``; only ``H`` and ``W``
            are used, as the resize target.

    Returns:
        Array of shape ``(1, 3, H, W)`` with values scaled to ``[0, 1]``.
        The result is a transposed view, so it is not C-contiguous.
    """
    height, width = input_shape[2:]
    # The context manager closes the file handle once the pixels are loaded.
    with Image.open(image_path) as source:
        # Force three channels so grayscale / RGBA / palette images do not
        # break the 4-axis transpose or the channel count below.
        # PIL's resize takes (width, height), the reverse of the (H, W) order
        # in input_shape.
        image = source.convert("RGB").resize((width, height))
    image_array = np.array(image, dtype=np.float32)
    image_array = image_array / 255.0
    image_array = np.expand_dims(image_array, axis=0)
    # (1, H, W, C) -> (1, C, H, W)
    image_array = image_array.transpose((0, 3, 1, 2))
    # NOTE(review): no per-channel mean/std normalization is applied here;
    # confirm whether the model's weights expect it.
    return image_array
import numpy as np
import torch

# Host-side buffers: one preprocessed image in, one row of 1000 values out.
input_shape = (1, 3, 224, 224)
output_shape = (1, 1000)
dtype = np.float32

image_path="/kaggle/input/test-image-3/8cec3814fbe3524455caf2ebb6183bf4.jpeg"
input_data=prerocess_image(image_path,input_shape)
# The preprocessing result is a transposed view; the device copy needs a
# contiguous buffer.
input_data=np.ascontiguousarray(input_data)

output_data = np.empty(output_shape, dtype=dtype)
output_data = np.ascontiguousarray(output_data)

import pycuda.driver as cuda
import pycuda.autoinit

# Device buffers sized from the host arrays.
d_input = cuda.mem_alloc(input_data.nbytes)
d_output = cuda.mem_alloc(output_data.nbytes)
if d_input is None or d_output is None:
    raise RuntimeError("CUDA内存分配失败")

stream = cuda.Stream()
if stream is None:
    raise RuntimeError("PyCUDA流对象创建失败")

# Upload the input, then wait for the copy to finish: execute_v2 below is not
# ordered on `stream`, so without this wait it could read a partial input.
cuda.memcpy_htod_async(d_input, input_data, stream)
stream.synchronize()

context.set_tensor_address('input', int(d_input))
context.set_tensor_address('output', int(d_output))

bindings = [int(d_input), int(d_output)]
# execute_v2 reports failure through its boolean result; do not read the
# output buffer if inference did not run.
if not context.execute_v2(bindings):
    raise RuntimeError("TensorRT inference failed: execute_v2 returned False")

cuda.memcpy_dtoh(output_data,d_output)
print(output_data)
When I use the latest expression for loading ResNet-50, it runs successfully.
output:
But when I load it with `pretrained=True`,
it returns False.