Open Zerocheng001 opened 2 months ago
profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); or profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{ 4, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{ 4, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{ 4, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
will succeed; I want to run inference on a batch of images.
Try to use trtexec --minShapes=spec --optShapes=spec --maxShapes=spec --onnx=spec --verbose
to convert. Your code may have bugs.
I can use the engine with the Python API, but I get an error when I use it with the C++ API. I am sure the engine itself is correct, but inference with it in C++ fails. void BuildEngine() {
// Body of BuildEngine: creates a TensorRT builder, network definition, builder config and
// ONNX parser, hands them to ConstructNetwork (defined elsewhere), attaches one optimization
// profile for input 0, builds a serialized plan, writes it to "model.engine", and then
// deserializes the same plan into mRuntime / mEngine.
//
// NOTE(review): the header in this listing is `void BuildEngine()`, yet every exit path
// returns true/false; the signature presumably should return bool - confirm against the
// real source.
try
{
auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
if (!builder)
{
return false;
}
auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
if (!network)
{
return false;
}
auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
if (!config)
{
return false;
}
auto parser
= SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
if (!parser)
{
return false;
}
// timingCache starts out empty; it is only used again below if it is non-null afterwards.
// Presumably ConstructNetwork fills it in when a timing cache file is configured - confirm.
auto timingCache = SampleUniquePtr<nvinfer1::ITimingCache>();
auto constructed = ConstructNetwork(builder, network, config, parser, timingCache);
if (!constructed)
{
return false;
}
auto profileStream = samplesCommon::makeCudaStream();
if (!profileStream)
{
return false;
}
config->setProfileStream(*profileStream);
// Single optimization profile for input 0: dimension 0 may range from 1 (kMIN) to 20 (kMAX)
// with 10 as the kOPT value; dimensions 1..3 are copied unchanged from the network input.
// NOTE(review): the results of createOptimizationProfile, setDimensions and
// addOptimizationProfile are not checked here, so a rejected profile would go unnoticed
// until the build step.
IOptimizationProfile* profile = builder->createOptimizationProfile();
auto input = network->getInput(0);
auto inputDims = input->getDimensions();
profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{1, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{20, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{10, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
config->addOptimizationProfile(profile);
SampleUniquePtr<IHostMemory> plan{ builder->buildSerializedNetwork(*network, *config) };
if (!plan)
{
return false;
}
// Persist the serialized plan to "model.engine" in the current working directory.
// NOTE(review): neither the open nor the write is checked for failure.
std::ofstream outFile("model.engine", std::ios::binary);
outFile.write(reinterpret_cast<const char*>(plan->data()), plan->size());
outFile.close();
if (timingCache != nullptr && !mParams.timingCacheFile.empty())
{
samplesCommon::updateTimingCacheFile(
sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
}
// Deserialize the in-memory plan (not the file written above) into the runtime engine.
mRuntime = std::shared_ptr<nvinfer1::IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
if (!mRuntime)
{
return false;
}
mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
if (!mEngine)
{
return false;
}
// The code expects exactly one 4-D input and one 2-D output.
// mInputDims / mOutputDims are read from the network definition, not from the engine or an
// execution context. NOTE(review): with a dynamic batch axis, dimension 0 stored here is
// presumably -1 rather than a concrete batch size - confirm before using it to size buffers.
ASSERT(network->getNbInputs() == 1);
mInputDims = network->getInput(0)->getDimensions();
ASSERT(mInputDims.nbDims == 4);
ASSERT(network->getNbOutputs() == 1);
mOutputDims = network->getOutput(0)->getDimensions();
ASSERT(mOutputDims.nbDims == 2);
return true;
}
// Any std::exception raised above is reported only as a `false` result; nothing is logged.
// NOTE(review): the closing brace of the function opened before `try` is missing from this listing.
catch (const std::exception&)
{
return false;
}
// Runs one inference pass on mEngine: creates an execution context, binds a device buffer to
// every I/O tensor by name, fills the input via ProcessInput, copies input to the device,
// executes, copies output back to the host and checks it via VerifyOutput.
//
// NOTE(review): this listing is not compilable as pasted:
//  - the function is declared `void` but returns true/false on every path;
//  - the declaration `samplesCommon::BufferManager buffers(mEngine);` has been folded into the
//    trailing `//` comment on the first line, so `buffers` is never declared here;
//  - cvImagesPtr, images_num and class_result are used but not declared in this listing
//    (presumably parameters or members in the real source - confirm).
//
// NOTE(review): no call in this function sets the input shape on the execution context, while
// BuildEngine builds the engine with a batch range of 1..20. With a dynamic batch dimension the
// shape reported by the engine itself presumably contains -1, so a BufferManager sized from
// mEngine alone may compute an invalid allocation size - a likely cause of the exception
// reported for `BufferManager buffers(mEngine)` when min/opt/max differ. A probable fix is to
// create the context first, call context->setInputShape(<input name>, <dims with the actual
// batch size>), and construct the BufferManager from that context so that buffers are sized
// from resolved shapes. Verify against samples/common/buffers.h of the TensorRT release in use.
void Infer() { // Create RAII buffer manager object samplesCommon::BufferManager buffers(mEngine);
auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
return false;
}
// Bind the device buffer that `buffers` holds for each I/O tensor name to the context.
for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
auto const name = mEngine->getIOTensorName(i);
context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}
// Read the input data into the managed buffers
ASSERT(mParams.inputTensorNames.size() == 1);
if (!ProcessInput(buffers, cvImagesPtr, images_num))
{
return false;
}
// Memcpy from host input buffers to device input buffers
buffers.copyInputToDevice();
// Execution uses the bindings array from `buffers`, not the addresses set in the loop above.
bool status = context->executeV2(buffers.getDeviceBindings().data());
if (!status)
{
return false;
}
// Memcpy from device output buffers to host output buffers
buffers.copyOutputToHost();
// Verify results
if (!VerifyOutput(buffers,images_num,class_result))
{
return false;
}
return true;
}
The line "samplesCommon::BufferManager buffers(mEngine);" throws System.Runtime.InteropServices.SEHException: “External component has thrown an exception.” The TensorRT version is TensorRT-10.2.0.19.Windows.win10.cuda-12.5.
profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{1, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{20, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{10, inputDims.d[1], inputDims.d[2], inputDims.d[3] }); I also found that the larger the batch size setting, the longer the inference time. For example, with the batch size set to 1 and 1 input image, inference takes 2 ms; with the batch size set to 20 but still only 1 input image, it takes about 14 ms.
I also found that the larger the batch size setting, the longer the inference time
It is normal.
import torch

# Export the saved classifier to ONNX, marking axis 0 of the input and output as dynamic.

# Use the GPU when one is available, otherwise the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Load the saved model onto the selected device and switch it to evaluation mode.
model1_trained = torch.load('MyClassify', map_location=device)
model1_trained.eval()

# Example input handed to the exporter; presumably (batch, channel, height, width).
dummy_input = torch.randn(1, 1, 224, 224, device=device)
onnx_model_path = 'resnet182.onnx'

# Keyword options for the exporter; axis 0 of both tensors is named 'batch_size'.
export_options = {
    'export_params': True,
    'opset_version': 11,
    'do_constant_folding': True,
    'input_names': ['input'],
    'output_names': ['output'],
    'dynamic_axes': {
        'input': {0: 'batch_size'},
        'output': {0: 'batch_size'},
    },
}

try:
    torch.onnx.export(model1_trained, dummy_input, onnx_model_path, **export_options)
    print(f"模型已成功转换为 ONNX 并保存在 {onnx_model_path}")
except Exception as e:
    print(f"转换为 ONNX 时出错: {e}")
C++ :onnx2tensorrt
When inferring, the minimum, optimal, and maximum sizes must all be set to the same value for no error to be reported; otherwise the error is reported at samplesCommon::BufferManager buffers(mEngine);
Error message: System.Runtime.InteropServices.SEHException: “External component has thrown an exception.”