NVIDIA / TensorRT

NVIDIA® TensorRT™ is an SDK for high-performance deep learning inference on NVIDIA GPUs. This repository contains the open source components of TensorRT.
https://developer.nvidia.com/tensorrt
Apache License 2.0

onnx2tensorrt #4064

Open Zerocheng001 opened 2 months ago

Zerocheng001 commented 2 months ago

    import torch

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model1_trained = torch.load('MyClassify', map_location=device)
    model1_trained.eval()
    dummy_input = torch.randn(1, 1, 224, 224, device=device)
    onnx_model_path = 'resnet182.onnx'
    try:
        torch.onnx.export(
            model1_trained,
            dummy_input,
            onnx_model_path,
            export_params=True,
            opset_version=11,
            do_constant_folding=True,
            input_names=['input'],
            output_names=['output'],
            dynamic_axes={'input': {0: 'batch_size'},
                          'output': {0: 'batch_size'}})
        print(f"Model successfully exported to ONNX and saved at {onnx_model_path}")
    except Exception as e:
        print(f"Error exporting to ONNX: {e}")

C++: onnx2tensorrt

  IOptimizationProfile* profile = builder->createOptimizationProfile();
  auto input = network->getInput(0);
  auto inputDims = input->getDimensions();
  profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{ 4, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{ 8, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  config->addOptimizationProfile(profile);

When inferring, it only works if the minimum, optimal, and maximum sizes are all set to the same value; otherwise the following error is reported at the line samplesCommon::BufferManager buffers(mEngine):

Error message: System.Runtime.InteropServices.SEHException: "External component has thrown an exception."

Zerocheng001 commented 2 months ago

  profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] });

or

  profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{ 4, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{ 4, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{ 4, inputDims.d[1], inputDims.d[2], inputDims.d[3] });

will succeed. I want to infer a batch of images.

lix19937 commented 2 months ago

Try using trtexec --minShapes=spec --optShapes=spec --maxShapes=spec --onnx=spec --verbose to convert. Your code may have bugs.
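
For example, with the 1x1x224x224 input exported above and the 1/4/8 batch profile, the command might look like the following (the input name and shapes are illustrative and must match the actual ONNX model):

  trtexec --onnx=resnet182.onnx --minShapes=input:1x1x224x224 --optShapes=input:4x1x224x224 --maxShapes=input:8x1x224x224 --saveEngine=model.engine --verbose

If trtexec builds and runs the engine without errors, the problem is in the application code rather than in the model.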

Zerocheng001 commented 2 months ago

I can use the engine with the Python API, but I get an error when I use it with the C++ API. I am sure the engine itself is correct, yet inference with it in C++ fails.

bool BuildEngine()
{

try
{
    auto builder = SampleUniquePtr<nvinfer1::IBuilder>(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if (!builder)
    {
        return false;
    }

    auto network = SampleUniquePtr<nvinfer1::INetworkDefinition>(builder->createNetworkV2(0));
    if (!network)
    {
        return false;
    }

    auto config = SampleUniquePtr<nvinfer1::IBuilderConfig>(builder->createBuilderConfig());
    if (!config)
    {
        return false;
    }

    auto parser
        = SampleUniquePtr<nvonnxparser::IParser>(nvonnxparser::createParser(*network, sample::gLogger.getTRTLogger()));
    if (!parser)
    {
        return false;
    }

    auto timingCache = SampleUniquePtr<nvinfer1::ITimingCache>();

    auto constructed = ConstructNetwork(builder, network, config, parser, timingCache);
    if (!constructed)
    {
        return false;
    }

    auto profileStream = samplesCommon::makeCudaStream();
    if (!profileStream)
    {
        return false;
    }
    config->setProfileStream(*profileStream);

    IOptimizationProfile* profile = builder->createOptimizationProfile();

    auto input = network->getInput(0);
    auto inputDims = input->getDimensions();

    profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{1, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
    profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{20, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
    profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{10, inputDims.d[1], inputDims.d[2], inputDims.d[3] });

    config->addOptimizationProfile(profile);
    SampleUniquePtr<IHostMemory> plan{ builder->buildSerializedNetwork(*network, *config) };
    if (!plan)
    {
        return false;
    }

    std::ofstream outFile("model.engine", std::ios::binary);
    outFile.write(reinterpret_cast<const char*>(plan->data()), plan->size());
    outFile.close();

    if (timingCache != nullptr && !mParams.timingCacheFile.empty())
    {
        samplesCommon::updateTimingCacheFile(
            sample::gLogger.getTRTLogger(), mParams.timingCacheFile, timingCache.get(), *builder);
    }

    mRuntime = std::shared_ptr<nvinfer1::IRuntime>(createInferRuntime(sample::gLogger.getTRTLogger()));
    if (!mRuntime)
    {
        return false;
    }

    mEngine = std::shared_ptr<nvinfer1::ICudaEngine>(
        mRuntime->deserializeCudaEngine(plan->data(), plan->size()), samplesCommon::InferDeleter());
    if (!mEngine)
    {
        return false;
    }

    ASSERT(network->getNbInputs() == 1);
    mInputDims = network->getInput(0)->getDimensions();
    ASSERT(mInputDims.nbDims == 4);

    ASSERT(network->getNbOutputs() == 1);
    mOutputDims = network->getOutput(0)->getDimensions();
    ASSERT(mOutputDims.nbDims == 2);

    return true;
}
catch (const std::exception&)
{
    return false;
}
}

bool Infer()
{
// Create RAII buffer manager object
samplesCommon::BufferManager buffers(mEngine);

auto context = SampleUniquePtr<nvinfer1::IExecutionContext>(mEngine->createExecutionContext());
if (!context)
{
    return false;
}

for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
    auto const name = mEngine->getIOTensorName(i);
    context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}

// Read the input data into the managed buffers
ASSERT(mParams.inputTensorNames.size() == 1);
if (!ProcessInput(buffers, cvImagesPtr, images_num))
{
    return false;
}

// Memcpy from host input buffers to device input buffers
buffers.copyInputToDevice();

bool status = context->executeV2(buffers.getDeviceBindings().data());
if (!status)
{
    return false;
}

// Memcpy from device output buffers to host output buffers
buffers.copyOutputToHost();

// Verify results
if (!VerifyOutput(buffers,images_num,class_result))
{
    return false;
}

return true;

}

this " samplesCommon::BufferManager buffers(mEngine);" throw System.Runtime.InteropServices.SEHException:“External component has thrown an exception.” the version of tensorrt is TensorRT-10.2.0.19.Windows.win10.cuda-12.5

Zerocheng001 commented 2 months ago

  profile->setDimensions(input->getName(), OptProfileSelector::kMIN, Dims4{ 1, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kMAX, Dims4{ 20, inputDims.d[1], inputDims.d[2], inputDims.d[3] });
  profile->setDimensions(input->getName(), OptProfileSelector::kOPT, Dims4{ 10, inputDims.d[1], inputDims.d[2], inputDims.d[3] });

I also found that the larger the batch size in the profile, the longer the inference time. For example, with the batch set to 1 and one input image, inference takes 2 ms; with the batch size set to 20 but still only one input image, it takes about 14 ms.

lix19937 commented 2 months ago

I also found that the larger the batch size setting, the longer the inference time

It is normal.
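
One plausible explanation (an inference from the numbers above, not something stated in this thread): TensorRT tunes its kernels for the kOPT shape, and if the execution context's input shape is left at (or padded to) the profile maximum, the engine does the work for that many samples even when only one image is real. Setting the context's shape to the actual batch on each call keeps the cost proportional to it; a small, hypothetical sketch assuming the input tensor is named "input":

    // actualBatch is the number of real images in this call; must be within [kMIN, kMAX].
    nvinfer1::Dims4 shape{actualBatch, 1, 224, 224};
    context->setInputShape("input", shape);
    bool ok = context->executeV2(buffers.getDeviceBindings().data());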