NVIDIA / TensorRT

NVIDIA® TensorRT™ is an SDK for high-performance deep learning inference on NVIDIA GPUs. This repository contains the open source components of TensorRT.
https://developer.nvidia.com/tensorrt
Apache License 2.0

Error[3]: [executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueV3::2666] Error Code 3: API Usage Error (Parameter check failed at: executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueV3::2666, condition: mContext.profileObliviousBindings.at(profileObliviousIndex) || getPtrOrNull(mOutputAllocators, profileObliviousIndex) #3992

Open Leonjr98 opened 2 months ago

Leonjr98 commented 2 months ago

Description

I tried to switch my neural-network inference code, which originally used libtorch, over to TensorRT, but I got the following error.

bool model_deploy::LoadTRTModel(std::string& s_model_path)
{
    try
    {
        // Load the serialized TRT engine from disk.
        auto s_enginepath = s_model_path.c_str();
        std::ifstream fs_engineFile(s_enginepath, std::ios::binary);
        if (!fs_engineFile.good())
        {
            sample::gLogInfo << "[TRT] Error opening engine file: " << s_enginepath;
        }
        fs_engineFile.seekg(0, std::ifstream::end);
        int64_t ifsize = fs_engineFile.tellg();
        fs_engineFile.seekg(0, std::ifstream::beg);
        std::vector<char> engineBlob(ifsize);
        fs_engineFile.read(reinterpret_cast<char*>(engineBlob.data()), ifsize);
        if (!fs_engineFile.good())
        {
            sample::gLogInfo << "[TRT] Error loading engine file: " << s_enginepath;
        }
        sample::gLogInfo << "[TRT] Loading engine successful!" << std::endl;

        std::unique_ptr<BuildEnvironment> pBuildEnv(new BuildEnvironment(
            /*options.build.safe*/ 0,
            /*options.build.versionCompatible*/ 0,
            /*options.system.DLACore*/ -1,
            /*options.build.tempdir*/ "",
            /*options.build.tempfileControls*/ 3,
            /*options.build.leanDLLPath*/ ""));
        pBuildEnv->engine.setBlob(engineBlob.data(), engineBlob.size());

        pInferEnv = new InferenceEnvironment(*pBuildEnv);
        pBuildEnv.reset(); // Delete build environment.
        m_engine = pInferEnv->engine.get();

        // Create the execution context.
        m_context = makeUnique(m_engine->createExecutionContext());
        if (!m_context)
        {
            sample::gLogError << "Preprocessor context build failed." << std::endl;
        }

        cudaStreamCreate(&inferenceCudaStream); // Create CUDA stream.
    }
    catch (std::exception const& e)
    {
        sample::gLogError << "[TRT] Error loading the model" << e.what() << std::endl;
        return false;
    }
    std::cout << "Model loaded successfully!\n";
    return true;
}
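For comparison, here is a minimal sketch of the same load done through the public TensorRT runtime API (nvinfer1::createInferRuntime / deserializeCudaEngine) instead of trtexec's BuildEnvironment/InferenceEnvironment helpers. The logger class and the names loadTrtEngine / gTrtLogger are placeholders, not part of the original code.

// Sketch only: deserialize an engine with the public TensorRT runtime API.
// TrtLogger, gTrtLogger, and loadTrtEngine are illustrative names.
#include <NvInfer.h>
#include <fstream>
#include <iostream>
#include <memory>
#include <vector>

class TrtLogger : public nvinfer1::ILogger
{
    void log(Severity severity, char const* msg) noexcept override
    {
        if (severity <= Severity::kWARNING)
        {
            std::cerr << "[TRT] " << msg << std::endl;
        }
    }
};

bool loadTrtEngine(std::string const& enginePath)
{
    static TrtLogger gTrtLogger;

    // Read the serialized engine into memory.
    std::ifstream file(enginePath, std::ios::binary | std::ios::ate);
    if (!file.good())
    {
        return false;
    }
    std::vector<char> blob(static_cast<size_t>(file.tellg()));
    file.seekg(0, std::ifstream::beg);
    file.read(blob.data(), blob.size());

    // Deserialize and create an execution context with the runtime API.
    std::unique_ptr<nvinfer1::IRuntime> runtime{nvinfer1::createInferRuntime(gTrtLogger)};
    std::unique_ptr<nvinfer1::ICudaEngine> engine{
        runtime->deserializeCudaEngine(blob.data(), blob.size())};
    if (!engine)
    {
        return false;
    }
    std::unique_ptr<nvinfer1::IExecutionContext> context{engine->createExecutionContext()};
    return context != nullptr;
}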

void model_deploy::model_inference(string s_model_path, int* i_model_info, float* input_img_raw, float* output_img_raw)
{
    int wei = i_input_img_sz[0], hei = i_input_img_sz[1], slc = i_input_img_sz[2];
    int i_input_slc_len = wei * hei;
    int i_img_len = slc * i_input_slc_len, i_output_slc_len = i_input_slc_len * i_out_scale * i_out_scale;
    int i_model_output_id = i_model_info[0] - 1, channels = i_model_info[1];
    int i_tem = 0, half_channal = int(channels / 2);

    sample::gLogInfo << "[TRT] infer start ..." << std::endl;
    auto start = std::chrono::high_resolution_clock::now();

    m_engine = pInferEnv->engine.get();

    auto context = std::unique_ptr<nvinfer1::IExecutionContext>(m_engine->createExecutionContext());
    //[MemUsageChange]

    std::vector<void*> buffers;
    buffers.resize(m_engine->getNbIOTensors());

    cudaMalloc(&buffers[0], 5 * i_input_slc_len * sizeof(float));
    cudaMalloc(&buffers[1], i_input_slc_len * sizeof(float));

    cudaMemcpyAsync(buffers[0], input_img_raw, 5 * i_input_slc_len * sizeof(float), cudaMemcpyHostToDevice, inferenceCudaStream);
    cudaStreamSynchronize(inferenceCudaStream);

    context->setTensorAddress("input", buffers[0]);
    context->setTensorAddress("output", buffers[1]);

    bool status = context->enqueueV3(inferenceCudaStream);
    // Error[3]: [executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueV3::2666] Error Code 3: API Usage Error (Parameter check failed at: executionContext.cpp::nvinfer1::rt::ExecutionContext::enqueueV3::2666, condition: mContext.profileObliviousBindings.at(profileObliviousIndex) || getPtrOrNull(mOutputAllocators, profileObliviousIndex)
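Judging from the failed condition, the check appears to fire when some I/O tensor has neither an address bound on the context nor an output allocator set. A diagnostic sketch that may help narrow this down, assuming the m_engine, context, and inferenceCudaStream names from the snippet above:

// Diagnostic sketch: confirm every I/O tensor reported by the engine has an
// address bound on this context before calling enqueueV3.
for (int32_t i = 0; i < m_engine->getNbIOTensors(); ++i)
{
    char const* name = m_engine->getIOTensorName(i);
    bool const isInput = m_engine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT;
    sample::gLogInfo << "[TRT] tensor " << name << (isInput ? " (input)" : " (output)")
                     << " address=" << context->getTensorAddress(name) << std::endl;
    if (context->getTensorAddress(name) == nullptr)
    {
        sample::gLogError << "[TRT] no address bound for tensor " << name << std::endl;
    }
}
if (!context->allInputDimensionsSpecified())
{
    sample::gLogError << "[TRT] input dimensions are not fully specified" << std::endl;
}
if (!context->enqueueV3(inferenceCudaStream))
{
    sample::gLogError << "[TRT] enqueueV3 failed" << std::endl;
}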


Environment

TensorRT Version: 8.6.16

NVIDIA GPU: 3060

NVIDIA Driver Version:

CUDA Version: 12.1

CUDNN Version:

Operating System: Windows

Python Version (if applicable):

Tensorflow Version (if applicable):

PyTorch Version (if applicable):

Baremetal or Container (if so, version):

Relevant Files

Model link:

Steps To Reproduce

Commands or scripts:

Have you tried the latest release?:

Can this model run on other frameworks? For example run ONNX model with ONNXRuntime (polygraphy run <model.onnx> --onnxrt):

lix19937 commented 2 months ago

Before calling enqueueV3, you should call setTensorAddress. BTW, if your network inputs have dynamic shapes, you also need to set the input tensor dims.

Like the following:


for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
    auto const name = mEngine->getIOTensorName(i);
    context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}

...

// Asynchronously enqueue the inference work
if (!context->enqueueV3(stream))
{
    return false;
}
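For the dynamic-shape case mentioned above, a minimal sketch of setting the input dims before binding addresses; the 4-D dims used here are illustrative placeholders, and the mEngine / context / buffers names follow the snippet above:

// Sketch for dynamic shapes: specify each input's runtime dims, then bind addresses.
for (int32_t i = 0, e = mEngine->getNbIOTensors(); i < e; i++)
{
    auto const name = mEngine->getIOTensorName(i);
    if (mEngine->getTensorIOMode(name) == nvinfer1::TensorIOMode::kINPUT)
    {
        // Replace the -1 entries of the engine's shape with the actual runtime sizes;
        // the dims below are placeholders only.
        context->setInputShape(name, nvinfer1::Dims4{1, 5, 512, 512});
    }
    context->setTensorAddress(name, buffers.getDeviceBuffer(name));
}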
Leonjr98 commented 2 months ago

Sorry, I didn't show the code completely. I did call setTensorAddress before enqueueV3, and my input isn't dynamic.

lix19937 commented 2 months ago

You can refer to the https://github.com/lix19937/trt-samples-for-hackathon-cn/blob/master/cookbook/01-SimpleDemo/TensorRT8.5/main.cpp sample.

Leonjr98 commented 2 months ago

Thanks a lot