// Repro fragment (truncated paste — duplicated in full further below).
// Deserializes a TensorRT engine with four NMS-plugin output bindings and
// reads each output's per-batch size. NOTE(review): the original paste lost
// every '*' to markdown formatting; they are restored below.
int main()
{
    TRTLogger logger;
    auto engine_data = load_file("v5s6.nms.1280.4.trt");
    auto runtime = make_nvshared(nvinfer1::createInferRuntime(logger));
    // Required so the deserializer can resolve the NMS plugin layer.
    initLibNvInferPlugins(&logger, "");
    auto engine = make_nvshared(runtime->deserializeCudaEngine(engine_data.data(), engine_data.size()));
    if (engine == nullptr)
    {
        printf("Deserialize cuda engine failed.\n");
        // NOTE(review): runtime is already wrapped by make_nvshared; if that
        // wrapper's deleter destroys it, this explicit destroy() double-frees
        // — TODO confirm make_nvshared's ownership semantics.
        runtime->destroy();
        return -1;
    }
    // Bindings 1..4 are the NMS outputs: num_detections, boxes, scores, classes.
    auto output_num_detections_dims = engine->getBindingDimensions(1);
    int output_num_detections = output_num_detections_dims.d[1];
    auto output_detection_boxes_dims = engine->getBindingDimensions(2);
    int output_detection_boxes = output_detection_boxes_dims.d[1];
    auto output_detection_scores_dims = engine->getBindingDimensions(3);
    int output_detection_scores = output_detection_scores_dims.d[1];
    auto output_detection_classes_dims = engine->getBindingDimensions(4);
    int output_detection_classes = output_detection_classes_dims.d[1];
    // NOTE(review): multiplying the four per-binding sizes together looks wrong
    // — each output binding needs its own independently-sized buffer, not one
    // buffer sized as their product. TODO confirm intended allocation scheme.
    int output_numel = input_batch * output_num_detections * output_detection_boxes * output_detection_scores * output_detection_classes;
🐛 Describe the bug
int main() { TRTLogger logger; auto engine_data = load_file("v5s6.nms.1280.4.trt"); auto runtime = make_nvshared(nvinfer1::createInferRuntime(logger)); initLibNvInferPlugins(&logger, ""); auto engine = make_nvshared(runtime->deserializeCudaEngine(engine_data.data(), engine_data.size()));
if (engine == nullptr) { printf("Deserialize cuda engine failed.\n"); runtime->destroy(); return -1; }
printf("Deserialize cuda engine successful.\n");
printf("engine->getNbBindings() %d\n", engine->getNbBindings());
cudaStream_t stream = nullptr; checkRuntime(cudaStreamCreate(&stream)); auto execution_context = make_nvshared(engine->createExecutionContext());
int input_batch = BATCH_SIZE; int input_channel = 3; int input_height = INPUT_H; int input_width = INPUT_W; int input_numel = input_batch input_channel input_height input_width; int input_delta = input_channel input_height input_width; float input_data_host = nullptr; float input_data_device = nullptr; checkRuntime(cudaMallocHost(&input_data_host, input_numel sizeof(float))); checkRuntime(cudaMalloc(&input_data_device, input_numel * sizeof(float)));
vector images = {cv::imread("0.png"), cv::imread("1.png"), cv::imread("2.png"), cv::imread("3.png")};
for (size_t i = 0; i < images.size(); i++) { // 通过双线性插值对图像进行resize float scale_x = input_width / (float)images[i].cols; float scale_y = input_height / (float)images[i].rows; float scale = std::min(scale_x, scale_y); float i2d[6], d2i[6]; // resize图像,源图像和目标图像几何中心的对齐 i2d[0] = scale; i2d[1] = 0; i2d[2] = (-scale images[i].cols + input_width + scale - 1) 0.5; i2d[3] = 0; i2d[4] = scale; i2d[5] = (-scale images[i].rows + input_height + scale - 1) 0.5;
} checkRuntime(cudaMemcpyAsync(input_data_device, input_data_host, input_numel * sizeof(float), cudaMemcpyHostToDevice, stream));
auto output_num_detections_dims = engine->getBindingDimensions(1); int output_num_detections = output_num_detections_dims.d[1]; auto output_detection_boxes_dims = engine->getBindingDimensions(2); int output_detection_boxes = output_detection_boxes_dims.d[1]; auto output_detection_scores_dims = engine->getBindingDimensions(3); int output_detection_scores = output_detection_scores_dims.d[1]; auto output_detection_classes_dims = engine->getBindingDimensions(4); int output_detection_classes = output_detection_classes_dims.d[1];
int output_numel = input_batch output_num_detections output_detection_boxes output_detection_scores output_detection_classes;
float output_data_host = nullptr; float output_data_device = nullptr; checkRuntime(cudaMallocHost(&output_data_host, sizeof(float) output_numel)); checkRuntime(cudaMalloc(&output_data_device, sizeof(float) output_numel));
// 明确当前推理时,使用的数据输入大小 auto input_dims = engine->getBindingDimensions(0); input_dims.d[0] = input_batch;
execution_context->setBindingDimensions(0, input_dims); float *bindings[] = {input_data_device, output_data_device}; bool status = execution_context->enqueueV2((void *)bindings, stream, nullptr); if (!status) std::cout << "Something is wrong in inference!\n"; std::cout << status << std::endl; checkRuntime(cudaMemcpyAsync(output_data_host, output_data_device, sizeof(float) output_numel, cudaMemcpyDeviceToHost, stream)); checkRuntime(cudaStreamSynchronize(stream));
    return 0;
}

When I use `bool status = execution_context->enqueueV2((void **)bindings, stream, nullptr);`, I get the following error:
error: 2: [pluginV2DynamicExtRunner.cpp::nvinfer1::rt::cuda::PluginV2DynamicExtRunner::execute::115] Error Code 2: Internal Error (Assertion status == kSTATUS_SUCCESS failed. )
Versions
master