PaddlePaddle / PaddleX

All-in-One Development Tool based on PaddlePaddle(飞桨低代码开发工具)
Apache License 2.0
4.91k stars 958 forks source link

用PaddleX训练的模型,C#部署方式,调用TensorRT总是报内存受保护错误,用的Github提供的程序不行,后面自己改的还是报错 #1376

Closed 1316540491 closed 2 years ago

1316540491 commented 2 years ago

采用C#的部署方式,尝试了Github提供C++编译利用Cmake生产DLL后C#部署,普通加载模型是没问题的,用TensorRT加速加载模型就会报错误,提示内存受保护,我这边模型用的YOLOV3,用PaddleX训练的。 我的配置如下: 显卡是RTX3060 运行系统:Windows 部署方式:C# Cuda版本11.1 CuDnn版本v8.0.5.39 PaddleX版本2.1 推理库:PaddleInference TensorRT版本7.2.1.6(都是根据Cuda版本对应找的)

此问题搞了一个月了,还没有成功,请大神解惑,是否PaddleX不支持TensorRT,因为给的示例里面没有用TensorRT, QQ群里和微信的PaddleX交流群也没有人成功过。

推理代码如下:

// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <gflags/gflags.h>

// NOTE(review): two header names on the original lines were swallowed by the
// markdown paste (bare "include" with nothing after it); <string> and <vector>
// are the most likely originals — confirm against the repository source.
#include <string>
#include <vector>

#include <windows.h>  // GetCurrentThreadId()

#include "model_deploy/common/include/paddle_deploy.h"
#include "model_deploy/common/include/model_infer.h"

/* ------------------------------------------------------------------ */

// 初始化模型带tensorRT加速 // [suliang] 2021-12-15 增加5个输入参数:min_input_shape, max_input_shape, optim_input_shape分别代表输入尺寸的输入范围, precision代表计算精度(0=fp32,1=fp16,2=int8),min_subgraph_size代表最小优化子图 extern "C" __declspec(dllexport) PaddleDeploy::Model InitModel_TRT(const char model_type, const char model_filename, const char params_filename, const char cfg_file, bool use_gpu, int gpu_id, char paddlex_model_type, std::vectormin_input_shape, std::vectormax_input_shape, std::vectoroptim_input_shape, int precision, int min_subgraph_size) { // create model PaddleDeploy::Model* model = PaddleDeploy::CreateModel(model_type); //FLAGS_model_type

// model init
model->Init(cfg_file);

// inference engine init
PaddleDeploy::PaddleEngineConfig engine_config;
engine_config.model_filename = model_filename;
engine_config.params_filename = params_filename;
engine_config.use_gpu = use_gpu;
engine_config.gpu_id = gpu_id;

// 使用tensorRT则强制打开gpu
engine_config.use_gpu = true;
engine_config.use_trt = true;

// 注意:根据优化目标需要手动调整
engine_config.precision = precision;                // 精度选择,默认fp32,还有fp16,int8
engine_config.min_subgraph_size = min_subgraph_size;// 最小子图,越大则优化度越低,越大越可能忽略动态图: 设置40+不报错但也没啥优化
engine_config.max_workspace_size = 1 << 30;

// 注意:根据模型和输入图像大小,需要手动调整如下变量
//std::vector<int> min_input_shape = { 1, 3, 512, 512 };
//std::vector<int> max_input_shape = { 1, 3, 1024, 1024 };
//std::vector<int> optim_input_shape = { 1, 3, 1024, 1024 };

// 分别定义最小、最大、最优输入尺寸:需要根据模型输入尺寸调整
// 这里三种模型输入的关键字不同(clas对应inputs, det对应image, seg对应x),可通过netron查看INPUTS.name,比如seg模型INPUTS.name=x
// 另外如果有动态输入尺寸不匹配的节点,需要手动定义
if (strcmp("clas", model_type) == 0) {
    // Adjust shape according to the actual model
    engine_config.min_input_shape["inputs"] = min_input_shape;
    engine_config.max_input_shape["inputs"] = max_input_shape;
    engine_config.optim_input_shape["inputs"] = optim_input_shape;
}
else if (strcmp("det", model_type) == 0) {
    // Adjust shape according to the actual model
    engine_config.min_input_shape["image"] = min_input_shape;
    engine_config.max_input_shape["image"] = max_input_shape;
    engine_config.optim_input_shape["image"] = optim_input_shape;
}
else if (strcmp("seg", model_type) == 0) {
    // Additional nodes need to be added, pay attention to the output prompt
    engine_config.min_input_shape["x"] = min_input_shape;
    engine_config.max_input_shape["x"] = max_input_shape;
    engine_config.optim_input_shape["x"] = optim_input_shape;
}
bool init = model->PaddleEngineInit(engine_config);
if (!init)
{
    LOGC("INFO", "init model failed");
}
// det, seg, clas, paddlex
if (strcmp(model_type, "paddlex") == 0) // 是paddlex模型,则返回具体支持的模型类型: det, seg, clas
{
    // detector
    if (model->yaml_config_["model_type"].as<std::string>() == std::string("detector"))
    {
        strcpy(paddlex_model_type, "det");
    }
    else if (model->yaml_config_["model_type"].as<std::string>() == std::string("segmenter"))
    {
        strcpy(paddlex_model_type, "seg");
    }
    else if (model->yaml_config_["model_type"].as<std::string>() == std::string("classifier"))
    {
        strcpy(paddlex_model_type, "clas");
    }
}
return model;

}

// Single-image detection inference.
//   img       - packed HWC uint8 pixels (3-channel only)
//   output    - per box, 6 floats: [category_id, score, x1, y1, x2, y2]
//   nBoxesNum - out: number of boxes predicted for this image
//   LabelList - out: space-separated category names, NUL-terminated
// NOTE(review): pointer tokens in this signature were lost in the paste;
// reconstructed here — confirm against the repository source.
extern "C" __declspec(dllexport) void Det_ModelPredict(
    PaddleDeploy::Model* model, const unsigned char* img, int nWidth,
    int nHeight, int nChannel, float* output, int* nBoxesNum, char* LabelList)
{
    // prepare data
    std::vector<cv::Mat> imgs;

    int nType = 0;
    if (nChannel == 3) {
        nType = CV_8UC3;
    } else {
        std::cout << "Only support 3 channel image." << std::endl;
        return;
    }

    cv::Mat input = cv::Mat::zeros(cv::Size(nWidth, nHeight), nType);
    memcpy(input.data, img, nHeight * nWidth * nChannel * sizeof(uchar));
    //cv::imwrite("./1.png", input);
    imgs.push_back(std::move(input));

    // predict
    std::vector<PaddleDeploy::Result> results;
    model->Predict(imgs, &results, 1);

    // results.size() would be the batch size; we want the box count of the
    // single image we submitted.
    nBoxesNum[0] = results[0].det_result->boxes.size();
    std::string label = "";
    for (int i = 0; i < results[0].det_result->boxes.size(); i++) {
        label = label + results[0].det_result->boxes[i].category + " ";
        output[i * 6 + 0] = results[0].det_result->boxes[i].category_id;  // class id
        output[i * 6 + 1] = results[0].det_result->boxes[i].score;        // confidence
        // box corners: x1, y1 (top-left), x2, y2 (bottom-right)
        output[i * 6 + 2] = results[0].det_result->boxes[i].coordinate[0];
        output[i * 6 + 3] = results[0].det_result->boxes[i].coordinate[1];
        output[i * 6 + 4] = results[0].det_result->boxes[i].coordinate[2];
        output[i * 6 + 5] = results[0].det_result->boxes[i].coordinate[3];
    }
    // BUGFIX: copy the terminating '\0' as well — the original copied only
    // strlen bytes, handing the C# marshaller an unterminated buffer.
    memcpy(LabelList, label.c_str(), strlen(label.c_str()) + 1);
}

/* ------------------------------------------------------------------ */

// 新增二次封装:初始化 void ModelWrapper::InitModelEnter(const char model_type, const char model_dir, int gpu_id, bool use_trt, const std::vectormin_input_shape, const std::vectormax_input_shape, const std::vectoroptim_input_shape, int precision, int min_subgraph_size) { // 初始化线程池:创建指定个数线程,每个线程指定到线程池的一个线程号 pool = new ThreadPool(num_threads); pool->init();

std::string model_filename = std::string(model_dir) + "\\model.pdmodel";
std::string params_filename = std::string(model_dir) + "\\model.pdiparams";
std::string cfg_file = std::string(model_dir) + "\\deploy.yaml";

bool use_gpu = true;
char* paddle_model_type = NULL;
if (!use_trt) {
    _model = InitModel(model_type,
        model_filename.c_str(),    // *.pdmodel
        params_filename.c_str(),   // *.pdiparams
        cfg_file.c_str(),          // *.yaml 
        use_gpu,
        gpu_id,
        paddle_model_type);
}
else
{
    _model = InitMo del_TRT(model_type,
        model_filename.c_str(),    // *.pdmodel
        params_filename.c_str(),   // *.pdiparams
        cfg_file.c_str(),          // *.yaml 
        use_gpu,
        gpu_id,
        paddle_model_type,
        min_input_shape, max_input_shape, optim_input_shape, precision, min_subgraph_size);
}

}

// 新增二次封装:单图推理 void ModelWrapper::SegPredictEnter(unsigned char imageData, int width, int height, int channels, unsigned char result_map) { cv::Mat src; if (channels == 1) { src = cv::Mat(height, width, CV_8UC1, imageData); cv::cvtColor(src, src, cv::COLOR_GRAY2BGR); } else { src = cv::Mat(height, width, CV_8UC3, imageData); } int predChannels = src.channels(); UCHAR* _imageData = src.data; auto future1 = pool->submit(Seg_ModelPredict, _model, _imageData, width, height, predChannels, result_map); future1.get(); }

// 检测模型 void ModelWrapper::DetPredictEnter(unsigned char imageData, int width, int height, int channels, float output, int nBoxesNum, char LabelList) { cv::Mat src; if (channels == 1) { src = cv::Mat(height, width, CV_8UC1, imageData); cv::cvtColor(src, src, cv::COLOR_GRAY2BGR); } else { src = cv::Mat(height, width, CV_8UC3, imageData); } int predChannels = src.channels(); UCHAR* _imageData = src.data; auto future1 = pool->submit(Det_ModelPredict, _model, _imageData, width, height, predChannels, output, nBoxesNum, LabelList); future1.get(); }

// 分类模型 void ModelWrapper::ClsPredictEnter(unsigned char imageData, int width, int height, int channels, float score, char category, int category_id) { cv::Mat src; if (channels == 1) { src = cv::Mat(height, width, CV_8UC1, imageData); cv::cvtColor(src, src, cv::COLOR_GRAY2BGR); } else { src = cv::Mat(height, width, CV_8UC3, imageData); } int predChannels = src.channels(); UCHAR* _imageData = src.data; auto future1 = pool->submit(Cls_ModelPredict, _model, _imageData, width, height, predChannels, score, category, category_id); future1.get(); }

// Mask模型 void ModelWrapper::MaskPredictEnter(unsigned char imageData, int width, int height, int channels, float box_output, unsigned char mask_output, int nBoxesNum, char LabelList) { cv::Mat src; if (channels == 1) { src = cv::Mat(height, width, CV_8UC1, imageData); cv::cvtColor(src, src, cv::COLOR_GRAY2BGR); } else { src = cv::Mat(height, width, CV_8UC3, imageData); } int predChannels = src.channels(); UCHAR _imageData = src.data; auto future1 = pool->submit(Mask_ModelPredict, _model, _imageData, width, height, predChannels, box_output, mask_output, nBoxesNum, LabelList); future1.get(); }

// 新增二次封装:模型资源释放 void ModelWrapper::DestructModelEnter() { // 释放线程池中所有线程 pool->shutdown(); if (pool != NULL) { delete pool; pool = NULL; } // 释放模型资源 if (_model != NULL) { DestructModel(_model); } }

// 新增二次封装接口api extern "C" __declspec(dllexport) ModelWrapper ModelObjInit(const char model_type, const char model_dir, int gpu_id, bool use_trt, const std::vectormin_input_shape, const std::vectormax_input_shape, const std::vectoroptim_input_shape, int precision, int min_subgraph_size) { ModelWrapper modelObj = new ModelWrapper(); modelObj->InitModelEnter(model_type, model_dir, gpu_id, use_trt, min_input_shape, max_input_shape, optim_input_shape, precision, min_subgraph_size); return modelObj; }

// Second-level wrapper API: tear down a ModelWrapper created by ModelObjInit.
extern "C" __declspec(dllexport) void ModelObjDestruct(ModelWrapper* modelObj)
{
    // First release the resources held inside the model,
    // then free the heap object itself.
    modelObj->DestructModelEnter();
    delete modelObj;
}

// Second-level wrapper API: segmentation prediction entry point for C#.
// NOTE(review): pointer tokens reconstructed after the markdown paste ate them.
extern "C" __declspec(dllexport) void ModelObjPredict_Seg(
    ModelWrapper* modelObj, unsigned char* imageData, int width, int height,
    int channels, unsigned char* resultMap)
{
    modelObj->SegPredictEnter(imageData, width, height, channels, resultMap);
}

// Second-level wrapper API: detection prediction entry point for C#.
// NOTE(review): pointer tokens reconstructed after the markdown paste ate them.
extern "C" __declspec(dllexport) void ModelObjPredict_Det(
    ModelWrapper* modelObj, unsigned char* imageData, int width, int height,
    int channels, float* output, int* nBoxesNum, char* LabelList)
{
    modelObj->DetPredictEnter(imageData, width, height, channels,
                              output, nBoxesNum, LabelList);
}

// Second-level wrapper API: classification prediction entry point for C#.
// NOTE(review): pointer tokens reconstructed after the markdown paste ate them.
extern "C" __declspec(dllexport) void ModelObjPredict_Cls(
    ModelWrapper* modelObj, unsigned char* imageData, int width, int height,
    int channels, float* score, char* category, int* category_id)
{
    modelObj->ClsPredictEnter(imageData, width, height, channels,
                              score, category, category_id);
}

// Second-level wrapper API: Mask R-CNN prediction entry point for C#.
// NOTE(review): pointer tokens reconstructed after the markdown paste ate them.
extern "C" __declspec(dllexport) void ModelObjPredict_Mask(
    ModelWrapper* modelObj, unsigned char* imageData, int width, int height,
    int channels, float* box_output, unsigned char* mask_output,
    int* nBoxesNum, char* LabelList)
{
    modelObj->MaskPredictEnter(imageData, width, height, channels,
                               box_output, mask_output, nBoxesNum, LabelList);
}

FlyingQianMM commented 2 years ago

YOLOv3是支持trt加速预测的,从您目前的描述看,不确定是否c#部署代码的问题,需要您确认c++部署开启TensorRT预测成功。

先采用C++部署方式,开启trt测试下是否能够运行成功: