wang-xinyu / tensorrtx

Implementation of popular deep learning networks with TensorRT network definition API
MIT License
6.9k stars 1.76k forks source link

Why is there a big difference between the detection effect of PT model and engine model? #1232

Closed blue-q closed 1 year ago

blue-q commented 1 year ago

Env

About this repo

Your problem

When I converted the .pt model into a .wts model and regenerated the engine model, the detection results were much worse than with the .pt model

blue-q commented 1 year ago

I added a small object detection layer to the s model

wang-xinyu commented 1 year ago

If you modify the model structure, you need to modify the C++ code correspondingly.

This function defined the model structure. https://github.com/wang-xinyu/tensorrtx/blob/5cfa4445170eabaa54acd5ad7f469ef65a8763f1/yolov5/yolov5.cpp#L29

blue-q commented 1 year ago

Yes, I have modified the createEngine_s function. yolov5s.yaml `backbone:

[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4 [-1, 3, BottleneckCSP, [128]], [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 [-1, 9, BottleneckCSP, [256]], [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, BottleneckCSP, [512]], [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 [-1, 1, SPP, [1024, [5, 9, 13]]], [-1, 3, BottleneckCSP, [1024, False]], # 9 ]

head: [[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']], # 40*40
[[-1, 6], 1, Concat, [1]], # cat backbone P4 [-1, 3, BottleneckCSP, [512, False]], # 13

[-1, 1, Conv, [512, 1, 1]], # 40*40 [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 80*80 [[-1, 4], 1, Concat, [1]], # cat backbone P3 80*80 [-1, 3, BottleneckCSP, [512, False]], # 17 (P3/8-small) 80*80

[-1, 1, Conv, [256, 1, 1]], [-1, 1, nn.Upsample, [None, 2, 'nearest']], # 160*160 [[-1, 2], 1, Concat, [1]], # cat backbone P2 [-1, 3, BottleneckCSP, [256, False]], # 21

[-1, 1, Conv, [256, 3, 2]], #22 80*80 [[-1, 18], 1, Concat, [1]], #23 80*80 [-1, 3, BottleneckCSP, [256, False]], #24 80*80

[-1, 1, Conv, [256, 3, 2]], # 40*40 [[-1, 14], 1, Concat, [1]], # cat head P4 [-1, 3, BottleneckCSP, [512, False]], # 27

[-1, 1, Conv, [512, 3, 2]], # 20*20 [[-1, 10], 1, Concat, [1]], # cat head P5 [-1, 3, BottleneckCSP, [1024, False]], # 30

[[21, 24, 27, 30], 1, Detect, [nc, anchors]], # Detect(P2, P3, P4, P5) ]`

createEngine_s `// Builds the modified yolov5s network (extra P2 small-object detection head,
// four output scales P2/P3/P4/P5) with the TensorRT network definition API.
// NOTE(review): the snippet as pasted had lost all '*' pointer declarators and
// the '#' on the preprocessor directives to GitHub markdown; restored here.
//
// maxBatchSize  - max batch size baked into the (implicit-batch) engine
// builder       - TensorRT builder (owned by caller)
// config        - builder config (owned by caller)
// dt            - data type of the input tensor
// input         - path to the .wts weight file
// CLASS_NUM_SPP - number of classes; each head outputs 3 * (CLASS_NUM_SPP + 5) channels
// Returns the built ICudaEngine*, or nullptr if the Yolo plugin creator cast fails.
ICudaEngine* createEngine_s(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt, std::string &input, int CLASS_NUM_SPP) {
INetworkDefinition* network = builder->createNetworkV2(0U);

// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
assert(data);

std::map<std::string, Weights> weightMap = loadWeights(input);
Weights emptywts{DataType::kFLOAT, nullptr, 0};

// yolov5 backbone (indices follow the modified yolov5s.yaml)
auto focus0 = focus(network, weightMap, *data, 3, 32, 3, "model.0");
auto conv1 = convBnLeaky(network, weightMap, *focus0->getOutput(0), 64, 3, 2, 1, "model.1");
auto bottleneck_CSP2 = bottleneckCSP(network, weightMap, *conv1->getOutput(0), 64, 64, 1, true, 1, 0.5, "model.2");
auto conv3 = convBnLeaky(network, weightMap, *bottleneck_CSP2->getOutput(0), 128, 3, 2, 1, "model.3");
auto bottleneck_csp4 = bottleneckCSP(network, weightMap, *conv3->getOutput(0), 128, 128, 3, true, 1, 0.5, "model.4");
auto conv5 = convBnLeaky(network, weightMap, *bottleneck_csp4->getOutput(0), 256, 3, 2, 1, "model.5");
auto bottleneck_csp6 = bottleneckCSP(network, weightMap, *conv5->getOutput(0), 256, 256, 3, true, 1, 0.5, "model.6");
auto conv7 = convBnLeaky(network, weightMap, *bottleneck_csp6->getOutput(0), 512, 3, 2, 1, "model.7");
auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), 512, 512, 5, 9, 13, "model.8");

// yolov5 head
auto bottleneck_csp9 = bottleneckCSP(network, weightMap, *spp8->getOutput(0), 512, 512, 1, false, 1, 0.5, "model.9");
auto conv10 = convBnLeaky(network, weightMap, *bottleneck_csp9->getOutput(0), 256, 1, 1, 1, "model.10");

// Constant all-ones kernel shared by every nearest-neighbor "upsample"
// deconvolution below. It is registered in weightMap exactly once (under
// "deconv11") so the cleanup loop frees it exactly once; do NOT also register
// deconvwts15/deconvwts19, which alias the same buffer (double free).
float *deval = reinterpret_cast<float*>(malloc(sizeof(float) * 256 * 2 * 2));
for (int i = 0; i < 256 * 2 * 2; i++) {
    deval[i] = 1.0;
}
// Upsample x2 implemented as a grouped deconvolution with a 2x2 ones kernel
Weights deconvwts11{DataType::kFLOAT, deval, 256 * 2 * 2};
IDeconvolutionLayer* deconv11 = network->addDeconvolutionNd(*conv10->getOutput(0), 256, DimsHW{2, 2}, deconvwts11, emptywts);
deconv11->setStrideNd(DimsHW{2, 2});
deconv11->setNbGroups(256);
weightMap["deconv11"] = deconvwts11;

ITensor* inputTensors12[] = {deconv11->getOutput(0), bottleneck_csp6->getOutput(0)};
auto cat12 = network->addConcatenation(inputTensors12, 2);
auto bottleneck_csp13 = bottleneckCSP(network, weightMap, *cat12->getOutput(0), 512, 256, 1, false, 1, 0.5, "model.13");
// Modifications for the extra P2 head start here
auto conv14 = convBnLeaky(network, weightMap, *bottleneck_csp13->getOutput(0), 256, 1, 1, 1, "model.14");
// Upsample x2 via deconvolution (reuses deval; see ownership note above)
Weights deconvwts15{DataType::kFLOAT, deval, 256 * 2 * 2};
IDeconvolutionLayer* deconv15 = network->addDeconvolutionNd(*conv14->getOutput(0), 256, DimsHW{2, 2}, deconvwts15, emptywts);
deconv15->setStrideNd(DimsHW{2, 2});
deconv15->setNbGroups(256);
// intentionally NOT weightMap["deconv15"] = deconvwts15; — would double-free deval
// This concat selects which two feature maps are fused (cat backbone P3)
ITensor* inputTensors16[] = {deconv15->getOutput(0), bottleneck_csp4->getOutput(0)};
auto cat16 = network->addConcatenation(inputTensors16, 2);
auto bottleneck_csp17 = bottleneckCSP(network, weightMap, *cat16->getOutput(0), 512, 256, 1, false, 1, 0.5, "model.17");
auto conv18 = convBnLeaky(network, weightMap, *bottleneck_csp17->getOutput(0), 128, 1, 1, 1, "model.18");

// Upsample x2 for the P2 branch (128 channels; 128*2*2 floats fit in deval)
Weights deconvwts19{DataType::kFLOAT, deval, 128 * 2 * 2};
IDeconvolutionLayer* deconv19 = network->addDeconvolutionNd(*conv18->getOutput(0), 128, DimsHW{2, 2}, deconvwts19, emptywts);
deconv19->setStrideNd(DimsHW{2, 2});
deconv19->setNbGroups(128);

// cat backbone P2
ITensor* inputTensors20[] = {deconv19->getOutput(0), bottleneck_CSP2->getOutput(0)};
auto cat20 = network->addConcatenation(inputTensors20, 2);
auto bottleneck_csp21 = bottleneckCSP(network, weightMap, *cat20->getOutput(0), 256, 128, 1, false, 1, 0.5, "model.21");
// P2 detection conv — weight keys must match the .wts export ("model.31.m.*")
IConvolutionLayer* conv22 = network->addConvolutionNd(*bottleneck_csp21->getOutput(0), 3 * (CLASS_NUM_SPP + 5), DimsHW{1, 1}, weightMap["model.31.m.0.weight"], weightMap["model.31.m.0.bias"]);

auto conv23 = convBnLeaky(network, weightMap, *bottleneck_csp21->getOutput(0), 128, 3, 2, 1, "model.22");
ITensor* inputTensors24[] = {conv23->getOutput(0), conv18->getOutput(0)};
auto cat24 = network->addConcatenation(inputTensors24, 2);
auto bottleneck_csp25 = bottleneckCSP(network, weightMap, *cat24->getOutput(0), 256, 128, 1, false, 1, 0.5, "model.24");
// P3 detection conv
IConvolutionLayer* conv26 = network->addConvolutionNd(*bottleneck_csp25->getOutput(0), 3 * (CLASS_NUM_SPP + 5), DimsHW{1, 1}, weightMap["model.31.m.1.weight"], weightMap["model.31.m.1.bias"]);

auto conv27 = convBnLeaky(network, weightMap, *bottleneck_csp25->getOutput(0), 128, 3, 2, 1, "model.25");
ITensor* inputTensors28[] = {conv27->getOutput(0), conv14->getOutput(0)};
auto cat28 = network->addConcatenation(inputTensors28, 2);
auto bottleneck_csp29 = bottleneckCSP(network, weightMap, *cat28->getOutput(0), 256, 256, 1, false, 1, 0.5, "model.27");
// P4 detection conv
IConvolutionLayer* conv30 = network->addConvolutionNd(*bottleneck_csp29->getOutput(0), 3 * (CLASS_NUM_SPP + 5), DimsHW{1, 1}, weightMap["model.31.m.2.weight"], weightMap["model.31.m.2.bias"]);

auto conv31 = convBnLeaky(network, weightMap, *bottleneck_csp29->getOutput(0), 256, 3, 2, 1, "model.28");
ITensor* inputTensors31[] = {conv31->getOutput(0), conv10->getOutput(0)};
auto cat31 = network->addConcatenation(inputTensors31, 2);
auto bottleneck_csp32 = bottleneckCSP(network, weightMap, *cat31->getOutput(0), 512, 512, 1, false, 1, 0.5, "model.30");
// P5 detection conv
IConvolutionLayer* conv33 = network->addConvolutionNd(*bottleneck_csp32->getOutput(0), 3 * (CLASS_NUM_SPP + 5), DimsHW{1, 1}, weightMap["model.31.m.3.weight"], weightMap["model.31.m.3.bias"]);

// Attach the Yolo decode plugin over the four heads (order: P5, P4, P3, P2)
auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1");
const PluginFieldCollection* pluginData = creator->getFieldNames();
YoloPluginCreator *create = dynamic_cast<YoloPluginCreator *>(creator);
if (create == nullptr)
{
    // FIX: release the network and host weight buffers before bailing out
    // (previously leaked on this path).
    network->destroy();
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }
    return nullptr;
}
IPluginV2 *pluginObj = create->createPlugin_bak("yololayerh", pluginData, CLASS_NUM_SPP);
ITensor* inputTensors_yolo[] = {conv33->getOutput(0), conv30->getOutput(0), conv26->getOutput(0), conv22->getOutput(0)};
auto yolo = network->addPluginV2(inputTensors_yolo, 4, *pluginObj);

yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
network->markOutput(*yolo->getOutput(0));

// Build engine
std::cout << "maxbatchSize is : " << maxBatchSize << std::endl;
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB

#ifdef USE_FP16
config->setFlag(BuilderFlag::kFP16);
#endif

std::cout << "Building engine, please wait for a while..." << std::endl;
ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
// FIX: buildEngineWithConfig can return nullptr — don't claim success blindly.
if (engine != nullptr) {
    std::cout << "Build engine successfully!" << std::endl;
} else {
    std::cerr << "Failed to build engine!" << std::endl;
}

// Don't need the network any more
network->destroy();

// Release host memory (frees deval exactly once via the "deconv11" entry)
for (auto& mem : weightMap)
{
    free((void*) (mem.second.values));
}

return engine;

}`

stale[bot] commented 1 year ago

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.

Lemonononon commented 10 months ago

Friend, how did you solve this problem? @blue-q