yolov5 TRT PANet update

Hi @wang-xinyu , I tried to update the createEngine method and faced an issue. If you can help me resolve it would be wonderful. So, this is the updated method according to the updated yolov5s model:

ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt) {
    INetworkDefinition* network = builder->createNetworkV2(0U);

    // Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
    ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
    assert(data);

    std::map<std::string, Weights> weightMap = loadWeights("../yolov5s.wts");
    Weights emptywts{DataType::kFLOAT, nullptr, 0};

    // yolov5 backbone
    auto focus0 = focus(network, weightMap, *data, 3, 32, 3, "model.0");
    auto conv1 = convBnLeaky(network, weightMap, *focus0->getOutput(0), 64, 3, 2, 1, "model.1");
    auto bottleneck_csp2 = bottleneckCSP(network, weightMap, *conv1->getOutput(0), 64, 64, 1, true, 1, 0.5, "model.2");
    auto conv3 = convBnLeaky(network, weightMap, *bottleneck_csp2->getOutput(0), 128, 3, 2, 1, "model.3");
    auto bottleneck_csp4 = bottleneckCSP(network, weightMap, *conv3->getOutput(0), 128, 128, 3, true, 1, 0.5, "model.4");
    auto conv5 = convBnLeaky(network, weightMap, *bottleneck_csp4->getOutput(0), 256, 3, 2, 1, "model.5");
    auto bottleneck_csp6 = bottleneckCSP(network, weightMap, *conv5->getOutput(0), 256, 256, 3, true, 1, 0.5, "model.6");
    auto conv7 = convBnLeaky(network, weightMap, *bottleneck_csp6->getOutput(0), 512, 3, 2, 1, "model.7");
    auto spp8 = SPP(network, weightMap, *conv7->getOutput(0), 512, 512, 5, 9, 13, "model.8");

    // yolov5 head
    auto bottleneck_csp9 = bottleneckCSP(network, weightMap, *spp8->getOutput(0), 512, 512, 1, false, 1, 0.5, "model.9");
    auto conv10 = convBnLeaky(network, weightMap, *bottleneck_csp9->getOutput(0), 256, 1, 1, 1, "model.10");

    float *deval = reinterpret_cast<float*>(malloc(sizeof(float) * 256 * 2 * 2));
    for (int i = 0; i < 256 * 2 * 2; i++) {
        deval[i] = 1.0;
    }
    Weights deconvwts11{DataType::kFLOAT, deval, 256 * 2 * 2};
    IDeconvolutionLayer* deconv11 = network->addDeconvolutionNd(*conv10->getOutput(0), 256, DimsHW{2, 2}, deconvwts11, emptywts);

    ITensor* inputTensors12[] = {deconv11->getOutput(0), bottleneck_csp6->getOutput(0)};
    auto cat12 = network->addConcatenation(inputTensors12, 2);
    auto bottleneck_csp13 = bottleneckCSP(network, weightMap, *cat12->getOutput(0), 256, 256, 1, false, 1, 0.5, "model.13");
    auto conv14 = convBnLeaky(network, weightMap, *bottleneck_csp13->getOutput(0), 128, 1, 1, 1, "model.14");
    Weights deconvwts15{DataType::kFLOAT, deval, 128 * 2 * 2};
    IDeconvolutionLayer* deconv15 = network->addDeconvolutionNd(*conv14->getOutput(0), 128, DimsHW{2, 2}, deconvwts15, emptywts);

    ITensor* inputTensors16[] = {deconv15->getOutput(0), bottleneck_csp4->getOutput(0)};
    auto cat16 = network->addConcatenation(inputTensors16, 2);

    auto bottleneck_csp17 = bottleneckCSP(network, weightMap, *cat16->getOutput(0), 128, 128, 1, false, 1, 0.5, "model.17");
    IConvolutionLayer* conv18 = network->addConvolutionNd(*bottleneck_csp17->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{1, 1}, weightMap["model.18.weight"], weightMap["model.18.bias"]);
    auto conv19 = convBnLeaky(network, weightMap, *bottleneck_csp17->getOutput(0), 128, 3, 2, 1, "model.19");

    ITensor* inputTensors20[] = {conv19->getOutput(0), conv14->getOutput(0)};
    auto cat20 = network->addConcatenation(inputTensors20, 2);

    auto bottleneck_csp21 = bottleneckCSP(network, weightMap, *cat20->getOutput(0), 256, 256, 1, false, 1, 0.5, "model.21");
    IConvolutionLayer* conv22 = network->addConvolutionNd(*bottleneck_csp21->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{1, 1}, weightMap["model.22.weight"], weightMap["model.22.bias"]);
    auto conv23 = convBnLeaky(network, weightMap, *bottleneck_csp21->getOutput(0), 256, 3, 2, 1, "model.23");

    ITensor* inputTensors24[] = {conv23->getOutput(0), conv10->getOutput(0)};
    auto cat24 = network->addConcatenation(inputTensors24, 2);
    auto bottleneck_csp25 = bottleneckCSP(network, weightMap, *cat24->getOutput(0), 512, 512, 1, false, 1, 0.5, "model.25");
    IConvolutionLayer* conv26 = network->addConvolutionNd(*bottleneck_csp25->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{1, 1}, weightMap["model.22.weight"], weightMap["model.22.bias"]);

    auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1");
    const PluginFieldCollection* pluginData = creator->getFieldNames();
    IPluginV2 *pluginObj = creator->createPlugin("yololayer", pluginData);
    ITensor* inputTensors_yolo[] = {conv18->getOutput(0), conv22->getOutput(0), conv26->getOutput(0)};
    auto yolo = network->addPluginV2(inputTensors_yolo, 3, *pluginObj);

    yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
    network->markOutput(*yolo->getOutput(0));

    // Build engine
    builder->setMaxBatchSize(maxBatchSize);
    config->setMaxWorkspaceSize(1 << 30);  // 1 GB && (1 << 20) => 1 MB
#ifdef USE_FP16
    config->setFlag(BuilderFlag::kFP16);
#endif
    std::cout << "Building engine, please wait for a while..." << std::endl;
    ICudaEngine* engine = builder->buildEngineWithConfig(*network, *config);
    std::cout << "Build engine successfully!" << std::endl;

    // Don't need the network any more
    network->destroy();

    // Release host memory
    for (auto& mem : weightMap)
    {
        free((void*) (mem.second.values));
    }

    return engine;
}

And when I run ./yolov5s -s I get this:

root@c5de50828b33:/home/build# ./yolov5s -s
Loading weights: ../yolov5s.wts
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
Building engine, please wait for a while...
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: kernel weights has count 1024 but 262144 was expected
[06/26/2020-03:50:24] [E] [TRT] (Unnamed Layer* 132) [Deconvolution]: count of 1024 weights in kernel, but kernel dimensions (2,2) with 256 input channels, 256 output channels and 1 groups were specified. Expected Weights count is 256 * 2*2 * 256 / 1 = 262144
[06/26/2020-03:50:24] [E] [TRT] Could not compute dimensions for (Unnamed Layer* 132) [Deconvolution]_output, because the network is not valid
[06/26/2020-03:50:24] [E] [TRT] Network validation failed.
Build engine successfully!
yolov5s: /home/yolov5s.cpp:233: void APIToModel(unsigned int, nvinfer1::IHostMemory**): Assertion `engine != nullptr' failed.
Aborted (core dumped)

I am facing problems in the updated Deconvolution which now takes input from a ConvBnLeaky block rather than bottleneck CSP Any help would be great. Thanks!

wang-xinyu / tensorrtx

yolov5 TRT PANet update #65