wang-xinyu / tensorrtx

Implementation of popular deep learning networks with TensorRT network definition API
MIT License
6.75k stars 1.74k forks source link

scaled-yolov4 engine build, but got error #311

Closed Adarine99 closed 3 years ago

Adarine99 commented 3 years ago

scaled-yolov4 link:

createEngine as follows: `ICudaEngine* createEngine(unsigned int maxBatchSize, IBuilder* builder, IBuilderConfig* config, DataType dt) { INetworkDefinition* network = builder->createNetworkV2(0U);

// Create input tensor of shape {3, INPUT_H, INPUT_W} with name INPUT_BLOB_NAME
ITensor* data = network->addInput(INPUT_BLOB_NAME, dt, Dims3{3, INPUT_H, INPUT_W});
assert(data);

std::map<std::string, Weights> weightMap = loadWeights("../scaled_yolov4.wts");
Weights emptywts{DataType::kFLOAT, nullptr, 0};

// define each layer.
auto l0 = convBnMish(network, weightMap, *data, 32, 3, 1, 1, 0);
auto l1 = convBnMish(network, weightMap, *l0->getOutput(0), 64, 3, 2, 1, 1);
auto l2 = convBnMish(network, weightMap, *l1->getOutput(0), 64, 1, 1, 0, 2);
auto l3 = convBnMish(network, weightMap, *l2->getOutput(0), 64, 3, 1, 1, 1);
auto ew4 = network->addElementWise(*l3->getOutput(0), *l1->getOutput(0), ElementWiseOperation::kSUM);

auto l5 = convBnMish(network, weightMap, *ew4->getOutput(0), 128, 3, 2, 1, 5);
auto l6 = convBnMish(network, weightMap, *l5->getOutput(0), 64, 1, 1, 0, 6);
auto l7 = l5;
auto l8 = convBnMish(network, weightMap, *l7->getOutput(0), 64, 1, 1, 0, 8);
auto l9 = convBnMish(network, weightMap, *l8->getOutput(0), 64, 1, 1, 0, 9);
auto l10 = convBnMish(network, weightMap, *l9->getOutput(0), 64, 3, 1, 1, 10);
auto ew11 = network->addElementWise(*l10->getOutput(0), *l8->getOutput(0), ElementWiseOperation::kSUM);
auto l12 = convBnMish(network, weightMap, *ew11->getOutput(0), 64, 1, 1, 0, 12);
auto l13 = convBnMish(network, weightMap, *l12->getOutput(0), 64, 3, 1, 1, 13);
auto ew14 = network->addElementWise(*l13->getOutput(0), *ew11->getOutput(0), ElementWiseOperation::kSUM);
auto l15 = convBnMish(network, weightMap, *ew14->getOutput(0), 64, 1, 1, 0, 15);

ITensor* inputTensors16[] = {l15->getOutput(0), l6->getOutput(0)};
auto cat16 = network->addConcatenation(inputTensors16, 2);

auto l17 = convBnMish(network, weightMap, *cat16->getOutput(0), 128, 1, 1, 0, 17);
auto l18 = convBnMish(network, weightMap, *l17->getOutput(0), 256, 3, 2, 1, 18);
auto l19 = convBnMish(network, weightMap, *l18->getOutput(0), 128, 1, 1, 0, 19);
auto l20 = l18;
auto l21 = convBnMish(network, weightMap, *l20->getOutput(0), 128, 1, 1, 0, 21);
auto l22 = convBnMish(network, weightMap, *l21->getOutput(0), 128, 1, 1, 0, 22);
auto l23 = convBnMish(network, weightMap, *l22->getOutput(0), 128, 3, 1, 1, 23);
auto ew24 = network->addElementWise(*l23->getOutput(0), *l21->getOutput(0), ElementWiseOperation::kSUM);
auto l25 = convBnMish(network, weightMap, *ew24->getOutput(0), 128, 1, 1, 0, 25);
auto l26 = convBnMish(network, weightMap, *l25->getOutput(0), 128, 3, 1, 1, 26);
auto ew27 = network->addElementWise(*l26->getOutput(0), *ew24->getOutput(0), ElementWiseOperation::kSUM);
auto l28 = convBnMish(network, weightMap, *ew27->getOutput(0), 128, 1, 1, 0, 28);
auto l29 = convBnMish(network, weightMap, *l28->getOutput(0), 128, 3, 1, 1, 29);
auto ew30 = network->addElementWise(*l29->getOutput(0), *ew27->getOutput(0), ElementWiseOperation::kSUM);
auto l31 = convBnMish(network, weightMap, *ew30->getOutput(0), 128, 1, 1, 0, 31);
auto l32 = convBnMish(network, weightMap, *l31->getOutput(0), 128, 3, 1, 1, 32);
auto ew33 = network->addElementWise(*l32->getOutput(0), *ew30->getOutput(0), ElementWiseOperation::kSUM);
auto l34 = convBnMish(network, weightMap, *ew33->getOutput(0), 128, 1, 1, 0, 34);
auto l35 = convBnMish(network, weightMap, *l34->getOutput(0), 128, 3, 1, 1, 35);
auto ew36 = network->addElementWise(*l35->getOutput(0), *ew33->getOutput(0), ElementWiseOperation::kSUM);
auto l37 = convBnMish(network, weightMap, *ew36->getOutput(0), 128, 1, 1, 0, 37);
auto l38 = convBnMish(network, weightMap, *l37->getOutput(0), 128, 3, 1, 1, 38);
auto ew39 = network->addElementWise(*l38->getOutput(0), *ew36->getOutput(0), ElementWiseOperation::kSUM);
auto l40 = convBnMish(network, weightMap, *ew39->getOutput(0), 128, 1, 1, 0, 40);
auto l41 = convBnMish(network, weightMap, *l40->getOutput(0), 128, 3, 1, 1, 41);
auto ew42 = network->addElementWise(*l41->getOutput(0), *ew39->getOutput(0), ElementWiseOperation::kSUM);
auto l43 = convBnMish(network, weightMap, *ew42->getOutput(0), 128, 1, 1, 0, 43);
auto l44 = convBnMish(network, weightMap, *l43->getOutput(0), 128, 3, 1, 1, 44);
auto ew45 = network->addElementWise(*l44->getOutput(0), *ew42->getOutput(0), ElementWiseOperation::kSUM);
auto l46 = convBnMish(network, weightMap, *ew45->getOutput(0), 128, 1, 1, 0, 46);

ITensor* inputTensors47[] = {l46->getOutput(0), l19->getOutput(0)};
auto cat47 = network->addConcatenation(inputTensors47, 2);

auto l48 = convBnMish(network, weightMap, *cat47->getOutput(0), 256, 1, 1, 0, 48);
auto l49 = convBnMish(network, weightMap, *l48->getOutput(0), 512, 3, 2, 1, 49);
auto l50 = convBnMish(network, weightMap, *l49->getOutput(0), 256, 1, 1, 0, 50);
auto l51 = l49;
auto l52 = convBnMish(network, weightMap, *l51->getOutput(0), 256, 1, 1, 0, 52);
auto l53 = convBnMish(network, weightMap, *l52->getOutput(0), 256, 1, 1, 0, 53);
auto l54 = convBnMish(network, weightMap, *l53->getOutput(0), 256, 3, 1, 1, 54);
auto ew55 = network->addElementWise(*l54->getOutput(0), *l52->getOutput(0), ElementWiseOperation::kSUM);
auto l56 = convBnMish(network, weightMap, *ew55->getOutput(0), 256, 1, 1, 0, 56);
auto l57 = convBnMish(network, weightMap, *l56->getOutput(0), 256, 3, 1, 1, 57);
auto ew58 = network->addElementWise(*l57->getOutput(0), *ew55->getOutput(0), ElementWiseOperation::kSUM);
auto l59 = convBnMish(network, weightMap, *ew58->getOutput(0), 256, 1, 1, 0, 59);
auto l60 = convBnMish(network, weightMap, *l59->getOutput(0), 256, 3, 1, 1, 60);
auto ew61 = network->addElementWise(*l60->getOutput(0), *ew58->getOutput(0), ElementWiseOperation::kSUM);
auto l62 = convBnMish(network, weightMap, *ew61->getOutput(0), 256, 1, 1, 0, 62);
auto l63 = convBnMish(network, weightMap, *l62->getOutput(0), 256, 3, 1, 1, 63);
auto ew64 = network->addElementWise(*l63->getOutput(0), *ew61->getOutput(0), ElementWiseOperation::kSUM);
auto l65 = convBnMish(network, weightMap, *ew64->getOutput(0), 256, 1, 1, 0, 65);
auto l66 = convBnMish(network, weightMap, *l65->getOutput(0), 256, 3, 1, 1, 66);
auto ew67 = network->addElementWise(*l66->getOutput(0), *ew64->getOutput(0), ElementWiseOperation::kSUM);
auto l68 = convBnMish(network, weightMap, *ew67->getOutput(0), 256, 1, 1, 0, 68);
auto l69 = convBnMish(network, weightMap, *l68->getOutput(0), 256, 3, 1, 1, 69);
auto ew70 = network->addElementWise(*l69->getOutput(0), *ew67->getOutput(0), ElementWiseOperation::kSUM);
auto l71 = convBnMish(network, weightMap, *ew70->getOutput(0), 256, 1, 1, 0, 71);
auto l72 = convBnMish(network, weightMap, *l71->getOutput(0), 256, 3, 1, 1, 72);
auto ew73 = network->addElementWise(*l72->getOutput(0), *ew70->getOutput(0), ElementWiseOperation::kSUM);
auto l74 = convBnMish(network, weightMap, *ew73->getOutput(0), 256, 1, 1, 0, 74);
auto l75 = convBnMish(network, weightMap, *l74->getOutput(0), 256, 3, 1, 1, 75);
auto ew76 = network->addElementWise(*l75->getOutput(0), *ew73->getOutput(0), ElementWiseOperation::kSUM);
auto l77 = convBnMish(network, weightMap, *ew76->getOutput(0), 256, 1, 1, 0, 77);

ITensor* inputTensors78[] = {l77->getOutput(0), l50->getOutput(0)};
auto cat78 = network->addConcatenation(inputTensors78, 2);

auto l79 = convBnMish(network, weightMap, *cat78->getOutput(0), 512, 1, 1, 0, 79);
auto l80 = convBnMish(network, weightMap, *l79->getOutput(0), 1024, 3, 2, 1, 80);
auto l81 = convBnMish(network, weightMap, *l80->getOutput(0), 512, 1, 1, 0, 81);
auto l82 = l80;
auto l83 = convBnMish(network, weightMap, *l82->getOutput(0), 512, 1, 1, 0, 83);
auto l84 = convBnMish(network, weightMap, *l83->getOutput(0), 512, 1, 1, 0, 84);
auto l85 = convBnMish(network, weightMap, *l84->getOutput(0), 512, 3, 1, 1, 85);
auto ew86 = network->addElementWise(*l85->getOutput(0), *l83->getOutput(0), ElementWiseOperation::kSUM);
auto l87 = convBnMish(network, weightMap, *ew86->getOutput(0), 512, 1, 1, 0, 87);
auto l88 = convBnMish(network, weightMap, *l87->getOutput(0), 512, 3, 1, 1, 88);
auto ew89 = network->addElementWise(*l88->getOutput(0), *ew86->getOutput(0), ElementWiseOperation::kSUM);
auto l90 = convBnMish(network, weightMap, *ew89->getOutput(0), 512, 1, 1, 0, 90);
auto l91 = convBnMish(network, weightMap, *l90->getOutput(0), 512, 3, 1, 1, 91);
auto ew92 = network->addElementWise(*l91->getOutput(0), *ew89->getOutput(0), ElementWiseOperation::kSUM);
auto l93 = convBnMish(network, weightMap, *ew92->getOutput(0), 512, 1, 1, 0, 93);
auto l94 = convBnMish(network, weightMap, *l93->getOutput(0), 512, 3, 1, 1, 94);
auto ew95 = network->addElementWise(*l94->getOutput(0), *ew92->getOutput(0), ElementWiseOperation::kSUM);
auto l96 = convBnMish(network, weightMap, *ew95->getOutput(0), 512, 1, 1, 0, 96);

ITensor* inputTensors97[] = {l96->getOutput(0), l81->getOutput(0)};
auto cat97 = network->addConcatenation(inputTensors97, 2);

auto l98 = convBnMish(network, weightMap, *cat97->getOutput(0), 1024, 1, 1, 0, 98);
auto l99 = convBnMish(network, weightMap, *l98->getOutput(0), 512, 1, 1, 0, 99);
auto l100 = l98;
auto l101 = convBnMish(network, weightMap, *l100->getOutput(0), 512, 1, 1, 0, 101);
auto l102 = convBnMish(network, weightMap, *l101->getOutput(0), 512, 3, 1, 1, 102);
auto l103 = convBnMish(network, weightMap, *l102->getOutput(0), 512, 1, 1, 0, 103);

auto pool104 = network->addPoolingNd(*l103->getOutput(0), PoolingType::kMAX, DimsHW{5, 5});
pool104->setPaddingNd(DimsHW{2, 2});
pool104->setStrideNd(DimsHW{1, 1});

auto l105=l103;

auto pool106 = network->addPoolingNd(*l105->getOutput(0), PoolingType::kMAX, DimsHW{9, 9});
pool106->setPaddingNd(DimsHW{4, 4});
pool106->setStrideNd(DimsHW{1, 1});

auto l107=l103;

auto pool108 = network->addPoolingNd(*l107->getOutput(0), PoolingType::kMAX, DimsHW{13, 13});
pool108->setPaddingNd(DimsHW{6, 6});
pool108->setStrideNd(DimsHW{1, 1}); 
ITensor* inputTensors109[] = {pool108->getOutput(0), pool106->getOutput(0), pool104->getOutput(0), l103->getOutput(0)};
auto cat109 = network->addConcatenation(inputTensors109, 4);

auto l110 = convBnMish(network, weightMap, *cat109->getOutput(0), 512, 1, 1, 0, 110);
auto l111 = convBnMish(network, weightMap, *l110->getOutput(0), 512, 3, 1, 1, 111);
ITensor* inputTensors112[] = {l111->getOutput(0), l99->getOutput(0)};
auto cat112 = network->addConcatenation(inputTensors112, 2);
auto l113 = convBnMish(network, weightMap, *cat112->getOutput(0), 512, 1, 1, 0, 113);
auto l114 = convBnMish(network, weightMap, *l113->getOutput(0), 256, 1, 1, 0, 114);

float *deval = reinterpret_cast<float*>(malloc(sizeof(float) * 256 * 2 * 2));
for (int i = 0; i < 256 * 2 * 2; i++) {
    deval[i] = 1.0;
}
Weights deconvwts115{DataType::kFLOAT, deval, 256 * 2 * 2};
IDeconvolutionLayer* deconv115 = network->addDeconvolutionNd(*l114->getOutput(0), 256, DimsHW{2, 2}, deconvwts115, emptywts);
assert(deconv115);
deconv115->setStrideNd(DimsHW{2, 2});
deconv115->setNbGroups(256);
weightMap["deconv115"] = deconvwts115;
auto l116=l79;
auto l117 = convBnMish(network, weightMap, *l116->getOutput(0), 256, 1, 1, 0, 117);
ITensor* inputTensors118[] = {l117->getOutput(0), deconv115->getOutput(0)};
auto cat118 = network->addConcatenation(inputTensors118, 2);

auto l119 = convBnMish(network, weightMap, *cat118->getOutput(0), 256, 1, 1, 0, 119);
auto l120 = convBnMish(network, weightMap, *l119->getOutput(0), 256, 1, 1, 0, 120);
auto l121=l119;
auto l122 = convBnMish(network, weightMap, *l121->getOutput(0), 256, 1, 1, 0, 122);
auto l123 = convBnMish(network, weightMap, *l122->getOutput(0), 256, 3, 1, 1, 123);
auto l124 = convBnMish(network, weightMap, *l123->getOutput(0), 256, 1, 1, 0, 124);
auto l125 = convBnMish(network, weightMap, *l124->getOutput(0), 256, 3, 1, 1, 125);
ITensor* inputTensors126[] = {l125->getOutput(0), l120->getOutput(0)};
auto cat126 = network->addConcatenation(inputTensors126, 2);

auto l127 = convBnMish(network, weightMap, *cat126->getOutput(0), 256, 1, 1, 0, 127);
auto l128 = convBnMish(network, weightMap, *l127->getOutput(0), 128, 1, 1, 0, 128);

Weights deconvwts129{DataType::kFLOAT, deval, 128 * 2 * 2};
IDeconvolutionLayer* deconv129 = network->addDeconvolutionNd(*l128->getOutput(0), 128, DimsHW{2, 2}, deconvwts129, emptywts);
assert(deconv129);
deconv129->setStrideNd(DimsHW{2, 2});
deconv129->setNbGroups(128);

auto l130=l48;
auto l131 = convBnMish(network, weightMap, *l130->getOutput(0), 128, 1, 1, 0, 131);
ITensor* inputTensors132[] = {l131->getOutput(0), deconv129->getOutput(0)};
auto cat132 = network->addConcatenation(inputTensors132, 2);

auto l133 = convBnMish(network, weightMap, *cat132->getOutput(0), 128, 1, 1, 0, 133);
auto l134 = convBnMish(network, weightMap, *l133->getOutput(0), 128, 1, 1, 0, 134);
auto l135 = l133;
auto l136 = convBnMish(network, weightMap, *l135->getOutput(0), 128, 1, 1, 0, 136);
auto l137 = convBnMish(network, weightMap, *l136->getOutput(0), 128, 3, 1, 1, 137);
auto l138 = convBnMish(network, weightMap, *l137->getOutput(0), 128, 1, 1, 0, 138);
auto l139 = convBnMish(network, weightMap, *l138->getOutput(0), 128, 3, 1, 1, 139);
ITensor* inputTensors140[] = {l139->getOutput(0), l134->getOutput(0)};
auto cat140 = network->addConcatenation(inputTensors140, 2);
auto l141 = convBnMish(network, weightMap, *cat140->getOutput(0), 128, 1, 1, 0, 141);
auto l142 = convBnMish(network, weightMap, *l141->getOutput(0), 256, 3, 1, 1, 142);
IConvolutionLayer* conv143 = network->addConvolutionNd(*l142->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{1, 1}, weightMap["module_list.143.Conv2d.weight"], weightMap["module_list.143.Conv2d.bias"]);
assert(conv143);
//144 is yolo layer
auto l145=l141;
auto l146 = convBnMish(network, weightMap, *l145->getOutput(0), 256, 3, 2, 1, 146);
ITensor* inputTensors147[] = {l146->getOutput(0), l127->getOutput(0)};
auto cat147 = network->addConcatenation(inputTensors147, 2);

auto l148 = convBnMish(network, weightMap, *cat147->getOutput(0), 256, 1, 1, 0, 148);
auto l149 = convBnMish(network, weightMap, *l148->getOutput(0), 256, 1, 1, 0, 149);
auto l150 = l148;
auto l151 = convBnMish(network, weightMap, *l150->getOutput(0), 256, 1, 1, 0, 151);
auto l152 = convBnMish(network, weightMap, *l151->getOutput(0), 256, 3, 1, 1, 152);
auto l153 = convBnMish(network, weightMap, *l152->getOutput(0), 256, 1, 1, 0, 153);
auto l154 = convBnMish(network, weightMap, *l153->getOutput(0), 256, 3, 1, 1, 154);
ITensor* inputTensors155[] = {l154->getOutput(0), l149->getOutput(0)};
auto cat155 = network->addConcatenation(inputTensors155, 2);

auto l156 = convBnMish(network, weightMap, *cat155->getOutput(0), 256, 1, 1, 0, 156);
auto l157 = convBnMish(network, weightMap, *l156->getOutput(0), 512, 3, 1, 1, 157);
IConvolutionLayer* conv158 = network->addConvolutionNd(*l157->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{1, 1}, weightMap["module_list.158.Conv2d.weight"], weightMap["module_list.158.Conv2d.bias"]);
assert(conv158);
//159 is yolo layer
auto l160 = l156;
auto l161 = convBnMish(network, weightMap, *l160->getOutput(0), 512, 3, 2, 1, 161);
ITensor* inputTensors162[] = {l161->getOutput(0), l113->getOutput(0)};
auto cat162 = network->addConcatenation(inputTensors162, 2);

auto l163 = convBnMish(network, weightMap, *cat162->getOutput(0), 512, 1, 1, 0, 163);
auto l164 = convBnMish(network, weightMap, *l163->getOutput(0), 512, 1, 1, 0, 164);
auto l165 = l163;
auto l166 = convBnMish(network, weightMap, *l165->getOutput(0), 512, 1, 1, 0, 166);
auto l167 = convBnMish(network, weightMap, *l166->getOutput(0), 512, 3, 1, 1, 167);
auto l168 = convBnMish(network, weightMap, *l167->getOutput(0), 512, 1, 1, 0, 168);
auto l169 = convBnMish(network, weightMap, *l168->getOutput(0), 512, 3, 1, 1, 169);
ITensor* inputTensors170[] = {l169->getOutput(0), l164->getOutput(0)};
auto cat170 = network->addConcatenation(inputTensors170, 2);
auto l171 = convBnMish(network, weightMap, *cat170->getOutput(0), 512, 1, 1, 0, 171);
auto l172 = convBnMish(network, weightMap, *l171->getOutput(0), 1024, 3, 1, 1, 172);
IConvolutionLayer* conv173 = network->addConvolutionNd(*l172->getOutput(0), 3 * (Yolo::CLASS_NUM + 5), DimsHW{1, 1}, weightMap["module_list.173.Conv2d.weight"], weightMap["module_list.173.Conv2d.bias"]);
assert(conv173);
// 174 is yolo layer

auto creator = getPluginRegistry()->getPluginCreator("YoloLayer_TRT", "1");
const PluginFieldCollection* pluginData = creator->getFieldNames();
IPluginV2 *pluginObj = creator->createPlugin("yololayer", pluginData);
ITensor* inputTensors_yolo[] = {conv143->getOutput(0), conv158->getOutput(0), conv173->getOutput(0)};
auto yolo = network->addPluginV2(inputTensors_yolo, 3, *pluginObj);

yolo->getOutput(0)->setName(OUTPUT_BLOB_NAME);
std::cout << "set name out" << std::endl;
network->markOutput(*yolo->getOutput(0));

// Build engine
builder->setMaxBatchSize(maxBatchSize);
config->setMaxWorkspaceSize(16 * (1 << 20));  // 16MB

`

build success;

run error: set name out [12/02/2020-14:17:01] [E] [TRT] (Unnamed Layer* 6) [Convolution]: kernel weights has count 2048 but 4096 was expected [12/02/2020-14:17:01] [E] [TRT] (Unnamed Layer* 6) [Convolution]: count of 2048 weights in kernel, but kernel dimensions (1,1) with 64 input channels, 64 output channels and 1 groups were specified. Expected Weights count is 64 * 1*1 * 64 / 1 = 4096 [12/02/2020-14:17:01] [E] [TRT] Could not compute dimensions for (Unnamed Layer* 6) [Convolution]_output, because the network is not valid [12/02/2020-14:17:01] [E] [TRT] Network validation failed. build out scaled_yolov4:/tensorrtx/yolov4_scaled/scaled_yolov4.cpp:532: void APIToModel(unsigned int, nvinfer1::IHostMemory**): Assertion `engine != nullptr' failed.

Thanks~

wang-xinyu commented 3 years ago

"[Convolution]: kernel weights has count 2048 but 4096 was expected" — it means your layer setup is not right.

One debug method is to set the output on shallow layers and comment out the following layers. For example, you can set the 3rd layer as the output and check if it runs fine; then check the 6th layer, then the 9th layer.

Adarine99 commented 3 years ago

"[Convolution]: kernel weights has count 2048 but 4096 was expected" — it means your layer setup is not right.

One debug method is to set the output on shallow layers and comment out the following layers. For example, you can set the 3rd layer as the output and check if it runs fine; then check the 6th layer, then the 9th layer.

got it! Thanks