YOLOv5-seg supports FP32, FP16, and INT8 precision, with both Python and C++ APIs.
Fixes the YOLOv5-seg model failing to build during INT8 quantization.
Detailed repair code:
A single-input Concat operator that performs no computation is inserted after the BatchNorm of each CBS (Conv + BN + SiLU) block in the Proto structure, to prevent the INT8 quantization failure caused by operator fusion.
/**
 * Builds one Conv -> BatchNorm -> SiLU block for the Proto head.
 *
 * SiLU is assembled from two layers: a sigmoid activation and an element-wise
 * product of the pre-activation tensor with the sigmoid output.
 *
 * @param network   Network definition the layers are added to.
 * @param weightMap Weights keyed by name; lname + ".conv.weight" is read here and the
 *                  map is forwarded to addBatchNorm2d with the prefix lname + ".bn".
 * @param input     Input tensor of the convolution.
 * @param outch     Output channel count passed to the convolution.
 * @param ksize     Square kernel size.
 * @param s         Stride, applied to both spatial dimensions.
 * @param g         Number of convolution groups.
 * @param lname     Name prefix used for the weight lookup and for the layer names.
 * @return The element-wise product layer, named lname + ".silu".
 */
static ILayer *
convBlockProto(INetworkDefinition *network, std::map<std::string, Weights> &weightMap, ITensor &input, int outch,
               int ksize, int s, int g, std::string lname) {
    // The convolution is created without a bias: an empty Weights is passed in its place.
    Weights emptywts{DataType::kFLOAT, nullptr, 0};
    // Integer division: ksize 1 -> padding 0, ksize 3 -> padding 1 (the kernel sizes Proto passes).
    int p = ksize / 3;

    IConvolutionLayer *conv1 = network->addConvolutionNd(input, outch, DimsHW{ksize, ksize},
                                                         weightMap[lname + ".conv.weight"], emptywts);
    assert(conv1);
    conv1->setStrideNd(DimsHW{s, s});
    conv1->setPaddingNd(DimsHW{p, p});
    conv1->setNbGroups(g);
    conv1->setName((lname + ".conv").c_str());

    IScaleLayer *bn1 = addBatchNorm2d(network, weightMap, *conv1->getOutput(0), lname + ".bn", 1e-3);
    assert(bn1);
    bn1->setName((lname + ".bn").c_str());

    // Tensor that feeds both the sigmoid and the product below.
    ITensor *preAct = bn1->getOutput(0);

#if defined(USE_INT8)
    // A single-input concatenation is placed between the batch norm and the activation.
    // It does no computation; it is there to avoid the operator-fusion failure seen when
    // building with INT8 quantization:
    // Error Code 10: Internal Error (Could not find any implementation for node
    // model.24.proto.cv3.conv + model.24.proto.cv3.bn + PWN(PWN(model.24.proto.cv3.sigmoid), PWN(model.24.proto.cv3.silu)).)
    ITensor *inputTensors[] = {preAct};
    auto concat = network->addConcatenation(inputTensors, 1);
    // Checked like every other layer in this function before it is dereferenced.
    assert(concat);
    preAct = concat->getOutput(0);
#endif

    // silu = x * sigmoid(x)
    auto sig = network->addActivation(*preAct, ActivationType::kSIGMOID);
    assert(sig);
    sig->setName((lname + ".sigmoid").c_str());

    auto ew = network->addElementWise(*preAct, *sig->getOutput(0), ElementWiseOperation::kPROD);
    assert(ew);
    ew->setName((lname + ".silu").c_str());
    return ew;
}
/**
 * Assembles the Proto branch: cv1 (3x3 block) -> 2x nearest resize -> cv2 (3x3 block) -> cv3 (1x1 block).
 *
 * @param network   Network definition the layers are added to.
 * @param weightMap Weights keyed by name, forwarded to every convBlockProto call.
 * @param input     Tensor fed into the first block.
 * @param c_        Output channel count for cv1 and cv2.
 * @param c2        Output channel count for cv3.
 * @param lname     Name prefix; the blocks use lname + ".cv1", ".cv2" and ".cv3".
 * @return The layer returned by the cv3 block.
 */
static ILayer *
Proto(INetworkDefinition *network, std::map<std::string, Weights> &weightMap, ITensor &input, int c_, int c2,
      std::string lname) {
    // First block: kernel 3, stride 1, one group.
    ILayer *firstBlock = convBlockProto(network, weightMap, input, c_, 3, 1, 1, lname + ".cv1");

    // Nearest-neighbour resize; the last two of the four dimensions are scaled by 2
    // (presumably N, C, H, W order — confirm against the input tensor layout).
    auto resize = network->addResize(*firstBlock->getOutput(0));
    assert(resize);
    resize->setResizeMode(nvinfer1::InterpolationMode::kNEAREST);
    const float factors[] = {1, 1, 2, 2};
    resize->setScales(factors, 4);

    // Second block runs on the resized tensor with the same settings as the first.
    ILayer *secondBlock =
        convBlockProto(network, weightMap, *resize->getOutput(0), c_, 3, 1, 1, lname + ".cv2");

    // Final 1x1 block producing c2 channels.
    ILayer *lastBlock =
        convBlockProto(network, weightMap, *secondBlock->getOutput(0), c2, 1, 1, 1, lname + ".cv3");
    assert(lastBlock);
    return lastBlock;
}
Detailed repair code: a single-input Concat operator that performs no computation is inserted after the BatchNorm of each CBS (Conv + BN + SiLU) block in the Proto structure, to prevent the INT8 quantization failure caused by operator fusion.