Node name mismatch when using sliced tensor

When using a network that splits the tensor at some point in the network into two pieces, I run into a problem. The torch.split operator is not supported, so I rewrote it using two slices:

b = a[:, :48, :, :]
c = a[:, 48:, :, :]

This works, however, as soon as you try to use the new tensor (by applying a convolution), you get an error in the compilation step (quantization succeeds):

[VAIQ_NOTE]: OS and CPU information:
               system --- Linux
                 node --- 9a3b203220b2
              release --- 5.4.0-122-generic
              version --- #138~18.04.1-Ubuntu SMP Fri Jun 24 14:14:03 UTC 2022
              machine --- x86_64
            processor --- x86_64

[VAIQ_NOTE]: Tools version information:
                  GCC --- GCC 9.4.0
               python --- 3.7.12
              pytorch --- 1.12.1
        vai_q_pytorch --- 3.0.0+a44284e+torch1.12.1

[VAIQ_NOTE]: GPU information:
          device name --- NVIDIA RTX A4000
     device available --- True
         device count --- 1
       current device --- 0

... (snipped) ...
[VAIQ_NOTE]: Find subgraph for eltwise_fix_24:
node name:Model::Model/570, op type:nndct_elemwise_add, output shape: [1, 288, 288, 48]

I20230404 08:09:28.695523  7290 compile_pass_manager.cpp:287] [UNILOG][INFO] Compile mode: dpu
I20230404 08:09:28.695554  7290 compile_pass_manager.cpp:288] [UNILOG][INFO] Debug mode: null
I20230404 08:09:28.695559  7290 compile_pass_manager.cpp:292] [UNILOG][INFO] Target architecture: DPUCZDX8G_ISA1_B1024
I20230404 08:09:28.695672  7290 compile_pass_manager.cpp:357] [UNILOG][INFO] Graph name: nndct_elemwise_add_ZL2UzPahbf8XI1r5, with op num: 6
I20230404 08:09:28.695683  7290 compile_pass_manager.cpp:370] [UNILOG][INFO] Begin to compile...
I20230404 08:09:28.698843  7290 compile_pass_manager.cpp:381] [UNILOG][INFO] Total device subgraph number 4, DPU subgraph number 1
I20230404 08:09:28.698869  7290 compile_pass_manager.cpp:386] [UNILOG][INFO] Compile done.

[VAIQ_ERROR][QUANTIZER_TORCH_CALIB_RESULT_MISMATCH]: Node name mismatch is found when loading quantization steps of tensors. Please make sure Vai_q_pytorch version and pytorch version for test mode are the same as those in calibration (or QAT training) mode.

In the example script below, the problem occurs on the line with t_213, applying the convolution to the sliced tensor.

Here is a minimal script to reproduce the problem. Run with:

python vitis_min.py
python vitis_min.py --quant_mode test --export --target DPUCZDX8G_ISA1_B1024

import argparse
import numpy as np
import torch
import torch.nn as nn

def randdata(size):
    return nn.Parameter(torch.from_numpy(np.random.randn(*size).astype(np.float32)), requires_grad=False)

class Model(nn.Module):
  def __init__(self):
    super(Model, self).__init__()
    self.n_Conv_0 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 48, 'padding': [1, 1], 'kernel_size': (3, 3), 'stride': [2, 2], 'in_channels': 3, 'bias': True})
    self.n_Conv_0.weight.data = randdata([48, 3, 3, 3])
    self.n_Conv_0.bias.data = randdata([48])
    self.n_Conv_3 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 96, 'padding': [1, 1], 'kernel_size': (3, 3), 'stride': [2, 2], 'in_channels': 48, 'bias': True})
    self.n_Conv_3.weight.data = randdata([96, 48, 3, 3])
    self.n_Conv_3.bias.data = randdata([96])
    self.n_Conv_6 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 96, 'padding': [0, 0], 'kernel_size': (1, 1), 'stride': [1, 1], 'in_channels': 96, 'bias': True})
    self.n_Conv_6.weight.data = randdata([96, 96, 1, 1])
    self.n_Conv_6.bias.data = randdata([96])
    self.n_Conv_19 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 48, 'padding': [1, 1], 'kernel_size': (3, 3), 'stride': [1, 1], 'in_channels': 48, 'bias': True})
    self.n_Conv_19.weight.data = randdata([48, 48, 3, 3])
    self.n_Conv_19.bias.data = randdata([48])
    self.n_Conv_22 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 48, 'padding': [1, 1], 'kernel_size': (3, 3), 'stride': [1, 1], 'in_channels': 48, 'bias': True})
    self.n_Conv_22.weight.data = randdata([48, 48, 3, 3])
    self.n_Conv_22.bias.data = randdata([48])
    self.n_Conv_26 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 48, 'padding': [1, 1], 'kernel_size': (3, 3), 'stride': [1, 1], 'in_channels': 48, 'bias': True})
    self.n_Conv_26.weight.data = randdata([48, 48, 3, 3])
    self.n_Conv_26.bias.data = randdata([48])
    self.n_Conv_29 = nn.Conv2d(**{'groups': 1, 'dilation': [1, 1], 'out_channels': 48, 'padding': [1, 1], 'kernel_size': (3, 3), 'stride': [1, 1], 'in_channels': 48, 'bias': True})
    self.n_Conv_29.weight.data = randdata([48, 48, 3, 3])
    self.n_Conv_29.bias.data = randdata([48])

  def forward(self, *inputs):
    images, = inputs
    t_194 = self.n_Conv_0(images)
    t_195 = torch.sigmoid(t_194)
    t_196 = torch.mul(t_194, t_195)
    t_197 = self.n_Conv_3(t_196)
    t_198 = torch.sigmoid(t_197)
    t_199 = torch.mul(t_197, t_198)
    t_200 = self.n_Conv_6(t_199)
    t_201 = torch.sigmoid(t_200)
    t_202 = torch.mul(t_200, t_201)
    #return t_202 #OK
    t_207 = t_202[:, :48, :, :]
    #return t_207 #OK
    t_212 = t_202[:, 48:, :, :]
    #return t_207, t_212 #OK
    t_213 = self.n_Conv_19(t_212)
    #return t_213 #fail
    t_214 = torch.sigmoid(t_213)
    t_215 = torch.mul(t_213, t_214)
    t_216 = self.n_Conv_22(t_215)
    t_217 = torch.sigmoid(t_216)
    t_218 = torch.mul(t_216, t_217)
    t_219 = torch.add(t_212, t_218)
    t_220 = self.n_Conv_26(t_219)
    t_221 = torch.sigmoid(t_220)
    t_222 = torch.mul(t_220, t_221)
    t_223 = self.n_Conv_29(t_222)
    t_224 = torch.sigmoid(t_223)
    t_225 = torch.mul(t_223, t_224)
    t_226 = torch.add(t_219, t_225)
    t_227 = torch.cat((t_207, t_212, t_219, t_226), **{'dim': 1})
    return t_227

@torch.no_grad()
def test_run_model(inputs=[torch.from_numpy(np.random.randn(*[1, 3, 1152, 1152]).astype(np.float32))]):
  model = Model()
  model.eval()
  rs = model(*inputs)
  print(rs)
  return rs

model = Model()
print(model)

for p in model.parameters():
    p.requires_grad = False
model.eval()
model.float()
model.cuda()

example_inputs = tuple([torch.rand(1, 3, 1152, 1152).cuda()])

from pytorch_nndct.apis import torch_quantizer, dump_xmodel

def quantization(args):
    if args.quant_mode != 'test' and args.deploy:
        args.deploy = False
        print(r'Warning: Exporting xmodel needs to be done in quantization test mode, turn off it in this running!')
    if args.deploy and (args.batch_size != 1 or args.subset_len != 1):
        print(r'Warning: Exporting xmodel needs batch size to be 1 and only 1 iteration of inference, change them automatically!')
        args.batch_size = 1
        args.subset_len = 1

    quantizer = torch_quantizer(args.quant_mode, model, example_inputs, quant_config_file=args.config_file, target=args.target)
    quant_model = quantizer.quant_model

    # push some data through the model
    for i in range(args.subset_len):
        calib_input = [torch.rand(1, 3, 1152, 1152).cuda()]
        y = quant_model(calib_input)

    if args.quant_mode == 'calib':
        quantizer.export_quant_config()
    if args.deploy:
        #quantizer.export_torch_script()
        #quantizer.export_onnx_model()
        quantizer.export_xmodel(deploy_check=False)

if __name__ == "__main__":
    parser = argparse.ArgumentParser()

    parser.add_argument('--config_file',    default=None,    help='quantization configuration file')
    parser.add_argument('--subset_len',    default=100,    type=int,    help='subset_len to evaluate model, using the whole validation dataset if it is not set')
    parser.add_argument('--batch_size',    default=32,    type=int,    help='input data batch size to evaluate model')
    parser.add_argument('--quant_mode',     default='calib',     choices=['calib', 'test'],     help='quantization mode. calib: quantize, test: evaluate quantized model')
    parser.add_argument('--deploy',     dest='deploy',    action='store_true',    help='export xmodel for deployment')
    parser.add_argument('--target',     dest='target',    nargs="?",    const="",    help='specify target device')

    args, _ = parser.parse_known_args()
    quantization(args)

Xilinx / Vitis-AI

Node name mismatch when using sliced tensor #1216