Tencent / ncnn

ncnn is a high-performance neural network inference framework optimized for the mobile platform

TorchScript model to pnnx model error #5247

Closed · manmantang closed this issue 9 months ago

manmantang commented 10 months ago

error log

manman@ubuntu:~/docker_data/python-workspace$ ./install/pnnx scripted_vgg16netvlad.pt inputshape=[1,3,480,640]
pnnxparam = scripted_vgg16netvlad.pnnx.param
pnnxbin = scripted_vgg16netvlad.pnnx.bin
pnnxpy = scripted_vgg16netvlad_pnnx.py
pnnxonnx = scripted_vgg16netvlad.pnnx.onnx
ncnnparam = scripted_vgg16netvlad.ncnn.param
ncnnbin = scripted_vgg16netvlad.ncnn.bin
ncnnpy = scripted_vgg16netvlad_ncnn.py
fp16 = 1
optlevel = 2
device = cpu
inputshape = [1,3,480,640]f32
inputshape2 = 
customop = 
moduleop = 
############# pass_level0
inline function normalize
inline function _get_softmax_dim
inline function softmax
inline function normalize
inline function normalize
inline function normalize
inline module = ibl.models.netvlad.NetVLAD
inline module = ibl.models.vgg.VGG
inline function normalize
inline function _get_softmax_dim
inline function softmax
inline function normalize
inline function normalize
inline function normalize
inline module = ibl.models.netvlad.NetVLAD
inline module = ibl.models.vgg.VGG

----------------

############# pass_level1
no attribute value
%19 : Function = prim::Constant[name="normalize"]()

Segmentation fault (core dumped)
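The failure appears to come from pass_level1: the scripted graph still contains prim::Constant nodes whose value is a TorchScript Function (the F.normalize calls captured by torch.jit.script) rather than a tensor attribute, so pnnx prints "no attribute value" and then segfaults. A minimal sketch for confirming which Function constants remain in the scripted file; inlined_graph is a standard ScriptModule property, while the string-based filtering below is only illustrative:

import torch

# Inspect the already-scripted file; torch.jit.load does not need the
# original ibl package just to look at the graph.
scripted = torch.jit.load("scripted_vgg16netvlad.pt")

# Print every constant node that carries a Function value
# (e.g. the captured "normalize"), which pnnx cannot fold.
for node in scripted.inlined_graph.nodes():
    if node.kind() != "prim::Constant":
        continue
    for out in node.outputs():
        if "Function" in str(out.type()):
            print(node)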

model

  1. original model

import numpy as np
import torch
import torchvision
from torch import nn
from torch.nn import functional as F
from torch.nn import init


class VGG(nn.Module):
    __factory = {
        16: torchvision.models.vgg16,
    }

    __fix_layers = {  # vgg16
        'conv5': 24,
        'conv4': 17,
        'conv3': 10,
        'conv2': 5,
        'full': 0
    }

    def __init__(self, depth, pretrained=True, cut_at_pooling=False,
                 train_layers='conv5', matconvnet=None):
        super(VGG, self).__init__()
        self.pretrained = pretrained
        self.depth = depth
        self.cut_at_pooling = cut_at_pooling
        self.train_layers = train_layers
        self.feature_dim = 512
        self.matconvnet = matconvnet
        # Construct the (pretrained) VGG base
        if depth not in VGG.__factory:
            raise KeyError("Unsupported depth:", depth)
        vgg = VGG.__factory[depth](pretrained=pretrained)
        layers = list(vgg.features.children())[:-2]
        self.base = nn.Sequential(*layers)  # keep only the feature part, dropping the last relu and maxpool
        self.gap = nn.AdaptiveMaxPool2d(1)

        self._init_params()

        if not pretrained:
            self.reset_params()
        else:
            layers = list(self.base.children())
            for l in layers[:VGG.__fix_layers[train_layers]]:
                for p in l.parameters():
                    p.requires_grad = False

    def _init_params(self):
        # optionally load pretrained weights converted from matconvnet
        if self.matconvnet is not None:
            self.base.load_state_dict(torch.load(self.matconvnet))
            self.pretrained = True

    def forward(self, x):
        x = self.base(x)

        if self.cut_at_pooling:
            return x, x

        pool_x = self.gap(x)
        pool_x = pool_x.view(pool_x.size(0), -1)

        return pool_x, x

    def reset_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

class NetVLAD(nn.Module):
    """NetVLAD layer implementation"""

    def __init__(self, num_clusters=64, dim=512, alpha=100.0, normalize_input=True):
        """
        Args:
            num_clusters : int
                The number of clusters
            dim : int
                Dimension of descriptors
            alpha : float
                Parameter of initialization. Larger value is harder assignment.
            normalize_input : bool
                If true, descriptor-wise L2 normalization is applied to input.
        """
        super(NetVLAD, self).__init__()
        self.num_clusters = num_clusters
        self.dim = dim
        self.alpha = alpha
        self.normalize_input = normalize_input
        self.conv = nn.Conv2d(dim, num_clusters, kernel_size=(1, 1), bias=False)
        self.centroids = nn.Parameter(torch.rand(num_clusters, dim), requires_grad=True)

        self.clsts = None
        self.traindescs = None

    def _init_params(self):
        clstsAssign = self.clsts / np.linalg.norm(self.clsts, axis=1, keepdims=True)
        dots = np.dot(clstsAssign, self.traindescs.T)
        dots.sort(0)
        dots = dots[::-1, :] # sort, descending

        self.alpha = (-np.log(0.01) / np.mean(dots[0,:] - dots[1,:])).item()
        self.centroids.data.copy_(torch.from_numpy(self.clsts))
        self.conv.weight.data.copy_(torch.from_numpy(self.alpha*clstsAssign).unsqueeze(2).unsqueeze(3))

    def forward(self, x):
        N, C = x.shape[:2]
        if self.normalize_input:
            x = F.normalize(x, p=2., dim=1)  # across descriptor dim

        # soft-assignment
        soft_assign = self.conv(x).view(N, self.num_clusters, -1)
        soft_assign = F.softmax(soft_assign, dim=1)

        x_flatten = x.view(N, C, -1)

        # calculate residuals to each clusters in one loop
        residual = x_flatten.expand(self.num_clusters, -1, -1, -1).permute(1, 0, 2, 3) - \
            self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
        residual *= soft_assign.unsqueeze(2)
        vlad = residual.sum(dim=-1)

        return vlad
class EmbedNetPCA(nn.Module):
    def __init__(self, base_model, net_vlad, dim=4096):
        super(EmbedNetPCA, self).__init__()
        self.base_model = base_model
        self.net_vlad = net_vlad
        self.pca_layer = nn.Conv2d(net_vlad.num_clusters*net_vlad.dim, dim, 1, stride=1, padding=0)

    def _init_params(self):
        self.base_model._init_params()
        self.net_vlad._init_params()

    def forward(self, x):
        _, x = self.base_model(x)
        vlad_x = self.net_vlad(x)

        # [IMPORTANT] normalize
        vlad_x = F.normalize(vlad_x, p=2., dim=2)  # intra-normalization
        vlad_x = vlad_x.view(x.size(0), -1)  # flatten
        vlad_x = F.normalize(vlad_x, p=2., dim=1)  # L2 normalize

        # reduction
        N, D = vlad_x.size()
        vlad_x = vlad_x.view(N, D, 1, 1)
        vlad_x = self.pca_layer(vlad_x).view(N, -1)
        vlad_x = F.normalize(vlad_x, p=2., dim=-1)  # L2 normalize

        return vlad_x
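
For reference, a hypothetical shape check of the classes above, not part of the original report; it assumes a torchvision build that still accepts the legacy pretrained= argument. With a 1x3x480x640 input the VGG16 trunk yields a 512x30x40 feature map, NetVLAD a 1x64x512 tensor, and EmbedNetPCA a 1x4096 descriptor:

import torch

base = VGG(16, pretrained=False)              # randomly initialized weights are enough for a shape check
net_vlad = NetVLAD(num_clusters=64, dim=512)
model = EmbedNetPCA(base, net_vlad, dim=4096).eval()

x = torch.rand(1, 3, 480, 640)                # matches inputshape=[1,3,480,640]
with torch.no_grad():
    out = model(x)
print(out.shape)                              # expected: torch.Size([1, 4096])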

how to reproduce

1. Build the network

def vgg16_netvlad(pretrained=False):
    base_model = models.create('vgg16', pretrained=False)
    pool_layer = models.create('netvlad', dim=base_model.feature_dim)
    model = models.create('embednetpca', base_model, pool_layer)
    if pretrained:
        model.load_state_dict(torch.hub.load_state_dict_from_url('https://github.com/yxgeee/OpenIBL/releases/download/v0.1.0-beta/vgg16_netvlad.pth', map_location=torch.device('cpu')))
    return model

Hi, the network I downloaded comes from the open-source release of SFRS: Self-supervising Fine-grained Region Similarities for Large-scale Image Localization (ECCV'20 Spotlight). While converting the torch model to a TorchScript model, some formatting errors came up, so I modified the code as the error messages suggested (screenshot 20231225-175015) and then generated the TorchScript file:

model = torch.hub.load('yxgeee/OpenIBL', 'vgg16_netvlad', pretrained=True).eval()
# Script the model
scripted_model = torch.jit.script(model)
# Save the scripted model to a file
scripted_model.save("scripted_vgg16netvlad.pt")

2. When converting to pnnx, the error above appears; could you please help me take a look? A traced-export workaround is sketched after these steps.
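Not the eventual fix, just a workaround sketch under the assumption that a traced export is acceptable here: torch.jit.trace records the concrete aten ops that F.normalize lowers to instead of keeping a Function constant in the graph, which pnnx's pass_level1 usually handles better than a scripted module.

import torch

model = torch.hub.load('yxgeee/OpenIBL', 'vgg16_netvlad', pretrained=True).eval()

# Trace with an example input matching inputshape=[1,3,480,640]
example = torch.rand(1, 3, 480, 640)
traced_model = torch.jit.trace(model, example)
traced_model.save("traced_vgg16netvlad.pt")

The traced file can then be fed to pnnx the same way as before: ./install/pnnx traced_vgg16netvlad.pt inputshape=[1,3,480,640]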

manmantang commented 10 months ago

@nihui Could you please take a look at this?

nihui commented 9 months ago

https://github.com/Tencent/ncnn/pull/5272