Tencent / ncnn

ncnn is a high-performance neural network inference framework optimized for the mobile platform

TorchScript model to pnnx model error #5247

Closed · manmantang closed this issue 9 months ago

manmantang commented 10 months ago

error log

manman@ubuntu:~/docker_data/python-workspace$ ./install/pnnx scripted_vgg16netvlad.pt inputshape=[1,3,480,640]
pnnxparam = scripted_vgg16netvlad.pnnx.param
pnnxbin = scripted_vgg16netvlad.pnnx.bin
pnnxpy = scripted_vgg16netvlad_pnnx.py
pnnxonnx = scripted_vgg16netvlad.pnnx.onnx
ncnnparam = scripted_vgg16netvlad.ncnn.param
ncnnbin = scripted_vgg16netvlad.ncnn.bin
ncnnpy = scripted_vgg16netvlad_ncnn.py
fp16 = 1
optlevel = 2
device = cpu
inputshape = [1,3,480,640]f32
inputshape2 = 
customop = 
moduleop = 
############# pass_level0
inline function normalize
inline function _get_softmax_dim
inline function softmax
inline function normalize
inline function normalize
inline function normalize
inline module = ibl.models.netvlad.NetVLAD
inline module = ibl.models.vgg.VGG
inline function normalize
inline function _get_softmax_dim
inline function softmax
inline function normalize
inline function normalize
inline function normalize
inline module = ibl.models.netvlad.NetVLAD
inline module = ibl.models.vgg.VGG

----------------

############# pass_level1
no attribute value
%19 : Function = prim::Constant[name="normalize"]()

Segmentation fault (core dumped)
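The failure appears to come from pass_level1: the scripted graph still contains prim::Constant nodes whose value is a TorchScript Function (the F.normalize calls captured by torch.jit.script) rather than a tensor attribute, so pnnx prints "no attribute value" and then segfaults. A minimal sketch for confirming which Function constants remain in the scripted file; inlined_graph is a standard ScriptModule property, while the string-based filtering below is only illustrative:

import torch

# Inspect the already-scripted file; torch.jit.load does not need the
# original ibl package just to look at the graph.
scripted = torch.jit.load("scripted_vgg16netvlad.pt")

# Print every constant node that carries a Function value
# (e.g. the captured "normalize"), which pnnx cannot fold.
for node in scripted.inlined_graph.nodes():
    if node.kind() != "prim::Constant":
        continue
    for out in node.outputs():
        if "Function" in str(out.type()):
            print(node)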

model

  1. original model

import numpy as np
import torch
import torchvision
from torch import nn
from torch.nn import functional as F
from torch.nn import init


class VGG(nn.Module):
    __factory = {
        16: torchvision.models.vgg16,
    }

    __fix_layers = {  # vgg16
        'conv5': 24,
        'conv4': 17,
        'conv3': 10,
        'conv2': 5,
        'full': 0
    }

    def __init__(self, depth, pretrained=True, cut_at_pooling=False,
                 train_layers='conv5', matconvnet=None):
        super(VGG, self).__init__()
        self.pretrained = pretrained
        self.depth = depth
        self.cut_at_pooling = cut_at_pooling
        self.train_layers = train_layers
        self.feature_dim = 512
        self.matconvnet = matconvnet
        # Construct the (pretrained) VGG base
        if depth not in VGG.__factory:
            raise KeyError("Unsupported depth:", depth)
        vgg = VGG.__factory[depth](pretrained=pretrained)
        layers = list(vgg.features.children())[:-2]
        self.base = nn.Sequential(*layers)  # keep only the feature part, dropping the last relu and maxpool
        self.gap = nn.AdaptiveMaxPool2d(1)

        self._init_params()

        if not pretrained:
            self.reset_params()
        else:
            layers = list(self.base.children())
            for l in layers[:VGG.__fix_layers[train_layers]]:
                for p in l.parameters():
                    p.requires_grad = False

    def _init_params(self):
        # optionally load pretrained weights converted from matconvnet
        if self.matconvnet is not None:
            self.base.load_state_dict(torch.load(self.matconvnet))
            self.pretrained = True

    def forward(self, x):
        x = self.base(x)

        if self.cut_at_pooling:
            return x, x

        pool_x = self.gap(x)
        pool_x = pool_x.view(pool_x.size(0), -1)

        return pool_x, x

    def reset_params(self):
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm2d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.BatchNorm1d):
                init.constant_(m.weight, 1)
                init.constant_(m.bias, 0)
            elif isinstance(m, nn.Linear):
                init.normal_(m.weight, std=0.001)
                if m.bias is not None:
                    init.constant_(m.bias, 0)

class NetVLAD(nn.Module):
    """NetVLAD layer implementation"""

    def __init__(self, num_clusters=64, dim=512, alpha=100.0, normalize_input=True):
        """
        Args:
            num_clusters : int
                The number of clusters
            dim : int
                Dimension of descriptors
            alpha : float
                Parameter of initialization. Larger value is harder assignment.
            normalize_input : bool
                If true, descriptor-wise L2 normalization is applied to input.
        """
        super(NetVLAD, self).__init__()
        self.num_clusters = num_clusters
        self.dim = dim
        self.alpha = alpha
        self.normalize_input = normalize_input
        self.conv = nn.Conv2d(dim, num_clusters, kernel_size=(1, 1), bias=False)
        self.centroids = nn.Parameter(torch.rand(num_clusters, dim), requires_grad=True)

        self.clsts = None
        self.traindescs = None

    def _init_params(self):
        clstsAssign = self.clsts / np.linalg.norm(self.clsts, axis=1, keepdims=True)
        dots = np.dot(clstsAssign, self.traindescs.T)
        dots.sort(0)
        dots = dots[::-1, :] # sort, descending

        self.alpha = (-np.log(0.01) / np.mean(dots[0,:] - dots[1,:])).item()
        self.centroids.data.copy_(torch.from_numpy(self.clsts))
        self.conv.weight.data.copy_(torch.from_numpy(self.alpha*clstsAssign).unsqueeze(2).unsqueeze(3))

    def forward(self, x):
        N, C = x.shape[:2]
        if self.normalize_input:
            x = F.normalize(x, p=2., dim=1)  # across descriptor dim

        # soft-assignment
        soft_assign = self.conv(x).view(N, self.num_clusters, -1)
        soft_assign = F.softmax(soft_assign, dim=1)

        x_flatten = x.view(N, C, -1)

        # calculate residuals to each clusters in one loop
        residual = x_flatten.expand(self.num_clusters, -1, -1, -1).permute(1, 0, 2, 3) - \
            self.centroids.expand(x_flatten.size(-1), -1, -1).permute(1, 2, 0).unsqueeze(0)
        residual *= soft_assign.unsqueeze(2)
        vlad = residual.sum(dim=-1)

        return vlad
class EmbedNetPCA(nn.Module):
    def __init__(self, base_model, net_vlad, dim=4096):
        super(EmbedNetPCA, self).__init__()
        self.base_model = base_model
        self.net_vlad = net_vlad
        self.pca_layer = nn.Conv2d(net_vlad.num_clusters*net_vlad.dim, dim, 1, stride=1, padding=0)

    def _init_params(self):
        self.base_model._init_params()
        self.net_vlad._init_params()

    def forward(self, x):
        _, x = self.base_model(x)
        vlad_x = self.net_vlad(x)

        # [IMPORTANT] normalize
        vlad_x = F.normalize(vlad_x, p=2., dim=2)  # intra-normalization
        vlad_x = vlad_x.view(x.size(0), -1)  # flatten
        vlad_x = F.normalize(vlad_x, p=2., dim=1)  # L2 normalize

        # reduction
        N, D = vlad_x.size()
        vlad_x = vlad_x.view(N, D, 1, 1)
        vlad_x = self.pca_layer(vlad_x).view(N, -1)
        vlad_x = F.normalize(vlad_x, p=2., dim=-1)  # L2 normalize

        return vlad_x
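
For reference, a hypothetical shape check of the classes above, not part of the original report; it assumes a torchvision build that still accepts the legacy pretrained= argument. With a 1x3x480x640 input the VGG16 trunk yields a 512x30x40 feature map, NetVLAD a 1x64x512 tensor, and EmbedNetPCA a 1x4096 descriptor:

import torch

base = VGG(16, pretrained=False)              # randomly initialized weights are enough for a shape check
net_vlad = NetVLAD(num_clusters=64, dim=512)
model = EmbedNetPCA(base, net_vlad, dim=4096).eval()

x = torch.rand(1, 3, 480, 640)                # matches inputshape=[1,3,480,640]
with torch.no_grad():
    out = model(x)
print(out.shape)                              # expected: torch.Size([1, 4096])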

how to reproduce

1. Build the network

def vgg16_netvlad(pretrained=False):
    base_model = models.create('vgg16', pretrained=False)
    pool_layer = models.create('netvlad', dim=base_model.feature_dim)
    model = models.create('embednetpca', base_model, pool_layer)
    if pretrained:
        model.load_state_dict(torch.hub.load_state_dict_from_url('https://github.com/yxgeee/OpenIBL/releases/download/v0.1.0-beta/vgg16_netvlad.pth', map_location=torch.device('cpu')))
    return model

Hi, the network I downloaded comes from the open-source release of SFRS: Self-supervising Fine-grained Region Similarities for Large-scale Image Localization (ECCV'20 Spotlight). While converting the torch model to a TorchScript model, some formatting errors came up, so I modified the code as the error messages suggested (screenshot 20231225-175015) and then generated the TorchScript file:

model = torch.hub.load('yxgeee/OpenIBL', 'vgg16_netvlad', pretrained=True).eval()
# Script the model
scripted_model = torch.jit.script(model)
# Save the scripted model to a file
scripted_model.save("scripted_vgg16netvlad.pt")

2. When converting to pnnx, the error above appears; could you please help me take a look? A traced-export workaround is sketched after these steps.
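Not the eventual fix, just a workaround sketch under the assumption that a traced export is acceptable here: torch.jit.trace records the concrete aten ops that F.normalize lowers to instead of keeping a Function constant in the graph, which pnnx's pass_level1 usually handles better than a scripted module.

import torch

model = torch.hub.load('yxgeee/OpenIBL', 'vgg16_netvlad', pretrained=True).eval()

# Trace with an example input matching inputshape=[1,3,480,640]
example = torch.rand(1, 3, 480, 640)
traced_model = torch.jit.trace(model, example)
traced_model.save("traced_vgg16netvlad.pt")

The traced file can then be fed to pnnx the same way as before: ./install/pnnx traced_vgg16netvlad.pt inputshape=[1,3,480,640]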

manmantang commented 10 months ago

@nihui Could you please take a look at this?

nihui commented 9 months ago

https://github.com/Tencent/ncnn/pull/5272