traveller59 / spconv

Spatial Sparse Convolution Library
Apache License 2.0
1.89k stars 366 forks source link

RuntimeError: N == input.dim(0) assert faild. error, BUT it can't be solved by the existing solution #663

Open Christic9 opened 1 year ago

Christic9 commented 1 year ago

Hi, thank you for the awesome work! I have encountered the same problem as in https://github.com/traveller59/spconv/issues/460, BUT I don't use decoder blocks. Here is my spconv class code:

class SparseConvNet(nn.Module):
    """Sparse 3D encoder that returns dense feature volumes from four stages.

    The network alternates submanifold blocks (``conv0`` .. ``conv4``) with
    strided blocks (``down0`` .. ``down3``).  The outputs of ``conv1`` to
    ``conv4`` are each converted with ``.dense()`` and returned together.
    """

    def __init__(self, num_channels):
        """Create the sub-modules.

        Args:
            num_channels: channel count of the input features fed to ``conv0``.
        """
        super().__init__()

        # One row per downsampling stage:
        # (submanifold block builder, its in/out widths, width after the strided block)
        stages = (
            (double_conv, num_channels, 16, 32),
            (double_conv, 32, 32, 64),
            (triple_conv, 64, 64, 128),
            (triple_conv, 128, 128, 128),
        )
        # Registration order is conv0, down0, conv1, down1, ... so that the
        # module ordering (and therefore state_dict ordering) is unchanged.
        for idx, (make_block, c_in, c_mid, c_out) in enumerate(stages):
            setattr(self, f'conv{idx}', make_block(c_in, c_mid, f'subm{idx}'))
            setattr(self, f'down{idx}', stride_conv(c_mid, c_out, f'down{idx}'))

        # Final submanifold block; no strided block follows it.
        self.conv4 = triple_conv(128, 128, 'subm4')

    def forward(self, x):
        """Run the encoder and collect one dense volume per stage.

        Args:
            x: input accepted by ``conv0`` (presumably a spconv sparse tensor;
                confirm against the caller).

        Returns:
            list: ``[net1, net2, net3, net4]`` -- the ``.dense()`` results of
            the ``conv1``, ``conv2``, ``conv3`` and ``conv4`` outputs, in order.
        """
        # Stage 0 contributes no output volume; it only feeds stage 1.
        feats = self.down0(self.conv0(x))

        # Each entry: (submanifold block, strided block applied afterwards or None).
        pipeline = (
            (self.conv1, self.down1),
            (self.conv2, self.down2),
            (self.conv3, self.down3),
            (self.conv4, None),
        )

        volumes = []
        for subm_block, down_block in pipeline:
            feats = subm_block(feats)
            # Densify before downsampling, exactly at the submanifold output.
            volumes.append(feats.dense())
            if down_block is not None:
                feats = down_block(feats)

        return volumes

def single_conv(in_channels, out_channels, indice_key=None):
    """Build a SubMConv3d (size argument 1) -> BatchNorm1d -> ReLU block.

    Args:
        in_channels: channel count passed to the convolution as its input width.
        out_channels: channel count produced by the convolution and normalised
            by the batch norm.
        indice_key: forwarded unchanged to ``spconv.SubMConv3d``.

    Returns:
        A ``spconv.SparseSequential`` holding the three layers in order.
    """
    pointwise = spconv.SubMConv3d(
        in_channels, out_channels, 1, bias=False, indice_key=indice_key)
    norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)
    activation = nn.ReLU()
    return spconv.SparseSequential(pointwise, norm, activation)

def double_conv(in_channels, out_channels, indice_key=None):
    """Build two consecutive SubMConv3d(3) -> BatchNorm1d -> ReLU groups.

    Args:
        in_channels: input width of the first convolution.
        out_channels: output width of both convolutions (and input width of
            the second one).
        indice_key: forwarded unchanged to every ``spconv.SubMConv3d``, so
            both convolutions receive the same key.

    Returns:
        A ``spconv.SparseSequential`` holding the six layers in order.
    """
    layers = []
    # Only the first convolution sees `in_channels`; the second maps
    # out_channels -> out_channels.
    for width_in in (in_channels, out_channels):
        layers.append(
            spconv.SubMConv3d(
                width_in, out_channels, 3, bias=False, indice_key=indice_key))
        layers.append(nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01))
        layers.append(nn.ReLU())
    return spconv.SparseSequential(*layers)

def triple_conv(in_channels, out_channels, indice_key=None):
    """Build three consecutive SubMConv3d(3) -> BatchNorm1d -> ReLU groups.

    Args:
        in_channels: input width of the first convolution.
        out_channels: output width of all three convolutions (and input width
            of the second and third).
        indice_key: forwarded unchanged to every ``spconv.SubMConv3d``, so
            all three convolutions receive the same key.

    Returns:
        A ``spconv.SparseSequential`` holding the nine layers in order.
    """
    layers = []
    # Only the first convolution sees `in_channels`; the remaining two map
    # out_channels -> out_channels.
    for width_in in (in_channels, out_channels, out_channels):
        layers.append(
            spconv.SubMConv3d(
                width_in, out_channels, 3, bias=False, indice_key=indice_key))
        layers.append(nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01))
        layers.append(nn.ReLU())
    return spconv.SparseSequential(*layers)

def stride_conv(in_channels, out_channels, indice_key=None):
    """Build a SparseConv3d -> BatchNorm1d -> ReLU block.

    Args:
        in_channels: input width of the convolution.
        out_channels: output width of the convolution, normalised by the
            batch norm.
        indice_key: forwarded unchanged to ``spconv.SparseConv3d``.

    Returns:
        A ``spconv.SparseSequential`` holding the three layers in order.
    """
    # The positional 3 and 2 are passed positionally, as before (kernel size
    # and stride in spconv's documented signature -- confirm for the
    # installed version).
    downsample = spconv.SparseConv3d(
        in_channels, out_channels, 3, 2,
        padding=1, bias=False, indice_key=indice_key)
    norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)
    activation = nn.ReLU()
    return spconv.SparseSequential(downsample, norm, activation)

I have now basically confirmed that the issue is caused by the stride_conv block during the loss backward phase. How can I solve this error? I appreciate your time!

Here is my error information:

Traceback (most recent call last):
  File "E:\anaconda\envs\neuralbody\lib\site-packages\torch\autograd\function.py", line 399, in wrapper
    outputs = fn(ctx, *args)
  File "E:\anaconda\envs\neuralbody\lib\site-packages\torch\cuda\amp\autocast_mode.py", line 135, in decorate_bwd
    return bwd(*args, **kwargs)
  File "E:\anaconda\envs\neuralbody\lib\site-packages\spconv\pytorch\functional.py", line 283, in backward
    raise e
  File "E:\anaconda\envs\neuralbody\lib\site-packages\spconv\pytorch\functional.py", line 273, in backward
    fp32_accum=fp32_accum)
  File "E:\anaconda\envs\neuralbody\lib\site-packages\spconv\pytorch\ops.py", line 1739, in implicit_gemm_backward
    use_tf32=constants.SPCONV_ALLOW_TF32)
RuntimeError: d:\a\spconv\spconv\build\temp.win-amd64-cpython-37\release\spconv\build\core_cc\src\cumm\conv\main\convmainunittest\convmainunittest_matmul_split_simt_f32f32f32_0.cu(227)
N == input.dim(0) assert faild. error

Process finished with exit code 1
LipLipGo commented 8 months ago

Hi, thank you for the awesome work! I have encountered the same problem as in > #460, but I don't use decoder blocks. Here is my spconv class code:

class SparseConvNet(nn.Module):
    """Sparse 3D encoder that returns dense feature volumes from four stages.

    The network alternates submanifold blocks (``conv0`` .. ``conv4``) with
    strided blocks (``down0`` .. ``down3``).  The outputs of ``conv1`` to
    ``conv4`` are each converted with ``.dense()`` and returned together.
    """

    def __init__(self, num_channels):
        """Create the sub-modules.

        Args:
            num_channels: channel count of the input features fed to ``conv0``.
        """
        super().__init__()

        # One row per downsampling stage:
        # (submanifold block builder, its in/out widths, width after the strided block)
        stages = (
            (double_conv, num_channels, 16, 32),
            (double_conv, 32, 32, 64),
            (triple_conv, 64, 64, 128),
            (triple_conv, 128, 128, 128),
        )
        # Registration order is conv0, down0, conv1, down1, ... so that the
        # module ordering (and therefore state_dict ordering) is unchanged.
        for idx, (make_block, c_in, c_mid, c_out) in enumerate(stages):
            setattr(self, f'conv{idx}', make_block(c_in, c_mid, f'subm{idx}'))
            setattr(self, f'down{idx}', stride_conv(c_mid, c_out, f'down{idx}'))

        # Final submanifold block; no strided block follows it.
        self.conv4 = triple_conv(128, 128, 'subm4')

    def forward(self, x):
        """Run the encoder and collect one dense volume per stage.

        Args:
            x: input accepted by ``conv0`` (presumably a spconv sparse tensor;
                confirm against the caller).

        Returns:
            list: ``[net1, net2, net3, net4]`` -- the ``.dense()`` results of
            the ``conv1``, ``conv2``, ``conv3`` and ``conv4`` outputs, in order.
        """
        # Stage 0 contributes no output volume; it only feeds stage 1.
        feats = self.down0(self.conv0(x))

        # Each entry: (submanifold block, strided block applied afterwards or None).
        pipeline = (
            (self.conv1, self.down1),
            (self.conv2, self.down2),
            (self.conv3, self.down3),
            (self.conv4, None),
        )

        volumes = []
        for subm_block, down_block in pipeline:
            feats = subm_block(feats)
            # Densify before downsampling, exactly at the submanifold output.
            volumes.append(feats.dense())
            if down_block is not None:
                feats = down_block(feats)

        return volumes

def single_conv(in_channels, out_channels, indice_key=None):
    """Build a SubMConv3d (size argument 1) -> BatchNorm1d -> ReLU block.

    Args:
        in_channels: channel count passed to the convolution as its input width.
        out_channels: channel count produced by the convolution and normalised
            by the batch norm.
        indice_key: forwarded unchanged to ``spconv.SubMConv3d``.

    Returns:
        A ``spconv.SparseSequential`` holding the three layers in order.
    """
    pointwise = spconv.SubMConv3d(
        in_channels, out_channels, 1, bias=False, indice_key=indice_key)
    norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)
    activation = nn.ReLU()
    return spconv.SparseSequential(pointwise, norm, activation)

def double_conv(in_channels, out_channels, indice_key=None):
    """Build two consecutive SubMConv3d(3) -> BatchNorm1d -> ReLU groups.

    Args:
        in_channels: input width of the first convolution.
        out_channels: output width of both convolutions (and input width of
            the second one).
        indice_key: forwarded unchanged to every ``spconv.SubMConv3d``, so
            both convolutions receive the same key.

    Returns:
        A ``spconv.SparseSequential`` holding the six layers in order.
    """
    layers = []
    # Only the first convolution sees `in_channels`; the second maps
    # out_channels -> out_channels.
    for width_in in (in_channels, out_channels):
        layers.append(
            spconv.SubMConv3d(
                width_in, out_channels, 3, bias=False, indice_key=indice_key))
        layers.append(nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01))
        layers.append(nn.ReLU())
    return spconv.SparseSequential(*layers)

def triple_conv(in_channels, out_channels, indice_key=None):
    """Build three consecutive SubMConv3d(3) -> BatchNorm1d -> ReLU groups.

    Args:
        in_channels: input width of the first convolution.
        out_channels: output width of all three convolutions (and input width
            of the second and third).
        indice_key: forwarded unchanged to every ``spconv.SubMConv3d``, so
            all three convolutions receive the same key.

    Returns:
        A ``spconv.SparseSequential`` holding the nine layers in order.
    """
    layers = []
    # Only the first convolution sees `in_channels`; the remaining two map
    # out_channels -> out_channels.
    for width_in in (in_channels, out_channels, out_channels):
        layers.append(
            spconv.SubMConv3d(
                width_in, out_channels, 3, bias=False, indice_key=indice_key))
        layers.append(nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01))
        layers.append(nn.ReLU())
    return spconv.SparseSequential(*layers)

def stride_conv(in_channels, out_channels, indice_key=None):
    """Build a SparseConv3d -> BatchNorm1d -> ReLU block.

    Args:
        in_channels: input width of the convolution.
        out_channels: output width of the convolution, normalised by the
            batch norm.
        indice_key: forwarded unchanged to ``spconv.SparseConv3d``.

    Returns:
        A ``spconv.SparseSequential`` holding the three layers in order.
    """
    # The positional 3 and 2 are passed positionally, as before (kernel size
    # and stride in spconv's documented signature -- confirm for the
    # installed version).
    downsample = spconv.SparseConv3d(
        in_channels, out_channels, 3, 2,
        padding=1, bias=False, indice_key=indice_key)
    norm = nn.BatchNorm1d(out_channels, eps=1e-3, momentum=0.01)
    activation = nn.ReLU()
    return spconv.SparseSequential(downsample, norm, activation)

I have now basically confirmed that the issue is caused by the stride_conv block during the loss backward phase. How can I solve this error? I appreciate your time!

Here is my error information:

Traceback (most recent call last):
  File "E:\anaconda\envs\neuralbody\lib\site-packages\torch\autograd\function.py", line 399, in wrapper
    outputs = fn(ctx, *args)
  File "E:\anaconda\envs\neuralbody\lib\site-packages\torch\cuda\amp\autocast_mode.py", line 135, in decorate_bwd
    return bwd(*args, **kwargs)
  File "E:\anaconda\envs\neuralbody\lib\site-packages\spconv\pytorch\functional.py", line 283, in backward
    raise e
  File "E:\anaconda\envs\neuralbody\lib\site-packages\spconv\pytorch\functional.py", line 273, in backward
    fp32_accum=fp32_accum)
  File "E:\anaconda\envs\neuralbody\lib\site-packages\spconv\pytorch\ops.py", line 1739, in implicit_gemm_backward
    use_tf32=constants.SPCONV_ALLOW_TF32)
RuntimeError: d:\a\spconv\spconv\build\temp.win-amd64-cpython-37\release\spconv\build\core_cc\src\cumm\conv\main\convmainunittest\convmainunittest_matmul_split_simt_f32f32f32_0.cu(227)
N == input.dim(0) assert faild. error

Process finished with exit code 1

Hi, have you solved this problem? I have run into the same error as you!

JY9898 commented 6 months ago

Hi, have you solved this problem? I have run into the same error as you!