mit-han-lab / torchsparse

[MICRO'23, MLSys'22] TorchSparse: Efficient Training and Inference Framework for Sparse Convolution on GPUs.
https://torchsparse.mit.edu
MIT License

loss.backward() shows runtime error #266

Open akshay-antony opened 7 months ago

akshay-antony commented 7 months ago

Is there an existing issue for this?

Current Behavior

File "torchsparse/nn/functional/conv/func/implicit_gemm.pyx", line 160, in torchsparse.nn.functional.conv.func.implicit_gemm.ImplicitGEMMConvolutionFuntion.backward RuntimeError: shape '[27, 32, 4]' is invalid for input of size 27648

Expected Behavior

No response

Environment

- GCC:
- NVCC:
- PyTorch:
- PyTorch CUDA:
- TorchSparse:

Anything else?

No response

ys-2020 commented 7 months ago

Could you please provide more details or a short code snippet to reproduce this error? Thank you!

ChenThree commented 7 months ago

Could you check the in_channels in your network? 27648 = 27 x 32 x 32, so it seems the in_channels should be 32 rather than 4.
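For reference, the arithmetic behind this suggestion: a kernel_size=3 sparse conv has 3^3 = 27 kernel offsets, so a buffer of 27648 elements factors as 27 x 32 x 32, whereas the target shape [27, 32, 4] only holds 27 x 32 x 4 = 3456 elements. Below is a minimal, hypothetical sketch (not from this thread) of the kind of width mismatch that produces numbers like these, with an up-front check:

    import torch
    import torchsparse

    # Hypothetical example: 32-channel features fed to a conv built with in_channels=4.
    # Checking the width up front catches a mismatch that may otherwise only surface
    # later, e.g. as a reshape error in the implicit-GEMM backward pass.
    coords = torch.randint(0, 100, (1000, 4)).int()
    coords[:, 0] = 0                       # single batch
    feats = torch.randn(1000, 32)          # 32-channel features
    x = torchsparse.SparseTensor(feats=feats, coords=coords).to('cuda:0')

    conv = torchsparse.nn.Conv3d(in_channels=4, out_channels=32, kernel_size=3).to('cuda:0')
    assert x.F.shape[1] == 4, f"feature width {x.F.shape[1]} does not match in_channels=4"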

akshay-antony commented 7 months ago

import os
import numpy as np
import torch
import torch.nn as nn
import torchsparse
import yaml
import torchsparse.nn.functional as F

F.set_kmap_mode("hashmap")

from torchsparse.backbones.unet import SparseResUNet

class SparseSigmoid(nn.Module):
    def __init__(self):
        super(SparseSigmoid, self).__init__()

    def forward(self, input):
        # input: SparseTensor with coordinates and features
        # output: SparseTensor with coordinates and features
        input.F = torch.sigmoid(input.F)
        return input

class AttentiveFeatureFusionTorchSparse(nn.Module):
    def __init__(self, config: dict) -> None:
        super(AttentiveFeatureFusionTorchSparse, self).__init__()
        self.config = config
        self.point_features_layer = nn.ModuleList()
        self.medium_scale_voxel_features_layer = nn.ModuleList()
        self.large_scale_voxel_features_layer = nn.ModuleList()
        self.attention_layer = nn.ModuleList()
        self.pi_layer = nn.ModuleList()
        self.fusion_layer = nn.ModuleList()

    for i in range(len(self.config['point_features_dims'])-1):
        self.point_features_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['point_features_dims'][i],
                        out_channels=self.config['point_features_dims'][i+1],
                        kernel_size=self.config['point_features_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['point_features_dims'][i+1]),
                    torchsparse.nn.ReLU()
                )
        )

    for i in range(len(self.config['medium_scale_voxel_features_dims'])-1):
        self.medium_scale_voxel_features_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['medium_scale_voxel_features_dims'][i],
                        out_channels=self.config['medium_scale_voxel_features_dims'][i+1],
                        kernel_size=self.config['medium_scale_voxel_features_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['medium_scale_voxel_features_dims'][i+1]),
                    torchsparse.nn.ReLU()
                )
        )

    for i in range(len(self.config['large_scale_voxel_features_dims'])-1):    
        self.large_scale_voxel_features_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['large_scale_voxel_features_dims'][i],
                        out_channels=self.config['large_scale_voxel_features_dims'][i+1],
                        kernel_size=self.config['large_scale_voxel_features_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['large_scale_voxel_features_dims'][i+1]),
                    torchsparse.nn.ReLU()
            )
        )

    for i in range(len(self.config['attention_dims'])-1):
        self.attention_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['fusion_dims'][i],
                        out_channels=self.config['fusion_dims'][i+1],
                        kernel_size=self.config['fusion_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['fusion_dims'][i+1]),
                    torchsparse.nn.ReLU()
                )   
        )

    for i in range(len(self.config['pi_dims'])-1):
        self.pi_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['pi_dims'][i],
                        out_channels=self.config['pi_dims'][i+1],
                        kernel_size=self.config['pi_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['pi_dims'][i+1]),
                    SparseSigmoid()
                )
        )

    for i in range(len(self.config['fusion_dims'])-1):
        self.fusion_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['fusion_dims'][i],
                        out_channels=self.config['fusion_dims'][i+1],
                        kernel_size=self.config['fusion_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['fusion_dims'][i+1]),
                    torchsparse.nn.ReLU()
                )
        )

def forward(self,
            input: torchsparse.SparseTensor
            ) -> dict:
    # input: SparseTensor with coordinates and features
    point_output = torchsparse.SparseTensor(feats=input.F.clone(), 
                                            coords=input.C.clone()).to('cuda:0')
    medium_scale_voxel_output = torchsparse.SparseTensor(feats=input.F.clone(), 
                                                         coords=input.C.clone()).to('cuda:0')
    large_scale_voxel_output = torchsparse.SparseTensor(feats=input.F.clone(), 
                                                        coords=input.C.clone()).to('cuda:0')

    for i in range(len(self.config['point_features_dims'])-1):
        # print(self.point_features_layer[i])
        point_output = self.point_features_layer[i](point_output)
        # print(f"point_output.shape: {point_output.shape}")

    for i in range(len(self.config['medium_scale_voxel_features_dims'])-1):
        medium_scale_voxel_output = self.medium_scale_voxel_features_layer[i](medium_scale_voxel_output)

    for i in range(len(self.config['large_scale_voxel_features_dims'])-1):
        large_scale_voxel_output = self.large_scale_voxel_features_layer[i](large_scale_voxel_output)

    # get the single dimensional features using attention weights
    point_output_attention = torchsparse.SparseTensor(feats=point_output.F.clone(), 
                                                      coords=point_output.C.clone()).to('cuda:0')
    medium_scale_voxel_output_attention = torchsparse.SparseTensor(feats=medium_scale_voxel_output.F.clone(), 
                                                                   coords=medium_scale_voxel_output.C.clone()).to('cuda:0')
    large_scale_voxel_output_attention = torchsparse.SparseTensor(feats=large_scale_voxel_output.F.clone(), 
                                                                  coords=large_scale_voxel_output.C.clone()).to('cuda:0')

    for i in range(len(self.config['attention_dims'])-1):
        point_output_attention = self.attention_layer[i](point_output_attention)
        medium_scale_voxel_output_attention = self.attention_layer[i](medium_scale_voxel_output_attention)
        large_scale_voxel_output_attention = self.attention_layer[i](large_scale_voxel_output_attention)

    # fuse the weights
    # print(f"point_output_attention.shape: {point_output_attention.F.shape}")
    all_attention_features_sparse_tensor = torchsparse.cat((point_output_attention,
                                                            medium_scale_voxel_output_attention,
                                                            large_scale_voxel_output_attention))
    all_attention_features_sparse_tensor.F = torch.nn.functional.softmax(all_attention_features_sparse_tensor.F, dim=1)

    # get the delta features for stabilization
    pi_output = all_attention_features_sparse_tensor
    for i in range(len(self.config['pi_dims'])-1):
        pi_output = self.pi_layer[i](pi_output)

    # print(f"pi_output.shape: {pi_output.F.shape}")
    # print(f"point output.shape: {point_output.F.shape}")
    # all_attention_features_sparse_tensor: N*3F, where alpha = N*F, beta = N*F, gamma = N*F
    all_attention_features_sparse_tensor.F = all_attention_features_sparse_tensor.F.reshape(all_attention_features_sparse_tensor.F.shape[0], 
                                                                                            3,
                                                                                            -1) # N*3*F 
    fused_features = all_attention_features_sparse_tensor.F[:, 0, :] * point_output.F + \
                     all_attention_features_sparse_tensor.F[:, 1, :] * medium_scale_voxel_output.F + \
                     all_attention_features_sparse_tensor.F[:, 2, :] * large_scale_voxel_output.F + \
                     pi_output.F

    fused_features = torchsparse.SparseTensor(feats=fused_features,
                                              coords=input.C.clone()).to('cuda:0')

    for i in range(len(self.config['fusion_dims'])-1):
        fused_features = self.fusion_layer[i](fused_features)

    return {'x1': point_output,
            'x2': medium_scale_voxel_output,
            'x3': large_scale_voxel_output,
            'J': fused_features}

class SqueezeExcitationTorchSparse(nn.Module):
    def __init__(self, config: dict) -> None:
        super(SqueezeExcitationTorchSparse, self).__init__()
        self.config = config
        self.pooling_layer = torchsparse.nn.GlobalAvgPool()
        self.squeeze_excitation_layer = nn.Sequential(
            torchsparse.nn.Conv3d(
                in_channels=self.config['residual_convolution_dims'][-1] * 3,
                out_channels=self.config['residual_convolution_dims'][-1] * 3 // self.config['squeeze_ratio'],
                kernel_size=1,
                stride=1),
            torchsparse.nn.ReLU(),
            torchsparse.nn.Conv3d(
                in_channels=self.config['residual_convolution_dims'][-1] * 3 // self.config['squeeze_ratio'],
                out_channels=self.config['residual_convolution_dims'][-1] * 3,
                kernel_size=1,
                stride=1),
            SparseSigmoid())

def forward(self,
            input: torchsparse.SparseTensor
            ) -> torchsparse.SparseTensor:
    # input: SparseTensor with coordinates and features
    # output: SparseTensor with coordinates and features
    output = torchsparse.SparseTensor(feats=input.F.clone(), 
                                      coords=input.C.clone()).to('cuda:0')
    # print(f"input.shape: {input.F.shape}") # N*F
    output.F = self.pooling_layer(output)  # 1*F
    # print(f"input.shape: {input.F.shape}") # N*F
    # print(f"output.shape: {output.F.shape}") # 1*F
    squeeze_weights = self.squeeze_excitation_layer(output)  # N*F
    # print(f"squeeze_weights.shape: {squeeze_weights.F.shape}") # N*F
    output.F = input.F * squeeze_weights.F
    # print(f"output.shape: {output.F.shape}") # N*F
    return output

class AdaptiveFeatureSelectionTorchSparse(nn.Module):
    def __init__(self, config):
        super(AdaptiveFeatureSelectionTorchSparse, self).__init__()
        self.config = config
        self.residual_point_features_layer = nn.ModuleList()
        self.residual_medium_scale_voxel_features_layer = nn.ModuleList()
        self.residual_large_scale_voxel_features_layer = nn.ModuleList()
        self.squeeze_excitation_layer = nn.ModuleList()

    for i in range(len(self.config['residual_convolution_dims'])-1):
        self.residual_point_features_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['residual_convolution_dims'][i],
                        out_channels=self.config['residual_convolution_dims'][i+1],
                        kernel_size=self.config['residual_convolution_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['residual_convolution_dims'][i+1]),
                    # torchsparse.nn.ReLU()
                )
        )

        self.residual_medium_scale_voxel_features_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['residual_convolution_dims'][i],
                        out_channels=self.config['residual_convolution_dims'][i+1],
                        kernel_size=self.config['residual_convolution_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['residual_convolution_dims'][i+1]),
                    # torchsparse.nn.ReLU()
                )
        )

        self.residual_large_scale_voxel_features_layer.append(
                nn.Sequential(
                    torchsparse.nn.Conv3d(
                        in_channels=self.config['residual_convolution_dims'][i],
                        out_channels=self.config['residual_convolution_dims'][i+1],
                        kernel_size=self.config['residual_convolution_kernel_sizes'][i],
                        stride=1),
                    torchsparse.nn.BatchNorm(self.config['residual_convolution_dims'][i+1]),
                    # torchsparse.nn.ReLU()
                )
        )

        self.squeeze_excitation_layer = SqueezeExcitationTorchSparse(self.config)

def forward(self,
            x1,
            x2,
            x3):
    # x1: point features
    # x2: medium scale voxel features
    # x3: large scale voxel features

    # residual point features
    point_features = torchsparse.SparseTensor(feats=x1.F.clone(), 
                                              coords=x1.C.clone()).to('cuda:0')
    medium_scale_voxel_features = torchsparse.SparseTensor(feats=x2.F.clone(), 
                                                           coords=x2.C.clone()).to('cuda:0')
    large_scale_voxel_features = torchsparse.SparseTensor(feats=x3.F.clone(), 
                                                          coords=x3.C.clone()).to('cuda:0')

    for i in range(len(self.config['residual_convolution_dims'])-1):
        point_features = self.residual_point_features_layer[i](point_features)
        medium_scale_voxel_features = self.residual_medium_scale_voxel_features_layer[i](medium_scale_voxel_features)
        large_scale_voxel_features = self.residual_large_scale_voxel_features_layer[i](large_scale_voxel_features)

    point_features += x1
    medium_scale_voxel_features += x2
    large_scale_voxel_features += x3

    # squeeze excitation
    all_features = torchsparse.cat((point_features,
                                    medium_scale_voxel_features,
                                    large_scale_voxel_features))
    # print(f"all_features.shape: {all_features.F.shape}")
    se_output = self.squeeze_excitation_layer(all_features)
    # print(f"se_output.shape: {se_output.F.shape}") # N*F
    filtered_features = torchsparse.SparseTensor(feats=all_features.F.clone(), 
                                                 coords=all_features.C.clone()).to('cuda:0')  # N*F
    filtered_features.F = se_output.F * all_features.F  # N*F
    filtered_features.F = filtered_features.F*self.config['damping_factor'] + all_features.F
    # final_features = torchsparse.SparseTensor(feats=filtered_features, coords=all_features.C).to('cuda:0')
    final_features = filtered_features
    return final_features

class DefaultUNetTorchSparse(nn.Module):
    def __init__(self, config) -> None:
        super(DefaultUNetTorchSparse, self).__init__()
        self.config = config
        encoder_dims = self.config['conv_feature_dims']
        decoder_dims = self.config['transposed_conv_feature_dims']
        self.model = SparseResUNet(stem_channels=32,
                                   encoder_channels=encoder_dims,
                                   decoder_channels=decoder_dims)
        self.conv_block = nn.Sequential(
            torchsparse.nn.Conv3d(
                in_channels=decoder_dims[-1],
                out_channels=self.config['after_transposed_feature'][0],
                kernel_size=self.config['kernel_sizes'][0],
                stride=1),
            torchsparse.nn.BatchNorm(self.config['after_transposed_feature'][0]),
            torchsparse.nn.ReLU(),
        )

def forward(self,
            input):
    # input: SparseTensor with coordinates and features
    # output: SparseTensor with coordinates and features
    input = self.model(input)
    # print(f"input.shape: {input.F.shape}")
    input = input[-1]
    input = self.conv_block(input)
    return input

class Model(nn.Module):
    def __init__(self, config):
        super(Model, self).__init__()
        self.config = config
        self.attentive_feature_fusion = AttentiveFeatureFusionTorchSparse(self.config['AttentiveFeatureFusion'])
        self.adaptive_feature_selection = AdaptiveFeatureSelectionTorchSparse(self.config['AdaptiveFeatureSelection'])
        self.unet_torchsparse = DefaultUNetTorchSparse(self.config['UNet'])
        self.final_transposed_conv_block = nn.Sequential(
            torchsparse.nn.Conv3d(
                in_channels=self.config['UNet']['final_transposed_conv_feature_dims'][0],
                out_channels=self.config['UNet']['final_transposed_conv_feature_dims'][1],
                kernel_size=self.config['UNet']['kernel_sizes'][0],
                stride=1),
            torchsparse.nn.BatchNorm(self.config['UNet']['final_transposed_conv_feature_dims'][1]),
            torchsparse.nn.ReLU(),
        )
        self.classification_layer = torchsparse.nn.Conv3d(
            in_channels=self.config['UNet']['final_transposed_conv_feature_dims'][1],
            out_channels=self.config['UNet']['num_classes'],
            kernel_size=1,
            stride=1,
        )

def forward(self,
            input):
    # input: SparseTensor with coordinates and features
    attentive_feature_fusion_output = self.attentive_feature_fusion(input)
    J = attentive_feature_fusion_output['J']
    unet_torchsparse_output = self.unet_torchsparse(J)
    adaptive_feature_selection_output = self.adaptive_feature_selection(attentive_feature_fusion_output['x1'],
                                                                        attentive_feature_fusion_output['x2'],
                                                                        attentive_feature_fusion_output['x3'])
    # print(f"unet_torchsparse_output.shape: {unet_torchsparse_output.F.shape}")
    # print(f"adaptive_feature_selection_output.shape: {adaptive_feature_selection_output.F.shape}")
    final_transposed_conv_block_output_feature = torchsparse.cat((unet_torchsparse_output,
                                                                  adaptive_feature_selection_output))
    final_transposed_conv_block_output = self.final_transposed_conv_block(final_transposed_conv_block_output_feature)
    classification_layer_output = self.classification_layer(final_transposed_conv_block_output)
    return classification_layer_output

def main():
    filename = "../config/config_kitti.yaml"
    with open(filename, 'r') as f:
        config = yaml.safe_load(f)

    attentive_feature_fusion = AttentiveFeatureFusionTorchSparse(config['model']['AttentiveFeatureFusion'])

    # adaptive_feature_selection = AdaptiveFeatureSelectionTorchSparse(config['model']['AdaptiveFeatureSelection'])
    num_points = 100_000
    coords = torch.randint(0, 100, (num_points, 4)).int()
    coords[:, 0] = 0  # Single batch
    features = torch.randn(num_points, 3)
    print(f"coords.shape: {coords.shape}, features.shape: {features.shape}")
    sparse_tensor = torchsparse.SparseTensor(feats=features,
                                             coords=coords).to('cuda:0')

    model = Model(config['model']).to('cuda:0')
    output = model(sparse_tensor)
    print(f"output.shape: {output.F.shape}")
    # main()

if name == "main":

Ensure all tensors are on the same device

# device = 'cuda:0'

# # Create a sample SparseTensor
# coords = torch.tensor([[0, 0], [1, 1], [2, 2]], device=device)
# feats = torch.tensor([[1.], [2.], [3.]], device=device)
# sparse_tensor_1 = torchsparse.SparseTensor(feats=feats, coords=coords).to(device)

# # Create a second SparseTensor from the first one without using clone()
# sparse_tensor_2 = torchsparse.SparseTensor(feats=sparse_tensor_1.F, coords=sparse_tensor_1.C).to(device)

# # Print the initial features of both tensors
# print("Initial features of tensor 1:", sparse_tensor_1.F)
# print("Initial features of tensor 2:", sparse_tensor_2.F)

# # Modify the features of the second tensor
# sparse_tensor_2.F += 1

# # Print the modified features of both tensors
# print("Modified features of tensor 1:", sparse_tensor_1.F)
# print("Modified features of tensor 2:", sparse_tensor_2.F)

# # Check if the features of the first tensor have been modified
# if torch.equal(sparse_tensor_1.F, feats):
#     print("The features of the first tensor have NOT been modified. No need to use clone().")
# else:
#     print("The features of the first tensor HAVE been modified. Use clone() when creating a new SparseTensor.")

    main()

Config file:


label_mapping_filename: "./config/kitti_mapping.yaml"
data:
  preload_data: False
  root_filename_pcd: "./data/data_odometry_velodyne"
  root_filename_label: "./data/data_odometry_labels"
  val_sequences: ['08']
  # train_sequences: ['02']
  train_sequences: ['00', '01', '02', '03', '04', '05', '06', '07', '09', '10']
  voxel_size: 0.1
  num_workers: 1
  batch_size: 1
  use_normals: False
  use_dynamic_voxelization: True
  max_dims:
    - 80
    - 80
    - 4
  min_dims:
    - -80
    - -80
    - -4

model:
  AttentiveFeatureFusion:
    point_features_dims: [4, 64, 32, 32]
    point_features_kernel_sizes: [3, 5, 8] # len must be 1 less than point_features_dims

    medium_scale_voxel_features_dims: [4, 64, 32]
    medium_scale_voxel_features_kernel_sizes: [4, 4] # len must be 1 less than medium_scale_voxel_features_dims

    large_scale_voxel_features_dims: [4, 32]
    large_scale_voxel_features_kernel_sizes: [12] # len must be 1 less than large_scale_voxel_features_dims

    attention_dims: [32, 1]
    attention_kernel_sizes: [2]

    pi_dims: [3, 32]
    pi_kernel_sizes: [2]

    fusion_dims: [32, 32]
    fusion_kernel_sizes: [2]

  AdaptiveFeatureSelection:
    residual_convolution_dims: [32, 32]
    residual_convolution_kernel_sizes: [2]
    squeeze_ratio: 32
    damping_factor: 0.35

  UNet:
    conv_feature_dims: [32, 64, 128, 256]
    transposed_conv_feature_dims: [256, 128, 64, 32]
    after_transposed_feature: [96]
    final_transposed_conv_feature_dims: [192, 32]
    kernel_sizes: [3, 3, 3]
    num_classes: 20

hyperparameters:
  resume: False
  resume_path: "./model_best_acc.pth"
  save_path: "./checkpoints/model_best_acc_kitti.pth"
  lr: 0.001
  weight_decay: 0.0001
  momentum: 0.9
  epochs: 100
  factor: 0.5
  patience: 2
  lovasz: True
  lovasz_weight: 0.5
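As a hedged debugging sketch (not part of the original report), one way to locate the layer whose configured in_channels disagrees with the feature width it actually receives is to register a forward pre-hook on every sparse conv before calling loss.backward(). This reuses the Model and config from the script above and assumes torchsparse.nn.Conv3d keeps an in_channels attribute; the getattr guard makes the check a no-op if it does not:

    def check_channels(module, inputs):
        # inputs[0] is the incoming SparseTensor; compare its feature width with the
        # in_channels this conv was constructed with (attribute assumed to exist).
        expected = getattr(module, 'in_channels', None)
        actual = inputs[0].F.shape[1]
        if expected is not None and actual != expected:
            print(f"{module}: receives {actual} channels, built with in_channels={expected}")

    model = Model(config['model']).to('cuda:0')
    for m in model.modules():
        if isinstance(m, torchsparse.nn.Conv3d):
            m.register_forward_pre_hook(check_channels)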
zhijian-liu commented 6 months ago

@ys-2020, could you please take a look at this issue when you have time? Thanks!