It fails to train Fast-SCNN by "RuntimeError: CUDA error: device-side assert triggered"

Checklist

[x] I have searched related issues but cannot get the expected help.
[x] The bug has not been fixed in the latest version.

Describe the bug

It fails to train Fast-SCNN by the following CUDA error:

RuntimeError: CUDA error: device-side assert triggered terminate called after throwing an instance of 'c10::Error'   what():  CUDA error: device-side assert triggered

Reproduction

What command or script did you run?

I tried to train Fast-SCNN with the following script by reference to https://github.com/open-mmlab/mmsegmentation/blob/master/demo/MMSegmentation_Tutorial.ipynb

I can train PSPNet though can't train Fast-SCNN.

train.py

import os.path as osp

import typer
import mmcv
from mmcv import Config
from mmseg.apis import set_random_seed, train_segmentor
from mmseg.datasets import build_dataset
from mmseg.datasets.builder import DATASETS
from mmseg.datasets.custom import CustomDataset
from mmseg.models import build_segmentor

classes = ("sky", "tree", "road", "grass", "water", "bldg", "mntn", "fg obj")
palette = [
    [128, 128, 128],
    [129, 127, 38],
    [120, 69, 125],
    [53, 125, 34],
    [0, 11, 123],
    [118, 20, 12],
    [122, 81, 25],
    [241, 134, 51],
]
img_dir = "images"
ann_dir = "labels"
data_root = "../data/iccv09Data"
# config_file = "configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py"
config_file = "configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py"

@DATASETS.register_module()
class StandfordBackgroundDataset(CustomDataset):
    CLASSES = classes
    PALETTE = palette

    def __init__(self, split, **kwargs):
        super().__init__(
            img_suffix=".jpg", seg_map_suffix=".png", split=split, **kwargs
        )
        assert osp.exists(self.img_dir) and self.split is not None

def main(batch_size: int = 20):
    cfg = Config.fromfile(config_file)
    # Since we use ony one GPU, BN is used instead of SyncBN
    cfg.norm_cfg = dict(type="BN", requires_grad=True)
    cfg.model.backbone.norm_cfg = cfg.norm_cfg
    cfg.model.decode_head.norm_cfg = cfg.norm_cfg
    # cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg
    cfg.model.auxiliary_head[0].norm_cfg = cfg.norm_cfg
    cfg.model.auxiliary_head[1].norm_cfg = cfg.norm_cfg

    # modify num classes of the model in decode/auxiliary head
    cfg.model.decode_head.num_classes = 8
    # cfg.model.auxiliary_head.num_classes = 8
    cfg.model.auxiliary_head[0].num_classes = 8
    cfg.model.auxiliary_head[1].num_classes = 8

    # Modify dataset type and path
    cfg.dataset_type = "StandfordBackgroundDataset"
    cfg.data_root = data_root

    cfg.data.samples_per_gpu = batch_size
    cfg.data.workers_per_gpu = 8

    cfg.img_norm_cfg = dict(
        mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True
    )
    cfg.crop_size = (256, 256)
    cfg.train_pipeline = [
        dict(type="LoadImageFromFile"),
        dict(type="LoadAnnotations"),
        dict(type="Resize", img_scale=(320, 240), ratio_range=(0.5, 2.0)),
        dict(type="RandomCrop", crop_size=cfg.crop_size, cat_max_ratio=0.75),
        dict(type="RandomFlip", prob=0.5),
        dict(type="PhotoMetricDistortion"),
        dict(type="Normalize", **cfg.img_norm_cfg),
        dict(type="Pad", size=cfg.crop_size, pad_val=0, seg_pad_val=255),
        dict(type="DefaultFormatBundle"),
        dict(type="Collect", keys=["img", "gt_semantic_seg"]),
    ]

    cfg.test_pipeline = [
        dict(type="LoadImageFromFile"),
        dict(
            type="MultiScaleFlipAug",
            img_scale=(320, 240),
            # img_ratios=[0.5, 0.75, 1.0, 1.25, 1.5, 1.75],
            flip=False,
            transforms=[
                dict(type="Resize", keep_ratio=True),
                dict(type="RandomFlip"),
                dict(type="Normalize", **cfg.img_norm_cfg),
                dict(type="ImageToTensor", keys=["img"]),
                dict(type="Collect", keys=["img"]),
            ],
        ),
    ]

    cfg.data.train.type = cfg.dataset_type
    cfg.data.train.data_root = cfg.data_root
    cfg.data.train.img_dir = img_dir
    cfg.data.train.ann_dir = ann_dir
    cfg.data.train.pipeline = cfg.train_pipeline
    cfg.data.train.split = "splits/train.txt"

    cfg.data.val.type = cfg.dataset_type
    cfg.data.val.data_root = cfg.data_root
    cfg.data.val.img_dir = img_dir
    cfg.data.val.ann_dir = ann_dir
    cfg.data.val.pipeline = cfg.test_pipeline
    cfg.data.val.split = "splits/val.txt"

    cfg.data.test.type = cfg.dataset_type
    cfg.data.test.data_root = cfg.data_root
    cfg.data.test.img_dir = img_dir
    cfg.data.test.ann_dir = ann_dir
    cfg.data.test.pipeline = cfg.test_pipeline
    cfg.data.test.split = "splits/val.txt"

    # We can still use the pre-trained Mask RCNN model though we do not need to
    # use the mask branch
    # cfg.load_from = (
    #     "checkpoints/pspnet_r50-d8_512x1024_40k_cityscapes_20200605_003338-2966598c.pth"
    # )

    # Set up working dir to save files and logs.
    cfg.work_dir = "./work_dirs/tutorial"

    cfg.total_iters = 200
    cfg.log_config.interval = 10
    cfg.evaluation.interval = 200
    cfg.checkpoint_config.interval = 200

    # Set seed to facitate reproducing the result
    cfg.seed = 0
    set_random_seed(0, deterministic=False)
    cfg.gpu_ids = range(1)

    # Let's have a look at the final config used for training
    print(f"Config:\n{cfg.pretty_text}")

    # Build the dataset
    datasets = [build_dataset(cfg.data.train)]

    # Build the detector
    model = build_segmentor(cfg.model)
    # Add an attribute for visualization convenience
    model.CLASSES = datasets[0].CLASSES

    # Create work_dir
    mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
    train_segmentor(model, datasets, cfg, distributed=False, validate=True, meta=dict())

if __name__ == "__main__":
    typer.run(main)

Did you make any modifications on the code or config? Did you understand what you have modified?

The difference between the above script and the tutorial code:

-- config_file = "configs/pspnet/pspnet_r50-d8_512x1024_40k_cityscapes.py"
++ config_file = "configs/fastscnn/fast_scnn_4x8_80k_lr0.12_cityscapes.py"

--    cfg.model.auxiliary_head.norm_cfg = cfg.norm_cfg
++    cfg.model.auxiliary_head[0].norm_cfg = cfg.norm_cfg
++    cfg.model.auxiliary_head[1].norm_cfg = cfg.norm_cfg

--    cfg.model.auxiliary_head.num_classes = 8
++    cfg.model.auxiliary_head[0].num_classes = 8
++    cfg.model.auxiliary_head[1].num_classes = 8

What dataset did you use?

StanfordBackgroundDataset

Environment

Please run python mmseg/utils/collect_env.py to collect necessary environment infomation and paste it here.

sys.platform: linux
Python: 3.7.10 (default, Feb 27 2021, 12:11:23) [GCC 7.5.0]
CUDA available: True
GPU 0: Tesla K80
CUDA_HOME: /usr
NVCC: Cuda compilation tools, release 9.1, V9.1.85
GCC: gcc (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0
PyTorch: 1.6.0
PyTorch compiling details: PyTorch built with:
  - GCC 7.3
  - C++ Version: 201402
  - Intel(R) Math Kernel Library Version 2019.0.5 Product Build 20190808 for Intel(R) 64 architecture applications
  - Intel(R) MKL-DNN v1.5.0 (Git Hash e2ac1fac44c5078ca927cb9b90e1b3066a0b2ed0)
  - OpenMP 201511 (a.k.a. OpenMP 4.5)
  - NNPACK is enabled
  - CPU capability usage: AVX2
  - CUDA Runtime 10.2
  - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75
  - CuDNN 7.6.5
  - Magma 2.5.2
  - Build settings: BLAS=MKL, BUILD_TYPE=Release, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DUSE_VULKAN_WRAPPER -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-sign-compare -Wno-unused-parameter -Wno-unused-variable -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Wno-stringop-overflow, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, USE_CUDA=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=ON, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_STATIC_DISPATCH=OFF, 

TorchVision: 0.7.0
OpenCV: 4.5.1
MMCV: 1.2.7
MMCV Compiler: GCC 7.5
MMCV CUDA Compiler: not available
MMSegmentation: 0.11.0+ac26f61

You may add addition that may be helpful for locating the problem, such as
- How you installed PyTorch [e.g., pip, conda, source]
- Other environment variables that may be related (such as $PATH, $LD_LIBRARY_PATH, $PYTHONPATH, etc.)

pip install torch==1.6.0 torchvision==0.7.0

I installed CUDA, following the official instructions:

https://developer.nvidia.com/cuda-10.2-download-archive?target_os=Linux&target_arch=x86_64&target_distro=Ubuntu&target_version=1804&target_type=debnetwork

Error traceback

If applicable, paste the error trackback here.

the error trackback

Config:
norm_cfg = dict(type='BN', requires_grad=True)
model = dict(
    type='EncoderDecoder',
    backbone=dict(
        type='FastSCNN',
        downsample_dw_channels=(32, 48),
        global_in_channels=64,
        global_block_channels=(64, 96, 128),
        global_block_strides=(2, 2, 1),
        global_out_channels=128,
        higher_in_channels=64,
        lower_in_channels=128,
        fusion_out_channels=128,
        out_indices=(0, 1, 2),
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False),
    decode_head=dict(
        type='DepthwiseSeparableFCNHead',
        in_channels=128,
        channels=128,
        concat_input=False,
        num_classes=8,
        in_index=-1,
        norm_cfg=dict(type='BN', requires_grad=True),
        align_corners=False,
        loss_decode=dict(
            type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
    auxiliary_head=[
        dict(
            type='FCNHead',
            in_channels=128,
            channels=32,
            num_convs=1,
            num_classes=8,
            in_index=-2,
            norm_cfg=dict(type='BN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4)),
        dict(
            type='FCNHead',
            in_channels=64,
            channels=32,
            num_convs=1,
            num_classes=8,
            in_index=-3,
            norm_cfg=dict(type='BN', requires_grad=True),
            concat_input=False,
            align_corners=False,
            loss_decode=dict(
                type='CrossEntropyLoss', use_sigmoid=True, loss_weight=0.4))
    ],
    train_cfg=dict(),
    test_cfg=dict(mode='whole'))
dataset_type = 'StandfordBackgroundDataset'
data_root = '../data/iccv09Data'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
crop_size = (256, 256)
train_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(type='LoadAnnotations'),
    dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),
    dict(type='RandomCrop', crop_size=(256, 256), cat_max_ratio=0.75),
    dict(type='RandomFlip', prob=0.5),
    dict(type='PhotoMetricDistortion'),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_rgb=True),
    dict(type='Pad', size=(256, 256), pad_val=0, seg_pad_val=255),
    dict(type='DefaultFormatBundle'),
    dict(type='Collect', keys=['img', 'gt_semantic_seg'])
]
test_pipeline = [
    dict(type='LoadImageFromFile'),
    dict(
        type='MultiScaleFlipAug',
        img_scale=(320, 240),
        flip=False,
        transforms=[
            dict(type='Resize', keep_ratio=True),
            dict(type='RandomFlip'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='ImageToTensor', keys=['img']),
            dict(type='Collect', keys=['img'])
        ])
]
data = dict(
    samples_per_gpu=2,
    workers_per_gpu=8,
    train=dict(
        type='StandfordBackgroundDataset',
        data_root='../data/iccv09Data',
        img_dir='images',
        ann_dir='labels',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(type='LoadAnnotations'),
            dict(type='Resize', img_scale=(320, 240), ratio_range=(0.5, 2.0)),
            dict(type='RandomCrop', crop_size=(256, 256), cat_max_ratio=0.75),
            dict(type='RandomFlip', prob=0.5),
            dict(type='PhotoMetricDistortion'),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_rgb=True),
            dict(type='Pad', size=(256, 256), pad_val=0, seg_pad_val=255),
            dict(type='DefaultFormatBundle'),
            dict(type='Collect', keys=['img', 'gt_semantic_seg'])
        ],
        split='splits/train.txt'),
    val=dict(
        type='StandfordBackgroundDataset',
        data_root='../data/iccv09Data',
        img_dir='images',
        ann_dir='labels',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(320, 240),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ],
        split='splits/val.txt'),
    test=dict(
        type='StandfordBackgroundDataset',
        data_root='../data/iccv09Data',
        img_dir='images',
        ann_dir='labels',
        pipeline=[
            dict(type='LoadImageFromFile'),
            dict(
                type='MultiScaleFlipAug',
                img_scale=(320, 240),
                flip=False,
                transforms=[
                    dict(type='Resize', keep_ratio=True),
                    dict(type='RandomFlip'),
                    dict(
                        type='Normalize',
                        mean=[123.675, 116.28, 103.53],
                        std=[58.395, 57.12, 57.375],
                        to_rgb=True),
                    dict(type='ImageToTensor', keys=['img']),
                    dict(type='Collect', keys=['img'])
                ])
        ],
        split='splits/val.txt'))
log_config = dict(
    interval=10, hooks=[dict(type='TextLoggerHook', by_epoch=False)])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
cudnn_benchmark = True
optimizer = dict(type='SGD', lr=0.12, momentum=0.9, weight_decay=4e-05)
optimizer_config = dict()
lr_config = dict(policy='poly', power=0.9, min_lr=0.0001, by_epoch=False)
runner = dict(type='IterBasedRunner', max_iters=80000)
checkpoint_config = dict(by_epoch=False, interval=200)
evaluation = dict(interval=200, metric='mIoU')
work_dir = './work_dirs/tutorial'
total_iters = 200
seed = 0
gpu_ids = range(0, 1)

2021-02-27 22:13:09,041 - mmseg - INFO - Loaded 572 images
2021-02-27 22:13:11,058 - mmseg - INFO - Loaded 143 images
2021-02-27 22:13:11,059 - mmseg - INFO - Start running, host: hrsma2i@segmentation-gpu, work_dir: /home/hrsma2i/segmentation/work_dirs/tutorial
2021-02-27 22:13:11,060 - mmseg - INFO - workflow: [('train', 1)], max: 80000 iters
Traceback (most recent call last):
  File "train.py", line 156, in <module>
    typer.run(main)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/typer/main.py", line 859, in run
    app()
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/typer/main.py", line 214, in __call__
    return get_command(self)(*args, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/click/core.py", line 829, in __call__
    return self.main(*args, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/click/core.py", line 782, in main
    rv = self.invoke(ctx)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/click/core.py", line 1066, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/click/core.py", line 610, in invoke
    return callback(*args, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/typer/main.py", line 497, in wrapper
    return callback(**use_params)  # type: ignore
  File "train.py", line 152, in main
    train_segmentor(model, datasets, cfg, distributed=False, validate=True, meta=dict())
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/apis/train.py", line 116, in train_segmentor
    runner.run(data_loaders, cfg.workflow)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py", line 131, in run
    iter_runner(iter_loaders[i], **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmcv/runner/iter_based_runner.py", line 60, in train
    outputs = self.model.train_step(data_batch, self.optimizer, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmcv/parallel/data_parallel.py", line 67, in train_step
    return self.module.train_step(*inputs[0], **kwargs[0])
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/segmentors/base.py", line 152, in train_step
    losses = self(**data_batch)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmcv/runner/fp16_utils.py", line 84, in new_func
    return old_func(*args, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/segmentors/base.py", line 122, in forward
    return self.forward_train(img, img_metas, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/segmentors/encoder_decoder.py", line 158, in forward_train
    gt_semantic_seg)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/segmentors/encoder_decoder.py", line 102, in _decode_head_forward_train
    self.train_cfg)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/decode_heads/decode_head.py", line 187, in forward_train
    losses = self.losses(seg_logits, gt_semantic_seg)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmcv/runner/fp16_utils.py", line 164, in new_func
    return old_func(*args, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/decode_heads/decode_head.py", line 232, in losses
    ignore_index=self.ignore_index)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/nn/modules/module.py", line 722, in _call_impl
    result = self.forward(*input, **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/losses/cross_entropy_loss.py", line 197, in forward
    **kwargs)
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/mmseg/models/losses/cross_entropy_loss.py", line 92, in binary_cross_entropy
    pred, label.float(), pos_weight=class_weight, reduction='none')
  File "/home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/nn/functional.py", line 2540, in binary_cross_entropy_with_logits
    return torch.binary_cross_entropy_with_logits(input, target, weight, pos_weight, reduction_enum)
RuntimeError: CUDA error: device-side assert triggered
terminate called after throwing an instance of 'c10::Error'
  what():  CUDA error: device-side assert triggered
Exception raised from create_event_internal at /pytorch/c10/cuda/CUDACachingAllocator.cpp:687 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x42 (0x7f55ed5991e2 in /home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/lib/libc10.so)
frame #1: c10::cuda::CUDACachingAllocator::raw_delete(void*) + 0xad2 (0x7f55ed7e7f92 in /home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/lib/libc10_cuda.so)
frame #2: c10::TensorImpl::release_resources() + 0x4d (0x7f55ed5879cd in /home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/lib/libc10.so)
frame #3: <unknown function> + 0x541322 (0x7f5639120322 in /home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
frame #4: <unknown function> + 0x5413c6 (0x7f56391203c6 in /home/hrsma2i/.cache/pypoetry/virtualenvs/segmentation-qAzmWibJ-py3.7/lib/python3.7/site-packages/torch/lib/libtorch_python.so)
<omitting python frames>
frame #29: __libc_start_main + 0xe7 (0x7f564491fbf7 in /lib/x86_64-linux-gnu/libc.so.6)

Aborted (core dumped)

Bug fix

If you have already identified the reason, you can provide the information here. If you are willing to create a PR to fix it, please also leave a comment here and that would be much appreciated!

open-mmlab / mmsegmentation

It fails to train Fast-SCNN by "RuntimeError: CUDA error: device-side assert triggered" #390