open-mmlab / mmaction2

OpenMMLab's Next Generation Video Understanding Toolbox and Benchmark
https://mmaction2.readthedocs.io
Apache License 2.0
4.23k stars 1.24k forks source link

Train models using RawframeDataset and modality flow #610

Closed silvi98 closed 3 years ago

silvi98 commented 3 years ago

I was trying to train a model using RawframeDataset on frames that I extracted with your script in tvl1 mode, but every config I've tried has resulted in the following exception:

Traceback (most recent call last):
  File "tools/train.py", line 178, in <module>
    main()
  File "tools/train.py", line 174, in main
    meta=meta)
  File "/media/diskb/silvi98/mmaction2/mmaction/apis/train.py", line 156, in train_model
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs)
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 125, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 47, in train
    for i, data_batch in enumerate(self.data_loader):
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
    data = self._next_data()
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 475, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/base.py", line 280, in __getitem__
    return self.prepare_train_frames(idx)
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/rawframe_dataset.py", line 172, in prepare_train_frames
    return self.pipeline(results)
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/pipelines/compose.py", line 41, in __call__
    data = t(data)
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/pipelines/augmentations.py", line 1182, in __call__
    h, w = results['imgs'][0].shape

It seems the loaded images still have three channels (the last dimension of their shape is 3), i.e. they are still being treated as RGB. Here is my configuration. Is the problem in the configuration or in the images? Is there a way to fix this? Thank you.

# model settings
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dSlowOnly',
        depth=50,
        pretrained='torchvision://resnet50',
        lateral=False,
        conv1_kernel=(1, 7, 7),
        conv1_stride_t=1,
        pool1_stride_t=1,
        inflate=(0, 0, 1, 1),
        norm_eval=False),
    cls_head=dict(
        type='I3DHead',
        in_channels=2048,
        num_classes=2,
        spatial_type='avg',
        dropout_ratio=0.5))
train_cfg = None
test_cfg = dict(average_clips='prob')

dataset_type = 'RawframeDataset'
data_root = 'data/trailer/rawframes/'
data_root_val = 'data/trailer/rawframes/'
ann_file_train = 'data/trailer/flow_movement_train_list.txt' 
ann_file_val = 'data/trailer/flow_movement_val_list.txt'
ann_file_test = 'data/trailer/flow_movement_val_list.txt' 
img_norm_cfg = dict(mean=[128, 128], std=[128, 128])

train_pipeline = [
    dict(type='SampleFrames', clip_len=8, frame_interval=8, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=8,
        frame_interval=8,
        num_clips=4,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Flip', flip_ratio=0),
    dict(type='Normalize', **img_norm_cfg),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=8,
    workers_per_gpu=0,
    train=dict(
        type=dataset_type,
        ann_file=ann_file_train,
        data_prefix=data_root,
        modality='Flow',
        start_index=0,
        filename_tmpl='flow_{}_{:05d}.jpg',
        pipeline=train_pipeline),
    val=dict(
        type=dataset_type,
        ann_file=ann_file_val,
        data_prefix=data_root_val,
        start_index=0,
        modality='Flow',
        filename_tmpl='flow_{}_{:05d}.jpg',
        pipeline=val_pipeline),
    test=dict(
        type=dataset_type,
        ann_file=ann_file_test,
        data_prefix=data_root_val,
        start_index=0,
        modality='Flow',
        filename_tmpl='flow_{}_{:05d}.jpg',
        pipeline=test_pipeline))
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])

# optimizer
optimizer = dict(
    type='SGD', lr=0.001, momentum=0.9,
    weight_decay=0.0001)  # this lr is used for 8 gpus
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(policy='CosineAnnealing', min_lr=0)
total_epochs = 5

# runtime settings
checkpoint_config = dict(interval=5)
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
log_config = dict(
    interval=20,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook'),
    ])
dist_params = dict(backend='nccl')
log_level= 'INFO'
workflow = [('train', 1)]
load_from = None
resume_from = None
work_dir = './work_dirs/slowfast_movement_bi_r50_3d_8x8x1_60e_trailer_flow'
find_unused_parameters = False
kennymckormick commented 3 years ago

Hi, the error message you pasted is not complete; can you paste the complete one? Also, should the filename_tmpl be 'flow_{}_{:05d}.jpg' or 'flow{}{:05d}.jpg'? It seems that your config file is not consistent.

silvi98 commented 3 years ago

2021-02-10 11:08:42,214 - mmaction - INFO - load model from: torchvision://resnet50
2021-02-10 11:08:42,504 - mmaction - INFO - These parameters in the 2d checkpoint are not loaded: {'fc.weight', 'fc.bias'}
2021-02-10 11:08:45,367 - mmaction - INFO - Start running, host: ...
2021-02-10 11:08:45,368 - mmaction - INFO - workflow: [('train', 1)], max: 60 epochs
Traceback (most recent call last):
  File "tools/train.py", line 178, in <module>
    main()
  File "tools/train.py", line 174, in main
    meta=meta)
  File "/media/diskb/silvi98/mmaction2/mmaction/apis/train.py", line 156, in train_model
    runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs)
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 125, in run
    epoch_runner(data_loaders[i], **kwargs)
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/mmcv/runner/epoch_based_runner.py", line 47, in train
    for i, data_batch in enumerate(self.data_loader):
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 435, in __next__
    data = self._next_data()
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/dataloader.py", line 475, in _next_data
    data = self._dataset_fetcher.fetch(index)  # may raise StopIteration
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/silvi98/anaconda3/envs/mmaction/lib/python3.7/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/base.py", line 280, in __getitem__
    return self.prepare_train_frames(idx)
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/rawframe_dataset.py", line 172, in prepare_train_frames
    return self.pipeline(results)
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/pipelines/compose.py", line 41, in __call__
    data = t(data)
  File "/media/diskb/silvi98/mmaction2/mmaction/datasets/pipelines/augmentations.py", line 1182, in __call__
    h, w = results['imgs'][0].shape
ValueError: too many values to unpack (expected 2)

I'm not exactly sure why, but flow_{}_{:05d}.jpg is the naming pattern the images have. I used the build_rawframes.py script a few weeks ago.

androbaza commented 3 years ago

Hi @silvi98, did you resolve your problem? I am having the same issue now.

2021-03-29 00:08:15,510 - mmaction - INFO - load model from: https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r152_ig65m_20200807-771c4135.pth
2021-03-29 00:08:15,511 - mmaction - INFO - Use load_from_http loader
2021-03-29 00:08:15,830 - mmaction - INFO - load checkpoint from checkpoints/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb_20200812-9037a758.pth
2021-03-29 00:08:15,831 - mmaction - INFO - Use load_from_local loader
2021-03-29 00:08:15,981 - mmaction - WARNING - The model and loaded state dict do not match exactly

size mismatch for cls_head.fc_cls.weight: copying a param with shape torch.Size([400, 2048]) from checkpoint, the shape in current model is torch.Size([7, 2048]).
size mismatch for cls_head.fc_cls.bias: copying a param with shape torch.Size([400]) from checkpoint, the shape in current model is torch.Size([7]).
2021-03-29 00:08:15,983 - mmaction - INFO - Start running, host: actrec@actrec-HP-Z4-G4-Workstation, work_dir: /home/actrec/.virtualenvs/mmaction/mmaction2/childact-checkpoints/childact-csn
2021-03-29 00:08:15,984 - mmaction - INFO - workflow: [('train', 1)], max: 51 epochs

---------------------------------------------------------------------------
AssertionError                            Traceback (most recent call last)
<ipython-input-14-be92e24cff32> in <module>
     18 # Create work_dir
     19 mmcv.mkdir_or_exist(osp.abspath(cfg.work_dir))
---> 20 train_model(model, datasets, cfg, distributed=False, validate=True)

~/.virtualenvs/mmaction/mmaction2/mmaction/apis/train.py in train_model(model, dataset, cfg, distributed, validate, timestamp, meta)
    154     if cfg.omnisource:
    155         runner_kwargs = dict(train_ratio=train_ratio)
--> 156     runner.run(data_loaders, cfg.workflow, cfg.total_epochs, **runner_kwargs)

~/.virtualenvs/mmaction/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py in run(self, data_loaders, workflow, max_epochs, **kwargs)
    123                     if mode == 'train' and self.epoch >= self._max_epochs:
    124                         break
--> 125                     epoch_runner(data_loaders[i], **kwargs)
    126 
    127         time.sleep(1)  # wait for some hooks like loggers to finish

~/.virtualenvs/mmaction/lib/python3.6/site-packages/mmcv/runner/epoch_based_runner.py in train(self, data_loader, **kwargs)
     45         self.call_hook('before_train_epoch')
     46         time.sleep(2)  # Prevent possible deadlock during epoch transition
---> 47         for i, data_batch in enumerate(self.data_loader):
     48             self._inner_iter = i
     49             self.call_hook('before_train_iter')

~/.virtualenvs/mmaction/lib/python3.6/site-packages/torch/utils/data/dataloader.py in __next__(self)
    433         if self._sampler_iter is None:
    434             self._reset()
--> 435         data = self._next_data()
    436         self._num_yielded += 1
    437         if self._dataset_kind == _DatasetKind.Iterable and \

~/.virtualenvs/mmaction/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _next_data(self)
   1083             else:
   1084                 del self._task_info[idx]
-> 1085                 return self._process_data(data)
   1086 
   1087     def _try_put_index(self):

~/.virtualenvs/mmaction/lib/python3.6/site-packages/torch/utils/data/dataloader.py in _process_data(self, data)
   1109         self._try_put_index()
   1110         if isinstance(data, ExceptionWrapper):
-> 1111             data.reraise()
   1112         return data
   1113 

~/.virtualenvs/mmaction/lib/python3.6/site-packages/torch/_utils.py in reraise(self)
    426             # have message field
    427             raise self.exc_type(message=msg)
--> 428         raise self.exc_type(msg)
    429 
    430 

AssertionError: Caught AssertionError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/actrec/.virtualenvs/mmaction/lib/python3.6/site-packages/torch/utils/data/_utils/worker.py", line 198, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/actrec/.virtualenvs/mmaction/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/actrec/.virtualenvs/mmaction/lib/python3.6/site-packages/torch/utils/data/_utils/fetch.py", line 44, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/actrec/.virtualenvs/mmaction/mmaction2/mmaction/datasets/base.py", line 280, in __getitem__
    return self.prepare_train_frames(idx)
  File "/home/actrec/.virtualenvs/mmaction/mmaction2/mmaction/datasets/rawframe_dataset.py", line 170, in prepare_train_frames
    return self.pipeline(results)
  File "/home/actrec/.virtualenvs/mmaction/mmaction2/mmaction/datasets/pipelines/compose.py", line 41, in __call__
    data = t(data)
  File "/home/actrec/.virtualenvs/mmaction/mmaction2/mmaction/datasets/pipelines/augmentations.py", line 1243, in __call__
    assert self.mean.shape[0] == 2
AssertionError

The config is as follows:

Config:
model = dict(
    type='Recognizer3D',
    backbone=dict(
        type='ResNet3dCSN',
        pretrained2d=False,
        pretrained=
        'https://download.openmmlab.com/mmaction/recognition/csn/ircsn_from_scratch_r152_ig65m_20200807-771c4135.pth',
        depth=152,
        with_pool2=False,
        bottleneck_mode='ir',
        norm_eval=True,
        zero_init_residual=False,
        bn_frozen=True),
    cls_head=dict(
        type='I3DHead',
        num_classes=7,
        in_channels=2048,
        spatial_type='avg',
        dropout_ratio=0.5,
        init_std=0.01),
    train_cfg=None,
    test_cfg=dict(average_clips='prob'))
dataset_type = 'RawframeDataset'
data_root = 'data/childact_rawframe/train/'
data_root_val = 'data/childact_rawframe/val/'
ann_file_train = 'data/childact_rawframe/childact_train_rawframe.txt'
ann_file_val = 'data/childact_rawframe/childact_val_rawframe.txt'
ann_file_test = 'data/childact_rawframe/childact_test_rawframe.txt'
img_norm_cfg = dict(
    mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
train_pipeline = [
    dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='RandomResizedCrop'),
    dict(type='Resize', scale=(224, 224), keep_ratio=False),
    dict(type='Flip', flip_ratio=0.5),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_bgr=False),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs', 'label'])
]
val_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=1,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='CenterCrop', crop_size=224),
    dict(type='Flip', flip_ratio=0),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_bgr=False),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
test_pipeline = [
    dict(
        type='SampleFrames',
        clip_len=32,
        frame_interval=2,
        num_clips=10,
        test_mode=True),
    dict(type='RawFrameDecode'),
    dict(type='Resize', scale=(-1, 256)),
    dict(type='ThreeCrop', crop_size=256),
    dict(type='Flip', flip_ratio=0),
    dict(
        type='Normalize',
        mean=[123.675, 116.28, 103.53],
        std=[58.395, 57.12, 57.375],
        to_bgr=False),
    dict(type='FormatShape', input_format='NCHW_Flow'),
    dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
    dict(type='ToTensor', keys=['imgs'])
]
data = dict(
    videos_per_gpu=3,
    workers_per_gpu=4,
    train=dict(
        type='RawframeDataset',
        ann_file='data/childact_rawframe/childact_train_rawframe.txt',
        data_prefix='data/childact_rawframe/train/',
        pipeline=[
            dict(
                type='SampleFrames',
                clip_len=32,
                frame_interval=2,
                num_clips=1),
            dict(type='FrameSelector'),
            dict(type='Resize', scale=(-1, 256)),
            dict(type='RandomResizedCrop'),
            dict(type='Resize', scale=(224, 224), keep_ratio=False),
            dict(type='Flip', flip_ratio=0.5),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_bgr=False),
            dict(type='FormatShape', input_format='NCTHW'),
            dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
            dict(type='ToTensor', keys=['imgs', 'label'])
        ],
        modality='Flow',
        start_index=0,
        filename_tmpl='flow_{}_{:05d}.jpg'),
    val=dict(
        type='RawframeDataset',
        ann_file='data/childact_rawframe/childact_val_rawframe.txt',
        data_prefix='data/childact_rawframe/val/',
        pipeline=[
            dict(
                type='SampleFrames',
                clip_len=32,
                frame_interval=2,
                num_clips=1,
                test_mode=True),
            dict(type='FrameSelector'),
            dict(type='Resize', scale=(-1, 256)),
            dict(type='CenterCrop', crop_size=224),
            dict(type='Flip', flip_ratio=0),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_bgr=False),
            dict(type='FormatShape', input_format='NCTHW'),
            dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
            dict(type='ToTensor', keys=['imgs'])
        ],
        modality='Flow',
        start_index=0,
        filename_tmpl='flow_{}_{:05d}.jpg'),
    test=dict(
        type='RawframeDataset',
        ann_file='data/childact_rawframe/childact_test_rawframe.txt',
        data_prefix='data/childact_rawframe/test/',
        pipeline=[
            dict(
                type='SampleFrames',
                clip_len=32,
                frame_interval=2,
                num_clips=10,
                test_mode=True),
            dict(type='FrameSelector'),
            dict(type='Resize', scale=(-1, 256)),
            dict(type='ThreeCrop', crop_size=256),
            dict(type='Flip', flip_ratio=0),
            dict(
                type='Normalize',
                mean=[123.675, 116.28, 103.53],
                std=[58.395, 57.12, 57.375],
                to_bgr=False),
            dict(type='FormatShape', input_format='NCTHW'),
            dict(type='Collect', keys=['imgs', 'label'], meta_keys=[]),
            dict(type='ToTensor', keys=['imgs'])
        ],
        modality='Flow',
        start_index=0,
        filename_tmpl='flow_{}_{:05d}.jpg'))
optimizer = dict(type='SGD', lr=0.000125, momentum=0.9, weight_decay=0.0001)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
lr_config = dict(
    policy='cyclic',
    target_ratio=(10, 1e-05),
    cyclic_times=1,
    step_ratio_up=0.4)
total_epochs = 51
checkpoint_config = dict(interval=12)
evaluation = dict(
    interval=5, metrics=['top_k_accuracy', 'mean_class_accuracy'])
log_config = dict(
    interval=25,
    hooks=[dict(type='TextLoggerHook'),
           dict(type='TensorboardLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './childact-checkpoints/childact-csn'
load_from = 'checkpoints/ircsn_ig65m_pretrained_bnfrozen_r152_32x2x1_58e_kinetics400_rgb_20200812-9037a758.pth'
resume_from = None
workflow = [('train', 1)]
find_unused_parameters = True
omnisource = False
momentum_config = dict(
    policy='cyclic',
    target_ratio=(0.8947368421052632, 1),
    cyclic_times=1,
    step_ratio_up=0.4)
seed = 42
gpu_ids = range(0, 1)
output_config = dict(out='./childact-checkpoints/childact-csn/results.json')

Update: the issue was resolved in https://github.com/open-mmlab/mmaction2/issues/764