Media-Smart / vedastr

A scene text recognition toolbox based on PyTorch
Apache License 2.0

TypeError: forward() takes 2 positional arguments but 3 were given #83

Closed choozhenbo closed 2 years ago

choozhenbo commented 2 years ago

After modifying tps_resnet_bilstm_attn.py into tps_resnet_bilstm_ctc.py, I get the error message below. Is my configuration file modified wrongly?

python tools/train.py configs/tps_resnet_bilstm_ctc.py "0"

2022-04-13 12:52:19,405 - INFO - Use GPU 0
2022-04-13 12:52:19,405 - INFO - Set cudnn deterministic False
2022-04-13 12:52:19,405 - INFO - Set cudnn benchmark True
2022-04-13 12:52:19,405 - INFO - Set seed 1111
2022-04-13 12:52:19,406 - INFO - Build model
2022-04-13 12:52:19,630 - INFO - GResNet init weights
2022-04-13 12:52:19,825 - INFO - CTCHead init weights
2022-04-13 12:52:21,666 - INFO - current dataset length is 1200 in /home/tham/vedastr/data/data_lmdb_release/training/CAR_PLATE
2022-04-13 12:52:21,667 - INFO - The truly used batch ratios are [1.]
2022-04-13 12:52:21,669 - INFO - current dataset length is 150 in /home/tham/vedastr/data/data_lmdb_release/validation/CAR_PLATE
2022-04-13 12:52:21,670 - INFO - Start train...
Traceback (most recent call last):
  File "tools/train.py", line 44, in <module>
    main()
  File "tools/train.py", line 40, in main
    runner()
  File "tools/../vedastr/runners/train_runner.py", line 148, in __call__
    self._train_batch(img, label)
  File "tools/../vedastr/runners/train_runner.py", line 103, in _train_batch
    pred = self.model((img, label_input))
  File "/home/tham/anaconda3/envs/vedastr/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
  File "tools/../vedastr/models/model.py", line 22, in forward
    out = self.head(x, inputs[1])
  File "/home/tham/anaconda3/envs/vedastr/lib/python3.6/site-packages/torch/nn/modules/module.py", line 1102, in _call_impl
    return forward_call(*input, **kwargs)
TypeError: forward() takes 2 positional arguments but 3 were given
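For reference, the failure mode in the traceback can be reproduced outside vedastr in a few lines; the module name and tensor shapes below are made up for illustration and are not the actual vedastr classes:

import torch
import torch.nn as nn

class HeadWithoutText(nn.Module):
    # forward accepts only the feature, like a CTC-style head
    def forward(self, x):
        return x.mean(dim=1)

head = HeadWithoutText()
feat = torch.randn(2, 26, 512)    # dummy sequence feature
text = torch.zeros(2, 26).long()  # dummy label input

head(feat)        # works: one positional argument besides self
head(feat, text)  # TypeError: forward() takes 2 positional arguments but 3 were given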

###############################################################################

1. deploy

size = (32, 100)
mean, std = 0.5, 0.5

sensitive = False
character = 'abcdefghijklmnopqrstuvwxyz0123456789'
batch_max_length = 25

F = 20
hidden_dim = 256
norm_cfg = dict(type='BN')
num_class = len(character) + 2
num_steps = batch_max_length + 1

deploy = dict(
    transform=[
        dict(type='Sensitive', sensitive=sensitive, need_character=character),
        dict(type='ToGray'),
        dict(type='Resize', size=size),
        dict(type='Normalize', mean=mean, std=std),
        dict(type='ToTensor'),
    ],
    converter=dict(
        type='CTCConverter',
        character=character,
        batch_max_length=batch_max_length,
    ),
    model=dict(
        type='GModel',
        need_text=True,
        body=dict(
            type='GBody',
            pipelines=[
                dict(
                    type='RectificatorComponent',
                    from_layer='input',
                    to_layer='rect',
                    arch=dict(
                        type='TPS_STN',
                        F=F,
                        input_size=size,
                        output_size=size,
                        stn=dict(
                            feature_extractor=dict(
                                encoder=dict(
                                    backbone=dict(
                                        type='GVGG',
                                        layers=[
                                            ('conv', dict(type='ConvModule', in_channels=1, out_channels=64, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                            ('pool', dict(type='MaxPool2d', kernel_size=2, stride=2)),
                                            ('conv', dict(type='ConvModule', in_channels=64, out_channels=128, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                            ('pool', dict(type='MaxPool2d', kernel_size=2, stride=2)),
                                            ('conv', dict(type='ConvModule', in_channels=128, out_channels=256, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                            ('pool', dict(type='MaxPool2d', kernel_size=2, stride=2)),
                                            ('conv', dict(type='ConvModule', in_channels=256, out_channels=512, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                        ],
                                    ),
                                ),
                                collect=dict(type='CollectBlock', from_layer='c3'),
                            ),
                            pool=dict(type='AdaptiveAvgPool2d', output_size=1),
                            head=[
                                dict(type='FCModule', in_channels=512, out_channels=256),
                                dict(type='FCModule', in_channels=256, out_channels=F * 2, activation=None),
                            ],
                        ),
                    ),
                ),
                dict(
                    type='FeatureExtractorComponent',
                    from_layer='rect',
                    to_layer='cnn_feat',
                    arch=dict(
                        encoder=dict(
                            backbone=dict(
                                type='GResNet',
                                layers=[
                                    ('conv', dict(type='ConvModule', in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('conv', dict(type='ConvModule', in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('pool', dict(type='MaxPool2d', kernel_size=2, stride=2, padding=0)),
                                    ('block', dict(block_name='BasicBlock', planes=128, blocks=1, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=128, out_channels=128, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('pool', dict(type='MaxPool2d', kernel_size=2, stride=2, padding=0)),
                                    ('block', dict(block_name='BasicBlock', planes=256, blocks=2, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('pool', dict(type='MaxPool2d', kernel_size=2, stride=(2, 1), padding=(0, 1))),
                                    ('block', dict(block_name='BasicBlock', planes=512, blocks=5, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=512, out_channels=512, kernel_size=3, stride=1, padding=1, norm_cfg=norm_cfg)),
                                    ('block', dict(block_name='BasicBlock', planes=512, blocks=3, stride=1)),
                                    ('conv', dict(type='ConvModule', in_channels=512, out_channels=512, kernel_size=2, stride=(2, 1), padding=(0, 1), norm_cfg=norm_cfg)),
                                    ('conv', dict(type='ConvModule', in_channels=512, out_channels=512, kernel_size=2, stride=1, padding=0, norm_cfg=norm_cfg)),
                                ],
                            ),
                        ),
                        collect=dict(type='CollectBlock', from_layer='c4'),
                    ),
                ),
                dict(
                    type='SequenceEncoderComponent',
                    from_layer='cnn_feat',
                    to_layer='rnn_feat',
                    arch=dict(
                        type='RNN',
                        input_pool=dict(type='AdaptiveAvgPool2d', output_size=(1, None)),
                        layers=[
                            ('rnn', dict(type='LSTM', input_size=512, hidden_size=256, bidirectional=True, batch_first=True)),
                            ('fc', dict(type='Linear', in_features=512, out_features=256)),
                            ('rnn', dict(type='LSTM', input_size=256, hidden_size=256, bidirectional=True, batch_first=True)),
                            ('fc', dict(type='Linear', in_features=512, out_features=256)),
                        ],
                    ),
                ),
            ],
        ),
        head=dict(
            type='CTCHead',
            from_layer='cnn_feat',
            num_class=num_class,
            in_channels=512,
            pool=dict(
                type='AdaptiveAvgPool2d',
                output_size=(1, None),
            ),
        ),
    ),
)

###############################################################################

2. common

common = dict(
    seed=1111,
    logger=dict(
        handlers=(
            dict(type='StreamHandler', level='INFO'),
            dict(type='FileHandler', level='INFO'),
        ),
    ),
    cudnn_deterministic=False,
    cudnn_benchmark=True,
    metric=dict(type='Accuracy'),
)

###############################################################################

dataset_params = dict(
    batch_max_length=batch_max_length,
    data_filter=True,
    character=character,
)

test_dataset_params = dict(
    batch_max_length=batch_max_length,
    data_filter=False,
    character=character,
)

data_root = '/home/tham/vedastr/data/data_lmdb_release/'

###############################################################################

3. test

batch_size = 64

data

test_root = data_root + 'evaluation/'
test_folder_names = ['CAR_PLATE']

test_dataset = [dict(type='LmdbDataset', root=test_root + f_name, **test_dataset_params) for f_name in test_folder_names]

test = dict(
    data=dict(
        dataloader=dict(
            type='DataLoader',
            batch_size=batch_size,
            num_workers=4,
            shuffle=False,
        ),
        dataset=test_dataset,
        transform=deploy['transform'],
    ),
    postprocess_cfg=dict(
        sensitive=sensitive,
        character=character,
    ),
)

###############################################################################

4. train

work directory

root_workdir = 'workdir'

data

train_root = data_root + 'training/'

MJ dataset

train_root_mj = train_root + 'MJ/'
mj_folder_names = ['/MJ_test', 'MJ_valid', 'MJ_train']

ST dataset

train_root_st = train_root + 'ST/'

train_dataset_mj = [dict(type='LmdbDataset', root=train_root_mj + folder_name) for folder_name in mj_folder_names]
train_dataset_st = [dict(type='LmdbDataset', root=train_root_st)]

CAR_PLATE dataset

train_root_car_plate = train_root + 'CAR_PLATE/'

train_dataset_car_plate = [dict(type='LmdbDataset', root=train_root_car_plate)]

valid

valid_root = data_root + 'validation/CAR_PLATE'
valid_dataset = dict(type='LmdbDataset', root=valid_root, **dataset_params)

train transforms

train_transforms = [
    dict(type='Sensitive', sensitive=sensitive, need_character=character),
    dict(type='ToGray'),
    dict(type='Resize', size=size),
    dict(type='Normalize', mean=mean, std=std),
    dict(type='ToTensor'),
]

max_iterations = 300000
milestones = [150000, 250000]

train = dict(
    data=dict(
        train=dict(
            dataloader=dict(
                type='DataLoader',
                batch_size=batch_size,
                num_workers=4,
            ),
            sampler=dict(
                type='BalanceSampler',
                batch_size=batch_size,
                shuffle=True,
                oversample=True,
            ),
            dataset=dict(
                type='ConcatDatasets',
                datasets=[
                    dict(
                        type='ConcatDatasets',
                        datasets=train_dataset_car_plate,
                    )
                ],
                batch_ratio=[1.0],
                **dataset_params,
            ),
            transform=train_transforms,
        ),
        val=dict(
            dataloader=dict(
                type='DataLoader',
                batch_size=batch_size,
                num_workers=4,
                shuffle=False,
            ),
            dataset=valid_dataset,
            transform=deploy['transform'],
        ),
    ),
    optimizer=dict(type='Adadelta', lr=1.0, rho=0.95, eps=1e-8),
    criterion=dict(type='CTCLoss', zero_infinity=True),
    lr_scheduler=dict(type='StepLR', iter_based=True, milestones=milestones),
    max_iterations=max_iterations,
    log_interval=10,
    trainval_ratio=2000,
    snapshot_interval=20000,
    save_best=True,
    resume=None,
)

ChaseMonsterAway commented 2 years ago

Please set need_text=False in the model part of the config.
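For anyone hitting the same error: an attention-style head consumes the label text during training, while a CTC-style head only takes the visual/sequence feature. With need_text=True the model forwards the text to the head (the traceback shows out = self.head(x, inputs[1])), but CTCHead's forward() does not accept a second argument. A minimal sketch of the idea, with simplified toy classes rather than the actual vedastr ones:

import torch
import torch.nn as nn

class ToyCTCHead(nn.Module):
    # CTC-style head: classifies every time step, no label text needed
    def __init__(self, in_channels=512, num_class=38):
        super().__init__()
        self.fc = nn.Linear(in_channels, num_class)

    def forward(self, feat):
        return self.fc(feat)

class ToyModel(nn.Module):
    # Mimics the need_text switch: only pass the text to the head when it needs it
    def __init__(self, head, need_text):
        super().__init__()
        self.head = head
        self.need_text = need_text

    def forward(self, inputs):
        img, text = inputs
        feat = img  # stands in for the backbone + sequence encoder output
        if self.need_text:
            return self.head(feat, text)  # attention-style heads take (feat, text)
        return self.head(feat)            # CTC-style heads take the feature only

model = ToyModel(ToyCTCHead(), need_text=False)
out = model((torch.randn(2, 26, 512), torch.zeros(2, 26).long()))
print(out.shape)  # torch.Size([2, 26, 38])

With need_text=True and ToyCTCHead, the same call raises the TypeError from the issue, which is why flipping the flag to False fixes the CTC config.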

choozhenbo commented 2 years ago

Thank you. It works.