JiaquanYe / TableMASTER-mmocr

2nd solution of ICDAR 2021 Competition on Scientific Literature Parsing, Task B.
Apache License 2.0
442 stars 104 forks source link

CUDA out of memory when running table_inference.py on a single 2080 Ti GPU with the epoch_16_0.7767.pth model #33

Closed cqray1990 closed 3 years ago

cqray1990 commented 3 years ago

When running table_inference.py on a single 2080 Ti GPU, loading the epoch_16_0.7767.pth model with the following config file, I get a CUDA out-of-memory error:

_base_ = [ '../../_base_/default_runtime.py' ]

alphabet_file = '/tools/data/alphabet/structure_alphabet.txt' alphabet_len = len(open(alphabet_file, 'r').readlines()) max_seq_len = 500

start_end_same = False label_convertor = dict( type='TableMasterConvertor', dict_file=alphabet_file, max_seq_len=max_seq_len, start_end_same=start_end_same, with_unknown=True)

if start_end_same: PAD = alphabet_len + 2 else: PAD = alphabet_len + 3

model = dict( type='TABLEMASTER', backbone=dict( type='TableResNetExtra', input_dim=3, gcb_config=dict( ratio=0.0625, headers=1, att_scale=False, fusion_type="channel_add", layers=[False, True, True, True], ), layers=[1,2,5,3]), encoder=dict( type='PositionalEncoding', d_model=512, dropout=0.2, max_len=5000), decoder=dict( type='TableMasterDecoder', N=3, decoder=dict( self_attn=dict( headers=8, d_model=512, dropout=0.), src_attn=dict( headers=8, d_model=512, dropout=0.), feed_forward=dict( d_model=512, d_ff=2024, dropout=0.), size=512, dropout=0.), d_model=512), loss=dict(type='MASTERTFLoss', ignore_index=PAD, reduction='mean'), bbox_loss=dict(type='TableL1Loss', reduction='sum'), label_convertor=label_convertor, max_seq_len=max_seq_len)

TRAIN_STATE = True img_norm_cfg = dict(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]) train_pipeline = [ dict(type='LoadImageFromNdarrayV2'), dict( type='TableResize', keep_ratio=True, long_size=480), dict( type='TablePad', size=(480, 480), pad_val=0, return_mask=True, mask_ratio=(8, 8), train_state=TRAIN_STATE), dict(type='TableBboxEncode'), dict(type='ToTensorOCR'), dict(type='NormalizeOCR', **img_norm_cfg), dict( type='Collect', keys=['img'], meta_keys=[ 'filename', 'ori_shape', 'img_shape', 'text', 'scale_factor', 'bbox', 'bbox_masks', 'pad_shape' ]), ]

valid_pipeline = [ dict(type='LoadImageFromNdarrayV2'), dict( type='TableResize', keep_ratio=True, long_size=480), dict( type='TablePad', size=(480, 480), pad_val=0, return_mask=True, mask_ratio=(8, 8), train_state=TRAIN_STATE), dict(type='TableBboxEncode'), dict(type='ToTensorOCR'), dict(type='NormalizeOCR', **img_norm_cfg), dict( type='Collect', keys=['img'], meta_keys=[ 'filename', 'ori_shape', 'img_shape', 'scale_factor', 'img_norm_cfg', 'ori_filename', 'bbox', 'bbox_masks', 'pad_shape' ]), ]

test_pipeline = [ dict(type='LoadImageFromNdarrayV2'), dict( type='TableResize', keep_ratio=True, long_size=480), dict( type='TablePad', size=(480, 480), pad_val=0, return_mask=True, mask_ratio=(8, 8), train_state=TRAIN_STATE),

dict(type='TableBboxEncode'),

dict(type='ToTensorOCR'),
dict(type='NormalizeOCR', **img_norm_cfg),
dict(
    type='Collect',
    keys=['img'],
    meta_keys=[
        'filename', 'ori_shape', 'img_shape', 'scale_factor',
        'img_norm_cfg', 'ori_filename', 'pad_shape'
    ]),

]

dataset_type = 'OCRDataset'

train_img_prefix = '/pubtabnet/pubtabnet/train'

train_anno_file1 = '/StructureLabel_train'

train_img_prefix = "pubtabnet/pubtabnet/train" train_anno_file1 = "StructureLabel_train"

train_img_prefix = ''

train_anno_file1 = ''

train1 = dict( type=dataset_type, img_prefix=train_img_prefix, ann_file=train_anno_file1, loader=dict( type='TableMASTERLmdbLoader', repeat=1, max_seq_len=max_seq_len, parser=dict( type='TableMASTERLmdbParser', keys=['filename', 'text'], keys_idx=[0, 1], separator=' ')), pipeline=train_pipeline, test_mode=False)

valid_img_prefix = '/pubtabnet/pubtabnet/val'

valid_anno_file1 = '/StructureLabel_val'

valid_img_prefix = '/pubtabnet/pubtabnet/val' valid_anno_file1 = '/StructureLabel_val' valid = dict( type=dataset_type, img_prefix=valid_img_prefix, ann_file=valid_anno_file1, loader=dict( type='TableMASTERLmdbLoader', repeat=1, max_seq_len=max_seq_len, parser=dict( type='TableMASTERLmdbParser', keys=['filename', 'text'], keys_idx=[0, 1], separator=' ')), pipeline=valid_pipeline, dataset_info='table_master_dataset', test_mode=True)

test_img_prefix = '/pubtabnet/pubtabnet/val'

test_anno_file1 = '/StructureLabel_val'

test_img_prefix = '/pubtabnet/pubtabnet/val' test_anno_file1 = '/StructureLabel_val' test = dict( type=dataset_type, img_prefix=test_img_prefix, ann_file=test_anno_file1, loader=dict( type='TableMASTERLmdbLoader', repeat=1, max_seq_len=max_seq_len, parser=dict( type='TableMASTERLmdbParser', keys=['filename', 'text'], keys_idx=[0, 1], separator=' ')), pipeline=test_pipeline, dataset_info='table_master_dataset', test_mode=True)

data = dict( samples_per_gpu=4, workers_per_gpu=2, train=dict(type='ConcatDataset', datasets=[train1]), val=dict(type='ConcatDataset', datasets=[valid]), test=dict(type='ConcatDataset', datasets=[test]))

# optimizer

optimizer = dict(type='Ranger', lr=1e-3) optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))

optimizer_config = dict(grad_clip=None)

# learning policy

lr_config = dict( policy='step', warmup='linear', warmup_iters=50, warmup_ratio=1.0 / 3, step=[12, 15]) total_epochs = 17

# evaluation

evaluation = dict(interval=1, metric='acc')

# fp16

fp16 = dict(loss_scale='dynamic')

# checkpoint setting

checkpoint_config = dict(interval=1)

# log_config

log_config = dict( interval=100, hooks=[ dict(type='TextLoggerHook')

])

# yapf:enable

dist_params = dict(backend='nccl') log_level = 'INFO' load_from = None resume_from = None workflow = [('train', 1)]

# If an error about find_unused_parameters is raised, use this.

find_unused_parameters = True

ret = input.softmax(dim) RuntimeError: CUDA out of memory. Tried to allocate 30.00 MiB (GPU 0; 10.76 GiB total capacity; 444.10 MiB already allocated; 24.56 MiB free; 604.00 MiB reserved in total by PyTorch)

It seems that more GPU memory is needed.