data = dict(
samples_per_gpu=2,
workers_per_gpu=0,
sampler=dict(
type='DistBatchBalancedSampler', # BatchBalancedSampler and DistBatchBalancedSampler
mode=1,
# model 0: Balance in batch, calculate the epoch according to the first iterative data set
# model 1: Balance in batch, calculate the epoch according to the last iterative data set
# model 2: Balance in batch, record unused data
# model -1: Each dataset is directly connected and shuffled
),
train=dict(
batch_ratios=['1.0'],
dataset=dict(
ann_file=ann_files,
img_prefix=img_prefixes,
)
),
val=dict(
ann_file='/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/convert_test_without_iob.json',
img_prefix='/home/mdisk3/bianzhewu/dataset/wildreceipt/',
),
test=dict(
ann_file='/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/convert_test_without_iob.json',
img_prefix='/home/mdisk3/bianzhewu/dataset/wildreceipt/',
pipeline=test_pipeline
)
# model 0: Balance in batch, calculate the epoch according to the first iterative data set
# model 1: Balance in batch, calculate the epoch according to the last iterative data set
# model 2: Balance in batch, record unused data
# model -1: Each dataset is directly connected and shuffled
),
train=dict(
type=train_dataset_type,
batch_ratios=['1.0'],
dataset=dict(
type=test_dataset_type,
ann_file=train_ann_files,
img_prefix=train_img_prefixes,
test_mode=False,
pipeline=train_pipeline)
),
val=dict(
type=test_dataset_type,
ann_file=test_ann_files,
img_prefix=test_img_prefixes,
pipeline=test_pipeline,
# classes='/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/class_list.txt'
),
test=dict(
type=test_dataset_type,
ann_file=test_ann_files,
img_prefix=test_img_prefixes,
pipeline=test_pipeline,
# classes='/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/class_list.txt'
))
您好!我在使用maskrcnn的config作用于wildreceipt数据集时,config文件如下 """ ####################################################################################################
Copyright Info : Copyright (c) Davar Lab @ Hikvision Research Institute. All rights reserved.
Filename : mask_rcnn_r50_r32_e2e_finetune_ic13.py
Abstract : Model settings for mask rcnn spotter end-to-end finetune on realdata.
Current Version: 1.0.0
Date : 2021-06-24
###################################################################################################### """ base = "./base.py" batch_max_length = 60
model = dict( rcg_roi_extractor=dict( type='MaskRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=(32, 200), sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32], delete=True, ), rcg_backbone=dict( type='ResNet32', input_channel=256, output_channel=256, delete=True, ), rcg_sequence_module=dict( rnn_modules=[ dict( type='BidirectionalLSTM', input_size=256, hidden_size=256, output_size=256, with_linear=True, bidirectional=True,), dict( type='BidirectionalLSTM', input_size=256, hidden_size=256, output_size=256, with_linear=True, bidirectional=True,), ]), rcg_sequence_head=dict( input_size=256, converter=dict( type='AttnLabelConverter', with_unknown=True, ), ), )
# File prefix path of the training dataset
img_prefixes = [ '/home/mdisk3/bianzhewu/dataset/wildreceipt/', ]
# Dataset Name
ann_files = [ '/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/convert_train_without_iob.json', ]
img_norm_cfg = dict(mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True)
train_pipeline = [ dict(type='DavarResize', img_scale=[(512, 512)], multiscale_mode='range', keep_ratio=True), ]
test_pipeline = [ dict(type='DavarLoadImageFromFile'), dict( type='MultiScaleFlipAug', img_scale=(512, 512), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='Normalize', **img_norm_cfg), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='DavarCollect', keys=['img']), ]) ]
data = dict( samples_per_gpu=2, workers_per_gpu=0, sampler=dict( type='DistBatchBalancedSampler', # BatchBalancedSampler and DistBatchBalancedSampler mode=1,
# model 0: Balance in batch, calculate the epoch according to the first iterative data set
)
optimizer = dict(type='Adadelta', lr=1.0, weight_decay=1e-5) lr_config = dict(step=[40, 80, 120]) runner = dict(max_epochs=150) checkpoint_config = dict(interval=10, filename_tmpl='checkpoint/mask_rcnn_r50_r32_e2e_finetuneepoch{}.pth') work_dir = '/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_spotting/mask_rcnn_spot/workspace/wildreceipt_img512_32_200_len60/log/' load_from = None evaluation = dict( interval=1, )
得到的精度如下:
2023-02-06 12:45:04,094 - davarocr - INFO - Detection evaluation results: Precision: 0.8855601146647181, Recall: 0.8610230626297956, hmean: 0.8731192330072876
2023-02-06 12:45:04,094 - davarocr - INFO - Spotting evaluation results: Precision: 0.6957450396267776, Recall: 0.6764673734834408, hmean: 0.6859707944248941
但是当我想在trie中使用maskrcnn,即将maskrcnn的config参数copy至trie中复现其检测识别的结果时(先取消信息抽取模块),其config文件如下: character = '/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/flatten_dict.txt' batch_max_length = 60 type="SPOTTER"
model = dict( type='MaskRCNN_Trie', pretrained=None, backbone=dict( type='ResNet', depth=50, num_stages=4, out_indices=(0, 1, 2, 3), frozen_stages=-1, style='pytorch'), neck=dict( type='FPN', in_channels=[256, 512, 1024, 2048], out_channels=256, num_outs=5), rpn_head=dict( type='RPNHead', in_channels=256, feat_channels=256, anchor_generator=dict( type='AnchorGenerator', scales=[8], ratios=[0.1, 0.2, 0.4, 0.8, 1.6, 3.2], strides=[4, 8, 16, 32, 64]), bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[.0, .0, .0, .0], target_stds=[1.0, 1.0, 1.0, 1.0]), loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), loss_bbox=dict(type='L1Loss', loss_weight=1.0)), roi_head=dict( type='StandardRoIHead', bbox_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=7, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), bbox_head=dict( type='Shared2FCBBoxHead', in_channels=256, fc_out_channels=1024, roi_feat_size=7, num_classes=1, bbox_coder=dict( type='DeltaXYWHBBoxCoder', target_means=[0., 0., 0., 0.], target_stds=[0.1, 0.1, 0.2, 0.2]), reg_class_agnostic=False, loss_cls=dict( type='CrossEntropyLoss', use_sigmoid=False, loss_weight=1.0), loss_bbox=dict(type='L1Loss', loss_weight=1.0)), mask_roi_extractor=dict( type='SingleRoIExtractor', roi_layer=dict(type='RoIAlign', output_size=14, sampling_ratio=0), out_channels=256, featmap_strides=[4, 8, 16, 32]), mask_head=dict( type='FCNMaskHead', num_convs=4, in_channels=256, conv_out_channels=256, num_classes=1, loss_mask=dict( type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))),
)
# training and testing settings
train_cfg = dict() test_cfg = dict()
# dataset settings
train_dataset_type = 'DavarMultiDataset' test_dataset_type = 'TextSpotDataset'
# File prefix path of the training dataset
train_img_prefixes = [ '/home/mdisk3/bianzhewu/dataset/wildreceipt/' ]
test_img_prefixes='/home/mdisk3/bianzhewu/dataset/wildreceipt/'
# Dataset Name
train_ann_files = [ '/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/convert_train_without_iob.json' ]
test_ann_files = '/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/datalist/wildreceipt/convert_test_without_iob.json'
img_norm_cfg = dict( mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) train_pipeline = [ dict(type='DavarLoadImageFromFile',), dict(type='DavarLoadAnnotations', with_bbox=True, # Bounding Rect with_poly_mask=True, # Mask with_poly_bbox=True, # bouding poly with_label=True, # Bboxes' labels with_entity_label=True, with_care=True, # Ignore or not with_text=True, # Transcription with_cbbox=False, # Character bounding text_profile=dict(text_max_length=batch_max_length, sensitive='same', filtered=False) ), dict(type='ColorJitter', brightness=32.0 / 255, saturation=0.5), dict(type='Normalize', img_norm_cfg), dict(type='DavarRandomCrop', instance_key='gt_bboxes'), dict(type='RandomRotate', angles=[-15, 15], borderValue=(0, 0, 0)), dict(type='DavarResize', img_scale=[(512, 512)], multiscale_mode='value', keep_ratio=True), dict(type='Pad', size_divisor=32), dict(type='DavarDefaultFormatBundle'), dict(type='DavarCollect', keys=['img', 'gt_bboxes', 'gt_labels', 'gt_texts', 'gt_masks','gt_entity_labels']), ] test_pipeline = [ dict(type='DavarLoadImageFromFile',), dict( type='MultiScaleFlipAug', img_scale=(512, 512), flip=False, transforms=[ dict(type='Resize', keep_ratio=True), dict(type='Normalize', img_norm_cfg), dict(type='Pad', size_divisor=32), dict(type='ImageToTensor', keys=['img']), dict(type='DavarCollect', keys=['img',]), ]) ] data = dict( samples_per_gpu=2, workers_per_gpu=0, sampler=dict( type='DistBatchBalancedSampler', # BatchBalancedSampler and DistBatchBalancedSampler mode=1,
# model 0: Balance in batch, calculate the epoch according to the first iterative data set
# optimizer
find_unused_parameters = True
optimizer = dict(type='AdamW', betas=(0.9, 0.999), eps=1e-8, lr=1e-3, weight_decay=0)
optimizer = dict(type='Adadelta', lr=1.0, weight_decay=1e-5) optimizer_config = dict(grad_clip=dict(max_norm=5, norm_type=2)) lr_config = dict( policy='step', warmup='linear', warmup_iters=200, warmup_ratio=1.0 / 3, step=[40, 80, 120]) runner = dict(type='EpochBasedRunner', max_epochs=150)
checkpoint_config = dict(type='DavarCheckpointHook', interval=10, save_mode='general', metric='hmean', filenametmpl='checkpoint/wildreceipt{}.pth', save_last=False)
# yapf:disable
log_config = dict( interval=50, hooks=[ dict(type='TextLoggerHook'), ])
# yapf:enable
# runtime settings
dist_params = dict(backend='nccl') log_level = 'INFO' work_dir = '/home/mdisk3/bianzhewu/DAVAR-Lab-OCR-dev/demo/text_ie/trie/log/wildreceipt_maskrcnn_e2e_img512_roi_32_200_checkspotter' load_from = None resume_from = None workflow = [('train', 1)]
evaluation = dict( model_type=type, type="DavarEvalHook", interval=1, eval_func_params=dict(
SPECIALCHARACTERS="[]+-#$()@=!?,:;/.%&'\">*|<`{~}^\ ",
)
其精度如下:
2023-02-06 12:50:39,579 - davarocr - INFO - Detection evaluation results: Precision: 0.9122613326406027, Recall: 0.7676795278172478, hmean: 0.8337488129154795
2023-02-06 12:50:39,579 - davarocr - INFO - Spotting evaluation results: Precision: 0.5935186387842577, Recall: 0.49945349218493823, hmean: 0.5424382716049382
识别部分的差距很大。请问maskrcnn的config文件是否做过特殊处理?为什么近乎相同的config文件会得出差距如此大的结果?
谢谢!