Closed kevinzezel closed 3 months ago
How do you annotate the dataset? Could you explain? Thanks in advance @kevinzezel
The `ch_` prefix is used for Chinese models, and the `en_` prefix is used for English models.
@tzktok @GreatV Thanks for the quick response. Please see my files below:
Sample of Train.txt:
/root/train_dataset/v2-det/imgs/Train/d1ea1a2e6a3740048f5ea6d091b545c7.jpg [{"transcription": "AZT7235", "points": [[87, 37], [163, 35], [163, 52], [88, 54]]}]
/root/train_dataset/v2-det/imgs/Train/00d537f64c814ddd832d5caaf78ffa16.jpg [{"transcription": "AOZ4677", "points": [[101, 47], [183, 48], [183, 69], [100, 68]]}]
/root/train_dataset/v2-det/imgs/Train/65f2a95eee1a4ef987c20f697962444c.jpg [{"transcription": "ARO2I68", "points": [[87, 34], [150, 35], [150, 50], [87, 49]]}]
/root/train_dataset/v2-det/imgs/Train/a2ad35b967cd4aad82639b71668ec0e5.jpg [{"transcription": "FPZ8I83", "points": [[98, 42], [171, 42], [171, 58], [98, 58]]}]
/root/train_dataset/v2-det/imgs/Train/8433e07446b640d5be3604e446826662.jpg [{"transcription": "ARN7E15", "points": [[92, 37], [162, 36], [163, 53], [92, 55]]}]
/root/train_dataset/v2-det/imgs/Train/401f5fefa5104fb391864d9107bc408d.jpg [{"transcription": "HHK2374", "points": [[78, 38], [148, 40], [147, 55], [78, 54]]}]
/root/train_dataset/v2-det/imgs/Train/657a590260ab44eb857e44d0660c9b68.jpg [{"transcription": "MHG", "points": [[129, 219], [175, 157], [203, 177], [157, 240]]}, {"transcription": "0131", "points": [[151, 246], [194, 182], [222, 201], [178, 265]]}]
Sample of Val.txt:
/root/train_dataset/v2-det/imgs/Val/9273322ce5d642e488233bef39bd7ab4.jpg [{"transcription": "AYB2776", "points": [[83, 34], [155, 38], [155, 53], [83, 49]]}]
/root/train_dataset/v2-det/imgs/Val/96df43de1ec64905958169a1b55943f9.jpg [{"transcription": "REE1491", "points": [[86, 33], [158, 35], [157, 52], [86, 50]]}]
/root/train_dataset/v2-det/imgs/Val/d37a0268537643e2821965ca692b41a5.jpg [{"transcription": "RYU7068", "points": [[97, 71], [160, 88], [154, 111], [91, 94]]}]
/root/train_dataset/v2-det/imgs/Val/09fba0919dde4f8492b082e91fece609.jpg [{"transcription": "ATX6I80", "points": [[94, 45], [168, 45], [168, 65], [94, 65]]}]
Sample of img dataset:
Pretrained model:
en_PP-OCRv3_det_distill_train/student.pdparams
ch_PP-OCRv3_det_student.yml
# Global training settings for PP-OCRv3 text detection finetuning.
Global:
  debug: false
  use_gpu: true
  epoch_num: 500
  log_smooth_window: 20
  print_batch_step: 10
  save_model_dir: ./output/det_ppocr_v3_Train_v2
  save_epoch_step: 100
  # Run evaluation every 400 iterations, starting at iteration 0.
  eval_batch_step:
    - 0
    - 400
  cal_metric_during_train: false
  # English PP-OCRv3 detection weights (student sub-model of the distilled model).
  pretrained_model: /root/src/pretrain_models/en_PP-OCRv3_det_distill_train/student.pdparams
  checkpoints: null
  save_inference_dir: null
  use_visualdl: false
  infer_img: /root/paddle/PaddleOCR/doc/imgs_words/ch/word_1.jpg
  save_res_path: ./output/det/predicts_ppocrv3_Train_v2.txt
  distributed: true
# DB (Differentiable Binarization) detector built on MobileNetV3 + RSEFPN.
Architecture:
  model_type: det
  algorithm: DB
  # No input transform module is used for detection.
  Transform: null
  Backbone:
    name: MobileNetV3
    scale: 0.5
    model_name: large
    disable_se: true
  Neck:
    name: RSEFPN
    out_channels: 96
    shortcut: true
  Head:
    name: DBHead
    k: 50
# DB loss: combines probability-map, threshold-map, and binary-map terms.
Loss:
  name: DBLoss
  balance_loss: true
  main_loss_type: DiceLoss
  alpha: 5
  beta: 10
  ohem_ratio: 3
# Adam with cosine LR decay and a short warmup; no weight decay (factor: 0).
Optimizer:
  name: Adam
  beta1: 0.9
  beta2: 0.999
  lr:
    name: Cosine
    learning_rate: 0.0001
    warmup_epoch: 2
  regularizer:
    name: L2
    factor: 0
# Converts DB probability maps into text boxes.
PostProcess:
  name: DBPostProcess
  thresh: 0.3
  box_thresh: 0.6
  max_candidates: 1000
  unclip_ratio: 1.5
# Detection metric; model selection is driven by hmean (F-score of P/R).
Metric:
  name: DetMetric
  main_indicator: hmean
Train:
  dataset:
    name: SimpleDataSet
    # NOTE(review): the label file uses absolute image paths, so data_dir is
    # effectively ignored when paths are joined — verify images resolve.
    data_dir: /root/train_dataset/v2-det/imgs/Train
    label_file_list:
      - /root/train_dataset/v2-det/imgs/Train.txt
    ratio_list: [1.0]
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null
      - IaaAugment:
          augmenter_args:
            - type: Fliplr
              args:
                p: 0.5
            - type: Affine
              args:
                # Random rotation in degrees.
                rotate:
                  - -10
                  - 10
            - type: Resize
              args:
                # Random scale factor range.
                size:
                  - 0.5
                  - 3
      - EastRandomCropData:
          size:
            - 960
            - 960
          max_tries: 50
          keep_ratio: true
      - MakeBorderMap:
          shrink_ratio: 0.4
          thresh_min: 0.3
          thresh_max: 0.7
      - MakeShrinkMap:
          shrink_ratio: 0.4
          min_text_size: 8
      - NormalizeImage:
          # Quoted: this is a string expression evaluated by PaddleOCR,
          # not a YAML number.
          scale: '1./255.'
          mean:
            - 0.485
            - 0.456
            - 0.406
          std:
            - 0.229
            - 0.224
            - 0.225
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - threshold_map
            - threshold_mask
            - shrink_map
            - shrink_mask
  loader:
    shuffle: true
    drop_last: false
    batch_size_per_card: 8
    num_workers: 1
Eval:
  dataset:
    name: SimpleDataSet
    # NOTE(review): the label file uses absolute image paths, so data_dir is
    # effectively ignored when paths are joined — verify images resolve.
    data_dir: /root/train_dataset/v2-det/imgs/Val
    label_file_list:
      - /root/train_dataset/v2-det/imgs/Val.txt
    transforms:
      - DecodeImage:
          img_mode: BGR
          channel_first: false
      - DetLabelEncode: null
      - DetResizeForTest: null
      - NormalizeImage:
          # Quoted: this is a string expression evaluated by PaddleOCR,
          # not a YAML number.
          scale: '1./255.'
          mean:
            - 0.485
            - 0.456
            - 0.406
          std:
            - 0.229
            - 0.224
            - 0.225
          order: hwc
      - ToCHWImage: null
      - KeepKeys:
          keep_keys:
            - image
            - shape
            - polys
            - ignore_tags
  loader:
    shuffle: false
    drop_last: false
    # Evaluation runs one image at a time (variable image sizes).
    batch_size_per_card: 1
    num_workers: 1
问题描述 / Problem Description
Hi,
Which .yml file should I use to perform text detection finetuning in English?
And which pretrained model should I start to do the finetuning in English?
I tried using the template ch_PP-OCRv3_det_student.yml with the pretrained model en_PP-OCRv3_det_distill_train/student.pdparams. However, the loss never decreases over iterations; it stays fixed at 1.
运行环境 / Runtime Environment
复现代码 / Reproduction Code
python3 /root/paddle/PaddleOCR/tools/train.py -c /root/src/ch_PP-OCRv3_det_student.yml
完整报错 / Complete Error Message