Closed zouhan6806504 closed 1 month ago
新配置改成下面这样之后可以运行了,不过在eval的时候数据还是不会经过neck,这是正常的吗?
# Metric type and number of object classes (a single class in this config).
metric: COCO
num_classes: 1
# Training dataset: COCO-style dataset class with its image directory and
# annotation file. The child keys are nested under TrainDataset so they no
# longer collide with the identically named keys of the other dataset sections.
TrainDataset:
  name: COCODataSet
  image_dir: ali_fake
  anno_path: train.json
  # dataset_dir is disabled in both variants below. The "#-" / "#=" prefixes
  # look like per-environment toggles — NOTE(review): confirm which tool
  # rewrites them before changing these lines.
  #- dataset_dir: /home/aistudio/data
  #= dataset_dir: /root/paddlejob/workspace/train_data/datasets/GAIIC2024s1/train
  data_fields: ['image', 'gt_bbox', 'gt_class', 'is_crowd']
# Evaluation dataset: same dataset class and image directory as training, with
# its own annotation file. Child keys are nested under EvalDataset so they are
# not read as (duplicate) top-level keys.
EvalDataset:
  name: COCODataSet
  image_dir: ali_fake
  anno_path: eval.json
  # dataset_dir is disabled in both variants below. The "#-" / "#=" prefixes
  # look like per-environment toggles — NOTE(review): confirm before editing.
  #- dataset_dir: /home/aistudio/data
  #= dataset_dir: /root/paddlejob/workspace/train_data/datasets/GAIIC2024s1/val
  allow_empty: true
# Inference dataset: images read through ImageFolder, with test.json as the
# annotation/label source. Child keys are nested under TestDataset.
TestDataset:
  name: ImageFolder
  anno_path: test.json  # also support txt (like VOC's label_list.txt)
  # dataset_dir is disabled in both variants below. The "#-" / "#=" prefixes
  # look like per-environment toggles — NOTE(review): confirm before editing.
  #- dataset_dir: /home/aistudio/data # if set, anno_path will be 'dataset_dir/anno_path'
  #= dataset_dir: /root/paddlejob/workspace
# Runtime settings: accelerator flags (only use_gpu is enabled), log_iter,
# snapshot_epoch, and FLOPs / parameter-count printing (both disabled).
use_gpu: true
use_xpu: false
use_mlu: false
use_npu: false
log_iter: 100
# save_dir is commented out in both variants below, so no save_dir is set by
# this file. The "#-" / "#=" prefixes look like per-environment toggles —
# NOTE(review): confirm which tool rewrites them.
#-save_dir: /home/aistudio/output
#=save_dir: /root/paddlejob/workspace/output
snapshot_epoch: 1
print_flops: false
print_params: false
# Exporting the model
# The four options are nested under `export`; without the nesting they would
# be parsed as unrelated top-level keys and `export` itself would be null.
export:
  post_process: True  # Whether post-processing is included in the network when export model.
  nms: True  # Whether NMS is included in the network when export model.
  benchmark: False  # It is used to testing model performance, if set `True`, post-process and NMS will not be exported.
  fuse_conv_bn: False
# Training length in epochs.
epoch: 10
# `weights` is commented out in both variants below ("#=" / "#-" prefixes,
# apparently per-environment toggles — NOTE(review): confirm).
#=weights: /root/paddlejob/workspace/output/rtdetr_swin
#-weights: /home/aistudio/output/rtdetr
find_unused_parameters: True
# Learning-rate schedule: a base rate plus a list of schedulers.
# Each scheduler's options are indented under its own `- name:` entry;
# otherwise `max_epochs`, `start_factor` and `epochs` fall out of the list
# and become stray top-level keys.
LearningRate:
  base_lr: 0.00005
  schedulers:
    # NOTE(review): max_epochs (12) differs from the top-level `epoch` (10);
    # presumably deliberate so the cosine curve is not run to its end — confirm.
    - name: CosineDecay
      max_epochs: 12
    - name: LinearWarmup
      start_factor: 0.
      epochs: 1
# Optimizer construction: gradient clipping, no regularizer, and AdamW.
# `param_groups` lives inside `optimizer`; the listed parameter-name patterns
# get weight_decay 0.0 instead of the optimizer-wide 0.0001.
OptimizerBuilder:
  clip_grad_by_norm: 0.1
  regularizer: false
  optimizer:
    type: AdamW
    weight_decay: 0.0001
    param_groups:
      - params: ['absolute_pos_embed', 'relative_position_bias_table', 'norm']
        weight_decay: 0.0
# Global model settings: the architecture to build (configured by the `DETR`
# section of this file), normalization type, EMA options, shared hidden size,
# focal-loss switch, fixed evaluation size, and pretrained weights URL.
architecture: DETR
norm_type: sync_bn
use_ema: True
ema_decay: 0.9999
ema_decay_type: "exponential"
ema_filter_no_grad: True
hidden_dim: 256
use_focal_loss: True
# eval_size matches the 512x512 Resize used by EvalReader / TestReader.
eval_size: [512, 512]
pretrain_weights: https://bj.bcebos.com/v1/paddledet/models/dino_swin_large_384_4scale_3x_coco.pdparams
# Component wiring for the DETR architecture. Each value names another
# top-level section of this file (or a registered module) to instantiate.
# The keys must be nested under DETR; in particular `neck` has to be a child
# of DETR to be picked up as the model's neck.
DETR:
  backbone: SwinTransformer_EAEF
  neck: HybridEncoder
  transformer: RTDETRTransformer
  detr_head: DINOHead
  post_process: DETRPostProcess
#SwinTransformer_EAEF:
# is_gcm: false
# Neck configuration. `encoder_layer` is a nested mapping describing the
# transformer layer; `expansion` belongs to HybridEncoder itself, not to the
# encoder layer.
HybridEncoder:
  hidden_dim: 256
  # Four feature levels are declared here.
  # NOTE(review): confirm the backbone really emits four maps with these
  # channels/strides; the framework may override these from the backbone's
  # output shape.
  in_channels: [192, 384, 768, 1536]
  feat_strides: [4, 8, 16, 32]
  use_encoder_idx: [2]
  num_encoder_layers: 6  # NOTE(review): bare '#' marker kept from the original; presumably hand-tuned — confirm
  encoder_layer:
    name: TransformerLayer
    d_model: 256
    nhead: 8
    dim_feedforward: 2048  # NOTE(review): bare '#' marker kept from the original; presumably hand-tuned — confirm
    dropout: 0.
    activation: 'gelu'
  expansion: 1.0
# Decoder/transformer configuration. All options are nested under
# RTDETRTransformer so that `nhead`, `dim_feedforward`, `dropout`,
# `activation` and `feat_strides` do not clash with the same keys used by
# HybridEncoder.
RTDETRTransformer:
  num_queries: 300
  position_embed_type: sine
  # num_levels (4) matches the length of the two lists below.
  backbone_feat_channels: [192, 384, 768, 1536]
  feat_strides: [4, 8, 16, 32]
  num_levels: 4
  nhead: 8
  num_decoder_layers: 6
  dim_feedforward: 2048  # NOTE(review): bare '#' marker kept from the original; presumably hand-tuned — confirm
  dropout: 0.0
  activation: relu
  # Denoising-query settings.
  num_denoising: 100
  label_noise_ratio: 0.5
  box_noise_scale: 1.0
  learnt_init_query: False
# Detection head: only the loss is configured. The matcher is a nested
# component of the loss, so it is indented under `loss`.
DINOHead:
  loss:
    name: DINOLoss
    loss_coeff: {class: 1, bbox: 5, giou: 2}
    aux_loss: True
    use_vfl: True
    matcher:
      name: HungarianMatcher
      matcher_coeff: {class: 2, bbox: 5, giou: 2}
# Post-processing: number of top-scoring queries kept (equal to num_queries
# of RTDETRTransformer in this config).
DETRPostProcess:
  num_top_queries: 300
# Number of data-loading workers (top-level setting).
worker_num: 1

# Training data pipeline. `sample_transforms` and `batch_transforms` are
# lists of single-key mappings; the remaining reader options are direct
# children of TrainReader so they do not clash with EvalReader / TestReader.
TrainReader:
  sample_transforms:
    - Decode: {}
    # Augmentations below are currently disabled.
    # - RandomDistort: {prob: 0.5, saturation: [0.5, 3, 0.5], contrast: [0.5, 2, 0.5]}
    # - RandomExpand: {fill_value: [123.675, 116.28, 103.53]}
    # - RandomCrop: {prob: 0.8}
    - RandomFlip: {}
  batch_transforms:
    - BatchRandomResize: {target_size: [512], random_size: True, random_interp: True, keep_ratio: False}
    # mean 0 / std 1 with norm_type none: no mean/std shift is applied here.
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - NormalizeBox: {}
    - BboxXYXY2XYWH: {}
    - Permute: {}
  batch_size: 2
  shuffle: true
  drop_last: true
  collate_batch: false
  use_shared_memory: false
# Evaluation data pipeline: decode, fixed 512x512 resize (aspect ratio not
# kept), normalization, permute. Options are nested under EvalReader.
EvalReader:
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [512, 512], keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false
# Inference data pipeline. `image_shape` is a child of `inputs_def`; the
# transform list mirrors EvalReader (fixed 512x512 resize).
TestReader:
  inputs_def:
    image_shape: [3, 512, 512]
  sample_transforms:
    - Decode: {}
    - Resize: {target_size: [512, 512], keep_ratio: False, interp: 2}
    - NormalizeImage: {mean: [0., 0., 0.], std: [1., 1., 1.], norm_type: none}
    - Permute: {}
  batch_size: 1
  shuffle: false
  drop_last: false
看下训练和评估走的是不是同一个forward。
看下训练和评估走的是不是同一个forward。
醉了,现在再去运行又没问题了。估计是魔改的过程中动到了别的地方导致异常,当时没有发现。现在改完能运行了,此问题先关了。
问题确认 Search before asking
请提出你的问题 Please ask your question
配置如下,backbone是一个魔改网络
启动命令 !python /home/aistudio/PaddleDetection-release-2.7/tools/train.py -c /home/aistudio/rtdetr_swin_convnext.yml --eval
eval阶段报异常
我追踪了一下architecture: DETR里面的代码
发现在eval阶段,neck里面的打印没有输出,也就是self.neck is None,只输出了backbone feature的打印:`bf in`、`bf [1, 384, 64, 64]`、`bf [1, 768, 32, 32]`、`bf [1, 1536, 16, 16]`。为什么会出现这种情况?