双svtr distill，竖排，batchsize设置到2都会Out of memory error on GPU

aistudio 4卡gpu环境，paddleocr 2.6.0 中文古语识别，竖排文字，所以dataset里面的w、h我调换了顺序，设置成了[3, 640, 32] ， reconaug设置了[640, 32, 3] 文本最长50字，已经根据gt切成了小图启动命令

!python3 -m paddle.distributed.launch --ips="localhost" --gpus '0,1,2,3' /home/aistudio/PaddleOCR-release-2.6/tools/train.py \
    -c /home/aistudio/rec_svtrnet_dml.yml

详细配置

Global:
  debug: false
  use_gpu: true
  epoch_num: 1000
  log_smooth_window: 20
  print_batch_step: 100
  save_model_dir: /home/aistudio/rec_svtrbet_dml
  save_epoch_step: 1
  eval_batch_step: [0, 318]
  cal_metric_during_train: false
  pretrained_model:
  checkpoints:
  save_inference_dir:
  use_visualdl: false
  infer_img: /home/aistudio/infer1.png
  character_dict_path: /home/aistudio/code2.txt
  max_text_length: &max_text_length 50
  infer_mode: false
  use_space_char: true
  distributed: true
  save_res_path: /home/aistudio/rec_svtrbet_dml_srp.txt

Optimizer:
  name: AdamW
  beta1: 0.9
  beta2: 0.99
  epsilon: 8.0e-08
#  weight_decay: 0.05
  no_weight_decay_name: norm pos_embed
  one_dim_param_no_weight_decay: true
  lr:
    name: Cosine
    learning_rate: 0.0005
    warmup_epoch: 5
  regularizer:
    name: L2
    factor: 3.0e-05

Architecture:
  model_type: &model_type "rec"
  name: DistillationModel
  algorithm: Distillation
  Models:
    Teacher:
      pretrained:
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: SVTR
      Transform:
      Backbone:
        name: SVTRNet
        img_size:
          - 640
          - 32
        out_char_num: 15158
        out_channels: 96
        patch_merging: Conv
        embed_dim:
          - 64
          - 128
          - 256
        depth:
          - 3
          - 6
          - 3
        num_heads:
          - 2
          - 4
          - 8
        mixer:
          - Local
          - Local
          - Local
          - Local
          - Local
          - Local
          - Global
          - Global
          - Global
          - Global
          - Global
          - Global
        local_mixer:
          - - 7
            - 11
          - - 7
            - 11
          - - 7
            - 11
        last_stage: true
        prenorm: false
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 64
                depth: 2
                hidden_dims: 120
                use_guide: True
              Head:
                fc_decay: 0.00001
          - SARHead:
              enc_dim: 512
              max_text_length: *max_text_length
    Student:
      pretrained:
      freeze_params: false
      return_all_feats: true
      model_type: *model_type
      algorithm: SVTR
      Transform:
      Backbone:
        name: SVTRNet
        img_size:
          - 640
          - 32
        out_char_num: 15158
        out_channels: 96
        patch_merging: Conv
        embed_dim:
          - 64
          - 128
          - 256
        depth:
          - 3
          - 6
          - 3
        num_heads:
          - 2
          - 4
          - 8
        mixer:
          - Local
          - Local
          - Local
          - Local
          - Local
          - Local
          - Global
          - Global
          - Global
          - Global
          - Global
          - Global
        local_mixer:
          - - 7
            - 11
          - - 7
            - 11
          - - 7
            - 11
        last_stage: true
        prenorm: false
      Head:
        name: MultiHead
        head_list:
          - CTCHead:
              Neck:
                name: svtr
                dims: 64
                depth: 2
                hidden_dims: 120
                use_guide: True
              Head:
                fc_decay: 0.00001
          - SARHead:
              enc_dim: 512
              max_text_length: *max_text_length

Loss:
  name: CombinedLoss
  loss_config_list:
  - DistillationDMLLoss:
      weight: 1.0
      act: "softmax"
      use_log: true
      model_name_pairs:
      - ["Student", "Teacher"]
      key: head_out
      multi_head: True
      dis_head: ctc
      name: dml_ctc
  - DistillationDMLLoss:
      weight: 0.5
      act: "softmax"
      use_log: true
      model_name_pairs:
      - ["Student", "Teacher"]
      key: head_out
      multi_head: True
      dis_head: sar
      name: dml_sar
  - DistillationDistanceLoss:
      weight: 1.0
      mode: "l2"
      model_name_pairs:
      - ["Student", "Teacher"]
      key: backbone_out
  - DistillationCTCLoss:
      weight: 1.0
      model_name_list: ["Student", "Teacher"]
      key: head_out
      multi_head: True
  - DistillationSARLoss:
      weight: 1.0
      model_name_list: ["Student", "Teacher"]
      key: head_out
      multi_head: True

PostProcess:
  name: DistillationCTCLabelDecode
  model_name: ["Student", "Teacher"]
  key: head_out
  multi_head: True

Metric:
  name: DistillationMetric
  base_metric_name: RecMetric
  main_indicator: acc
  key: "Student"
  ignore_space: False

Train:
  dataset:
    name: SimpleDataSet
    data_dir: /home/aistudio/data/det_images
    ext_op_transform_idx: 1
    label_file_list:
    - /home/aistudio/rec_train_0.8_222.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - RecConAug:
        prob: 0.5
        ext_data_num: 2
        image_shape: [640, 32, 3]
    - RecAug:
    - MultiLabelEncode:
    - RecResizeImg:
        image_shape: [3, 640, 32]
    - KeepKeys:
        keep_keys:
        - image
        - label_ctc
        - label_sar
        - length
        - valid_ratio
  loader:
    shuffle: true
    batch_size_per_card: 2
    drop_last: true
    num_workers: 0

Eval:
  dataset:
    name: SimpleDataSet
    data_dir: /home/aistudio/data/det_images
    label_file_list:
    - /home/aistudio/rec_eval_0.8_222.txt
    transforms:
    - DecodeImage:
        img_mode: BGR
        channel_first: false
    - MultiLabelEncode:
    - RecResizeImg:
        image_shape: [3, 640, 32]
    - KeepKeys:
        keep_keys:
        - image
        - label_ctc
        - label_sar
        - length
        - valid_ratio
  loader:
    shuffle: false
    drop_last: false
    batch_size_per_card: 2
    num_workers: 0

检查图片 image_0_100_s0 image_0_100_s29 image_6_100_s14 image_8_66_s10 image_10_94_s63

这个问题可能由什么引起？4卡128g都爆了

PaddlePaddle / PaddleOCR

双svtr distill，竖排，batchsize设置到2都会Out of memory error on GPU #7662