open-mmlab / mmpose

OpenMMLab Pose Estimation Toolbox and Benchmark.
https://mmpose.readthedocs.io/en/latest/
Apache License 2.0

[Bug] Add new loss can't backward #2788

Closed jackweiwang closed 11 months ago

jackweiwang commented 11 months ago

Prerequisite

Environment

OrderedDict([('sys.platform', 'linux'), ('Python', '3.8.17 (default, Jul 5 2023, 21:04:15) [GCC 11.2.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0', 'NVIDIA GeForce RTX 3070 Ti'), ('CUDA_HOME', '/usr/local/cuda'), ('NVCC', 'Cuda compilation tools, release 11.4, V11.4.152'), ('GCC', 'gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0'), ('PyTorch', '1.12.1+cu113'), ('PyTorch compiling details', 'PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2023.1-Product Build 20230303 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.3\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86\n - CuDNN 8.9.2 (built against CUDA 11.8)\n - Built with CuDNN 8.3.2\n - Magma 2.5.2\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.3, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -fabi-version=11 -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi -Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.1, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n'), ('TorchVision', '0.13.1+cu113'), ('OpenCV', '4.6.0'), ('MMEngine', '0.9.0'), ('MMPose', '1.2.0+')])

```
mmaction2   1.1.0        /home/ww/work/mmaction2-main
mmcv        2.1.0
mmcv-full   1.7.1
mmdeploy    1.3.0        /home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmdeploy-1.3.0-py3.8-linux-x86_64.egg
mmdet       3.2.0
mmengine    0.9.0
mmpose      1.2.0        /home/ww/work/hand_landmarks/mmpose-main
timm        0.8.13.dev0
```

Reproduces the problem - code sample

In `rtmcc_head.py`, I added a new loss term, `self.loss_module_two(pre_cls1, cls_label)`:

def loss(
    self,
    feats: Tuple[Tensor],
    batch_data_samples: OptSampleList,
    train_cfg: OptConfigType = {},
) -> dict:

    pred_x, pred_y, pre_cls = self.forward(feats)

    gt_x = torch.cat([
        d.gt_instance_labels.keypoint_x_labels for d in batch_data_samples
    ],
                     dim=0)
    gt_y = torch.cat([
        d.gt_instance_labels.keypoint_y_labels for d in batch_data_samples
    ],
                     dim=0)

    #cls_label = torch.cat(,dim=0)
    cls_label = torch.tensor([
        d.gt_instances.category_id[0] for d in batch_data_samples
    ],device=gt_x.device)

    pre_cls1 = torch.tensor(pre_cls,device=gt_x.device)

    keypoint_weights = torch.cat(
        [
            d.gt_instance_labels.keypoint_weights
            for d in batch_data_samples
        ],
        dim=0,
    )
    #print(keypoint_weights)
    pred_simcc = (pred_x, pred_y)
    gt_simcc = (gt_x, gt_y)

    # calculate losses
    losses = dict()

    loss = self.loss_module(pred_simcc, gt_simcc, keypoint_weights)

    losses.update(loss_kpt=loss)

    #losses1 = dict()

    #cls_label.requires_grad_(True)
    loss1 = self.loss_module_two(pre_cls1,cls_label)
    losses.update(loss_cls=loss1)

    # calculate accuracy
    _, avg_acc, _ = simcc_pck_accuracy(
        output=to_numpy(pred_simcc),
        target=to_numpy(gt_simcc),
        simcc_split_ratio=self.simcc_split_ratio,
        mask=to_numpy(keypoint_weights) > 0,
    )

    acc_pose = torch.tensor(avg_acc, device=gt_x.device)
    losses.update(acc_pose=acc_pose)

    return losses

In `rtmpose-m_8xb256-210e_hand5-128x128_finetune_1030.py`, I added `loss_cls` to the head config:

```python
model = dict(
    type='TopdownPoseEstimator',
    data_preprocessor=dict(
        type='PoseDataPreprocessor',
        mean=[0, 0, 0],
        std=[1, 1, 1],
        bgr_to_rgb=True),
    backbone=dict(
        type='fasternet_m',
        style='pytorch',
        # init_cfg=dict(
        #     type='Pretrained',
        #     checkpoint='../model_ckpt/fasternet_s-epoch=299-val_acc1=81.2840.pth',
        #     ),
        init_cfg=None,
        ),
    head=dict(
        type='RTMCCHead',
        in_channels=1152,
        out_channels=21,
        input_size=codec['input_size'],
        in_featuremap_size=tuple([s // 32 for s in codec['input_size']]),
        simcc_split_ratio=codec['simcc_split_ratio'],
        final_layer_kernel_size=7,
        gau_cfg=dict(
            hidden_dims=128,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='SiLU',
            use_rel_bias=False,
            pos_enc=False),
        loss=dict(
            type='KLDiscretLoss',
            use_target_weight=True,
            beta=10.,
            label_softmax=True),
        loss_cls=dict(
            type='CrossEntropyLoss', loss_weight=1.0, avg_non_ignore=True),
        decoder=codec),
    test_cfg=dict(flip_test=True, ))
```

Reproduces the problem - command or script

python tools/train.py configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-128x128_finetune_1030.py

Reproduces the problem - error message

The log shows that loss_cls can't backward (it does not go down):

```
11/01 09:45:34 - mmengine - INFO - Epoch(train) [1][ 50/1573] base_lr: 2.452928e-04 lr: 2.452928e-04 eta: 5:15:54 time: 0.150680 data_time: 0.033244 memory: 2289 loss: 2.395269 loss_kpt: 0.740435 loss_cls: 1.654834 acc_pose: 0.268350
11/01 09:45:40 - mmengine - INFO - Epoch(train) [1][ 100/1573] base_lr: 4.955405e-04 lr: 4.955405e-04 eta: 4:39:16 time: 0.115845 data_time: 0.016294 memory: 2289 loss: 2.079498 loss_kpt: 0.518610 loss_cls: 1.560888 acc_pose: 0.452777
11/01 09:45:46 - mmengine - INFO - Epoch(train) [1][ 150/1573] base_lr: 7.457883e-04 lr: 7.457883e-04 eta: 4:26:18 time: 0.114858 data_time: 0.014727 memory: 2289 loss: 1.972523 loss_kpt: 0.456859 loss_cls: 1.515664 acc_pose: 0.528104
11/01 09:45:52 - mmengine - INFO - Epoch(train) [1][ 200/1573] base_lr: 9.960360e-04 lr: 9.960360e-04 eta: 4:21:00 time: 0.117196 data_time: 0.016933 memory: 2289 loss: 1.873337 loss_kpt: 0.402409 loss_cls: 1.470928 acc_pose: 0.495523
11/01 09:45:57 - mmengine - INFO - Epoch(train) [1][ 250/1573] base_lr: 1.246284e-03 lr: 1.246284e-03 eta: 4:15:23 time: 0.111495 data_time: 0.011913 memory: 2289 loss: 1.897515 loss_kpt: 0.381719 loss_cls: 1.515797 acc_pose: 0.681330
11/01 09:46:03 - mmengine - INFO - Epoch(train) [1][ 300/1573] base_lr: 1.496532e-03 lr: 1.496532e-03 eta: 4:12:19 time: 0.113485 data_time: 0.013815 memory: 2289 loss: 1.787673 loss_kpt: 0.367470 loss_cls: 1.420203 acc_pose: 0.572073
11/01 09:46:09 - mmengine - INFO - Epoch(train) [1][ 350/1573] base_lr: 1.746779e-03 lr: 1.746779e-03 eta: 4:10:30 time: 0.114870 data_time: 0.013414 memory: 2289 loss: 1.864649 loss_kpt: 0.366143 loss_cls: 1.498507 acc_pose: 0.590961
11/01 09:46:14 - mmengine - INFO - Epoch(train) [1][ 400/1573] base_lr: 1.997027e-03 lr: 1.997027e-03 eta: 4:08:42 time: 0.113243 data_time: 0.013339 memory: 2289 loss: 1.993153 loss_kpt: 0.348369 loss_cls: 1.644783 acc_pose: 0.588824
11/01 09:46:20 - mmengine - INFO - Epoch(train) [1][ 450/1573] base_lr: 2.247275e-03 lr: 2.247275e-03 eta: 4:07:44 time: 0.115209 data_time: 0.013628 memory: 2289 loss: 2.079970 loss_kpt: 0.325085 loss_cls: 1.754886 acc_pose: 0.668368
11/01 09:46:26 - mmengine - INFO - Epoch(train) [1][ 500/1573] base_lr: 2.497523e-03 lr: 2.497523e-03 eta: 4:06:30 time: 0.113188 data_time: 0.012969 memory: 2289 loss: 1.924996 loss_kpt: 0.323537 loss_cls: 1.601459 acc_pose: 0.664533
11/01 09:46:32 - mmengine - INFO - Epoch(train) [1][ 550/1573] base_lr: 2.747770e-03 lr: 2.747770e-03 eta: 4:05:52 time: 0.115178 data_time: 0.015006 memory: 2289 loss: 1.903998 loss_kpt: 0.321639 loss_cls: 1.582359 acc_pose: 0.730886
11/01 09:46:37 - mmengine - INFO - Epoch(train) [1][ 600/1573] base_lr: 2.998018e-03 lr: 2.998018e-03 eta: 4:05:07 time: 0.113956 data_time: 0.013701 memory: 2289 loss: 2.005498 loss_kpt: 0.309604 loss_cls: 1.695894 acc_pose: 0.731256
11/01 09:46:43 - mmengine - INFO - Epoch(train) [1][ 650/1573] base_lr: 3.248266e-03 lr: 3.248266e-03 eta: 4:04:25 time: 0.113692 data_time: 0.013738 memory: 2289 loss: 2.069419 loss_kpt: 0.305414 loss_cls: 1.764004 acc_pose: 0.794643
11/01 09:46:49 - mmengine - INFO - Epoch(train) [1][ 700/1573] base_lr: 3.498514e-03 lr: 3.498514e-03 eta: 4:04:02 time: 0.115251 data_time: 0.015262 memory: 2289 loss: 2.028992 loss_kpt: 0.309558 loss_cls: 1.719434 acc_pose: 0.656206
11/01 09:46:54 - mmengine - INFO - Epoch(train) [1][ 750/1573] base_lr: 3.748761e-03 lr: 3.748761e-03 eta: 4:03:30 time: 0.113911 data_time: 0.013939 memory: 2289 loss: 1.946195 loss_kpt: 0.300587 loss_cls: 1.645608 acc_pose: 0.648774
11/01 09:47:00 - mmengine - INFO - Epoch(train) [1][ 800/1573] base_lr: 3.999009e-03 lr: 3.999009e-03 eta: 4:03:13 time: 0.115278 data_time: 0.015364 memory: 2289 loss: 2.078924 loss_kpt: 0.290376 loss_cls: 1.788548 acc_pose: 0.755123
11/01 09:47:06 - mmengine - INFO - Epoch(train) [1][ 850/1573] base_lr: 4.249257e-03 lr: 4.249257e-03 eta: 4:02:51 time: 0.114483 data_time: 0.014610 memory: 2289 loss: 2.188359 loss_kpt: 0.294796 loss_cls: 1.893563 acc_pose: 0.763538
11/01 09:47:12 - mmengine - INFO - Epoch(train) [1][ 900/1573] base_lr: 4.499505e-03 lr: 4.499505e-03 eta: 4:02:23 time: 0.113429 data_time: 0.013569 memory: 2289 loss: 1.990718 loss_kpt: 0.283184 loss_cls: 1.707534 acc_pose: 0.767852
```

Additional information

1. I would like both the kpt loss and the cls loss to backpropagate.
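
For context, mmengine's `train_step` collapses the returned dict into a single scalar before calling backward: roughly, every entry whose key contains `'loss'` is summed, while entries such as `acc_pose` are only logged. A simplified sketch of that behaviour (not the actual mmengine source):

```python
import torch


def parse_losses_sketch(losses: dict) -> torch.Tensor:
    """Simplified stand-in for mmengine's BaseModel loss parsing."""
    # sum every entry whose key contains 'loss' (here: loss_kpt + loss_cls);
    # metrics like acc_pose are logged but never backpropagated
    return sum(v for k, v in losses.items() if 'loss' in k)
```

So both loss terms receive gradients from a single `backward()` on the total, provided each of them is still attached to the autograd graph.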

Ben-Louis commented 11 months ago

The cls loss should have generated a backward gradient according to your implementation. Are you asking why the loss is not reducing?
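
One generic way to verify this (a debugging sketch, not part of the original code) is to print `requires_grad` / `grad_fn` for each entry of the returned dict just before `loss()` returns; any loss term that should be optimized must report a non-None `grad_fn`:

```python
# hypothetical debug lines one could add at the end of RTMCCHead.loss():
for name, value in losses.items():
    if torch.is_tensor(value):
        # a trainable loss term shows requires_grad=True and a grad_fn;
        # a tensor rebuilt with torch.tensor(...) shows False / None
        print(name, value.requires_grad, value.grad_fn)
```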

jackweiwang commented 11 months ago

> The cls loss should have generated a backward gradient according to your implementation. Are you asking why the loss is not reducing?

What I mean is that the loss function can't backward.

For example, if I use this loss function alone:

    loss1 = self.loss_module_two(pre_cls1, cls_label)
    losses.update(loss_cls=loss1)

log: python tools/train.py configs/hand_2d_keypoint/rtmpose/hand5/rtmpose-m_8xb256-210e_hand5-128x128_finetune.py 11/02 10:59:07 - mmengine - INFO - System environment: sys.platform: linux Python: 3.8.17 (default, Jul 5 2023, 21:04:15) [GCC 11.2.0] CUDA available: True numpy_random_seed: 21 GPU 0: NVIDIA GeForce RTX 3070 Ti CUDA_HOME: /usr/local/cuda NVCC: Cuda compilation tools, release 11.4, V11.4.152 GCC: gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0 PyTorch: 1.12.1+cu113 PyTorch compiling details: PyTorch built with:

Runtime environment: cudnn_benchmark: False mp_cfg: {'mp_start_method': 'fork', 'opencv_num_threads': 0} dist_cfg: {'backend': 'nccl'} seed: 21 Distributed launcher: none Distributed training: False GPU number: 1

11/02 10:59:07 - mmengine - INFO - Config: auto_scale_lr = dict(base_batch_size=32) backend_args = dict(backend='local') base_lr = 0.005 codec = dict( input_size=( 128, 128, ), normalize=False, sigma=( 5.66, 5.66, ), simcc_split_ratio=2.0, type='SimCCLabel', use_dark=False) custom_hooks = [ dict( ema_type='ExpMomentumEMA', momentum=0.0002, priority=49, type='EMAHook', update_buffers=True), dict( switch_epoch=70, switch_pipeline=[ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), dict( rotate_factor=180, scale_factor=[ 0.75, 1.25, ], shift_factor=0.0, type='RandomBBoxTransform'), dict(direction='horizontal', type='RandomFlip'), dict(input_size=( 128, 128, ), type='TopdownAffine'), dict(type='mmdet.YOLOXHSVRandomAug'), dict( transforms=[ dict(p=0.2, type='Blur'), dict(p=0.2, type='MedianBlur'), dict( max_height=0.4, max_holes=1, max_width=0.4, min_height=0.2, min_holes=1, min_width=0.2, p=0.5, type='CoarseDropout'), ], type='Albumentation'), dict( encoder=dict( input_size=( 128, 128, ), normalize=False, sigma=( 5.66, 5.66, ), simcc_split_ratio=2.0, type='SimCCLabel', use_dark=False), type='GenerateTarget'), dict(type='PackPoseInputs'), ], type='mmdet.PipelineSwitchHook'), ] data_mode = 'topdown' data_root = '/home/ww/work/hand_landmarks/mmpose-main/data/ourdataset' dataset_type = 'OneHand10KDataset' default_hooks = dict( badcase=dict( badcase_thr=5, enable=False, metric_type='loss', out_dir='badcase', type='BadCaseAnalysisHook'), checkpoint=dict( interval=10, max_keep_ckpts=1, rule='greater', save_best='AUC', type='CheckpointHook'), logger=dict(interval=50, type='LoggerHook'), param_scheduler=dict(type='ParamSchedulerHook'), sampler_seed=dict(type='DistSamplerSeedHook'), timer=dict(type='IterTimerHook'), visualization=dict(enable=False, type='PoseVisualizationHook')) default_scope = 'mmpose' env_cfg = dict( cudnn_benchmark=False, dist_cfg=dict(backend='nccl'), mp_cfg=dict(mp_start_method='fork', opencv_num_threads=0)) launcher = 'none' load_from = None log_level = 'INFO' log_processor = dict( by_epoch=True, num_digits=6, type='LogProcessor', window_size=50) max_epochs = 80 model = dict( backbone=dict(init_cfg=None, style='pytorch', type='fasternet_m'), data_preprocessor=dict( bgr_to_rgb=True, mean=[ 0, 0, 0, ], std=[ 1, 1, 1, ], type='PoseDataPreprocessor'), head=dict( decoder=dict( input_size=( 128, 128, ), normalize=False, sigma=( 5.66, 5.66, ), simcc_split_ratio=2.0, type='SimCCLabel', use_dark=False), final_layer_kernel_size=7, gau_cfg=dict( act_fn='SiLU', drop_path=0.0, dropout_rate=0.0, expansion_factor=2, hidden_dims=128, pos_enc=False, s=128, use_rel_bias=False), in_channels=1152, in_featuremap_size=( 4, 4, ), input_size=( 128, 128, ), loss=dict( beta=10.0, label_softmax=True, type='KLDiscretLoss', use_target_weight=True), loss_cls=dict( avg_non_ignore=True, loss_weight=1.0, type='CrossEntropyLoss'), out_channels=21, simcc_split_ratio=2.0, type='RTMCCHead'), test_cfg=dict(flip_test=True), type='TopdownPoseEstimator') optim_wrapper = dict( optimizer=dict(lr=0.005, type='AdamW', weight_decay=0.05), paramwise_cfg=dict( bias_decay_mult=0, bypass_duplicate=True, norm_decay_mult=0), type='OptimWrapper') param_scheduler = [ dict( begin=0, by_epoch=False, end=1000, start_factor=1e-05, type='LinearLR'), dict( T_max=40, begin=40, by_epoch=True, convert_to_iter_based=True, end=80, eta_min=0.00025, type='CosineAnnealingLR'), ] randomness = dict(seed=21) resume = False stage2_num_epochs = 10 test_cfg = dict() test_dataloader = dict( batch_size=32, 
dataset=dict( ann_file='te_v2.txt.json', data_mode='topdown', data_prefix=dict(img='/media/dataset/gesture/our_dataset/ourdataset'), data_root='/home/ww/work/hand_landmarks/mmpose-main/data/ourdataset', metainfo=dict(from_file='configs/base/datasets/onehand10k.py'), pipeline=[ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), dict(input_size=( 128, 128, ), type='TopdownAffine'), dict(type='PackPoseInputs'), ], test_mode=True, type='OneHand10KDataset'), drop_last=False, num_workers=5, persistent_workers=True, sampler=dict(round_up=False, shuffle=False, type='DefaultSampler')) test_evaluator = [ dict(thr=0.2, type='PCKAccuracy'), dict(type='AUC'), dict(type='EPE'), ] train_cfg = dict(by_epoch=True, max_epochs=80, val_interval=20) train_dataloader = dict( batch_size=32, dataset=dict( ann_file='tr_v2.txt.json', data_mode='topdown', data_prefix=dict(img='/media/dataset/gesture/our_dataset/ourdataset'), data_root='/home/ww/work/hand_landmarks/mmpose-main/data/ourdataset', metainfo=dict(from_file='configs/base/datasets/onehand10k.py'), pipeline=[ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), dict( rotate_factor=180, scale_factor=[ 0.5, 1.5, ], type='RandomBBoxTransform'), dict(direction='horizontal', type='RandomFlip'), dict(input_size=( 128, 128, ), type='TopdownAffine'), dict(type='mmdet.YOLOXHSVRandomAug'), dict( transforms=[ dict(p=0.1, type='Blur'), dict(p=0.1, type='MedianBlur'), dict( max_height=0.4, max_holes=1, max_width=0.4, min_height=0.2, min_holes=1, min_width=0.2, p=1.0, type='CoarseDropout'), ], type='Albumentation'), dict( encoder=dict( input_size=( 128, 128, ), normalize=False, sigma=( 5.66, 5.66, ), simcc_split_ratio=2.0, type='SimCCLabel', use_dark=False), type='GenerateTarget'), dict(type='PackPoseInputs'), ], test_mode=False, type='OneHand10KDataset'), num_workers=5, persistent_workers=True, sampler=dict(shuffle=True, type='DefaultSampler')) train_pipeline = [ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), dict( rotate_factor=180, scale_factor=[ 0.5, 1.5, ], type='RandomBBoxTransform'), dict(direction='horizontal', type='RandomFlip'), dict(input_size=( 128, 128, ), type='TopdownAffine'), dict(type='mmdet.YOLOXHSVRandomAug'), dict( transforms=[ dict(p=0.1, type='Blur'), dict(p=0.1, type='MedianBlur'), dict( max_height=0.4, max_holes=1, max_width=0.4, min_height=0.2, min_holes=1, min_width=0.2, p=1.0, type='CoarseDropout'), ], type='Albumentation'), dict( encoder=dict( input_size=( 128, 128, ), normalize=False, sigma=( 5.66, 5.66, ), simcc_split_ratio=2.0, type='SimCCLabel', use_dark=False), type='GenerateTarget'), dict(type='PackPoseInputs'), ] train_pipeline_stage2 = [ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), dict( rotate_factor=180, scale_factor=[ 0.75, 1.25, ], shift_factor=0.0, type='RandomBBoxTransform'), dict(direction='horizontal', type='RandomFlip'), dict(input_size=( 128, 128, ), type='TopdownAffine'), dict(type='mmdet.YOLOXHSVRandomAug'), dict( transforms=[ dict(p=0.2, type='Blur'), dict(p=0.2, type='MedianBlur'), dict( max_height=0.4, max_holes=1, max_width=0.4, min_height=0.2, min_holes=1, min_width=0.2, p=0.5, type='CoarseDropout'), ], type='Albumentation'), dict( encoder=dict( input_size=( 128, 128, ), normalize=False, sigma=( 5.66, 5.66, ), simcc_split_ratio=2.0, type='SimCCLabel', use_dark=False), type='GenerateTarget'), dict(type='PackPoseInputs'), ] val_cfg = dict() 
val_dataloader = dict( batch_size=32, dataset=dict( ann_file='te_v2.txt.json', data_mode='topdown', data_prefix=dict(img='/media/dataset/gesture/our_dataset/ourdataset'), data_root='/home/ww/work/hand_landmarks/mmpose-main/data/ourdataset', metainfo=dict(from_file='configs/base/datasets/onehand10k.py'), pipeline=[ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), dict(input_size=( 128, 128, ), type='TopdownAffine'), dict(type='PackPoseInputs'), ], test_mode=True, type='OneHand10KDataset'), drop_last=False, num_workers=5, persistent_workers=True, sampler=dict(round_up=False, shuffle=False, type='DefaultSampler')) val_evaluator = [ dict(thr=0.2, type='PCKAccuracy'), dict(type='AUC'), dict(type='EPE'), ] val_pipeline = [ dict(backend_args=dict(backend='local'), type='LoadImage'), dict(type='GetBBoxCenterScale'), dict(input_size=( 128, 128, ), type='TopdownAffine'), dict(type='PackPoseInputs'), ] vis_backends = [ dict(type='LocalVisBackend'), ] visualizer = dict( name='visualizer', type='PoseLocalVisualizer', vis_backends=[ dict(type='LocalVisBackend'), ]) work_dir = './work_dirs/rtmpose-m_8xb256-210e_hand5-128x128_finetune'

11/02 10:59:08 - mmengine - INFO - Distributed training is not used, all SyncBatchNorm (SyncBN) layers in the model will be automatically reverted to BatchNormXd layers if they are used. 11/02 10:59:08 - mmengine - INFO - Hooks will be executed in the following order: before_run: (VERY_HIGH ) RuntimeInfoHook
(49 ) EMAHook
(BELOW_NORMAL) LoggerHook


after_load_checkpoint: (49 ) EMAHook


before_train: (VERY_HIGH ) RuntimeInfoHook
(49 ) EMAHook
(NORMAL ) IterTimerHook
(VERY_LOW ) CheckpointHook


before_train_epoch: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook
(NORMAL ) DistSamplerSeedHook
(NORMAL ) PipelineSwitchHook


before_train_iter: (VERY_HIGH ) RuntimeInfoHook
(NORMAL ) IterTimerHook


after_train_iter: (VERY_HIGH ) RuntimeInfoHook
(49 ) EMAHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


after_train_epoch: (NORMAL ) IterTimerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


before_val: (VERY_HIGH ) RuntimeInfoHook


before_val_epoch: (49 ) EMAHook
(NORMAL ) IterTimerHook


before_val_iter: (NORMAL ) IterTimerHook


after_val_iter: (NORMAL ) IterTimerHook
(NORMAL ) PoseVisualizationHook
(BELOW_NORMAL) LoggerHook


after_val_epoch: (VERY_HIGH ) RuntimeInfoHook
(49 ) EMAHook
(NORMAL ) IterTimerHook
(BELOW_NORMAL) LoggerHook
(LOW ) ParamSchedulerHook
(VERY_LOW ) CheckpointHook


after_val: (VERY_HIGH ) RuntimeInfoHook


before_save_checkpoint: (49 ) EMAHook


after_train: (VERY_HIGH ) RuntimeInfoHook
(VERY_LOW ) CheckpointHook


before_test: (VERY_HIGH ) RuntimeInfoHook


before_test_epoch: (49 ) EMAHook
(NORMAL ) IterTimerHook


before_test_iter: (NORMAL ) IterTimerHook


after_test_iter: (NORMAL ) IterTimerHook
(NORMAL ) PoseVisualizationHook
(NORMAL ) BadCaseAnalysisHook
(BELOW_NORMAL) LoggerHook


after_test_epoch: (VERY_HIGH ) RuntimeInfoHook
(49 ) EMAHook
(NORMAL ) IterTimerHook
(NORMAL ) BadCaseAnalysisHook
(BELOW_NORMAL) LoggerHook


after_test: (VERY_HIGH ) RuntimeInfoHook


after_run: (BELOW_NORMAL) LoggerHook


/home/ww/work/hand_landmarks/mmpose-main/mmpose/datasets/transforms/common_transforms.py:656: UserWarning: Blur is not pixel-level transformations. Please use with caution. warnings.warn( /home/ww/work/hand_landmarks/mmpose-main/mmpose/datasets/transforms/common_transforms.py:656: UserWarning: MedianBlur is not pixel-level transformations. Please use with caution. warnings.warn( /home/ww/work/hand_landmarks/mmpose-main/mmpose/datasets/transforms/common_transforms.py:656: UserWarning: CoarseDropout is not pixel-level transformations. Please use with caution. warnings.warn( loading annotations into memory... Done (t=0.39s) creating index... index created! 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.patch_embed.norm.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.patch_embed.norm.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.0.blocks.0.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.0.blocks.0.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.0.blocks.1.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.0.blocks.1.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.0.blocks.2.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.0.blocks.2.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.1.norm.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.1.norm.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.0.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.0.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.1.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.1.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.2.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.2.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.3.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.2.blocks.3.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.3.norm.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.3.norm.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.0.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.0.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.1.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.1.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.2.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.2.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - 
paramwise_options -- backbone.stages.4.blocks.3.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.3.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.4.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.4.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.5.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.5.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.6.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.6.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.7.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.7.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.8.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.8.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.9.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.9.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.10.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.10.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.11.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.11.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.12.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.12.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.13.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.13.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.14.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.14.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.15.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.15.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.16.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.16.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.17.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.4.blocks.17.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.5.norm.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.5.norm.bias:weight_decay=0.0 11/02 
10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.6.blocks.0.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.6.blocks.0.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.6.blocks.1.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.6.blocks.1.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.6.blocks.2.mlp.1.weight:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- backbone.stages.6.blocks.2.mlp.1.bias:weight_decay=0.0 11/02 10:59:12 - mmengine - INFO - paramwise_options -- head.final_layer.bias:weight_decay=0.0 loading annotations into memory... Done (t=0.02s) creating index... index created! 11/02 10:59:13 - mmengine - WARNING - The prefix is not set in metric class PCKAccuracy. 11/02 10:59:13 - mmengine - WARNING - The prefix is not set in metric class AUC. 11/02 10:59:13 - mmengine - WARNING - The prefix is not set in metric class EPE. 11/02 10:59:13 - mmengine - WARNING - "FileClient" will be deprecated in future. Please use io functions in https://mmengine.readthedocs.io/en/latest/api/fileio.html#file-io 11/02 10:59:13 - mmengine - WARNING - "HardDiskBackend" is the alias of "LocalBackend" and the former will be deprecated in future. 11/02 10:59:13 - mmengine - INFO - Checkpoints will be saved to /home/ww/work/hand_landmarks/mmpose-main/work_dirs/rtmpose-m_8xb256-210e_hand5-128x128_finetune. /home/ww/work/hand_landmarks/mmpose-main/mmpose/models/heads/coord_cls_heads/rtmcc_head.py:346: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.clone().detach() or sourceTensor.clone().detach().requiresgrad(True), rather than torch.tensor(sourceTensor). 
```
  pre_cls1 = torch.tensor(pre_cls,device=gt_x.device)
Traceback (most recent call last):
  File "tools/train.py", line 162, in <module>
    main()
  File "tools/train.py", line 158, in main
    runner.train()
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1777, in train
    model = self.train_loop.run()  # type: ignore
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/loops.py", line 96, in run
    self.run_epoch()
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/loops.py", line 112, in run_epoch
    self.run_iter(idx, data_batch)
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/loops.py", line 128, in run_iter
    outputs = self.runner.model.train_step(
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 116, in train_step
    optim_wrapper.update_params(parsed_losses)
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/optim/optimizer/optimizer_wrapper.py", line 196, in update_params
    self.backward(loss)
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/optim/optimizer/optimizer_wrapper.py", line 220, in backward
    loss.backward(**kwargs)
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/torch/_tensor.py", line 396, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph, inputs=inputs)
  File "/home/ww/anaconda3/envs/openmmlab/lib/python3.8/site-packages/torch/autograd/__init__.py", line 173, in backward
    Variable._execution_engine.run_backward(  # Calls into the C++ engine to run the backward pass
RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn
```
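
The warning and the error point at the same line: wrapping the head output in `torch.tensor()` copy-constructs a new leaf tensor that is detached from the autograd graph, so a loss computed from it has no `grad_fn`. A minimal standalone sketch (not from the thread) that reproduces the same RuntimeError:

```python
import torch
import torch.nn as nn

fc = nn.Linear(8, 6)                 # stands in for the classification branch
logits = fc(torch.randn(4, 8))       # attached to the graph (has a grad_fn)
labels = torch.randint(0, 6, (4,))

detached = torch.tensor(logits)      # copy-construct: requires_grad=False, no grad_fn
                                     # (this also triggers the UserWarning above)
loss = nn.CrossEntropyLoss()(detached, labels)
loss.backward()                      # RuntimeError: element 0 of tensors does not
                                     # require grad and does not have a grad_fn
```

Passing `logits` to the loss directly (or moving it with `.to(device)`, which is differentiable) keeps the graph intact and `backward()` works.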

Ben-Louis commented 11 months ago

Sorry, I don't know your detailed implementation and I am unsure which code corresponds to which log you have attached.

jackweiwang commented 11 months ago

> Sorry, I don't know your detailed implementation and I am unsure which code corresponds to which log you have attached.

Sorry, this is my `rtmcc_head.py`; `loss_cls` is `nn.CrossEntropyLoss()`:

```python
# Copyright (c) OpenMMLab. All rights reserved.
import warnings
from typing import Optional, Sequence, Tuple, Union

import torch
from mmengine.dist import get_dist_info
from mmengine.structures import PixelData
from torch import Tensor, nn
import torch.nn.functional as F

from mmpose.codecs.utils import get_simcc_normalized
from mmpose.evaluation.functional import simcc_pck_accuracy
from mmpose.models.utils.rtmcc_block import RTMCCBlock, ScaleNorm
from mmpose.models.utils.tta import flip_vectors
from mmpose.registry import KEYPOINT_CODECS, MODELS
from mmpose.utils.tensor_utils import to_numpy
from mmpose.utils.typing import (ConfigType, InstanceList, OptConfigType,
                                 OptSampleList)
from ..base_head import BaseHead

OptIntSeq = Optional[Sequence[int]]

class focal_loss(nn.Module):

def __init__(self, alpha=0.25, gamma=2, num_classes=3, size_average=True):
    super(focal_loss, self).__init__()
    self.size_average = size_average
    if isinstance(alpha, list):
        assert len(alpha) == num_classes
        self.alpha = torch.Tensor(alpha)
    else:
        assert alpha < 1
        self.alpha = torch.zeros(num_classes)
        self.alpha[0] += alpha
        self.alpha[1:] += (1 - alpha)

    self.gamma = gamma

def forward(self, preds, labels):
    # assert preds.dim()==2 and labels.dim()==1
    preds = preds.view(-1,preds.size(-1))
    self.alpha = self.alpha.to(preds.device)
    preds_softmax = F.softmax(preds, dim=1) 
    preds_logsoft = torch.log(preds_softmax)

    #focal_loss func, Loss = -α(1-yi)**γ *ce_loss(xi,yi)
    preds_softmax = preds_softmax.gather(1,labels.view(-1,1)) 
    preds_logsoft = preds_logsoft.gather(1,labels.view(-1,1))
    self.alpha = self.alpha.gather(0,labels.view(-1))
    # torch.pow((1-preds_softmax), self.gamma) is the (1 - p_t) ** gamma term of focal loss
    loss = -torch.mul(torch.pow((1-preds_softmax), self.gamma), preds_logsoft) 

    loss = torch.mul(self.alpha, loss.t())
    if self.size_average:
        loss = loss.mean()
    else:
        loss = loss.sum()
    return loss

@MODELS.register_module()
class RTMCCHead(BaseHead):
"""Top-down head introduced in RTMPose (2023). The head is composed of a
large-kernel convolutional layer, a fully-connected layer and a Gated
Attention Unit to generate 1d representation from low-resolution feature
maps.

Args:
    in_channels (int | sequence[int]): Number of channels in the input
        feature map.
    out_channels (int): Number of channels in the output heatmap.
    input_size (tuple): Size of input image in shape [w, h].
    in_featuremap_size (int | sequence[int]): Size of input feature map.
    simcc_split_ratio (float): Split ratio of pixels.
        Default: 2.0.
    final_layer_kernel_size (int): Kernel size of the convolutional layer.
        Default: 1.
    gau_cfg (Config): Config dict for the Gated Attention Unit.
        Default: dict(
            hidden_dims=256,
            s=128,
            expansion_factor=2,
            dropout_rate=0.,
            drop_path=0.,
            act_fn='ReLU',
            use_rel_bias=False,
            pos_enc=False).
    loss (Config): Config of the keypoint loss. Defaults to use
        :class:`KLDiscretLoss`
    decoder (Config, optional): The decoder config that controls decoding
        keypoint coordinates from the network output. Defaults to ``None``
    init_cfg (Config, optional): Config to control the initialization. See
        :attr:`default_init_cfg` for default settings
"""

def __init__(
    self,
    in_channels: Union[int, Sequence[int]],
    out_channels: int,
    input_size: Tuple[int, int],
    in_featuremap_size: Tuple[int, int],
    simcc_split_ratio: float = 2.0,
    final_layer_kernel_size: int = 1,
    gau_cfg: ConfigType = dict(
        hidden_dims=256,
        s=128,
        expansion_factor=2,
        dropout_rate=0.,
        drop_path=0.,
        act_fn='ReLU',
        use_rel_bias=False,
        pos_enc=False),
    loss: ConfigType = dict(type='KLDiscretLoss', use_target_weight=True),
    loss_cls: ConfigType = dict(type='CrossEntropyLoss'),
    decoder: OptConfigType = None,
    init_cfg: OptConfigType = None,
):

    if init_cfg is None:
        init_cfg = self.default_init_cfg

    super().__init__(init_cfg)

    self.in_channels = in_channels
    self.out_channels = out_channels
    self.input_size = input_size
    self.in_featuremap_size = in_featuremap_size
    self.simcc_split_ratio = simcc_split_ratio

    self.loss_module = MODELS.build(loss)
    self.loss_module_two = MODELS.build(loss_cls) #nn.CrossEntropyLoss()
    self.loss_module_three = focal_loss(alpha=[0.05,0.2,0.2,0.2,0.2,0.15], gamma=2, num_classes=6)
    if decoder is not None:
        self.decoder = KEYPOINT_CODECS.build(decoder)
    else:
        self.decoder = None

    if isinstance(in_channels, (tuple, list)):
        raise ValueError(
            f'{self.__class__.__name__} does not support selecting '
            'multiple input features.')

    # Define SimCC layers
    flatten_dims = self.in_featuremap_size[0] * self.in_featuremap_size[1]
    # print('flatten_dims:',flatten_dims)
    # print('gau_cfg:',gau_cfg['hidden_dims']),
    self.final_layer = nn.Conv2d(
        in_channels,
        out_channels,
        kernel_size=final_layer_kernel_size,
        stride=1,
        padding=final_layer_kernel_size // 2)

    self.avgpool_pre_head = nn.AdaptiveAvgPool2d(1)
    #self.conv3 = nn.Conv2d(256, 64, kernel_size=1, stride=1, padding=0, bias=False)
    self.act = nn.ReLU()
    self.fc = nn.Linear(1152, 6, bias=False)
    self.flatten = nn.Sequential(nn.Flatten())

    self.mlp = nn.Sequential(
        ScaleNorm(flatten_dims),

        nn.Linear(flatten_dims, gau_cfg['hidden_dims'], bias=False))

    W = int(self.input_size[0] * self.simcc_split_ratio)
    H = int(self.input_size[1] * self.simcc_split_ratio)

    self.gau = RTMCCBlock(
        self.out_channels,
        gau_cfg['hidden_dims'],
        gau_cfg['hidden_dims'],
        s=gau_cfg['s'],
        expansion_factor=gau_cfg['expansion_factor'],
        dropout_rate=gau_cfg['dropout_rate'],
        drop_path=gau_cfg['drop_path'],
        attn_type='self-attn',
        act_fn=gau_cfg['act_fn'],
        use_rel_bias=gau_cfg['use_rel_bias'],
        pos_enc=gau_cfg['pos_enc'])

    self.cls_x = nn.Linear(gau_cfg['hidden_dims'], W, bias=False)
    self.cls_y = nn.Linear(gau_cfg['hidden_dims'], H, bias=False)

def forward(self, feats: Tuple[Tensor]) -> Tuple[Tensor, Tensor, Tensor]:
    """Forward the network.

    The input is the featuremap extracted by backbone and the
    output is the simcc representation.

    Args:
        feats (Tuple[Tensor]): Multi scale feature maps.

    Returns:
        pred_x (Tensor): 1d representation of x.
        pred_y (Tensor): 1d representation of y.
        cls_id (Tensor): classification logits predicted from the
            globally pooled backbone feature.
    """

    x = self.avgpool_pre_head(feats)
    x = self.act(x)
    x = self.flatten(x)

    cls_id = self.fc(x)

    feats = self.final_layer(feats)  # -> B, K, H, W

    feats = torch.flatten(feats, 2)

    feats = self.mlp(feats)  # -> B, K, hidden

    feats = self.gau(feats)

    pred_x = self.cls_x(feats)
    pred_y = self.cls_y(feats)

    return pred_x, pred_y, cls_id

def predict(
    self,
    feats: Tuple[Tensor],
    batch_data_samples: OptSampleList,
    test_cfg: OptConfigType = {},
) -> InstanceList:
    """Predict results from features.

    Args:
        feats (Tuple[Tensor] | List[Tuple[Tensor]]): The multi-stage
            features (or multiple multi-stage features in TTA)
        batch_data_samples (List[:obj:`PoseDataSample`]): The batch
            data samples
        test_cfg (dict): The runtime config for testing process. Defaults
            to {}

    Returns:
        List[InstanceData]: The pose predictions, each contains
        the following fields:
            - keypoints (np.ndarray): predicted keypoint coordinates in
                shape (num_instances, K, D) where K is the keypoint number
                and D is the keypoint dimension
            - keypoint_scores (np.ndarray): predicted keypoint scores in
                shape (num_instances, K)
            - keypoint_x_labels (np.ndarray, optional): The predicted 1-D
                intensity distribution in the x direction
            - keypoint_y_labels (np.ndarray, optional): The predicted 1-D
                intensity distribution in the y direction
    """
    print("test-----------------")
    if test_cfg.get('flip_test', False):
        # TTA: flip test -> feats = [orig, flipped]
        assert isinstance(feats, list) and len(feats) == 2
        flip_indices = batch_data_samples[0].metainfo['flip_indices']

        _feats, _feats_flip = feats

        _batch_pred_x, _batch_pred_y,cls_pred = self.forward(_feats)
        print(_batch_pred_x.shape)
        _batch_pred_x_flip, _batch_pred_y_flip,_ = self.forward(_feats_flip)
        _batch_pred_x_flip, _batch_pred_y_flip = flip_vectors(
            _batch_pred_x_flip,
            _batch_pred_y_flip,
            flip_indices=flip_indices)

        batch_pred_x = (_batch_pred_x + _batch_pred_x_flip) * 0.5
        batch_pred_y = (_batch_pred_y + _batch_pred_y_flip) * 0.5
    else:
        batch_pred_x, batch_pred_y,cls_pred= self.forward(feats)

    preds = self.decode((batch_pred_x, batch_pred_y))
    #print(batch_pred_x.shape)

    if test_cfg.get('output_heatmaps', False):
        rank, _ = get_dist_info()
        if rank == 0:
            warnings.warn('The predicted simcc values are normalized for '
                          'visualization. This may cause discrepancy '
                          'between the keypoint scores and the 1D heatmaps'
                          '.')

        # normalize the predicted 1d distribution
        batch_pred_x = get_simcc_normalized(batch_pred_x)
        batch_pred_y = get_simcc_normalized(batch_pred_y)

        B, K, _ = batch_pred_x.shape
        # B, K, Wx -> B, K, Wx, 1
        x = batch_pred_x.reshape(B, K, 1, -1)
        # B, K, Wy -> B, K, 1, Wy
        y = batch_pred_y.reshape(B, K, -1, 1)
        # B, K, Wx, Wy
        batch_heatmaps = torch.matmul(y, x)
        pred_fields = [
            PixelData(heatmaps=hm) for hm in batch_heatmaps.detach()
        ]

        for pred_instances, pred_x, pred_y in zip(preds,
                                                  to_numpy(batch_pred_x),
                                                  to_numpy(batch_pred_y)):

            pred_instances.keypoint_x_labels = pred_x[None]
            pred_instances.keypoint_y_labels = pred_y[None]

        return preds, pred_fields
    else:
        return preds

def loss(
    self,
    feats: Tuple[Tensor],
    batch_data_samples: OptSampleList,
    train_cfg: OptConfigType = {},
) -> dict:
    """Calculate losses from a batch of inputs and data samples."""

    pred_x, pred_y, pre_cls = self.forward(feats)

    gt_x = torch.cat([
        d.gt_instance_labels.keypoint_x_labels for d in batch_data_samples
    ],
                     dim=0)
    te = [
        d.gt_instance_labels.keypoint_x_labels for d in batch_data_samples
    ]

    gt_y = torch.cat([
        d.gt_instance_labels.keypoint_y_labels for d in batch_data_samples
    ],
                     dim=0)

    #cls_label = torch.cat(,dim=0)
    cls_label = torch.tensor([
        d.gt_instances.category_id[0] for d in batch_data_samples
    ],device=gt_x.device)

    pre_cls1 = torch.tensor(pre_cls,device=gt_x.device)

    keypoint_weights = torch.cat(
        [
            d.gt_instance_labels.keypoint_weights
            for d in batch_data_samples
        ],
        dim=0,
    )

    pred_simcc = (pred_x, pred_y)

    gt_simcc = (gt_x, gt_y)

    # calculate losses
    losses = dict()

    # loss = self.loss_module(pred_simcc, gt_simcc, keypoint_weights)

    # losses.update(loss_kpt=loss)

    loss1 = self.loss_module_two(pre_cls1,cls_label)
    losses.update(loss_cls=loss1)

    # calculate accuracy
    _, avg_acc, _ = simcc_pck_accuracy(
        output=to_numpy(pred_simcc),
        target=to_numpy(gt_simcc),
        simcc_split_ratio=self.simcc_split_ratio,
        mask=to_numpy(keypoint_weights) > 0,
    )

    acc_pose = torch.tensor(avg_acc, device=gt_x.device)
    losses.update(acc_pose=acc_pose)

    return losses

@property
def default_init_cfg(self):
    init_cfg = [
        dict(type='Normal', layer=['Conv2d'], std=0.001),
        dict(type='Constant', layer='BatchNorm2d', val=1),
        dict(type='Normal', layer=['Linear'], std=0.01, bias=0),
    ]
    return init_cfg

```

Ben-Louis commented 11 months ago

I think there is no need to create a new tensor pre_cls1 in the following snippet. Maybe you can try to use pre_cls in loss_module_two directly

    pre_cls1 = torch.tensor(pre_cls,device=gt_x.device)
    loss1 = self.loss_module_two(pre_cls1,cls_label)
    losses.update(loss_cls=loss1)
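
In other words, a sketch of the suggested change inside the head's `loss()`, keeping the variable names from the code above (`pre_cls` is the raw output of `self.forward`, so it already lives on the right device and carries a `grad_fn`):

    # before: copy-constructing a new tensor detaches it from the graph
    # pre_cls1 = torch.tensor(pre_cls, device=gt_x.device)
    # loss1 = self.loss_module_two(pre_cls1, cls_label)

    # after: feed the head output to the loss directly
    loss1 = self.loss_module_two(pre_cls, cls_label)
    losses.update(loss_cls=loss1)
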
jackweiwang commented 11 months ago

> I think there is no need to create a new tensor pre_cls1 in the following snippet. Maybe you can try to use pre_cls in loss_module_two directly
>
>     pre_cls1 = torch.tensor(pre_cls,device=gt_x.device)
>     loss1 = self.loss_module_two(pre_cls1,cls_label)
>     losses.update(loss_cls=loss1)

I think I made a foolish mistake, and I even spent the whole day on revisions :) Thanks!