Open Winnie202 opened 11 months ago
I've got the same problem:(
File "D:\VS_project\mmpose-main\mmpose-main\mmpose\models\losses\heatmap_loss.py", line 63, in forward
    _mask = self._get_mask(target, target_weights, mask)
File "D:\VS_project\mmpose-main\mmpose-main\mmpose\models\losses\heatmap_loss.py", line 93, in _get_mask
    assert (target_weights.ndim in (2, 4) and target_weights.shape
AssertionError: target_weights and target have mismatched shapes torch.Size([128, 17]) v.s. torch.Size([32, 17, 64, 48])
the same problem:
same problem
same problem
Prerequisite
Environment
OrderedDict([('sys.platform', 'linux'), ('Python', '3.8.17 | packaged by conda-forge | (default, Jun 16 2023, 07:06:00) [GCC 11.4.0]'), ('CUDA available', True), ('numpy_random_seed', 2147483648), ('GPU 0,1', 'NVIDIA GeForce RTX 3090'), ('CUDA_HOME', '/usr/local/cuda-11.0'), ('NVCC', 'Cuda compilation tools, release 11.0, V11.0.221'), ('GCC', 'gcc (Ubuntu 9.4.0-1ubuntu1~20.04.2) 9.4.0'), ('PyTorch', '1.12.0'), ('PyTorch compiling details', 'PyTorch built with:\n - GCC 9.3\n - C++ Version: 201402\n - Intel(R) oneAPI Math Kernel Library Version 2021.4-Product Build 20210904 for Intel(R) 64 architecture applications\n - Intel(R) MKL-DNN v2.6.0 (Git Hash 52b5f107dd9cf10910aaa19cb47f3abf9b349815)\n - OpenMP 201511 (a.k.a. OpenMP 4.5)\n - LAPACK is enabled (usually provided by MKL)\n - NNPACK is enabled\n - CPU capability usage: AVX2\n - CUDA Runtime 11.6\n - NVCC architecture flags: -gencode;arch=compute_37,code=sm_37;-gencode;arch=compute_50,code=sm_50;-gencode;arch=compute_60,code=sm_60;-gencode;arch=compute_61,code=sm_61;-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_37,code=compute_37\n - CuDNN 8.3.2 (built against CUDA 11.5)\n - Magma 2.6.1\n - Build settings: BLAS_INFO=mkl, BUILD_TYPE=Release, CUDA_VERSION=11.6, CUDNN_VERSION=8.3.2, CXX_COMPILER=/opt/rh/devtoolset-9/root/usr/bin/c++, CXX_FLAGS= -Wno-deprecated -fvisibility-inlines-hidden -DUSE_PTHREADPOOL -fopenmp -DNDEBUG -DUSE_KINETO -DUSE_FBGEMM -DUSE_QNNPACK -DUSE_PYTORCH_QNNPACK -DUSE_XNNPACK -DSYMBOLICATE_MOBILE_DEBUG_HANDLE -DEDGE_PROFILER_USE_KINETO -O2 -fPIC -Wno-narrowing -Wall -Wextra -Werror=return-type -Wno-missing-field-initializers -Wno-type-limits -Wno-array-bounds -Wno-unknown-pragmas -Wno-unused-parameter -Wno-unused-function -Wno-unused-result -Wno-unused-local-typedefs -Wno-strict-overflow -Wno-strict-aliasing -Wno-error=deprecated-declarations -Wno-stringop-overflow -Wno-psabi 
-Wno-error=pedantic -Wno-error=redundant-decls -Wno-error=old-style-cast -fdiagnostics-color=always -faligned-new -Wno-unused-but-set-variable -Wno-maybe-uninitialized -fno-math-errno -fno-trapping-math -Werror=format -Werror=cast-function-type -Wno-stringop-overflow, LAPACK_INFO=mkl, PERF_WITH_AVX=1, PERF_WITH_AVX2=1, PERF_WITH_AVX512=1, TORCH_VERSION=1.12.0, USE_CUDA=ON, USE_CUDNN=ON, USE_EXCEPTION_PTR=1, USE_GFLAGS=OFF, USE_GLOG=OFF, USE_MKL=ON, USE_MKLDNN=OFF, USE_MPI=OFF, USE_NCCL=ON, USE_NNPACK=ON, USE_OPENMP=ON, USE_ROCM=OFF, \n'), ('TorchVision', '0.13.0'), ('OpenCV', '4.8.0'), ('MMEngine', '0.8.4'), ('MMPose', '1.1.0+')])
mmcv 2.0.1 mmdet 3.1.0 mmengine 0.8.4 mmpose 1.1.0 /work/xy/mmpose
Reproduces the problem - code sample
def loss(self, feats: MSMUFeatures, batch_data_samples: OptSampleList, train_cfg: OptConfigType = {}) -> dict: """Calculate losses from a batch of inputs and data samples.
Reproduces the problem - command or script
python tools/train.py
Reproduces the problem - error message
File "tools/train.py", line 161, in <module>
main()
File "tools/train.py", line 157, in main
runner.train()
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/runner.py", line 1745, in train
model = self.train_loop.run() # type: ignore
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/loops.py", line 96, in run
self.run_epoch()
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/loops.py", line 112, in run_epoch
self.run_iter(idx, data_batch)
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/runner/loops.py", line 128, in run_iter
outputs = self.runner.model.train_step(
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 114, in train_step
losses = self._run_forward(data, mode='loss') # type: ignore
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/mmengine/model/base_model/base_model.py", line 340, in _run_forward
results = self(data, mode=mode)
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/media/veily6/work/xy/mmpose/mmpose/models/pose_estimators/base.py", line 136, in forward
return self.loss(inputs, data_samples)
File "/media/veily6/work/xy/mmpose/mmpose/models/pose_estimators/topdown.py", line 74, in loss
self.head.loss(feats, data_samples, train_cfg=self.train_cfg))
File "/media/veily6/work/xy/mmpose/mmpose/models/heads/heatmap_heads/mspn_head.py", line 415, in loss
loss_i = loss_func(msmu_pred_heatmaps[i], gt_heatmaps,
File "/home/veily6/anaconda3/envs/openmmlab/lib/python3.8/site-packages/torch/nn/modules/module.py", line 1130, in _call_impl
return forward_call(*input, **kwargs)
File "/media/veily6/work/xy/mmpose/mmpose/models/losses/heatmap_loss.py", line 63, in forward
_mask = self._get_mask(target, target_weights, mask)
File "/media/veily6/work/xy/mmpose/mmpose/models/losses/heatmap_loss.py", line 93, in _get_mask
assert (target_weights.ndim in (2, 4) and target_weights.shape
AssertionError: target_weights and target have mismatched shapes torch.Size([160, 4]) v.s. torch.Size([32, 4, 64, 48])
Additional information
My config is: _base_ = ['../../../_base_/default_runtime.py','../../../_base_/datasets/custom.py']
# runtime
train_cfg = dict(max_epochs=210, val_interval=10)
# optimizer
optim_wrapper = dict(optimizer=dict( type='Adam', lr=5e-3, ))
# learning policy
param_scheduler = [
    dict(
        type='LinearLR', begin=0, end=500, start_factor=0.001,
        by_epoch=False),  # warm-up
    dict(
        type='MultiStepLR',
        begin=0,
        end=210,
        milestones=[170, 200],
        gamma=0.1,
        by_epoch=True)
]
# automatically scaling LR based on the actual training batch size
auto_scale_lr = dict(base_batch_size=256)
# hooks
default_hooks = dict(checkpoint=dict(save_best='PCK', rule='greater'))
# codec settings
# multiple kernel_sizes of heatmap gaussian for 'Megvii' approach.
kernel_sizes = [15, 11, 9, 7, 5]
codec = [
    dict(
        type='MegviiHeatmap',
        input_size=(192, 256),
        heatmap_size=(48, 64),
        kernel_size=kernel_size) for kernel_size in kernel_sizes
]
# model settings
model = dict( type='TopdownPoseEstimator', data_preprocessor=dict( type='PoseDataPreprocessor', mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], bgr_to_rgb=True), backbone=dict( type='MSPN', unit_channels=256, num_stages=4, num_units=4, num_blocks=[3, 4, 6, 3], norm_cfg=dict(type='BN'), init_cfg=dict( type='Pretrained', checkpoint='torchvision://resnet50', )), head=dict( type='MSPNHead', out_shape=(64, 48), unit_channels=256, out_channels=4, num_stages=4, num_units=4, norm_cfg=dict(type='BN'),
# each sub list is for a stage
# base dataset settings
dataset_type = 'CustomDataset'
data_mode = 'topdown'
data_root = '0921'
# pipelines
train_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='RandomFlip', direction='horizontal'),
    dict(type='RandomHalfBody'),
    dict(type='RandomBBoxTransform'),
    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
    dict(type='GenerateTarget', multilevel=True, encoder=codec),
    dict(type='PackPoseInputs')
]
val_pipeline = [
    dict(type='LoadImage'),
    dict(type='GetBBoxCenterScale'),
    dict(type='TopdownAffine', input_size=codec[0]['input_size']),
    dict(type='PackPoseInputs')
]
# data loaders
train_dataloader = dict(
    batch_size=32,
    num_workers=4,
    persistent_workers=True,
    sampler=dict(type='DefaultSampler', shuffle=True),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='train',
        pipeline=train_pipeline,
        metainfo=dict(from_file='configs/_base_/datasets/custom.py')
    ))
val_dataloader = dict(
    batch_size=32,
    num_workers=4,
    persistent_workers=True,
    drop_last=False,
    sampler=dict(type='DefaultSampler', shuffle=False, round_up=False),
    dataset=dict(
        type=dataset_type,
        data_root=data_root,
        data_mode=data_mode,
        ann_file='val',
        pipeline=val_pipeline,
        metainfo=dict(from_file='configs/_base_/datasets/custom.py'),
        test_mode=True
    ))
test_dataloader = val_dataloader
# evaluators
val_evaluator = [
    dict(type='PCKAccuracy', thr=0.5),
    dict(type='AUC'),
    dict(type='EPE'),
]
test_evaluator = val_evaluator