############# Starting Epoch 0 | LR: 0.001 #############
0%| | 0/113 [00:00<?, ?it/s]the gpu is 0
the skeleton shape is torch.Size([16, 24, 3])
the global orient is None
the batch size is 16
0%| | 0/113 [02:26<?, ?it/s]
Traceback (most recent call last):
  File "/home/ndip/HybrIK/scripts/train_smpl.py", line 398, in <module>
    main()
  File "/home/ndip/HybrIK/scripts/train_smpl.py", line 246, in main
    mp.spawn(main_worker, nprocs=ngpus_per_node, args=(opt, cfg))
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 246, in spawn
    return start_processes(fn, args, nprocs, join, daemon, start_method="spawn")
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 202, in start_processes
    while not context.join():
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 163, in join
    raise ProcessRaisedException(msg, error_index, failed_process.pid)
torch.multiprocessing.spawn.ProcessRaisedException:

-- Process 0 terminated with the following error:
Traceback (most recent call last):
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/multiprocessing/spawn.py", line 74, in _wrap
    fn(i, *args)
  File "/home/ndip/HybrIK/scripts/train_smpl.py", line 346, in main_worker
    loss, acc17 = train(opt, train_loader, m, criterion, optimizer, writer)
  File "/home/ndip/HybrIK/scripts/train_smpl.py", line 61, in train
    output = m(inps, trans_inv, intrinsic_param, root, depth_factor, None)
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/nn/parallel/distributed.py", line 1519, in forward
    else self._run_ddp_forward(*inputs, **kwargs)
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/nn/parallel/distributed.py", line 1355, in _run_ddp_forward
    return self.module(*inputs, **kwargs)  # type: ignore[index]
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/home/ndip/miniconda3/envs/mmlab/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/home/ndip/HybrIK/hybrik/models/simple3dposeBaseSMPL24.py", line 335, in forward
    output = self.smpl.hybrik(
  File "/home/ndip/HybrIK/hybrik/models/layers/smpl/SMPL.py", line 263, in hybrik
    vertices, new_joints, rot_mats, joints_from_verts = hybrik(
  File "/home/ndip/HybrIK/hybrik/models/layers/smpl/lbs.py", line 357, in hybrik
    rot_mats, rotate_rest_pose = batch_inverse_kinematics_transform_naive(
  File "/home/ndip/HybrIK/hybrik/models/layers/smpl/lbs.py", line 868, in batch_inverse_kinematics_transform_naive
    rot_mat_chain[:, indices] = torch.matmul(
RuntimeError: The size of tensor a (2) must match the size of tensor b (5) at non-singleton dimension 1
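For reference, the message itself is ordinary PyTorch batched-matmul broadcasting, not anything HybrIK-specific. A minimal sketch (plain PyTorch; these stand-in tensors are not repo code) that reproduces the same error:

import torch

# torch.matmul broadcasts the leading (batch) dimensions; 2 vs 5 at dim 1
# cannot broadcast, so this raises the exact error from the traceback above.
a = torch.eye(3).expand(32, 2, 3, 3)  # stands in for rot_mat_chain[:, parents[indices]]
b = torch.eye(3).expand(32, 5, 3, 3)  # stands in for a 5-leaf rot_mat

try:
    torch.matmul(a, b)
except RuntimeError as e:
    print(e)  # The size of tensor a (2) must match the size of tensor b (5) ...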
@LoyalBlanc @biansy000, I received the above error after getting my data loaded for training. I followed the structure of the processed h36m annotation file you provided and built my own dataset the same way.
Could you tell me where I can trace this error, or what a potential solution could be?
I noticed this block in lbs.py:

if leaf_thetas is not None:
    rot_mat = leaf_rot_mats[:, :, :, :]
    print(rot_mat.shape)
    print(rot_mat_chain[:, parents[indices]].shape)

Printing those shapes gives:
torch.Size([32, 5, 3, 3])
torch.Size([32, 2, 3, 3])
NOTE: my data includes 24 joints, and the remaining 5 (leaf joints) are zeros.
I have 2 GPUs in my system, but it is only using GPU 0 for some reason. Could this be the cause, or which tensors could have mismatched shapes?
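One way to localize this (a debugging sketch reusing the variable names from the snippet above; the expected relationship between the two counts is my reading of the error, not repo documentation): both tensors agree on the batch dimension (32), so the mismatch is in the joint dimension, and the single-GPU question is very likely unrelated.

# Hypothetical drop-in check just before the failing matmul in lbs.py:
# the number of leaf rotations supplied must match the number of joints
# selected by `indices` for the batched matmul to broadcast.
n_leaf = leaf_rot_mats.shape[1]                           # 5 in the log above
n_selected = rot_mat_chain[:, parents[indices]].shape[1]  # 2 in the log above
assert n_leaf == n_selected, (
    f"{n_leaf} leaf rotations vs {n_selected} selected parents -- "
    "the annotation layout likely differs from the processed h36m file")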
Please do not use configs/256x192_adam_lr1e-3-res34_smpl_24_3d_base_2x_mix.yaml; use the 29-kpt version of HybrIK instead. The 24-kpt version of HybrIK is stale.
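For anyone landing here later, a small sketch to spot the non-24-kpt configs in a checkout (filtering on the '_24_' substring is an assumption about the repo's naming convention, so inspect the printed list rather than trusting the filter):

from pathlib import Path

# List the shipped configs so you can pick a 29-kpt one; the '_24_'
# substring marking the stale setup is an assumed naming convention.
for cfg in sorted(Path('configs').glob('*.yaml')):
    marker = '  <- stale 24-kpt' if '_24_' in cfg.name else ''
    print(cfg.name + marker)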