When I run the following command for training, I get the errors below:
python -m torch.distributed.launch --nproc_per_node=8 metro/tools/run_metro_bodymesh.py
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 235, in run
pred_camera, pred_3d_joints, pred_vertices_sub2, pred_vertices_sub, pred_vertices = METRO_model(images, smpl, mesh_sampler, meta_masks=meta_masks, is_train=True)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "...../MeshTransformer/metro/modeling/bert/modeling_metro.py", line 280, in forward
features = features*meta_masks + constant_tensor*(1-meta_masks)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/tensor.py", line 394, in __rsub__
return _C._VariableFunctions.rsub(self, other)
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 221, in run
gt_vertices_sub2 = mesh_sampler.downsample(gt_vertices, n1=0, n2=2)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 272, in downsample
y = spmm(self._D[j], y)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 172, in spmm
return SparseMM.apply(sparse, dense)
File "......MeshTransformer/metro/modeling/_smpl.py", line 161, in forward
return torch.matmul(sparse, dense)
RuntimeError: CUDA error: initialization error when calling cusparseCreate(handle)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 220, in run
gt_vertices = smpl(gt_pose, gt_betas)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 89, in forward
v_shaped = torch.matmul(shapedirs, beta).view(-1, 6890, 3) + v_template
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling cublasCreate(handle)
Also, despite having 64 GB of memory and reducing the batch size to 2, I still get:
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
While running
python -m torch.distributed.launch --nproc_per_node=8 metro/tools/run_metro_bodymesh.py for training:
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 235, in run
pred_camera, pred_3d_joints, pred_vertices_sub2, pred_vertices_sub, pred_vertices = METRO_model(images, smpl, mesh_sampler, meta_masks=meta_masks, is_train=True)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File "...../MeshTransformer/metro/modeling/bert/modeling_metro.py", line 280, in forward
features = features*meta_masks + constant_tensor*(1-meta_masks)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/tensor.py", line 394, in __rsub__
return _C._VariableFunctions.rsub(self, other)
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 221, in run
gt_vertices_sub2 = mesh_sampler.downsample(gt_vertices, n1=0, n2=2)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 272, in downsample
y = spmm(self._D[j], y)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 172, in spmm
return SparseMM.apply(sparse, dense)
File "......MeshTransformer/metro/modeling/_smpl.py", line 161, in forward
return torch.matmul(sparse, dense)
RuntimeError: CUDA error: initialization error when calling cusparseCreate(handle)
Traceback (most recent call last):
File "metro/tools/run_metro_bodymesh.py", line 717, in <module>
main(args)
File "metro/tools/run_metro_bodymesh.py", line 711, in main
run(args, train_dataloader, val_dataloader, _metro_network, smpl, mesh_sampler, renderer)
File "metro/tools/run_metro_bodymesh.py", line 220, in run
gt_vertices = smpl(gt_pose, gt_betas)
File "....../miniconda3/envs/metro/lib/python3.7/site-packages/torch/nn/modules/module.py", line 532, in __call__
result = self.forward(*input, **kwargs)
File ".....MeshTransformer/metro/modeling/_smpl.py", line 89, in forward
v_shaped = torch.matmul(shapedirs, beta).view(-1, 6890, 3) + v_template
RuntimeError: CUDA error: CUBLAS_STATUS_NOT_INITIALIZED when calling cublasCreate(handle)
Environment: python=3.7, pytorch==1.4.0, torchvision==0.5.0, cudatoolkit=10.1
Also, despite having 64 GB of memory and reducing the batch size to 2, I still get:
RuntimeError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 15.78 GiB total capacity; 1.52 GiB already allocated; 9.00 MiB free; 1.55 GiB reserved in total by PyTorch)
Calling torch.cuda.empty_cache() did not help.