I have a question - Githubissues

leelang7 commented 3 years ago

I tested on a Linux server, and the result was the same as in the Colab setting (the same as with unsup3d). <-- pytorch 1.2

sh scripts/run_car.sh No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' No CUDA runtime is found, using CUDA_HOME='/usr/local/cuda' StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. Load config from yml file: configs/car.yml Loading configs from configs/car.yml Traceback (most recent call last): File "run.py", line 26, in init_dist(args.launcher, backend='nccl') File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 20, in init_dist _init_dist_pytorch(backend, kwargs) File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 33, in _init_dist_pytorch torch.cuda.set_device(rank % num_gpus) ZeroDivisionError: integer division or modulo by zero Load config from yml file: configs/car.yml Loading configs from configs/car.yml Traceback (most recent call last): File "run.py", line 26, in init_dist(args.launcher, backend='nccl') File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 20, in init_dist _init_dist_pytorch(backend, kwargs) File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 33, in _init_dist_pytorch 
torch.cuda.set_device(rank % num_gpus) ZeroDivisionError: integer division or modulo by zero Load config from yml file: configs/car.yml Loading configs from configs/car.yml Traceback (most recent call last): File "run.py", line 26, in init_dist(args.launcher, backend='nccl') File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 20, in init_dist _init_dist_pytorch(backend, kwargs) File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 33, in _init_dist_pytorch torch.cuda.set_device(rank % num_gpus) ZeroDivisionError: integer division or modulo by zero Load config from yml file: configs/car.yml Loading configs from configs/car.yml Traceback (most recent call last): File "run.py", line 26, in init_dist(args.launcher, backend='nccl') File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 20, in init_dist _init_dist_pytorch(backend, kwargs) File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 33, in _init_dist_pytorch torch.cuda.set_device(rank % num_gpus) ZeroDivisionError: integer division or modulo by zero Traceback (most recent call last): File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/runpy.py", line 193, in _run_module_as_main "main", mod_spec) File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/torch/distributed/launch.py", line 246, in main() File "/home/adminuser/anaconda3/envs/3D/lib/python3.6/site-packages/torch/distributed/launch.py", line 242, in main cmd=cmd) subprocess.CalledProcessError: Command '['/home/adminuser/anaconda3/envs/3D/bin/python', '-u', 'run.py', '--local_rank=3', '--launcher', 'pytorch', '--config', 'configs/car.yml']' returned non-zero exit status 1.

Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.

sh scripts/run_car.sh <-- pytorch 1.1 StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File 
"/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 
6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "/home/adminuser/anaconda3/envs/test/lib/python3.6/runpy.py", line 193, in _run_module_as_main "main", mod_spec) File "/home/adminuser/anaconda3/envs/test/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/torch/distributed/launch.py", line 235, in main() File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/torch/distributed/launch.py", line 231, in main cmd=process.args) subprocess.CalledProcessError: Command '['/home/adminuser/anaconda3/envs/test/bin/python', '-u', 'run.py', '--local_rank=0', '--launcher', 'pytorch', '--config', 'configs/car.yml']' returned non-zero exit status 1. (test) adminuser@DGX-Station:~/GAN2Shape$ python run.py --config /configs/car.yml StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. 
Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory (test) adminuser@DGX-Station:~/GAN2Shape$ (test) adminuser@DGX-Station:~/GAN2Shape$ ^C (test) adminuser@DGX-Station:~/GAN2Shape$ (test) adminuser@DGX-Station:~/GAN2Shape$ sh scripts/run_car.sh StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. 
Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", 
line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "run.py", line 7, in from gan2shape import setup_runtime, Trainer, GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/init.py", line 3, in from .model import GAN2Shape File "/home/adminuser/GAN2Shape/gan2shape/model.py", line 17, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/init.py", line 1, in from .renderer import Renderer File "/home/adminuser/GAN2Shape/gan2shape/renderer/renderer.py", line 6, in import neural_renderer as nr File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/init.py", line 3, in from .load_obj import load_obj File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/neural_renderer/load_obj.py", line 8, in import neural_renderer.cuda.load_textures as load_textures_cuda ImportError: libcudart.so.9.2: cannot open shared object file: No such file or directory Traceback (most recent call last): File "/home/adminuser/anaconda3/envs/test/lib/python3.6/runpy.py", line 193, in _run_module_as_main "main", mod_spec) File "/home/adminuser/anaconda3/envs/test/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) File 
"/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/torch/distributed/launch.py", line 235, in main() File "/home/adminuser/anaconda3/envs/test/lib/python3.6/site-packages/torch/distributed/launch.py", line 231, in main cmd=process.args) subprocess.CalledProcessError: Command '['/home/adminuser/anaconda3/envs/test/bin/python', '-u', 'run.py', '--local_rank=0', '--launcher', 'pytorch', '--config', 'configs/car.yml']' returned non-zero exit status 1.

additionally, pip install neural_renderer_pytorch StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. {'config': '/configs/car.yml', 'seed': 0, 'num_workers': 4, 'distributed': False} Setting up Perceptual loss... Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/checkpoints/vgg16-397923af.pth 100% 528M/528M [00:03<00:00, 152MB/s] Loading model from: /content/drive/My Drive/GAN2Shape/gan2shape/stylegan2/stylegan2-pytorch/lpips/weights/v0.1/vgg.pth ...[net-lin [vgg]] initialized ...Done Traceback (most recent call last): File "run.py", line 31, in trainer = Trainer(cfgs, GAN2Shape) File "/content/drive/My Drive/GAN2Shape/gan2shape/trainer.py", line 23, in init self.model = model(cfgs) File "/content/drive/My Drive/GAN2Shape/gan2shape/model.py", line 98, in init self.init_VL_sampler() File "/content/drive/My Drive/GAN2Shape/gan2shape/model.py", line 346, in init_VL_sampler light_mvn = torch.load(light_mvn_path) File "/usr/local/lib/python3.6/site-packages/torch/serialization.py", line 381, in load f = open(f, 'rb') FileNotFoundError: [Errno 2] No such file or directory: 'checkpoints/view_light/view_light.pth'

XingangPan commented 3 years ago

@leelang7 Do you have 4 GPUs in your server? The script would require 4 GPUs by default. If not, you may change the script to:

python run.py \
    --config configs/${CONFIG}.yml \
    2>&1 | tee results/${EXP}/log.txt

Besides, make sure to run sh scripts/download.sh before running the other scripts.

leelang7 commented 3 years ago

I tried, but I got the same result.

StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. Load config from yml file: configs/car.yml Loading configs from configs/car.yml {'checkpoint_dir': 'results/car', 'save_checkpoint_freq': 500, 'keep_num_checkpoint': 2, 'use_logger': True, 'log_freq': 100, 'joint_train': False, 'independent': False, 'reset_weight': True, 'save_results': True, 'num_stage': 4, 'flip1_cfg': [False, False, False, False], 'flip3_cfg': [False, False, False, False], 'stage_len_dict': {'step1': 700, 'step2': 700, 'step3': 600}, 'stage_len_dict2': {'step1': 200, 'step2': 500, 'step3': 400}, 'image_size': 128, 'load_gt_depth': False, 'img_list_path': 'data/car/list.txt', 'img_root': 'data/car', 'latent_root': 'data/car/latents', 'model_name': 'gan2shape_car', 'category': 'car', 'share_weight': True, 'relative_enc': False, 'use_mask': True, 'add_mean_L': True, 'add_mean_V': True, 'min_depth': 0.9, 'max_depth': 1.1, 'xyz_rotation_range': 60, 'xy_translation_range': 0.1, 'z_translation_range': 0, 'collect_iters': 100, 'batchsize': 8, 'lr': 0.0001, 'lam_perc': 0.5, 'lam_smooth': 0.01, 'lam_regular': 0.01, 'view_mvn_path': 'checkpoints/view_light/view_mvn.pth', 'light_mvn_path': 'checkpoints/view_light/light_mvn.pth', 'channel_multiplier': 2, 'gan_size': 512, 'gan_ckpt': 'checkpoints/stylegan2/stylegan2-car-config-f.pt', 'F1_d': 2, 'rot_center_depth': 1.0, 'fov': 10, 'tex_cube_size': 2, 'config': 'configs/car.yml', 'seed': 0, 'num_workers': 4, 'distributed': False} Setting up Perceptual loss... Loading model from: /content/drive/My Drive/GAN2Shape/gan2shape/stylegan2/stylegan2-pytorch/lpips/weights/v0.1/vgg.pth ...[net-lin [vgg]] initialized ...Done Loading images... Initializing the depth net to output ellipsoid ... 
Iter: 0, Loss: 0.001553 Iter: 100, Loss: 0.000014 Iter: 200, Loss: 0.000011 Iter: 300, Loss: 0.000026 Iter: 400, Loss: 0.000009 Iter: 500, Loss: 0.000010 Iter: 600, Loss: 0.000008 Iter: 700, Loss: 0.000008 Iter: 800, Loss: 0.000008 Iter: 900, Loss: 0.000008 E0000/T00000/step1/ 0.0Hz loss: 1.05303 E0000/T00001/step1/ 0.1Hz loss: 1.04649 E0000/T00002/step1/ 0.7Hz loss: 1.03471 E0000/T00003/step1/ 1.2Hz loss: 1.01915 E0000/T00004/step1/ 1.7Hz loss: 1.00167 E0000/T00005/step1/ 2.1Hz loss: 0.98326 E0000/T00006/step1/ 2.5Hz loss: 0.96459 E0000/T00007/step1/ 2.8Hz loss: 0.94606 E0000/T00008/step1/ 3.1Hz loss: 0.92788 E0000/T00009/step1/ 3.4Hz loss: 0.91007 E0000/T00010/step1/ 3.7Hz loss: 0.89263 E0000/T00011/step1/ 3.9Hz loss: 0.87544 E0000/T00012/step1/ 4.1Hz loss: 0.85845 E0000/T00013/step1/ 4.3Hz loss: 0.84165 E0000/T00014/step1/ 4.4Hz loss: 0.82490 E0000/T00015/step1/ 4.6Hz loss: 0.80816 E0000/T00016/step1/ 4.7Hz loss: 0.79124 E0000/T00017/step1/ 4.8Hz loss: 0.77385 E0000/T00018/step1/ 4.9Hz loss: 0.75603 E0000/T00019/step1/ 5.0Hz loss: 0.73793 E0000/T00020/step1/ 5.1Hz loss: 0.71979 E0000/T00021/step1/ 5.2Hz loss: 0.70169 E0000/T00022/step1/ 5.3Hz loss: 0.68356 E0000/T00023/step1/ 5.3Hz loss: 0.66546 E0000/T00024/step1/ 5.4Hz loss: 0.64739 E0000/T00025/step1/ 5.5Hz loss: 0.62937 E0000/T00026/step1/ 5.5Hz loss: 0.61140 E0000/T00027/step1/ 5.5Hz loss: 0.59349 E0000/T00028/step1/ 5.6Hz loss: 0.57570 E0000/T00029/step1/ 5.6Hz loss: 0.55801 E0000/T00030/step1/ 5.7Hz loss: 0.54044 E0000/T00031/step1/ 5.7Hz loss: 0.52303 E0000/T00032/step1/ 5.7Hz loss: 0.50588 E0000/T00033/step1/ 5.7Hz loss: 0.48904 E0000/T00034/step1/ 5.7Hz loss: 0.47253 E0000/T00035/step1/ 5.7Hz loss: 0.45641 E0000/T00036/step1/ 5.7Hz loss: 0.44073 E0000/T00037/step1/ 5.8Hz loss: 0.42551 E0000/T00038/step1/ 5.8Hz loss: 0.41077 E0000/T00039/step1/ 5.8Hz loss: 0.3965 . . .

!sh scripts/run_car.sh

StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op FusedLeakyReLU not available, using native PyTorch fallback. StyleGAN2: Optimized CUDA op UpFirDn2d not available, using native PyTorch fallback. Load config from yml file: configs/car.yml Loading configs from configs/car.yml {'checkpoint_dir': 'results/car', 'save_checkpoint_freq': 500, 'keep_num_checkpoint': 2, 'use_logger': True, 'log_freq': 100, 'joint_train': False, 'independent': False, 'reset_weight': True, 'save_results': True, 'num_stage': 4, 'flip1_cfg': [False, False, False, False], 'flip3_cfg': [False, False, False, False], 'stage_len_dict': {'step1': 700, 'step2': 700, 'step3': 600}, 'stage_len_dict2': {'step1': 200, 'step2': 500, 'step3': 400}, 'image_size': 128, 'load_gt_depth': False, 'img_list_path': 'data/car/list.txt', 'img_root': 'data/car', 'latent_root': 'data/car/latents', 'model_name': 'gan2shape_car', 'category': 'car', 'share_weight': True, 'relative_enc': False, 'use_mask': True, 'add_mean_L': True, 'add_mean_V': True, 'min_depth': 0.9, 'max_depth': 1.1, 'xyz_rotation_range': 60, 'xy_translation_range': 0.1, 'z_translation_range': 0, 'collect_iters': 100, 'batchsize': 8, 'lr': 0.0001, 'lam_perc': 0.5, 'lam_smooth': 0.01, 'lam_regular': 0.01, 'view_mvn_path': 'checkpoints/view_light/view_mvn.pth', 'light_mvn_path': 'checkpoints/view_light/light_mvn.pth', 'channel_multiplier': 2, 'gan_size': 512, 'gan_ckpt': 'checkpoints/stylegan2/stylegan2-car-config-f.pt', 'F1_d': 2, 'rot_center_depth': 1.0, 
'fov': 10, 'tex_cube_size': 2, 'config': 'configs/car.yml', 'seed': 0, 'num_workers': 4, 'distributed': True} Load config from yml file: configs/car.yml Loading configs from configs/car.yml Load config from yml file: configs/car.yml Loading configs from configs/car.yml Load config from yml file: configs/car.yml Loading configs from configs/car.yml {'checkpoint_dir': 'results/car', 'save_checkpoint_freq': 500, 'keep_num_checkpoint': 2, 'use_logger': True, 'log_freq': 100, 'joint_train': False, 'independent': False, 'reset_weight': True, 'save_results': True, 'num_stage': 4, 'flip1_cfg': [False, False, False, False], 'flip3_cfg': [False, False, False, False], 'stage_len_dict': {'step1': 700, 'step2': 700, 'step3': 600}, 'stage_len_dict2': {'step1': 200, 'step2': 500, 'step3': 400}, 'image_size': 128, 'load_gt_depth': False, 'img_list_path': 'data/car/list.txt', 'img_root': 'data/car', 'latent_root': 'data/car/latents', 'model_name': 'gan2shape_car', 'category': 'car', 'share_weight': True, 'relative_enc': False, 'use_mask': True, 'add_mean_L': True, 'add_mean_V': True, 'min_depth': 0.9, 'max_depth': 1.1, 'xyz_rotation_range': 60, 'xy_translation_range': 0.1, 'z_translation_range': 0, 'collect_iters': 100, 'batchsize': 8, 'lr': 0.0001, 'lam_perc': 0.5, 'lam_smooth': 0.01, 'lam_regular': 0.01, 'view_mvn_path': 'checkpoints/view_light/view_mvn.pth', 'light_mvn_path': 'checkpoints/view_light/light_mvn.pth', 'channel_multiplier': 2, 'gan_size': 512, 'gan_ckpt': 'checkpoints/stylegan2/stylegan2-car-config-f.pt', 'F1_d': 2, 'rot_center_depth': 1.0, 'fov': 10, 'tex_cube_size': 2, 'config': 'configs/car.yml', 'seed': 0, 'num_workers': 4, 'distributed': True} Traceback (most recent call last): File "run.py", line 26, in init_dist(args.launcher, backend='nccl') File "/usr/local/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 20, in init_dist _init_dist_pytorch(backend, kwargs) File "/usr/local/lib/python3.6/site-packages/mmcv/runner/dist_utils.py", line 34, in 
_init_dist_pytorch dist.init_process_group(backend=backend, kwargs) File "/usr/local/lib/python3.6/site-packages/torch/distributed/distributed_c10d.py", line 406, in init_process_group store, rank, world_size = next(rendezvous(url)) File "/usr/local/lib/python3.6/site-packages/torch/distributed/rendezvous.py", line 143, in _env_rendezvous_handler store = TCPStore(master_addr, master_port, world_size, start_daemon) RuntimeError: Address already in use {'checkpoint_dir': 'results/car', 'save_checkpoint_freq': 500, 'keep_num_checkpoint': 2, 'use_logger': True, 'log_freq': 100, 'joint_train': False, 'independent': False, 'reset_weight': True, 'save_results': True, 'num_stage': 4, 'flip1_cfg': [False, False, False, False], 'flip3_cfg': [False, False, False, False], 'stage_len_dict': {'step1': 700, 'step2': 700, 'step3': 600}, 'stage_len_dict2': {'step1': 200, 'step2': 500, 'step3': 400}, 'image_size': 128, 'load_gt_depth': False, 'img_list_path': 'data/car/list.txt', 'img_root': 'data/car', 'latent_root': 'data/car/latents', 'model_name': 'gan2shape_car', 'category': 'car', 'share_weight': True, 'relative_enc': False, 'use_mask': True, 'add_mean_L': True, 'add_mean_V': True, 'min_depth': 0.9, 'max_depth': 1.1, 'xyz_rotation_range': 60, 'xy_translation_range': 0.1, 'z_translation_range': 0, 'collect_iters': 100, 'batchsize': 8, 'lr': 0.0001, 'lam_perc': 0.5, 'lam_smooth': 0.01, 'lam_regular': 0.01, 'view_mvn_path': 'checkpoints/view_light/view_mvn.pth', 'light_mvn_path': 'checkpoints/view_light/light_mvn.pth', 'channel_multiplier': 2, 'gan_size': 512, 'gan_ckpt': 'checkpoints/stylegan2/stylegan2-car-config-f.pt', 'F1_d': 2, 'rot_center_depth': 1.0, 'fov': 10, 'tex_cube_size': 2, 'config': 'configs/car.yml', 'seed': 0, 'num_workers': 4, 'distributed': True} Traceback (most recent call last): File "/usr/local/lib/python3.6/runpy.py", line 193, in _run_module_as_main "main", mod_spec) File "/usr/local/lib/python3.6/runpy.py", line 85, in _run_code exec(code, run_globals) 
File "/usr/local/lib/python3.6/site-packages/torch/distributed/launch.py", line 246, in main() File "/usr/local/lib/python3.6/site-packages/torch/distributed/launch.py", line 242, in main cmd=cmd) subprocess.CalledProcessError: Command '['/usr/local/bin/python', '-u', 'run.py', '--local_rank=3', '--launcher', 'pytorch', '--config', 'configs/car.yml']' returned non-zero exit status 1.

Setting OMP_NUM_THREADS environment variable for each process to be 1 in default, to avoid your system being overloaded, please further tune the variable for optimal performance in your application as needed.

XingangPan commented 3 years ago

@leelang7 For the first error, you should specify what ${CONFIG} and ${EXP} are. For example:

python run.py \
    --config configs/car.yml \
    2>&1 | tee results/car/log.txt

For the second error, you may try changing the value of PORT in the script to another value.

namratha03 commented 3 years ago

We are running this on Google Colab, and I tried changing run_car.sh to run on a single GPU. run_car.sh:

EXP=car
CONFIG=car
GPUS=1
PORT=${PORT:-29578}

mkdir -p results/${EXP}

python run.py \
    --config configs/${CONFIG}.yml \
    2>&1 | tee results/${EXP}/log.txt

The error we get is: Load config from yml file: configs/car.yml Loading configs from configs/car.yml {'checkpoint_dir': 'results/car', 'save_checkpoint_freq': 500, 'keep_num_checkpoint': 2, 'use_logger': True, 'log_freq': 100, 'joint_train': False, 'independent': False, 'reset_weight': True, 'save_results': True, 'num_stage': 4, 'flip1_cfg': [False, False, False, False], 'flip3_cfg': [False, False, False, False], 'stage_len_dict': {'step1': 700, 'step2': 700, 'step3': 600}, 'stage_len_dict2': {'step1': 200, 'step2': 500, 'step3': 400}, 'image_size': 128, 'load_gt_depth': False, 'img_list_path': 'data/car/list.txt', 'img_root': 'data/car', 'latent_root': 'data/car/latents', 'model_name': 'gan2shape_car', 'category': 'car', 'share_weight': True, 'relative_enc': False, 'use_mask': True, 'add_mean_L': True, 'add_mean_V': True, 'min_depth': 0.9, 'max_depth': 1.1, 'xyz_rotation_range': 60, 'xy_translation_range': 0.1, 'z_translation_range': 0, 'collect_iters': 100, 'batchsize': 8, 'lr': 0.0001, 'lam_perc': 0.5, 'lam_smooth': 0.01, 'lam_regular': 0.01, 'view_mvn_path': 'checkpoints/view_light/view_mvn.pth', 'light_mvn_path': 'checkpoints/view_light/light_mvn.pth', 'rand_light': [-1, 1, -0.2, 0.8, -0.1, 0.6, -0.6], 'channel_multiplier': 2, 'gan_size': 512, 'gan_ckpt': 'checkpoints/stylegan2/stylegan2-car-config-f.pt', 'F1_d': 2, 'rot_center_depth': 1.0, 'fov': 10, 'tex_cube_size': 2, 'config': 'configs/car.yml', 'seed': 0, 'num_workers': 4, 'distributed': False} Setting up Perceptual loss... 
Loading model from: /content/GAN2Shape/gan2shape/stylegan2/stylegan2-pytorch/lpips/weights/v0.1/vgg.pth ...[net-lin [vgg]] initialized ...Done Traceback (most recent call last): File "run.py", line 31, in trainer = Trainer(cfgs, GAN2Shape) File "/content/GAN2Shape/gan2shape/trainer.py", line 23, in init self.model = model(cfgs) File "/content/GAN2Shape/gan2shape/model.py", line 92, in init self.renderer = Renderer(cfgs, self.image_size) File "/content/GAN2Shape/gan2shape/renderer/renderer.py", line 46, in init self.inv_K = self.inv_K_origin.clone() RuntimeError: CUDA error: invalid device function

Can you please tell us what changes need to be made or what port number should be changed?

mathXin112 commented 3 years ago

We are running this on Google Colab, and I tried changing run_car.sh to run on a single GPU. run_car.sh:
EXP=car
CONFIG=car
GPUS=1
PORT=${PORT:-29578}

mkdir -p results/${EXP}

python run.py \
    --config configs/${CONFIG}.yml \
    2>&1 | tee results/${EXP}/log.txt
The error we get is: Load config from yml file: configs/car.yml Loading configs from configs/car.yml {'checkpoint_dir': 'results/car', 'save_checkpoint_freq': 500, 'keep_num_checkpoint': 2, 'use_logger': True, 'log_freq': 100, 'joint_train': False, 'independent': False, 'reset_weight': True, 'save_results': True, 'num_stage': 4, 'flip1_cfg': [False, False, False, False], 'flip3_cfg': [False, False, False, False], 'stage_len_dict': {'step1': 700, 'step2': 700, 'step3': 600}, 'stage_len_dict2': {'step1': 200, 'step2': 500, 'step3': 400}, 'image_size': 128, 'load_gt_depth': False, 'img_list_path': 'data/car/list.txt', 'img_root': 'data/car', 'latent_root': 'data/car/latents', 'model_name': 'gan2shape_car', 'category': 'car', 'share_weight': True, 'relative_enc': False, 'use_mask': True, 'add_mean_L': True, 'add_mean_V': True, 'min_depth': 0.9, 'max_depth': 1.1, 'xyz_rotation_range': 60, 'xy_translation_range': 0.1, 'z_translation_range': 0, 'collect_iters': 100, 'batchsize': 8, 'lr': 0.0001, 'lam_perc': 0.5, 'lam_smooth': 0.01, 'lam_regular': 0.01, 'view_mvn_path': 'checkpoints/view_light/view_mvn.pth', 'light_mvn_path': 'checkpoints/view_light/light_mvn.pth', 'rand_light': [-1, 1, -0.2, 0.8, -0.1, 0.6, -0.6], 'channel_multiplier': 2, 'gan_size': 512, 'gan_ckpt': 'checkpoints/stylegan2/stylegan2-car-config-f.pt', 'F1_d': 2, 'rot_center_depth': 1.0, 'fov': 10, 'tex_cube_size': 2, 'config': 'configs/car.yml', 'seed': 0, 'num_workers': 4, 'distributed': False} Setting up Perceptual loss... 
Loading model from: /content/GAN2Shape/gan2shape/stylegan2/stylegan2-pytorch/lpips/weights/v0.1/vgg.pth ...[net-lin [vgg]] initialized ...Done Traceback (most recent call last): File "run.py", line 31, in trainer = Trainer(cfgs, GAN2Shape) File "/content/GAN2Shape/gan2shape/trainer.py", line 23, in init self.model = model(cfgs) File "/content/GAN2Shape/gan2shape/model.py", line 92, in init self.renderer = Renderer(cfgs, self.image_size) File "/content/GAN2Shape/gan2shape/renderer/renderer.py", line 46, in init self.inv_K = self.inv_K_origin.clone() RuntimeError: CUDA error: invalid device function

Can you please tell us what changes need to be made or what port number should be changed?

I ran into the same problem, so I would like to know: have you solved this? Also, which versions of CUDA, cuDNN, and torch are you using?

XingangPan / GAN2Shape

I have a question #4