Closed rkhilnani9 closed 3 years ago
Hello @rkhilnani9 ,
What PyTorch and CUDA versions are you using? I also hit a similar error on my first try, so I suspect you may be using a CUDA or PyTorch version that doesn't fit the code. Following this pull request (https://github.com/PeikeLi/Self-Correction-Human-Parsing/pull/22), I successfully trained the model with PyTorch 1.2.0 and CUDA 10.0 (PyTorch 1.5.0 with a higher CUDA version should also be fine) without hitting the error.
I also have this problem. I think it's also related to your gcc version. @jingya-hello, what is your gcc version? I used gcc 7.5, PyTorch 1.6 and CUDA 10.1, and I get this error under this setting.
I also have this problem. I think it's also related to your gcc version. @jingya-hello, what is your gcc version? I used gcc 7.5, PyTorch 1.6 and CUDA 10.1, and I get this error under this setting.
I ran it successfully after downgrading gcc from 7.5 to 6.5.
CalledProcessError Traceback (most recent call last) /opt/conda/lib/python3.7/site-packages/torch/utils/cpp_extension.py in _run_ninja_build(build_directory, verbose, error_prefix) 1520 check=True, -> 1521 env=env) 1522 else: /opt/conda/lib/python3.7/subprocess.py in run(input, capture_output, timeout, check, *popenargs, **kwargs) 486 raise CalledProcessError(retcode, process.args, --> 487 output=stdout, stderr=stderr) 488 return CompletedProcess(process.args, retcode, stdout, stderr) CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1. The above exception was the direct cause of the following exception: RuntimeError Traceback (most recent call last) /data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/train.py in
23 from torch.utils import data
24
---> 25 import networks
26 import utils.schp as schp
27 from datasets.datasets import LIPDataSet
/data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/networks/__init__.py in
1 from __future__ import absolute_import
2
----> 3 from networks.AugmentCE2P import resnet101
4
5 __factory = {
/data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/networks/AugmentCE2P.py in
18 # Note here we adopt the InplaceABNSync implementation from https://github.com/mapillary/inplace_abn
19 # By default, the InplaceABNSync module contains a BatchNorm Layer and a LeakyReLu layer
---> 20 from modules import InPlaceABNSync
21 #device=torch.device('cpu')
22 BatchNorm2d = functools.partial(InPlaceABNSync, activation='none')
/data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/modules/__init__.py in
----> 1 from .bn import ABN, InPlaceABN, InPlaceABNSync
2 from .functions import ACT_RELU, ACT_LEAKY_RELU, ACT_ELU, ACT_NONE
3 from .misc import GlobalAvgPool2d, SingleGPU
4 from .residual import IdentityResidualBlock
5 from .dense import DenseModule
/data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/modules/bn.py in
8 from Queue import Queue
9
---> 10 from .functions import *
11
12
/data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/modules/functions.py in
18 "inplace_abn_cuda_half.cu"
19 ]],
---> 20 extra_cuda_cflags=["--expt-extended-lambda"])
21
22 # Activation names
/opt/conda/lib/python3.7/site-packages/torch/utils/cpp_extension.py in load(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, keep_intermediates)
977 with_cuda,
978 is_python_module,
--> 979 keep_intermediates=keep_intermediates)
980
981
/opt/conda/lib/python3.7/site-packages/torch/utils/cpp_extension.py in _jit_compile(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda, is_python_module, keep_intermediates)
1182 build_directory=build_directory,
1183 verbose=verbose,
-> 1184 with_cuda=with_cuda)
1185 finally:
1186 baton.release()
/opt/conda/lib/python3.7/site-packages/torch/utils/cpp_extension.py in _write_ninja_file_and_build_library(name, sources, extra_cflags, extra_cuda_cflags, extra_ldflags, extra_include_paths, build_directory, verbose, with_cuda)
1280 build_directory,
1281 verbose,
-> 1282 error_prefix="Error building extension '{}'".format(name))
1283
1284
/opt/conda/lib/python3.7/site-packages/torch/utils/cpp_extension.py in _run_ninja_build(build_directory, verbose, error_prefix)
1535 if hasattr(error, 'output') and error.output: # type: ignore
1536 message += ": {}".format(error.output.decode()) # type: ignore
-> 1537 raise RuntimeError(message) from e
1538
1539
RuntimeError: Error building extension 'inplace_abn': [1/3] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=inplace_abn -DTORCH_API_INCLUDE_EXTENSION_H -isystem /opt/conda/lib/python3.7/site-packages/torch/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/TH -isystem /opt/conda/lib/python3.7/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /opt/conda/include/python3.7m -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' --expt-extended-lambda -std=c++14 -c /data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/modules/src/inplace_abn_cuda_half.cu -o inplace_abn_cuda_half.cuda.o
FAILED: inplace_abn_cuda_half.cuda.o
/usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=inplace_abn -DTORCH_API_INCLUDE_EXTENSION_H -isystem /opt/conda/lib/python3.7/site-packages/torch/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/TH -isystem /opt/conda/lib/python3.7/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /opt/conda/include/python3.7m -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' --expt-extended-lambda -std=c++14 -c /data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/modules/src/inplace_abn_cuda_half.cu -o inplace_abn_cuda_half.cuda.o
nvcc fatal : Value 'c++14' is not defined for option 'std'
[2/3] /usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=inplace_abn -DTORCH_API_INCLUDE_EXTENSION_H -isystem /opt/conda/lib/python3.7/site-packages/torch/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/TH -isystem /opt/conda/lib/python3.7/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /opt/conda/include/python3.7m -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' --expt-extended-lambda -std=c++14 -c /data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/modules/src/inplace_abn_cuda.cu -o inplace_abn_cuda.cuda.o
FAILED: inplace_abn_cuda.cuda.o
/usr/local/cuda/bin/nvcc -DTORCH_EXTENSION_NAME=inplace_abn -DTORCH_API_INCLUDE_EXTENSION_H -isystem /opt/conda/lib/python3.7/site-packages/torch/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/torch/csrc/api/include -isystem /opt/conda/lib/python3.7/site-packages/torch/include/TH -isystem /opt/conda/lib/python3.7/site-packages/torch/include/THC -isystem /usr/local/cuda/include -isystem /opt/conda/include/python3.7m -D_GLIBCXX_USE_CXX11_ABI=0 -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr -gencode=arch=compute_70,code=sm_70 --compiler-options '-fPIC' --expt-extended-lambda -std=c++14 -c /data/aradml/data/vis/vgupta67/abhinav/panniculectomy/Amandeep/modules/src/inplace_abn_cuda.cu -o inplace_abn_cuda.cuda.o
nvcc fatal : Value 'c++14' is not defined for option 'std'
ninja: build stopped: subcommand failed.
I'm getting this error while running train.py. Any ideas on how we can replace InPlaceABN with some other layers, or is there any fix for this error?