Closed aininot260 closed 4 years ago
I failed to compile caldgemm. The log is:
(tensorrt) nvidia@Hewlett-Packard:~/caldgemm$ make -j8 /bin/sh: 1: Syntax error: redirection unexpected /bin/sh: 1: [: -a: unexpected operator makefiles/makefile:7: Unknown Architecture: 0, defaulting to x86_64-pc-linux-gnu makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/benchmark.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/timer.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/qmalloc.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm_cpu.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/affinity.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/threadserver.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/qsem.d: No such file or directory makefiles/makefile:334: release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm_adl.d: No such file or directory /bin/sh: 1: Syntax error: redirection unexpected /bin/sh: 1: [: -a: unexpected operator makefiles/makefile:7: Unknown Architecture: 0, defaulting to x86_64-pc-linux-gnu /usr/local/cuda/bin/nvcc --compiler-bindir c++ --use_fast_math --maxrregcount 255 -O4 -Xptxas -v -Xptxas -O4 -Xcompiler -O4 -m64 `for i in 35 61; do echo -n -gencode arch=compute_$i,code=sm_$i\ ;done` --compiler-options -I/home/nvidia/intel/mkl/include --compiler-options -I/usr/local/openmpi/include/vampirtrace --compiler-options -I"/usr/local/cuda/include" 
--compiler-options -I"/usr/local/cuda/sdk/common/inc" --compiler-options -DCALDGEMM_CUDA --compiler-options -DCALDGEMM_CUDA_CUBLAS --compiler-options -DUSE_MKL --compiler-options -D_64BIT --cuda --output-file "release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.cpp" caldgemm_cuda.cu c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -Wno-strict-aliasing -c caldgemm.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -Wno-strict-aliasing -c benchmark.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/benchmark.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c cmodules/timer.cpp -o 
release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/timer.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c cmodules/qmalloc.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/qmalloc.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c cmodules/affinity.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/affinity.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c cmodules/threadserver.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/threadserver.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops 
-mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c caldgemm_cpu.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm_cpu.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c cmodules/qsem.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/qsem.o c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c caldgemm_adl.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm_adl.o caldgemm_cuda.cu(364): warning: variable "threads" was declared but never referenced caldgemm_cuda.cu(364): warning: variable "blocks" was declared but never referenced caldgemm_cuda.cu(364): warning: variable "threads" was declared but never referenced caldgemm_cuda.cu(364): warning: variable "blocks" was declared but never referenced ptxas info : 0 bytes gmem ptxas info : Compiling entry function '_Z20CUDAConversionKernelPKdPdmm' for 'sm_35' 
ptxas info : Function properties for _Z20CUDAConversionKernelPKdPdmm 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 14 registers, 352 bytes cmem[0] ptxas info : Compiling entry function '_Z17CUDAKernelLinpackPdS_S_mmmddm' for 'sm_35' ptxas info : Function properties for _Z17CUDAKernelLinpackPdS_S_mmmddm 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 101 registers, 392 bytes cmem[0] ptxas info : Compiling entry function '_Z16CUDAKernelALPHA1PdS_S_mmmddm' for 'sm_35' ptxas info : Function properties for _Z16CUDAKernelALPHA1PdS_S_mmmddm 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 101 registers, 392 bytes cmem[0] ptxas info : Compiling entry function '_Z10CUDAKernelPdS_S_mmmddm' for 'sm_35' ptxas info : Function properties for _Z10CUDAKernelPdS_S_mmmddm 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 101 registers, 392 bytes cmem[0] ptxas info : 0 bytes gmem ptxas info : Compiling entry function '_Z20CUDAConversionKernelPKdPdmm' for 'sm_61' ptxas info : Function properties for _Z20CUDAConversionKernelPKdPdmm 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 25 registers, 352 bytes cmem[0] ptxas info : Compiling entry function '_Z17CUDAKernelLinpackPdS_S_mmmddm' for 'sm_61' ptxas info : Function properties for _Z17CUDAKernelLinpackPdS_S_mmmddm 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 95 registers, 392 bytes cmem[0] ptxas info : Compiling entry function '_Z16CUDAKernelALPHA1PdS_S_mmmddm' for 'sm_61' ptxas info : Function properties for _Z16CUDAKernelALPHA1PdS_S_mmmddm 0 bytes stack frame, 0 bytes spill stores, 0 bytes spill loads ptxas info : Used 95 registers, 392 bytes cmem[0] ptxas info : Compiling entry function '_Z10CUDAKernelPdS_S_mmmddm' for 'sm_61' ptxas info : Function properties for _Z10CUDAKernelPdS_S_mmmddm 0 bytes stack frame, 0 bytes spill stores, 0 
bytes spill loads ptxas info : Used 95 registers, 392 bytes cmem[0] cat release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.cpp | grep -v NVCC_GREP | sed "s/#pragma detect_mismatch(\"_MSC_VER\", \"1600\")//g" > release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.cpp.tmp mv -f release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.cpp.tmp release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.cpp if [ -e "caldgemm_cuda.cu.x86_64-pc-linux-gnu.patch" ]; then patch -r /dev/null -s --no-backup-if-mismatch -i caldgemm_cuda.cu.x86_64-pc-linux-gnu.patch release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.cpp; fi c++ -m64 -D"_AMD64_" -D"_X64_" -pipe -DGCC_RUNTIME -flto -Wall -Wno-write-strings -fopenmp -O3 -march=native -msse4.2 -m64 -fweb -frename-registers -minline-all-stringops -mfpmath=sse -ftracer -funroll-loops -fpeel-loops -fprefetch-loop-arrays -ffast-math -fno-stack-protector -ggdb -x c++ -Wno-effc++ -I/home/nvidia/intel/mkl/include -I/usr/local/openmpi/include/vampirtrace -I"/usr/local/cuda/include" -I"/usr/local/cuda/sdk/common/inc" -DCALDGEMM_CUDA -DCALDGEMM_CUDA_CUBLAS -DUSE_MKL -D_64BIT -c release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.cpp -o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.o caldgemm_cuda.cu: In member function ‘virtual int caldgemm_cuda::RunCALDGEMM_Exit()’: caldgemm_cuda.cu:738:55: warning: ‘cudaError_t cudaThreadSynchronize()’ is deprecated [-Wdeprecated-declarations] CHKRET(cudaThreadSynchronize(), "Synchronizing CUDA Thread"); ^ /usr/local/cuda/include/cuda_runtime_api.h:957:46: note: declared here extern __CUDA_DEPRECATED __host__ cudaError_t CUDARTAPI cudaThreadSynchronize(void); ^~~~~~~~~~~~~~~~~~~~~ c++ -m64 -Wall -ggdb -fopenmp -flto -L/usr/local/cuda/lib64 -L/opt/intel/compilers_and_libraries_2016.2.181/linux/compiler/lib/intel64 -L/home/nvidia/intel/mkl/lib/intel64/ -L/home/nvidia/intel/lib/intel64/ 
release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cu/caldgemm_cuda.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/benchmark.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/timer.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/qmalloc.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm_cpu.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/affinity.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/threadserver.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/cmodules/qsem.o release/x86_64-pc-linux-gnu_64EXECUTABLE_dgemm_bench/cpp/caldgemm_adl.o -lrt -ldl -lpthread -lcudart -lcuda -lcublas -liomp5 -lmkl_intel_lp64 -lmkl_core -lmkl_intel_thread -o dgemm_bench /tmp/cccjW1s5.ltrans1.ltrans.o:(.nvFatBinSegment+0x8): undefined reference to `fatbinData' collect2: error: ld returned 1 exit status makefiles/makefile:191: recipe for target 'dgemm_bench' failed make: *** [dgemm_bench] Error 1
Looking forward to your reply.
Downgrading the version of gcc solved the problem. Thanks.
I failed to compile caldgemm. The log is:
Looking forward to your reply.