go-skynet / go-llama.cpp

LLama.cpp golang bindings
MIT License
614 stars 78 forks source link

ERROR ON MAKE LLAMA_CUBLAS=1 (Error - error: more than one instance of overloaded function "swap" matches the argument list:) #328

Open shubhamraj216 opened 3 months ago

shubhamraj216 commented 3 months ago

Hi, I am getting the error below when running the command `make LLAMA_CUBLAS=1`. Has anyone encountered this, and do you know how to resolve it? Error - which: no ccache in (/opt/amazon/openmpi/bin/:/opt/amazon/efa/bin/:/opt/tensorflow/bin:/usr/local/cuda/bin:/usr/local/cuda/include:/usr/libexec/gcc/x86_64-redhat-linux/7:/opt/aws/bin:/sbin:/bin:/usr/sbin:/usr/bin) I ccache not found. Consider installing it for faster compilation. I llama.cpp build info: I UNAME_S: Linux I UNAME_P: x86_64 I UNAME_M: x86_64 I CFLAGS: -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion I CXXFLAGS: -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include I NVCCFLAGS: -std=c++11 -O3 -use_fast_math --forward-unknown-to-host-compiler -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 I LDFLAGS: -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/usr/lib64 -L/usr/local/cuda/targets/x86_64-linux/lib -L/usr/lib/wsl/lib I CC: cc (GCC) 7.3.1 20180712 (Red Hat 7.3.1-17) I CXX: g++ (GCC) 7.3.1 20180712 (Red Hat 7.3.1-17) I NVCC: Build cuda_11.8.r11.8/compiler.31833905_0

cc -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -std=c11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes -Werror=implicit-int -Werror=implicit-function-declaration -pthread -march=native -mtune=native -Wdouble-promotion -c ggml.c -o ggml.o g++ -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -c llama.cpp -o llama.o g++ -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -c common/common.cpp -o common.o g++ -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -c common/sampling.cpp -o sampling.o g++ -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -I. 
-Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -c common/grammar-parser.cpp -o grammar-parser.o g++ -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -c common/build-info.cpp -o build-info.o g++ -std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -march=native -mtune=native -Wno-array-bounds -Wno-format-truncation -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -c common/console.cpp -o console.o nvcc -std=c++11 -O3 -use_fast_math --forward-unknown-to-host-compiler -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128 -I. 
-Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/usr/local/cuda/targets/x86_64-linux/include -Xcompiler "-std=c++11 -fPIC -O3 -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wmissing-declarations -Wmissing-noreturn -pthread -Wno-array-bounds -Wno-format-truncation -Wno-pedantic" -c ggml-cuda.cu -o ggml-cuda.o /usr/include/c++/7/bits/unique_ptr.h(374): error: more than one instance of overloaded function "swap" matches the argument list: function template "void swap(T &, T &)" ggml-cuda.cu(6971): here function template "std::enable_if<std::__and_<std::__not_<std::__is_tuple_like<_Tp>>, std::is_move_constructible<_Tp>, std::is_move_assignable<_Tp>>::value, void>::type std::swap(_Tp &, _Tp &)" /usr/include/c++/7/bits/move.h(187): here argument types are: (ggml_cuda_pool *, ggml_cuda_pool *) detected during: instantiation of "void std::unique_ptr<_Tp, _Dp>::reset(std::unique_ptr<_Tp, _Dp>::pointer) [with _Tp=ggml_cuda_pool, _Dp=std::default_delete<ggml_cuda_pool>]" (283): here instantiation of "std::unique_ptr<_Tp, _Dp> &std::unique_ptr<_Tp, _Dp>::operator=(std::unique_ptr<_Tp, _Dp> &&) [with _Tp=ggml_cuda_pool, _Dp=std::default_delete<ggml_cuda_pool>]" ggml-cuda.cu(688): here

1 error detected in the compilation of "ggml-cuda.cu".