New warning while building newest experimental branch

I llama.cpp build info: I UNAME_S: Linux I UNAME_P: unknown I UNAME_M: x86_64 I CFLAGS: -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native I CXXFLAGS: -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread I LDFLAGS:
I CC: cc (GCC) 13.2.1 20230801 I CXX: g++ (GCC) 13.2.1 20230801

rm -vf *.o main quantize_llama quantize_gpt2 quantize_gptj quantize_neox quantize_mpt quantize-stats perplexity embedding benchmark-matmult save-load-state gguf imatrix imatrix.exe gguf.exe main.exe quantize_llama.exe quantize_gptj.exe quantize_gpt2.exe quantize_neox.exe quantize_mpt.exe koboldcpp_default.dll koboldcpp_openblas.dll koboldcpp_failsafe.dll koboldcpp_noavx2.dll koboldcpp_clblast.dll koboldcpp_clblast_noavx2.dll koboldcpp_cublas.dll koboldcpp_hipblas.dll koboldcpp_vulkan.dll koboldcpp_default.so koboldcpp_openblas.so koboldcpp_failsafe.so koboldcpp_noavx2.so koboldcpp_clblast.so koboldcpp_clblast_noavx2.so koboldcpp_cublas.so koboldcpp_hipblas.so koboldcpp_vulkan.so removed 'common.o' removed 'expose.o' removed 'ggml-alloc.o' removed 'ggml-backend.o' removed 'ggml-cuda.o' removed 'ggml.o' removed 'ggml-quants.o' removed 'ggml_v1.o' removed 'ggml_v2_cublas.o' removed 'ggml_v2-cuda-legacy.o' removed 'ggml_v2-cuda.o' removed 'ggml_v2.o' removed 'ggml_v3_cublas.o' removed 'ggml_v3-cuda.o' removed 'ggml_v3.o' removed 'ggml_v4_cublas.o' removed 'ggml_v4_vulkan.o' removed 'ggml-vulkan.o' removed 'gpttype_adapter_cublas.o' removed 'gpttype_adapter.o' removed 'gpttype_adapter_vulkan.o' removed 'grammar-parser.o' removed 'koboldcpp_default.so' removed 'koboldcpp_cublas.so' removed 'koboldcpp_vulkan.so' I llama.cpp build info: I UNAME_S: Linux I UNAME_P: unknown I UNAME_M: x86_64 I CFLAGS: -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native I CXXFLAGS: -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread I LDFLAGS:
I CC: cc (GCC) 13.2.1 20230801 I CXX: g++ (GCC) 13.2.1 20230801

cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -c ggml.c -o ggml.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -c otherarch/ggml_v3.c -o ggml_v3.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -c otherarch/ggml_v2.c -o ggml_v2.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -c otherarch/ggml_v1.c -o ggml_v1.o g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -c expose.cpp -o expose.o g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -c common/common.cpp -o common.o g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -c gpttype_adapter.cpp -o gpttype_adapter.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -c ggml-quants.c -o ggml-quants.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -c ggml-alloc.c -o ggml-alloc.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -c ggml-backend.c -o ggml-backend.o g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -c common/grammar-parser.cpp -o grammar-parser.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -c ggml.c -o ggml_v4_cublas.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -c otherarch/ggml_v3.c -o ggml_v3_cublas.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -c otherarch/ggml_v2.c -o ggml_v2_cublas.o g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -c gpttype_adapter.cpp -o gpttype_adapter_cublas.o nvcc --forward-unknown-to-host-compiler -use_fast_math -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_MMQ_Y=64 -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -Wno-pedantic -c ggml-cuda.cu -o ggml-cuda.o nvcc --forward-unknown-to-host-compiler -use_fast_math -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_MMQ_Y=64 -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -Wno-pedantic -c otherarch/ggml_v3-cuda.cu -o ggml_v3-cuda.o nvcc --forward-unknown-to-host-compiler -use_fast_math -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_MMQ_Y=64 -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -Wno-pedantic -c otherarch/ggml_v2-cuda.cu -o ggml_v2-cuda.o nvcc --forward-unknown-to-host-compiler -use_fast_math -arch=native -DGGML_CUDA_DMMV_X=32 -DGGML_CUDA_MMV_Y=1 -DK_QUANTS_PER_ITERATION=2 -DGGML_CUDA_MMQ_Y=64 -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include -Wno-pedantic -c otherarch/ggml_v2-cuda-legacy.cu -o ggml_v2-cuda-legacy.o cc -I. -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-deprecated -Wno-deprecated-declarations -pthread -march=native -mtune=native -DGGML_USE_VULKAN -c ggml.c -o ggml_v4_vulkan.o g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_VULKAN -c gpttype_adapter.cpp -o gpttype_adapter_vulkan.o g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_VULKAN -c ggml-vulkan.cpp -o ggml-vulkan.o In file included from gpttype_adapter.cpp:20: llama.cpp: In function ‘ggml_tensor llm_build_kqv(ggml_context, const llama_model&, const llama_hparams&, const llama_kv_cache&, ggml_cgraph, ggml_tensor, ggml_tensor, ggml_tensor, ggml_tensor, ggml_tensor, int64_t, int32_t, int32_t, float, const llm_build_cb&, int)’: llama.cpp:4900:108: note: ‘#pragma message: TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL’ 4900 | #pragma message("TODO: ALiBi support in ggml_soft_max_ext is not implemented for Vulkan, Kompute, and SYCL") | ^ llama.cpp:4901:87: note: ‘#pragma message: Falling back to ggml_alibi(). Will become an error in Mar 2024’ 4901 | #pragma message(" Falling back to ggml_alibi(). Will become an error in Mar 2024") | ^ llama.cpp:4902:73: note: ‘#pragma message: ref: https://github.com/ggerganov/llama.cpp/pull/5488’ 4902 | #pragma message("ref: https://github.com/ggerganov/llama.cpp/pull/5488") | ^ g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread ggml.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o -shared -o koboldcpp_default.so g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I/opt/cuda/targets/x86_64-linux/include ggml_v4_cublas.o ggml_v3_cublas.o ggml_v2_cublas.o ggml_v1.o expose.o common.o gpttype_adapter_cublas.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o ggml-cuda.o ggml_v3-cuda.o ggml_v2-cuda.o ggml_v2-cuda-legacy.o -shared -o koboldcpp_cublas.so -lcuda -lcublas -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L/opt/cuda/targets/x86_64-linux/lib -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/wsl/lib g++ -I. -I./common -I./include -I./include/CL -I./otherarch -I./otherarch/tools -I./include/vulkan -O3 -DNDEBUG -std=c++11 -fPIC -DLOG_DISABLE_LOGS -D_GNU_SOURCE -pthread -s -Wno-multichar -Wno-write-strings -Wno-deprecated -Wno-deprecated-declarations -pthread ggml_v4_vulkan.o ggml_v3.o ggml_v2.o ggml_v1.o expose.o common.o gpttype_adapter_vulkan.o ggml-vulkan.o ggml-quants.o ggml-alloc.o ggml-backend.o grammar-parser.o -lvulkan -shared -o koboldcpp_vulkan.so

LostRuins / koboldcpp

New warning while building newest experimental branch #702