sumedhreddy90 closed this issue 1 year ago
```
(TinyChatEngine) jetson@jon-jp511-isaac-ros:~/TinyChatEngine/llm$ make chat -j
CUDA is available!
src/LLaMATokenizer.cc src/OPTGenerate.cc src/utils.cc src/OPTTokenizer.cc src/Generate.cc src/nn_modules/Int8OPTAttention.cc src/nn_modules/Fp32llamaForCausalLM.cc src/nn_modules/Fp32llamaDecoder.cc src/nn_modules/Int4OPTForCausalLM.cc src/nn_modules/Int8OPTDecoderLayer.cc src/nn_modules/Fp32OPTForCausalLM.cc src/nn_modules/Int4OPTDecoder.cc src/nn_modules/OPTForCausalLM.cc src/nn_modules/Fp32OPTDecoder.cc src/nn_modules/Fp32llamaDecoderLayer.cc src/nn_modules/Int4OPTAttention.cc src/nn_modules/Fp32OPTAttention.cc src/nn_modules/Fp32llamaAttention.cc src/nn_modules/Int4OPTDecoderLayer.cc src/nn_modules/Int8OPTDecoder.cc src/nn_modules/Fp32OPTDecoderLayer.cc src/ops/linear.cc src/ops/LayerNorm.cc src/ops/embedding.cc src/ops/batch_add.cc src/ops/softmax.cc src/ops/BMM_S8T_S8N_S8T.cc src/ops/W8A8B8O8LinearReLU.cc src/ops/LayerNormQ.cc src/ops/RotaryPosEmb.cc src/ops/arg_max.cc src/ops/BMM_F32T.cc src/ops/LlamaRMSNorm.cc src/ops/BMM_S8T_S8N_F32T.cc src/ops/W8A8BFP32OFP32Linear.cc src/ops/W8A8B8O8Linear.cc ../kernels/matmul_imp.cc ../kernels/matmul_int8.cc ../kernels/matmul_int4.cc ../kernels/cuda/matmul_ref_fp32.cc ../kernels/cuda/matmul_ref_int8.cc ../kernels/cuda/matmul_int4.cu ../kernels/cuda/gemv_cuda.cu src/nn_modules/cuda/utils.cu src/nn_modules/cuda/Int4llamaForCausalLM.cu src/nn_modules/cuda/Int4llamaDecoder.cu src/nn_modules/cuda/Int4llamaDecoderLayer.cu src/nn_modules/cuda/LLaMAGenerate.cu src/nn_modules/cuda/Int4llamaAttention.cu src/ops/cuda/linear.cu src/ops/cuda/embedding.cu src/ops/cuda/batch_add.cu src/ops/cuda/softmax.cu src/ops/cuda/BMM_F16T.cu src/ops/cuda/RotaryPosEmb.cu src/ops/cuda/LlamaRMSNorm.cu
/usr/local/cuda/bin/nvcc -std=c++17 -Xptxas -O3 -gencode arch=compute_87,code=sm_87 --forward-unknown-to-host-compiler -Xcompiler "-pthread" -DQM_CUDA -DENABLE_BF16 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_BFLOAT16_OPERATORS__ -U__CUDA_NO_BFLOAT16_CONVERSIONS__ -U__CUDA_NO_BFLOAT162_OPERATORS__ -U__CUDA_NO_BFLOAT162_CONVERSIONS__ --expt-relaxed-constexpr --expt-extended-lambda --use_fast_math --threads=8 -fPIC -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -I./include/ops/cuda -I/usr/local/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -I/usr/include/aarch64-linux-gnu -o chat application/chat.cc build/transformer/src/LLaMATokenizer.o build/transformer/src/OPTGenerate.o build/transformer/src/utils.o build/transformer/src/OPTTokenizer.o build/transformer/src/Generate.o build/transformer/src/nn_modules/Int8OPTAttention.o build/transformer/src/nn_modules/Fp32llamaForCausalLM.o build/transformer/src/nn_modules/Fp32llamaDecoder.o build/transformer/src/nn_modules/Int4OPTForCausalLM.o build/transformer/src/nn_modules/Int8OPTDecoderLayer.o build/transformer/src/nn_modules/Fp32OPTForCausalLM.o build/transformer/src/nn_modules/Int4OPTDecoder.o build/transformer/src/nn_modules/OPTForCausalLM.o build/transformer/src/nn_modules/Fp32OPTDecoder.o build/transformer/src/nn_modules/Fp32llamaDecoderLayer.o build/transformer/src/nn_modules/Int4OPTAttention.o build/transformer/src/nn_modules/Fp32OPTAttention.o build/transformer/src/nn_modules/Fp32llamaAttention.o build/transformer/src/nn_modules/Int4OPTDecoderLayer.o build/transformer/src/nn_modules/Int8OPTDecoder.o build/transformer/src/nn_modules/Fp32OPTDecoderLayer.o build/transformer/src/ops/linear.o build/transformer/src/ops/LayerNorm.o build/transformer/src/ops/embedding.o build/transformer/src/ops/batch_add.o build/transformer/src/ops/softmax.o build/transformer/src/ops/BMM_S8T_S8N_S8T.o build/transformer/src/ops/W8A8B8O8LinearReLU.o build/transformer/src/ops/LayerNormQ.o build/transformer/src/ops/RotaryPosEmb.o build/transformer/src/ops/arg_max.o build/transformer/src/ops/BMM_F32T.o build/transformer/src/ops/LlamaRMSNorm.o build/transformer/src/ops/BMM_S8T_S8N_F32T.o build/transformer/src/ops/W8A8BFP32OFP32Linear.o build/transformer/src/ops/W8A8B8O8Linear.o build/transformer/../kernels/matmul_imp.o build/transformer/../kernels/matmul_int8.o build/transformer/../kernels/matmul_int4.o build/transformer/../kernels/cuda/matmul_ref_fp32.o build/transformer/../kernels/cuda/matmul_ref_int8.o build/transformer/../kernels/cuda/matmul_int4.o build/transformer/../kernels/cuda/gemv_cuda.o build/transformer/src/nn_modules/cuda/utils.o build/transformer/src/nn_modules/cuda/Int4llamaForCausalLM.o build/transformer/src/nn_modules/cuda/Int4llamaDecoder.o build/transformer/src/nn_modules/cuda/Int4llamaDecoderLayer.o build/transformer/src/nn_modules/cuda/LLaMAGenerate.o build/transformer/src/nn_modules/cuda/Int4llamaAttention.o build/transformer/src/ops/cuda/linear.o build/transformer/src/ops/cuda/embedding.o build/transformer/src/ops/cuda/batch_add.o build/transformer/src/ops/cuda/softmax.o build/transformer/src/ops/cuda/BMM_F16T.o build/transformer/src/ops/cuda/RotaryPosEmb.o build/transformer/src/ops/cuda/LlamaRMSNorm.o -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -lnvrtc -lcuda -lcudnn -lcurand -lcusolver -L/usr/local/cuda/lib64 -L/usr/local/cuda/targets/aarch64-linux/lib -L/usr/lib/aarch64-linux-gnu -Xlinker -rpath=/usr/local/cuda/lib64 -Xlinker -rpath=/usr/local/cuda/targets/aarch64-linux/lib -Xlinker -rpath=/usr/lib/aarch64-linux-gnu
/usr/bin/ld: cannot find -lcublas
/usr/bin/ld: cannot find -lcublasLt
/usr/bin/ld: cannot find -lcudnn
/usr/bin/ld: cannot find -lcurand
/usr/bin/ld: cannot find -lcusolver
collect2: error: ld returned 1 exit status
make: *** [Makefile:228: chat] Error 255
```
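Compilation itself succeeds; the failure is at the final link step, where `ld` cannot resolve `-lcublas`, `-lcublasLt`, `-lcudnn`, `-lcurand`, and `-lcusolver` even though the CUDA library directories are passed via `-L`. Note that `-l<name>` makes the linker search for an unversioned `lib<name>.so`, which on JetPack installs can be missing when only versioned files (e.g. `lib<name>.so.11`) are present. A quick check (the paths below are JetPack defaults and may differ on your system):

```bash
# Look for each missing library under the directories the link line searches.
# If only versioned files (lib<name>.so.N) show up, the unversioned
# symlinks that `ld -l<name>` needs are missing.
for lib in cublas cublasLt cudnn curand cusolver; do
    find /usr/local/cuda/lib64 /usr/local/cuda/targets/aarch64-linux/lib \
         /usr/lib/aarch64-linux-gnu -name "lib${lib}.so*" 2>/dev/null
done
```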
Solved the `.so` linker errors. Closing.
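The comment above does not say what the fix was. A common resolution for this class of error on Jetson/JetPack is to restore the unversioned `.so` symlinks, either by installing the corresponding CUDA/cuDNN dev packages (names vary by JetPack release) or by creating the symlinks manually. A minimal sketch, assuming the versioned libraries already exist under `/usr/lib/aarch64-linux-gnu`; the version suffixes below are examples, so match them to whatever the `find` check above reports:

```bash
# Hypothetical fix: create the unversioned symlinks the linker searches for.
# Adjust the .so.N version suffixes to the files actually on your system.
cd /usr/lib/aarch64-linux-gnu
sudo ln -sf libcublas.so.11   libcublas.so
sudo ln -sf libcublasLt.so.11 libcublasLt.so
sudo ln -sf libcudnn.so.8     libcudnn.so
sudo ln -sf libcurand.so.10   libcurand.so
sudo ln -sf libcusolver.so.11 libcusolver.so
```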
@sumedhreddy90 Could you share the solution? I am facing the same problem while compiling on my AGX Orin.