mit-han-lab / TinyChatEngine

TinyChatEngine: On-Device LLM Inference Library
https://mit-han-lab.github.io/TinyChatEngine/
MIT License
624 stars 58 forks source link

No such file or directory during compilation #106

Open saeid93 opened 1 month ago

saeid93 commented 1 month ago

Following the documentation, I ran:

cd llm
make chat -j

I get the following error:

CUDA is unavailable!
src/GPTBigCodeGenerate.cc src/GPTBigCodeTokenizer.cc src/Generate.cc src/LLaMATokenizer.cc src/OPTGenerate.cc src/OPTTokenizer.cc src/interface.cc src/utils.cc src/nn_modules/Fp32CLIPAttention.cc src/nn_modules/Fp32CLIPEncoder.cc src/nn_modules/Fp32CLIPEncoderLayer.cc src/nn_modules/Fp32CLIPVisionTransformer.cc src/nn_modules/Fp32GPTBigCodeAttention.cc src/nn_modules/Fp32GPTBigCodeDecoder.cc src/nn_modules/Fp32GPTBigCodeDecoderLayer.cc src/nn_modules/Fp32GPTBigCodeForCausalLM.cc src/nn_modules/Fp32OPTAttention.cc src/nn_modules/Fp32OPTDecoder.cc src/nn_modules/Fp32OPTDecoderLayer.cc src/nn_modules/Fp32OPTForCausalLM.cc src/nn_modules/Fp32llamaAttention.cc src/nn_modules/Fp32llamaDecoder.cc src/nn_modules/Fp32llamaDecoderLayer.cc src/nn_modules/Fp32llamaForCausalLM.cc src/nn_modules/Int4GPTBigCodeAttention.cc src/nn_modules/Int4GPTBigCodeDecoder.cc src/nn_modules/Int4GPTBigCodeDecoderLayer.cc src/nn_modules/Int4GPTBigCodeForCausalLM.cc src/nn_modules/Int4OPTAttention.cc src/nn_modules/Int4OPTDecoder.cc src/nn_modules/Int4OPTDecoderLayer.cc src/nn_modules/Int4OPTForCausalLM.cc src/nn_modules/Int8OPTAttention.cc src/nn_modules/Int8OPTDecoder.cc src/nn_modules/Int8OPTDecoderLayer.cc src/nn_modules/OPTForCausalLM.cc src/ops/BMM_F32T.cc src/ops/BMM_S8T_S8N_F32T.cc src/ops/BMM_S8T_S8N_S8T.cc src/ops/Conv2D.cc src/ops/Gelu.cc src/ops/LayerNorm.cc src/ops/LayerNormQ.cc src/ops/LlamaRMSNorm.cc src/ops/RotaryPosEmb.cc src/ops/W8A8B8O8Linear.cc src/ops/W8A8B8O8LinearReLU.cc src/ops/W8A8BFP32OFP32Linear.cc src/ops/arg_max.cc src/ops/batch_add.cc src/ops/embedding.cc src/ops/linear.cc src/ops/softmax.cc ../kernels/matmul_imp.cc ../kernels/matmul_int4.cc ../kernels/matmul_int8.cc ../kernels/pthread_pool.cc src/nn_modules/non_cuda/Int4llamaAttention.cc src/nn_modules/non_cuda/Int4llamaDecoder.cc src/nn_modules/non_cuda/Int4llamaDecoderLayer.cc src/nn_modules/non_cuda/Int4llamaForCausalLM.cc src/nn_modules/non_cuda/LLaMAGenerate.cc src/nn_modules/non_cuda/LLaVAGenerate.cc 
../kernels/avx/matmul_avx_fp32.cc ../kernels/avx/matmul_avx_int4.cc ../kernels/avx/matmul_avx_int8.cc ../kernels/avx/matmul_avx_int8_int4.cc
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/GPTBigCodeGenerate.cc -o build/transformer/src/GPTBigCodeGenerate.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/LLaMATokenizer.cc -o build/transformer/src/LLaMATokenizer.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/GPTBigCodeTokenizer.cc -o build/transformer/src/GPTBigCodeTokenizer.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/Generate.cc -o build/transformer/src/Generate.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/OPTGenerate.cc -o build/transformer/src/OPTGenerate.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/OPTTokenizer.cc -o build/transformer/src/OPTTokenizer.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/interface.cc -o build/transformer/src/interface.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/utils.cc -o build/transformer/src/utils.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32CLIPAttention.cc -o build/transformer/src/nn_modules/Fp32CLIPAttention.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32CLIPEncoder.cc -o build/transformer/src/nn_modules/Fp32CLIPEncoder.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32CLIPEncoderLayer.cc -o build/transformer/src/nn_modules/Fp32CLIPEncoderLayer.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32CLIPVisionTransformer.cc -o build/transformer/src/nn_modules/Fp32CLIPVisionTransformer.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32GPTBigCodeAttention.cc -o build/transformer/src/nn_modules/Fp32GPTBigCodeAttention.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32GPTBigCodeDecoder.cc -o build/transformer/src/nn_modules/Fp32GPTBigCodeDecoder.o
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32GPTBigCodeDecoderLayer.cc -o build/transformer/src/nn_modules/Fp32GPTBigCodeDecoderLayer.o
In file included from ./include/Generate.h:29,
                 from src/GPTBigCodeGenerate.cc:5:
./include/OPTTokenizer.h:22:10: fatal error: nlohmann/json.hpp: No such file or directory
   22 | #include <nlohmann/json.hpp>
      |          ^~~~~~~~~~~~~~~~~~~
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32GPTBigCodeForCausalLM.cc -o build/transformer/src/nn_modules/Fp32GPTBigCodeForCausalLM.o
compilation terminated.
g++ -std=c++11 -pthread -Ofast  -mavx2 -mfma -ffast-math -DUSE_INT8_INT4_PRODUCT -fpermissive -DQM_x86 -I../kernels -I./include -I./include/nn_modules -I./json/single_include/ -I./half-2.2.0/include/ -c src/nn_modules/Fp32OPTAttention.cc -o build/transformer/src/nn_modules/Fp32OPTAttention.o
make: *** [Makefile:181: build/transformer/src/GPTBigCodeGenerate.o] Error 1
make: *** Waiting for unfinished jobs....
In file included from ./include/Generate.h:29,
                 from src/Generate.cc:8:
./include/OPTTokenizer.h:22:10: fatal error: nlohmann/json.hpp: No such file or directory
   22 | #include <nlohmann/json.hpp>
      |          ^~~~~~~~~~~~~~~~~~~
compilation terminated.
make: *** [Makefile:181: build/transformer/src/Generate.o] Error 1
In file included from ./include/Generate.h:29,
                 from src/OPTGenerate.cc:5:
./include/OPTTokenizer.h:22:10: fatal error: nlohmann/json.hpp: No such file or directory
   22 | #include <nlohmann/json.hpp>
      |          ^~~~~~~~~~~~~~~~~~~
compilation terminated.
In file included from src/OPTTokenizer.cc:1:
./include/OPTTokenizer.h:22:10: fatal error: nlohmann/json.hpp: No such file or directory
   22 | #include <nlohmann/json.hpp>
      |          ^~~~~~~~~~~~~~~~~~~
compilation terminated.
make: *** [Makefile:181: build/transformer/src/OPTGenerate.o] Error 1
make: *** [Makefile:181: build/transformer/src/OPTTokenizer.o] Error 1

Could you please let me know what I might be doing wrong here?

p2991459 commented 1 month ago

I am also facing the same error on Windows. I followed the step-by-step instructions from the README, but running make chat -j gives me an error:

(TinyChatEngine) C:\Users\linuxdev\Desktop\Projects\TinyChatEngine\llm>gcc --version
gcc (Rev6, Built by MSYS2 project) 13.2.0
Copyright (C) 2023 Free Software Foundation, Inc.
This is free software; see the source for copying conditions. There is NO warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.

(TinyChatEngine) C:\Users\linuxdev\Desktop\Projects\TinyChatEngine\llm>make chat -j The system cannot find the path specified. CUDA is unavailable! process_begin: CreateProcess(NULL, uname -m, ...) failed. Makefile:72: pipe: Bad file descriptor process_begin: CreateProcess(NULL, uname -m, ...) failed. Makefile:90: pipe: Bad file descriptor process_begin: CreateProcess(NULL, uname -p, ...) failed. Makefile:102: pipe: Bad file descriptor src/GPTBigCodeGenerate.cc src/GPTBigCodeTokenizer.cc src/Generate.cc src/LLaMATokenizer.cc src/OPTGenerate.cc src/OPTTokenizer.cc src/interface.cc src/utils.cc src/nn_modules/Fp32CLIPAttention.cc src/nn_modules/Fp32CLIPEncoder.cc src/nn_modules/Fp32CLIPEncoderLayer.cc src/nn_modules/Fp32CLIPVisionTransformer.cc src/nn_modules/Fp32GPTBigCodeAttention.cc src/nn_modules/Fp32GPTBigCodeDecoder.cc src/nn_modules/Fp32GPTBigCodeDecoderLayer.cc src/nn_modules/Fp32GPTBigCodeForCausalLM.cc src/nn_modules/Fp32OPTAttention.cc src/nn_modules/Fp32OPTDecoder.cc src/nn_modules/Fp32OPTDecoderLayer.cc src/nn_modules/Fp32OPTForCausalLM.cc src/nn_modules/Fp32llamaAttention.cc src/nn_modules/Fp32llamaDecoder.cc src/nn_modules/Fp32llamaDecoderLayer.cc src/nn_modules/Fp32llamaForCausalLM.cc src/nn_modules/Int4GPTBigCodeAttention.cc src/nn_modules/Int4GPTBigCodeDecoder.cc src/nn_modules/Int4GPTBigCodeDecoderLayer.cc src/nn_modules/Int4GPTBigCodeForCausalLM.cc src/nn_modules/Int4OPTAttention.cc src/nn_modules/Int4OPTDecoder.cc src/nn_modules/Int4OPTDecoderLayer.cc src/nn_modules/Int4OPTForCausalLM.cc src/nn_modules/Int8OPTAttention.cc src/nn_modules/Int8OPTDecoder.cc src/nn_modules/Int8OPTDecoderLayer.cc src/nn_modules/MistralGenerate.cc src/nn_modules/OPTForCausalLM.cc src/ops/BMM_F32T.cc src/ops/BMM_S8T_S8N_F32T.cc src/ops/BMM_S8T_S8N_S8T.cc src/ops/Conv2D.cc src/ops/Gelu.cc src/ops/LayerNorm.cc src/ops/LayerNormQ.cc src/ops/LlamaRMSNorm.cc src/ops/RotaryPosEmb.cc src/ops/W8A8B8O8Linear.cc src/ops/W8A8B8O8LinearReLU.cc src/ops/W8A8BFP32OFP32Linear.cc 
src/ops/arg_max.cc src/ops/batch_add.cc src/ops/embedding.cc src/ops/linear.cc src/ops/softmax.cc ../kernels/matmul_imp.cc ../kernels/matmul_int4.cc ../kernels/matmul_int8.cc ../kernels/pthread_pool.cc src/nn_modules/non_cuda/Int4llamaAttention.cc src/nn_modules/non_cuda/Int4llamaDecoder.cc src/nn_modules/non_cuda/Int4llamaDecoderLayer.cc src/nn_modules/non_cuda/Int4llamaForCausalLM.cc src/nn_modules/non_cuda/LLaMA3Generate.cc src/nn_modules/non_cuda/LLaMAGenerate.cc src/nn_modules/non_cuda/LLaVAGenerate.cc ../kernels/ref/matmul_ref_fp32.cc ../kernels/ref/matmul_ref_int4.cc ../kernels/ref/matmul_ref_int8.cc The syntax of the command is incorrect. make: [Makefile:180: build/transformer/src/GPTBigCodeGenerate.o] Error 1 make: Waiting for unfinished jobs.... The syntax of the command is incorrect. make: *** [Makefile:180: build/transformer/src/GPTBigCodeTokenizer.o] Error 1