# Load the Intel oneAPI environment and the locally built oneCCL environment.
source /opt/intel/oneapi/setvars.sh
source ../3rdparty/oneccl/build/_install/env/setvars.sh

# Build with the Intel oneAPI C/C++ compilers.
export CC=icx
export CXX=icpx

# -p keeps the step re-runnable: a plain `mkdir build` fails when the directory
# already exists, which would skip the `cd` and configure in the wrong place.
mkdir -p build && cd build
cmake .. -DWITH_GPU=ON

# Bound the job count to the number of available CPUs; a bare `make -j` places
# no limit on parallel jobs and can exhaust memory on large builds.
make -j"$(nproc)"
# Launch one MPI rank of ./example with XFT_ENGINE=GPU:0, bound to the CPUs and
# memory of NUMA node 0 and using 20 OpenMP threads.
OMP_NUM_THREADS=20 \
  mpirun -n 1 -env XFT_ENGINE=GPU:0 \
  numactl -N 0 -m 0 \
  ./example \
    --model /home/xfast/models/llama-2-7b-chat-xft/ \
    --token /home/xfast/models/llama-2-7b-chat-hf/tokenizer.model \
    --dtype fp16 --loop 3 --no_stream \
    --input_len 18 --output_len 8
[INFO] First token time: 69.575 ms
[INFO] Second token time: 61.2469 ms
[INFO] Final output is:
==============================================
Once upon a time, there existed a little girl who liked to have adventures. She lived in a small village surrounded by
How to build and test with a single Intel GPU