punica-ai / punica

Serving multiple LoRA finetuned LLM as one
https://arxiv.org/abs/2310.18547
Apache License 2.0
883 stars 40 forks source link

pip install -v --no-build-isolation . is giving errors #13

Closed luciferlinx101 closed 7 months ago

luciferlinx101 commented 7 months ago
          instantiation of "void flashinfer::vec_t<float, vec_size>::cast_load(const T *) [with vec_size=8UL, T=nv_bfloat16]" at line 492 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/../flashinfer/decode.cuh
          instantiation of "void flashinfer::BatchDecodeWithPagedKVCacheKernel<cooperative,rotary_mode,norm_on_the_fly,num_stages_smem,vec_size,bdx,bdy,bdz,DTypeIn,DTypeOut,IdType>(DTypeIn *, flashinfer::paged_kv_t<DTypeIn, IdType>, DTypeOut *, float *, float, float, float) [with cooperative=true, rotary_mode=flashinfer::RotaryMode::kNone, norm_on_the_fly=false, num_stages_smem=2U, vec_size=8U, bdx=8U, bdy=1U, bdz=16U, DTypeIn=nv_bfloat16, DTypeOut=nv_bfloat16, IdType=int32_t]" at line 1058 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/../flashinfer/decode.cuh
          instantiation of "cudaError_t flashinfer::BatchDecodeWithPagedKVCache(DTypeIn *, flashinfer::paged_kv_t<DTypeIn, IdType>, DTypeOut *, float *, uint32_t, flashinfer::RotaryMode, float, float, cudaStream_t, uint32_t) [with DTypeIn=nv_bfloat16, DTypeOut=nv_bfloat16, IdType=int32_t]" at line 20 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/flashinfer_all.cu
          instantiation of "void FlashInferBatchDecodeKernel(T *, T *, T *, int32_t *, int32_t *, int32_t *, int, int, int, int, int, int, int) [with T=nv_bfloat16]" at line 71 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/flashinfer_all.cu

/home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/../flashinfer/vec_dtypes.cuh(1287): error: identifier "__float22bfloat162_rn" is undefined
      __float22bfloat162_rn(((float2*)(&src.data))[i]);
      ^
          detected during:
          instantiation of "void flashinfer::vec_t<nv_bfloat16, vec_size>::cast_from(const flashinfer::vec_t<T, vec_size> &) [with vec_size=8UL, T=float]" at line 78
          instantiation of "void flashinfer::cast_store_impl(tgt_float_t *, const flashinfer::vec_t<src_float_t, vec_size> &) [with src_float_t=float, tgt_float_t=nv_bfloat16, vec_size=8UL]" at line 1184
          instantiation of "void flashinfer::vec_t<float, vec_size>::cast_store(T *) const [with vec_size=8UL, T=nv_bfloat16]" at line 648 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/../flashinfer/decode.cuh
          instantiation of "void flashinfer::BatchDecodeWithPagedKVCacheKernel<cooperative,rotary_mode,norm_on_the_fly,num_stages_smem,vec_size,bdx,bdy,bdz,DTypeIn,DTypeOut,IdType>(DTypeIn *, flashinfer::paged_kv_t<DTypeIn, IdType>, DTypeOut *, float *, float, float, float) [with cooperative=true, rotary_mode=flashinfer::RotaryMode::kNone, norm_on_the_fly=false, num_stages_smem=2U, vec_size=8U, bdx=8U, bdy=1U, bdz=16U, DTypeIn=nv_bfloat16, DTypeOut=nv_bfloat16, IdType=int32_t]" at line 1058 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/../flashinfer/decode.cuh
          instantiation of "cudaError_t flashinfer::BatchDecodeWithPagedKVCache(DTypeIn *, flashinfer::paged_kv_t<DTypeIn, IdType>, DTypeOut *, float *, uint32_t, flashinfer::RotaryMode, float, float, cudaStream_t, uint32_t) [with DTypeIn=nv_bfloat16, DTypeOut=nv_bfloat16, IdType=int32_t]" at line 20 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/flashinfer_all.cu
          instantiation of "void FlashInferBatchDecodeKernel(T *, T *, T *, int32_t *, int32_t *, int32_t *, int, int, int, int, int, int, int) [with T=nv_bfloat16]" at line 71 of /home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/flashinfer_all.cu

4 errors detected in the compilation of "/home/ubuntu/multi-tenant-test/punica/csrc/flashinfer_adapter/flashinfer_all.cu". ninja: build stopped: subcommand failed. Traceback (most recent call last): File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 2100, in _run_ninja_build subprocess.run( File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/subprocess.py", line 516, in run raise CalledProcessError(retcode, process.args, subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

Traceback (most recent call last): File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 353, in main() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 335, in main json_out['return_val'] = hook(hook_input['kwargs']) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py", line 251, in build_wheel return _build_backend().build_wheel(wheel_directory, config_settings, File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/build_meta.py", line 416, in build_wheel return self._build_with_temp_dir(['bdist_wheel'], '.whl', File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/build_meta.py", line 401, in _build_with_temp_dir self.run_setup() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/build_meta.py", line 338, in run_setup exec(code, locals()) File "", line 51, in File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/init.py", line 107, in setup return distutils.core.setup(attrs) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/core.py", line 185, in setup return run_commands(dist) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/core.py", line 201, in run_commands dist.run_commands() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/dist.py", line 969, in run_commands self.run_command(cmd) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/dist.py", line 1234, in run_command super().run_command(command) File 
"/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/dist.py", line 988, in run_command cmd_obj.run() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/wheel/bdist_wheel.py", line 364, in run self.run_command("build") File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/cmd.py", line 318, in run_command self.distribution.run_command(command) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/dist.py", line 1234, in run_command super().run_command(command) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/dist.py", line 988, in run_command cmd_obj.run() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/command/build.py", line 131, in run self.run_command(cmd_name) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/cmd.py", line 318, in run_command self.distribution.run_command(command) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/dist.py", line 1234, in run_command super().run_command(command) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/dist.py", line 988, in run_command cmd_obj.run() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/command/build_ext.py", line 84, in run _build_ext.run(self) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/command/build_ext.py", line 345, in run self.build_extensions() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 873, in build_extensions build_ext.build_extensions(self) File 
"/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/command/build_ext.py", line 467, in build_extensions self._build_extensions_serial() File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/command/build_ext.py", line 493, in _build_extensions_serial self.build_extension(ext) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/command/build_ext.py", line 246, in build_extension _build_ext.build_extension(self, ext) File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/setuptools/_distutils/command/build_ext.py", line 548, in build_extension objects = self.compiler.compile( File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 686, in unix_wrap_ninja_compile _write_ninja_file_and_compile_objects( File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1774, in _write_ninja_file_and_compile_objects _run_ninja_build( File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 2116, in _run_ninja_build raise RuntimeError(message) from e RuntimeError: Error compiling objects for extension error: subprocess-exited-with-error

× Building wheel for punica (pyproject.toml) did not run successfully. │ exit code: 1 ╰─> See above for output.

note: This error originates from a subprocess, and is likely not a problem with pip. full command: /home/ubuntu/miniconda3/envs/muti-tenant-test-1/bin/python /home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/site-packages/pip/_vendor/pyproject_hooks/_in_process/_in_process.py build_wheel /tmp/tmpxzh48d2c cwd: /home/ubuntu/multi-tenant-test/punica Building wheel for punica (pyproject.toml) ... error ERROR: Failed building wheel for punica Failed to build punica ERROR: Could not build wheels for punica, which is required to install pyproject.toml-based projects

abcdabcd987 commented 7 months ago

Looks like you are using a GPU that's older than sm_80. Can you tell me which GPU you are using?

BTW, I notice that you are using Python 3.8. I believe that the current code won't run on Python 3.8 because of the type annotations it uses. I'd recommend installing Python 3.10 using mamba.

luciferlinx101 commented 7 months ago

I am using an NVIDIA A10G. I will also update the Python version and check again.

luciferlinx101 commented 7 months ago

Also, when I used `env TORCH_CUDA_ARCH_LIST="8.0" pip install -v --no-build-isolation .`

I was able to install successfully, but on running

`python -m punica.utils.convert_lora_weight model/gsm8k-r16/adapter_model.bin model/gsm8k-r16.punica.pt`

I got the following:

Traceback (most recent call last):
  File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/runpy.py", line 185, in _run_module_as_main
    mod_name, mod_spec, code = _get_module_details(mod_name, _Error)
  File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/runpy.py", line 111, in _get_module_details
    __import__(pkg_name)
  File "/home/ubuntu/multi-tenant-test/punica/punica/__init__.py", line 1, in <module>
    import punica.models
  File "/home/ubuntu/multi-tenant-test/punica/punica/models/__init__.py", line 1, in <module>
    import punica.models.llama
  File "/home/ubuntu/multi-tenant-test/punica/punica/models/llama.py", line 16, in <module>
    from punica.ops import append_kv, init_kv, batch_decode, rms_norm
  File "/home/ubuntu/multi-tenant-test/punica/punica/ops/__init__.py", line 3, in <module>
    import punica.ops._kernels as _kernels
ModuleNotFoundError: No module named 'punica.ops._kernels'

luciferlinx101 commented 7 months ago

I am able to install now, but even with Python 3.10 I am getting the same error.

Also, when I used `env TORCH_CUDA_ARCH_LIST="8.0" pip install -v --no-build-isolation .`

I was able to install successfully, but on running

`python -m punica.utils.convert_lora_weight model/gsm8k-r16/adapter_model.bin model/gsm8k-r16.punica.pt`

I got the following:

Traceback (most recent call last):
  File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/runpy.py", line 185, in _run_module_as_main
    mod_name, mod_spec, code = _get_module_details(mod_name, _Error)
  File "/home/ubuntu/miniconda3/envs/muti-tenant-test-1/lib/python3.8/runpy.py", line 111, in _get_module_details
    __import__(pkg_name)
  File "/home/ubuntu/multi-tenant-test/punica/punica/__init__.py", line 1, in <module>
    import punica.models
  File "/home/ubuntu/multi-tenant-test/punica/punica/models/__init__.py", line 1, in <module>
    import punica.models.llama
  File "/home/ubuntu/multi-tenant-test/punica/punica/models/llama.py", line 16, in <module>
    from punica.ops import append_kv, init_kv, batch_decode, rms_norm
  File "/home/ubuntu/multi-tenant-test/punica/punica/ops/__init__.py", line 3, in <module>
    import punica.ops._kernels as _kernels
ModuleNotFoundError: No module named 'punica.ops._kernels'

abcdabcd987 commented 7 months ago

12