Evanlovea opened 9 months ago
I am running it with CUDA driver 11.6 and cuDNN 8.6; the system is Ubuntu 20.04.
Maybe you should upgrade your gcc version.
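(For reference, a quick way to see which compiler torch's JIT extension builder will invoke — it calls `c++` unless `CXX` is set — is a one-off check like this sketch:)

```python
import subprocess

# Print the first line of `c++ --version`, i.e. the compiler the JIT build
# will use by default. Raises FileNotFoundError if no c++ is on PATH.
out = subprocess.run(["c++", "--version"], capture_output=True, text=True)
print(out.stdout.splitlines()[0])
```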
Check whether libcudart.so exists in /usr/local/cuda/lib64, and if it does:

```bash
export LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
```
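To see which toolkit the JIT build will actually pick up, a small diagnostic along these lines can help (a minimal sketch; it only reports paths and assumes PyTorch is installed):

```python
import glob
import os

# CUDA_HOME is the toolkit root torch's extension builder resolves and uses.
from torch.utils.cpp_extension import CUDA_HOME

print("CUDA_HOME:", CUDA_HOME)
print("LD_LIBRARY_PATH:", os.environ.get("LD_LIBRARY_PATH", "<unset>"))

# Look for the CUDA runtime library under the toolkit's lib directories.
if CUDA_HOME:
    for libdir in ("lib64", "lib"):
        hits = glob.glob(os.path.join(CUDA_HOME, libdir, "libcudart*"))
        if hits:
            print("libcudart found:", hits)
```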
I have set up a GitHub Actions workflow to auto-build wheels that you can use.
@Evanlovea how did you get it to work? I am using an Ubuntu VM with an A100 GPU.
```
Detected CUDA files, patching ldflags
Emitting ninja build file /home/zzg-cx/.cache/torch_extensions/py38_cu116/inference_core_ops/build.ninja...
Building extension module inference_core_ops...
Allowing ninja to set a default number of workers... (overridable by setting the environment variable MAX_JOBS=N)
[1/1] c++ core_ops.o bias_activation.o bias_activation.cuda.o layer_norm.o layer_norm.cuda.o rms_norm.o rms_norm.cuda.o gated_activation_kernels.o gated_activation_kernels.cuda.o -shared -L/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/home/zzg-cx/anaconda3/envs/vllms/lib64 -lcudart -o inference_core_ops.so
FAILED: inference_core_ops.so
c++ core_ops.o bias_activation.o bias_activation.cuda.o layer_norm.o layer_norm.cuda.o rms_norm.o rms_norm.cuda.o gated_activation_kernels.o gated_activation_kernels.cuda.o -shared -L/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/torch/lib -lc10 -lc10_cuda -ltorch_cpu -ltorch_cuda_cu -ltorch_cuda_cpp -ltorch -ltorch_python -L/home/zzg-cx/anaconda3/envs/vllms/lib64 -lcudart -o inference_core_ops.so
/usr/bin/ld: cannot find -lcudart: No such file or directory
collect2: error: ld returned 1 exit status
ninja: build stopped: subcommand failed.
Traceback (most recent call last):
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1900, in _run_ninja_build
    subprocess.run(
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/subprocess.py", line 516, in run
    raise CalledProcessError(retcode, process.args,
subprocess.CalledProcessError: Command '['ninja', '-v']' returned non-zero exit status 1.

The above exception was the direct cause of the following exception:

Traceback (most recent call last):
  File "vllm_test.py", line 106, in <module>
    pipe = mii.pipeline("/opt/zzg-cx/code/LLM_QA/LLM_models/QWen-14B-Chat")
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/mii/api.py", line 156, in pipeline
    inference_engine = load_model(model_config)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/mii/modeling/models.py", line 17, in load_model
    inference_engine = build_hf_engine(
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/engine_factory.py", line 127, in build_hf_engine
    return InferenceEngineV2(policy, engine_config)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/engine_v2.py", line 83, in __init__
    self._model = self._policy.build_model(self._config, self._base_mp_group)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/model_implementations/inference_policy_base.py", line 156, in build_model
    self.model = self.instantiate_model(engine_config, mp_group)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/model_implementations/qwen/policy.py", line 17, in instantiate_model
    return QwenInferenceModel(config=self._model_config, engine_config=engine_config, base_mp_group=mp_group)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/model_implementations/inference_transformer_base.py", line 215, in __init__
    self.make_norm_layer()
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/model_implementations/qwen/model.py", line 124, in make_norm_layer
    self.norm = heuristics.instantiate_pre_norm(norm_config, self._engine_config)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/modules/heuristics.py", line 160, in instantiate_pre_norm
    return DSPreNormRegistry.instantiate_config(config)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/modules/module_registry.py", line 36, in instantiate_config
    if not target_implementation.supports_config(config_bundle.config):
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/modules/implementations/pre_norm/cuda_pre_rms.py", line 36, in supports_config
    _ = CUDARMSPreNorm(config.channels, config.residual_dtype)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/inference/v2/kernels/core_ops/cuda_rms_norm/rms_norm_base.py", line 36, in __init__
    self.inf_module = InferenceCoreBuilder().load()
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 478, in load
    return self.jit_load(verbose)
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/deepspeed/ops/op_builder/builder.py", line 522, in jit_load
    op_module = load(name=self.name,
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1284, in load
    return _jit_compile(
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1508, in _jit_compile
    _write_ninja_file_and_build_library(
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1623, in _write_ninja_file_and_build_library
    _run_ninja_build(
  File "/home/zzg-cx/anaconda3/envs/vllms/lib/python3.8/site-packages/torch/utils/cpp_extension.py", line 1916, in _run_ninja_build
    raise RuntimeError(message) from e
RuntimeError: Error building extension 'inference_core_ops'
```
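For what it's worth, the failing link line above searches `-L/home/zzg-cx/anaconda3/envs/vllms/lib64` for `-lcudart`, and that conda-env directory evidently contains no libcudart.so; the CUDA runtime ships with the toolkit (typically under /usr/local/cuda/lib64), which is why ld gives up. A minimal workaround sketch, assuming the toolkit really lives at /usr/local/cuda (all paths here are assumptions; exporting the same variables in your shell before starting Python is the more reliable route):

```python
import os

# Assumed toolkit location -- adjust to wherever libcudart.so actually lives.
cuda_home = "/usr/local/cuda"
libdir = os.path.join(cuda_home, "lib64")

os.environ.setdefault("CUDA_HOME", cuda_home)
# LIBRARY_PATH is consulted by the compiler driver when resolving -lcudart at
# link time; LD_LIBRARY_PATH by the dynamic loader. Both are inherited by the
# ninja/c++ subprocesses that perform the JIT build.
for var in ("LIBRARY_PATH", "LD_LIBRARY_PATH"):
    prev = os.environ.get(var)
    os.environ[var] = libdir if not prev else libdir + os.pathsep + prev

# Import after the environment is set so the JIT link can find -lcudart.
import mii

pipe = mii.pipeline("/opt/zzg-cx/code/LLM_QA/LLM_models/QWen-14B-Chat")
print(pipe(["Hello, who are you?"], max_new_tokens=64))
```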