调用 vllm_cli_demo 时,输入问题后程序报错并崩溃:
GLM-4: python: ../../../lib/Analysis/Allocation.cpp:43: std::pair<llvm::SmallVector<unsigned int>, llvm::SmallVector<unsigned int> > mlir::triton::getCvtOrder(mlir::Attribute, mlir::Attribute): Assertion `!(srcMmaLayout && dstMmaLayout && !srcMmaLayout.isAmpere()) && "mma -> mma layout conversion is only supported on Ampere"' failed.
SIGABRT received at time=1719559266 on cpu 6
PC: @ 0x7ffbc8ec600b (unknown) raise
@ 0x7ffbc91e3420 (unknown) (unknown)
@ 0x4173257325203a75 (unknown) (unknown)
[2024-06-28 15:21:06,250 E 427092 427943] logging.cc:440: SIGABRT received at time=1719559266 on cpu 6
[2024-06-28 15:21:06,250 E 427092 427943] logging.cc:440: PC: @ 0x7ffbc8ec600b (unknown) raise
[2024-06-28 15:21:06,252 E 427092 427943] logging.cc:440: @ 0x7ffbc91e3420 (unknown) (unknown)
[2024-06-28 15:21:06,254 E 427092 427943] logging.cc:440: @ 0x4173257325203a75 (unknown) (unknown)
Fatal Python error: Aborted
Stack (most recent call first):
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/compiler/backends/cuda.py", line 173 in make_llir
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/compiler/backends/cuda.py", line 199 in <lambda>
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/compiler/compiler.py", line 193 in compile
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/runtime/jit.py", line 416 in run
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/runtime/jit.py", line 167 in <lambda>
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/ops/prefix_prefill.py", line 757 in context_attention_fwd
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/ops/paged_attn.py", line 200 in forward_prefix
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/backends/xformers.py", line 305 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/layer.py", line 89 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 106 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 207 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 272 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 316 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 364 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 749 in execute_model
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/worker/worker.py", line 280 in execute_model
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 140 in execute_method
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/concurrent/futures/thread.py", line 58 in run
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/concurrent/futures/thread.py", line 83 in _worker
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/threading.py", line 953 in run
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/threading.py", line 1016 in _bootstrap_inner
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/threading.py", line 973 in _bootstrap
System Info / 系統信息
python=3.10 accelerate 0.31.0 aiofiles 23.2.1 aiohttp 3.9.5 aiosignal 1.3.1 altair 5.3.0 annotated-types 0.7.0 anyio 4.4.0 async-timeout 4.0.3 attrs 23.2.0 bitsandbytes 0.43.1 Brotli 1.0.9 certifi 2024.6.2 charset-normalizer 2.0.4 click 8.1.7 cloudpickle 3.0.0 cmake 3.29.6 contourpy 1.2.1 cycler 0.12.1 datasets 2.20.0 dill 0.3.8 diskcache 5.6.3 distro 1.9.0 dnspython 2.6.1 einops 0.8.0 email_validator 2.2.0 exceptiongroup 1.2.1 fastapi 0.111.0 fastapi-cli 0.0.4 ffmpy 0.3.2 filelock 3.13.1 fonttools 4.53.0 frozenlist 1.4.1 fsspec 2024.5.0 gmpy2 2.1.2 gradio 4.37.1 gradio_client 1.0.2 h11 0.14.0 httpcore 1.0.5 httptools 0.6.1 httpx 0.27.0 huggingface-hub 0.23.4 idna 3.7 importlib_resources 6.4.0 interegular 0.3.3 Jinja2 3.1.4 joblib 1.4.2 jsonschema 4.22.0 jsonschema-specifications 2023.12.1 kiwisolver 1.4.5 lark 1.1.9 llvmlite 0.43.0 lm-format-enforcer 0.10.1 markdown-it-py 3.0.0 MarkupSafe 2.1.3 matplotlib 3.9.0 mdurl 0.1.2 mkl-fft 1.3.8 mkl-random 1.2.4 mkl-service 2.4.0 mpmath 1.3.0 msgpack 1.0.8 multidict 6.0.5 multiprocess 0.70.16 nest-asyncio 1.6.0 networkx 3.2.1 ninja 1.11.1.1 numba 0.60.0 numpy 1.26.4 nvidia-ml-py 12.555.43 openai 1.35.7 orjson 3.10.5 outlines 0.0.46 packaging 24.1 pandas 2.2.2 Pillow 9.3.0 pip 24.0 prometheus_client 0.20.0 prometheus-fastapi-instrumentator 7.0.0 protobuf 5.27.2 psutil 6.0.0 py-cpuinfo 9.0.0 pyairports 2.1.1 pyarrow 16.1.0 pyarrow-hotfix 0.6 pycountry 24.6.1 pydantic 2.7.4 pydantic_core 2.18.4 pydub 0.25.1 Pygments 2.18.0 pyparsing 3.1.2 PySocks 1.7.1 python-dateutil 2.9.0.post0 python-dotenv 1.0.1 python-multipart 0.0.9 pytz 2024.1 PyYAML 6.0.1 ray 2.31.0 referencing 0.35.1 regex 2024.5.15 requests 2.32.2 rich 13.7.1 rpds-py 0.18.1 ruff 0.5.0 safetensors 0.4.3 scikit-learn 1.5.0 scipy 1.14.0 semantic-version 2.10.0 sentence-transformers 3.0.1 sentencepiece 0.2.0 setuptools 69.5.1 shellingham 1.5.4 six 1.16.0 sniffio 1.3.1 sse-starlette 2.1.2 starlette 0.37.2 sympy 1.12 threadpoolctl 3.5.0 tiktoken 0.7.0 timm 1.0.7 
tokenizers 0.19.1 tomlkit 0.12.0 toolz 0.12.1 torch 2.3.0 torchaudio 2.3.0 torchvision 0.18.0 tqdm 4.66.4 transformers 4.40.0 triton 2.3.0 typer 0.12.3 typing_extensions 4.11.0 tzdata 2024.1 ujson 5.10.0 urllib3 2.2.2 uvicorn 0.30.1 uvloop 0.19.0 vllm 0.5.0.post1 vllm-flash-attn 2.5.9 watchfiles 0.22.0 websockets 11.0.3 wheel 0.43.0 xformers 0.0.26.post1 xxhash 3.4.1 yarl 1.9.4
Who can help? / 谁可以帮助到您?
No response
Information / 问题信息
Reproduction / 复现过程
调用 vllm_cli_demo 时,输入问题后程序报错并崩溃:
GLM-4: python: ../../../lib/Analysis/Allocation.cpp:43: std::pair<llvm::SmallVector<unsigned int>, llvm::SmallVector<unsigned int> > mlir::triton::getCvtOrder(mlir::Attribute, mlir::Attribute): Assertion `!(srcMmaLayout && dstMmaLayout && !srcMmaLayout.isAmpere()) && "mma -> mma layout conversion is only supported on Ampere"' failed.
SIGABRT received at time=1719559266 on cpu 6
PC: @ 0x7ffbc8ec600b (unknown) raise
@ 0x7ffbc91e3420 (unknown) (unknown)
@ 0x4173257325203a75 (unknown) (unknown)
[2024-06-28 15:21:06,250 E 427092 427943] logging.cc:440: SIGABRT received at time=1719559266 on cpu 6
[2024-06-28 15:21:06,250 E 427092 427943] logging.cc:440: PC: @ 0x7ffbc8ec600b (unknown) raise
[2024-06-28 15:21:06,252 E 427092 427943] logging.cc:440: @ 0x7ffbc91e3420 (unknown) (unknown)
[2024-06-28 15:21:06,254 E 427092 427943] logging.cc:440: @ 0x4173257325203a75 (unknown) (unknown)
Fatal Python error: Aborted
Stack (most recent call first):
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/compiler/backends/cuda.py", line 173 in make_llir
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/compiler/backends/cuda.py", line 199 in <lambda>
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/compiler/compiler.py", line 193 in compile
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/runtime/jit.py", line 416 in run
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/triton/runtime/jit.py", line 167 in <lambda>
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/ops/prefix_prefill.py", line 757 in context_attention_fwd
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/ops/paged_attn.py", line 200 in forward_prefix
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/backends/xformers.py", line 305 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/attention/layer.py", line 89 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 106 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 207 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 272 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 316 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/model_executor/models/chatglm.py", line 364 in forward
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1541 in _call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1532 in _wrapped_call_impl
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/worker/model_runner.py", line 749 in execute_model
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/worker/worker.py", line 280 in execute_model
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115 in decorate_context
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/site-packages/vllm/worker/worker_base.py", line 140 in execute_method
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/concurrent/futures/thread.py", line 58 in run
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/concurrent/futures/thread.py", line 83 in _worker
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/threading.py", line 953 in run
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/threading.py", line 1016 in _bootstrap_inner
File "/home/ubuntu/anaconda3/envs/ceshi1/lib/python3.10/threading.py", line 973 in _bootstrap
Extension modules: _brotli, yaml._yaml, mkl._mklinit, mkl._py_mkl_service, numpy.core._multiarray_umath, numpy.core._multiarray_tests, numpy.linalg._umath_linalg, numpy.fft._pocketfft_internal, numpy.random._common, numpy.random.bit_generator, numpy.random._bounded_integers, numpy.random._mt19937, numpy.random.mtrand, numpy.random._philox, numpy.random._pcg64, numpy.random._sfc64, numpy.random._generator, torch._C, torch._C._fft, torch._C._linalg, torch._C._nested, torch._C._nn, torch._C._sparse, torch._C._special, sentencepiece._sentencepiece, psutil._psutil_linux, psutil._psutil_posix, msgpack._cmsgpack, google._upb._message, setproctitle, uvloop.loop, ray._raylet, regex._regex, pyarrow.lib, pyarrow._json, PIL._imaging, scipy._lib._ccallback_c, scipy.linalg._fblas, scipy.linalg._flapack, scipy.linalg.cython_lapack, scipy.linalg._cythonized_array_utils, scipy.linalg._solve_toeplitz, scipy.linalg._decomp_lu_cython, scipy.linalg._matfuncs_sqrtm_triu, scipy.linalg.cython_blas, scipy.linalg._matfuncs_expm, scipy.linalg._decomp_update, scipy.sparse._sparsetools, _csparsetools, scipy.sparse._csparsetools, scipy.sparse.linalg._dsolve._superlu, scipy.sparse.linalg._eigen.arpack._arpack, scipy.sparse.linalg._propack._spropack, scipy.sparse.linalg._propack._dpropack, scipy.sparse.linalg._propack._cpropack, scipy.sparse.linalg._propack._zpropack, scipy.sparse.csgraph._tools, scipy.sparse.csgraph._shortest_path, scipy.sparse.csgraph._traversal, scipy.sparse.csgraph._min_spanning_tree, scipy.sparse.csgraph._flow, scipy.sparse.csgraph._matching, scipy.sparse.csgraph._reordering, scipy.optimize._group_columns, scipy._lib.messagestream, scipy.optimize._trlib._trlib, scipy.optimize._lbfgsb, _moduleTNC, scipy.optimize._moduleTNC, scipy.optimize._cobyla, scipy.optimize._slsqp, scipy.optimize._minpack, scipy.optimize._lsq.givens_elimination, scipy.optimize._zeros, scipy.optimize._highs.cython.src._highs_wrapper, scipy.optimize._highs._highs_wrapper, 
scipy.optimize._highs.cython.src._highs_constants, scipy.optimize._highs._highs_constants, scipy.linalg._interpolative, scipy.optimize._bglu_dense, scipy.optimize._lsap, scipy.spatial._ckdtree, scipy.spatial._qhull, scipy.spatial._voronoi, scipy.spatial._distance_wrap, scipy.spatial._hausdorff, scipy.special._ufuncs_cxx, scipy.special._ufuncs, scipy.special._specfun, scipy.special._comb, scipy.special._ellip_harm_2, scipy.spatial.transform._rotation, scipy.optimize._direct, gmpy2.gmpy2, cuda_utils (total: 95)
Aborted (core dumped)
Expected behavior / 期待表现
期望 vllm_cli_demo 在输入问题后能够正常返回回答,而不是崩溃退出。目前不知道该如何解决这个问题。