Traceback (most recent call last):
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/routes.py", line 412, in run_predict
output = await app.get_blocks().process_api(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/blocks.py", line 1299, in process_api
result = await self.call_function(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/blocks.py", line 1035, in call_function
prediction = await anyio.to_thread.run_sync(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/anyio/to_thread.py", line 31, in run_sync
return await get_asynclib().run_sync_in_worker_thread(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread
return await future
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 867, in run
result = context.run(func, *args)
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/utils.py", line 491, in async_iteration
return next(iterator)
File "/home/xjsysopr/software/server/langchain-ChatGLM/webui.py", line 37, in get_answer
for resp, history in local_doc_qa.get_knowledge_based_answer(
File "/home/xjsysopr/software/server/langchain-ChatGLM/chains/local_doc_qa.py", line 262, in get_knowledge_based_answer
for result, history in self.llm._call(prompt=prompt,
File "/home/xjsysopr/software/server/langchain-ChatGLM/models/chatglm_llm.py", line 72, in _call
for inum, (streamresp, ) in enumerate(self.model.stream_chat(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 1311, in stream_chat
for outputs in self.stream_generate(inputs, gen_kwargs):
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 35, in generator_context
response = gen.send(None)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 1388, in stream_generate
outputs = self(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 1190, in forward
transformer_outputs = self.transformer(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 996, in forward
layer_ret = layer(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 627, in forward
attention_outputs = self.attention(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 445, in forward
mixed_raw_layer = self.query_key_value(hidden_states)
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl
return forward_call(*args, **kwargs)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/quantization.py", line 391, in forward
output = W8A16Linear.apply(input, self.weight, self.weight_scale, self.weight_bit_width)
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply
return super().apply(*args, **kwargs) # type: ignore[misc]
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/quantization.py", line 56, in forward
weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width)
File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/quantization.py", line 286, in extract_weight_to_half
func(
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/cpm_kernels/kernels/base.py", line 48, in __call__
func = self._prepare_func()
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/cpm_kernels/kernels/base.py", line 36, in _prepare_func
curr_device = cudart.cudaGetDevice()
File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/cpm_kernels/library/base.py", line 72, in wrapper
raise RuntimeError("Library %s is not initialized" % self.__name)
RuntimeError: Library cudart is not initialized
配置文件 model_config.py 的内容如下:
import torch.cuda import torch.backends import os import logging import uuid
LOG_FORMAT = "%(levelname) -5s %(asctime)s" "-1d: %(message)s" logger = logging.getLogger() logger.setLevel(logging.INFO) logging.basicConfig(format=LOG_FORMAT)
embedding_model_dict = { "ernie-tiny": "nghuyong/ernie-3.0-nano-zh", "ernie-base": "nghuyong/ernie-3.0-base-zh", "text2vec-base": "shibing624/text2vec-base-chinese", "text2vec": "GanymedeNil/text2vec-large-chinese", }
Embedding model name
EMBEDDING_MODEL = "text2vec"
Embedding running device
EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
EMBEDDING_DEVICE = "cuda"
supported LLM models
llm_model_dict = { "chatyuan": "ClueAI/ChatYuan-large-v2", "chatglm-6b-int4-qe": "THUDM/chatglm-6b-int4-qe", "chatglm-6b-int4": "THUDM/chatglm-6b-int4", "chatglm-6b-int8": "THUDM/chatglm-6b-int8", "chatglm-6b": "THUDM/chatglm-6b", "moss": "fnlp/moss-moon-003-sft", }
LLM model name
LLM_MODEL = "chatglm-6b-int4"
LLM lora path,默认为空,如果有请直接指定文件夹路径
LLM_LORA_PATH = "" USE_LORA = True if LLM_LORA_PATH else False
LLM streaming response
STREAMING = True
Use p-tuning-v2 PrefixEncoder
USE_PTUNING_V2 = False
LLM running device
LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
LLM_DEVICE = "cuda"
MOSS load in 8bit
LOAD_IN_8BIT = True
VS_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "vector_store")
UPLOAD_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "content")
基于上下文的prompt模版,请务必保留"{question}"和"{context}"
PROMPT_TEMPLATE = """已知信息: {context}
根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""
文本分句长度
SENTENCE_SIZE = 100
匹配后单段上下文长度
CHUNK_SIZE = 250
LLM input history length
LLM_HISTORY_LEN = 1
return top-k text chunk from vector store
VECTOR_SEARCH_TOP_K = 1
知识检索内容相关度 Score, 数值范围约为0-1100,如果为0,则不生效,经测试设置为小于500时,匹配结果更精准
VECTOR_SEARCH_SCORE_THRESHOLD = 0
NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
FLAG_USER_NAME = uuid.uuid4().hex
logger.info(f""" loading model config llm device: {LLM_DEVICE} embedding device: {EMBEDDING_DEVICE} dir: {os.path.dirname(os.path.dirname(__file__))} flagging username: {FLAG_USER_NAME} """)
是否开启跨域,默认为False,如果需要开启,请设置为True
is open cross domain
OPEN_CROSS_DOMAIN = False
执行: python webui.py
打开 http://0.0.0.0:7860/ ,选中“知识库问答”并添加新的知识库,开始问答时报错如下(使用 nvidia-smi 查看 GPU 使用情况,webui.py 进程正常占用 GPU 资源):
Traceback (most recent call last): File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/routes.py", line 412, in run_predict output = await app.get_blocks().process_api( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/blocks.py", line 1299, in process_api result = await self.call_function( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/blocks.py", line 1035, in call_function prediction = await anyio.to_thread.run_sync( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/anyio/to_thread.py", line 31, in run_sync return await get_asynclib().run_sync_in_worker_thread( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 937, in run_sync_in_worker_thread return await future File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 867, in run result = context.run(func, args) File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/gradio/utils.py", line 491, in async_iteration return next(iterator) File "/home/xjsysopr/software/server/langchain-ChatGLM/webui.py", line 37, in get_answer for resp, history in local_doc_qa.get_knowledge_based_answer( File "/home/xjsysopr/software/server/langchain-ChatGLM/chains/local_doc_qa.py", line 262, in get_knowledge_based_answer for result, history in self.llm._call(prompt=prompt, File "/home/xjsysopr/software/server/langchain-ChatGLM/models/chatglm_llm.py", line 72, in _call for inum, (streamresp, ) in enumerate(self.model.stream_chat( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 35, in generator_context response = gen.send(None) File 
"/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 1311, in stream_chat for outputs in self.stream_generate(inputs, gen_kwargs): File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 35, in generator_context response = gen.send(None) File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 1388, in stream_generate outputs = self( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(args, kwargs) File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 1190, in forward transformer_outputs = self.transformer( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, *kwargs) File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 996, in forward layer_ret = layer( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(args, kwargs) File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 627, in forward attention_outputs = self.attention( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, kwargs) File 
"/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/modeling_chatglm.py", line 445, in forward mixed_raw_layer = self.query_key_value(hidden_states) File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1501, in _call_impl return forward_call(*args, *kwargs) File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/quantization.py", line 391, in forward output = W8A16Linear.apply(input, self.weight, self.weight_scale, self.weight_bit_width) File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/torch/autograd/function.py", line 506, in apply return super().apply(args, kwargs) # type: ignore[misc] File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/quantization.py", line 56, in forward weight = extract_weight_to_half(quant_w, scale_w, weight_bit_width) File "/root/.cache/huggingface/modules/transformers_modules/THUDM/chatglm-6b-int4/02a065cf2797029c036a02cac30f1da1a9bc49a3/quantization.py", line 286, in extract_weight_to_half func( File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/cpm_kernels/kernels/base.py", line 48, in call func = self._prepare_func() File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/cpm_kernels/kernels/base.py", line 36, in _prepare_func curr_device = cudart.cudaGetDevice() File "/home/xjsysopr/software/server/miniconda3/envs/pytorch_env/lib/python3.10/site-packages/cpm_kernels/library/base.py", line 72, in wrapper raise RuntimeError("Library %s is not initialized" % self.__name) RuntimeError: Library cudart is not initialized