Closed: RemKeeper closed this issue 1 year ago
Model Config
import torch.cuda
import torch.backends
import os
import logging
import uuid
LOG_FORMAT = "%(levelname) -5s %(asctime)s" "-1d: %(message)s"
logger = logging.getLogger()
logger.setLevel(logging.INFO)
logging.basicConfig(format=LOG_FORMAT)
embedding_model_dict = {
    "ernie-tiny": "nghuyong/ernie-3.0-nano-zh",
    "ernie-base": "nghuyong/ernie-3.0-base-zh",
    "text2vec-base": "shibing624/text2vec-base-chinese",
    "text2vec": "GanymedeNil/text2vec-large-chinese",
    "m3e-small": "moka-ai/m3e-small",
    "m3e-base": "moka-ai/m3e-base",
}
EMBEDDING_MODEL = "text2vec"
EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
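For reference, a config entry like the one selected above is typically loaded through langchain's HuggingFaceEmbeddings wrapper on the chosen device. A minimal sketch (the exact loader call used by the project may differ):

from langchain.embeddings import HuggingFaceEmbeddings

# Illustrative only: resolve the model id and device from the config values above.
embeddings = HuggingFaceEmbeddings(
    model_name=embedding_model_dict[EMBEDDING_MODEL],  # "GanymedeNil/text2vec-large-chinese"
    model_kwargs={"device": EMBEDDING_DEVICE},
)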
llm_model_dict = {
    "chatglm-6b-int4-qe": {
        "name": "chatglm-6b-int4-qe",
        "pretrained_model_name": "THUDM/chatglm-6b-int4-qe",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm-6b-int4": {
        "name": "chatglm-6b-int4",
        "pretrained_model_name": "ChatGLM_6b_int4\chatglm-6b-int4",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm-6b-int8": {
        "name": "chatglm-6b-int8",
        "pretrained_model_name": "THUDM/chatglm-6b-int8",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm-6b": {
        "name": "chatglm-6b",
        "pretrained_model_name": "THUDM/chatglm-6b",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm2-6b": {
        "name": "chatglm2-6b",
        "pretrained_model_name": "THUDM/chatglm2-6b",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm2-6b-int4": {
        "name": "chatglm2-6b-int4",
        "pretrained_model_name": "THUDM/chatglm2-6b-int4",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatglm2-6b-int8": {
        "name": "chatglm2-6b-int8",
        "pretrained_model_name": "THUDM/chatglm2-6b-int8",
        "local_model_path": None,
        "provides": "ChatGLM"
    },
    "chatyuan": {
        "name": "chatyuan",
        "pretrained_model_name": "ClueAI/ChatYuan-large-v2",
        "local_model_path": None,
        "provides": None
    },
    "moss": {
        "name": "moss",
        "pretrained_model_name": "fnlp/moss-moon-003-sft",
        "local_model_path": None,
        "provides": "MOSSLLM"
    },
    "vicuna-13b-hf": {
        "name": "vicuna-13b-hf",
        "pretrained_model_name": "vicuna-13b-hf",
        "local_model_path": None,
        "provides": "LLamaLLM"
    },
    # For models served through fastchat, use the following entry format
    "fastchat-chatglm-6b": {
        "name": "chatglm-6b",  # set "name" to the "model_name" registered with the fastchat service
        "pretrained_model_name": "chatglm-6b",
        "local_model_path": None,
        "provides": "FastChatOpenAILLM",  # when using the fastchat api, "provides" must be "FastChatOpenAILLM"
        "api_base_url": "http://localhost:8000/v1"  # set "api_base_url" to the api_base_url exposed by the fastchat service
    },
    "fastchat-chatglm2-6b": {
        "name": "chatglm2-6b",  # set "name" to the "model_name" registered with the fastchat service
        "pretrained_model_name": "chatglm2-6b",
        "local_model_path": None,
        "provides": "FastChatOpenAILLM",  # when using the fastchat api, "provides" must be "FastChatOpenAILLM"
        "api_base_url": "http://localhost:8000/v1"  # set "api_base_url" to the api_base_url exposed by the fastchat service
    },
    # For models served through fastchat, use the following entry format
    "fastchat-vicuna-13b-hf": {
        "name": "vicuna-13b-hf",  # set "name" to the "model_name" registered with the fastchat service
        "pretrained_model_name": "vicuna-13b-hf",
        "local_model_path": None,
        "provides": "FastChatOpenAILLM",  # when using the fastchat api, "provides" must be "FastChatOpenAILLM"
        "api_base_url": "http://localhost:8000/v1"  # set "api_base_url" to the api_base_url exposed by the fastchat service
    },
}
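For context, the "FastChatOpenAILLM" entries point the project at fastchat's OpenAI-compatible REST API. A hedged sketch of what a request against such an entry looks like; the model name, endpoint, and payload fields come from the config above, but the actual client code in the project may differ:

import requests

entry = llm_model_dict["fastchat-chatglm-6b"]
resp = requests.post(
    f'{entry["api_base_url"]}/chat/completions',  # fastchat serves an OpenAI-compatible endpoint under /v1
    json={
        "model": entry["name"],  # must match the model_name registered with the fastchat service
        "messages": [{"role": "user", "content": "你好"}],
    },
    timeout=60,
)
print(resp.json()["choices"][0]["message"]["content"])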
LLM_MODEL = "chatglm-6b-int4"
LOAD_IN_8BIT = False
BF16 = False
LORA_DIR = "loras/"
LLM_LORA_PATH = ""
USE_LORA = True if LLM_LORA_PATH else False
STREAMING = True
USE_PTUNING_V2 = False
LLM_DEVICE = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
KB_ROOT_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "knowledge_base")
# Prompt template: answer in Chinese using only the retrieved context, and state that the
# question cannot be answered when the context is insufficient.
PROMPT_TEMPLATE = """已知信息:
{context}

根据上述已知信息,简洁和专业的来回答用户的问题。如果无法从中得到答案,请说 “根据已知信息无法回答该问题” 或 “没有提供足够的相关信息”,不允许在答案中添加编造成分,答案请使用中文。 问题是:{question}"""
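The {context} and {question} placeholders are ordinary str.format fields. A short, illustrative sketch of how the final prompt would be assembled (variable names here are hypothetical, not the project's actual code):

# Illustrative only: "related_docs" stands in for whatever the vector search returns.
related_docs = ["文档片段一", "文档片段二"]
query = "知识库的根目录在哪里?"

prompt = PROMPT_TEMPLATE.format(
    context="\n".join(related_docs),
    question=query,
)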
CACHED_VS_NUM = 1
SENTENCE_SIZE = 100
CHUNK_SIZE = 250
LLM_HISTORY_LEN = 3
VECTOR_SEARCH_TOP_K = 5
VECTOR_SEARCH_SCORE_THRESHOLD = 0
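As a rough illustration of how VECTOR_SEARCH_TOP_K and VECTOR_SEARCH_SCORE_THRESHOLD interact (a sketch under the assumption of FAISS-style distance scores where lower is closer; a threshold of 0 effectively disables filtering, and this is not the project's actual retrieval code):

def filter_hits(hits, top_k=VECTOR_SEARCH_TOP_K, threshold=VECTOR_SEARCH_SCORE_THRESHOLD):
    """hits is a list of (document, score) pairs; lower score means a closer match."""
    hits = sorted(hits, key=lambda pair: pair[1])[:top_k]
    if threshold > 0:
        hits = [(doc, score) for doc, score in hits if score < threshold]
    return hits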
NLTK_DATA_PATH = os.path.join(os.path.dirname(os.path.dirname(__file__)), "nltk_data")
FLAG_USER_NAME = uuid.uuid4().hex
logger.info(f"""
loading model config
llm device: {LLM_DEVICE}
embedding device: {EMBEDDING_DEVICE}
dir: {os.path.dirname(os.path.dirname(__file__))}
flagging username: {FLAG_USER_NAME}
""")
OPEN_CROSS_DOMAIN = False
BING_SEARCH_URL = "https://api.bing.microsoft.com/v7.0/search"
BING_SUBSCRIPTION_KEY = ""
ZH_TITLE_ENHANCE = False
Problem Description: Loading the chatglm-6b-int4 model fails, and the error says not to use a quantized model on CPU. How do I make the project load it on the GPU instead?
Steps to Reproduce: Run the project with python ./webui.py
Expected Result: The model loads successfully.
Actual Result: The model fails to load, and loading it through the webui reports an ERROR.
Environment Information:
Additional Information:
Loading ChatGLM_6b_int4\chatglm-6b-int4...
No compiled kernel found.
Compiling kernels : C:\Users\jacki\.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization_kernels_parallel.c
Compiling gcc -O3 -fPIC -pthread -fopenmp -std=c99 C:\Users\jacki\.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization_kernels_parallel.c -shared -o C:\Users\jacki\.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization_kernels_parallel.so
e:/mingw/bin/../lib/gcc/mingw32/6.3.0/../../../../mingw32/bin/ld.exe: cannot find -lpthread
collect2.exe: error: ld returned 1 exit status
Compile default cpu kernel failed, using default cpu kernel code.
Compiling gcc -O3 -fPIC -std=c99 C:\Users\jacki\.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization_kernels.c -shared -o C:\Users\jacki\.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization_kernels.so
Load default cpu kernel failed:
Traceback (most recent call last):
  File "C:\Users\jacki/.cache\huggingface\modules\transformers_modules\chatglm-6b-int4\quantization.py", line 178, in __init__
    kernels = ctypes.cdll.LoadLibrary(kernel_file)
  File "C:\Users\jacki\.conda\envs\langchain\lib\ctypes\__init__.py", line 452, in LoadLibrary
    return self._dlltype(name)
  File "C:\Users\jacki\.conda\envs\langchain\lib\ctypes\__init__.py", line 374, in __init__
    self._handle = _dlopen(self._name, mode)
OSError: [WinError 193] %1 不是有效的 Win32 应用程序。 (%1 is not a valid Win32 application.)
Failed to load kernel.
Cannot load cpu kernel, don't use quantized model on cpu.
Using quantization cache
Applying quantization to glm layers
Loaded the model in 2.34 seconds.
WARNING 2023-07-08 16:40:10,889-1d: No sentence-transformers model found with name C:\Users\jacki/.cache\torch\sentence_transformers\GanymedeNil_text2vec-large-chinese. Creating a new one with MEAN pooling.
WARNING 2023-07-08 16:40:12,558-1d: The dtype of attention mask (torch.int64) is not bool
ERROR 2023-07-08 16:40:12,559-1d: Library cudart is not initialized
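The closing "Library cudart is not initialized" error usually means the installed torch build cannot see CUDA, so LLM_DEVICE falls back to "cpu" and the int4 CPU kernels then fail to compile under MinGW. A quick check, using only standard PyTorch calls, to confirm whether a CUDA-enabled build is actually in use:

import torch

print(torch.__version__)          # a CPU-only build typically reports something like "2.0.1+cpu"
print(torch.version.cuda)         # None means the build was compiled without CUDA support
print(torch.cuda.is_available())  # must be True for LLM_DEVICE to resolve to "cuda"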