Open dragononly opened 1 year ago
@st.cache_resource
def get_model():
    """Build the tokenizer and the adapter-wrapped model used by the app.

    The base checkpoint "chatglm2-6b" is loaded with ``trust_remote_code``
    enabled and moved to the GPU, the PEFT weights stored under
    "weights/sentiment_comp_ie_chatglm2" are applied on top of it, and the
    result is converted to half precision and switched to eval mode.

    The function is decorated with ``st.cache_resource``; presumably this
    makes Streamlit reuse the loaded objects across reruns -- confirm
    against the Streamlit documentation.

    Returns:
        A ``(tokenizer, model)`` tuple.
    """
    checkpoint = "chatglm2-6b"
    adapter_dir = "weights/sentiment_comp_ie_chatglm2"

    tokenizer = AutoTokenizer.from_pretrained(checkpoint, trust_remote_code=True)

    base_model = AutoModel.from_pretrained(checkpoint, trust_remote_code=True)
    base_model = base_model.cuda()
    # Multi-GPU alternative (kept from the original snippet): load the base
    # model with the two lines below instead, setting num_gpus to the number
    # of GPUs actually available.
    # from utils import load_model_on_gpus
    # model = load_model_on_gpus("chatglm2-6b", num_gpus=2)

    adapted_model = PeftModel.from_pretrained(base_model, adapter_dir)
    # Cast to fp16 first, then switch to eval mode, in the same order as
    # the original code.
    return tokenizer, adapted_model.half().eval()
@st.cache_resource def get_model(): tokenizer = AutoTokenizer.from_pretrained("chatglm2-6b", trust_remote_code=True) model = AutoModel.from_pretrained("chatglm2-6b", trust_remote_code=True).cuda() model = PeftModel.from_pretrained(model, "weights/sentiment_comp_ie_chatglm2").half()
多显卡支持:使用下面两行代替上面一行,并将 `num_gpus` 改为你实际的显卡数量。