support OpenBMB/MiniCPM

支持 https://github.com/OpenBMB/MiniCPM?tab=readme-ov-file 模型，用法示例如下：

# Example script: load the MiniCPM checkpoint with Hugging Face transformers,
# pass it to fastllm through ``llm.from_hf``, and print the reply to one prompt.
import torch
from transformers import AutoTokenizer, LlamaTokenizerFast, AutoModelForCausalLM
from fastllm_pytools import llm

path = 'openbmb/MiniCPM-2B-dpo-fp16'

# Tokenizer and float16 Hugging Face model, both loaded from the same path.
tokenizer = AutoTokenizer.from_pretrained(path)
model = AutoModelForCausalLM.from_pretrained(
    path,
    torch_dtype=torch.float16,
    device_map='cuda',
    trust_remote_code=True,
)

# Set the fastllm device map before handing the model over.
llm.set_device_map("cpu")

# ``model`` is rebound to the object returned by fastllm.
# dtype supports "float16", "int8", "int4"
model = llm.from_hf(model, tokenizer, dtype="float16")

# The prompt uses the <用户> ... <AI> markers exactly as given.
prompt = "<用户>山东省最高的山是哪座山, 它比黄山高还是矮？差距多少？<AI>"
generation_options = dict(
    max_length=256,
    top_p=0.8,
    temperature=0.5,
    repeat_penalty=1.02,
)
print(model.response(prompt, **generation_options))

ztxz16 / fastllm

support OpenBMB/MiniCPM #423