tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
if not hasattr(tokenizer, 'model_dir'):
    tokenizer.model_dir = model_dir
# Enable bf16 precision; recommended on A100, H100, RTX3060, RTX3070, etc. to save GPU memory
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, bf16=True).eval()
# Enable fp16 precision; recommended on V100, P100, T4, etc. to save GPU memory
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, fp16=True).eval()
# Run inference on CPU; requires about 32GB of RAM
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="cpu", trust_remote_code=True).eval()
# Default: run inference on GPU; requires about 24GB of GPU memory
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True).eval()
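If you're not sure which of the variants above applies to your machine, you can also pick the precision at runtime from what the hardware reports. A minimal sketch, assuming a CUDA build of PyTorch; the bf16/fp16 kwargs are the same ones the Qwen remote code accepts in the calls above:

import torch

# Pick load kwargs from what the hardware supports; mirrors the variants above
if not torch.cuda.is_available():
    load_kwargs = dict(device_map="cpu")               # CPU fallback, ~32GB RAM
elif torch.cuda.is_bf16_supported():
    load_kwargs = dict(device_map="auto", bf16=True)   # Ampere or newer GPUs
else:
    load_kwargs = dict(device_map="auto", fp16=True)   # e.g. V100, P100, T4

model = AutoModelForCausalLM.from_pretrained(
    model_dir, trust_remote_code=True, **load_kwargs
).eval()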
# First round of dialogue
query = tokenizer.from_list_format([
    {'audio': 'assets/audio/1272-128104-0000.flac'},  # either a local path or a URL
    {'text': 'what does the person say?'},
])
response, history = model.chat(tokenizer, query=query, history=None)
print(response)
# The person says: "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel".
# Second round of dialogue
response, history = model.chat(tokenizer, 'Find the start time and end time of the word "middle classes"', history=history)
print(response)
# The word "middle classes" starts at <|2.33|> seconds and ends at <|3.26|> seconds.
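If you want those timings as numbers rather than text, the <|t|> markers can be pulled out of the response with a regular expression. A minimal sketch; this small helper is my own, not part of the Qwen API:

import re

# Extract <|seconds|> markers such as <|2.33|> from the response text
timestamps = [float(t) for t in re.findall(r'<\|(\d+(?:\.\d+)?)\|>', response)]
print(timestamps)  # e.g. [2.33, 3.26]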
Using the ModelScope example code, the call fails with an error. I had downloaded the model to a local directory in advance, then changed the path so it loads from that copy, and it errors.

Here is the demo code from GitHub:
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "1"

from modelscope import (
    snapshot_download,
    AutoModelForCausalLM,
    AutoTokenizer,
    GenerationConfig,
)
import torch
model_id = '/mnt1/wp/damo_download/Qwen-Audio-Chat'  # local copy; note this is overwritten by the next line as posted
model_id = 'qwen/Qwen-Audio-Chat'
revision = 'master'
model_dir = snapshot_download(model_id, revision=revision)
torch.manual_seed(1234)

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
if not hasattr(tokenizer, 'model_dir'):
    tokenizer.model_dir = model_dir
# Enable bf16 precision; recommended on A100, H100, RTX3060, RTX3070, etc. to save GPU memory
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, bf16=True).eval()
# Enable fp16 precision; recommended on V100, P100, T4, etc. to save GPU memory
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True, fp16=True).eval()
# Run inference on CPU; requires about 32GB of RAM
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="cpu", trust_remote_code=True).eval()
# Default: run inference on GPU; requires about 24GB of GPU memory
model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto", trust_remote_code=True).eval()
# First round of dialogue
query = tokenizer.from_list_format([
    {'audio': 'assets/audio/1272-128104-0000.flac'},  # either a local path or a URL
    {'text': 'what does the person say?'},
])
response, history = model.chat(tokenizer, query=query, history=None)
print(response)
# The person says: "mister quilter is the apostle of the middle classes and we are glad to welcome his gospel".
# Second round of dialogue
response, history = model.chat(tokenizer, 'Find the start time and end time of the word "middle classes"', history=history)
print(response)
# The word "middle classes" starts at <|2.33|> seconds and ends at <|3.26|> seconds.
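One thing worth checking for the local-copy case: if the checkpoint is already fully downloaded, you can skip snapshot_download entirely and pass the directory straight to from_pretrained. A minimal sketch, assuming /mnt1/wp/damo_download/Qwen-Audio-Chat contains a complete snapshot (config, tokenizer files, weights); this is a guess at the cause, not a confirmed fix:

model_dir = '/mnt1/wp/damo_download/Qwen-Audio-Chat'  # local snapshot directory

tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
if not hasattr(tokenizer, 'model_dir'):
    tokenizer.model_dir = model_dir

model = AutoModelForCausalLM.from_pretrained(
    model_dir, device_map="auto", trust_remote_code=True
).eval()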