Closed AlexJJJChen closed 6 months ago
swift infer 能正常运行吗?可以参考最佳实践中的单样本推理代码进行修改。
swift infer 能正常运行吗?可以参考最佳实践中的单样本推理代码进行修改。
swift infer 是可以运行的,但是 infer 怎么使用自己的 dataset 做测试呢?
还有,我发现改变 model type 之后,下面的代码无法运行:
import os os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
from swift.llm import ( get_model_tokenizer, get_template, inference, ModelType, get_default_template_type, inference_stream ) from swift.utils import seed_everything import torch
model_type = "finetune_output/checkpoint-460 merged"# ModelType.llava1d6_mistral_7b_instruct # ModelType.llava1d6_yi_34b_instruct template_type = get_default_template_type(model_type) print(f'template_type: {template_type}')
model, tokenizer = get_model_tokenizer(model_type, torch.float16, model_kwargs={'device_map': 'auto'}) model.generation_config.max_new_tokens = 256 template = get_template(template_type, tokenizer) seed_everything(42)
images = ['image0.png'] query = 'summarize the image and give me a climate report' response, = inference(model, template, query, images=images) print(f'query: {query}') print(f'response: {response}')
swift infer --ckpt_dir xxx --custom_val_dataset_path xxx --val_dataset_sample -1
可以通过上面这条命令使用自己的 dataset 做测试。
进行单样本推理时,你还需要使用 Swift.from_pretrained,可以查看:https://github.com/modelscope/swift/blob/main/docs/source/LLM/LLM%E5%BE%AE%E8%B0%83%E6%96%87%E6%A1%A3.md#%E5%BE%AE%E8%B0%83%E5%90%8E%E6%A8%A1%E5%9E%8B
from transformers import LlavaNextProcessor, LlavaNextForConditionalGeneration import torch from PIL import Image import requests
from modelscope import snapshot_download from transformers import AutoModelForCausalLM, AutoTokenizer from peft import AutoPeftModelForCausalLM
device_count = torch.cuda.device_count() if device_count > 0: logger.debug("Select GPU device") device = torch.device("cuda") else: logger.debug("Select CPU device") device = torch.device("cpu")
processor = LlavaNextProcessor.from_pretrained("llava-hf/llava-v1.6-mistral-7b-hf")
model = LlavaNextForConditionalGeneration.from_pretrained("finetune_output/checkpoint-478-merged", torch_dtype=torch.float16, low_cpu_mem_usage=True, device_map='auto')
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
../aten/src/ATen/native/cuda/Indexing.cu:1290: indexSelectLargeIndex: block: [162,0,0], thread: [32,0,0] Assertion `srcIndex < srcSelectDimSize` failed.