Closed dushigao-cugb closed 10 months ago
很神奇,没遇到过,看起来是数据输入的问题。试试transformers==4.31.0和torch==2.0.1?不行的话print一下input的信息我看看
打印了pixel_values,如下 pixel_values: tensor([[[[ 0.8208, 0.8062, 0.8501, ..., -0.2886, -0.3179, -0.3762], [ 0.7188, 0.8940, 0.8647, ..., -0.2886, -0.2449, -0.2595], [ 0.6748, 0.7627, 0.7627, ..., -0.2595, -0.2739, -0.2449], ..., [ 1.8867, 1.9307, 1.7695, ..., 0.0471, 0.1055, 0.0471], [ 1.9307, 1.6963, 0.6021, ..., 0.0033, -0.1281, -0.1572], [ 1.2588, 0.1493, -0.2449, ..., 0.1785, -0.0405, -0.0258]],
[[ 0.8594, 0.8740, 0.9043, ..., -0.1312, -0.1462, -0.1312],
[ 0.8145, 0.9644, 0.9341, ..., -0.1462, -0.1013, -0.1163],
[ 0.7690, 0.8442, 0.8442, ..., -0.1312, -0.1312, -0.1462],
...,
[ 2.0293, 2.0742, 1.9551, ..., 0.0188, 0.0488, -0.0262],
[ 2.0742, 1.8350, 0.7090, ..., -0.1013, -0.2363, -0.2363],
[ 1.3691, 0.2439, -0.2063, ..., 0.0338, -0.1613, -0.1013]],
[[ 0.7949, 0.8379, 0.8662, ..., 0.0840, 0.0840, 0.0271],
[ 0.7808, 0.9229, 0.8945, ..., 0.0698, 0.1266, 0.1266],
[ 0.7524, 0.8091, 0.7949, ..., 0.0840, 0.1124, 0.1409],
...,
[ 2.0742, 2.1309, 2.0176, ..., 0.0840, 0.1693, 0.1266],
[ 2.1465, 1.9180, 0.7949, ..., -0.0582, -0.1293, -0.0724],
[ 1.4062, 0.2974, -0.1009, ..., 0.0982, -0.1009, -0.0298]]]],
device='cuda:0', dtype=torch.float16)
Traceback (most recent call last):
File "/content/gdrive/MyDrive/blip/blip2_peft_inference.py", line 107, in
预测代码没有做改动,我贴在下面
import torch from datasets import load_dataset from torch.utils.data import Dataset, DataLoader from peft import LoraConfig, get_peft_model, PeftModel from transformers import AutoProcessor, AutoModelForVision2Seq import nltk from nltk.translate.meteor_score import single_meteor_score import pickle import numpy as np from tqdm import tqdm import argparse import os import pdb import pytorch_lightning as pl
def get_meteor_score(infer, gt):
    """Return the METEOR score of a generated caption against its reference.

    Both captions are lower-cased and tokenized on whitespace before being
    handed to ``single_meteor_score``.

    Args:
        infer: Generated (hypothesis) caption as a plain string.
        gt: Ground-truth (reference) caption as a plain string.

    Returns:
        The value returned by ``single_meteor_score(reference, hypothesis)``.
    """
    # split() with no argument collapses runs of whitespace and yields [] for
    # an empty caption; split(" ") would emit "" tokens that get counted as
    # words and skew the length-based terms of the score.
    reference_tokens = gt.lower().split()
    hypothesis_tokens = infer.lower().split()
    # Argument order matters: reference first, hypothesis second.
    return single_meteor_score(reference_tokens, hypothesis_tokens)
def main():
    """Caption every image of the test split with BLIP-2 and report METEOR.

    Parses the command-line options, loads the ``imagefolder`` test split and
    the BLIP-2 model/processor (optionally wrapping the model with LoRA
    weights), generates one sampled caption per image, scores it against the
    sample's ``text`` field with ``get_meteor_score``, prints the mean score
    and pickles the per-sample scores into ``--result_dir``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--inference_dataset_name", type=str, default="RSITMD", help="name of inference dataset")
    parser.add_argument("--inference_dataset_dir", type=str, default="/home/zilun/RS5M_v4/blip2_ft/data/RSITMD", help="save dir for test set")
    parser.add_argument("--use_lora_weight", action="store_true", help="use lora weight or not")
    parser.add_argument("--blip2_lora_weight_dir", type=str, default="./blip2_lora_ckpt/BLIP2-RSITMD-Lora-15-12_5e-05_1e-06_50-64", help="save dir for BLIP2 lora weight (OPT-6.7B)")
    parser.add_argument("--result_dir", type=str, default="./eval_result", help="evaluation result save dir")
    parser.add_argument("--blip2_model_name", type=str, default="Salesforce/blip2-opt-6.7b", help="which blip2 model to use")
    args = parser.parse_args()

    # Sampling is enabled in generate() below, so fix the seed up front.
    pl.seed_everything(2023)

    print("---------load dataset---------")
    dataset = load_dataset("imagefolder", data_dir=args.inference_dataset_dir, split="test")

    print("---------load model------")
    model = AutoModelForVision2Seq.from_pretrained(
        args.blip2_model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )
    processor = AutoProcessor.from_pretrained(
        args.blip2_model_name,
        torch_dtype=torch.float16,
        device_map="auto"
    )

    if args.use_lora_weight:
        print("use lora weights for blip2")
        assert args.blip2_lora_weight_dir is not None
        model = PeftModel.from_pretrained(model, args.blip2_lora_weight_dir)

    device = "cuda" if torch.cuda.is_available() else "cpu"

    print("---------start test--------------")
    meteor_scores = []
    for index, example in enumerate(dataset):
        # Validate the reference caption before touching it: calling
        # .replace() on a missing caption would raise AttributeError, and a
        # sample without a reference cannot be scored anyway.
        gt_caption = example["text"]
        if gt_caption is None:
            print("None caption in gt: {}".format(index))
            continue
        gt_caption = gt_caption.replace("\n", "")

        image = example["image"]
        print(image)
        inputs = processor(images=image, return_tensors="pt").to(device, torch.float16)
        pixel_values = inputs.pixel_values

        # inference
        print("pixel_values:", pixel_values)
        generated_output = model.generate(
            pixel_values=pixel_values,
            max_length=50,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            num_return_sequences=1,
        )
        generated_caption = processor.batch_decode(generated_output, skip_special_tokens=True)[0].replace("\n", "")

        # calculate score
        print(index, generated_caption, "|||", gt_caption)
        meteor_score = get_meteor_score(generated_caption, gt_caption)
        meteor_scores.append(meteor_score)

    meteor_scores = np.array(meteor_scores)
    os.makedirs(args.result_dir, exist_ok=True)
    # The message templates read "<dataset> ... meteor score:<mean>", so the
    # dataset name must be the first format argument and the mean the second.
    if args.use_lora_weight:
        print("{} lora mode meteor score:{}".format(args.inference_dataset_name, np.mean(meteor_scores)))
        with open("{}/lora_meteor_{}.pkl".format(args.result_dir, args.inference_dataset_name), "wb") as f:
            pickle.dump(meteor_scores, f)
    else:
        print("{} vanilla model meteor score:{}".format(args.inference_dataset_name, np.mean(meteor_scores)))
        with open("{}/origin_meteor_{}.pkl".format(args.result_dir, args.inference_dataset_name), "wb") as f:
            pickle.dump(meteor_scores, f)
# Run the evaluation only when this file is executed as a script.
if __name__ == "__main__":
    main()
换成这个版本(transformers==4.31.0和torch==2.0.1),报错还是一样,没有变化吗?
报错是在model.generate上,如果不是模型版本问题,我认为问题应该是在input上,可以参考这个:https://huggingface.co/docs/transformers/main/en/model_doc/blip-2#transformers.Blip2ForConditionalGeneration.forward.example
也许可以比较一下正确input的格式和你的现在input格式有什么差别?
嗯嗯是的,版本换了,但是报错还是一样 ---- 回复的原邮件 ---- | 发件人 | Zilun @.> | | 发送日期 | 2023年10月19日 23:27 | | 收件人 | om-ai-lab/RS5M @.> | | 抄送人 | dushigao-cugb @.>, Author @.> | | 主题 | Re: [om-ai-lab/RS5M] clip finetune模型推理报错 (Issue #6) |
换成这个版本(transformers==4.31.0和torch==2.0.1),报错还是一样,没有变化吗?
— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>
嗯嗯是的,版本换了,但是报错还是一样,没有变化 ---- 回复的原邮件 ---- | 发件人 | Zilun @.> | | 发送日期 | 2023年10月19日 23:27 | | 收件人 | om-ai-lab/RS5M @.> | | 抄送人 | dushigao-cugb @.>, Author @.> | | 主题 | Re: [om-ai-lab/RS5M] clip finetune模型推理报错 (Issue #6) |
换成这个版本(transformers==4.31.0和torch==2.0.1),报错还是一样,没有变化吗?
— Reply to this email directly, view it on GitHub, or unsubscribe. You are receiving this because you authored the thread.Message ID: @.***>
单独使用6.7b的权重推理没问题,但是加上lora的权重推理才报错,有可能是和lora权重有关系吗
单独使用6.7b的权重推理没问题,但是加上lora的权重推理才报错,有可能是和lora权重有关系吗
你好,我新建了一个环境,重新装了下torch, transformers, datasets, peft, lightning这些环境,是可以正常跑的。
需要注意的是要 pip install transformers==4.31.0,4.32.0会有模型名字解析的问题。
此外我的torch环境是2.1.0,peft是0.5.0
工作目录layout如下:
.
├── 15datasets_teaser.png
├── blip2_finetune
│ ├── blip2_lora_ckpt
│ │ └── BLIP2-RSITMD-Lora-4-20_5e-05_0.01_30-16
│ │ ├── adapter_config.json
│ │ ├── adapter_model.bin
│ │ └── README.md
│ ├── blip2_peft_inference.py
│ └── Readme.md
├── blip2_lora_dataset
│ ├── coco_test
│ │ ├── metadata.csv
│ │ └── Readme.md
│ ├── README.md
│ ├── RSICD
│ │ └── test
│ │ ├── 00623.jpg
│ │ ├── 00624.jpg
│ │ ├── 00625.jpg
│ │ ├── 00626.jpg
│ │ ├── ......
│ ├── RSITMD
│ │ └── test
│ │ ├── airport_10.tif
│ │ ├── airport_11.tif
│ │ ├── airport_12.tif
│ │ ├── airport_2.tif
│ │ ├── airport_3.tif
│ │ ├── ...
run的指令如下:
python blip2_peft_inference.py --inference_dataset_name "RSITMD" --inference_dataset_dir ../blip2_lora_dataset/RSITMD --use_lora_weight --blip2_lora_weight_dir ./blip2_lora_ckpt/BLIP2-RSITMD-Lora-4-20_5e-05_0.01_30-16
我个人猜测是哪个包更新后不兼容了,或者目录layout有问题
你好 目前是什么情况?
用lora的权重推理出现了如下报错,请问是什么原因,感谢! 2023-10-18 10:04:14.469988: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Could not find TensorRT Seed set to 2023 ---------load dataset--------- Resolving data files: 100% 454/454 [00:00<00:00, 94581.73it/s] ---------load model------ Loading checkpoint shards: 100% 4/4 [03:02<00:00, 45.72s/it] use lora weights for blip2 ---------start test-------------- <PIL.TiffImagePlugin.TiffImageFile image mode=RGB size=256x256 at 0x78FCDA7F6920> Traceback (most recent call last): File "/content/gdrive/MyDrive/blip/blip2_peft_inference.py", line 106, in
main()
File "/content/gdrive/MyDrive/blip/blip2_peft_inference.py", line 71, in main
generated_output = model.generate(
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/models/blip_2/modeling_blip_2.py", line 1880, in generate
outputs = self.language_model.generate(
File "/usr/local/lib/python3.10/dist-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 1644, in generate
input_ids, model_kwargs = self._expand_inputs_for_generation(
File "/usr/local/lib/python3.10/dist-packages/transformers/generation/utils.py", line 742, in _expand_inputs_for_generation
input_ids = input_ids.repeat_interleave(expand_size, dim=0)
File "/usr/local/lib/python3.10/dist-packages/torch/_meta_registrations.py", line 963, in meta_repeat_interleave_Tensor
raise RuntimeError("cannot repeat_interleave a meta tensor without output_size")
RuntimeError: cannot repeat_interleave a meta tensor without output_size