Open jiuniZZzz opened 2 weeks ago
要用mindspore2.3
复现代码提供完整的
用2.3版本的mindspore CPU 训练会报错如下 GPU太贵了
df = pd.read_fwf('./IAM/gt_test.txt', header=None)
df.rename(columns={0: "file_name", 1: "text"}, inplace=True)
del df[2]
# some file names end with jp instead of jpg, let's fix this
df['file_name'] = df['file_name'].apply(lambda x: x + 'g' if x.endswith('jp') else x)
# print(df.head())
train_df, test_df = train_test_split(df, test_size=0.2)
# we reset the indices to start from zero
train_df.reset_index(drop=True, inplace=True)
test_df.reset_index(drop=True, inplace=True)
class IAMDataset():
def __init__(self, root_dir, df, processor, max_target_length=128):
super(IAMDataset).__init__()
self.root_dir = root_dir
self.df = df
self.processor = processor
self.max_target_length = max_target_length
def __len__(self):
return len(self.df)
def __getitem__(self, idx):
# get file name + text
file_name = self.df['file_name'][idx]
text = self.df['text'][idx]
# prepare image (i.e. resize + normalize)
image = Image.open(self.root_dir + '/' + file_name).convert("RGB")
pixel_values = self.processor(image, return_tensors="ms").pixel_values
# add labels (input_ids) by encoding the text
labels = self.processor.tokenizer(text,
padding="max_length",
max_length=self.max_target_length).input_ids
# important: make sure that PAD tokens are ignored by the loss function
labels = [label if label != self.processor.tokenizer.pad_token_id else -100 for label in labels]
# encoding = {: , : }
return pixel_values.squeeze(), Tensor(labels)
model_path1 = "C:\\Users\\virgo\\Documents\\GitHub\\trocr-base-printed"
model_path2 = "C:\\Users\\virgo\\Documents\\GitHub\\trocr-base-stage1"
processor = TrOCRProcessor.from_pretrained(model_path1)
train_dataset = IAMDataset(root_dir='./IAM/image',
df=train_df,
processor=processor)
eval_dataset = IAMDataset(root_dir='./IAM/image',
df=test_df,
processor=processor)
train_dataloader = GeneratorDataset(train_dataset, column_names=["pixel_values", "labels"], shuffle=True).batch(4)
eval_dataloader = GeneratorDataset(eval_dataset, column_names=["pixel_values", "labels"], shuffle=True)
model = VisionEncoderDecoderModel.from_pretrained(model_path2)
# set special tokens used for creating the decoder_input_ids from the labels
model.config.decoder_start_token_id = processor.tokenizer.cls_token_id
model.config.pad_token_id = processor.tokenizer.pad_token_id
# make sure vocab size is set correctly
model.config.vocab_size = model.config.decoder.vocab_size
# set beam search parameters
model.config.eos_token_id = processor.tokenizer.sep_token_id
model.config.max_length = 64
model.config.early_stopping = True
model.config.no_repeat_ngram_size = 3
model.config.length_penalty = 2.0
model.config.num_beams = 4
cer_metric = evaluate.load("cer")
def compute_cer(pred_ids, label_ids):
pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
label_ids[label_ids == -100] = processor.tokenizer.pad_token_id
label_str = processor.batch_decode(label_ids, skip_special_tokens=True)
cer = cer_metric.compute(predictions=pred_str, references=label_str)
return cer
optimizer = AdamW(model.parameters(), lr=5e-5)
for epoch in range(1):
# train
model.train()
train_loss = 0.0
for bacth in tqdm(eval_dataloader.create_dict_iterator()):
pixel_values = bacth['pixel_values']
labels = bacth['labels']
def compute_loss(pixel_values, labels):
outputs = model(pixel_values=pixel_values, labels=labels)
loss = outputs.loss
return loss
grad_fn = mindspore.value_and_grad(fn=compute_loss, weights=model.parameters())
loss, grads = grad_fn(pixel_values, labels)
optimizer.step(loss)
数据类型有问题
Describe the bug/ 问题描述 (Mandatory / 必填) GPU环境 TrOCR预训练模型微调 求梯度时报错 RuntimeError: The pointer[tensor] is null.
Hardware Environment(
Ascend
/GPU
/CPU
) / 硬件环境:Software Environment / 软件环境 (Mandatory / 必填): -- MindSpore version (e.g., 1.7.0.Bxxx) : MindSpore 2.2.14 -- Python version (e.g., Python 3.7.5) :Python 3.9.19 -- OS platform and distribution (e.g., Linux Ubuntu 16.04):ubuntu18.04 -- GCC/Compiler version (if compiled from source):7.5
Excute Mode / 执行模式 (Mandatory / 必填)(
PyNative
/Graph
):To Reproduce / 重现步骤 (Mandatory / 必填) model
dataset
运行代码
'''
for epoch in range(1):
train
''' Expected behavior / 预期结果 (Mandatory / 必填) 正常运行
Screenshots/ 日志 / 截图 (Mandatory / 必填) If applicable, add screenshots to help explain your problem.
Additional context / 备注 (Optional / 选填) Add any other context about the problem here.