Open srashtchi opened 2 years ago
If you need to see the whole code before this section I faced error here is a code:
import torch
use_cuda = torch.cuda.is_available()
device = torch.device("cuda:2" if use_cuda else "cpu")
print("Running on: ",device)
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from datasets import load_dataset, load_metric
import pandas as pd
from tqdm import tqdm
dataset_samsum = load_dataset("samsum") #document
model ="google/pegasus-large"
tokenizer = AutoTokenizer.from_pretrained(model)
rouge_metric = load_metric("rouge", cache_dir=None)
rouge_names = ["rouge1", "rouge2", "rougeL", "rougeLsum"]
def chunks(list_of_elements, batch_size):
"""Yield successive batch-sized chunks from list_of_elements."""
for i in range(0, len(list_of_elements), batch_size):
yield list_of_elements[i: i + batch_size]
def evaluate_summaries_pegasus(dataset, metric, model, tokenizer,
batch_size=16, device=device,
column_text="dialogue",
column_summary="summary"):
article_batches = list(chunks(dataset[column_text], batch_size))
target_batches = list(chunks(dataset[column_summary], batch_size))
for article_batch, target_batch in tqdm(
zip(article_batches, target_batches), total=len(article_batches)):
inputs = tokenizer(article_batch, max_length=1024, truncation=True,
padding="max_length", return_tensors="pt")
summaries = model.generate(input_ids=inputs["input_ids"].to(device),
attention_mask=inputs["attention_mask"].to(device),
length_penalty=0.8, num_beams=8, max_length=128)
decoded_summaries = [tokenizer.decode(s, skip_special_tokens=True,
clean_up_tokenization_spaces=True)
for s in summaries]
decoded_summaries = [d.replace("", " ") for d in decoded_summaries]
metric.add_batch(predictions=decoded_summaries, references=target_batch)
score = metric.compute()
return score
def convert_examples_to_features(example_batch):
input_encodings = tokenizer(example_batch["dialogue"], max_length=1024,
truncation=True)
with tokenizer.as_target_tokenizer():
target_encodings = tokenizer(example_batch["summary"], max_length=128,
truncation=True)
return {"input_ids": input_encodings["input_ids"],
"attention_mask": input_encodings["attention_mask"],
"labels": target_encodings["input_ids"]}
dataset_samsum_pt = dataset_samsum.map(convert_examples_to_features,batched=True)
columns = ["input_ids", "labels", "attention_mask"]
dataset_samsum_pt.set_format(type="torch", columns=columns)
from transformers import DataCollatorForSeq2Seq
seq2seq_data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)
from transformers import TrainingArguments, Trainer
training_args = TrainingArguments(
output_dir='pegasus-samsum', num_train_epochs=1, warmup_steps=500,
per_device_train_batch_size=1, per_device_eval_batch_size=1,
weight_decay=0.01, logging_steps=10,
evaluation_strategy='steps', eval_steps=500, save_steps=1e6,
gradient_accumulation_steps=16)
trainer = Trainer(model=model,
args=training_args,
tokenizer=tokenizer, data_collator=seq2seq_data_collator,
train_dataset=dataset_samsum_pt["train"],
eval_dataset=dataset_samsum_pt["validation"])
Does this issue similar to #46 issue raised in Apr? if it does why with the fix it still exist? @lvwerra
model ="google/pegasus-large" This is a string (model name) model = AutoModelForSeq2SeqLM.from_pretrained("google/pegasus-large") This should resolve your issue @srashtchi
Information
The problem arises in chapter:
Describe the bug
Steps to reproduce the behavior:
Run the notebook on a CUDA/GPU enabled device- A100 card
Trainer() fails withwith the following error:
Traceback (most recent call last): File "/home/shabnam/anaconda3/envs/rapids-22.08/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3378, in run_code exec(code_obj, self.user_global_ns, self.user_ns) File "", line 1, in
trainer = Trainer(model=model, args=training_args,
File "/home/shabnam/anaconda3/envs/rapids-22.08/lib/python3.9/site-packages/transformers/trainer.py", line 450, in init
self._move_model_to_device(model, args.device)
File "/home/shabnam/anaconda3/envs/rapids-22.08/lib/python3.9/site-packages/transformers/trainer.py", line 722, in _move_model_to_device
model = model.to(device)
AttributeError: 'str' object has no attribute 'to'
Expected behavior: training ...