how to load cvs and jsonl file to train model for translate language in colab notebook.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
Instruction:
{}
Input:
{}
Response:
{}"""
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
instructions = examples["instruction"]
inputs = examples["input"]
outputs = examples["output"]
texts = []
for instruction, input, output in zip(instructions, inputs, outputs):
Must add EOS_TOKEN, otherwise your generation will go on forever!
how to load cvs and jsonl file to train model for translate language in colab notebook.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
Instruction:
{}
Input:
{}
Response:
{}"""
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN def formatting_prompts_func(examples): instructions = examples["instruction"] inputs = examples["input"] outputs = examples["output"] texts = [] for instruction, input, output in zip(instructions, inputs, outputs):
Must add EOS_TOKEN, otherwise your generation will go on forever!
pass
from datasets import load_dataset dataset = load_dataset("yahma/alpaca-cleaned", split = "train") dataset = dataset.map(formatting_prompts_func, batched = True,)