unslothai / unsloth

Finetune Llama 3.2, Mistral, Phi, Qwen 2.5 & Gemma LLMs 2-5x faster with 80% less memory
https://unsloth.ai
Apache License 2.0

Dataset for training a model to translate languages #1271

Closed nichellehouston closed 1 week ago

nichellehouston commented 1 week ago

How do I load CSV and JSONL files to train a model for language translation in a Colab notebook?

```python
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN

def formatting_prompts_func(examples):
    instructions = examples["instruction"]
    inputs       = examples["input"]
    outputs      = examples["output"]
    texts = []
    for instruction, input, output in zip(instructions, inputs, outputs):
        # Must add EOS_TOKEN, otherwise your generation will go on forever!
        text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN
        texts.append(text)
    return { "text" : texts, }
pass

from datasets import load_dataset
dataset = load_dataset("yahma/alpaca-cleaned", split = "train")
dataset = dataset.map(formatting_prompts_func, batched = True,)
```

danielhanchen commented 1 week ago

Hopefully https://huggingface.co/docs/datasets/en/loading#json would be helpful!
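For the CSV/JSONL part of the question, here is a minimal sketch of the two file layouts using only the standard library. The filenames and the column names (`instruction`, `input`, `output`) are assumptions chosen to match `formatting_prompts_func` above — rename them to fit your own translation data:

```python
# Sketch: what a CSV and a JSONL training file look like, and how their rows
# map onto the alpaca_prompt template. Column names are assumptions.
import csv, io, json

alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

# CSV: a header row, then one training example per row.
csv_text = (
    "instruction,input,output\n"
    "Translate to French,Good morning,Bonjour\n"
)
csv_rows = list(csv.DictReader(io.StringIO(csv_text)))

# JSONL: one JSON object per line, with the same keys.
jsonl_text = (
    '{"instruction": "Translate to French", '
    '"input": "Good night", "output": "Bonne nuit"}\n'
)
jsonl_rows = [json.loads(line) for line in jsonl_text.splitlines()]

# Both formats parse to the dicts formatting_prompts_func expects.
for row in csv_rows + jsonl_rows:
    text = alpaca_prompt.format(row["instruction"], row["input"], row["output"])
    print(text.splitlines()[-1])  # last line is the target translation
```

With the Hugging Face `datasets` library (as the linked docs describe), such files load directly via the builder name: `load_dataset("csv", data_files="data.csv", split="train")` or `load_dataset("json", data_files="data.jsonl", split="train")`. The resulting dataset can then go through `dataset.map(formatting_prompts_func, batched=True)` unchanged.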