[Closed] thistleknot closed this issue 1 week ago.
Hi, I found that it seems you are using TinyLlama for training instead of our model...
# Specify the model name (replace '' with your actual model name)
model_name = 'TinyLlama/TinyLlama-1.1B-Chat-v1.0'
Yeah, I don't understand. Am I supposed to use literally '', or what is the model name? My understanding is that you show how to specify the model using the config, but not any tokenizer or model name.
The instructions left me guessing that I was supposed to pick a tokenizer (similar to Mamba).
Can you provide a complete working example, or tell me what would work in place of '' (or is it just '')?
I re-reviewed the README and saw a models link: https://huggingface.co/ridger/MMfreeLM-2.7B. Will try those.
Revised code:
from mmfreelm.models import HGRNBitConfig
from transformers import AutoModel, AutoTokenizer, TrainingArguments, Trainer
from datasets import load_dataset, DatasetDict, Dataset
import os
import mmfreelm  # importing registers the HGRNBit classes with transformers

os.environ["WANDB_MODE"] = "offline"

model_name_or_path = "ridger/MMfreeLM-2.7B"
# config = 'ridger/MMfreeLM-2.7B'  # HGRNBitConfig()
model = AutoModel.from_pretrained(model_name_or_path)

batch_size = 6
training_args = TrainingArguments(
    # eval_strategy='steps',
    output_dir='./results',
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    eval_steps=1,
    gradient_accumulation_steps=1,
    optim='paged_lion_8bit',
    gradient_checkpointing_kwargs={"use_reentrant": False},
    # lr=0.000003: loss starts at 6; lr=0.00003: loss starts at 2.6
    learning_rate=0.001,
    warmup_ratio=0.1,
    adam_beta2=0.95,
    adam_epsilon=0.00001,
    max_grad_norm=1.0,
    lr_scheduler_type='cosine',
    logging_dir='./logs',
    logging_steps=1,
    do_train=True
)

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, model_max_length=2048,
    padding_side="right", use_fast=False, add_eos_token=True, add_bos_token=True)
tokenizer.pad_token = tokenizer.eos_token

# Load the dataset
dataset = load_dataset("Abirate/english_quotes")

# Filter the quotes based on length
filtered_quotes = [q for q in dataset['train'] if 23 < len(q['quote']) < 140]

# Create a new dataset with the filtered quotes
filtered_dataset = Dataset.from_dict({'quote': [q['quote'] for q in filtered_quotes]})

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples['quote'], truncation=True, padding='max_length', max_length=128)

tokenized_datasets = filtered_dataset.map(tokenize_function, batched=True)

# Split the dataset into training and validation
split = tokenized_datasets.train_test_split(test_size=0.2, seed=42)
dataset_dict = DatasetDict(train=split['train'], validation=split['test'])

# Define training arguments (note: this second definition replaces the one above)
training_args = TrainingArguments(
    output_dir='./results',
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=3,
    evaluation_strategy="steps",
    eval_steps=1,
    gradient_accumulation_steps=1,
    learning_rate=0.001,
    warmup_ratio=0.1,
    logging_dir='./logs',
    logging_steps=1
)

# Initialize the trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset_dict['train'],
    eval_dataset=dataset_dict['validation'],
    tokenizer=tokenizer
)

# Train the model
trainer.train()
but the kernel bombs out.
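One way to surface the underlying error before the process dies is to force synchronous CUDA kernel launches and run the script from a terminal rather than a notebook; a minimal sketch (the variable must be set before any CUDA work):

import os

# Make CUDA kernel launches synchronous so the failing Triton/CUDA call
# raises a visible Python error instead of silently killing the kernel.
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"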
Hi, does it work for inference instead of training? And do you use an NVIDIA GPU for training?
That's a great troubleshooting step.
The first error I made was using AutoModel; I changed it to the model's own class definition.
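For context, the generic AutoModel resolves to the bare HGRNBit backbone without the language-modeling head, which is presumably why it cannot produce a LM loss for Trainer or be used for generation. A minimal sketch of the swap (assuming mmfreelm registers its model classes on import):

import mmfreelm  # importing registers the HGRNBit classes
from mmfreelm.models import HGRNBitForCausalLM

# The causal-LM class adds the LM head that AutoModel's bare backbone lacks.
model = HGRNBitForCausalLM.from_pretrained("ridger/MMfreeLM-2.7B")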
Then I tried inference:
from mmfreelm.models import HGRNBitForCausalLM
from transformers import AutoTokenizer
import os

os.environ["WANDB_MODE"] = "offline"

model_name_or_path = "ridger/MMfreeLM-2.7B"
model = HGRNBitForCausalLM.from_pretrained(model_name_or_path)
model.to('cuda')

# (the TrainingArguments from the training attempt are unused for inference and omitted here)

tokenizer = AutoTokenizer.from_pretrained(
    model_name_or_path, model_max_length=2048,
    padding_side="right", use_fast=False, add_eos_token=True, add_bos_token=True)
tokenizer.pad_token = tokenizer.eos_token

# Generate text using the model
input_prompt = "In a shocking finding, scientists discovered a herd of unicorns living in a remote, "
input_ids = tokenizer(input_prompt, return_tensors="pt").input_ids.cuda()
outputs = model.generate(input_ids, max_length=32, do_sample=True, top_p=0.4, temperature=0.6)
print(tokenizer.batch_decode(outputs, skip_special_tokens=True)[0])
Results in the kernel quitting (no console output).
NVIDIA, yes (CUDA 12.2, Python 3.10).
Hmmm, have you installed triton==2.2?
Yes, I updated the first post with my environment.
Thanks! I see. I checked your environment and found that your compute capability is 6.0, indicating you are using the Pascal architecture. Pascal may not be well supported by Triton, which may be what leads to this problem, I guess... I used our A100 and H100 to test our code, which works well...
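For anyone checking which architecture they are on, the compute capability can be read directly from PyTorch; a minimal sketch (device index 0 assumed):

import torch

# (6, 0) indicates Pascal; A100 reports (8, 0) and H100 reports (9, 0).
print(torch.cuda.get_device_capability(0))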
triton 2.2.0, torch 2.2.0, einops 0.7.0, compute capability 6.0, Rocky Linux 9, CUDA 12.2, Python 3.10
[screenshot: setup]
[screenshot: error]