Open guangdongliang opened 3 months ago
The dataset has 4 labels. The eval loss increases while the train loss decreases, so I think the model is overfitting. The code is below:
from setfit import SetFitModel, Trainer, TrainingArguments, sample_dataset, SetFitModelCardData import ujson as json from datasets import load_dataset, DatasetDict, Dataset from transformers import EarlyStoppingCallback import datetime def convert_files_to_dataset(train_path, val_path): # 定义一个函数来读取单个文件 def read_file(file_path): with open(file_path, 'r', encoding='utf-8') as f: lines = f.readlines() data = [json.loads(line.strip()) for line in lines] return data # 读取训练集和验证集 train_data = read_file(train_path) val_data = read_file(val_path) # 将数据转换为Dataset train_dataset = Dataset.from_list(train_data) val_dataset = Dataset.from_list(val_data) # 创建DatasetDict dataset_dict = DatasetDict({'train': train_dataset, 'validation': val_dataset}) return dataset_dict # 使用函数 train_path = 'train_cn.txt' val_path = 'val_cn.txt' dataset = convert_files_to_dataset(train_path, val_path) from setfit import sample_dataset train_dataset = sample_dataset(dataset["train"], num_samples=50) print(train_dataset) eval_dataset = dataset["validation"] print(eval_dataset) from setfit import SetFitModel model = SetFitModel.from_pretrained('sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2', model_card_data=SetFitModelCardData( language=['en', 'de', 'nl'], )) from sentence_transformers.losses import CosineSimilarityLoss from setfit import SetFitTrainer trainer = SetFitTrainer( model=model, train_dataset=train_dataset, eval_dataset=eval_dataset, loss_class=CosineSimilarityLoss, num_iterations=20, num_epochs=5 ) trainer.train() metrics = trainer.evaluate() print(metrics)``` log is as below:
Dataset({ features: ['text', 'label'], num_rows: 200 }) Dataset({ features: ['text', 'label'], num_rows: 40 }) /usr/local/matrix/conda3/envs/peft/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: resume_download is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use force_download=True. warnings.warn( model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference. b.py:51: DeprecationWarning: SetFitTrainer has been deprecated and will be removed in v2.0.0 of SetFit. Please use Trainer instead. trainer = SetFitTrainer( Using evaluation_strategy="steps" as eval_steps is defined. Map: 100%|████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 12981.44 examples/s] Running training Num unique pairs = 8000 Batch size = 16 Num epochs = 5 Total optimization steps = 2500 0%| | 0/2500 [00:00<?, ?it/s] {'embedding_loss': 0.3173, 'learning_rate': 8e-08, 'epoch': 0.0} | 0/2500 [00:00<?, ?it/s] {'embedding_loss': 0.2875, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1} {'eval_embedding_loss': 0.236, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1} {'embedding_loss': 0.2662, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2} {'eval_embedding_loss': 0.2351, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2} {'embedding_loss': 0.2741, 'learning_rate': 1.2e-05, 'epoch': 0.3} {'eval_embedding_loss': 0.2319, 'learning_rate': 1.2e-05, 'epoch': 0.3} {'embedding_loss': 0.2747, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4} {'eval_embedding_loss': 0.2331, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4} {'embedding_loss': 0.196, 'learning_rate': 2e-05, 'epoch': 0.5} {'eval_embedding_loss': 0.2297, 'learning_rate': 2e-05, 'epoch': 0.5} {'embedding_loss': 0.1512, 'learning_rate': 
1.9555555555555557e-05, 'epoch': 0.6} {'eval_embedding_loss': 0.2387, 'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6} {'embedding_loss': 0.0866, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7} {'eval_embedding_loss': 0.248, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7} {'embedding_loss': 0.0437, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8} {'eval_embedding_loss': 0.2427, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8} {'embedding_loss': 0.07, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9} {'eval_embedding_loss': 0.2474, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9} {'embedding_loss': 0.0332, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0} {'eval_embedding_loss': 0.2587, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0} {'embedding_loss': 0.0125, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1} {'eval_embedding_loss': 0.2573, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1} {'embedding_loss': 0.0023, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2} {'eval_embedding_loss': 0.2648, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2} {'embedding_loss': 0.0033, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3} {'eval_embedding_loss': 0.2659, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3} {'embedding_loss': 0.0011, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4} {'eval_embedding_loss': 0.2692, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4} {'embedding_loss': 0.0007, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5} {'eval_embedding_loss': 0.2687, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5} {'embedding_loss': 0.001, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6} {'eval_embedding_loss': 0.2739, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6} {'embedding_loss': 0.0012, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7} {'eval_embedding_loss': 0.2707, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7} {'embedding_loss': 
0.0005, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8} {'eval_embedding_loss': 0.2684, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8} {'embedding_loss': 0.0006, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9} {'eval_embedding_loss': 0.2756, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9} {'embedding_loss': 0.0003, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0} {'eval_embedding_loss': 0.2698, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0} {'embedding_loss': 0.0007, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1} {'eval_embedding_loss': 0.2745, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1} {'embedding_loss': 0.0004, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2} {'eval_embedding_loss': 0.2771, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2} {'embedding_loss': 0.0005, 'learning_rate': 1.2e-05, 'epoch': 2.3} {'eval_embedding_loss': 0.2742, 'learning_rate': 1.2e-05, 'epoch': 2.3} {'embedding_loss': 0.0007, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4} {'eval_embedding_loss': 0.2719, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4} {'embedding_loss': 0.0002, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5} {'eval_embedding_loss': 0.2782, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5} {'embedding_loss': 0.0002, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6} {'eval_embedding_loss': 0.2721, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6} {'embedding_loss': 0.0002, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7} {'eval_embedding_loss': 0.2743, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7} {'embedding_loss': 0.0003, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8} {'eval_embedding_loss': 0.2822, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8} {'embedding_loss': 0.0003, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9} {'eval_embedding_loss': 0.2758, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9} {'embedding_loss': 0.0004, 
'learning_rate': 8.888888888888888e-06, 'epoch': 3.0} {'eval_embedding_loss': 0.2764, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0} {'embedding_loss': 0.0004, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1} {'eval_embedding_loss': 0.2798, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1} {'embedding_loss': 0.0002, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2} {'eval_embedding_loss': 0.2769, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2} {'embedding_loss': 0.0004, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3} {'eval_embedding_loss': 0.2766, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3} {'embedding_loss': 0.0002, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4} {'eval_embedding_loss': 0.2833, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4} {'embedding_loss': 0.0002, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5} {'eval_embedding_loss': 0.2755, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5}```
resume_download
force_download=True
SetFitTrainer
Trainer
evaluation_strategy="steps"
eval_steps
I would decrease the num_iterations parameter to 5 and see how training behaves with that setting.
num_iterations
5
The dataset has 4 labels. The eval loss increases while the train loss decreases, so I think the model is overfitting. The code is below:
code
Dataset({ features: ['text', 'label'], num_rows: 200 }) Dataset({ features: ['text', 'label'], num_rows: 40 }) /usr/local/matrix/conda3/envs/peft/lib/python3.8/site-packages/huggingface_hub/file_download.py:1132: FutureWarning: resume_download is deprecated and will be removed in version 1.0.0. Downloads always resume when possible. If you want to force a new download, use force_download=True. warnings.warn( model_head.pkl not found on HuggingFace Hub, initialising classification head with random weights. You should TRAIN this model on a downstream task to use it for predictions and inference. b.py:51: DeprecationWarning: SetFitTrainer has been deprecated and will be removed in v2.0.0 of SetFit. Please use Trainer instead. trainer = SetFitTrainer( Using evaluation_strategy="steps" as eval_steps
is defined. Map: 100%|████████████████████████████████████████████████████████████████████████████| 200/200 [00:00<00:00, 12981.44 examples/s] Running training Num unique pairs = 8000 Batch size = 16 Num epochs = 5 Total optimization steps = 2500 0%| | 0/2500 [00:00<?, ?it/s] {'embedding_loss': 0.3173, 'learning_rate': 8e-08, 'epoch': 0.0} | 0/2500 [00:00<?, ?it/s] {'embedding_loss': 0.2875, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1} {'eval_embedding_loss': 0.236, 'learning_rate': 4.000000000000001e-06, 'epoch': 0.1} {'embedding_loss': 0.2662, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2} {'eval_embedding_loss': 0.2351, 'learning_rate': 8.000000000000001e-06, 'epoch': 0.2} {'embedding_loss': 0.2741, 'learning_rate': 1.2e-05, 'epoch': 0.3} {'eval_embedding_loss': 0.2319, 'learning_rate': 1.2e-05, 'epoch': 0.3} {'embedding_loss': 0.2747, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4} {'eval_embedding_loss': 0.2331, 'learning_rate': 1.6000000000000003e-05, 'epoch': 0.4} {'embedding_loss': 0.196, 'learning_rate': 2e-05, 'epoch': 0.5} {'eval_embedding_loss': 0.2297, 'learning_rate': 2e-05, 'epoch': 0.5} {'embedding_loss': 0.1512, 'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6} {'eval_embedding_loss': 0.2387, 'learning_rate': 1.9555555555555557e-05, 'epoch': 0.6} {'embedding_loss': 0.0866, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7} {'eval_embedding_loss': 0.248, 'learning_rate': 1.9111111111111113e-05, 'epoch': 0.7} {'embedding_loss': 0.0437, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8} {'eval_embedding_loss': 0.2427, 'learning_rate': 1.866666666666667e-05, 'epoch': 0.8} {'embedding_loss': 0.07, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9} {'eval_embedding_loss': 0.2474, 'learning_rate': 1.8222222222222224e-05, 'epoch': 0.9} {'embedding_loss': 0.0332, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0} {'eval_embedding_loss': 0.2587, 'learning_rate': 1.7777777777777777e-05, 'epoch': 1.0} 
{'embedding_loss': 0.0125, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1} {'eval_embedding_loss': 0.2573, 'learning_rate': 1.7333333333333336e-05, 'epoch': 1.1} {'embedding_loss': 0.0023, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2} {'eval_embedding_loss': 0.2648, 'learning_rate': 1.688888888888889e-05, 'epoch': 1.2} {'embedding_loss': 0.0033, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3} {'eval_embedding_loss': 0.2659, 'learning_rate': 1.6444444444444444e-05, 'epoch': 1.3} {'embedding_loss': 0.0011, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4} {'eval_embedding_loss': 0.2692, 'learning_rate': 1.6000000000000003e-05, 'epoch': 1.4} {'embedding_loss': 0.0007, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5} {'eval_embedding_loss': 0.2687, 'learning_rate': 1.555555555555556e-05, 'epoch': 1.5} {'embedding_loss': 0.001, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6} {'eval_embedding_loss': 0.2739, 'learning_rate': 1.5111111111111112e-05, 'epoch': 1.6} {'embedding_loss': 0.0012, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7} {'eval_embedding_loss': 0.2707, 'learning_rate': 1.4666666666666666e-05, 'epoch': 1.7} {'embedding_loss': 0.0005, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8} {'eval_embedding_loss': 0.2684, 'learning_rate': 1.4222222222222224e-05, 'epoch': 1.8} {'embedding_loss': 0.0006, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9} {'eval_embedding_loss': 0.2756, 'learning_rate': 1.377777777777778e-05, 'epoch': 1.9} {'embedding_loss': 0.0003, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0} {'eval_embedding_loss': 0.2698, 'learning_rate': 1.3333333333333333e-05, 'epoch': 2.0} {'embedding_loss': 0.0007, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1} {'eval_embedding_loss': 0.2745, 'learning_rate': 1.288888888888889e-05, 'epoch': 2.1} {'embedding_loss': 0.0004, 'learning_rate': 1.2444444444444446e-05, 'epoch': 2.2} {'eval_embedding_loss': 0.2771, 'learning_rate': 
1.2444444444444446e-05, 'epoch': 2.2} {'embedding_loss': 0.0005, 'learning_rate': 1.2e-05, 'epoch': 2.3} {'eval_embedding_loss': 0.2742, 'learning_rate': 1.2e-05, 'epoch': 2.3} {'embedding_loss': 0.0007, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4} {'eval_embedding_loss': 0.2719, 'learning_rate': 1.1555555555555556e-05, 'epoch': 2.4} {'embedding_loss': 0.0002, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5} {'eval_embedding_loss': 0.2782, 'learning_rate': 1.1111111111111113e-05, 'epoch': 2.5} {'embedding_loss': 0.0002, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6} {'eval_embedding_loss': 0.2721, 'learning_rate': 1.0666666666666667e-05, 'epoch': 2.6} {'embedding_loss': 0.0002, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7} {'eval_embedding_loss': 0.2743, 'learning_rate': 1.0222222222222223e-05, 'epoch': 2.7} {'embedding_loss': 0.0003, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8} {'eval_embedding_loss': 0.2822, 'learning_rate': 9.777777777777779e-06, 'epoch': 2.8} {'embedding_loss': 0.0003, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9} {'eval_embedding_loss': 0.2758, 'learning_rate': 9.333333333333334e-06, 'epoch': 2.9} {'embedding_loss': 0.0004, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0} {'eval_embedding_loss': 0.2764, 'learning_rate': 8.888888888888888e-06, 'epoch': 3.0} {'embedding_loss': 0.0004, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1} {'eval_embedding_loss': 0.2798, 'learning_rate': 8.444444444444446e-06, 'epoch': 3.1} {'embedding_loss': 0.0002, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2} {'eval_embedding_loss': 0.2769, 'learning_rate': 8.000000000000001e-06, 'epoch': 3.2} {'embedding_loss': 0.0004, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3} {'eval_embedding_loss': 0.2766, 'learning_rate': 7.555555555555556e-06, 'epoch': 3.3} {'embedding_loss': 0.0002, 'learning_rate': 7.111111111111112e-06, 'epoch': 3.4} {'eval_embedding_loss': 0.2833, 'learning_rate': 
7.111111111111112e-06, 'epoch': 3.4} {'embedding_loss': 0.0002, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5} {'eval_embedding_loss': 0.2755, 'learning_rate': 6.666666666666667e-06, 'epoch': 3.5}```