Closed: Guanchaofeng closed this issue 1 year ago.
Please post your run parameters.
deepspeed --num_gpus 8 --master_port 10951 LLaMA-Efficient-Tuning-main/src/train_bash.py \
    --deepspeed ds_config.json \
    --stage sft \
    --model_name_or_path /data/baichuan-inc/Baichuan-13B-Chat \
    --do_train \
    --dataset sft_qac-6k-6k \
    --template baichuan \
    --finetuning_type full \
    --output_dir /data/Tuning-main/models/model_0911/bcchat13b_V3.0/sft-full-1w-2w-qca-6k-6k_0910 \
    --overwrite_cache \
    --per_device_train_batch_size 8 \
    --per_device_eval_batch_size 8 \
    --gradient_accumulation_steps 8 \
    --preprocessing_num_workers 16 \
    --lr_scheduler_type cosine \
    --logging_steps 2 \
    --save_steps 10 \
    --eval_steps 10 \
    --learning_rate 1e-5 \
    --num_train_epochs 1 \
    --val_size 0.01 \
    --evaluation_strategy steps \
    --load_best_model_at_end \
    --max_source_length 512 \
    --plot_loss \
    --fp16 >> logs/log_0911/bcchat13b_sft-full-1w-2w-qca-6k-6k_1e-5_V3.0-0910.log 2>&1 &
Here is the DeepSpeed config:
{
  "fp16": {
    "enabled": "auto",
    "loss_scale": 0,
    "loss_scale_window": 1000,
    "initial_scale_power": 16,
    "hysteresis": 2,
    "min_loss_scale": 1
  },
  "optimizer": {
    "type": "AdamW",
    "params": {
      "lr": "auto",
      "betas": "auto",
      "eps": "auto",
      "weight_decay": "auto"
    }
  },
  "scheduler": {
    "type": "WarmupLR",
    "params": {
      "warmup_min_lr": "1e-08",
      "warmup_max_lr": "auto",
      "warmup_num_steps": "auto"
    }
  },
  "zero_optimization": {
    "stage": 2,
    "offload_optimizer": {
      "device": "cpu",
      "pin_memory": true
    },
    "offload_param": {
      "device": "cpu",
      "pin_memory": true
    },
    "allgather_partitions": true,
    "allgather_bucket_size": 5e8,
    "overlap_comm": true,
    "reduce_scatter": true,
    "reduce_bucket_size": 5e8,
    "contiguous_gradients": true
  },
  "gradient_accumulation_steps": "auto",
  "gradient_clipping": "auto",
  "steps_per_print": 100,
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "wall_clock_breakdown": false
}
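(For reference, if I understand the HF Trainer integration correctly, the "auto" batch fields above are filled from the command-line flags as per_device_train_batch_size × gradient_accumulation_steps × num_gpus, i.e. train_micro_batch_size_per_gpu = 8 and train_batch_size = 8 × 8 × 8 = 512.)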
The problem is in the config you wrote. The scheduler block should be:
"scheduler": {
"type": "WarmupLR",
"params": {
"warmup_min_lr": "auto",
"warmup_max_lr": "auto",
"warmup_num_steps": "auto"
}
},
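As a side note, here is a minimal sketch of a pre-launch check (a hypothetical helper, not part of the repo) that scans a ds_config.json for values like the "1e-08" above. With the HF Trainer integration only the literal string "auto" is replaced at runtime, so other stringified numbers reach DeepSpeed as strings and can trigger a type error in the scheduler:

import json

# Minimal sketch (hypothetical helper, not part of LLaMA-Efficient-Tuning):
# flag config values that are strings other than "auto" in fields that
# DeepSpeed expects to be numeric, e.g. "warmup_min_lr": "1e-08".
NUMERIC_FIELDS = {"lr", "eps", "weight_decay",
                  "warmup_min_lr", "warmup_max_lr", "warmup_num_steps"}

def check_ds_config(path: str) -> None:
    def walk(node, prefix=""):
        if isinstance(node, dict):
            for key, value in node.items():
                walk(value, prefix + key + ".")
        elif isinstance(node, str) and node != "auto":
            leaf = prefix.rstrip(".").split(".")[-1]
            if leaf in NUMERIC_FIELDS:
                print(f'warning: {prefix.rstrip(".")} = "{node}" is a string; '
                      'use a number or "auto"')

    with open(path) as f:
        walk(json.load(f))

check_ds_config("ds_config.json")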
Training worked fine before; this problem only appeared after updating.