Closed — KeremTurgutlu closed this 6 months ago
```bash
# create llama pro
python scripts/block_expansion.py \
    --model_name meta-llama/Llama-2-7b-hf \
    --output_dir /mnt/vol_b/models \
    --expansion_rate 0.1

# train
python train.py \
    --model_name meta-llama/Llama-2-7b-hf \
    --dataset orca_math \
    --dataset_samples 1000 \
    --batch_size 8 \
    --context_length 1024 \
    --gradient_accumulation_steps 2 \
    --train_type bnb_llama_pro \
    --llama_pro_path /mnt/vol_b/models/meta-llama/Llama-2-7b-hf_blk_exp-32-35/ \
    --sharding_strategy full_shard \
    --use_gradient_checkpointing true \
    --reentrant_checkpointing true \
    --use_cpu_offload false \
    --use_activation_cpu_offload false \
    --log_to wandb \
    --verbose true \
    --project_name "fsdp-dora-tests" \
    --save_model true \
    --output_dir /mnt/vol_b/models/llama-7b-orca-math-1k-bnb-llama-pro
```
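For context, LLaMA Pro-style block expansion interleaves copies of existing decoder layers into the stack, with their output projections zeroed so the expanded model initially computes the same function as the original (here 32 layers are expanded to 35, matching the `_blk_exp-32-35` directory name). Below is a minimal, hypothetical sketch of that idea assuming a Hugging Face `LlamaForCausalLM`; the `expand_blocks` helper and its details are illustrative, not the actual contents of `scripts/block_expansion.py`.

```python
# Hypothetical sketch of LLaMA Pro-style block expansion, assuming a
# Hugging Face LlamaForCausalLM. Names and details are illustrative.
import copy
import torch
from transformers import LlamaForCausalLM

def expand_blocks(model: LlamaForCausalLM, expansion_rate: float = 0.1):
    layers = model.model.layers
    n_orig = len(layers)
    n_new = max(1, int(n_orig * expansion_rate))  # e.g. 32 layers -> 3 new blocks
    step = n_orig // n_new                        # insert one copy every `step` layers

    new_layers = torch.nn.ModuleList()
    for i, layer in enumerate(layers):
        new_layers.append(layer)
        inserted = len(new_layers) - (i + 1)
        if (i + 1) % step == 0 and inserted < n_new:
            block = copy.deepcopy(layer)
            # Zero the output projections so the new block is an identity
            # mapping at init (only the residual path contributes).
            torch.nn.init.zeros_(block.self_attn.o_proj.weight)
            torch.nn.init.zeros_(block.mlp.down_proj.weight)
            new_layers.append(block)

    # Note: a real implementation would also re-assign per-layer indices
    # (layer_idx) used for the KV cache in recent transformers versions.
    model.model.layers = new_layers
    model.config.num_hidden_layers = len(new_layers)
    return model
```

During training only the newly inserted blocks would be left trainable (the original layers stay frozen and quantized under `bnb_llama_pro`), which is what makes this a parameter-efficient fine-tuning setup.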
Results: https://wandb.ai/answerdotai/fsdp-dora-tests