Closed — sidhantls closed this 4 weeks ago
Adds support for #2
NUM_TRAIN_SAMPLES=50000 MAX_LEN=256 BETA=1. ACT_AWARE=activation COMP_VALUES=(0.90 0.85 0.80) EVAL_BS=8 BATCH_SIZE=2 LTYPE=adaptive R_LOSS=default LR=1e-3 MODEL=meta-llama/Llama-2-13b-hf CACHE_DIR=/exports/eddie/scratch/s2593541/lrd/cache_train_llama13 LAMBDA=16. GAMMA=1. # Loop over the COMP values for i in ${!COMP_VALUES[@]}; do COMP=${COMP_VALUES[$i]} EXP_NAME="${MODEL#*/}_${LTYPE}_${COMP}_fixmse_${GAMMA}_${LAMBDA}" p_param=0.4 # Check if it's the first iteration if [ $i -eq 0 ]; then # Command for the first iteration without extra arguments python train_adaptive.py --model=$MODEL --target_param_ratio=$COMP --eval_full --batch_size=$BATCH_SIZE --lr=$LR --num_train_samples=$NUM_TRAIN_SAMPLES --exp_name=$EXP_NAME --max_length=$MAX_LEN --cache_dir=$CACHE_DIR --eval_freq_steps=500 --eval_batch_size=$EVAL_BS --alpha=0.5 --lambda=$LAMBDA --gamma=$GAMMA --act_aware=$ACT_AWARE --layer_type=$LTYPE --beta_scale=$BETA --r_loss=$R_LOSS --tau=0.4 --p_param=$p_param --load_act_cache else python train_adaptive.py --model=$MODEL --target_param_ratio=$COMP --eval_full --batch_size=$BATCH_SIZE --lr=$LR --num_train_samples=$NUM_TRAIN_SAMPLES --exp_name=$EXP_NAME --max_length=$MAX_LEN --cache_dir=$CACHE_DIR --eval_freq_steps=500 --eval_batch_size=$EVAL_BS --alpha=0.5 --lambda=$LAMBDA --gamma=$GAMMA --act_aware=$ACT_AWARE --load_act_cache --layer_type=$LTYPE --beta_scale=$BETA --r_loss=$R_LOSS fi done
Adds support for #2