sidhantls / adaptive-rank-selection-svd

Implementation of Adaptive Rank Selections for Low-Rank Approximation of Language Models
2 stars 1 fork source link

Multigpu support #4

Closed sidhantls closed 4 weeks ago

sidhantls commented 4 weeks ago

Adds support for #2

# Sweep configuration for train_adaptive.py: one run is launched per entry in
# COMP_VALUES. Each variable is forwarded to the flag named in its comment.
NUM_TRAIN_SAMPLES=50000       # --num_train_samples
MAX_LEN=256                   # --max_length
BETA=1.                       # --beta_scale
ACT_AWARE=activation          # --act_aware
COMP_VALUES=(0.90 0.85 0.80)  # --target_param_ratio, one run per value
EVAL_BS=8                     # --eval_batch_size
BATCH_SIZE=2                  # --batch_size
LTYPE=adaptive                # --layer_type; also part of the experiment name
R_LOSS=default                # --r_loss
LR=1e-3                       # --lr

MODEL=meta-llama/Llama-2-13b-hf  # --model
CACHE_DIR=/exports/eddie/scratch/s2593541/lrd/cache_train_llama13  # --cache_dir
LAMBDA=16.                    # --lambda
GAMMA=1.                      # --gamma

# --p_param; only passed on the first run (loop-invariant, so set once here).
p_param=0.4

# Loop over the COMP values
for i in "${!COMP_VALUES[@]}"; do
    COMP=${COMP_VALUES[$i]}
    # Experiment name: MODEL with everything up to the first "/" stripped,
    # followed by the layer type, the current ratio, gamma and lambda.
    EXP_NAME="${MODEL#*/}_${LTYPE}_${COMP}_fixmse_${GAMMA}_${LAMBDA}"

    # Arguments shared by every run. Kept in an array so each value is passed
    # as exactly one word and the two launch variants cannot drift apart.
    # NOTE(review): flag order differs slightly from the previous hand-written
    # command lines; this assumes train_adaptive.py parses its --key=value
    # options independently of their order - confirm.
    train_args=(
        --model="$MODEL"
        --target_param_ratio="$COMP"
        --eval_full
        --batch_size="$BATCH_SIZE"
        --lr="$LR"
        --num_train_samples="$NUM_TRAIN_SAMPLES"
        --exp_name="$EXP_NAME"
        --max_length="$MAX_LEN"
        --cache_dir="$CACHE_DIR"
        --eval_freq_steps=500
        --eval_batch_size="$EVAL_BS"
        --alpha=0.5
        --lambda="$LAMBDA"
        --gamma="$GAMMA"
        --act_aware="$ACT_AWARE"
        --layer_type="$LTYPE"
        --beta_scale="$BETA"
        --r_loss="$R_LOSS"
        --load_act_cache
    )

    # Only the first run additionally receives --tau and --p_param.
    # NOTE(review): the earlier comment claimed the first run had *no* extra
    # arguments, which contradicted the code; confirm whether later runs are
    # really meant to fall back to the script's defaults for these two flags.
    if (( i == 0 )); then
        train_args+=(--tau=0.4 --p_param="$p_param")
    fi

    # A failing run does not abort the sweep; the next ratio is still tried.
    python train_adaptive.py "${train_args[@]}"
done