This is a fork of fairseq. Please see the fairseq documentation for installation instructions.
# Train --arch levenshtein_transformer on the ape_lev task with --prev-target mt.
# "$TRAIN_DIR" is passed as fairseq-train's positional argument (the data
# directory in upstream fairseq; confirm for this fork) and must be set first.
# Checkpoints go to checkpoints/levenshtein_transformer_mt_prev_target.
fairseq-train \
  "$TRAIN_DIR" \
  --save-dir checkpoints/levenshtein_transformer_mt_prev_target \
  --ddp-backend=no_c10d \
  --task ape_lev \
  --criterion nat_loss \
  --arch levenshtein_transformer \
  --noise random_delete \
  --share-all-embeddings \
  --optimizer adam --adam-betas '(0.9,0.98)' \
  --lr 0.0005 --lr-scheduler inverse_sqrt \
  --min-lr '1e-09' --warmup-updates 10000 \
  --warmup-init-lr '1e-07' --label-smoothing 0.1 \
  --dropout 0.3 --weight-decay 0.01 \
  --decoder-learned-pos \
  --encoder-learned-pos \
  --apply-bert-init \
  --fixed-validation-seed 7 \
  --max-tokens 8000 --update-freq 8 \
  --save-interval-updates 10000 --no-epoch-checkpoints \
  --max-update 300000 --source-lang src --target-lang pe --fp16 \
  --num-workers 16 --prev-target mt
# Train --arch multisource_levenshtein_transformer on the ape_lev task with
# --input-type multisource.
# "$TRAIN_DIR" is passed as fairseq-train's positional argument (the data
# directory in upstream fairseq; confirm for this fork) and must be set first.
# Checkpoints go to checkpoints/multisource_levenshtein_transformer.
fairseq-train \
  "$TRAIN_DIR" \
  --save-dir checkpoints/multisource_levenshtein_transformer \
  --ddp-backend=no_c10d \
  --task ape_lev \
  --criterion nat_loss \
  --arch multisource_levenshtein_transformer \
  --noise random_delete \
  --share-all-embeddings \
  --optimizer adam --adam-betas '(0.9,0.98)' \
  --lr 0.0005 --lr-scheduler inverse_sqrt \
  --min-lr '1e-09' --warmup-updates 10000 \
  --warmup-init-lr '1e-07' --label-smoothing 0.1 \
  --dropout 0.3 --weight-decay 0.01 \
  --decoder-learned-pos \
  --encoder-learned-pos \
  --apply-bert-init \
  --fixed-validation-seed 7 \
  --max-tokens 8000 --update-freq 8 \
  --save-interval-updates 10000 --no-epoch-checkpoints \
  --max-update 300000 --source-lang src --target-lang pe --fp16 \
  --num-workers 16 --input-type multisource
# Train --arch multisource_transformer on the ape task with
# label_smoothed_cross_entropy and --input-type multisource.
# "$TRAIN_DIR" is passed as fairseq-train's positional argument (the data
# directory in upstream fairseq; confirm for this fork) and must be set first.
# Checkpoints go to checkpoints/multisource_transformer.
fairseq-train \
  "$TRAIN_DIR" \
  --save-dir checkpoints/multisource_transformer \
  --arch multisource_transformer --share-decoder-input-output-embed \
  --task ape \
  --optimizer adam --adam-betas '(0.9, 0.98)' --clip-norm 0.0 \
  --lr 5e-4 --lr-scheduler inverse_sqrt --warmup-updates 4000 \
  --dropout 0.3 --weight-decay 0.0001 \
  --criterion label_smoothed_cross_entropy --label-smoothing 0.1 \
  --max-tokens 8000 --update-freq 8 \
  --save-interval-updates 10000 --no-epoch-checkpoints \
  --max-update 300000 --source-lang src --target-lang pe --fp16 \
  --num-workers 16 --input-type multisource