It is quite surprising that the version using pre-trained word embeddings performs worse than the one using random word embeddings. I am not sure whether my configuration is wrong; it is as follows:
train.json:
{
  "encoder": "gru",
  "encoder_dim": 1200,
  "bidir": true,
  "case_sensitive": true,
  "checkpoint_path": "",
  "vocab_configs": [
    {
      "mode": "fixed",
      "name": "word_embedding",
      "cap": false,
      "dim": 200,
      "size": 1133884,
      "vocab_file": "/nfs/private/FST/models/embeddings/glove.840B.300d.txt",
      "embs_file": ""
    }
  ]
}
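For the vocab config above, my understanding of the "fixed" mode is that the word vectors are read from the GloVe file and then kept frozen during training. Roughly like this (a minimal sketch in plain Python/NumPy, not the project's actual loading code; load_glove is my own helper):

import numpy as np

def load_glove(vocab_file, dim=300):
    """Read a GloVe text file into a word -> vector dict (sketch only)."""
    vectors = {}
    with open(vocab_file, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.rstrip().split(" ")
            # glove.840B.300d contains a few multi-token keys, so take the
            # last `dim` fields as the vector and join the rest as the word.
            word = " ".join(parts[:-dim])
            vectors[word] = np.asarray(parts[-dim:], dtype=np.float32)
    return vectors

glove = load_glove("/nfs/private/FST/models/embeddings/glove.840B.300d.txt", dim=300)
print(len(glove))                       # should correspond to "size" in train.json
print(len(next(iter(glove.values()))))  # should correspond to "dim" in train.json

# In "fixed" mode the embedding matrix is (as I understand it) built from these
# vectors once and not updated by gradients during training.
embedding_matrix = np.stack(list(glove.values())).astype(np.float32)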
run.sh:
RESULTS_HOME="results"
MDL_CFGS="model_configs"
GLOVE_PATH="/nfs/private/FST/models/embeddings/"
DATA_DIR="data/CS_10M_pretrained/TFRecords"
NUM_INST=10000000 # Number of sentences
CFG="CS_10M_pretrained"
BS=400
SEQ_LEN=30

export CUDA_VISIBLE_DEVICES=0
python src/train.py \
  --input_file_pattern="$DATA_DIR/train-?????-of-00100" \
  --train_dir="$RESULTS_HOME/$CFG/train" \
  --learning_rate_decay_factor=0 \
  --batch_size=$BS \
  --sequence_length=$SEQ_LEN \
  --nepochs=1 \
  --num_train_inst=$NUM_INST \
  --save_model_secs=1800 \
  --Glove_path=$GLOVE_PATH \
  --model_config="$MDL_CFGS/$CFG/train.json" &

export CUDA_VISIBLE_DEVICES=1
python src/eval.py \
  --input_file_pattern="$DATA_DIR/validation-?????-of-00001" \
  --checkpoint_dir="$RESULTS_HOME/$CFG/train" \
  --eval_dir="$RESULTS_HOME/$CFG/eval" \
  --batch_size=$BS \
  --model_config="$MDL_CFGS/$CFG/train.json" \
  --eval_interval_secs=1800 \
  --sequence_length=$SEQ_LEN &
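To double-check whether the trained checkpoint actually contains the GloVe vectors rather than a random initialization, I was planning to inspect it roughly like this (a sketch only; the variable name "word_embedding" is a guess based on the config above and should be replaced by whatever list_variables reports):

import tensorflow as tf

ckpt_dir = "results/CS_10M_pretrained/train"

# List every variable stored in the latest checkpoint together with its shape.
for name, shape in tf.train.list_variables(ckpt_dir):
    print(name, shape)

# Load the embedding table (the name is a guess; use one printed above)
# and spot-check a few rows against the GloVe vectors loaded earlier.
emb = tf.train.load_variable(ckpt_dir, "word_embedding")
print(emb.shape)
print(emb[:2, :5])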