Open raki-1203 opened 3 years ago
1기에 사용한 모델 거의 그대로 사용해봤는데 성적이 좋아지질 않네요 데이터셋 자체에서 좀 걸러지는게 있어야 성능이 좋아질지는 테스트 해봐야겠습니다.
roberta-small with convolution batch 128
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-small --run_name roberta-small_cnn_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model convolution
roberta-small with convolution batch 128
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-small --run_name roberta-small_cnn_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model convolution
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-small_cnn_batch_128_concat_5 --run_name roberta-small_cnn_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model convolution
> roberta-large with convolution batch 128
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_cnn_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model convolution
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_cnn_batch_128_concat_5 --run_name roberta-large_cnn_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model convolution
> roberta-large with convolution batch 160
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_cnn_batch_160_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 10 --num_train_epochs 20 --additional_model convolution
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_cnn_batch_160_concat_5 --run_name roberta-large_cnn_batch_160_concat_5 --elastic_index_name preprocess-wiki-index --additional_model convolution
> roberta-large with QAconvolution batch 128
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_qacnn_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model qa_conv
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_qacnn_batch_128_concat_5 --run_name roberta-large_qacnn_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model qa_conv
> roberta-large with QAconvolution_ver2 batch 128
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_qacnn2_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model qa_conv_ver2
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_qacnn2_batch_128_concat_5 --run_name roberta-large_qacnn2_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model qa_conv_ver2
> roberta-large random_concat batch 128
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_batch_128_random_concat --with_inference False --dataset_name random_concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_batch_128_random_concat --run_name roberta-large_batch_128_random_concat --elastic_index_name preprocess-wiki-index
고생하셨습니다.
결과
1기에 사용한 모델 거의 그대로 사용해봤는데 성적이 좋아지질 않네요 데이터셋 자체에서 좀 걸러지는게 있어야 성능이 좋아질지는 테스트 해봐야겠습니다.
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-small_cnn_batch_128_concat_5 --run_name roberta-small_cnn_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model convolution
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_cnn_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model convolution
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_cnn_batch_128_concat_5 --run_name roberta-large_cnn_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model convolution
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_cnn_batch_160_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 10 --num_train_epochs 20 --additional_model convolution
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_cnn_batch_160_concat_5 --run_name roberta-large_cnn_batch_160_concat_5 --elastic_index_name preprocess-wiki-index --additional_model convolution
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_qacnn_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model qa_conv
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_qacnn_batch_128_concat_5 --run_name roberta-large_qacnn_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model qa_conv
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_qacnn2_batch_128_concat_5 --with_inference False --dataset_name concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20 --additional_model qa_conv_ver2
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_qacnn2_batch_128_concat_5 --run_name roberta-large_qacnn2_batch_128_concat_5 --elastic_index_name preprocess-wiki-index --additional_model qa_conv_ver2
python train.py --do_train --project_name mrc_concat_data_train --model_name_or_path klue/roberta-large --run_name roberta-large_batch_128_random_concat --with_inference False --dataset_name random_concat --per_device_train_batch_size 16 --gradient_accumulation_steps 8 --num_train_epochs 20
python inference.py --do_predict --project_name mrc_concat_data_train --finetuned_mrc_model_path ../output/mrc_concat_data_train/roberta-large_batch_128_random_concat --run_name roberta-large_batch_128_random_concat --elastic_index_name preprocess-wiki-index