Closed Vimos closed 4 years ago
如果你运行CHID出现问题,请参考下面的patch。
± |master U:6 ?:9 ✗| → git diff
diff --git a/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py b/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py
index bcccbc9..3760ad1 100755
--- a/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py
+++ b/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py
@@ -29,7 +29,7 @@ import torch
from torch import nn
from torch.nn import CrossEntropyLoss
-from .tools.file_utils import cached_path
+from tools.file_utils import cached_path
logger = logging.getLogger(__name__)
diff --git a/baselines/models_pytorch/mrc_pytorch/run_mrc.py b/baselines/models_pytorch/mrc_pytorch/run_mrc.py
index 9078f58..9f281f1 100644
--- a/baselines/models_pytorch/mrc_pytorch/run_mrc.py
+++ b/baselines/models_pytorch/mrc_pytorch/run_mrc.py
@@ -10,12 +10,12 @@ from torch import nn
from torch.utils.data import TensorDataset, DataLoader
from tqdm import tqdm
-from baselines.models_pytorch.mrc_pytorch.preprocess.cmrc2018_evaluate import get_eval
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import BertConfig, BertForQuestionAnswering, ALBertConfig, \
- ALBertForQA
-from baselines.models_pytorch.mrc_pytorch.google_albert_pytorch_modeling import AlbertConfig, AlbertForMRC
-from baselines.models_pytorch.mrc_pytorch.tools import official_tokenization as tokenization, utils
-from baselines.models_pytorch.mrc_pytorch.tools.pytorch_optimization import get_optimization, warmup_linear
+from preprocess.cmrc2018_evaluate import get_eval
+from pytorch_modeling import BertConfig, BertForQuestionAnswering, ALBertConfig, \
+ ALBertForQA
+from google_albert_pytorch_modeling import AlbertConfig, AlbertForMRC
+from tools import official_tokenization as tokenization, utils
+from tools.pytorch_optimization import get_optimization, warmup_linear
def evaluate(model, args, eval_examples, eval_features, device, global_steps, best_f1, best_em, best_f1_em):
diff --git a/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh b/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh
index 6aa2d37..6ce068e 100644
--- a/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh
+++ b/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh
@@ -10,11 +10,11 @@ TASK_NAME="CHID"
python run_multichoice_mrc.py \
--gpu_ids="0,1,2,3" \
--num_train_epochs=3 \
- --train_batch_size=16 \
+ --train_batch_size=4 \
--predict_batch_size=16 \
--learning_rate=2e-5 \
--warmup_proportion=0.06 \
- --max_seq_length=64 \
+ --max_seq_length=128 \
--vocab_file=$BERT_DIR/vocab.txt \
--bert_config_file=$BERT_DIR/bert_config.json \
--init_restore_dir=$BERT_DIR/pytorch_model.pth \
diff --git a/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py b/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py
index 4ecdd36..02177a4 100644
--- a/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py
+++ b/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py
@@ -32,13 +32,13 @@ import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from tqdm import tqdm
-from baselines.models_pytorch.mrc_pytorch.preprocess.CHID_preprocess import RawResult, get_final_predictions, \
- write_predictions, generate_input, evaluate
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import BertConfig, BertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.tools.official_tokenization import BertTokenizer
-from baselines.models_pytorch.mrc_pytorch.tools.pytorch_optimization import get_optimization, warmup_linear
+from preprocess.CHID_preprocess import RawResult, get_final_predictions, \
+ evaluate, generate_input
+from pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
+from pytorch_modeling import BertConfig, BertForMultipleChoice
+from google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
+from tools.official_tokenization import BertTokenizer
+from tools.pytorch_optimization import get_optimization, warmup_linear
def reset_model(args, bert_config, model_cls):
@@ -122,7 +122,7 @@ def main():
print(args)
os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids
- device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
n_gpu = torch.cuda.device_count()
print("device: {} n_gpu: {}, 16-bits training: {}".format(device, n_gpu, args.fp16))
diff --git a/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py b/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py
index 27cb0e5..f58c10e 100755
--- a/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py
+++ b/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py
@@ -7,12 +7,12 @@ import torch
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from tqdm import tqdm
-from baselines.models_pytorch.mrc_pytorch.preprocess.CHID_preprocess import RawResult, get_final_predictions, \
- InputFeatures, write_predictions, generate_input
-from baselines.models_pytorch.mrc_pytorch.google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import BertConfig, BertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.tools.official_tokenization import BertTokenizer
+from preprocess.CHID_preprocess import RawResult, get_final_predictions, \
+ generate_input
+from google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
+from pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
+from pytorch_modeling import BertConfig, BertForMultipleChoice
+from tools.official_tokenization import BertTokenizer
from glob import glob
diff --git a/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py b/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py
index 194be30..868ad33 100755
--- a/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py
+++ b/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py
@@ -24,7 +24,7 @@ import os
import logging
import six
-from models.file_utils import cached_path
+from .file_utils import cached_path
logger = logging.getLogger(__name__)
@@ -385,4 +385,4 @@ def _is_punctuation(char):
cat = unicodedata.category(char)
if cat.startswith("P"):
return True
- return False
\ No newline at end of file
+ return False
谢谢
https://github.com/CLUEbenchmark/CLUE/blob/6d2cb75607d85d4902d7f05880bb91bb3749dc4b/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py#L35-L40
导入路径中的 `baselines.models_pytorch.mrc_pytorch.` 前缀应该移除:在没有 IDE 的环境中,`baselines` 并不是一个 package。