CLUEbenchmark / CLUE

中文语言理解测评基准 Chinese Language Understanding Evaluation Benchmark: datasets, baselines, pre-trained models, corpus and leaderboard
http://www.CLUEbenchmarks.com
4.02k stars 540 forks source link

import 路径问题 #46

Closed Vimos closed 4 years ago

Vimos commented 4 years ago

https://github.com/CLUEbenchmark/CLUE/blob/6d2cb75607d85d4902d7f05880bb91bb3749dc4b/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py#L35-L40

import 语句中的 `baselines.models_pytorch.mrc_pytorch.` 前缀应该移除:在没有 IDE 的环境中,baselines 并不是一个可以被导入的 package。

Vimos commented 4 years ago

如果你运行CHID出现问题,请参考下面的patch。

± |master U:6 ?:9 ✗| → git diff
diff --git a/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py b/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py
index bcccbc9..3760ad1 100755
--- a/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py
+++ b/baselines/models_pytorch/mrc_pytorch/pytorch_modeling.py
@@ -29,7 +29,7 @@ import torch
 from torch import nn
 from torch.nn import CrossEntropyLoss

-from .tools.file_utils import cached_path
+from tools.file_utils import cached_path

 logger = logging.getLogger(__name__)

diff --git a/baselines/models_pytorch/mrc_pytorch/run_mrc.py b/baselines/models_pytorch/mrc_pytorch/run_mrc.py
index 9078f58..9f281f1 100644
--- a/baselines/models_pytorch/mrc_pytorch/run_mrc.py
+++ b/baselines/models_pytorch/mrc_pytorch/run_mrc.py
@@ -10,12 +10,12 @@ from torch import nn
 from torch.utils.data import TensorDataset, DataLoader
 from tqdm import tqdm

-from baselines.models_pytorch.mrc_pytorch.preprocess.cmrc2018_evaluate import get_eval
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import BertConfig, BertForQuestionAnswering, ALBertConfig, \
-    ALBertForQA
-from baselines.models_pytorch.mrc_pytorch.google_albert_pytorch_modeling import AlbertConfig, AlbertForMRC
-from baselines.models_pytorch.mrc_pytorch.tools import official_tokenization as tokenization, utils
-from baselines.models_pytorch.mrc_pytorch.tools.pytorch_optimization import get_optimization, warmup_linear
+from preprocess.cmrc2018_evaluate import get_eval
+from pytorch_modeling import BertConfig, BertForQuestionAnswering, ALBertConfig, \
+    ALBertForQA
+from google_albert_pytorch_modeling import AlbertConfig, AlbertForMRC
+from tools import official_tokenization as tokenization, utils
+from tools.pytorch_optimization import get_optimization, warmup_linear

 def evaluate(model, args, eval_examples, eval_features, device, global_steps, best_f1, best_em, best_f1_em):
diff --git a/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh b/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh
index 6aa2d37..6ce068e 100644
--- a/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh
+++ b/baselines/models_pytorch/mrc_pytorch/run_mrc_chid.sh
@@ -10,11 +10,11 @@ TASK_NAME="CHID"
 python run_multichoice_mrc.py \
   --gpu_ids="0,1,2,3" \
   --num_train_epochs=3 \
-  --train_batch_size=16 \
+  --train_batch_size=4 \
   --predict_batch_size=16 \
   --learning_rate=2e-5 \
   --warmup_proportion=0.06 \
-  --max_seq_length=64 \
+  --max_seq_length=128 \
   --vocab_file=$BERT_DIR/vocab.txt \
   --bert_config_file=$BERT_DIR/bert_config.json \
   --init_restore_dir=$BERT_DIR/pytorch_model.pth \
diff --git a/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py b/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py
index 4ecdd36..02177a4 100644
--- a/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py
+++ b/baselines/models_pytorch/mrc_pytorch/run_multichoice_mrc.py
@@ -32,13 +32,13 @@ import torch
 from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
 from tqdm import tqdm

-from baselines.models_pytorch.mrc_pytorch.preprocess.CHID_preprocess import RawResult, get_final_predictions, \
-    write_predictions, generate_input, evaluate
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import BertConfig, BertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.tools.official_tokenization import BertTokenizer
-from baselines.models_pytorch.mrc_pytorch.tools.pytorch_optimization import get_optimization, warmup_linear
+from preprocess.CHID_preprocess import RawResult, get_final_predictions, \
+    evaluate, generate_input
+from pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
+from pytorch_modeling import BertConfig, BertForMultipleChoice
+from google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
+from tools.official_tokenization import BertTokenizer
+from tools.pytorch_optimization import get_optimization, warmup_linear

 def reset_model(args, bert_config, model_cls):
@@ -122,7 +122,7 @@ def main():
     print(args)
     os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu_ids

-    device = torch.device("cuda" if torch.cuda.is_available() and not args.no_cuda else "cpu")
+    device = torch.device("cuda" if torch.cuda.is_available()  else "cpu")
     n_gpu = torch.cuda.device_count()
     print("device: {} n_gpu: {}, 16-bits training: {}".format(device, n_gpu, args.fp16))

diff --git a/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py b/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py
index 27cb0e5..f58c10e 100755
--- a/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py
+++ b/baselines/models_pytorch/mrc_pytorch/test_multichoice_mrc.py
@@ -7,12 +7,12 @@ import torch
 from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
 from tqdm import tqdm

-from baselines.models_pytorch.mrc_pytorch.preprocess.CHID_preprocess import RawResult, get_final_predictions, \
-    InputFeatures, write_predictions, generate_input
-from baselines.models_pytorch.mrc_pytorch.google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.pytorch_modeling import BertConfig, BertForMultipleChoice
-from baselines.models_pytorch.mrc_pytorch.tools.official_tokenization import BertTokenizer
+from preprocess.CHID_preprocess import RawResult, get_final_predictions, \
+    generate_input
+from google_albert_pytorch_modeling import AlbertConfig, AlbertForMultipleChoice
+from pytorch_modeling import ALBertConfig, ALBertForMultipleChoice
+from pytorch_modeling import BertConfig, BertForMultipleChoice
+from tools.official_tokenization import BertTokenizer
 from glob import glob

diff --git a/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py b/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py
index 194be30..868ad33 100755
--- a/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py
+++ b/baselines/models_pytorch/mrc_pytorch/tools/official_tokenization.py
@@ -24,7 +24,7 @@ import os
 import logging
 import six

-from models.file_utils import cached_path
+from .file_utils import cached_path

 logger = logging.getLogger(__name__)

@@ -385,4 +385,4 @@ def _is_punctuation(char):
     cat = unicodedata.category(char)
     if cat.startswith("P"):
         return True
-    return False
\ No newline at end of file
+    return False

brightmart commented 4 years ago

谢谢