Riccorl / transformer-srl

Reimplementation of a BERT-based model (Shi et al., 2019), currently the state of the art for English SRL. This model also implements predicate disambiguation.
69 stars 9 forks source link

spaCy errors encountered while trying out transformer-srl library on Win 10, Python 3.8 #16

Closed pragyakatyayan closed 2 years ago

pragyakatyayan commented 2 years ago

I successfully installed the transformer-srl library from my Jupyter Notebook terminal. However, I encountered the error below while trying to run the following code. I am using a Windows 10 system with Python 3.8.

code:

from transformer_srl import dataset_readers, models, predictors

predictor = predictors.SrlTransformersPredictor.from_path("D:/srl_bert_base_conll2012.tar.gz", "transformer_srl")
predictor.predict(sentence="Did Uriah honestly think he could beat the game in under three hours?")

Error with spaCy version 2.2.4:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-1-27de09498ecb> in <module>
      1 from transformer_srl import dataset_readers, models, predictors
      2 
----> 3 predictor = predictors.SrlTransformersPredictor.from_path("D:/srl_bert_base_conll2012.tar.gz", "transformer_srl")
      4 predictor.predict(sentence="Did Uriah honestly think he could beat the game in under three hours?")

~\anaconda3\lib\site-packages\transformer_srl\predictors.py in from_path(cls, archive_path, predictor_name, cuda_device, dataset_reader_to_load, frozen, import_plugins, language, restrict_frames, restrict_roles)
    153         if import_plugins:
    154             plugins.import_plugins()
--> 155         return SrlTransformersPredictor.from_archive(
    156             load_archive(archive_path, cuda_device=cuda_device),
    157             predictor_name,

~\anaconda3\lib\site-packages\transformer_srl\predictors.py in from_archive(cls, archive, predictor_name, dataset_reader_to_load, frozen, language, restrict_frames, restrict_roles)
    197             model.eval()
    198 
--> 199         return predictor_class(model, dataset_reader, language)

~\anaconda3\lib\site-packages\transformer_srl\predictors.py in __init__(self, model, dataset_reader, language)
     20         self, model: Model, dataset_reader: DatasetReader, language: str = "en_core_web_sm",
     21     ) -> None:
---> 22         super().__init__(model, dataset_reader, language)
     23 
     24     @staticmethod

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\predictors\srl.py in __init__(self, model, dataset_reader, language)
     22     ) -> None:
     23         super().__init__(model, dataset_reader)
---> 24         self._tokenizer = SpacyTokenizer(language=language, pos_tags=True)
     25 
     26     def predict(self, sentence: str) -> JsonDict:

~\anaconda3\lib\site-packages\allennlp\data\tokenizers\spacy_tokenizer.py in __init__(self, language, pos_tags, parse, ner, keep_spacy_tokens, split_on_spaces, start_tokens, end_tokens)
     61         end_tokens: Optional[List[str]] = None,
     62     ) -> None:
---> 63         self.spacy = get_spacy_model(language, pos_tags, parse, ner)
     64         if split_on_spaces:
     65             self.spacy.tokenizer = _WhitespaceSpacyTokenizer(self.spacy.vocab)

~\anaconda3\lib\site-packages\allennlp\common\util.py in get_spacy_model(spacy_model_name, pos_tags, parse, ner)
    273             disable.append("ner")
    274         try:
--> 275             spacy_model = spacy.load(spacy_model_name, disable=disable)
    276         except OSError:
    277             logger.warning(

~\AppData\Roaming\Python\Python38\site-packages\spacy\__init__.py in load(name, **overrides)
     28     if depr_path not in (True, False, None):
     29         deprecation_warning(Warnings.W001.format(path=depr_path))
---> 30     return util.load_model(name, **overrides)
     31 
     32 

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model(name, **overrides)
    162             return load_model_from_link(name, **overrides)
    163         if is_package(name):  # installed as package
--> 164             return load_model_from_package(name, **overrides)
    165         if Path(name).exists():  # path to model data directory
    166             return load_model_from_path(Path(name), **overrides)

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model_from_package(name, **overrides)
    183     """Load a model from an installed package."""
    184     cls = importlib.import_module(name)
--> 185     return cls.load(**overrides)
    186 
    187 

~\anaconda3\lib\site-packages\en_core_web_sm\__init__.py in load(**overrides)
      8 
      9 def load(**overrides):
---> 10     return load_model_from_init_py(__file__, **overrides)

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model_from_init_py(init_file, **overrides)
    226     if not model_path.exists():
    227         raise IOError(Errors.E052.format(path=path2str(data_path)))
--> 228     return load_model_from_path(data_path, meta, **overrides)
    229 
    230 

~\AppData\Roaming\Python\Python38\site-packages\spacy\util.py in load_model_from_path(model_path, meta, **overrides)
    207             config = meta.get("pipeline_args", {}).get(name, {})
    208             factory = factories.get(name, name)
--> 209             component = nlp.create_pipe(factory, config=config)
    210             nlp.add_pipe(component, name=name)
    211     return nlp.from_disk(model_path, exclude=disable)

~\AppData\Roaming\Python\Python38\site-packages\spacy\language.py in create_pipe(self, name, config)
    298                 raise KeyError(Errors.E108.format(name=name))
    299             else:
--> 300                 raise KeyError(Errors.E002.format(name=name))
    301         factory = self.factories[name]
    302         return factory(self, **config)

KeyError: "[E002] Can't find factory for 'tok2vec'. This usually happens when spaCy calls `nlp.create_pipe` with a component name that's not built in - for example, when constructing the pipeline from a model's meta.json. If you're using a custom component, you can write to `Language.factories['tok2vec']` or remove it from the model meta and add it via `nlp.add_pipe` instead."

I tried upgrading to spaCy 3.1.1, but got the following error:

---------------------------------------------------------------------------
ModuleNotFoundError                       Traceback (most recent call last)
<ipython-input-1-27de09498ecb> in <module>
----> 1 from transformer_srl import dataset_readers, models, predictors
      2 
      3 predictor = predictors.SrlTransformersPredictor.from_path("D:/srl_bert_base_conll2012.tar.gz", "transformer_srl")
      4 predictor.predict(sentence="Did Uriah honestly think he could beat the game in under three hours?")

~\anaconda3\lib\site-packages\transformer_srl\dataset_readers.py in <module>
     12 from allennlp.data.tokenizers import Token
     13 from allennlp_models.common.ontonotes import Ontonotes, OntonotesSentence
---> 14 from allennlp_models.structured_prediction import SrlReader
     15 from conllu import parse_incr
     16 from nltk import Tree

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\__init__.py in <module>
      1 # flake8: noqa: F403
----> 2 from allennlp_models.structured_prediction.predictors import *
      3 from allennlp_models.structured_prediction.dataset_readers import *
      4 from allennlp_models.structured_prediction.metrics import *
      5 from allennlp_models.structured_prediction.models import *

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\predictors\__init__.py in <module>
      2     BiaffineDependencyParserPredictor,
      3 )
----> 4 from allennlp_models.structured_prediction.predictors.constituency_parser import (
      5     ConstituencyParserPredictor,
      6 )

~\anaconda3\lib\site-packages\allennlp_models\structured_prediction\predictors\constituency_parser.py in <module>
      3 from overrides import overrides
      4 from nltk import Tree
----> 5 from spacy.lang.en.tag_map import TAG_MAP
      6 
      7 from allennlp.common.util import JsonDict, sanitize

ModuleNotFoundError: No module named 'spacy.lang.en.tag_map'

I am not sure how to resolve the issue. Any help is deeply appreciated. Thanks in advance.

Riccorl commented 2 years ago

Hi, the requirements say you need spaCy 2.3.x. Can you try updating it from 2.2.4 to 2.3? Let me know if this solves your issue.

pragyakatyayan commented 2 years ago

Thanks for replying. I downgraded spaCy to version 2.3.0, but I am still getting the same KeyError:

KeyError: "[E002] Can't find factory for 'tok2vec'. This usually happens when spaCy calls `nlp.create_pipe` with a component name that's not built in - for example, when constructing the pipeline from a model's meta.json. If you're using a custom component, you can write to `Language.factories['tok2vec']` or remove it from the model meta and add it via `nlp.add_pipe` instead."

UPDATE: I then re-downloaded the language model for spaCy 2.3.0 using `python -m spacy download en_core_web_sm`, and that resolved the error. I got the following output:

{'verbs': [{'verb': 'Did',
   'description': '[do.01: Did] Uriah honestly think he could beat the game in under three hours ?',
   'tags': ['B-V',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O'],
   'frame': 'do.01',
   'frame_score': 0.9999996423721313,
   'lemma': 'do'},
  {'verb': 'think',
   'description': 'Did [ARG0: Uriah] [ARGM-ADV: honestly] [think.01: think] [ARG1: he could beat the game in under three hours] ?',
   'tags': ['O',
    'B-ARG0',
    'B-ARGM-ADV',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'I-ARG1',
    'O'],
   'frame': 'think.01',
   'frame_score': 1.0,
   'lemma': 'think'},
  {'verb': 'could',
   'description': 'Did Uriah honestly think he [go.04: could] beat the game in under three hours ?',
   'tags': ['O',
    'O',
    'O',
    'O',
    'O',
    'B-V',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O',
    'O'],
   'frame': 'go.04',
   'frame_score': 0.10186543315649033,
   'lemma': 'could'},
  {'verb': 'beat',
   'description': 'Did Uriah honestly think [ARG0: he] [ARGM-MOD: could] [beat.03: beat] [ARG1: the game] [ARGM-TMP: in under three hours] ?',
   'tags': ['O',
    'O',
    'O',
    'O',
    'B-ARG0',
    'B-ARGM-MOD',
    'B-V',
    'B-ARG1',
    'I-ARG1',
    'B-ARGM-TMP',
    'I-ARGM-TMP',
    'I-ARGM-TMP',
    'I-ARGM-TMP',
    'O'],
   'frame': 'beat.03',
   'frame_score': 0.9999936819076538,
   'lemma': 'beat'}],
 'words': ['Did',
  'Uriah',
  'honestly',
  'think',
  'he',
  'could',
  'beat',
  'the',
  'game',
  'in',
  'under',
  'three',
  'hours',
  '?']}
Riccorl commented 2 years ago

Glad it worked :)