CambridgeMolecularEngineering / chemdataextractor2

ChemDataExtractor Version 2.0
Other
121 stars 28 forks source link

failed to execute doc.cem #43

Closed christina0106 closed 1 year ago

christina0106 commented 1 year ago

doc = Document('UV-vis spectrum of 5,10,15,20-Tetra(4-carboxyphenyl)porphyrin in Tetrahydrofuran (THF).') doc.cems

Error information: Initialising AllenNLP model ..

EOFError Traceback (most recent call last) ~\AppData\Local\Temp\ipykernel_24828\270517424.py in ----> 1 doc.cems

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\document.py in cems(self) 564 A list of all Chemical Entity Mentions in this document as :class:~chemdataextractor.doc.text.Span 565 """ --> 566 return list(set([n for el in self.elements for n in el.cems])) 567 568 @property

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\document.py in (.0) 564 A list of all Chemical Entity Mentions in this document as :class:~chemdataextractor.doc.text.Span 565 """ --> 566 return list(set([n for el in self.elements for n in el.cems])) 567 568 @property

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in cems(self) 344 A list of all Chemical Entity Mentions in this text as :class:chemdataextractor.doc.text.span 345 """ --> 346 return [cem for sent in self.sentences for cem in sent.cems] 347 348 @property

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in (.0) 344 A list of all Chemical Entity Mentions in this text as :class:chemdataextractor.doc.text.span 345 """ --> 346 return [cem for sent in self.sentences for cem in sent.cems] 347 348 @property

~.conda\envs\python37\lib\site-packages\chemdataextractor\utils.py in fget_memoized(self) 27 def fget_memoized(self): 28 if not hasattr(self, attr_name): ---> 29 setattr(self, attr_name, fget(self)) 30 return getattr(self, attr_name) 31 return property(fget_memoized)

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in cems(self) 642 spans = [] 643 # print(self.text.encode('utf8')) --> 644 for result in chemical_name.scan(self.tokens): 645 # parser scan yields (result, startindex, endindex) - we just use the indexes here 646 tokens = self.tokens[result[1]:result[2]]

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in scan(self, tokens, max_matches, overlap) 115 while i < length and matches < max_matches: 116 try: --> 117 results, next_i = self.parse(tokens, i) 118 except ParseException as err: 119 # print(err.msg)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in parse(self, tokens, i, actions) 144 """ 145 try: --> 146 result, found_index = self._parse_tokens(tokens, i, actions) 147 except IndexError: 148 raise ParseException(tokens, i, 'IndexError', self)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in _parse_tokens(self, tokens, i, actions) 425 results = [] 426 for e in self.exprs: --> 427 exprresults, i = e.parse(tokens, i) 428 if exprresults is not None: 429 results.extend(exprresults)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in parse(self, tokens, i, actions) 144 """ 145 try: --> 146 result, found_index = self._parse_tokens(tokens, i, actions) 147 except IndexError: 148 raise ParseException(tokens, i, 'IndexError', self)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in _parse_tokens(self, tokens, i, actions) 682 results = [] 683 try: --> 684 results, i = self.expr.parse(tokens, i, actions) 685 except (ParseException, IndexError): 686 pass

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in parse(self, tokens, i, actions) 144 """ 145 try: --> 146 result, found_index = self._parse_tokens(tokens, i, actions) 147 except IndexError: 148 raise ParseException(tokens, i, 'IndexError', self)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in _parse_tokens(self, tokens, i, actions) 425 results = [] 426 for e in self.exprs: --> 427 exprresults, i = e.parse(tokens, i) 428 if exprresults is not None: 429 results.extend(exprresults)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in parse(self, tokens, i, actions) 144 """ 145 try: --> 146 result, found_index = self._parse_tokens(tokens, i, actions) 147 except IndexError: 148 raise ParseException(tokens, i, 'IndexError', self)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in _parse_tokens(self, tokens, i, actions) 628 def _parse_tokens(self, tokens, i, actions=True): 629 try: --> 630 self.expr.try_parse(tokens, i) 631 except (ParseException, IndexError): 632 pass

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in try_parse(self, tokens, i) 158 159 def try_parse(self, tokens, i): --> 160 return self.parse(tokens, i, actions=False)[1] 161 162 def _parse_tokens(self, tokens, i, actions=True):

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in parse(self, tokens, i, actions) 144 """ 145 try: --> 146 result, found_index = self._parse_tokens(tokens, i, actions) 147 except IndexError: 148 raise ParseException(tokens, i, 'IndexError', self)

~.conda\envs\python37\lib\site-packages\chemdataextractor\parse\elements.py in _parse_tokens(self, tokens, i, actions) 295 def _parse_tokens(self, tokens, i, actions=True): 296 token = tokens[i] --> 297 tag = token[self.tag_type] 298 if tag == self.match: 299 return [E(self.name or safe_name(tag), token[0])], i + 1

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in getitem(self, key) 1073 return self.text 1074 elif key == 1: -> 1075 return self.legacy_pos_tag 1076 elif isinstance(key, str): 1077 return self.getattr(key)

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in legacy_pos_tag(self) 1063 def legacy_pos_tag(self): 1064 pos_tag = self[POS_TAG_TYPE] -> 1065 ner_tag = self[NER_TAG_TYPE] 1066 if ner_tag is not None and ner_tag != "O": 1067 return ner_tag

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in getitem(self, key) 1075 return self.legacy_pos_tag 1076 elif isinstance(key, str): -> 1077 return self.getattr(key) 1078 else: 1079 raise IndexError("Key" + str(key) + " is out of bounds for this token.")

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in getattr(self, name) 1083 return self._tags[name] 1084 else: -> 1085 self.sentence._assign_tags(name) 1086 if name not in self._tags.keys(): 1087 raise AttributeError(name + " is not a supported tag type for the sentence: " + str(self.sentence) + str(self.sentence.taggers) + str(type(self.sentence))

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\text.py in _assign_tags(self, tag_type) 788 tags = None 789 if hasattr(tagger, "batch_tag_for_type") and tagger.can_batch_tag(tag_type) and self.document is not None: --> 790 self.document._batch_assign_tags(tagger, tag_type) 791 elif hasattr(tagger, "tag_for_type"): 792 tags = tagger.tag_for_type(self.tokens, tag_type)

~.conda\envs\python37\lib\site-packages\chemdataextractor\doc\document.py in _batch_assign_tags(self, tagger, tag_type) 621 622 if hasattr(tagger, "batch_tag_for_type"): --> 623 tag_results = tagger.batch_tag_for_type(all_tokens, tag_type) 624 else: 625 tag_results = tagger.batch_tag(all_tokens)

~.conda\envs\python37\lib\site-packages\chemdataextractor\nlp\tag.py in batch_tag_for_type(self, sents, tag_type) 204 """ 205 tagger = self.taggers_dict[tag_type] --> 206 return tagger.batch_tag(sents) 207 208 def can_batch_tag(self, tag_type):

~.conda\envs\python37\lib\site-packages\chemdataextractor\nlp\allennlpwrapper.py in batch_tag(self, sents) 193 log.debug("".join(["Batch size:", str(len(instance))])) 194 with torch.no_grad(): --> 195 batch_predictions = self.predictor.predict_batch_instance(instance) 196 predictions.extend(batch_predictions) 197 prediction_end_time = datetime.datetime.now()

~.conda\envs\python37\lib\site-packages\chemdataextractor\nlp\allennlpwrapper.py in predictor(self) 152 gpu_id = torch.cuda.current_device() 153 loaded_archive = load_archive(archive_file=self._archive_location, weights_file=self._weights_location, --> 154 overrides=json.dumps(self.overrides)) 155 model = loaded_archive.model 156 if gpu_id is not None and gpu_id >= 0:

~.conda\envs\python37\lib\site-packages\allennlp\models\archival.py in load_archive(archive_file, cuda_device, overrides, weights_file) 228 weights_file=weights_path, 229 serialization_dir=serialization_dir, --> 230 cuda_device=cuda_device) 231 232 return Archive(model=model, config=config)

~.conda\envs\python37\lib\site-packages\allennlp\models\model.py in load(cls, config, serialization_dir, weights_file, cuda_device) 325 # This allows subclasses of Model to override _load. 326 # pylint: disable=protected-access --> 327 return cls.by_name(model_type)._load(config, serialization_dir, weights_file, cuda_device) 328 329 def extend_embedder_vocab(self, embedding_sources_mapping: Dict[str, str] = None) -> None:

~.conda\envs\python37\lib\site-packages\allennlp\models\model.py in _load(cls, config, serialization_dir, weights_file, cuda_device) 263 # want the code to look for it, so we remove it from the parameters here. 264 remove_pretrained_embedding_params(model_params) --> 265 model = Model.from_params(vocab=vocab, params=model_params) 266 267 # If vocab+embedding extension was done, the model initialized from from_params

~.conda\envs\python37\lib\site-packages\allennlp\common\from_params.py in from_params(cls, params, extras) 363 # We want to call subclass.from_params 364 extras = create_extras(subclass, extras) --> 365 return subclass.from_params(params=params, extras) 366 else: 367 # In some rare cases, we get a registered subclass that does not have a

~.conda\envs\python37\lib\site-packages\allennlp\common\from_params.py in from_params(cls, params, extras) 384 else: 385 # This class has a constructor, so create kwargs for it. --> 386 kwargs = create_kwargs(cls, params, extras) 387 388 return cls(**kwargs) # type: ignore

~.conda\envs\python37\lib\site-packages\allennlp\common\from_params.py in create_kwargs(cls, params, extras) 131 # and an args field indicating (str, int). We capture both. 132 annotation = remove_optional(param.annotation) --> 133 kwargs[name] = construct_arg(cls, name, annotation, param.default, params, extras) 134 135 params.assert_empty(cls.name)

~.conda\envs\python37\lib\site-packages\allennlp\common\from_params.py in construct_arg(cls, param_name, annotation, default, params, extras) 227 return annotation.by_name(subparams)() 228 else: --> 229 return annotation.from_params(params=subparams, subextras) 230 elif not optional: 231 # Not optional and not supplied, that's an error!

~.conda\envs\python37\lib\site-packages\allennlp\common\from_params.py in from_params(cls, params, extras) 363 # We want to call subclass.from_params 364 extras = create_extras(subclass, extras) --> 365 return subclass.from_params(params=params, extras) 366 else: 367 # In some rare cases, we get a registered subclass that does not have a

~.conda\envs\python37\lib\site-packages\allennlp\modules\text_field_embedders\basic_text_field_embedder.py in from_params(cls, vocab, params) 158 token_embedders = { 159 name: TokenEmbedder.from_params(subparams, vocab=vocab) --> 160 for name, subparams in token_embedder_params.items() 161 } 162

~.conda\envs\python37\lib\site-packages\allennlp\modules\text_field_embedders\basic_text_field_embedder.py in (.0) 158 token_embedders = { 159 name: TokenEmbedder.from_params(subparams, vocab=vocab) --> 160 for name, subparams in token_embedder_params.items() 161 } 162

~.conda\envs\python37\lib\site-packages\allennlp\common\from_params.py in from_params(cls, params, extras) 363 # We want to call subclass.from_params 364 extras = create_extras(subclass, extras) --> 365 return subclass.from_params(params=params, extras) 366 else: 367 # In some rare cases, we get a registered subclass that does not have a

~.conda\envs\python37\lib\site-packages\allennlp\common\from_params.py in from_params(cls, params, extras) 386 kwargs = create_kwargs(cls, params, extras) 387 --> 388 return cls(**kwargs) # type: ignore

~.conda\envs\python37\lib\site-packages\allennlp\modules\token_embedders\bert_token_embedder.py in init(self, pretrained_model, requires_grad, top_layer_only, scalar_mix_parameters) 268 def init(self, pretrained_model: str, requires_grad: bool = False, top_layer_only: bool = False, 269 scalar_mix_parameters: List[float] = None) -> None: --> 270 model = PretrainedBertModel.load(pretrained_model) 271 272 for param in model.parameters():

~.conda\envs\python37\lib\site-packages\allennlp\modules\token_embedders\bert_token_embedder.py in load(cls, model_name, cache_model) 36 return PretrainedBertModel._cache[model_name] 37 ---> 38 model = BertModel.from_pretrained(model_name) 39 if cache_model: 40 cls._cache[model_name] = model

~.conda\envs\python37\lib\site-packages\pytorch_pretrained_bert\modeling.py in from_pretrained(cls, pretrained_model_name_or_path, *inputs, **kwargs) 588 resolved_archive_file, tempdir)) 589 with tarfile.open(resolved_archive_file, 'r:gz') as archive: --> 590 archive.extractall(tempdir) 591 serialization_dir = tempdir 592 # Load config

~.conda\envs\python37\lib\tarfile.py in extractall(self, path, members, numeric_owner) 1998 # Do not set_attrs directories, as we will do that further down 1999 self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(), -> 2000 numeric_owner=numeric_owner) 2001 2002 # Reverse sort directories.

~.conda\envs\python37\lib\tarfile.py in extract(self, member, path, set_attrs, numeric_owner) 2040 self._extract_member(tarinfo, os.path.join(path, tarinfo.name), 2041 set_attrs=set_attrs, -> 2042 numeric_owner=numeric_owner) 2043 except OSError as e: 2044 if self.errorlevel > 0:

~.conda\envs\python37\lib\tarfile.py in _extract_member(self, tarinfo, targetpath, set_attrs, numeric_owner) 2110 2111 if tarinfo.isreg(): -> 2112 self.makefile(tarinfo, targetpath) 2113 elif tarinfo.isdir(): 2114 self.makedir(tarinfo, targetpath)

~.conda\envs\python37\lib\tarfile.py in makefile(self, tarinfo, targetpath) 2159 target.truncate() 2160 else: -> 2161 copyfileobj(source, target, tarinfo.size, ReadError, bufsize) 2162 2163 def makeunknown(self, tarinfo, targetpath):

~.conda\envs\python37\lib\tarfile.py in copyfileobj(src, dst, length, exception, bufsize) 245 blocks, remainder = divmod(length, bufsize) 246 for b in range(blocks): --> 247 buf = src.read(bufsize) 248 if len(buf) < bufsize: 249 raise exception("unexpected end of data")

~.conda\envs\python37\lib\gzip.py in read(self, size) 274 import errno 275 raise OSError(errno.EBADF, "read() on write-only GzipFile object") --> 276 return self._buffer.read(size) 277 278 def read1(self, size=-1):

~.conda\envs\python37\lib_compression.py in readinto(self, b) 66 def readinto(self, b): 67 with memoryview(b) as view, view.cast("B") as byte_view: ---> 68 data = self.read(len(byte_view)) 69 byte_view[:len(data)] = data 70 return len(data)

~.conda\envs\python37\lib\gzip.py in read(self, size) 480 break 481 if buf == b"": --> 482 raise EOFError("Compressed file ended before the " 483 "end-of-stream marker was reached") 484

EOFError: Compressed file ended before the end-of-stream marker was reached

OBrink commented 1 year ago

This seems to be the same issue as in #16. You can probably just delete the model files according to the instructions by @ti250 to resolve the issue.