Closed icedpanda closed 2 years ago
This is probably because the internal Gensim.KeyedVectors
model tries to use the vector arrays from the spacy.load('en_core_web_lg')
, which were loaded on GPU, due to spacy.require_gpu(0)
.
I've got an M1 Mac, which doesn't have GPU support from spaCy yet. Could you try to create a PR, given these assumptions? That said, I assume people usually wouldn't use en_core_web_lg
on GPU.
This is the full traceback. I am not familiar with how Gensim
works internally or whether it supports GPU or not.
I guess it would be sufficient to just add a note that concise-concepts
is not GPU-supported yet, as people usually wouldn't use en_core_web_lg
on GPU.
TypeError Traceback (most recent call last)
Cell In [1], line 23
21 # use any model that has internal spacy embeddings
22 nlp = spacy.load('en_core_web_lg')
---> 23 nlp.add_pipe("concise_concepts",
24 config={"data": data}
25 )
26 doc = nlp(text)
28 options = {"colors": {"fruit": "darkorange", "vegetable": "limegreen", "meat": "salmon"},
29 "ents": ["fruit", "vegetable", "meat"]}
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/spacy/language.py:795, in Language.add_pipe(self, factory_name, name, before, after, first, last, source, config, raw_config, validate)
787 if not self.has_factory(factory_name):
788 err = Errors.E002.format(
789 name=factory_name,
790 opts=", ".join(self.factory_names),
(...)
793 lang_code=self.lang,
794 )
--> 795 pipe_component = self.create_pipe(
796 factory_name,
797 name=name,
798 config=config,
799 raw_config=raw_config,
800 validate=validate,
801 )
802 pipe_index = self._get_pipe_index(before, after, first, last)
803 self._pipe_meta[name] = self.get_factory_meta(factory_name)
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/spacy/language.py:674, in Language.create_pipe(self, factory_name, name, config, raw_config, validate)
671 cfg = {factory_name: config}
672 # We're calling the internal _fill here to avoid constructing the
673 # registered functions twice
--> 674 resolved = registry.resolve(cfg, validate=validate)
675 filled = registry.fill({"cfg": cfg[factory_name]}, validate=validate)["cfg"]
676 filled = Config(filled)
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/confection/__init__.py:728, in registry.resolve(cls, config, schema, overrides, validate)
719 @classmethod
720 def resolve(
721 cls,
(...)
726 validate: bool = True,
727 ) -> Dict[str, Any]:
--> 728 resolved, _ = cls._make(
729 config, schema=schema, overrides=overrides, validate=validate, resolve=True
730 )
731 return resolved
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/confection/__init__.py:777, in registry._make(cls, config, schema, overrides, resolve, validate)
775 if not is_interpolated:
776 config = Config(orig_config).interpolate()
--> 777 filled, _, resolved = cls._fill(
778 config, schema, validate=validate, overrides=overrides, resolve=resolve
779 )
780 filled = Config(filled, section_order=section_order)
781 # Check that overrides didn't include invalid properties not in config
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/confection/__init__.py:849, in registry._fill(cls, config, schema, validate, resolve, parent, overrides)
846 getter = cls.get(reg_name, func_name)
847 # We don't want to try/except this and raise our own error
848 # here, because we want the traceback if the function fails.
--> 849 getter_result = getter(*args, **kwargs)
850 else:
851 # We're not resolving and calling the function, so replace
852 # the getter_result with a Promise class
853 getter_result = Promise(
854 registry=reg_name, name=func_name, args=args, kwargs=kwargs
855 )
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/concise_concepts/__init__.py:36, in make_concise_concepts(nlp, name, data, topn, model_path, word_delimiter, ent_score, exclude_pos, exclude_dep, include_compound_words, case_sensitive)
9 @Language.factory(
10 "concise_concepts",
11 default_config={
(...)
34 case_sensitive: bool,
35 ):
---> 36 return Conceptualizer(
37 nlp=nlp,
38 name=name,
39 data=data,
40 topn=topn,
41 model_path=model_path,
42 word_delimiter=word_delimiter,
43 ent_score=ent_score,
44 exclude_pos=exclude_pos,
45 exclude_dep=exclude_dep,
46 include_compound_words=include_compound_words,
47 case_sensitive=case_sensitive,
48 )
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/concise_concepts/conceptualizer/Conceptualizer.py:84, in Conceptualizer.__init__(self, nlp, name, data, topn, model_path, word_delimiter, ent_score, exclude_pos, exclude_dep, include_compound_words, case_sensitive)
82 else:
83 self.match_key = "LEMMA"
---> 84 self.run()
85 self.data_upper = {k.upper(): v for k, v in data.items()}
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/concise_concepts/conceptualizer/Conceptualizer.py:89, in Conceptualizer.run(self)
87 def run(self):
88 self.determine_topn()
---> 89 self.set_gensim_model()
90 self.verify_data()
91 self.expand_concepts()
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/concise_concepts/conceptualizer/Conceptualizer.py:159, in Conceptualizer.set_gensim_model(self)
155 vectorList.append(vector)
157 self.kv = KeyedVectors(self.nlp.vocab.vectors_length)
--> 159 self.kv.add_vectors(wordList, vectorList)
File ~/miniconda3/envs/mkg/lib/python3.10/site-packages/gensim/models/keyedvectors.py:586, in KeyedVectors.add_vectors(self, keys, weights, extras, replace)
584 weights = np.array(weights).reshape(1, -1)
585 elif isinstance(weights, list):
--> 586 weights = np.array(weights)
587 if extras is None:
588 extras = {}
File cupy/_core/core.pyx:1397, in cupy._core.core.ndarray.__array__()
TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly.
I was using
en_core_web_trf
with GPU enabled. I changed it to en_core_web_lg
since en_core_web_trf
is not supported. However, this gives me the following error when the GPU is enabled. It took me a while to figure out what was wrong.
Fail while using GPU
ERROR Message