fit_transform tries to query non-existent node "0"

ubalklen commented 3 years ago

from nodevectors import Node2Vec
import networkx as nx

G = nx.Graph()
G.add_edge("1", "2")
n2v = Node2Vec(n_components=128)
n2v.fit_transform(G)

Output:

Making walks... Done, T=3.98
Mapping Walk Names... Done, T=0.07
Training W2V... WARNING: gensim word2vec version is unoptimizedTry version 3.6 if on windows, versions 3.7 and 3.8 have had issues
Done, T=0.39
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-6-43e45de9791e> in <module>
      2 G.add_edge("1", "2")
      3 n2v = Node2Vec(n_components=128)
----> 4 n2v.fit_transform(G)

~/miniconda3/envs/graphs/lib/python3.7/site-packages/nodevectors/node2vec.py in fit_transform(self, G)
    151             pd.DataFrame.from_records(
    152             pd.Series(np.arange(len(G.nodes)))
--> 153               .apply(self.predict)
    154               .values)
    155         )

~/miniconda3/envs/graphs/lib/python3.7/site-packages/pandas/core/series.py in apply(self, func, convert_dtype, args, **kwds)
   4106             else:
   4107                 values = self.astype(object)._values
-> 4108                 mapped = lib.map_infer(values, f, convert=convert_dtype)
   4109 
   4110         if len(mapped) and isinstance(mapped[0], Series):

pandas/_libs/lib.pyx in pandas._libs.lib.map_infer()

~/miniconda3/envs/graphs/lib/python3.7/site-packages/nodevectors/node2vec.py in predict(self, node_name)
    166         if type(node_name) is not str:
    167             node_name = str(node_name)
--> 168         return self.model.wv.__getitem__(node_name)
    169 
    170     def save_vectors(self, out_file):

~/miniconda3/envs/graphs/lib/python3.7/site-packages/gensim/models/keyedvectors.py in __getitem__(self, entities)
    351         if isinstance(entities, string_types):
    352             # allow calls like trained_model['office'], as a shorthand for trained_model[['office']]
--> 353             return self.get_vector(entities)
    354 
    355         return vstack([self.get_vector(entity) for entity in entities])

~/miniconda3/envs/graphs/lib/python3.7/site-packages/gensim/models/keyedvectors.py in get_vector(self, word)
    469 
    470     def get_vector(self, word):
--> 471         return self.word_vec(word)
    472 
    473     def words_closer_than(self, w1, w2):

~/miniconda3/envs/graphs/lib/python3.7/site-packages/gensim/models/keyedvectors.py in word_vec(self, word, use_norm)
    466             return result
    467         else:
--> 468             raise KeyError("word '%s' not in vocabulary" % word)
    469 
    470     def get_vector(self, word):

KeyError: "word '0' not in vocabulary"

Fitting and then predicting works fine:

n2v.fit(G)

for node in G:
    print(n2v.predict(node))

Output:

Making walks... Done, T=0.00
Mapping Walk Names... Done, T=0.06
Training W2V... WARNING: gensim word2vec version is unoptimizedTry version 3.6 if on windows, versions 3.7 and 3.8 have had issues
Done, T=0.38
[ 0.01669522  0.01119813 -0.00566072 -0.0134473   0.01121703  0.00379648
  0.01170088 -0.0121789  -0.01429367 -0.00849178  0.00943886 -0.00981773
  0.00337284 -0.0013884  -0.01287963 -0.00460479 -0.00217993 -0.01019352
  0.00615602 -0.00658679  0.01679845 -0.00747446  0.0019177  -0.00912566
 -0.01688758  0.00983168  0.00286994  0.00739604  0.01249113  0.00116864
  0.00235101 -0.01515406 -0.00786685 -0.01675885 -0.01421799 -0.00829282
 -0.00385966 -0.00779916 -0.00067812  0.01312324  0.0154448  -0.0107193
 -0.00059914 -0.00439935 -0.01970238 -0.00585162 -0.01741348 -0.00118494
 -0.01365886 -0.007099    0.00806013 -0.00448715 -0.00633816 -0.009869
  0.01835089  0.01462685  0.00408294  0.01042183  0.00773886  0.00500051
  0.00697436 -0.00052141 -0.00307364  0.00916708 -0.0059573  -0.00794462
  0.00316458 -0.01120937  0.00820292 -0.00175512 -0.00426679  0.00403081
  0.0036373  -0.00538955  0.00169757 -0.00476247  0.00011785 -0.00015604
 -0.02005355  0.00293106 -0.00457922  0.01199162 -0.01039407 -0.00975906
 -0.00386479  0.00380202  0.0150509   0.00117078  0.01009431 -0.01518334
 -0.01550014 -0.00316153 -0.01638743  0.00911983 -0.00656796 -0.01130522
  0.00696332  0.00222521 -0.01348531  0.01745371 -0.01043333  0.00377076
  0.00168364 -0.01029514 -0.01187336 -0.00047892  0.01747731  0.01539742
 -0.00317966  0.01036133  0.00348293  0.00357884  0.01691393 -0.01314759
 -0.00387712  0.01349622  0.00886216  0.01269572 -0.014981    0.01047694
 -0.01591979  0.00815849  0.0053769  -0.01705019  0.00478466 -0.00967307
  0.00100743 -0.00627678]
[ 1.74459908e-02  9.29250382e-03 -5.62654436e-03 -1.58256646e-02
  6.62352284e-03 -1.04596815e-03  7.46087125e-03 -1.52283600e-02
 -1.47760203e-02 -4.99586575e-03  8.37715156e-03 -1.14215305e-02
  8.03218782e-03 -4.57122130e-03 -1.37374401e-02 -6.70122309e-03
  5.60258329e-03 -1.36625227e-02  2.69854977e-03 -2.01221928e-03
  1.41100660e-02 -1.21530667e-02  7.38256099e-03 -7.29203923e-03
 -1.45003749e-02  8.89602769e-03 -1.07536477e-03  1.66074419e-03
  7.48369843e-03  8.18155764e-04  3.80413979e-03 -1.41491415e-02
 -1.12004904e-03 -1.57257933e-02 -1.23076690e-02 -9.28518735e-03
 -5.15399221e-03 -5.42826438e-03  9.19695070e-04  9.03129764e-03
  1.57911442e-02 -5.36569115e-03 -1.36574614e-03 -2.82609137e-03
 -1.89300030e-02 -5.67972986e-03 -1.65421404e-02 -3.22455773e-04
 -1.18535999e-02 -7.90045224e-03  9.72144585e-03 -7.91174080e-03
 -4.45207767e-03 -1.19799254e-02  1.93504207e-02  1.06750363e-02
  4.26934101e-03  1.17199738e-02  6.25003641e-03  1.98470801e-03
  4.88949660e-03  7.53012951e-04 -8.29974841e-03  6.85363356e-03
 -2.72968784e-03 -5.58869634e-03  1.48452440e-04 -8.40961654e-03
  3.35645187e-03 -3.52724968e-03  3.98239447e-03 -2.40911031e-03
  4.06429684e-03 -3.92150227e-03  6.94983220e-03 -8.35845713e-03
  9.88924527e-04 -1.79716619e-03 -1.90840866e-02  2.46768352e-03
 -4.37452644e-03  1.30511560e-02 -6.40019309e-03 -1.33609995e-02
  3.72520881e-04  5.42262476e-03  1.41993044e-02  7.35963322e-03
  1.08134123e-02 -1.49347940e-02 -1.22990599e-02 -9.69778374e-03
 -1.74602009e-02  8.74316972e-03 -5.31877764e-03 -7.91502465e-03
  3.98375420e-03  4.59250668e-03 -1.26426788e-02  1.60577614e-02
 -1.03733260e-02  4.70442930e-03  6.72380021e-03 -1.34339379e-02
 -1.50517235e-02  3.45687894e-03  1.50700649e-02  1.58219878e-02
  4.28991532e-03  9.33015719e-03  7.03065936e-03  3.41207208e-03
  1.49237625e-02 -1.07398266e-02 -1.00340396e-02  9.12039913e-03
  1.27081424e-02  1.08739929e-02 -1.16528282e-02  4.42440435e-03
 -1.53663196e-02  3.64650693e-03  5.37529076e-03 -1.76296048e-02
  3.67483153e-05 -7.88922701e-03 -5.40610822e-03 -1.80462585e-03]

ubalklen commented 3 years ago

Looking at the code, this seems to be caused by the way the nodes are passed for prediction inside the fit_transform method of the Node2Vec implementation. Not sure why, but the predictions are performed over np.arange(len(G.nodes)), i.e. the integer positions 0..n-1, rather than over the graph's actual node names. Here the nodes are named "1" and "2", so the lookup for "0" fails and a KeyError is raised.

The other embedders don't do that.

VHRanger commented 3 years ago

Thanks, I'll take a look this week.

VHRanger commented 3 years ago

This should be fixed in the latest release; please update and confirm.

ubalklen commented 3 years ago

Still not working.

>pip show nodevectors

Name: nodevectors
Version: 0.1.23
Summary: Fast network node embeddings
Home-page: https://github.com/VHRanger/nodevectors/
Author: Matt Ranger
Author-email: UNKNOWN
License: MIT
Location: e:\users\user\miniconda3\envs\graph\lib\site-packages
Requires: scipy, networkx, csrgraph, gensim, pandas, numpy, scikit-learn, numba
Required-by:

>python
Python 3.7.9 (default, Aug 31 2020, 17:10:11) [MSC v.1916 64 bit (AMD64)] :: Anaconda, Inc. on win32
Type "help", "copyright", "credits" or "license" for more information.
>>> from nodevectors import Node2Vec
>>> import networkx as nx
>>> G = nx.Graph()       
>>> G.add_edge("1", "2") 
>>> n2v = Node2Vec(n_components=128)
>>> n2v.fit_transform(G)
Making walks... Done, T=2.98
Mapping Walk Names... Done, T=0.04
Training W2V... Done, T=0.05
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
  File "E:\Users\user\miniconda3\envs\graph\lib\site-packages\nodevectors\node2vec.py", line 155, in fit_transform
    .apply(self.predict)
  File "E:\Users\user\AppData\Roaming\Python\Python37\site-packages\pandas\core\series.py", line 3848, in apply
    mapped = lib.map_infer(values, f, convert=convert_dtype)
  File "pandas\_libs\lib.pyx", line 2329, in pandas._libs.lib.map_infer
  File "E:\Users\user\miniconda3\envs\graph\lib\site-packages\nodevectors\node2vec.py", line 170, in predict
    return self.model.wv.__getitem__(node_name)
  File "E:\Users\user\miniconda3\envs\graph\lib\site-packages\gensim\models\keyedvectors.py", line 353, in __getitem__
    return self.get_vector(entities)
  File "E:\Users\user\miniconda3\envs\graph\lib\site-packages\gensim\models\keyedvectors.py", line 471, in get_vector
    return self.word_vec(word)
  File "E:\Users\user\miniconda3\envs\graph\lib\site-packages\gensim\models\keyedvectors.py", line 468, in word_vec
    raise KeyError("word '%s' not in vocabulary" % word)
KeyError: "word '0' not in vocabulary"

I can see there is still an np.arange being passed to the predict function.

IdanAlbilia commented 3 years ago

Any new info about this problem? I'm having the exact same issue.

IdanAlbilia commented 3 years ago

Or does anyone know a way around it?

ubalklen commented 3 years ago

The workaround is to fit and predict separately.

ashkspark commented 1 year ago

Still not working for me! I will call fit and predict separately instead.

VHRanger / nodevectors

fit_transform tries to query non-existent node "0" #32