lmcinnes / pynndescent

A Python nearest neighbor descent for approximate nearest neighbors
BSD 2-Clause "Simplified" License
879 stars 105 forks source link

TypeError: a class that defines __slots__ without defining __getstate__ #4

Open moinnadeem opened 6 years ago

moinnadeem commented 6 years ago

When pickling the nndescent class, I get the following traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-124-75bc50e52546> in <module>()
      1 with open("nndescent_continuous-1.pkl", "wb") as f:
----> 2     pickle.dump(nndescent, f)

/usr/lib64/python2.7/pickle.pyc in dump(obj, file, protocol)
   1368 
   1369 def dump(obj, file, protocol=None):
-> 1370     Pickler(file, protocol).dump(obj)
   1371 
   1372 def dumps(obj, protocol=None):

/usr/lib64/python2.7/pickle.pyc in dump(self, obj)
    222         if self.proto >= 2:
    223             self.write(PROTO + chr(self.proto))
--> 224         self.save(obj)
    225         self.write(STOP)
    226 

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    329 
    330         # Save the reduce() output and finally memoize the object
--> 331         self.save_reduce(obj=obj, *rv)
    332 
    333     def persistent_id(self, obj):

/usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
    417 
    418         if state is not None:
--> 419             save(state)
    420             write(BUILD)
    421 

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288 

/usr/lib64/python2.7/pickle.pyc in save_dict(self, obj)
    647 
    648         self.memoize(obj)
--> 649         self._batch_setitems(obj.iteritems())
    650 
    651     dispatch[DictionaryType] = save_dict

/usr/lib64/python2.7/pickle.pyc in _batch_setitems(self, items)
    661             for k, v in items:
    662                 save(k)
--> 663                 save(v)
    664                 write(SETITEM)
    665             return

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    329 
    330         # Save the reduce() output and finally memoize the object
--> 331         self.save_reduce(obj=obj, *rv)
    332 
    333     def persistent_id(self, obj):

/usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
    399         else:
    400             save(func)
--> 401             save(args)
    402             write(REDUCE)
    403 

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288 

/usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj)
    560         write(MARK)
    561         for element in obj:
--> 562             save(element)
    563 
    564         if id(obj) in memo:

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288 

/usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj)
    560         write(MARK)
    561         for element in obj:
--> 562             save(element)
    563 
    564         if id(obj) in memo:

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288 

/usr/lib64/python2.7/pickle.pyc in save_dict(self, obj)
    647 
    648         self.memoize(obj)
--> 649         self._batch_setitems(obj.iteritems())
    650 
    651     dispatch[DictionaryType] = save_dict

/usr/lib64/python2.7/pickle.pyc in _batch_setitems(self, items)
    661             for k, v in items:
    662                 save(k)
--> 663                 save(v)
    664                 write(SETITEM)
    665             return

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    329 
    330         # Save the reduce() output and finally memoize the object
--> 331         self.save_reduce(obj=obj, *rv)
    332 
    333     def persistent_id(self, obj):

/usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
    399         else:
    400             save(func)
--> 401             save(args)
    402             write(REDUCE)
    403 

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288 

/usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj)
    560         write(MARK)
    561         for element in obj:
--> 562             save(element)
    563 
    564         if id(obj) in memo:

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288 

/usr/lib64/python2.7/pickle.pyc in save_list(self, obj)
    598 
    599         self.memoize(obj)
--> 600         self._batch_appends(iter(obj))
    601 
    602     dispatch[ListType] = save_list

/usr/lib64/python2.7/pickle.pyc in _batch_appends(self, items)
    613         if not self.bin:
    614             for x in items:
--> 615                 save(x)
    616                 write(APPEND)
    617             return

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    329 
    330         # Save the reduce() output and finally memoize the object
--> 331         self.save_reduce(obj=obj, *rv)
    332 
    333     def persistent_id(self, obj):

/usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj)
    417 
    418         if state is not None:
--> 419             save(state)
    420             write(BUILD)
    421 

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    284         f = self.dispatch.get(t)
    285         if f:
--> 286             f(self, obj) # Call unbound method with explicit self
    287             return
    288 

/usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj)
    560         write(MARK)
    561         for element in obj:
--> 562             save(element)
    563 
    564         if id(obj) in memo:

/usr/lib64/python2.7/pickle.pyc in save(self, obj)
    304             reduce = getattr(obj, "__reduce_ex__", None)
    305             if reduce:
--> 306                 rv = reduce(self.proto)
    307             else:
    308                 reduce = getattr(obj, "__reduce__", None)

/usr/lib64/python2.7/copy_reg.pyc in _reduce_ex(self, proto)
     75     except AttributeError:
     76         if getattr(self, "__slots__", None):
---> 77             raise TypeError("a class that defines __slots__ without "
     78                             "defining __getstate__ cannot be pickled")
     79         try:

TypeError: a class that defines __slots__ without defining __getstate__ cannot be pickled
lmcinnes commented 6 years ago

Okay, that's bad. I didn't explicitly define slots, so presumably it crept in from either the inheritance chain or something else that I'm doing. I'll try to look into this tomorrow.

On Wed, Jul 18, 2018 at 7:27 PM Moin Nadeem notifications@github.com wrote:

When pickling the nndescent class, I get the following traceback:


TypeError Traceback (most recent call last)

in () 1 with open("nndescent_continuous-1.pkl", "wb") as f: ----> 2 pickle.dump(nndescent, f) /usr/lib64/python2.7/pickle.pyc in dump(obj, file, protocol) 1368 1369 def dump(obj, file, protocol=None): -> 1370 Pickler(file, protocol).dump(obj) 1371 1372 def dumps(obj, protocol=None): /usr/lib64/python2.7/pickle.pyc in dump(self, obj) 222 if self.proto >= 2: 223 self.write(PROTO + chr(self.proto)) --> 224 self.save(obj) 225 self.write(STOP) 226 /usr/lib64/python2.7/pickle.pyc in save(self, obj) 329 330 # Save the reduce() output and finally memoize the object --> 331 self.save_reduce(obj=obj, *rv) 332 333 def persistent_id(self, obj): /usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj) 417 418 if state is not None: --> 419 save(state) 420 write(BUILD) 421 /usr/lib64/python2.7/pickle.pyc in save(self, obj) 284 f = self.dispatch.get(t) 285 if f: --> 286 f(self, obj) # Call unbound method with explicit self 287 return 288 /usr/lib64/python2.7/pickle.pyc in save_dict(self, obj) 647 648 self.memoize(obj) --> 649 self._batch_setitems(obj.iteritems()) 650 651 dispatch[DictionaryType] = save_dict /usr/lib64/python2.7/pickle.pyc in _batch_setitems(self, items) 661 for k, v in items: 662 save(k) --> 663 save(v) 664 write(SETITEM) 665 return /usr/lib64/python2.7/pickle.pyc in save(self, obj) 329 330 # Save the reduce() output and finally memoize the object --> 331 self.save_reduce(obj=obj, *rv) 332 333 def persistent_id(self, obj): /usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj) 399 else: 400 save(func) --> 401 save(args) 402 write(REDUCE) 403 /usr/lib64/python2.7/pickle.pyc in save(self, obj) 284 f = self.dispatch.get(t) 285 if f: --> 286 f(self, obj) # Call unbound method with explicit self 287 return 288 /usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj) 560 write(MARK) 561 for element in obj: --> 562 save(element) 563 564 if id(obj) in memo: 
/usr/lib64/python2.7/pickle.pyc in save(self, obj) 284 f = self.dispatch.get(t) 285 if f: --> 286 f(self, obj) # Call unbound method with explicit self 287 return 288 /usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj) 560 write(MARK) 561 for element in obj: --> 562 save(element) 563 564 if id(obj) in memo: /usr/lib64/python2.7/pickle.pyc in save(self, obj) 284 f = self.dispatch.get(t) 285 if f: --> 286 f(self, obj) # Call unbound method with explicit self 287 return 288 /usr/lib64/python2.7/pickle.pyc in save_dict(self, obj) 647 648 self.memoize(obj) --> 649 self._batch_setitems(obj.iteritems()) 650 651 dispatch[DictionaryType] = save_dict /usr/lib64/python2.7/pickle.pyc in _batch_setitems(self, items) 661 for k, v in items: 662 save(k) --> 663 save(v) 664 write(SETITEM) 665 return /usr/lib64/python2.7/pickle.pyc in save(self, obj) 329 330 # Save the reduce() output and finally memoize the object --> 331 self.save_reduce(obj=obj, *rv) 332 333 def persistent_id(self, obj): /usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj) 399 else: 400 save(func) --> 401 save(args) 402 write(REDUCE) 403 /usr/lib64/python2.7/pickle.pyc in save(self, obj) 284 f = self.dispatch.get(t) 285 if f: --> 286 f(self, obj) # Call unbound method with explicit self 287 return 288 /usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj) 560 write(MARK) 561 for element in obj: --> 562 save(element) 563 564 if id(obj) in memo: /usr/lib64/python2.7/pickle.pyc in save(self, obj) 284 f = self.dispatch.get(t) 285 if f: --> 286 f(self, obj) # Call unbound method with explicit self 287 return 288 /usr/lib64/python2.7/pickle.pyc in save_list(self, obj) 598 599 self.memoize(obj) --> 600 self._batch_appends(iter(obj)) 601 602 dispatch[ListType] = save_list /usr/lib64/python2.7/pickle.pyc in _batch_appends(self, items) 613 if not self.bin: 614 for x in items: --> 615 save(x) 616 write(APPEND) 617 return /usr/lib64/python2.7/pickle.pyc in save(self, obj) 
329 330 # Save the reduce() output and finally memoize the object --> 331 self.save_reduce(obj=obj, *rv) 332 333 def persistent_id(self, obj): /usr/lib64/python2.7/pickle.pyc in save_reduce(self, func, args, state, listitems, dictitems, obj) 417 418 if state is not None: --> 419 save(state) 420 write(BUILD) 421 /usr/lib64/python2.7/pickle.pyc in save(self, obj) 284 f = self.dispatch.get(t) 285 if f: --> 286 f(self, obj) # Call unbound method with explicit self 287 return 288 /usr/lib64/python2.7/pickle.pyc in save_tuple(self, obj) 560 write(MARK) 561 for element in obj: --> 562 save(element) 563 564 if id(obj) in memo: /usr/lib64/python2.7/pickle.pyc in save(self, obj) 304 reduce = getattr(obj, "__reduce_ex__", None) 305 if reduce: --> 306 rv = reduce(self.proto) 307 else: 308 reduce = getattr(obj, "__reduce__", None) /usr/lib64/python2.7/copy_reg.pyc in _reduce_ex(self, proto) 75 except AttributeError: 76 if getattr(self, "__slots__", None): ---> 77 raise TypeError("a class that defines __slots__ without " 78 "defining __getstate__ cannot be pickled") 79 try: TypeError: a class that defines __slots__ without defining __getstate__ cannot be pickled — You are receiving this because you are subscribed to this thread. Reply to this email directly, view it on GitHub , or mute the thread .
moinnadeem commented 6 years ago

Sounds good, let me know if you need help testing, as ~20 minutes each day is spent building the index. For reference, I'm building the index on a Numpy array.

lmcinnes commented 6 years ago

So I realise now that I have successfully saved off indices before, but I believe I did so using joblib (https://pythonhosted.org/joblib/persistence.html). I believe that should work for you. Still, I'll try to figure out the pickle issues, as you will certainly not be the only one to hit this issue.

On Wed, Jul 18, 2018 at 9:28 PM Moin Nadeem notifications@github.com wrote:

Sounds good, let me know if you need help testing, as ~20 minutes each day is spent building the index. For reference, I'm building the index on a Numpy array.

— You are receiving this because you commented. Reply to this email directly, view it on GitHub https://github.com/lmcinnes/pynndescent/issues/4#issuecomment-406125284, or mute the thread https://github.com/notifications/unsubscribe-auth/ALaKBdmMcJnbLodVnVYIB0LY3r-SWOrkks5uH-DQgaJpZM4VVhRJ .

lmcinnes commented 6 years ago

The next issue seems to be that python3 is happy with such things (I can't reproduce the error there), but python2 uses an older pickle protocol that doesn't like slots. I see you are using 2.7, which is why I never encountered this error -- I mostly test on python3. The solution, it seems, is to do:


with open("nndescent_continuous-1.pkl", "wb") as f:
    pickle.dump(nndescent, f, protocol=-1)

I know that's not obvious, but those are apparently the necessary hoops to jump through for python2. I'll try to make sure this gets documented when I get around to writing proper documentation.

On Thu, Jul 19, 2018 at 2:37 PM Leland McInnes leland.mcinnes@gmail.com wrote:

So I realise now that I have successfully saved off indices before, but I believe I did so using joblib (https://pythonhosted.org/joblib/persistence.html). I believe that should work for you. Still, I'll try to figure out the pickle issues, as you will certainly not be the only one to hit this issue.

On Wed, Jul 18, 2018 at 9:28 PM Moin Nadeem notifications@github.com wrote:

Sounds good, let me know if you need help testing, as ~20 minutes each day is spent building the index. For reference, I'm building the index on a Numpy array.

— You are receiving this because you commented. Reply to this email directly, view it on GitHub https://github.com/lmcinnes/pynndescent/issues/4#issuecomment-406125284, or mute the thread https://github.com/notifications/unsubscribe-auth/ALaKBdmMcJnbLodVnVYIB0LY3r-SWOrkks5uH-DQgaJpZM4VVhRJ .

moinnadeem commented 6 years ago

I tried that, but when I try loading it:

KeyError                                  Traceback (most recent call last)
<ipython-input-3-027db7d4ab11> in <module>()
      1 with open("nndescent.pkl", "rb") as f:
----> 2     nndescent = pickle.load(f)

/usr/lib64/python2.7/pickle.pyc in load(file)
   1376 
   1377 def load(file):
-> 1378     return Unpickler(file).load()
   1379 
   1380 def loads(str):

/usr/lib64/python2.7/pickle.pyc in load(self)
    856             while 1:
    857                 key = read(1)
--> 858                 dispatch[key](self)
    859         except _Stop, stopinst:
    860             return stopinst.value

/usr/lib64/python2.7/pickle.pyc in load_newobj(self)
   1081         args = self.stack.pop()
   1082         cls = self.stack[-1]
-> 1083         obj = cls.__new__(cls, *args)
   1084         self.stack[-1] = obj
   1085     dispatch[NEWOBJ] = load_newobj

/home/moinnadeem/.virtualenvs/quizlet/lib/python2.7/site-packages/funcsigs/__init__.pyc in __new__(self, *args, **kwargs)
    199     def __new__(self, *args, **kwargs):
    200         obj = int.__new__(self, *args)
--> 201         obj._name = kwargs['name']
    202         return obj
    203 

KeyError: 'name'
lmcinnes commented 6 years ago

Have you tried persistence with joblib? I do believe that that will be more robust in general, and would be the preferred option.

On Mon, Jul 23, 2018 at 6:56 PM Moin Nadeem notifications@github.com wrote:

I tried that, but when I try loading it:

KeyError Traceback (most recent call last)

in () 1 with open("nndescent.pkl", "rb") as f: ----> 2 nndescent = pickle.load(f) /usr/lib64/python2.7/pickle.pyc in load(file) 1376 1377 def load(file): -> 1378 return Unpickler(file).load() 1379 1380 def loads(str): /usr/lib64/python2.7/pickle.pyc in load(self) 856 while 1: 857 key = read(1) --> 858 dispatch[key](self) 859 except _Stop, stopinst: 860 return stopinst.value /usr/lib64/python2.7/pickle.pyc in load_newobj(self) 1081 args = self.stack.pop() 1082 cls = self.stack[-1] -> 1083 obj = cls.__new__(cls, *args) 1084 self.stack[-1] = obj 1085 dispatch[NEWOBJ] = load_newobj /home/moinnadeem/.virtualenvs/quizlet/lib/python2.7/site-packages/funcsigs/__init__.pyc in __new__(self, *args, **kwargs) 199 def __new__(self, *args, **kwargs): 200 obj = int.__new__(self, *args) --> 201 obj._name = kwargs['name'] 202 return obj 203 KeyError: 'name' — You are receiving this because you commented. Reply to this email directly, view it on GitHub , or mute the thread .
moinnadeem commented 6 years ago

Yeah, it uses the underlying pickle object, so I got the same error unfortunately :/

On Mon, Jul 23, 2018 at 4:37 PM Leland McInnes notifications@github.com wrote:

Have you tried persistence with joblib? I do believe that that will be more robust in general, and would be the preferred option.

On Mon, Jul 23, 2018 at 6:56 PM Moin Nadeem notifications@github.com wrote:

I tried that, but when I try loading it:

KeyError Traceback (most recent call last)

in () 1 with open("nndescent.pkl", "rb") as f: ----> 2 nndescent = pickle.load(f) /usr/lib64/python2.7/pickle.pyc in load(file) 1376 1377 def load(file): -> 1378 return Unpickler(file).load() 1379 1380 def loads(str): /usr/lib64/python2.7/pickle.pyc in load(self) 856 while 1: 857 key = read(1) --> 858 dispatch[key](self) 859 except _Stop, stopinst: 860 return stopinst.value /usr/lib64/python2.7/pickle.pyc in load_newobj(self) 1081 args = self.stack.pop() 1082 cls = self.stack[-1] -> 1083 obj = cls.__new__(cls, *args) 1084 self.stack[-1] = obj 1085 dispatch[NEWOBJ] = load_newobj /home/moinnadeem/.virtualenvs/quizlet/lib/python2.7/site-packages/funcsigs/__init__.pyc in __new__(self, *args, **kwargs) 199 def __new__(self, *args, **kwargs): 200 obj = int.__new__(self, *args) --> 201 obj._name = kwargs['name'] 202 return obj 203 KeyError: 'name' — You are receiving this because you commented. Reply to this email directly, view it on GitHub

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub https://github.com/lmcinnes/pynndescent/issues/4#issuecomment-407233711, or mute the thread https://github.com/notifications/unsubscribe-auth/AAxpN61HARK7XxF4Q5B8f_QsDdbgueXiks5uJl5GgaJpZM4VVhRJ .

lmcinnes commented 6 years ago

Okay. I will have to try to get a suitable python2.7 setup working and test there, because everything seems to be working fine under the various python3 versions I have around. Sorry.

On Mon, Jul 23, 2018 at 7:54 PM Moin Nadeem notifications@github.com wrote:

Yeah, it uses the underlying pickle object, so I got the same error unfortunately :/

On Mon, Jul 23, 2018 at 4:37 PM Leland McInnes notifications@github.com wrote:

Have you tried persistence with joblib? I do believe that that will be more robust in general, and would be the preferred option.

On Mon, Jul 23, 2018 at 6:56 PM Moin Nadeem notifications@github.com wrote:

I tried that, but when I try loading it:

KeyError Traceback (most recent call last)

in () 1 with open("nndescent.pkl", "rb") as f: ----> 2 nndescent = pickle.load(f) /usr/lib64/python2.7/pickle.pyc in load(file) 1376 1377 def load(file): -> 1378 return Unpickler(file).load() 1379 1380 def loads(str): /usr/lib64/python2.7/pickle.pyc in load(self) 856 while 1: 857 key = read(1) --> 858 dispatch[key](self) 859 except _Stop, stopinst: 860 return stopinst.value /usr/lib64/python2.7/pickle.pyc in load_newobj(self) 1081 args = self.stack.pop() 1082 cls = self.stack[-1] -> 1083 obj = cls.__new__(cls, *args) 1084 self.stack[-1] = obj 1085 dispatch[NEWOBJ] = load_newobj

/home/moinnadeem/.virtualenvs/quizlet/lib/python2.7/site-packages/funcsigs/__init__.pyc in __new__(self, *args, **kwargs)

199 def __new__(self, *args, **kwargs): 200 obj = int.__new__(self, *args) --> 201 obj._name = kwargs['name'] 202 return obj 203

KeyError: 'name'

— You are receiving this because you commented. Reply to this email directly, view it on GitHub < https://github.com/lmcinnes/pynndescent/issues/4#issuecomment-407226729 , or mute the thread <

https://github.com/notifications/unsubscribe-auth/ALaKBcrAXwddUZoWjA_KY6h25amuHOFiks5uJlSzgaJpZM4VVhRJ

.

— You are receiving this because you authored the thread. Reply to this email directly, view it on GitHub <https://github.com/lmcinnes/pynndescent/issues/4#issuecomment-407233711 , or mute the thread < https://github.com/notifications/unsubscribe-auth/AAxpN61HARK7XxF4Q5B8f_QsDdbgueXiks5uJl5GgaJpZM4VVhRJ

.

— You are receiving this because you commented. Reply to this email directly, view it on GitHub https://github.com/lmcinnes/pynndescent/issues/4#issuecomment-407236142, or mute the thread https://github.com/notifications/unsubscribe-auth/ALaKBWoRw77mjo5zwUUciM9Gyqu9Pg45ks5uJmIkgaJpZM4VVhRJ .