nltk / wordnet

Stand-alone WordNet API
Other
48 stars 15 forks source link

Deepcopy of a Synset #28

Open lucailvec opened 3 years ago

lucailvec commented 3 years ago

Hello,

Scenario: I am developing a custom transformer that works within the Sklearn framework. This transformer needs to be dumped and loaded by the Sklearn framework. The transformer holds a reference to an inverted index that uses synsets as the domain of a mapping function to integers.

Problem:

import copy
from nltk.corpus import wordnet
synsets = wordnet.synsets("try")
synset = synsets[0]
copy.deepcopy(synset)

It fails with the following traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-95-022997bb6638> in <module>
      2 synsets = wordnet.synsets("try")
      3 synset = synsets[0]
----> 4 copy.deepcopy(synset)

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_tuple(x, memo, deepcopy)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

~/.conda/envs/inforetrival/lib/python3.8/copy.py in <listcomp>(.0)
    208 
    209 def _deepcopy_tuple(x, memo, deepcopy=deepcopy):
--> 210     y = [deepcopy(a, memo) for a in x]
    211     # We're not going to put the tuple in the memo, but it's still important we
    212     # check for it, in case the tuple contains recursive mutable structures.

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    170                     y = x
    171                 else:
--> 172                     y = _reconstruct(x, memo, *rv)
    173 
    174     # If is its own copy, don't memoize.

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _reconstruct(x, memo, func, args, state, listiter, dictiter, deepcopy)
    268     if state is not None:
    269         if deep:
--> 270             state = deepcopy(state, memo)
    271         if hasattr(y, '__setstate__'):
    272             y.__setstate__(state)

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    144     copier = _deepcopy_dispatch.get(cls)
    145     if copier is not None:
--> 146         y = copier(x, memo)
    147     else:
    148         if issubclass(cls, type):

~/.conda/envs/inforetrival/lib/python3.8/copy.py in _deepcopy_dict(x, memo, deepcopy)
    228     memo[id(x)] = y
    229     for key, value in x.items():
--> 230         y[deepcopy(key, memo)] = deepcopy(value, memo)
    231     return y
    232 d[dict] = _deepcopy_dict

~/.conda/envs/inforetrival/lib/python3.8/copy.py in deepcopy(x, memo, _nil)
    159                     reductor = getattr(x, "__reduce_ex__", None)
    160                     if reductor is not None:
--> 161                         rv = reductor(4)
    162                     else:
    163                         reductor = getattr(x, "__reduce__", None)

TypeError: cannot pickle '_io.BufferedReader' object

So a custom deepcopy method would be very much appreciated.

Best regards,

Luca

goodmami commented 3 years ago

@lucailvec this project is, as I understand, no longer active and I suggest you try https://github.com/goodmami/wn/ instead. I saw this issue and tried it out there, but sure enough deep copy didn't work because each object contained a pointer to a shared database connection. I've restructured the code (not yet released) so this is no longer the case:

>>> import copy
>>> import wn
>>> synsets = wn.synsets("try")
>>> synset = synsets[0]
>>> synset
Synset('ewn-02535833-v')
>>> synset_copy = copy.deepcopy(synset)
>>> synset_copy is synset
False
>>> synset_copy == synset
True
>>> import pickle
>>> pickle.loads(pickle.dumps(synset))
Synset('ewn-02535833-v')

While this works, it's still not portable across machines because these objects store and use a row identifier to the backend SQL database, and this identifier is not guaranteed to be the same for different builds of the database (see goodmami/wn#84). Can you explain your needs a bit? Is the dumped transformer model distributed to others, or is it used on a single machine?

I hope to either remove the need to store the rowids or to create a custom deepcopy method to get around it, but in the meantime the current code might solve your immediate problem.