bab2min / kiwipiepy

Python API for Kiwi
Other
282 stars 27 forks source link

`SwTokenizer`가 피클되지 않는 문제 #135

Open Bing-su opened 1 year ago

Bing-su commented 1 year ago

키위 토크나이저(`KiwiTokenizer`)를 멀티프로세싱과 함께 사용하려다 발견한 문제입니다. 멀티프로세싱은 객체를 피클로 직렬화해 워커 프로세스에 전달하는데, 토크나이저 내부의 `SwTokenizer` 객체가 피클되지 않아 실패합니다.

개발환경

Windows 11
python 3.10.12
kiwipiepy 0.15.2
transformers 4.31.0
tokenizers 0.13.3
dill 0.3.7

문제

import kiwipiepy.transformers_addon
from transformers import AutoTokenizer

repo = "kiwi-farm/roberta-base-32k"
tk = AutoTokenizer.from_pretrained(repo)
import pickle

pickled = pickle.dumps(tk)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[2], line 4
      1 import pickle
      2 import dill
----> 4 pickled = pickle.dumps(tk)

TypeError: cannot pickle 'SwTokenizer' object
import dill

pickled = dill.dumps(tk)
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[3], line 3
      1 import dill
----> 3 pickled = dill.dumps(tk)

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\site-packages\dill\_dill.py:278](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/site-packages/dill/_dill.py:278), in dumps(obj, protocol, byref, fmode, recurse, **kwds)
    254 """
    255 Pickle an object to a string.
    256 
   (...)
    275 Default values for keyword arguments can be set in :mod:`dill.settings`.
    276 """
    277 file = StringIO()
--> 278 dump(obj, file, protocol, byref, fmode, recurse, **kwds)#, strictio)
    279 return file.getvalue()

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\site-packages\dill\_dill.py:250](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/site-packages/dill/_dill.py:250), in dump(obj, file, protocol, byref, fmode, recurse, **kwds)
    248 _kwds = kwds.copy()
    249 _kwds.update(dict(byref=byref, fmode=fmode, recurse=recurse))
--> 250 Pickler(file, protocol, **_kwds).dump(obj)
    251 return

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\site-packages\dill\_dill.py:418](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/site-packages/dill/_dill.py:418), in Pickler.dump(self, obj)
    416 def dump(self, obj): #NOTE: if settings change, need to update attributes
    417     logger.trace_setup(self)
--> 418     StockPickler.dump(self, obj)

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\pickle.py:487](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/pickle.py:487), in _Pickler.dump(self, obj)
    485 if self.proto >= 4:
    486     self.framer.start_framing()
--> 487 self.save(obj)
    488 self.write(STOP)
    489 self.framer.end_framing()

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\site-packages\dill\_dill.py:412](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/site-packages/dill/_dill.py:412), in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\pickle.py:603](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/pickle.py:603), in _Pickler.save(self, obj, save_persistent_id)
    599     raise PicklingError("Tuple returned by %s must have "
    600                         "two to six elements" % reduce)
    602 # Save the reduce() output and finally memoize the object
--> 603 self.save_reduce(obj=obj, *rv)

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\pickle.py:717](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/pickle.py:717), in _Pickler.save_reduce(self, func, args, state, listitems, dictitems, state_setter, obj)
    715 if state is not None:
    716     if state_setter is None:
--> 717         save(state)
    718         write(BUILD)
    719     else:
    720         # If a state_setter is specified, call it instead of load_build
    721         # to update obj's with its previous state.
    722         # First, push state_setter and its tuple of expected arguments
    723         # (obj, state) onto the stack.

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\site-packages\dill\_dill.py:412](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/site-packages/dill/_dill.py:412), in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\pickle.py:560](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/pickle.py:560), in _Pickler.save(self, obj, save_persistent_id)
    558 f = self.dispatch.get(t)
    559 if f is not None:
--> 560     f(self, obj)  # Call unbound method with explicit self
    561     return
    563 # Check private dispatch table if any, or else
    564 # copyreg.dispatch_table

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\site-packages\dill\_dill.py:1212](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/site-packages/dill/_dill.py:1212), in save_module_dict(pickler, obj)
   1209     if is_dill(pickler, child=False) and pickler._session:
   1210         # we only care about session the first pass thru
   1211         pickler._first_pass = False
-> 1212     StockPickler.save_dict(pickler, obj)
   1213     logger.trace(pickler, "# D2")
   1214 return

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\pickle.py:972](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/pickle.py:972), in _Pickler.save_dict(self, obj)
    969     self.write(MARK + DICT)
    971 self.memoize(obj)
--> 972 self._batch_setitems(obj.items())

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\pickle.py:998](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/pickle.py:998), in _Pickler._batch_setitems(self, items)
    996     for k, v in tmp:
    997         save(k)
--> 998         save(v)
    999     write(SETITEMS)
   1000 elif n:

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\site-packages\dill\_dill.py:412](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/site-packages/dill/_dill.py:412), in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File [c:\Users\dowon\miniconda3\envs\kiwi\lib\pickle.py:578](file:///C:/Users/dowon/miniconda3/envs/kiwi/lib/pickle.py:578), in _Pickler.save(self, obj, save_persistent_id)
    576 reduce = getattr(obj, "__reduce_ex__", None)
    577 if reduce is not None:
--> 578     rv = reduce(self.proto)
    579 else:
    580     reduce = getattr(obj, "__reduce__", None)

TypeError: cannot pickle 'SwTokenizer' object
dill.detect.baditems(tk)
# repr 문제를 해결한 브랜치에서 실행했기 때문에 이 호출 자체는 에러 없이 동작합니다.
[KiwiTokenizer(name_or_path='kiwi-farm/roberta-base-32k', vocab_size=32000, model_max_length=512, is_fast=False, padding_side='right', truncation_side='right', special_tokens={'bos_token': '[BOS]', 'eos_token': '[EOS]', 'unk_token': '[UNK]', 'sep_token': '[SEP]', 'pad_token': '[PAD]', 'cls_token': '[CLS]', 'mask_token': '[MASK]'}, clean_up_tokenization_spaces=True)]
dill.detect.baditems(tk._tokenizer)
[<kiwipiepy.sw_tokenizer.SwTokenizer object at 0x0000023F82CE9040>]