zeroSteiner / rule-engine

A lightweight, optionally typed expression language with a custom grammar for matching arbitrary Python objects.
https://zerosteiner.github.io/rule-engine/
BSD 3-Clause "New" or "Revised" License

Pickling rules #81

Closed: acuthber closed this issue 5 months ago

acuthber commented 5 months ago

I have a lot of data I need to run through rules and wanted to speed things up with multiprocessing. I am running into an issue pickling rules with both pickle and dill.

import pickle
pickle.dumps(rule_engine.Rule("test"))
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[108], line 2
      1 import pickle
----> 2 pickle.dumps(rule_engine.Rule("test"))

TypeError: cannot pickle '_thread._local' object

It looks to be caused by the builtins that come from the rule's context. I am unsure whether there is a way around it, so I wanted to ask if you had any insight. Thanks!

Using dill I get more info on what is wrong:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[109], line 4
      1 import dill
      3 with dill.detect.trace():
----> 4     dill.dumps(test_rule)

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:278, in dumps(obj, protocol, byref, fmode, recurse, **kwds)
    254 """
    255 Pickle an object to a string.
    256 
   (...)
    275 Default values for keyword arguments can be set in :mod:`dill.settings`.
    276 """
    277 file = StringIO()
--> 278 dump(obj, file, protocol, byref, fmode, recurse, **kwds)#, strictio)
    279 return file.getvalue()

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:250, in dump(obj, file, protocol, byref, fmode, recurse, **kwds)
    248 _kwds = kwds.copy()
    249 _kwds.update(dict(byref=byref, fmode=fmode, recurse=recurse))
--> 250 Pickler(file, protocol, **_kwds).dump(obj)
    251 return

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:418, in Pickler.dump(self, obj)
    416 def dump(self, obj): #NOTE: if settings change, need to update attributes
    417     logger.trace_setup(self)
--> 418     StockPickler.dump(self, obj)

File ~/miniconda3/lib/python3.10/pickle.py:487, in _Pickler.dump(self, obj)
    485 if self.proto >= 4:
    486     self.framer.start_framing()
--> 487 self.save(obj)
    488 self.write(STOP)
    489 self.framer.end_framing()

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:412, in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File ~/miniconda3/lib/python3.10/pickle.py:603, in _Pickler.save(self, obj, save_persistent_id)
    599     raise PicklingError("Tuple returned by %s must have "
    600                         "two to six elements" % reduce)
    602 # Save the reduce() output and finally memoize the object
--> 603 self.save_reduce(obj=obj, *rv)

File ~/miniconda3/lib/python3.10/pickle.py:717, in _Pickler.save_reduce(self, func, args, state, listitems, dictitems, state_setter, obj)
    715 if state is not None:
    716     if state_setter is None:
--> 717         save(state)
    718         write(BUILD)
    719     else:
    720         # If a state_setter is specified, call it instead of load_build
    721         # to update obj's with its previous state.
    722         # First, push state_setter and its tuple of expected arguments
    723         # (obj, state) onto the stack.

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:412, in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File ~/miniconda3/lib/python3.10/pickle.py:560, in _Pickler.save(self, obj, save_persistent_id)
    558 f = self.dispatch.get(t)
    559 if f is not None:
--> 560     f(self, obj)  # Call unbound method with explicit self
    561     return
    563 # Check private dispatch table if any, or else
    564 # copyreg.dispatch_table

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:1212, in save_module_dict(pickler, obj)
   1209     if is_dill(pickler, child=False) and pickler._session:
   1210         # we only care about session the first pass thru
   1211         pickler._first_pass = False
-> 1212     StockPickler.save_dict(pickler, obj)
   1213     logger.trace(pickler, "# D2")
   1214 return

File ~/miniconda3/lib/python3.10/pickle.py:972, in _Pickler.save_dict(self, obj)
    969     self.write(MARK + DICT)
    971 self.memoize(obj)
--> 972 self._batch_setitems(obj.items())

File ~/miniconda3/lib/python3.10/pickle.py:998, in _Pickler._batch_setitems(self, items)
    996     for k, v in tmp:
    997         save(k)
--> 998         save(v)
    999     write(SETITEMS)
   1000 elif n:

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:412, in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File ~/miniconda3/lib/python3.10/pickle.py:603, in _Pickler.save(self, obj, save_persistent_id)
    599     raise PicklingError("Tuple returned by %s must have "
    600                         "two to six elements" % reduce)
    602 # Save the reduce() output and finally memoize the object
--> 603 self.save_reduce(obj=obj, *rv)

File ~/miniconda3/lib/python3.10/pickle.py:717, in _Pickler.save_reduce(self, func, args, state, listitems, dictitems, state_setter, obj)
    715 if state is not None:
    716     if state_setter is None:
--> 717         save(state)
    718         write(BUILD)
    719     else:
    720         # If a state_setter is specified, call it instead of load_build
    721         # to update obj's with its previous state.
    722         # First, push state_setter and its tuple of expected arguments
    723         # (obj, state) onto the stack.

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:412, in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File ~/miniconda3/lib/python3.10/pickle.py:560, in _Pickler.save(self, obj, save_persistent_id)
    558 f = self.dispatch.get(t)
    559 if f is not None:
--> 560     f(self, obj)  # Call unbound method with explicit self
    561     return
    563 # Check private dispatch table if any, or else
    564 # copyreg.dispatch_table

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:1212, in save_module_dict(pickler, obj)
   1209     if is_dill(pickler, child=False) and pickler._session:
   1210         # we only care about session the first pass thru
   1211         pickler._first_pass = False
-> 1212     StockPickler.save_dict(pickler, obj)
   1213     logger.trace(pickler, "# D2")
   1214 return

File ~/miniconda3/lib/python3.10/pickle.py:972, in _Pickler.save_dict(self, obj)
    969     self.write(MARK + DICT)
    971 self.memoize(obj)
--> 972 self._batch_setitems(obj.items())

File ~/miniconda3/lib/python3.10/pickle.py:998, in _Pickler._batch_setitems(self, items)
    996     for k, v in tmp:
    997         save(k)
--> 998         save(v)
    999     write(SETITEMS)
   1000 elif n:

File ~/jupyter/.venv/lib/python3.10/site-packages/dill/_dill.py:412, in Pickler.save(self, obj, save_persistent_id)
    410     msg = "Can't pickle %s: attribute lookup builtins.generator failed" % GeneratorType
    411     raise PicklingError(msg)
--> 412 StockPickler.save(self, obj, save_persistent_id)

File ~/miniconda3/lib/python3.10/pickle.py:578, in _Pickler.save(self, obj, save_persistent_id)
    576 reduce = getattr(obj, "__reduce_ex__", None)
    577 if reduce is not None:
--> 578     rv = reduce(self.proto)
    579 else:
    580     reduce = getattr(obj, "__reduce__", None)

TypeError: cannot pickle '_thread._local' object
acuthber commented 5 months ago

It seems this is by design. The rule's Context uses threading.local(), which prevents pickling, e.g.:

import threading
import pickle

class Obj:
    def __init__(self):
        self.tls = threading.local()

pickle.dumps(Obj())

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[115], line 8
      5     def __init__(self):
      6         self.tls = threading.local()
----> 8 pickle.dumps(Obj())

TypeError: cannot pickle '_thread._local' object
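
For what it's worth, the standard Python pattern for making a class that holds a threading.local() picklable is to drop it in __getstate__ and recreate it in __setstate__. A minimal sketch of that general pattern (not something rule-engine implements today, just illustrating why this attribute is the blocker):

import pickle
import threading

class Obj:
    def __init__(self):
        self.tls = threading.local()

    def __getstate__(self):
        # Exclude the unpicklable thread-local slot from the pickled state.
        state = self.__dict__.copy()
        del state['tls']
        return state

    def __setstate__(self, state):
        # Restore the remaining attributes and recreate a fresh thread-local.
        self.__dict__.update(state)
        self.tls = threading.local()

pickle.dumps(Obj())  # works now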

I have no idea if this is a terrible idea, but creating a custom context and overriding self._thread_local seems to work, provided we also use dill rather than pickle:

import dill
import rule_engine
import rule_engine.engine
from rule_engine.engine import _ThreadLocalStorage

class MockTls:
    # Stand-in for threading.local() so the context can be serialized with dill.
    def __init__(self):
        self.storage = _ThreadLocalStorage()

class CustomRuleContext(rule_engine.Context):
    def __init__(self, *args, **kwargs):
        predefined_functions = kwargs.pop("predefined_functions", {})
        super().__init__(*args, **kwargs)
        self.builtins = rule_engine.engine.builtins.Builtins.from_defaults(
            predefined_functions,
            timezone=self.default_timezone,
        )
        # Replace the unpicklable threading.local() with the mock above.
        self._thread_local = MockTls()

rule = rule_engine.Rule("test", context=CustomRuleContext())
dill.dumps(rule)
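
A quick round-trip check of the workaround above (a sketch based on my setup; Rule.matches is the regular rule-engine API, but whether the restored rule behaves identically in every case is untested):

payload = dill.dumps(rule)
restored = dill.loads(payload)
# The rule text is just the symbol "test", so matches() returns its truthiness.
print(restored.matches({'test': True}))   # True
print(restored.matches({'test': False}))  # False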
acuthber commented 5 months ago

It is negligible to just recreate the context as well as the rules (in my case < 100) inside every new process the process pool creates. I do not think it is worth the hassle to make rules/contexts picklable.
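
For reference, a minimal sketch of that approach using a pool initializer; RULE_TEXTS and the worker helpers below are hypothetical names, not part of rule-engine:

import multiprocessing

import rule_engine

# Hypothetical rule definitions; in practice these come from wherever the
# real rules are stored.
RULE_TEXTS = ['age >= 21', 'name == "test"']

_RULES = []

def _init_worker():
    # Rebuild the (unpicklable) Rule objects once per worker process.
    global _RULES
    _RULES = [rule_engine.Rule(text) for text in RULE_TEXTS]

def _match_all(thing):
    # Only plain data (dicts) crosses the process boundary, so nothing
    # unpicklable ever needs to be serialized.
    return [rule.matches(thing) for rule in _RULES]

if __name__ == '__main__':
    things = [{'age': 30, 'name': 'test'}, {'age': 18, 'name': 'other'}]
    with multiprocessing.Pool(initializer=_init_worker) as pool:
        print(pool.map(_match_all, things))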