Open w601sxs opened 4 years ago
What version of xgboost are you using, and what's the output of client.get_versions()?
Hi Tom,
The xgboost version is 0.90; it looks like this is what gets installed with pip install dask-ml[complete]:
Collecting xgboost; extra == "complete" (from dask-ml[complete]) Using cached https://files.pythonhosted.org/packages/c1/24/5fe7237b2eca13ee0cfb100bec8c23f4e69ce9df852a64b0493d49dae4e0/xgboost-0.90-py2.py3-none-manylinux1_x86_64.whl
And output of client.get_versions() is ...
{'scheduler': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'workers': {'tcp://10.0.100.158:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'tcp://10.0.107.26:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'tcp://10.0.126.71:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'tcp://10.0.27.54:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'tcp://10.0.28.104:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'tcp://10.0.46.0:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'tcp://10.0.61.79:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}},
'tcp://10.0.66.44:9000': {'host': (('python', '3.6.9.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-127.182.amzn2.x86_64'),
('machine', 'x86_64'),
('processor', ''),
('byteorder', 'little'),
('LC_ALL', 'C.UTF-8'),
('LANG', 'C.UTF-8'),
('LOCALE', 'en_US.UTF-8')),
'packages': {'required': (('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.1'),
('cloudpickle', '1.2.2'),
('tornado', '6.0.3'),
('toolz', '0.10.0')),
'optional': (('numpy', '1.17.4'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', '1.7.0'))}}},
'client': {'host': [('python', '3.6.5.final.0'),
('python-bits', 64),
('OS', 'Linux'),
('OS-release', '4.14.152-98.182.amzn1.x86_64'),
('machine', 'x86_64'),
('processor', 'x86_64'),
('byteorder', 'little'),
('LC_ALL', 'None'),
('LANG', 'en_US.UTF-8'),
('LOCALE', 'en_US.UTF-8')],
'packages': {'required': [('dask', '2.9.0'),
('distributed', '2.9.0'),
('msgpack', '0.6.0'),
('cloudpickle', '0.5.3'),
('tornado', '5.0.2'),
('toolz', '0.9.0')],
'optional': [('numpy', '1.14.3'),
('pandas', '0.25.3'),
('bokeh', '1.4.0'),
('lz4', None),
('dask_ml', '1.1.1'),
('blosc', None)]}}}
I got this error too, deploying through dask-kubernetes.
I tried a mix of versions, from dask 2.8.1 to the latest (with matching distributed, tornado, etc. across the cluster, per get_versions).
I am installing xgboost through pip install dask-xgboost.
Can either of you provide a reproducible example? http://matthewrocklin.com/blog/work/2018/02/28/minimal-bug-reports.
For example, does this fail?
In [1]: from dask_ml.datasets import make_classification
In [2]: from distributed import Client
In [3]: c = Client()
In [4]: X, y = make_classification(chunks=20)
In [5]: import dask_xgboost as dxgb
In [6]: bst = dxgb.train(c, {}, X, y)
Hi Tom,
I tried your example, and it throws the following error:
distributed.protocol.pickle - INFO - Failed to deserialize b"\x80\x04\x95\x8c\x02\x00\x00\x00\x00\x00\x00\x8c\x17cloudpickle.cloudpickle\x94\x8c\x19_rehydrate_skeleton_class\x94\x93\x94(h\x00\x8c\x14_make_skeleton_class\x94\x93\x94(h\x00\x8c\r_builtin_type\x94\x93\x94\x8c\tClassType\x94\x85\x94R\x94\x8c\x0cXGBoostError\x94\x8c\x08builtins\x94\x8c\tException\x94\x93\x94\x85\x94}\x94\x8c\x07doc\x94\x8c Error thrown by xgboost trainer.\x94s\x8c ffb5442a993b47439fd682d3734c1d3e\x94Nt\x94R\x94}\x94\x8c\nmodule\x94\x8c\x0cxgboost.core\x94stRXe\x01\x00\x00XGBoost Library (libxgboost.so) could not be loaded.\nLikely causes:\n OpenMP runtime is not installed (vcomp140.dll or libgomp-1.dll for Windows, libgomp.so for UNIX-like OSes)\n You are running 32-bit Python on a 64-bit OS\nError message(s): ['dlopen: cannot load any more object with static TLS', 'dlopen: cannot load any more object with static TLS']\n\x94\x85\x94R\x94." Traceback (most recent call last): File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/pickle.py", line 59, in loads return pickle.loads(x) AttributeError: Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'> distributed.protocol.core - CRITICAL - Failed to deserialize Traceback (most recent call last): File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/core.py", line 124, in loads value = _deserialize(head, fs, deserializers=deserializers) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 268, in deserialize return loads(header, frames) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 62, in pickle_loads return pickle.loads(b"".join(frames)) File 
"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/pickle.py", line 59, in loads return pickle.loads(x) AttributeError: Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'> distributed.utils - ERROR - Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'> Traceback (most recent call last): File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/utils.py", line 663, in log_errors yield File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/client.py", line 1151, in _handle_report msgs = await self.scheduler_comm.comm.read() File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/comm/tcp.py", line 222, in read frames, deserialize=self.deserialize, deserializers=deserializers File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/comm/utils.py", line 69, in from_frames res = _from_frames() File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/comm/utils.py", line 55, in _from_frames frames, deserialize=deserialize, deserializers=deserializers File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/core.py", line 124, in loads value = _deserialize(head, fs, deserializers=deserializers) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 268, in deserialize return loads(header, frames) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 62, in pickle_loads return pickle.loads(b"".join(frames)) File 
"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/pickle.py", line 59, in loads return pickle.loads(x) AttributeError: Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'>
It also doesn't return control to the notebook from dask (it still keeps running/waiting, showing [*]).
Trying to see if this formats the error better
distributed.protocol.pickle - INFO - Failed to deserialize b"\x80\x04\x95\x8c\x02\x00\x00\x00\x00\x00\x00\x8c\x17cloudpickle.cloudpickle\x94\x8c\x19_rehydrate_skeleton_class\x94\x93\x94(h\x00\x8c\x14_make_skeleton_class\x94\x93\x94(h\x00\x8c\r_builtin_type\x94\x93\x94\x8c\tClassType\x94\x85\x94R\x94\x8c\x0cXGBoostError\x94\x8c\x08builtins\x94\x8c\tException\x94\x93\x94\x85\x94}\x94\x8c\x07__doc__\x94\x8c Error thrown by xgboost trainer.\x94s\x8c ffb5442a993b47439fd682d3734c1d3e\x94Nt\x94R\x94}\x94\x8c\n__module__\x94\x8c\x0cxgboost.core\x94stRXe\x01\x00\x00XGBoost Library (libxgboost.so) could not be loaded.\nLikely causes:\n * OpenMP runtime is not installed (vcomp140.dll or libgomp-1.dll for Windows, libgomp.so for UNIX-like OSes)\n * You are running 32-bit Python on a 64-bit OS\nError message(s): ['dlopen: cannot load any more object with static TLS', 'dlopen: cannot load any more object with static TLS']\n\x94\x85\x94R\x94." Traceback (most recent call last): File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/pickle.py", line 59, in loads return pickle.loads(x) AttributeError: Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'> distributed.protocol.core - CRITICAL - Failed to deserialize Traceback (most recent call last): File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/core.py", line 124, in loads value = _deserialize(head, fs, deserializers=deserializers) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 268, in deserialize return loads(header, frames) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 62, in pickle_loads return pickle.loads(b"".join(frames)) File 
"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/pickle.py", line 59, in loads return pickle.loads(x) AttributeError: Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'> distributed.utils - ERROR - Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'> Traceback (most recent call last): File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/utils.py", line 663, in log_errors yield File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/client.py", line 1151, in _handle_report msgs = await self.scheduler_comm.comm.read() File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/comm/tcp.py", line 222, in read frames, deserialize=self.deserialize, deserializers=deserializers File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/comm/utils.py", line 69, in from_frames res = _from_frames() File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/comm/utils.py", line 55, in _from_frames frames, deserialize=deserialize, deserializers=deserializers File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/core.py", line 124, in loads value = _deserialize(head, fs, deserializers=deserializers) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 268, in deserialize return loads(header, frames) File "/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/serialize.py", line 62, in pickle_loads return pickle.loads(b"".join(frames)) File 
"/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/distributed/protocol/pickle.py", line 59, in loads return pickle.loads(x) AttributeError: Can't get attribute '_make_skeleton_class' on <module 'cloudpickle.cloudpickle' from '/home/ec2-user/anaconda3/envs/python3/lib/python3.6/site-packages/cloudpickle/cloudpickle.py'>
It looks like you have a version mismatch between your client and the scheduler / workers for at least cloudpickle and msgpack. Can you address those first?
Using client.get_versions(check=True) may help.
I have a 4 worker cluster with Dask; all examples on arrays and dataframes work but when I try the xgboost example I get an error:
ERROR 'coroutine' object is not iterable
Full error is: