rapidsai / cuml

cuML - RAPIDS Machine Learning Library
https://docs.rapids.ai/api/cuml/stable/
Apache License 2.0
4.2k stars 527 forks source link

[BUG] raft::cuda_error thrown from experimental RF backend #3107

Closed hcho3 closed 3 years ago

hcho3 commented 3 years ago

Describe the bug: The experimental RF backend crashes with a raft::cuda_error exception.

Output: [W] [19:16:40.163584] Using experimental backend for growing trees

[W] [19:16:43.017118] Using experimental backend for growing trees

[W] [19:16:45.835559] Using experimental backend for growing trees

[W] [19:16:48.644952] Using experimental backend for growing trees

[W] [19:16:51.465764] Using experimental backend for growing trees

[W] [19:16:54.285171] Using experimental backend for growing trees

[W] [19:16:57.105698] Using experimental backend for growing trees

[W] [19:16:59.935863] Using experimental backend for growing trees

[W] [19:17:02.781407] Using experimental backend for growing trees

[W] [19:17:05.623277] Using experimental backend for growing trees

terminate called after throwing an instance of 'raft::cuda_error' what(): CUDA error encountered at: file=/home/phcho/Desktop/cuml/cpp/build/raft/src/raft/cpp/include/raft/mr/host/allocator.hpp line=48: call='cudaFreeHost(p)', Reason=cudaErrorIllegalAddress:an illegal memory access was encountered

Obtained 64 stack frames
#0 in /home/phcho/miniconda3/envs/cuml_dev/lib/python3.8/site-packages/cuml/raft/common/handle.cpython-38-x86_64-linux-gnu.so(_ZN4raft9exception18collect_call_stackEv+0x46) [0x7f43c30bf0b6]
#1 in /home/phcho/miniconda3/envs/cuml_dev/lib/python3.8/site-packages/cuml/raft/common/handle.cpython-38-x86_64-linux-gnu.so(_ZN4raft10cuda_errorC1ERKNSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE+0x69) [0x7f43c30bf7d9]
#2 in /home/phcho/miniconda3/envs/cuml_dev/lib/python3.8/site-packages/cuml/raft/common/handle.cpython-38-x86_64-linux-gnu.so(_ZN4raft2mr4host17default_allocator10deallocateEPvmP11CUstream_st+0x12c) [0x7f43c30bf94c]
#3 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(_ZN4raft2mr11buffer_baseIcNS0_4host9allocatorEED2Ev+0x98) [0x7f43e5bb9908]
#4 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(_ZN2ML12DecisionTree9grow_treeINS0_9RegTraitsIfiEEffiEEvSt10shared_ptrIN4raft2mr6device9allocatorEES4_INS6_4host9allocatorEEPKT0_T2_SG_PKT1_SF_PSG_SK_iiRKNS0_18DecisionTreeParamsEP11CUstream_stRSt6vectorI14SparseTreeNodeISD_SH_iESaISS_EERSG_SW_+0xe33) [0x7f43e5bffa83]
#5 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(_ZN2ML12DecisionTree16DecisionTreeBaseIffE5plantERSt6vectorI14SparseTreeNodeIffiESaIS5_EEPKfiiSA_Pjiii+0x561) [0x7f43e5c005e1]
#6 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(_ZN2ML12DecisionTree16DecisionTreeBaseIffE8base_fitESt10shared_ptrIN4raft2mr6device9allocatorEES3_INS5_4host9allocatorEEP11CUstream_stPKfiiSF_PjiiRSt6vectorI14SparseTreeNodeIffiESaISJ_EEibS3_I15TemporaryMemoryIffEE+0x1e9) [0x7f43e5c147e9]
#7 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(_ZN2ML12DecisionTree21DecisionTreeRegressorIfE3fitESt10shared_ptrIN4raft2mr6device9allocatorEES3_INS5_4host9allocatorEEP11CUstream_stPKfiiSF_PjiRPNS0_16TreeMetaDataNodeIffEENS0_18DecisionTreeParamsES3_I15TemporaryMemoryIffEE+0xec) [0x7f43e5c1525c]
#8 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(+0x48cc7a) [0x7f43e5de8c7a]
#9 in /home/phcho/miniconda3/envs/cuml_dev/lib/python3.8/site-packages/sklearn/utils/../../../../libgomp.so.1(GOMP_parallel+0x112) [0x7f44f705f492]
#10 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(_ZN2ML11rfRegressorIfE3fitERKN4raft8handle_tEPKfiiPfRPNS_20RandomForestMetaDataIffEE+0x6e4) [0x7f43e5ddd6f4]
#11 in /home/phcho/miniconda3/envs/cuml_dev/lib/libcuml++.so(_ZN2ML3fitERKN4raft8handle_tERPNS_20RandomForestMetaDataIffEEPfiiS8_NS_9RF_paramsEi+0x1e4) [0x7f43e5dd4064]
#12 in /home/phcho/miniconda3/envs/cuml_dev/lib/python3.8/site-packages/cuml/ensemble/randomforestregressor.cpython-38-x86_64-linux-gnu.so(+0x2d9f7) [0x7f43c2a879f7]
#13 in python(_PyObject_MakeTpCall+0x3bf) [0x556e5c85aa1f]
#14 in python(+0x168f4a) [0x556e5c893f4a]
#15 in python(PyObject_Call+0x7d) [0x556e5c8609bd]
#16 in python(_PyEval_EvalFrameDefault+0x1e6a) [0x556e5c90957a]
#17 in python(_PyEval_EvalCodeWithName+0x929) [0x556e5c8f3b59]
#18 in python(_PyFunction_Vectorcall+0x594) [0x556e5c8f4a14]
#19 in python(PyObject_Call+0x2e9) [0x556e5c860c29]
#20 in python(_PyEval_EvalFrameDefault+0x1e6a) [0x556e5c90957a]
#21 in python(_PyEval_EvalCodeWithName+0x260) [0x556e5c8f3490]
#22 in python(_PyFunction_Vectorcall+0x594) [0x556e5c8f4a14]
#23 in python(_PyEval_EvalFrameDefault+0x71a) [0x556e5c907e2a]
#24 in python(_PyFunction_Vectorcall+0x1b7) [0x556e5c8f4637]
#25 in python(_PyObject_FastCallDict+0x5f) [0x556e5c88904f]
#26 in python(+0x19c5ab) [0x556e5c8c75ab]
#27 in python(_PyObject_MakeTpCall+0x3bf) [0x556e5c85aa1f]
#28 in python(_PyEval_EvalFrameDefault+0x4de0) [0x556e5c90c4f0]
#29 in python(_PyFunction_Vectorcall+0x1b7) [0x556e5c8f4637]
#30 in python(+0x1b3da0) [0x556e5c8deda0]
#31 in python(_PyObject_MakeTpCall+0x228) [0x556e5c85a888]
#32 in python(_PyEval_EvalFrameDefault+0x4de0) [0x556e5c90c4f0]
#33 in python(_PyEval_EvalCodeWithName+0x260) [0x556e5c8f3490]
#34 in python(_PyFunction_Vectorcall+0x594) [0x556e5c8f4a14]
#35 in python(+0x168dbe) [0x556e5c893dbe]
#36 in python(_PyEval_EvalFrameDefault+0x15a6) [0x556e5c908cb6]
#37 in python(_PyFunction_Vectorcall+0x1b7) [0x556e5c8f4637]
#38 in python(_PyEval_EvalFrameDefault+0x4bf) [0x556e5c907bcf]
#39 in python(_PyFunction_Vectorcall+0x1b7) [0x556e5c8f4637]
#40 in python(_PyEval_EvalFrameDefault+0x4bf) [0x556e5c907bcf]
#41 in python(_PyEval_EvalCodeWithName+0x929) [0x556e5c8f3b59]
#42 in python(_PyFunction_Vectorcall+0x594) [0x556e5c8f4a14]
#43 in python(_PyObject_FastCallDict+0x5f) [0x556e5c88904f]
#44 in python(+0x19c5ab) [0x556e5c8c75ab]
#45 in python(_PyObject_MakeTpCall+0x3bf) [0x556e5c85aa1f]
#46 in python(_PyEval_EvalFrameDefault+0x4de0) [0x556e5c90c4f0]
#47 in python(_PyEval_EvalCodeWithName+0x929) [0x556e5c8f3b59]
#48 in python(_PyFunction_Vectorcall+0x594) [0x556e5c8f4a14]
#49 in python(PyObject_Call+0x2e9) [0x556e5c860c29]
#50 in python(_PyEval_EvalFrameDefault+0x1e6a) [0x556e5c90957a]
#51 in python(_PyEval_EvalCodeWithName+0x929) [0x556e5c8f3b59]
#52 in python(_PyFunction_Vectorcall+0x594) [0x556e5c8f4a14]
#53 in python(_PyEval_EvalFrameDefault+0x15a6) [0x556e5c908cb6]
#54 in python(_PyEval_EvalCodeWithName+0x260) [0x556e5c8f3490]
#55 in python(PyEval_EvalCode+0x23) [0x556e5c8f4d03]
#56 in python(+0x23c722) [0x556e5c967722]
#57 in python(+0x24f212) [0x556e5c97a212]
#58 in python(PyRun_FileExFlags+0x9e) [0x556e5c97d2fe]
#59 in python(PyRun_SimpleFileExFlags+0x1b9) [0x556e5c97d4e9]
#60 in python(Py_RunMain+0x39e) [0x556e5c97d99e]
#61 in python(Py_BytesMain+0x39) [0x556e5c97db99]
#62 in /lib/x86_64-linux-gnu/libc.so.6(__libc_start_main+0xe7) [0x7f44fb4f2b97]
#63 in python(+0x1f6eb9) [0x556e5c921eb9]

Aborted (core dumped)

Steps/Code to reproduce bug

from sklearn.model_selection import cross_validate, KFold
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import make_regression
from cuml.ensemble import RandomForestRegressor as cumlRandomForestRegressor
import numpy as np

X, y = make_regression(n_samples=645187, n_features=259, random_state=100)
X, y = X.astype(np.float32), y.astype(np.float32)

params = {
    'n_estimators': 100,
    'max_depth': 25,
    'bootstrap': True,
    'max_features': 1.0,
    'random_state': 0,
    'rows_sample': 0.001,
    'use_experimental_backend': True
}

cuml_clf = cumlRandomForestRegressor(n_bins=8, n_streams=1, split_algo=1, **params)
cv_fold = KFold(n_splits=10, shuffle=True, random_state=2020)

scores = cross_validate(cuml_clf, X, y, cv=cv_fold, return_train_score=True,
                        scoring='neg_root_mean_squared_error')

Expected behavior: The program should not crash.

Environment details (please complete the following information):

Conda environment detail ("conda list")

``` # packages in environment at /home/phcho/miniconda3/envs/cuml_dev: # # Name Version Build Channel _libgcc_mutex 0.1 conda_forge conda-forge _openmp_mutex 4.5 1_llvm conda-forge abseil-cpp 20200225.2 he1b5a44_2 conda-forge aiohttp 3.7.2 py38h1e0a361_0 conda-forge alabaster 0.7.12 py_0 conda-forge appdirs 1.4.4 pyh9f0ad1d_0 conda-forge argon2-cffi 20.1.0 py38h1e0a361_2 conda-forge arrow-cpp 1.0.1 py38h41c2bc5_14_cuda conda-forge arrow-cpp-proc 2.0.0 cuda conda-forge asn1crypto 1.4.0 pyh9f0ad1d_0 conda-forge async-timeout 3.0.1 py_1000 conda-forge async_generator 1.10 py_0 conda-forge atk 2.36.0 3 conda-forge atk-1.0 2.36.0 h63f31ab_3 conda-forge attrs 20.2.0 pyh9f0ad1d_0 conda-forge autoconf 2.69 pl526h14c3975_9 conda-forge automake 1.16.2 pl526_1 conda-forge aws-c-common 0.4.59 he1b5a44_0 conda-forge aws-c-event-stream 0.1.6 h84e28f3_5 conda-forge aws-checksums 0.1.9 he252421_2 conda-forge aws-sam-translator 1.27.0 pyh9f0ad1d_0 conda-forge aws-sdk-cpp 1.8.63 h9b98462_0 conda-forge aws-xray-sdk 0.95 py_0 conda-forge babel 2.8.0 py_0 conda-forge backcall 0.2.0 pyh9f0ad1d_0 conda-forge backports 1.0 py_2 conda-forge backports.functools_lru_cache 1.6.1 py_0 conda-forge backports.tempfile 1.0 py_0 conda-forge backports.weakref 1.0.post1 py38h32f6830_1002 conda-forge beautifulsoup4 4.9.3 pyhb0f4dca_0 conda-forge benchmark 1.5.1 he1b5a44_2 conda-forge black 19.10b0 py_4 conda-forge blas 2.17 openblas conda-forge bleach 3.2.1 pyh9f0ad1d_0 conda-forge blinker 1.4 py_1 conda-forge blosc 1.20.1 he1b5a44_0 conda-forge bokeh 2.2.3 py38h32f6830_0 conda-forge boost 1.72.0 py38h1e42940_1 conda-forge boost-cpp 1.72.0 h8e57a91_0 conda-forge boto 2.49.0 py_0 conda-forge boto3 1.16.8 pyhd8ed1ab_0 conda-forge botocore 1.19.8 pyhd3deb0d_0 conda-forge brotli 1.0.9 he1b5a44_3 conda-forge brotlipy 0.7.0 py38h8df0ef7_1001 conda-forge bzip2 1.0.8 h516909a_3 conda-forge c-ares 1.16.1 h516909a_3 conda-forge ca-certificates 2020.6.20 hecda079_0 conda-forge cachetools 4.1.1 py_0 conda-forge 
cairo 1.16.0 hcf35c78_1003 conda-forge certifi 2020.6.20 py38h924ce5b_2 conda-forge cffi 1.14.3 py38h1bdcb99_1 conda-forge cfitsio 3.470 hce51eda_7 conda-forge cfn-lint 0.39.0 py38h32f6830_0 conda-forge chardet 3.0.4 py38h924ce5b_1008 conda-forge clang 8.0.1 hc9558a2_2 conda-forge clang-tools 8.0.1 hc9558a2_2 conda-forge clangxx 8.0.1 2 conda-forge click 7.1.2 pyh9f0ad1d_0 conda-forge click-plugins 1.1.1 py_0 conda-forge cligj 0.7.0 py_0 conda-forge cloudpickle 1.6.0 py_0 conda-forge cmake 3.17.0 h28c56e5_0 conda-forge cmake_setuptools 0.1.3 py_0 rapidsai cmarkgfm 0.4.2 py38h1e0a361_3 conda-forge colorama 0.4.4 pyh9f0ad1d_0 conda-forge colorcet 2.0.1 py_0 conda-forge commonmark 0.9.1 py_0 conda-forge conda 4.8.3 py38h32f6830_2 conda-forge conda-build 3.20.3 py38h32f6830_0 conda-forge conda-package-handling 1.7.2 py38h8df0ef7_0 conda-forge conda-verify 3.1.1 py38h32f6830_1002 conda-forge cookies 2.2.1 py_0 conda-forge coverage 5.3 py38h1e0a361_1 conda-forge cryptography 3.2 py38hb23e4d4_0 conda-forge cudatoolkit 10.2.89 h6bb024c_0 nvidia cudf 0.17.0a201029 cuda_10.2_py38_gea0b5d285c_132 rapidsai-nightly cudnn 7.6.5 cuda10.2_0 cuml 0.14.0+2498.g7dbd33d09.dirty pypi_0 pypi cupy 8.1.0 py38h0c20449_0 conda-forge curl 7.71.1 he644dc0_8 conda-forge cycler 0.10.0 py_2 conda-forge cyrus-sasl 2.1.27 h063b49f_1 conda-forge cython 0.29.21 py38h348cfbe_1 conda-forge cytoolz 0.11.0 py38h1e0a361_1 conda-forge dask 2.30.0+32.g94bdd4e3 pypi_0 pypi dask-cuda 0.17.0a201022 py38_19 rapidsai-nightly dask-cudf 0.17.0a201029 py38_gea0b5d285c_132 rapidsai-nightly dask-glm 0.2.0 py_1 conda-forge dask-labextension 3.0.0 py_0 conda-forge dask-ml 1.7.0 py_0 conda-forge datashader 0.11.1 pyh9f0ad1d_0 conda-forge datashape 0.5.4 py_1 conda-forge dbus 1.13.6 h7a60e0d_1 conda-forge decorator 4.4.2 py_0 conda-forge defusedxml 0.6.0 py_0 conda-forge distributed 2.30.0+14.g24007c2b pypi_0 pypi dlpack 0.3 he1b5a44_1 conda-forge docker-py 4.3.1 py38h32f6830_1 conda-forge docker-pycreds 0.4.0 py_0 
conda-forge docutils 0.16 py38h924ce5b_2 conda-forge double-conversion 3.1.5 he1b5a44_2 conda-forge doxygen 1.8.20 h0e019cf_0 conda-forge ecdsa 0.13 py_0 conda-forge entrypoints 0.3 py38h32f6830_1002 conda-forge expat 2.2.9 he1b5a44_2 conda-forge fa2 0.3.5 py38h1e0a361_0 conda-forge faiss-proc 1.0.0 cuda rapidsai-nightly fastavro 1.0.0.post1 py38h1e0a361_1 conda-forge fastrlock 0.5 py38h950e882_1 conda-forge feather-format 0.4.1 pyh9f0ad1d_0 conda-forge filelock 3.0.12 pyh9f0ad1d_0 conda-forge filterpy 1.4.5 py_1 conda-forge fiona 1.8.13 py38h033e0f6_1 conda-forge flake8 3.8.4 py_0 conda-forge flask 1.1.2 pyh9f0ad1d_0 conda-forge flatbuffers 1.10.0 hf484d3e_1002 conda-forge font-ttf-dejavu-sans-mono 2.37 hab24e00_0 conda-forge font-ttf-inconsolata 2.001 hab24e00_0 conda-forge font-ttf-source-code-pro 2.030 hab24e00_0 conda-forge font-ttf-ubuntu 0.83 hab24e00_0 conda-forge fontconfig 2.13.1 h86ecdb6_1001 conda-forge fonts-conda-ecosystem 1 0 conda-forge fonts-conda-forge 1 0 conda-forge freetype 2.10.4 he06d7ca_0 conda-forge freexl 1.0.5 h516909a_1002 conda-forge fribidi 1.0.10 h516909a_0 conda-forge fsspec 0.8.4 py_0 conda-forge future 0.18.2 py38h32f6830_2 conda-forge gcsfs 0.7.1 py_0 conda-forge gdal 3.0.4 py38h172510d_10 conda-forge gdk-pixbuf 2.38.2 h3f25603_6 conda-forge geopandas 0.8.1 py_0 conda-forge geos 3.8.1 he1b5a44_0 conda-forge geotiff 1.6.0 h05acad5_0 conda-forge gettext 0.19.8.1 hf34092f_1004 conda-forge gflags 2.2.2 he1b5a44_1004 conda-forge giflib 5.2.1 h516909a_2 conda-forge git 2.29.1 pl5262he3b78ea_0 conda-forge glib 2.66.2 he1b5a44_0 conda-forge glob2 0.7 py_0 conda-forge glog 0.4.0 h49b9bf7_3 conda-forge gmock 1.10.0 4 conda-forge gmp 6.2.0 he1b5a44_3 conda-forge gobject-introspection 1.66.1 py38he66682d_2 conda-forge google-auth 1.22.0 py_0 conda-forge google-auth-oauthlib 0.4.2 pyhd8ed1ab_0 conda-forge graphite2 1.3.13 he1b5a44_1001 conda-forge graphviz 2.42.3 h6939c30_2 conda-forge grpc-cpp 1.32.0 h7997a97_1 conda-forge gtest 1.10.0 
hc9558a2_4 conda-forge gtk2 2.24.32 h586f36d_1 conda-forge gts 0.7.6 h17b2bb4_1 conda-forge harfbuzz 2.4.0 h9f30f68_3 conda-forge hdf4 4.2.13 hf30be14_1003 conda-forge hdf5 1.10.6 nompi_h54c07f9_1110 conda-forge heapdict 1.0.1 py_0 conda-forge holoviews 1.13.5 pyh9f0ad1d_0 conda-forge httpretty 1.0.2 py_0 conda-forge hypothesis 5.39.0 pyhd8ed1ab_0 conda-forge icu 64.2 he1b5a44_1 conda-forge idna 2.8 py38_1000 conda-forge imagesize 1.2.0 py_0 conda-forge importlib-metadata 2.0.0 py_1 conda-forge importlib_metadata 2.0.0 1 conda-forge iniconfig 1.1.1 pyh9f0ad1d_0 conda-forge ipykernel 5.3.4 py38h1cdfbd6_1 conda-forge ipython 7.15.0 py38h32f6830_0 conda-forge ipython_genutils 0.2.0 py_1 conda-forge ipywidgets 7.5.1 pyh9f0ad1d_1 conda-forge isort 5.0.9 py38h32f6830_0 conda-forge itsdangerous 1.1.0 py_0 conda-forge jedi 0.17.2 py38h32f6830_1 conda-forge jeepney 0.4.3 py_0 conda-forge jinja2 2.11.2 pyh9f0ad1d_0 conda-forge jmespath 0.10.0 pyh9f0ad1d_0 conda-forge joblib 0.17.0 py_0 conda-forge jpeg 9d h516909a_0 conda-forge json-c 0.13.1 hbfbb72e_1002 conda-forge json5 0.9.5 pyh9f0ad1d_0 conda-forge jsondiff 1.1.2 py_0 conda-forge jsonpatch 1.24 py_0 conda-forge jsonpickle 1.4.1 pyh9f0ad1d_0 conda-forge jsonpointer 2.0 py_0 conda-forge jsonschema 3.2.0 py_2 conda-forge junit-xml 1.9 pyh9f0ad1d_0 conda-forge jupyter-server-proxy 1.5.0 py_0 conda-forge jupyter_client 6.1.7 py_0 conda-forge jupyter_core 4.6.3 py38h32f6830_2 conda-forge jupyter_sphinx 0.3.1 py38h32f6830_1 conda-forge jupyterlab 2.1.5 py_0 conda-forge jupyterlab_pygments 0.1.2 pyh9f0ad1d_0 conda-forge jupyterlab_server 1.2.0 py_0 conda-forge kealib 1.4.13 h33137a7_1 conda-forge keyring 21.4.0 py38h32f6830_2 conda-forge kiwisolver 1.3.0 py38hbf85e49_0 conda-forge krb5 1.17.1 hfafb76e_3 conda-forge lapack 3.6.1 ha44fe06_2 conda-forge lcms2 2.11 hbd6801e_0 conda-forge ld_impl_linux-64 2.35 h769bd43_9 conda-forge libarchive 3.4.3 hf837322_0 conda-forge libblas 3.8.0 17_openblas conda-forge libcblas 3.8.0 
17_openblas conda-forge libcudf 0.17.0a201029 cuda10.2_gea0b5d285c_132 rapidsai-nightly libcumlprims 0.17.0a201007 cuda10.2_ge1f4c3a_0 rapidsai-nightly libcurl 7.71.1 hcdd3856_8 conda-forge libcypher-parser 0.6.2 1 rapidsai libdap4 3.20.6 h1d1bd15_1 conda-forge libedit 3.1.20191231 he28a2e2_2 conda-forge libev 4.33 h516909a_1 conda-forge libevent 2.1.10 hcdb4288_3 conda-forge libfaiss 1.6.3 he61ee18_3_cuda conda-forge libffi 3.2.1 he1b5a44_1007 conda-forge libgcc-ng 7.5.0 h5dbcf3e_17 conda-forge libgcrypt 1.8.4 hf484d3e_1000 conda-forge libgdal 3.0.4 he6a97d6_10 conda-forge libgfortran 3.0.0 1 conda-forge libgfortran-ng 7.5.0 hae1eefd_17 conda-forge libgfortran4 7.5.0 hae1eefd_17 conda-forge libglib 2.66.2 h0dae87d_0 conda-forge libgpg-error 1.39 he1b5a44_0 conda-forge libgsasl 1.8.0 2 conda-forge libhwloc 2.3.0 h3c4fd83_0 conda-forge libiconv 1.16 h516909a_0 conda-forge libkml 1.3.0 hd79254b_1012 conda-forge liblapack 3.8.0 17_openblas conda-forge liblapacke 3.8.0 17_openblas conda-forge liblief 0.10.1 he1b5a44_2 conda-forge libllvm10 10.0.1 he513fc3_3 conda-forge libllvm8 8.0.1 hc9558a2_0 conda-forge libnetcdf 4.7.4 nompi_hefab0ff_106 conda-forge libnghttp2 1.41.0 h8cfc5f6_2 conda-forge libntlm 1.4 h516909a_1002 conda-forge libopenblas 0.3.10 pthreads_hb3c22a3_5 conda-forge libpng 1.6.37 hed695b0_2 conda-forge libpq 12.3 h5513abc_2 conda-forge libprotobuf 3.13.0.1 h8b12597_0 conda-forge librdkafka 1.5.0 h40bdf00_0 conda-forge librmm 0.17.0a201022 cuda10.2_g210c863_36 rapidsai-nightly libsodium 1.0.18 h516909a_1 conda-forge libspatialindex 1.9.3 he1b5a44_3 conda-forge libspatialite 4.3.0a h2482549_1038 conda-forge libssh2 1.9.0 hab1572f_5 conda-forge libstdcxx-ng 7.5.0 h2ae2ef3_17 conda-forge libthrift 0.13.0 h5aa387f_6 conda-forge libtiff 4.1.0 hc7e4089_6 conda-forge libtool 2.4.6 hebb1f50_1006 conda-forge libutf8proc 2.5.0 h516909a_2 conda-forge libuuid 2.32.1 h14c3975_1000 conda-forge libuv 1.34.0 h516909a_0 conda-forge libwebp 1.1.0 h56121f0_4 conda-forge 
libwebp-base 1.1.0 h516909a_3 conda-forge libxcb 1.13 h14c3975_1002 conda-forge libxml2 2.9.10 hee79883_0 conda-forge lightgbm 3.0.0 py38h950e882_1 conda-forge llvm-openmp 11.0.0 hfc4b9b4_1 conda-forge llvmlite 0.34.0 py38h4f45e52_2 conda-forge locket 0.2.0 py_2 conda-forge lz4-c 1.9.2 he1b5a44_3 conda-forge lzo 2.10 h516909a_1000 conda-forge m4 1.4.18 h516909a_1001 conda-forge make 4.3 hd18ef5c_1 conda-forge markdown 3.3.3 pyh9f0ad1d_0 conda-forge markupsafe 1.1.1 py38h8df0ef7_2 conda-forge matplotlib-base 3.3.2 py38h4d1ce4f_1 conda-forge mccabe 0.6.1 py_1 conda-forge mimesis 4.0.0 pyh9f0ad1d_0 conda-forge mistune 0.8.4 py38h1e0a361_1002 conda-forge mock 4.0.2 py38h32f6830_1 conda-forge more-itertools 8.5.0 py_0 conda-forge moto 1.3.14 py_0 conda-forge msgpack-python 1.0.0 py38hbf85e49_2 conda-forge multidict 4.7.5 py38h1e0a361_2 conda-forge multipledispatch 0.6.0 py_0 conda-forge munch 2.5.0 py_0 conda-forge mypy 0.782 py_0 conda-forge mypy_extensions 0.4.3 py38h32f6830_2 conda-forge nbclient 0.5.1 py_0 conda-forge nbconvert 6.0.7 py38h32f6830_2 conda-forge nbformat 5.0.8 py_0 conda-forge nbsphinx 0.7.1 pyh9f0ad1d_0 conda-forge nccl 2.7.8.1 hc6a2c23_1 conda-forge ncurses 6.2 he1b5a44_2 conda-forge nest-asyncio 1.4.1 py_0 conda-forge networkx 2.5 py_0 conda-forge nltk 3.4.4 py_0 conda-forge nodejs 13.13.0 hf5d1a2b_0 conda-forge notebook 6.1.4 py38h32f6830_1 conda-forge numba 0.51.2 py38hc5bc63f_0 conda-forge numexpr 2.7.1 py38hc5bc63f_3 conda-forge numpy 1.19.2 py38hf89b668_1 conda-forge numpydoc 1.1.0 py_1 conda-forge nvtx 0.2.1 py38h1e0a361_2 conda-forge oauthlib 3.0.1 py_0 conda-forge olefile 0.46 pyh9f0ad1d_1 conda-forge openjpeg 2.3.1 h981e76c_3 conda-forge openssl 1.1.1h h516909a_0 conda-forge orc 1.6.5 hd3605a7_0 conda-forge packaging 20.4 pyh9f0ad1d_0 conda-forge pandas 1.1.3 py38hddd6c8b_2 conda-forge pandoc 1.19.2 0 conda-forge pandocfilters 1.4.2 py_1 conda-forge panel 0.10.1 pyhd8ed1ab_0 conda-forge pango 1.42.4 h7062337_4 conda-forge param 1.10.0 py_0 
conda-forge parquet-cpp 1.5.1 2 conda-forge parso 0.7.1 pyh9f0ad1d_0 conda-forge partd 1.1.0 py_0 conda-forge patchelf 0.11 he1b5a44_0 conda-forge pathspec 0.8.0 pyh9f0ad1d_0 conda-forge patsy 0.5.1 py_0 conda-forge pcre 8.44 he1b5a44_0 conda-forge perl 5.26.2 h36c2ea0_1008 conda-forge pexpect 4.8.0 pyh9f0ad1d_2 conda-forge pickleshare 0.7.5 py_1003 conda-forge pillow 8.0.1 py38h9776b28_0 conda-forge pip 20.2.4 py_0 conda-forge pixman 0.38.0 h516909a_1003 conda-forge pkg-config 0.29.2 h516909a_1008 conda-forge pkginfo 1.6.1 pyh9f0ad1d_0 conda-forge pluggy 0.13.1 py38h924ce5b_3 conda-forge poppler 0.87.0 h4190859_1 conda-forge poppler-data 0.4.10 0 conda-forge postgresql 12.3 h8573dbc_2 conda-forge proj 7.0.0 h966b41f_5 conda-forge prometheus_client 0.8.0 pyh9f0ad1d_0 conda-forge prompt-toolkit 3.0.8 py_0 conda-forge protobuf 3.13.0.1 py38h950e882_1 conda-forge psutil 5.7.3 py38h8df0ef7_0 conda-forge pthread-stubs 0.4 h14c3975_1001 conda-forge ptyprocess 0.6.0 py_1001 conda-forge py 1.9.0 pyh9f0ad1d_0 conda-forge py-cpuinfo 7.0.0 pyh9f0ad1d_0 conda-forge py-lief 0.10.1 py38h348cfbe_2 conda-forge pyarrow 1.0.1 py38h31aad1c_14_cuda conda-forge pyasn1 0.4.8 py_0 conda-forge pyasn1-modules 0.2.7 py_0 conda-forge pycodestyle 2.6.0 pyh9f0ad1d_0 conda-forge pycosat 0.6.3 py38h8df0ef7_1005 conda-forge pycparser 2.20 pyh9f0ad1d_2 conda-forge pyct 0.4.6 py_0 conda-forge pyct-core 0.4.6 py_0 conda-forge pydeck 0.5.0 pyh9f0ad1d_0 conda-forge pyee 7.0.4 pyh9f0ad1d_0 conda-forge pyflakes 2.2.0 pyh9f0ad1d_0 conda-forge pygments 2.7.2 py_0 conda-forge pyjwt 1.7.1 py_0 conda-forge pynvml 8.0.4 py_1 conda-forge pyopenssl 19.1.0 py_1 conda-forge pyparsing 2.4.7 pyh9f0ad1d_0 conda-forge pyppeteer 0.2.2 py_1 conda-forge pyproj 2.6.1.post1 py38h7521cb9_0 conda-forge pyrsistent 0.17.3 py38h1e0a361_1 conda-forge pysocks 1.7.1 py38h924ce5b_2 conda-forge pytables 3.6.1 py38hf9f05d5_3 conda-forge pytest 6.1.2 py38h578d9bd_0 conda-forge pytest-asyncio 0.12.0 py38h32f6830_2 conda-forge 
pytest-benchmark 3.2.3 pyh9f0ad1d_0 conda-forge pytest-cov 2.10.1 pyh9f0ad1d_0 conda-forge pytest-timeout 1.4.2 pyh9f0ad1d_0 conda-forge python 3.8.6 h852b56e_0_cpython conda-forge python-confluent-kafka 1.5.0 py38h1e0a361_0 conda-forge python-dateutil 2.8.1 py_0 conda-forge python-jose 3.1.0 pyh9f0ad1d_0 conda-forge python-libarchive-c 2.9 py38h924ce5b_2 conda-forge python-louvain 0.13 py_0 conda-forge python_abi 3.8 1_cp38 conda-forge pytz 2020.1 pyh9f0ad1d_0 conda-forge pyviz_comms 0.7.6 pyh9f0ad1d_0 conda-forge pyyaml 5.3.1 py38h8df0ef7_1 conda-forge pyzmq 19.0.2 py38ha71036d_2 conda-forge rapidjson 1.1.0 he1b5a44_1002 conda-forge rapids-build-env 0.17.0a201029 cuda10.2_py38_ge49a2fa_92 rapidsai-nightly rapids-doc-env 0.17.0a201029 py38_ge49a2fa_92 rapidsai-nightly rapids-notebook-env 0.17.0a201029 cuda10.2_py38_ge49a2fa_92 rapidsai-nightly re2 2020.10.01 he1b5a44_0 conda-forge readline 8.0 he28a2e2_2 conda-forge readme_renderer 27.0 pyh9f0ad1d_0 conda-forge recommonmark 0.6.0 py_0 conda-forge regex 2020.10.28 py38h25fe258_0 conda-forge requests 2.24.0 pyh9f0ad1d_0 conda-forge requests-oauthlib 1.3.0 pyh9f0ad1d_0 conda-forge requests-toolbelt 0.9.1 py_0 conda-forge responses 0.12.0 pyh9f0ad1d_0 conda-forge rfc3986 1.4.0 pyh9f0ad1d_0 conda-forge rhash 1.3.6 h516909a_1001 conda-forge ripgrep 12.1.1 h516909a_1 conda-forge rmm 0.17.0a201022 cuda_10.2_py38_g210c863_36 rapidsai-nightly rpy2 3.3.6 pypi_0 pypi rsa 4.6 pyh9f0ad1d_0 conda-forge rtree 0.9.4 py38h08f867b_1 conda-forge ruamel_yaml 0.15.80 py38h8df0ef7_1003 conda-forge s3fs 0.4.2 py_0 conda-forge s3transfer 0.3.3 py_3 conda-forge scikit-learn 0.23.1 py38h3a94b23_0 conda-forge scipy 1.5.1 py38h18bccfc_0 conda-forge seaborn 0.11.0 0 conda-forge seaborn-base 0.11.0 py_0 conda-forge secretstorage 3.1.2 py38h32f6830_2 conda-forge send2trash 1.5.0 py_0 conda-forge setuptools 49.6.0 py38h924ce5b_2 conda-forge shapely 1.7.1 py38hc7361b7_1 conda-forge shellcheck 0.7.1 0 conda-forge simpervisor 0.3 py_1 conda-forge 
six 1.15.0 pyh9f0ad1d_0 conda-forge snappy 1.1.8 he1b5a44_3 conda-forge snowballstemmer 2.0.0 py_0 conda-forge sortedcontainers 2.2.2 pyh9f0ad1d_0 conda-forge soupsieve 2.0.1 py_1 conda-forge spdlog 1.7.0 hc9558a2_2 conda-forge sphinx 3.2.1 py_0 conda-forge sphinx-copybutton 0.3.0 pyh9f0ad1d_0 conda-forge sphinx-markdown-tables 0.0.15 pypi_0 pypi sphinx_rtd_theme 0.5.0 pyh9f0ad1d_0 conda-forge sphinxcontrib-applehelp 1.0.2 py_0 conda-forge sphinxcontrib-devhelp 1.0.2 py_0 conda-forge sphinxcontrib-htmlhelp 1.0.3 py_0 conda-forge sphinxcontrib-jsmath 1.0.1 py_0 conda-forge sphinxcontrib-qthelp 1.0.3 py_0 conda-forge sphinxcontrib-serializinghtml 1.1.4 py_0 conda-forge sphinxcontrib-websupport 1.2.4 pyh9f0ad1d_0 conda-forge sqlite 3.33.0 h4cf870e_1 conda-forge sshpubkeys 3.1.0 py_0 conda-forge statsmodels 0.12.1 py38h0b5ebd8_0 conda-forge streamz 0.6.0 pyh9f0ad1d_0 conda-forge tbb 2020.2 hc9558a2_0 conda-forge tblib 1.6.0 py_0 conda-forge terminado 0.9.1 py38h32f6830_1 conda-forge testpath 0.4.4 py_0 conda-forge threadpoolctl 2.1.0 pyh5ca1d4c_0 conda-forge tiledb 1.7.7 h8efa9f0_3 conda-forge tk 8.6.10 hed695b0_1 conda-forge toml 0.10.1 pyh9f0ad1d_0 conda-forge toolz 0.11.1 py_0 conda-forge tornado 6.0.4 py38h1e0a361_2 conda-forge tqdm 4.51.0 pyh9f0ad1d_0 conda-forge traitlets 5.0.5 py_0 conda-forge treelite 0.93 py38h950e882_2 conda-forge treelite-runtime 0.93 pypi_0 pypi twine 3.2.0 py38h32f6830_1 conda-forge typed-ast 1.4.1 py38h516909a_0 conda-forge typing-extensions 3.7.4.3 0 conda-forge typing_extensions 3.7.4.3 py_0 conda-forge tzcode 2020a h516909a_0 conda-forge tzlocal 2.1 pypi_0 pypi ucx 1.8.1+g6b29558 cuda10.2_0 rapidsai ucx-proc 1.0.0 gpu rapidsai ucx-py 0.17.0a201029 py38_g6b29558_15 rapidsai-nightly umap-learn 0.4.6 py38h32f6830_0 conda-forge urllib3 1.25.11 py_0 conda-forge wcwidth 0.2.5 pyh9f0ad1d_2 conda-forge webencodings 0.5.1 py_1 conda-forge websocket-client 0.57.0 py38h32f6830_3 conda-forge websockets 8.1 py38h1e0a361_2 conda-forge werkzeug 1.0.1 
pyh9f0ad1d_0 conda-forge wheel 0.35.1 pyh9f0ad1d_0 conda-forge widgetsnbextension 3.5.1 py38h32f6830_4 conda-forge wrapt 1.12.1 py38h1e0a361_1 conda-forge xarray 0.16.1 py_0 conda-forge xerces-c 3.2.2 h8412b87_1004 conda-forge xmltodict 0.12.0 py_0 conda-forge xorg-kbproto 1.0.7 h14c3975_1002 conda-forge xorg-libice 1.0.10 h516909a_0 conda-forge xorg-libsm 1.2.3 h84519dc_1000 conda-forge xorg-libx11 1.6.12 h516909a_0 conda-forge xorg-libxau 1.0.9 h14c3975_0 conda-forge xorg-libxdmcp 1.1.3 h516909a_0 conda-forge xorg-libxext 1.3.4 h516909a_0 conda-forge xorg-libxpm 3.5.13 h516909a_0 conda-forge xorg-libxrender 0.9.10 h516909a_1002 conda-forge xorg-libxt 1.1.5 h516909a_1003 conda-forge xorg-renderproto 0.11.1 h14c3975_1002 conda-forge xorg-xextproto 7.3.0 h14c3975_1002 conda-forge xorg-xproto 7.0.31 h14c3975_1007 conda-forge xz 5.2.5 h516909a_1 conda-forge yaml 0.2.5 h516909a_0 conda-forge yarl 1.6.2 py38h1e0a361_0 conda-forge zeromq 4.3.3 he1b5a44_2 conda-forge zict 2.0.0 py_0 conda-forge zipp 3.4.0 py_0 conda-forge zlib 1.2.11 h516909a_1010 conda-forge zstd 1.4.5 h6597ccf_2 conda-forge ```

hcho3 commented 3 years ago

Backtrace from GDB:

#0  __cxxabiv1::__cxa_throw (obj=0x5555b6bee7d0, tinfo=0x7ffee7df1ae0 <typeinfo for raft::cuda_error>, 
    dest=0x7ffed0f10a42 <raft::cuda_error::~cuda_error()>)
    at /home/conda/feedstock_root/build_artifacts/ctng-compilers_1601678720612/work/.build/x86_64-conda-linux-gnu/src/gcc/libstdc++-v3/libsupc++/eh_throw.cc:75
#1  0x00007ffed10defd8 in ML::DecisionTree::Builder<ML::DecisionTree::RegTraits<float, int> >::doSplit (this=0x7fffffff8c10, h_nodes=..., s=0x5555abf61780)
    at /home/phcho/Desktop/cuml/cpp/src/decisiontree/batched-levelalgo/builder_base.cuh:346
#2  0x00007ffed10d026a in ML::DecisionTree::Builder<ML::DecisionTree::RegTraits<float, int> >::train (this=0x7fffffff8c10, h_nodes=..., 
    num_leaves=@0x5555b1960cb0: 0, depth=@0x5555b1960cac: 0, s=0x5555abf61780)
    at /home/phcho/Desktop/cuml/cpp/src/decisiontree/batched-levelalgo/builder_base.cuh:261
#3  0x00007ffed108c96c in ML::DecisionTree::grow_tree<ML::DecisionTree::RegTraits<float, int>, float, float, int> (d_allocator=..., h_allocator=..., 
    data=0x7ffd1e000000, ncols=259, nrows=580669, labels=0x7ffd56400000, quantiles=0x7ffd85e10a00, rowids=0x7ffd85e00000, colids=0x7ffd85e14000, 
    n_sampled_rows=580, unique_labels=1, params=..., stream=0x5555abf61780, sparsetree=..., num_leaves=@0x5555b1960cb0: 0, depth=@0x5555b1960cac: 0)
    at /home/phcho/Desktop/cuml/cpp/src/decisiontree/batched-levelalgo/builder.cuh:62
#4  0x00007ffed106a9bd in ML::DecisionTree::grow_tree<float, int> (d_allocator=..., h_allocator=..., data=0x7ffd1e000000, ncols=259, nrows=580669, 
    labels=0x7ffd56400000, quantiles=0x7ffd85e10a00, rowids=0x7ffd85e00000, colids=0x7ffd85e14000, n_sampled_rows=580, unique_labels=1, params=..., 
    stream=0x5555abf61780, sparsetree=..., num_leaves=@0x5555b1960cb0: 0, depth=@0x5555b1960cac: 0)
    at /home/phcho/Desktop/cuml/cpp/src/decisiontree/batched-levelalgo/builder.cuh:124
#5  0x00007ffed10581d9 in ML::DecisionTree::DecisionTreeBase<float, float>::plant (this=0x5555b1960c98, sparsetree=..., data=0x7ffd1e000000, ncols=259, 
    nrows=580669, labels=0x7ffd56400000, rowids=0x7ffd85e00000, n_sampled_rows=580, unique_labels=1, treeid=0)
    at /home/phcho/Desktop/cuml/cpp/src/decisiontree/decisiontree_impl.cuh:294
#6  0x00007ffed10587b3 in ML::DecisionTree::DecisionTreeBase<float, float>::base_fit (this=0x5555b1960c98, device_allocator_in=..., host_allocator_in=..., 
    stream_in=0x5555abf61780, data=0x7ffd1e000000, ncols=259, nrows=580669, labels=0x7ffd56400000, rowids=0x7ffd85e00000, n_sampled_rows=580, 
    unique_labels=1, sparsetree=..., treeid=0, is_classifier=false, in_tempmem=...)
    at /home/phcho/Desktop/cuml/cpp/src/decisiontree/decisiontree_impl.cuh:410
#7  0x00007ffed105fa54 in ML::DecisionTree::DecisionTreeRegressor<float>::fit (this=0x5555b1960c98, device_allocator_in=..., host_allocator_in=..., 
    stream_in=0x5555abf61780, data=0x7ffd1e000000, ncols=259, nrows=580669, labels=0x7ffd56400000, rowids=0x7ffd85e00000, n_sampled_rows=580, 
    tree=@0x7fffffffaa98: 0x5555a5f71328, tree_parameters=..., in_tempmem=...) at /home/phcho/Desktop/cuml/cpp/src/decisiontree/decisiontree_impl.cuh:472