Open prihoda opened 4 years ago
I'll try to get to this when I can, but segfaults inside of numba can be very difficult to track down well.
Thanks, let me know if I can help
I also get a segfault on 0.4.2. My conda env:
channels:
- plotly
- comet_ml
- conda-forge
- defaults
dependencies:
- _libgcc_mutex=0.1=conda_forge
- _openmp_mutex=4.5=1_llvm
- argtable2=2.13=h14c3975_1001
- attrs=19.3.0=py_0
- aubio=0.4.9=py36h866369f_1
- audioread=2.1.8=py36h9f0ad1d_2
- backcall=0.1.0=py_0
- backoff=1.8.0=py_0
- bleach=3.1.3=pyh8c360ce_0
- blosc=1.17.1=he1b5a44_0
- bokeh=2.0.0=py36h9f0ad1d_0
- boto3=1.12.14=py_0
- botocore=1.15.26=pyh9f0ad1d_0
- bzip2=1.0.8=h516909a_2
- ca-certificates=2020.4.5.1=hecc5488_0
- cachetools=3.1.1=py_0
- certifi=2020.4.5.1=py36h9f0ad1d_0
- cffi=1.14.0=py36hd463f26_0
- chardet=3.0.4=py36h9f0ad1d_1006
- chart-studio=1.1.0=py_0
- click=7.0=py_0
- cloudpickle=1.3.0=py_0
- colorcet=2.0.1=py_0
- cryptography=2.8=py36h45558ae_2
- cycler=0.10.0=py_2
- cytoolz=0.10.1=py36h516909a_0
- dask=2.12.0=py_0
- dask-core=2.12.0=py_0
- datashader=0.10.0=py_0
- datashape=0.5.4=py_1
- dbus=1.13.6=he372182_0
- decorator=4.4.2=py_0
- defusedxml=0.6.0=py_0
- distributed=2.12.0=py36_0
- docutils=0.15.2=py36_0
- eigen=3.3.7=hc9558a2_1001
- entrypoints=0.3=py36h9f0ad1d_1001
- expat=2.2.9=he1b5a44_2
- ffmpeg=4.1.3=h167e202_0
- fftw=3.3.8=nompi_h7f3a6c3_1110
- fontconfig=2.13.1=h86ecdb6_1001
- freetype=2.10.1=he06d7ca_0
- fsspec=0.6.3=py_0
- gettext=0.19.8.1=hc5be6a0_1002
- glib=2.58.3=py36hd3ed26a_1003
- gmp=6.2.0=he1b5a44_2
- gnutls=3.6.5=hd3a4fd2_1002
- gst-plugins-base=1.14.5=h0935bb2_2
- gstreamer=1.14.5=h36ae1b5_2
- hdf5=1.10.2=hc401514_3
- heapdict=1.0.1=py_0
- icu=64.2=he1b5a44_1
- idna=2.9=py_1
- imageio=2.8.0=py_0
- importlib-metadata=1.5.0=py36h9f0ad1d_1
- importlib_metadata=1.5.0=1
- ipykernel=5.1.4=py36h5ca1d4c_0
- ipython=7.13.0=py36h9f0ad1d_2
- ipython_genutils=0.2.0=py_1
- ipywidgets=7.5.1=py_0
- jedi=0.16.0=py36h9f0ad1d_1
- jinja2=2.11.1=py_0
- jmespath=0.9.5=py_0
- joblib=0.14.1=py_0
- jpeg=9c=h14c3975_1001
- json5=0.9.0=py_0
- jsonschema=3.2.0=py36h9f0ad1d_1
- jupyter_client=6.1.0=py_0
- jupyter_core=4.6.3=py36h9f0ad1d_1
- jupyterlab=2.0.1=py_0
- jupyterlab_server=1.0.7=py_0
- kiwisolver=1.1.0=py36hdb11119_1
- lame=3.100=h14c3975_1001
- lapack=3.6.1=ha44fe06_2
- ld_impl_linux-64=2.34=h53a641e_0
- libblas=3.8.0=16_openblas
- libcblas=3.8.0=16_openblas
- libclang=9.0.1=default_hde54327_0
- libffi=3.2.1=he1b5a44_1007
- libflac=1.3.1=he1b5a44_1002
- libgcc-ng=9.2.0=h24d8f2e_2
- libgfortran=3.0.0=1
- libgfortran-ng=7.3.0=hdf63c60_5
- libiconv=1.15=h516909a_1006
- liblapack=3.8.0=16_openblas
- libllvm8=8.0.1=hc9558a2_0
- libllvm9=9.0.1=hc9558a2_0
- libogg=1.3.2=h516909a_1002
- libopenblas=0.3.9=h5ec1e0e_0
- libpng=1.6.37=hed695b0_1
- librosa=0.7.2=py_0
- libsndfile=1.0.28=he1b5a44_1000
- libsodium=1.0.17=h516909a_0
- libstdcxx-ng=9.2.0=hdf63c60_2
- libtiff=4.1.0=hc7e4089_6
- libuuid=2.32.1=h14c3975_1000
- libvorbis=1.3.5=h516909a_1002
- libwebp-base=1.1.0=h516909a_3
- libxcb=1.13=h14c3975_1002
- libxkbcommon=0.10.0=he1b5a44_0
- libxml2=2.9.10=hee79883_0
- llvm-openmp=9.0.1=hc9558a2_2
- llvmlite=0.31.0=py36hfa65bc7_1
- locket=0.2.0=py_2
- lz4-c=1.8.3=he1b5a44_1001
- lzo=2.10=h14c3975_1000
- markupsafe=1.1.1=py36h8c4c3a4_1
- matplotlib=3.2.0=1
- matplotlib-base=3.2.0=py36h250f245_1
- mistune=0.8.4=py36h516909a_1000
- more-itertools=8.2.0=py_0
- mpg123=1.25.8=hf484d3e_1000
- msgpack-python=1.0.0=py36hdb11119_1
- multipledispatch=0.6.0=py_0
- nbconvert=5.6.1=py36_0
- nbformat=5.0.4=py_0
- ncurses=6.1=hf484d3e_1002
- nettle=3.4.1=h1bed415_1002
- networkx=2.4=py_1
- notebook=6.0.3=py36_0
- nspr=4.25=he1b5a44_0
- nss=3.47=he751ad9_0
- numba=0.48.0=py36hb3f55d8_0
- numexpr=2.7.1=py36hb3f55d8_0
- numpy=1.18.1=py36h95a1406_0
- olefile=0.46=py_0
- openh264=1.8.0=hdbcaa40_1000
- openssl=1.1.1g=h516909a_0
- packaging=20.1=py_0
- pandas=1.0.1=py36hb3f55d8_0
- pandoc=2.9.2=0
- pandocfilters=1.4.2=py_1
- param=1.9.3=py_0
- parso=0.6.2=py_0
- partd=1.1.0=py_0
- pcre=8.44=he1b5a44_0
- pexpect=4.8.0=py36h9f0ad1d_1
- pickleshare=0.7.5=py36h9f0ad1d_1001
- pillow=7.0.0=py36h8328e55_1
- pip=20.0.2=py_2
- plotly=4.7.0=py_0
- pluggy=0.12.0=py_0
- prometheus_client=0.7.1=py_0
- prompt-toolkit=3.0.4=py_0
- psutil=5.7.0=py36h8c4c3a4_1
- pthread-stubs=0.4=h14c3975_1001
- ptyprocess=0.6.0=py_1001
- py=1.8.1=py_0
- pycparser=2.20=py_0
- pyct=0.4.6=py_0
- pyct-core=0.4.6=py_0
- pygments=2.6.1=py_0
- pyopenssl=19.1.0=py_1
- pyparsing=2.4.6=py_0
- pyqt=5.12.3=py36hcca6a23_1
- pyrsistent=0.15.7=py36h8c4c3a4_1
- pysocks=1.7.1=py36h9f0ad1d_1
- pysoundfile=0.10.2=py_1001
- pytables=3.4.4=py36h4f72b40_1
- pytest=5.4.1=py36h9f0ad1d_0
- python=3.6.10=h9d8adfe_1009_cpython
- python-dateutil=2.8.1=py_0
- python-dotenv=0.12.0=py_0
- python_abi=3.6=1_cp36m
- pytz=2019.3=py_0
- pywavelets=1.1.1=py36hc1659b7_0
- pyyaml=5.3.1=py36h8c4c3a4_0
- pyzmq=19.0.0=py36h9947dbf_1
- qt=5.12.5=hd8c4c69_1
- readline=8.0=hf8c457e_0
- requests=2.23.0=pyh8c360ce_2
- resampy=0.2.2=py_0
- retrying=1.3.3=py_2
- s3transfer=0.3.3=py36_0
- scikit-image=0.16.2=py36hb3f55d8_0
- scikit-learn=0.22.1=py36hcdab131_1
- scipy=1.4.1=py36h921218d_0
- send2trash=1.5.0=py_0
- setuptools=46.0.0=py36h9f0ad1d_2
- six=1.14.0=py_1
- sortedcontainers=2.1.0=py_0
- sqlite=3.30.1=hcee41ef_0
- tbb=2020.1=hc9558a2_0
- tblib=1.6.0=py_0
- terminado=0.8.3=py36h9f0ad1d_1
- testpath=0.3.1=py36_1
- tk=8.6.10=hed695b0_0
- toolz=0.10.0=py_0
- tornado=6.0.4=py36h8c4c3a4_1
- tqdm=4.43.0=py_0
- traitlets=4.3.3=py36h9f0ad1d_1
- typing_extensions=3.7.4.1=py36h9f0ad1d_1
- urllib3=1.25.7=py36h9f0ad1d_1
- wcwidth=0.1.8=py_0
- webencodings=0.5.1=py_1
- wheel=0.34.2=py_1
- widgetsnbextension=3.5.1=py36_0
- x264=1!152.20180806=h14c3975_0
- xarray=0.15.0=py_0
- xorg-libxau=1.0.9=h14c3975_0
- xorg-libxdmcp=1.1.3=h516909a_0
- xz=5.2.4=h516909a_1002
- yaafe=0.70=py36h0bee7d0_1
- yaml=0.2.2=h516909a_1
- zeromq=4.3.2=he1b5a44_2
- zict=2.0.0=py_0
- zipp=3.1.0=py_0
- zlib=1.2.11=h516909a_1006
- zstd=1.4.4=h3b9ef0a_2
- pip:
- pyqt5-sip==4.19.18
- pyqtwebengine==5.12.1
After switching to 0.3.10 (and not changing anything else) the problem is gone.
Sorry to revive a potentially dead thread, but this issue seems to be rearing its head again. I'm getting a segfault in this exact same spot as well. It started when I began trying to play around with the numba threading layers (setting it to `tbb`) in order to use UMAP with ProcessPoolExecutor. It happened very suddenly, and now consistently happens whenever I try to run UMAP inside a script, regardless of threading layer or if it is running inside a process pool.
The weird thing is that the seg fault does not occur if I just run UMAP inside of a python terminal, it only occurs when I run it via command line through a script.
The error looks like this:
08/07/2021 08:38:28 AM INFO: Finding disconnections...
Fatal Python error: Segmentation fault
Current thread 0x00007fd5f4381700 (most recent call first):
File "/home/n10853499/.conda/envs/rosella-dev/lib/python3.8/site-packages/umap/umap_.py", line 313 in nearest_neighbors
File "/home/n10853499/.conda/envs/rosella-dev/lib/python3.8/site-packages/umap/umap_.py", line 557 in fuzzy_simplicial_set
File "/home/n10853499/.conda/envs/rosella-dev/lib/python3.8/site-packa^CSegmentation fault (core dumped)
and my conda environment looks like this:
# packages in environment at /home/n10853499/.conda/envs/rosella-dev:
#
# Name Version Build Channel
_libgcc_mutex 0.1 conda_forge conda-forge
_openmp_mutex 4.5 1_gnu conda-forge
attrs 21.2.0 pyhd8ed1ab_0 conda-forge
backcall 0.2.0 pyh9f0ad1d_0 conda-forge
backports 1.0 py_2 conda-forge
backports.functools_lru_cache 1.6.4 pyhd8ed1ab_0 conda-forge
biopython 1.79 py38h497a2fe_0 conda-forge
blis 0.8.1 h7f98852_1 conda-forge
brotlipy 0.7.0 py38h497a2fe_1001 conda-forge
bwa 0.7.17 h5bf99c6_8 bioconda
bzip2 1.0.8 h7f98852_4 conda-forge
ca-certificates 2021.5.30 ha878542_0 conda-forge
cachecontrol 0.12.6 py_0 conda-forge
certifi 2021.5.30 py38h578d9bd_0 conda-forge
cffi 1.14.4 py38ha312104_0 conda-forge
chardet 4.0.0 py38h578d9bd_1 conda-forge
charset-normalizer 2.0.0 pyhd8ed1ab_0 conda-forge
cryptography 3.4.7 py38ha5dfef3_0 conda-forge
curl 7.71.1 he644dc0_3 conda-forge
cycler 0.10.0 py_2 conda-forge
cython 0.29.24 py38h709712a_0 conda-forge
decorator 5.0.9 pyhd8ed1ab_0 conda-forge
flight-genome 1.2.1 pyh5e36f6f_0 bioconda
freetype 2.10.4 h0708190_1 conda-forge
gsl 2.6 he838d99_2 conda-forge
hdbscan 0.8.27 py38h5c078b8_0 conda-forge
hdmedians 0.14.2 py38hb5d20a5_0 conda-forge
htslib 1.9 h4da6232_3 bioconda
idna 3.1 pyhd3deb0d_0 conda-forge
imageio 2.9.0 py_0 conda-forge
iniconfig 1.1.1 pyh9f0ad1d_0 conda-forge
ipython 7.26.0 py38he5a9106_0 conda-forge
ipython_genutils 0.2.0 py_1 conda-forge
jedi 0.18.0 py38h578d9bd_2 conda-forge
joblib 0.17.0 py_0 conda-forge
jpeg 9d h36c2ea0_0 conda-forge
k8 0.2.5 h9a82719_1 bioconda
kiwisolver 1.3.1 py38h1fd1430_1 conda-forge
krb5 1.17.2 h926e7f8_0 conda-forge
lcms2 2.12 hddcbb42_0 conda-forge
ld_impl_linux-64 2.36.1 hea4e1c9_2 conda-forge
libcblas 3.9.0 10_openblas conda-forge [38/1790]
libcurl 7.71.1 hcdd3856_3 conda-forge
libdeflate 1.6 h516909a_0 conda-forge
libedit 3.1.20191231 h46ee950_2 conda-forge
libffi 3.2.1 he1b5a44_1007 conda-forge
libgcc-ng 11.1.0 hc902ee8_8 conda-forge
libgfortran-ng 11.1.0 h69a702a_8 conda-forge
libgfortran5 11.1.0 h6c583b3_8 conda-forge
libgomp 11.1.0 hc902ee8_8 conda-forge
liblapack 3.9.0 10_openblas conda-forge
libllvm10 10.0.1 he513fc3_3 conda-forge
libopenblas 0.3.17 pthreads_h8fe5266_1 conda-forge
libpng 1.6.37 h21135ba_2 conda-forge
libssh2 1.9.0 ha56f1ee_6 conda-forge
libstdcxx-ng 11.1.0 h56837e0_8 conda-forge
libtiff 4.3.0 hf544144_0 conda-forge
libwebp-base 1.2.0 h7f98852_2 conda-forge
llvmlite 0.36.0 py38h4630a5e_0 conda-forge
lockfile 0.12.2 py_1 conda-forge
lz4-c 1.9.3 h9c3ff4c_1 conda-forge
matplotlib-base 3.4.2 py38hcc49a3a_0 conda-forge
matplotlib-inline 0.1.2 pyhd8ed1ab_2 conda-forge
minimap2 2.21 h5bf99c6_0 bioconda
more-itertools 8.8.0 pyhd8ed1ab_0 conda-forge
msgpack-python 1.0.2 py38h1fd1430_1 conda-forge
natsort 7.1.1 pyhd8ed1ab_0 conda-forge
ncurses 6.1 hf484d3e_1002 conda-forge
numba 0.53.1 py38h8b71fd7_1 conda-forge
numpy 1.21.1 py38h9894fe3_0 conda-forge
olefile 0.46 pyh9f0ad1d_1 conda-forge
openblas 0.3.17 pthreads_h4748800_1 conda-forge
openjpeg 2.4.0 hb52868f_1 conda-forge
openssl 1.1.1k h7f98852_0 conda-forge
packaging 21.0 pyhd8ed1ab_0 conda-forge
pandas 1.3.1 py38h1abd341_0 conda-forge
parallel 20160622 1 bioconda
parso 0.8.2 pyhd8ed1ab_0 conda-forge
patsy 0.5.1 py_0 conda-forge
perl 5.32.1 0_h7f98852_perl5 conda-forge
perl-threaded 5.26.0 0 bioconda
pexpect 4.8.0 pyh9f0ad1d_2 conda-forge
pickleshare 0.7.5 py_1003 conda-forge
pillow 8.3.1 py38h8e6f84c_0 conda-forge
pip 21.2.2 pyhd8ed1ab_0 conda-forge
pkg-config 0.29.2 h36c2ea0_1008 conda-forge
pluggy 0.13.1 py38h578d9bd_4 conda-forge
prompt-toolkit 3.0.19 pyha770c72_0 conda-forge
ptyprocess 0.7.0 pyhd3deb0d_0 conda-forge
py 1.10.0 pyhd3deb0d_0 conda-forge
pycparser 2.20 pyh9f0ad1d_2 conda-forge
pygments 2.9.0 pyhd8ed1ab_0 conda-forge
pynndescent 0.5.4 pyh6c4a22f_0 conda-forge
pyopenssl 20.0.1 pyhd8ed1ab_0 conda-forge
pyparsing 2.4.7 pyh9f0ad1d_0 conda-forge
pysam 0.16.0.1 py38hbdc2ae9_1 bioconda
pysocks 1.7.1 py38h578d9bd_3 conda-forge
pytest 6.2.4 py38h578d9bd_0 conda-forge
python 3.8.5 h4d41432_2_cpython conda-forge
python-dateutil 2.8.2 pyhd8ed1ab_0 conda-forge
python_abi 3.8 2_cp38 conda-forge
pytz 2021.1 pyhd8ed1ab_0 conda-forge
readline 8.0 h46ee950_1 conda-forge
requests 2.26.0 pyhd8ed1ab_0 conda-forge
rosella 0.3.3 h443a992_0 bioconda
samtools 1.9 h10a08f8_12 bioconda
scikit-bio 0.5.6 py38h0b5ebd8_4 conda-forge
scikit-learn 0.24.2 py38hdc147b9_0 conda-forge
scipy 1.7.1 py38h56a6a73_0 conda-forge
seaborn 0.11.1 hd8ed1ab_1 conda-forge
seaborn-base 0.11.1 pyhd8ed1ab_1 conda-forge
setuptools 49.6.0 py38h578d9bd_3 conda-forge
six 1.16.0 pyh6c4a22f_0 conda-forge
sqlite 3.32.3 hcee41ef_1 conda-forge
starcode 1.4 h779adbc_1 bioconda
statsmodels 0.12.2 py38h5c078b8_0 conda-forge
tbb 2020.2 h4bd325d_4 conda-forge
threadpoolctl 2.2.0 pyh8a188c0_0 conda-forge
tk 8.6.10 h21135ba_1 conda-forge
toml 0.10.2 pyhd8ed1ab_0 conda-forge
tornado 6.1 py38h497a2fe_1 conda-forge
traitlets 5.0.5 py_0 conda-forge
umap-learn 0.5.1 py38h578d9bd_1 conda-forge
urllib3 1.26.6 pyhd8ed1ab_0 conda-forge
vt 2015.11.10 he941832_3 bioconda
wcwidth 0.2.5 pyh9f0ad1d_2 conda-forge
wheel 0.36.2 pyhd3deb0d_0 conda-forge
xz 5.2.5 h516909a_1 conda-forge
zlib 1.2.11 h516909a_1010 conda-forge
zstd 1.5.0 ha95c52a_0 conda-forge
The following changes seem to have partially fixed my issues. Numba parallelism seems to break `compute_membership_strengths` and `fast_knn_indices` for whatever reason:
diff --git a/umap/umap_.py b/umap/umap_.py
index 0ebb8f3..824a97d 100644
--- a/umap/umap_.py
+++ b/umap/umap_.py
@@ -352,7 +352,7 @@ def nearest_neighbors(
"rhos": numba.types.float32[::1],
"val": numba.types.float32,
},
- parallel=True,
+ parallel=False,
fastmath=True,
)
def compute_membership_strengths(
diff --git a/umap/utils.py b/umap/utils.py
index 5eb7ddd..d6d3601 100644
--- a/umap/utils.py
+++ b/umap/utils.py
@@ -11,7 +11,7 @@ from sklearn.utils.validation import check_is_fitted
import scipy.sparse
-@numba.njit(parallel=True)
+@numba.njit(parallel=False)
def fast_knn_indices(X, n_neighbors):
"""A fast computation of knn indices.
The problem is that this seems to be significantly slower than it was previously, which makes sense. Additionally, a second segfault begins to occur on a different set of data at a rather random point in the `pynndescent` module:
Python error: Segmentation fault
Thread 0x00007f3d61e58700 (most recent call first):
File "/home/n10853499/.conda/envs/rosella-dev/lib/python3.8/site-packages/pynndescent/pynndescent_.py", line 876 in __init__
Which is a call to this function:
self._neighbor_graph = nn_descent(
self._raw_data,
self.n_neighbors,
self.rng_state,
effective_max_candidates,
self._distance_func,
self.n_iters,
self.delta,
low_memory=self.low_memory,
rp_tree_init=True,
init_graph=_init_graph,
leaf_array=leaf_array,
verbose=verbose,
)
More specifically, line 876 is where `self.n_neighbors` is used. I'm really not sure what is going on here; these errors are occurring in a fresh conda environment, so I'm kind of at a loss.
Downgrading numba does not fix this issue. Downgrading pynndescent doesn't fix this issue either.
I am getting a `Segmentation fault (core dumped)` on `fit` with any input data on Linux.
UMAP version: `0.4.2` (happens with `0.4.1` as well)
OS: `centos-release-7-6.1810.2.el7.centos.x86_64`
I also tried running using Binder on ubuntu but there it works all OK.
Traceback with `python -q -X faulthandler`:
I added print statements before the `smooth_knn_dist` call and got:
So the error happens somewhere in the `smooth_knn_dist` function.
Output of `pip freeze`:
Output of `conda env export`: