Closed fhoering closed 2 months ago
@jsirois It really seems like a regression from this commit: https://github.com/pantsbuild/pex/pull/907
I don't repro either the warning line or the import error. I add steps after yours to build a pex file and then dump its metadata so we might compare how our repro attempts differ:
jsirois@gill ~ $ cd /tmp/
jsirois@gill /tmp $ python3.6 -m venv pex_venv
jsirois@gill /tmp $ . pex_venv/bin/activate
(pex_venv) jsirois@gill /tmp $ pip install -U pip
Collecting pip
Using cached https://files.pythonhosted.org/packages/54/0c/d01aa759fdc501a58f431eb594a17495f15b88da142ce14b5845662c13f3/pip-20.0.2-py2.py3-none-any.whl
Installing collected packages: pip
Found existing installation: pip 18.1
Uninstalling pip-18.1:
Successfully uninstalled pip-18.1
Successfully installed pip-20.0.2
(pex_venv) jsirois@gill /tmp $ pip install pex==2.1.5
Collecting pex==2.1.5
Using cached pex-2.1.5-py2.py3-none-any.whl (2.3 MB)
Installing collected packages: pex
Successfully installed pex-2.1.5
(pex_venv) jsirois@gill /tmp $ ./pex_venv/bin/pex --version
pex 2.1.5
(pex_venv) jsirois@gill /tmp $ ./pex_venv/bin/pex --disable-cache tensorflow==1.15.2
Python 3.6.10 (default, Jan 21 2020, 16:15:00)
[GCC 9.2.0] on linux
Type "help", "copyright", "credits" or "license" for more information.
(InteractiveConsole)
>>> import tensorflow
>>>
now exiting InteractiveConsole...
(pex_venv) jsirois@gill /tmp $ ./pex_venv/bin/pex --disable-cache tensorflow==1.15.2 -o tensorflow-1.15.2.pex
(pex_venv) jsirois@gill /tmp $ unzip -qc tensorflow-1.15.2.pex PEX-INFO | jq .
{
"always_write_cache": false,
"build_properties": {
"class": "CPython",
"pex_version": "2.1.5",
"platform": "manylinux2014_x86_64",
"version": [
3,
6,
10
]
},
"code_hash": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
"distributions": {
"Keras_Applications-1.0.8-py3-none-any.whl": "4a3844e4584efc7b604070781fa3966ed0549f86",
"Keras_Preprocessing-1.1.0-py2.py3-none-any.whl": "eea4389be0653f3aecae4e54ff629125ff6ceed5",
"Markdown-3.2.1-py2.py3-none-any.whl": "ab0b41b14987db2bee5eeda9baa74bb9192441f7",
"Werkzeug-1.0.0-py2.py3-none-any.whl": "238bd42f9044920cc491bbb7cb50381a6d88586f",
"absl_py-0.9.0-py3-none-any.whl": "19667c3fa849673c37ae382d443b97476d8d8a6c",
"astor-0.8.1-py2.py3-none-any.whl": "3f429c60766092a8661b317c5cb803230090e3c7",
"gast-0.2.2-py3-none-any.whl": "8ba99b3a954b6dd7ce7e1da91bdfc8a41d7b4967",
"google_pasta-0.1.8-py3-none-any.whl": "bd064a5d602ba99126ed12a3d0de66fceea48475",
"grpcio-1.27.2-cp36-cp36m-manylinux2010_x86_64.whl": "5a9e5e93e0ebf1a9befcff9ab2ef36971522461d",
"h5py-2.10.0-cp36-cp36m-manylinux1_x86_64.whl": "320d5956c47494bc6f494b4e30adc6b4d6d18694",
"numpy-1.18.1-cp36-cp36m-manylinux1_x86_64.whl": "15ff2a2d34343ed4b1242311b5fbc317ec5efa23",
"opt_einsum-3.2.0-py3-none-any.whl": "6d3c81ba0c835ee4631ac110e31ac2da9630606a",
"protobuf-3.11.3-cp36-cp36m-manylinux1_x86_64.whl": "774a5bd659ad3ac5c490de4839ba732c91513d1e",
"setuptools-46.0.0-py3-none-any.whl": "d7bfe6c9ee7b731472566968212efdd2b6bab3b2",
"six-1.14.0-py2.py3-none-any.whl": "6416d6145f6717f2ccbcc7d921014816fdc436e6",
"tensorboard-1.15.0-py3-none-any.whl": "aefe5e1e595631de4072207ed606c51187f1dcb4",
"tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl": "736892256bd4973de0edfefa35244f64b4d9f2ec",
"tensorflow_estimator-1.15.1-py2.py3-none-any.whl": "54583ef68a86dc889da12feae07d0a987158a5de",
"termcolor-1.1.0-py3-none-any.whl": "00d8ecd400cc85801b7f3f37cba65adaed5c28ce",
"wheel-0.34.2-py2.py3-none-any.whl": "1c2e8f3d9c0b501efc21cd886bfb66f850d9e168",
"wrapt-1.12.1-cp36-cp36m-linux_x86_64.whl": "46c5419b2767a92a5ae2c551fb30984f88654c26"
},
"emit_warnings": true,
"ignore_errors": false,
"inherit_path": "false",
"interpreter_constraints": [],
"pex_path": null,
"requirements": [
"Keras-Applications==1.0.8",
"Keras-Preprocessing==1.1.0",
"Markdown==3.2.1",
"Werkzeug==1.0.0",
"absl-py==0.9.0",
"astor==0.8.1",
"gast==0.2.2",
"google-pasta==0.1.8",
"grpcio==1.27.2",
"h5py==2.10.0",
"numpy==1.18.1",
"opt-einsum==3.2.0",
"protobuf==3.11.3",
"setuptools==46.0.0",
"six==1.14.0",
"tensorboard==1.15.0",
"tensorflow-estimator==1.15.1",
"tensorflow==1.15.2",
"termcolor==1.1.0",
"wheel==0.34.2; python_version >= \"3\" and python_version >= \"3\"",
"wrapt==1.12.1"
],
"zip_safe": true
}
(pex_venv) jsirois@gill /tmp $
The only differences are the Python version and the hashes of tensorflow & wrapt. I'm on centos7:
{
"always_write_cache": false,
"build_properties": {
"class": "CPython",
"pex_version": "2.1.5",
"platform": "manylinux2014_x86_64",
"version": [
3,
6,
8
]
},
"code_hash": "da39a3ee5e6b4b0d3255bfef95601890afd80709",
"distributions": {
"Keras_Applications-1.0.8-py3-none-any.whl": "4a3844e4584efc7b604070781fa3966ed0549f86",
"Keras_Preprocessing-1.1.0-py2.py3-none-any.whl": "eea4389be0653f3aecae4e54ff629125ff6ceed5",
"Markdown-3.2.1-py2.py3-none-any.whl": "ab0b41b14987db2bee5eeda9baa74bb9192441f7",
"Werkzeug-1.0.0-py2.py3-none-any.whl": "238bd42f9044920cc491bbb7cb50381a6d88586f",
"absl_py-0.9.0-py3-none-any.whl": "19667c3fa849673c37ae382d443b97476d8d8a6c",
"astor-0.8.1-py2.py3-none-any.whl": "3f429c60766092a8661b317c5cb803230090e3c7",
"gast-0.2.2-py3-none-any.whl": "8ba99b3a954b6dd7ce7e1da91bdfc8a41d7b4967",
"google_pasta-0.1.8-py3-none-any.whl": "bd064a5d602ba99126ed12a3d0de66fceea48475",
"grpcio-1.27.2-cp36-cp36m-manylinux2010_x86_64.whl": "5a9e5e93e0ebf1a9befcff9ab2ef36971522461d",
"h5py-2.10.0-cp36-cp36m-manylinux1_x86_64.whl": "320d5956c47494bc6f494b4e30adc6b4d6d18694",
"numpy-1.18.1-cp36-cp36m-manylinux1_x86_64.whl": "15ff2a2d34343ed4b1242311b5fbc317ec5efa23",
"opt_einsum-3.2.0-py3-none-any.whl": "6d3c81ba0c835ee4631ac110e31ac2da9630606a",
"protobuf-3.11.3-cp36-cp36m-manylinux1_x86_64.whl": "774a5bd659ad3ac5c490de4839ba732c91513d1e",
"setuptools-46.0.0-py3-none-any.whl": "d7bfe6c9ee7b731472566968212efdd2b6bab3b2",
"six-1.14.0-py2.py3-none-any.whl": "6416d6145f6717f2ccbcc7d921014816fdc436e6",
"tensorboard-1.15.0-py3-none-any.whl": "aefe5e1e595631de4072207ed606c51187f1dcb4",
"tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl": "300ae2eb8c40e63f229cb5dfe2f51a4fea85dfa2",
"tensorflow_estimator-1.15.1-py2.py3-none-any.whl": "54583ef68a86dc889da12feae07d0a987158a5de",
"termcolor-1.1.0-py3-none-any.whl": "00d8ecd400cc85801b7f3f37cba65adaed5c28ce",
"wheel-0.34.2-py2.py3-none-any.whl": "1c2e8f3d9c0b501efc21cd886bfb66f850d9e168",
"wrapt-1.12.1-cp36-cp36m-linux_x86_64.whl": "057a34bf9dcfaabf13d97a82887f24d24fcfd7c3"
},
"emit_warnings": true,
"ignore_errors": false,
"inherit_path": "false",
"interpreter_constraints": [],
"pex_path": null,
"requirements": [
"Keras-Applications==1.0.8",
"Keras-Preprocessing==1.1.0",
"Markdown==3.2.1",
"Werkzeug==1.0.0",
"absl-py==0.9.0",
"astor==0.8.1",
"gast==0.2.2",
"google-pasta==0.1.8",
"grpcio==1.27.2",
"h5py==2.10.0",
"numpy==1.18.1",
"opt-einsum==3.2.0",
"protobuf==3.11.3",
"setuptools==46.0.0",
"six==1.14.0",
"tensorboard==1.15.0",
"tensorflow-estimator==1.15.1",
"tensorflow==1.15.2",
"termcolor==1.1.0",
"wheel==0.34.2; python_version >= \"3\" and python_version >= \"3\"",
"wrapt==1.12.1"
],
"zip_safe": true
}
When I compile Python 3.6.10 from scratch it seems to work. I'm not sure that this is the difference. What change from the changelog could impact this? https://docs.python.org/3.6/whatsnew/changelog.html#python-3-6-10-final
I see no obvious change that would affect this either @fhoering. I'm not sure what to say. I suspect, under your python 3.6.8, pip install tensorflow==1.15.2
fails just like Pex does.
No it is not broken inside the virtual env, there tensorflow actually works. Also this happens on multiple machines for different users. I also get it inside a docker container. There must be something, it is too deterministic, maybe the Python installation of Centos7.
In my docker I use this:
yum install -y python3 \
python3-devel \
python3-pip
So I will do more tests like recompiling python 3.6.8 and keep you posted.
I think I actually could even provide you a dockerfile where this fails. Maybe even pulling centos7 and installing python as above is enough.
This looks like a bug fixed in the tensorflow 2 series here: https://github.com/tensorflow/tensorflow/commit/c40c5dfbd6f15108e41a268e81fdd6111720091f
In older versions, the variable would then be where purelib and platlib are defined to be for the python in question. On a stock CentOS7 I find:
$ cat <<EOF | docker build -t pex-issues-913 -
FROM centos:7
RUN yum -y install python3 python3-devel python3-pip
EOF
$ docker run --rm -it pex-issues-913 python3.6 -c 'import sysconfig; print("\n".join(p + ": " + sysconfig.get_paths()[p] for p in ("purelib", "platlib")))'
purelib: /usr/lib/python3.6/site-packages
platlib: /usr/lib64/python3.6/site-packages
On my arch linux box I find:
$ python3.6 -c 'import sysconfig; print("\n".join(p + ": " + sysconfig.get_paths()[p] for p in ("purelib", "platlib")))'
purelib: /usr/lib/python3.6/site-packages
platlib: /usr/lib/python3.6/site-packages
And on the canonical python 3.6 docker image (Debian Buster, custom build) I find:
$ docker run --rm -it python:3.6 python3.6 -c 'import sysconfig; print("\n".join(p + ": " + sysconfig.get_paths()[p] for p in ("purelib", "platlib")))'
purelib: /usr/local/lib/python3.6/site-packages
platlib: /usr/local/lib/python3.6/site-packages
Beyond noting the likely variable in success vs failure here, I have not yet determined exactly how this variable leads to the success / failure split, but maybe that pushes the ball along to help your investigations.
I observed a similar behavior with tensorflow==1.15.2 and pex==2.1.9. tensorflow==1.15.2 and pex==2.1.4 work. However, pex==2.1.5 and 2.1.9 don't work (we can assume something broke @ 2.1.5). For now, I am using pex==2.1.4 to be able to package tensorflow==1.15.2.
Ok, circling back, this was fixed by 2.1.133. Alas, centos 7 is no more (at least the centos:7 image isn't), but I can use rockylinux:8 to get a facsimile:
:; cat <<EOF | docker build -t pex-issues-913 -
FROM rockylinux:8
RUN yum -y install python3 python3-devel python3-pip
EOF
:; docker run --rm -it pex-issues-913 python3.6 -c 'import sysconfig; print("\n".join(p + ": " + sysconfig.get_paths()[p] for p in ("purelib", "platlib")))'
purelib: /usr/lib/python3.6/site-packages
platlib: /usr/lib64/python3.6/site-packages
So, the same problematic purelib / platlib split that tripped Pex>2.1.4 up:
# Good in Pex 2.1.4:
:; docker run --rm -it pex-issues-913 python3.6 -c 'import sysconfig; print("\n".join(p + ": " + sysconfig.get_paths()[p] for p in ("purelib", "platlib")))'
purelib: /usr/lib/python3.6/site-packages
platlib: /usr/lib64/python3.6/site-packages
# Bad in Pex 2.1.5
:; docker run --rm -it pex-issues-913 bash -c 'python3.6 -mvenv pex.venv && source pex.venv/bin/activate && pip -q install pex==2.1.5 && pex --version && pex --disable-cache tensorflow==1.15.2 -- -c "import tensorflow; print(tensorflow.__file__)"'
pex 2.1.5
WARNING: Target directory /tmp/tmpny0v8b4t/installed_wheels/94f9f1d978d9665ef102abf2e89004602c84f25f/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl.d6aa0a8a68a34b01a4e61c8ff709d6d2/tensorflow_core already exists. Specify --upgrade to force replacement.
Traceback (most recent call last):
File "/tmp/tmpllc_n8ca/.bootstrap/pex/pex.py", line 393, in execute
exit_code = self._wrap_coverage(self._wrap_profiling, self._execute)
File "/tmp/tmpllc_n8ca/.bootstrap/pex/pex.py", line 325, in _wrap_coverage
return runner(*args)
File "/tmp/tmpllc_n8ca/.bootstrap/pex/pex.py", line 356, in _wrap_profiling
return runner(*args)
File "/tmp/tmpllc_n8ca/.bootstrap/pex/pex.py", line 441, in _execute
return self.execute_interpreter()
File "/tmp/tmpllc_n8ca/.bootstrap/pex/pex.py", line 479, in execute_interpreter
self.execute_content('-c <cmd>', content, argv0='-c')
File "/tmp/tmpllc_n8ca/.bootstrap/pex/pex.py", line 530, in execute_content
exec_function(ast, globals_map)
File "/tmp/tmpllc_n8ca/.bootstrap/pex/compatibility.py", line 77, in exec_function
exec(ast, globals_map, locals_map)
File "-c <cmd>", line 1, in <module>
File "/tmp/tmpllc_n8ca/.deps/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl/tensorflow/__init__.py", line 102, in <module>
from tensorflow.python.util import deprecation_wrapper as _deprecation
File "/tmp/tmpllc_n8ca/.deps/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl/tensorflow/__init__.py", line 50, in __getattr__
module = self._load()
File "/tmp/tmpllc_n8ca/.deps/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl/tensorflow/__init__.py", line 44, in _load
module = _importlib.import_module(self.__name__)
File "/usr/lib64/python3.6/importlib/__init__.py", line 126, in import_module
return _bootstrap._gcd_import(name[level:], package, level)
ModuleNotFoundError: No module named 'tensorflow_core.python'
Things are still bad (but differently) in Pex 2.1.132 but are fixed in Pex 2.1.133:
# Bad, differently:
:; docker run --rm -it pex-issues-913 bash -c 'python3.6 -mvenv pex.venv && source pex.venv/bin/activate && pip -q install pex==2.1.132 && pex --version && pex --disable-cache tensorflow==1.15.2 -- -c "import tensorflow; print(tensorflow.__file__)"'
2.1.132
pid 36 -> /tmp/tmpw32mmgd9/venvs/9089b13598b90e0f8047ceb38dcae8c26b52ec97/2e582d52f642776ae1b90ff40fa6891e2f01b326/bin/python -sE /tmp/tmpw32mmgd9/venvs/9089b13598b90e0f8047ceb38dcae8c26b52ec97/2e582d52f642776ae1b90ff40fa6891e2f01b326/pex --disable-pip-version-check --no-python-version-warning --exists-action a --no-input --use-deprecated legacy-resolver --isolated -q --cache-dir /tmp/tmpw32mmgd9/pip_cache download --dest /tmp/tmpw32mmgd9/downloads/resolver_download.cejdcn2v/pex.venv.bin.python3.6 tensorflow==1.15.2 --index-url https://pypi.org/simple --retries 5 --timeout 15 exited with 1 and STDERR:
Re-execing from /tmp/tmpw32mmgd9/venvs/9089b13598b90e0f8047ceb38dcae8c26b52ec97/2e582d52f642776ae1b90ff40fa6891e2f01b326/bin/python
Traceback (most recent call last):
File "/tmp/tmpw32mmgd9/venvs/9089b13598b90e0f8047ceb38dcae8c26b52ec97/2e582d52f642776ae1b90ff40fa6891e2f01b326/pex", line 50, in <module>
os.execv(python, argv)
FileNotFoundError: [Errno 2] No such file or directory
# Good again:
:; docker run --rm -it pex-issues-913 bash -c 'python3.6 -mvenv pex.venv && source pex.venv/bin/activate && pip -q install pex==2.1.133 && pex --version && pex --disable-cache tensorflow==1.15.2 -- -c "import tensorflow; print(tensorflow.__file__)"'
2.1.133
/tmp/tmpmphq3gyr/installed_wheels/34551b8cc446342dac8fdcf79e775e0841aeea38e927b73e2ecf4b2d01277264/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl/tensorflow/__init__.py
And things remain good to this day:
:; docker run --rm -it pex-issues-913 bash -c 'python3.6 -mvenv pex.venv && source pex.venv/bin/activate && pip -q install pex && pex --disable-cache tensorflow==1.15.2 -- -c "import tensorflow; print(tensorflow.__file__)"'
/tmp/tmp50v576j2/installed_wheels/34551b8cc446342dac8fdcf79e775e0841aeea38e927b73e2ecf4b2d01277264/tensorflow-1.15.2-cp36-cp36m-manylinux2010_x86_64.whl/tensorflow/__init__.py
It looks like the fix came in #1867.
We have an error with tensorflow now that seems to be linked to the latest pex release 2.1.5. Same protocol works with pex==2.1.4
Note that there is also a new warning that is displayed (my /tmp is all empty before and after the run)