aboutcode-org / scancode-plugins

A set of plugins delivered either as built-in scancode-toolkit plugins or as extra plugins
2 stars 8 forks source link

Interferes with python-magic #30

Open progval opened 1 year ago

progval commented 1 year ago

Hi,

I'd like to use both python-magic and scancode in the same process, but importing scancode first prevents python-magic from finding its files:

strace -e file python3 -c "import scancode.api; import magic; magic.Magic(mime=True, mime_encoding=True)"

shows that python-magic looks for magic.mgc in /home/linuxbrew/.linuxbrew/Cellar/, which doesn't exist:

openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/loader.py", {st_mode=S_IFREG|0644, st_size=1168, ...}) = 0
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/loader.py", {st_mode=S_IFREG|0644, st_size=1168, ...}) = 0
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__pycache__/loader.cpython-39.pyc", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/dev/null", O_RDWR|O_CLOEXEC) = 3
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=3321003, si_uid=85916, si_status=0, si_utime=0, si_stime=0} ---
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/compat.py", {st_mode=S_IFREG|0644, st_size=8316, ...}) = 0
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/compat.py", {st_mode=S_IFREG|0644, st_size=8316, ...}) = 0
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__pycache__/compat.cpython-39.pyc", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/dev/null", O_RDWR|O_CLOEXEC) = 3
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=3321015, si_uid=85916, si_status=0, si_utime=0, si_stime=0} ---
stat("/home/infres/ext-8972/.magic.mgc", 0x7ffc2287d510) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic", 0x7ffc2287d510) = -1 ENOENT (No such file or directory)
access("/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic.mime.mgc", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic.mgc", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic", 0x7ffc2287d4f0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic.mgc", 0x7ffc2287d510) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic", 0x7ffc2287d510) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic.mgc", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic", 0x7ffc2287d4f0) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic.mgc", 0x7ffc2287f990) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic", 0x7ffc2287f990) = -1 ENOENT (No such file or directory)
access("/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic.mime.mgc", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic.mgc", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic", 0x7ffc2287f970) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/linuxbrew/.linuxbrew/Cellar/libmagic/5.39/share/misc/magic", O_RDONLY) = -1 ENOENT (No such file or directory)
Traceback (most recent call last):
  File "<string>", line 1, in <module>
openat(AT_FDCWD, "<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/python39.zip/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/python3.9/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/python3.9/lib-dynload/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/infres/ext-8972/swh-environment/swh-dataset/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/infres/ext-8972/swh-environment/swh-journal/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/home/infres/ext-8972/swh-environment/swh-graph/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/local/lib/python3.9/dist-packages/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/lib/python3/dist-packages/<string>", O_RDONLY|O_CLOEXEC) = -1 ENOENT (No such file or directory)
  File "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__init__.py", line 73, in __init__
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__init__.py", O_RDONLY|O_CLOEXEC) = 3
    magic_load(self.cookie, magic_file)
  File "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__init__.py", line 332, in magic_load
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__init__.py", O_RDONLY|O_CLOEXEC) = 3
    return _magic_load(cookie, coerce_filename(filename))
  File "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__init__.py", line 225, in errorcheck_negative_one
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__init__.py", O_RDONLY|O_CLOEXEC) = 3
    raise MagicException(err)
magic.MagicException: b'could not find any valid magic files!'
+++ exited with 1 +++

whereas, without the scancode import, /usr/share/misc/magic.mgc is found successfully:

strace -e file python3 -c "import magic; magic.Magic(mime=True, mime_encoding=True)"
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic", O_RDONLY|O_NONBLOCK|O_CLOEXEC|O_DIRECTORY) = 3
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/loader.py", {st_mode=S_IFREG|0644, st_size=1168, ...}) = 0
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/loader.py", {st_mode=S_IFREG|0644, st_size=1168, ...}) = 0
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__pycache__/loader.cpython-39.pyc", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/dev/null", O_RDWR|O_CLOEXEC) = 3
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=3361625, si_uid=85916, si_status=0, si_utime=0, si_stime=0} ---
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib/x86_64-linux-gnu/libmagic.so.1", O_RDONLY|O_CLOEXEC) = 3
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic", {st_mode=S_IFDIR|0755, st_size=4096, ...}) = 0
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/compat.py", {st_mode=S_IFREG|0644, st_size=8316, ...}) = 0
stat("/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/compat.py", {st_mode=S_IFREG|0644, st_size=8316, ...}) = 0
openat(AT_FDCWD, "/home/infres/ext-8972/.local/lib/python3.9/site-packages/magic/__pycache__/compat.cpython-39.pyc", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/dev/null", O_RDWR|O_CLOEXEC) = 3
--- SIGCHLD {si_signo=SIGCHLD, si_code=CLD_EXITED, si_pid=3361626, si_uid=85916, si_status=0, si_utime=0, si_stime=0} ---
stat("/home/infres/ext-8972/.magic.mgc", 0x7fffd7512c60) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic", 0x7fffd7512c60) = -1 ENOENT (No such file or directory)
access("/etc/magic.mime.mgc", R_OK)     = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/magic.mgc", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/etc/magic", {st_mode=S_IFREG|0644, st_size=111, ...}) = 0
openat(AT_FDCWD, "/etc/magic", O_RDONLY) = 3
access("/usr/share/misc/magic.mime.mgc", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/misc/magic.mgc", O_RDONLY) = 3
stat("/home/infres/ext-8972/.magic.mgc", 0x7fffd7512c60) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic", 0x7fffd7512c60) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/magic.mgc", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/etc/magic", {st_mode=S_IFREG|0644, st_size=111, ...}) = 0
openat(AT_FDCWD, "/etc/magic", O_RDONLY) = 3
openat(AT_FDCWD, "/usr/share/misc/magic.mgc", O_RDONLY) = 3
stat("/home/infres/ext-8972/.magic.mgc", 0x7fffd75150e0) = -1 ENOENT (No such file or directory)
stat("/home/infres/ext-8972/.magic", 0x7fffd75150e0) = -1 ENOENT (No such file or directory)
access("/etc/magic.mime.mgc", R_OK)     = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/etc/magic.mgc", O_RDONLY) = -1 ENOENT (No such file or directory)
stat("/etc/magic", {st_mode=S_IFREG|0644, st_size=111, ...}) = 0
openat(AT_FDCWD, "/etc/magic", O_RDONLY) = 3
access("/usr/share/misc/magic.mime.mgc", R_OK) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/misc/magic.mgc", O_RDONLY) = 3
+++ exited with 0 +++

This is on Debian 11. Python package versions:

$ pip3 freeze | grep -E "(typecode|scancode|magic)"
python-magic==0.4.27
scancode-toolkit==31.2.5
typecode==30.0.0
typecode-libmagic==5.39.210531
pombredanne commented 1 year ago

@progval Thanks for the detailed report. Kudos for the strace output!

So clearly typecode-libmagic==5.39.210531 and python-magic compete for attention: whichever gets imported first loads libmagic first and gets the first cut at loading a magic library. AFAICR python-magic likely looks in the default locations and fails because the ones provided in the pre-built library are not "standard" (and they are not used by ScanCode FWIW, since we load a well-known magic database that is also provided by the plugin).

There is a way for ScanCode to reuse the system-installed libmagic instead of the one provided by the typecode-libmagic plugin, which should come in handy for this.

Let me get you the details; they will involve using scancode-toolkit-mini, https://github.com/nexB/scancode-plugins/tree/main/builtins/extractcode_libarchive_system_provided, and a few extra settings.