Closed mjlbach closed 7 months ago
Note: I tried debugging this by modifying distrobox-init
on the host, which is symlinked into the container as its entrypoint, but I'm not seeing any of the modifications (namely print statements) reflected in the log files, despite cat /bin/entrypoint
reflecting the change
This script "fixes" it
#!/bin/sh
# Print the "locked" mount flags (nodev, noexec, nosuid) that are set on the
# mount containing a given path.
# Arguments:
#   source file or directory
# Outputs:
#   comma-separated list of the locked flags found (may be empty),
#   without a trailing newline
# Returns:
#   0 on success, or when the path cannot be listed
#   1 if no mount options could be found for the path or any of its parents
get_locked_mount_flags() (
	source="$1"
	prev=""
	locked_flags=""

	# If we can't read the file/directory, exit
	if ! ls "${source}" 2> /dev/null > /dev/null; then
		return 0
	fi

	# Get mount flags of given file/directory, using nearest mountpoint.
	# Earlier versions of findmnt did not check parents until it found a mountpoint,
	# so we use a workaround with dirname.
	while true; do
		flags="$(findmnt --noheadings --output OPTIONS --target "${source}" || :)"
		if [ -n "${flags}" ]; then
			break
		fi

		prev="${source}"
		source="$(dirname "${source}")"
		# dirname no longer changes the path: we walked up to the top
		# without finding anything.
		[ "${source}" = "${prev}" ] && return 1
	done

	# Wrap the option list in commas (folding multiple lines into one list) so
	# that every option can be matched as a whole ",option," token. A plain
	# substring search would also match option values that merely contain the
	# flag name, e.g. "lowerdir=/srv/noexec-data".
	flags=",$(printf "%s" "${flags}" | tr '\n' ','),"

	for flag in nodev noexec nosuid; do
		case "${flags}" in
			*",${flag},"*)
				# Locked flag found, append to list while avoiding leading/trailing commas
				locked_flags="${locked_flags:+${locked_flags},}${flag}"
				;;
		esac
	done

	printf "%s" "${locked_flags}"
)
# init_readlink: minimal stand-in for `readlink -fm`.
# `readlink -fm` is not available on busybox systems, and the target path is
# needed even when the link is broken, so the target is scraped from the
# long listing of the link instead.
# Arguments:
#   source file
# Outputs:
#   the path the link points to, with every "../" replaced by "/"
init_readlink() {
	# shellcheck disable=SC2010
	ls -l "${1}" |
		# keep only the "-> target" tail of the listing
		grep -Eo '\->.*' |
		# drop the arrow, i.e. everything up to the first space
		cut -d' ' -f2- |
		# flatten parent-directory references
		sed 's|\.\./|/|g'
}
# Recursively bind-mount a source path onto a target path.
# Arguments:
#   source_dir  -> file or directory to mount from
#   target_dir  -> file or directory to mount onto, created if needed
#   mount_flags -> optional, "rslave" is used when omitted or empty
# Outputs:
#   Nothing when all went well
#   A warning line when the target cannot be created or the mount fails
# Returns:
#   0 on success, or when the source is neither a directory nor a regular file
#   1 when the target cannot be created or the mount fails
mount_bind() (
	source_dir="$1"
	target_dir="$2"
	# No third argument (or an empty one) means the default: rslave.
	mount_flags="${3:-rslave}"

	# A symlinked source is replaced by its link target, re-rooted under
	# /run/host unless the target already mentions /run/host.
	if [ -L "${source_dir}" ]; then
		source_dir="$(init_readlink "${source_dir}")"
		case "${source_dir}" in
			*"/run/host"*) ;;
			*) source_dir="/run/host${source_dir}" ;;
		esac
	fi

	# Nothing to do when the source is neither a directory nor a regular file.
	[ -d "${source_dir}" ] || [ -f "${source_dir}" ] || return 0

	# An existing target that is currently a mountpoint gets unmounted first.
	if [ -e "${target_dir}" ]; then
		if findmnt "${target_dir}" > /dev/null; then
			umount "${target_dir}"
		fi
	fi

	# A symlink sitting at the target location is dropped.
	if [ -L "${target_dir}" ]; then
		rm -f "${target_dir}"
	fi

	# Create a mount target of the same kind as the source.
	if [ -d "${source_dir}" ]; then
		mkdir -p "${target_dir}" || {
			printf "Warning: cannot create mount target directory: %s\n" "${target_dir}"
			return 1
		}
	elif [ -f "${source_dir}" ]; then
		target_parent="$(dirname "${target_dir}")"
		if [ ! -d "${target_parent}" ]; then
			mkdir -p "${target_parent}"
		fi

		# Any symlink at target_dir (broken or not) was removed above, so
		# touch acts on the target path itself.
		touch "${target_dir}" || {
			printf "Warning: cannot create mount target file: %s\n" "${target_dir}"
			return 1
		}
	fi

	# Do the actual bind mount, reporting a failure to the caller.
	if mount --rbind -o "${mount_flags}" "${source_dir}" "${target_dir}"; then
		return 0
	fi

	printf "Warning: failed to bind mount %s to %s\n" "${source_dir}" "${target_dir}"
	return 1
)
# Workaround: copy the host's libnvidia-ml files (the versioned library and
# its two unversioned names) from /run/host into the container's
# multiarch library directory. cp -d keeps symlinks as symlinks.
for nvidia_lib in \
	/run/host/usr/lib64/libnvidia-ml.so.545.29.06 \
	/run/host/usr/lib64/libnvidia-ml.so \
	/run/host/usr/lib64/libnvidia-ml.so.1; do
	# Same file name, placed under the container's library directory.
	dest_file="/usr/lib/x86_64-linux-gnu/${nvidia_lib##*/}"
	mkdir -p "$(dirname "${dest_file}")"
	cp -d "${nvidia_lib}" "${dest_file}"
done
So it seems like this is silently failing on the container entrypoint.
I'm seeing something similar on a Fedora host and Arch container, however in my case it seems like the libraries are being mounted properly - but nvidia-smi
still doesn't work. I can reproduce this immediately after container initialization and on subsequent enters.
(With the same Fedora host and a Fedora container instead of Arch, nvidia-smi does work.)
Here's what things look like inside the distrobox:
$ ls -la /usr/lib | grep nvidia
lrwxrwxrwx 1 root root 26 Jan 28 23:22 libEGL_nvidia.so.0 -> libEGL_nvidia.so.545.29.06
-rwxr-xr-x 6 nobody nobody 1248684 Dec 31 1969 libEGL_nvidia.so.545.29.06
lrwxrwxrwx 1 root root 32 Jan 28 23:22 libGLESv1_CM_nvidia.so.1 -> libGLESv1_CM_nvidia.so.545.29.06
-rwxr-xr-x 6 nobody nobody 71208 Dec 31 1969 libGLESv1_CM_nvidia.so.545.29.06
lrwxrwxrwx 1 root root 29 Jan 28 23:22 libGLESv2_nvidia.so.2 -> libGLESv2_nvidia.so.545.29.06
-rwxr-xr-x 6 nobody nobody 128548 Dec 31 1969 libGLESv2_nvidia.so.545.29.06
lrwxrwxrwx 1 root root 26 Jan 28 23:22 libGLX_nvidia.so.0 -> libGLX_nvidia.so.545.29.06
-rwxr-xr-x 6 nobody nobody 1230564 Dec 31 1969 libGLX_nvidia.so.545.29.06
lrwxrwxrwx 1 root root 32 Jan 28 23:22 libnvidia-allocator.so.1 -> libnvidia-allocator.so.545.29.06
-rwxr-xr-x 6 nobody nobody 174848 Dec 31 1969 libnvidia-allocator.so.545.29.06
-rwxr-xr-x 6 nobody nobody 27954508 Dec 31 1969 libnvidia-eglcore.so.545.29.06
lrwxrwxrwx 1 root root 29 Jan 28 23:22 libnvidia-encode.so -> libnvidia-encode.so.545.29.06
lrwxrwxrwx 1 root root 29 Jan 28 23:22 libnvidia-encode.so.1 -> libnvidia-encode.so.545.29.06
-rwxr-xr-x 6 nobody nobody 275964 Dec 31 1969 libnvidia-encode.so.545.29.06
lrwxrwxrwx 1 root root 26 Jan 28 23:22 libnvidia-fbc.so.1 -> libnvidia-fbc.so.545.29.06
-rwxr-xr-x 6 nobody nobody 145100 Dec 31 1969 libnvidia-fbc.so.545.29.06
-rwxr-xr-x 6 nobody nobody 30060080 Dec 31 1969 libnvidia-glcore.so.545.29.06
-rwxr-xr-x 6 nobody nobody 769788 Dec 31 1969 libnvidia-glsi.so.545.29.06
-rwxr-xr-x 6 nobody nobody 12897364 Dec 31 1969 libnvidia-glvkspirv.so.545.29.06
-rwxr-xr-x 6 nobody nobody 61597888 Dec 31 1969 libnvidia-gpucomp.so.545.29.06
lrwxrwxrwx 1 root root 25 Jan 28 23:22 libnvidia-ml.so -> libnvidia-ml.so.545.29.06
lrwxrwxrwx 1 root root 25 Jan 28 23:22 libnvidia-ml.so.1 -> libnvidia-ml.so.545.29.06
-rwxr-xr-x 6 nobody nobody 2086168 Dec 31 1969 libnvidia-ml.so.545.29.06
lrwxrwxrwx 1 root root 27 Jan 28 23:22 libnvidia-nvvm.so -> libnvidia-nvvm.so.545.29.06
lrwxrwxrwx 1 root root 27 Jan 28 23:22 libnvidia-nvvm.so.4 -> libnvidia-nvvm.so.545.29.06
-rwxr-xr-x 6 nobody nobody 93231464 Dec 31 1969 libnvidia-nvvm.so.545.29.06
lrwxrwxrwx 1 root root 29 Jan 28 23:22 libnvidia-opencl.so.1 -> libnvidia-opencl.so.545.29.06
-rwxr-xr-x 6 nobody nobody 21895104 Dec 31 1969 libnvidia-opencl.so.545.29.06
lrwxrwxrwx 1 root root 34 Jan 28 23:22 libnvidia-opticalflow.so.1 -> libnvidia-opticalflow.so.545.29.06
-rwxr-xr-x 6 nobody nobody 46316 Dec 31 1969 libnvidia-opticalflow.so.545.29.06
lrwxrwxrwx 1 root root 37 Jan 28 23:22 libnvidia-ptxjitcompiler.so.1 -> libnvidia-ptxjitcompiler.so.545.29.06
-rwxr-xr-x 6 nobody nobody 29397292 Dec 31 1969 libnvidia-ptxjitcompiler.so.545.29.06
-rwxr-xr-x 6 nobody nobody 21880 Dec 31 1969 libnvidia-tls.so.545.29.06
drwxr-xr-x 1 nobody nobody 8 Dec 31 1969 nvidia
$ nvidia-smi
NVIDIA-SMI couldn't find libnvidia-ml.so library in your system. Please make sure that the NVIDIA Display Driver is properly installed and present in your system.
Please also try adding directory that contains libnvidia-ml.so to your system PATH.
ldconfig -p
also correctly shows all of the NVIDIA libraries.
Same problem here, for me it seems like the 64-bit nvidia libs are not being mounted, instead 32-bit ones are mounted in place of the 64 bit libs. For example:
$ file /usr/lib64/libnvidia-ml.so.545.29.06
/usr/lib64/libnvidia-ml.so.545.29.06: ELF 32-bit LSB shared object, Intel 80386, version 1 (SYSV), dynamically linked, stripped
It seems the correct libs are not being mounted because the 32-bit ones are already mounted:
+ for nvidia_lib in ${NVIDIA_LIBS}
++ printf %s /run/host/usr/lib64/libnvidia-ml.so.545.29.06
++ sed 's|/run/host/usr/lib/x86_64-linux-gnu/|/usr/lib64/|g'
++ sed 's|/run/host/usr/lib/i386-linux-gnu/|/usr/lib32/|g'
++ sed 's|/run/host/usr/lib64/|/usr/lib64/|g'
++ sed 's|/run/host/usr/lib32/|/usr/lib32/|g'
+ dest_file=/usr/lib64/libnvidia-ml.so.545.29.06
+ '[' -e /usr/lib64/libnvidia-ml.so.545.29.06 ']'
+ continue
Unfortunately, I don't really know where these 32-bit libs are being mounted incorrectly.
Here The Complete log of the nvidia related stuff: arch.log
Host: Fedora Container: Arch (Bazzite)
The 64-bit nvidia libs are not being mounted, instead 32-bit ones are mounted in place of the 64 bit libs
This is happening for me as well and I'm guessing this is the problem. Distrobox is mounting NVIDIA libraries from /lib
on the (Fedora) host instead of /lib64
.
That would also explain why disabling NVIDIA integration in Distrobox and installing the correct version of nvidia-utils
in the container serves as a workaround - that way, the 64-bit libs are installed.
I think I have it figured out. On Arch, lib
and lib64
are both symlinked to /usr/lib
:
lrwxrwxrwx 1 root root 7 Jan 19 12:10 lib -> usr/lib
lrwxrwxrwx 1 root root 7 Jan 19 12:10 lib64 -> usr/lib
During initialization, Distrobox tries to mount 32-bit libs into lib
and 64-bit libs into lib64
, since that's how they are laid out on the host. It likely fails to mount the 64-bit ones since the 32-bit ones were just mounted in the same place (as @sdaqo suggests)
Yeah I also think I found the problem in the code:
this command
find /run/host/etc/ /run/host/usr/ \
-path "/run/host/usr/lib/i386-linux-gnu/*" -prune -o \
-path "/run/host/usr/lib/x86_64-linux-gnu/*" -prune -o \
-path "/run/host/usr/lib32/*" -prune -o \
-path "/run/host/usr/lib64/*" -prune -o \
-iname "*nvidia*" -not -type d -print 2> /dev/null || :
also includes all the libs in /run/host/usr/lib/*
(I don't really know why).
Also another thing about this command, why does it even search lib paths, if I read correctly above this command it says:
# First we find all non-lib files we need, this includes
# - binaries
# - confs
# - egl files
# - icd files
# Excluding here the libs, we will threat them later specifically
non-lib
The -prune flag for find excludes the given path - it may just need /lib added along with /lib32
Ah OK, that makes sense. Do you want to make a PR?
Not sure if this is the full fix, will do some more digging later today and verify things are still working as expected with the change
Just for the sake of testing: I tried creating a new container with this patch and the nvidia integration now works:
diff --git a/distrobox-init b/distrobox-init
index 03fdd27..2052ead 100755
--- a/distrobox-init
+++ b/distrobox-init
@@ -1503,6 +1503,7 @@ if [ "${nvidia}" -eq 1 ]; then
-path "/run/host/usr/lib/x86_64-linux-gnu/*" -prune -o \
-path "/run/host/usr/lib32/*" -prune -o \
-path "/run/host/usr/lib64/*" -prune -o \
+ -path "/run/host/usr/lib/*" -prune -o \
-iname "*nvidia*" -not -type d -print 2> /dev/null || :)"
for nvidia_file in ${NVIDIA_FILES}; do
dest_file="$(printf "%s" "${nvidia_file}" | sed 's|/run/host||g')"
For me the issue was that the libs may already exist as empty libs in the container from previous uses. Currently, they are skipped and not remounted during init:
Then, they will be removed later as they are empty (though this does not matter, as they would not work anyway):
So the solution for me is to simply not skip them, but to always remount.
Just for the sake of testing: I tried creating a new container with this patch and the nvidia integration now works:
diff --git a/distrobox-init b/distrobox-init index 03fdd27..2052ead 100755 --- a/distrobox-init +++ b/distrobox-init @@ -1503,6 +1503,7 @@ if [ "${nvidia}" -eq 1 ]; then -path "/run/host/usr/lib/x86_64-linux-gnu/*" -prune -o \ -path "/run/host/usr/lib32/*" -prune -o \ -path "/run/host/usr/lib64/*" -prune -o \ + -path "/run/host/usr/lib/*" -prune -o \ -iname "*nvidia*" -not -type d -print 2> /dev/null || :)" for nvidia_file in ${NVIDIA_FILES}; do dest_file="$(printf "%s" "${nvidia_file}" | sed 's|/run/host||g')"
@sdaqo This also seems to fix things on my end. Do you want to make the PR since you have the patch?
@Aleko2286 I have intermittently seen the empty library issue previously, but it also seems to have been resolved with @sdaqo's patch. I used to get either empty or 32-bit libraries depending on how the container runtime was feeling that day, but excluding /usr/lib
from NVIDIA_FILES
but keeping it on NVIDIA_LIBS
seems to make that logic behave better across the board.
@sdaqo's patch did not work for me for an ubuntu guest system. Ubuntu does not have that issue though; you probably need to patch both.
So the solution for me is to simply not skip them, but to always remount.
A possible approach for that might be to move the empty lib cleaning to before the libraries are actually mounted in - that way the empty ones would get cleaned up and remounted all at once. Not sure what the potential side effects of that are, though. It might be worth investigating why those libs are being made empty in the first place - it seems like that should not happen very often based on the comments around line 1580, and I recall it being pretty rare on my system.
I do think sdaqo's change is ready to PR, since it seems to fix the issue we're seeing with Arch guests.
So I just tested it for an arch guest with only @sdaqo's patch applied and I am running into the exact same problem as with ubuntu. If the container was stopped before, then the libs will be empty the next time and it will not remount them.
Maybe the cleaning up of the empty files just doesn't work? This is just a hunch but last time while testing I tried to delete the Nvidia libs from inside the container and got a device is busy error - probably because it was a mount, after that I tried to un-mount the files and that worked just fine, maybe we need to try unmounting as well? And putting the empty lib check at the top seems like a good idea as well. @Aleko2286 do you want to try that, because I am currently not running into this issue.
Yeah for example:
$ cd /usr/lib
$ sudo find -iname "libnvidia-ml.so.5*" -delete
find: cannot delete ‘./libnvidia-ml.so.545.29.06’: Device or resource busy
If I now sudo umount ./libnvidia-ml.so.545.29.06
, that works but now there is an empty file...
$ sudo find -empty -iname "libnvidia-ml.so.5*"
./libnvidia-ml.so.545.29.06
This is probably why sometimes there are empty files, they get un-mounted somewhere and then if you start your box again they are empty.
Anyway we can now just sudo rm ./libnvidia-ml.so.545.29.06
to get rid of the empty file.
Maybe you can try this @Aleko2286 :
diff --git a/distrobox-init b/distrobox-init
index 03fdd27..1f864da 100755
--- a/distrobox-init
+++ b/distrobox-init
@@ -1478,6 +1478,16 @@ done
if [ "${nvidia}" -eq 1 ]; then
printf "distrobox: Setting up host's nvidia integration...\n"
+ # Refresh ldconfig cache, also detect if there are empty files remaining
+ # and clean them.
+ # This could happen when upgrading drivers and changing versions.
+ empty_libs="$(ldconfig 2>&1 | grep -Eo "File.*is empty" | cut -d' ' -f2)"
+ if [ -n "${empty_libs}" ]; then
+ # shellcheck disable=SC2086
+ find ${empty_libs} -exec sh -c 'rm -f "$1" 2> /dev/null || umount $1' sh {} ';' || :
+ find /usr/ /etc/ -empty -iname "*nvidia*" -exec sh -c 'rm -f "$1" 2> /dev/null || umount $1' sh {} ';' || :
+ fi
+
# Find where the system expects libraries to be put
lib32_dir="/usr/lib/"
lib64_dir="/usr/lib/"
@@ -1503,6 +1513,7 @@ if [ "${nvidia}" -eq 1 ]; then
-path "/run/host/usr/lib/x86_64-linux-gnu/*" -prune -o \
-path "/run/host/usr/lib32/*" -prune -o \
-path "/run/host/usr/lib64/*" -prune -o \
+ -path "/run/host/usr/lib/*" -prune -o \
-iname "*nvidia*" -not -type d -print 2> /dev/null || :)"
for nvidia_file in ${NVIDIA_FILES}; do
dest_file="$(printf "%s" "${nvidia_file}" | sed 's|/run/host||g')"
@@ -1574,15 +1585,8 @@ if [ "${nvidia}" -eq 1 ]; then
mount_bind "${nvidia_lib}" "${dest_file}" ro
done
- # Refresh ldconfig cache, also detect if there are empty files remaining
- # and clean them.
- # This could happen when upgrading drivers and changing versions.
- empty_libs="$(ldconfig 2>&1 | grep -Eo "File.*is empty" | cut -d' ' -f2)"
- if [ -n "${empty_libs}" ]; then
- # shellcheck disable=SC2086
- find ${empty_libs} -delete 2> /dev/null || :
- find /usr/ /etc/ -empty -iname "*nvidia*" -delete 2> /dev/null || :
- fi
+ # Refresh ldconfig cache
+ ldconfig 2>&1 /dev/null
fi
###############################################################################
I opened a PR for the first patch for now
So about the patch above I tried this and yes it works, but for whatever reason I have to wait some seconds before the libraries appear:
$ ./distrobox enter archlinux
Starting container... [ OK ]
Installing basic packages... [ OK ]
Setting up devpts mounts... [ OK ]
Setting up read-only mounts... [ OK ]
Setting up read-write mounts... [ OK ]
Setting up host's sockets integration... [ OK ]
Setting up host's nvidia integration... [ OK ]
Integrating host's themes, icons, fonts... [ OK ]
Setting up package manager exceptions... [ OK ]
Setting up pacman exceptions... [ OK ]
Setting up pacman hooks... [ OK ]
Setting up distrobox profile... [ OK ]
Setting up sudo... [ OK ]
Setting up groups... [ OK ]
Setting up users... [ OK ]
Setting up skel... [ OK ]
Container Setup Complete!
$ find /usr/lib -iname 'libnvidia*'
$ find /usr/lib -iname 'libnvidia*'
$ find /usr/lib -iname 'libnvidia*'
$ find /usr/lib -iname 'libnvidia*'
$ find /usr/lib -iname 'libnvidia*'
$ find /usr/lib -iname 'libnvidia*'
$ find /usr/lib -iname 'libnvidia*'
/usr/lib/libnvidia-allocator.so.1
/usr/lib/libnvidia-allocator.so.545.29.06
/usr/lib/libnvidia-api.so.1
/usr/lib/libnvidia-cfg.so.1
/usr/lib/libnvidia-cfg.so.545.29.06
/usr/lib/libnvidia-container-go.so.1
/usr/lib/libnvidia-container-go.so.1.14.5
/usr/lib/libnvidia-container.so.1
/usr/lib/libnvidia-container.so.1.14.5
/usr/lib/libnvidia-egl-gbm.so.1
/usr/lib/libnvidia-egl-gbm.so.1.1.1
/usr/lib/libnvidia-egl-wayland.so.1
/usr/lib/libnvidia-egl-wayland.so.1.1.13
/usr/lib/libnvidia-eglcore.so.545.29.06
/usr/lib/libnvidia-encode.so
/usr/lib/libnvidia-encode.so.1
/usr/lib/libnvidia-encode.so.545.29.06
/usr/lib/libnvidia-fbc.so.1
/usr/lib/libnvidia-fbc.so.545.29.06
/usr/lib/libnvidia-glcore.so.545.29.06
/usr/lib/libnvidia-glsi.so.545.29.06
/usr/lib/libnvidia-glvkspirv.so.545.29.06
/usr/lib/libnvidia-gpucomp.so.545.29.06
/usr/lib/libnvidia-gtk3.so.545.29.06
/usr/lib/libnvidia-ml.so
This does not happen the first time after creating the container, only after stopping and then starting it again.
This is very weird...
This does not happen the first time after creating the container, only after stopping and then starting it again.
This is very weird...
The enter command will not wait for the init script to have finished. But I don't think that's really an issue, it should take at most a few seconds to finish.
diff --git a/distrobox-init b/distrobox-init
index 03fdd27..1f864da 100755
--- a/distrobox-init
+++ b/distrobox-init
@@ -1478,6 +1478,16 @@ done
if [ "${nvidia}" -eq 1 ]; then
printf "distrobox: Setting up host's nvidia integration...\n"
+ # Refresh ldconfig cache, also detect if there are empty files remaining
+ # and clean them.
+ # This could happen when upgrading drivers and changing versions.
+ empty_libs="$(ldconfig 2>&1 | grep -Eo "File.*is empty" | cut -d' ' -f2)"
+ if [ -n "${empty_libs}" ]; then
+ # shellcheck disable=SC2086
+ find ${empty_libs} -exec sh -c 'rm -f "$1" 2> /dev/null || umount $1' sh {} ';' || :
+ find /usr/ /etc/ -empty -iname "*nvidia*" -exec sh -c 'rm -f "$1" 2> /dev/null || umount $1' sh {} ';' || :
+ fi
+
# Find where the system expects libraries to be put
lib32_dir="/usr/lib/"
lib64_dir="/usr/lib/"
@@ -1503,6 +1513,7 @@ if [ "${nvidia}" -eq 1 ]; then
-path "/run/host/usr/lib/x86_64-linux-gnu/*" -prune -o \
-path "/run/host/usr/lib32/*" -prune -o \
-path "/run/host/usr/lib64/*" -prune -o \
+ -path "/run/host/usr/lib/*" -prune -o \
-iname "*nvidia*" -not -type d -print 2> /dev/null || :)"
for nvidia_file in ${NVIDIA_FILES}; do
dest_file="$(printf "%s" "${nvidia_file}" | sed 's|/run/host||g')"
@@ -1574,15 +1585,8 @@ if [ "${nvidia}" -eq 1 ]; then
mount_bind "${nvidia_lib}" "${dest_file}" ro
done
- # Refresh ldconfig cache, also detect if there are empty files remaining
- # and clean them.
- # This could happen when upgrading drivers and changing versions.
- empty_libs="$(ldconfig 2>&1 | grep -Eo "File.*is empty" | cut -d' ' -f2)"
- if [ -n "${empty_libs}" ]; then
- # shellcheck disable=SC2086
- find ${empty_libs} -delete 2> /dev/null || :
- find /usr/ /etc/ -empty -iname "*nvidia*" -delete 2> /dev/null || :
- fi
+ # Refresh ldconfig cache
+ ldconfig 2>&1 /dev/null
fi
###############################################################################
This works.
This has been all merged. thanks everyone for the work on finding this out!
Describe the bug
Intermittently (weirdly, it seems like every other time for me), when starting a distrobox container made with the
--nvidia
flag, some of the libraries are not mounted into HOST
To Reproduce
Create the container
Enter the container and try running nvidia-smi
Stop the container so distrobox remounts the nvidia libraries on next start
Remount on next start
Expected behavior The required libraries are reliably mounted into
/usr
Logs Run the commands with
--verbose
and post the log here as a file upload. Attach also the output of podman logs
or docker logs
, possibly with the --latest
flag.
Desktop (please complete the following information):
podman
4.8.1
distrobox: 1.6.0.1
Fedora Kinoite 39
curl
Additional context Add any other context about the problem here.