Closed fire-burner closed 6 months ago
My guess is debian disabled some of the required kernel options. Run the following, and provide the output:
sudo grep -E 'CONFIG_FTRACE|CONFIG_KPROBES|CONFIG_PCI_QUIRKS|CONFIG_KALLSYMS|CONFIG_KALLSYMS_ALL|CONFIG_FUNCTION_TRACER' /boot/config-$(uname -r)
Same problem while upgrading archlinux.
The config seems to be correct:
[root@host jeremie]# gzip -d -c /proc/config.gz > /tmp/config
[root@host jeremie]# sudo grep -E 'CONFIG_FTRACE|CONFIG_KPROBES|CONFIG_PCI_QUIRKS|CONFIG_KALLSYMS|CONFIG_KALLSYMS_ALL|CONFIG_FUNCTION_TRACER' /tmp/config
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
CONFIG_KALLSYMS_BASE_RELATIVE=y
CONFIG_KPROBES=y
CONFIG_KPROBES_ON_FTRACE=y
CONFIG_PCI_QUIRKS=y
CONFIG_FTRACE=y
CONFIG_FUNCTION_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_MCOUNT_USE_CC=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
# CONFIG_FTRACE_STARTUP_TEST is not set
# CONFIG_KPROBES_SANITY_TEST is not set
[root@host jeremie]
Uname result :
Linux host 5.15.5-arch1-1 #1 SMP PREEMPT Thu, 25 Nov 2021 22:09:33 +0000 x86_64 GNU/Linux
The hook seems to be installed:
[root@host jeremie]# dmesg |grep vend
[ 3.104353] vendor_reset: loading out-of-tree module taints kernel.
[ 3.104429] vendor_reset: module verification failed: signature and/or required key missing - tainting kernel
[ 3.180564] vendor_reset_hook: installed
However, it seems that the kernel's standard reset mechanism is still used:
[ 75.574870] VFIO - User Level meta-driver version: 0.3
[ 75.605979] vfio-pci 0000:0b:00.0: vgaarb: changed VGA decodes: olddecodes=io+mem,decodes=io+mem:owns=none
[ 75.621957] vfio_pci: add [1002:731f[ffffffff:ffffffff]] class 0x000000/00000000
[ 75.621967] vfio_pci: add [1002:1479[ffffffff:ffffffff]] class 0x000000/00000000
[ 75.621974] vfio_pci: add [1002:1478[ffffffff:ffffffff]] class 0x000000/00000000
[ 75.621981] vfio_pci: add [1002:ab38[ffffffff:ffffffff]] class 0x000000/00000000
[ 75.886264] audit: type=1130 audit(1638312078.857:64): pid=1 uid=0 auid=4294967295 ses=4294967295 msg='unit=virtlogd comm="systemd" exe="/usr/lib/systemd/systemd" hostname=? addr=? terminal=? res=success'
[ 75.921925] tun: Universal TUN/TAP device driver, 1.6
[ 75.922628] br_lan: port 2(vnet0) entered blocking state
[ 75.922635] br_lan: port 2(vnet0) entered disabled state
[ 75.922747] device vnet0 entered promiscuous mode
[ 75.922764] audit: type=1700 audit(1638312078.894:65): dev=vnet0 prom=256 old_prom=0 auid=4294967295 uid=0 gid=0 ses=4294967295
[ 75.922876] audit: type=1300 audit(1638312078.894:65): arch=c000003e syscall=16 success=yes exit=0 a0=23 a1=89a2 a2=7f7567ffe040 a3=eb items=0 ppid=1 pid=675 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=4294967295 comm="rpc-worker" exe="/usr/bin/libvirtd" key=(null)
[ 75.922879] audit: type=1327 audit(1638312078.894:65): proctitle=2F7573722F62696E2F6C69627669727464002D2D74696D656F757400313230
[ 75.922976] br_lan: port 2(vnet0) entered blocking state
[ 75.922979] br_lan: port 2(vnet0) entered forwarding state
[ 75.923051] audit: type=2501 audit(1638312078.894:66): pid=675 uid=0 auid=4294967295 ses=4294967295 msg='virt=kvm resrc=net reason=open vm="Windows10_Mining" uuid=739dbe77-a039-4d73-8aba-1d35a0dd6a22 net=52:54:00:17:c9:eb path="/dev/net/tun" rdev=0A:C8 exe="/usr/bin/libvirtd" hostname=? addr=? terminal=? res=success'
[ 75.993137] audit: type=2501 audit(1638312078.964:67): pid=675 uid=0 auid=4294967295 ses=4294967295 msg='virt=kvm resrc=net reason=open vm="Windows10_Mining" uuid=739dbe77-a039-4d73-8aba-1d35a0dd6a22 net=52:54:00:17:c9:eb path="/dev/vhost-net" rdev=0A:EE exe="/usr/bin/libvirtd" hostname=? addr=? terminal=? res=success'
[ 76.051679] audit: type=1334 audit(1638312079.020:68): prog-id=23 op=LOAD
[ 76.051683] audit: type=1300 audit(1638312079.020:68): arch=c000003e syscall=321 success=yes exit=34 a0=5 a1=7f7567ffe0c0 a2=78 a3=7f7574037010 items=0 ppid=1 pid=675 auid=4294967295 uid=0 gid=0 euid=0 suid=0 fsuid=0 egid=0 sgid=0 fsgid=0 tty=(none) ses=4294967295 comm="rpc-worker" exe="/usr/bin/libvirtd" key=(null)
[ 76.051686] audit: type=1327 audit(1638312079.020:68): proctitle=2F7573722F62696E2F6C69627669727464002D2D74696D656F757400313230
[ 77.725214] vfio-pci 0000:0b:00.0: enabling device (0002 -> 0003)
[ 77.725652] vfio-pci 0000:0b:00.0: vfio_ecap_init: hiding ecap 0x19@0x270
[ 77.725667] vfio-pci 0000:0b:00.0: vfio_ecap_init: hiding ecap 0x1b@0x2d0
[ 77.725672] vfio-pci 0000:0b:00.0: vfio_ecap_init: hiding ecap 0x25@0x400
[ 77.725674] vfio-pci 0000:0b:00.0: vfio_ecap_init: hiding ecap 0x26@0x410
[ 77.725677] vfio-pci 0000:0b:00.0: vfio_ecap_init: hiding ecap 0x27@0x440
[ 129.272071] br_lan: port 2(vnet0) entered disabled state
[ 129.272320] device vnet0 left promiscuous mode
[ 129.272334] br_lan: port 2(vnet0) entered disabled state
[ 129.272357] kauditd_printk_skb: 25 callbacks suppressed
[ 129.272360] audit: type=1700 audit(1638312132.244:94): dev=vnet0 prom=0 old_prom=256 auid=4294967295 uid=65534 gid=992 ses=4294967295
[ 130.557432] vfio-pci 0000:0b:00.0: can't change power state from D0 to D3hot (config space inaccessible)
[ 130.557440] vfio-pci 0000:0b:00.1: can't change power state from D0 to D3hot (config space inaccessible)
[ 130.761354] snd_hda_intel 0000:0b:00.1: can't change power state from D3cold to D0 (config space inaccessible)
[ 130.761375] snd_hda_intel 0000:0b:00.1: can't change power state from D3cold to D0 (config space inaccessible)
[ 130.761488] snd_hda_intel 0000:0b:00.1: Force to non-snoop mode
[ 130.761724] snd_hda_intel 0000:0b:00.1: number of I/O streams is 30, forcing separate stream tags
[ 130.762157] audit: type=1334 audit(1638312133.734:95): prog-id=0 op=UNLOAD
[ 130.766867] audit: type=2500 audit(1638312133.737:96): pid=675 uid=0 auid=4294967295 ses=4294967295 msg='virt=kvm op=stop reason=shutdown vm="Windows10_Mining" uuid=739dbe77-a039-4d73-8aba-1d35a0dd6a22 vm-pid=-1 exe="/usr/bin/libvirtd" hostname=? addr=? terminal=? res=success'
[ 130.866743] snd_hda_intel 0000:0b:00.1: CORB reset timeout#2, CORBRP = 65535
[ 130.866791] hdaudio hdaudioC0D0: no AFG or MFG node found
[ 130.866798] hdaudio hdaudioC0D1: no AFG or MFG node found
[ 130.866804] hdaudio hdaudioC0D2: no AFG or MFG node found
[ 130.866810] hdaudio hdaudioC0D3: no AFG or MFG node found
[ 130.866811] snd_hda_intel 0000:0b:00.1: no codecs initialized
[ 172.550923] audit: type=1101 audit(1638312175.520:97): pid=888 uid=0 auid=1000 ses=1 msg='op=PAM:accounting grantors=pam_unix,pam_permit,pam_time acct="root" exe="/usr/bin/sudo" hostname=? addr=? terminal=/dev/pts/0 res=success'
Same problem while upgrading archlinux.
I can confirm similar output and replication using Linux 5.15.5-arch1-1 x86_64
Here is my output:
user@host:~$ sudo grep -E 'CONFIG_FTRACE|CONFIG_KPROBES|CONFIG_PCI_QUIRKS|CONFIG_KALLSYMS|CONFIG_KALLSYMS_ALL|CONFIG_FUNCTION_TRACER' /boot/config-$(uname -r)
CONFIG_KALLSYMS=y
CONFIG_KALLSYMS_ALL=y
CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y
CONFIG_KALLSYMS_BASE_RELATIVE=y
CONFIG_KPROBES=y
CONFIG_KPROBES_ON_FTRACE=y
CONFIG_PCI_QUIRKS=y
CONFIG_FTRACE=y
CONFIG_FUNCTION_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_FTRACE_MCOUNT_RECORD=y
CONFIG_FTRACE_MCOUNT_USE_CC=y
# CONFIG_FTRACE_RECORD_RECURSION is not set
# CONFIG_FTRACE_STARTUP_TEST is not set
# CONFIG_KPROBES_SANITY_TEST is not set
kernel: linux-image-5.15.0-1-amd64/testing,now 5.15.3-1 amd64
Since vendor-reset depends on the internal structure of the kernel, it is affected by this patch in version 5.15.
AFTER loading the module (after dmesg says vendor_reset_hook: installed),
run echo 'device_specific' > /sys/bus/pci/devices/<pci_device_id_here>/reset_method
as root privilege.
and then try to boot the VM.
Also experiencing this issue on 5.15.3-gentoo-x86_64 (custom kernel, but oldconfig from when it worked with 5.10.x / 5.13.x / 5.14.x).
CONFIG_KALLSYMS=y CONFIG_KALLSYMS_ALL=y CONFIG_KALLSYMS_ABSOLUTE_PERCPU=y CONFIG_KALLSYMS_BASE_RELATIVE=y CONFIG_KPROBES=y CONFIG_KPROBES_ON_FTRACE=y CONFIG_PCI_QUIRKS=y CONFIG_FTRACE=y CONFIG_FUNCTION_TRACER=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FTRACE_MCOUNT_RECORD=y CONFIG_FTRACE_MCOUNT_USE_CC=y CONFIG_FTRACE_RECORD_RECURSION is not set CONFIG_FTRACE_STARTUP_TEST is not set CONFIG_KPROBES_SANITY_TEST is not set
Since vendor-reset depends on the internal structure of the kernel, it is affected by this patch in version 5.15.
AFTER loading the module (after dmesg says vendor_reset_hook: installed), run
echo 'device_specific' > /sys/bus/pci/devices/<pci_device_id_here>/reset_method
as root privilege. and then try to boot the VM.
This worked for me (Debian testing kernel 5.15.5). And I had no longer delay when restarting the VM than before.
I assume this fix could easily be added to the vendor-reset module?
I can confirm that this issue is also present on the newly released 5.16 as well.
Did you test, if the work around is still working? https://github.com/gnif/vendor-reset/issues/46#issuecomment-992282166
Hi all, i have the same problem with Arch since the upgrade from 5.10.89-lts to 5.15.14-1-lts. I tried the workaround from pluser with: echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method
and now i have the file: /sys/bus/pci/devices/0000:0c:00.0/reset_method with the text: device_specific
But it doesn't work for me. I always get the error: "Unknown PCI header type '127' for device '0000:0c:00.0'" after a restart of the vm. Did i not understand the workaround correctly?
Thanks for any explanations! Best regards, johno
**EDIT: I think it works perfectly now! I had to write to the file before a VM is started!**
i created a systemd service to make this automatically. Thanks for the great workaround!
PS.: here my script in the file: /lib/systemd/system/vrwa.service:
# Selects the 'device_specific' reset method for one GPU at boot.
# Replace 0000:0c:00.0 with the PCI address of your own card.
[Unit]
Description=vrwa Service
After=multi-user.target

[Service]
# The command runs once and exits. Type=oneshot makes systemd wait for it and
# mark the unit failed if the write is rejected; RemainAfterExit keeps the
# unit reported as active once the write has succeeded.
Type=oneshot
RemainAfterExit=yes
ExecStart=/usr/bin/bash -c 'echo device_specific > /sys/bus/pci/devices/0000:0c:00.0/reset_method'

[Install]
WantedBy=multi-user.target
and i enabled it with systemd: systemctl enable vrwa
PPS.: you have to find out the correct pci-address of your card by yourself of course!
just confirm that systemd service is required and fixes it on Debian Bullseye with Linux 5.16. had to add an ExecStartPre section as i passthrough multiple GPUs.
For systems running OpenRC for init and not systemd (like my gentoo system), you need to create a file under /etc/local.d, in my case I named it navi10_reset_method.start. The .start is important as this will run at boot.
Essentially it does the same thing as above, just not as a systemd unit.
nickle@lux /etc/local.d $ cat navi10_reset_method.start echo "Setting Navi10 device to device specific reset method..." echo 'device_specific' > /sys/bus/pci/devices/0000\:30\:00.0/reset_method
You can of course use the systemd unit (or rc init file), but if you are using libvirtd+QEMU then hooks are available. Create the following snippet as /etc/libvirt/hooks/qemu .
#!/bin/sh
# libvirt QEMU hook (/etc/libvirt/hooks/qemu): selects the 'device_specific'
# reset method for a passed-through GPU when a guest reaches the "start" hook.
#
# Arguments supplied by libvirt:
#   $1 - guest name
#   $2 - hook name (only "start" is acted upon)
#   $3 - state name
# Any further arguments remain available as "$4" and onwards.
#
# Written for plain POSIX sh: the previous version used [[ ]] and ${@:4},
# which fail on systems where /bin/sh is not bash.
# The file must be executable, otherwise the hook is not run.
GUEST_NAME="$1"
HOOK_NAME="$2"
STATE_NAME="$3"

# PCI address of the GPU, e.g. 0000:0c:00.0. Replace the placeholder; it is
# quoted below so that an unedited value fails with a clear "No such file"
# error instead of being parsed as shell redirections.
PCI_DEVICE="<pci_device_id_here>"

if [ "${HOOK_NAME}" = "start" ]; then
  echo 'device_specific' > "/sys/bus/pci/devices/${PCI_DEVICE}/reset_method"
fi
See libvirt manual for details.
Thank you pluser for the tip! And again, thanks very, very much for the workaround!! :+1:
Has anyone managed to do this with vfio-pci binding the GPU? My 6700 XT responds with:
vfio-pci 0000:0b:00.0: Unsupported reset method 'device_specific'
echo: write error: Invalid argument
Whenever 'device_specific' is being sent to reset_method on the device. It is currently set as 'bus'.
Tried sh, bash, zsh, sudo, su. Tried different kernels (5.16.2-zen1-1-zen, 5.15.16-1-lts). vendor-reset module is confirmed loaded 6s into boot according to dmesg. All required kernel configs are =y.
Anything I'm missing here?
Edit: Seems like it's not supported, should have read that first. People seem to think the 6000 series is immune to this problem, but apparently it is not. Weird and annoying.
Radeon 6700 XT may not be supported.
Compare the result of lspci -nn
with device-db.h to see if it is supported.
Your device must be defined in device-db.h.
You can of course use the systemd unit (or rc init file), but if you are using libvirtd+QEMU then hooks are available. Create the following snippet as /etc/libvirt/hooks/qemu .
#!/bin/sh
# libvirt QEMU hook (/etc/libvirt/hooks/qemu): selects the 'device_specific'
# reset method for a passed-through GPU when a guest reaches the "start" hook.
#
# Arguments supplied by libvirt:
#   $1 - guest name
#   $2 - hook name (only "start" is acted upon)
#   $3 - state name
# Any further arguments remain available as "$4" and onwards.
#
# Written for plain POSIX sh ([ ] instead of [[ ]], no ${@:4}) so that it
# also runs where /bin/sh is not bash.
GUEST_NAME="$1"
HOOK_NAME="$2"
STATE_NAME="$3"

# PCI address of the GPU, e.g. 0000:0c:00.0. Replace the placeholder; it is
# quoted below so an unedited value is not parsed as shell redirections.
PCI_DEVICE="<pci_device_id_here>"

if [ "${HOOK_NAME}" = "start" ]; then
  echo 'device_specific' > "/sys/bus/pci/devices/${PCI_DEVICE}/reset_method"
fi
See libvirt manual for details.
The hook unfortunately doesn't work for me on Debian testing.
I've fixed this on my system by staying on Linux 5.14
vendor-reset + echo device_specific > /sys/bus/pci/devices/0000:28:00.0/reset_method in hook / systemd doesn't seem to work for me, am i missing something? Using arch 5.16 with vendor-reset-dkms-git from AUR. shutting VM's keeps resulting in black screens. initial VFIO setup is done with rising prism guide.
Are there any plans from anybody to figure out how to fix it in the source so this workaround isn't needed anymore?
I got it working after more trying on the latest kernel. I edited my vfio-bind script so that device_specific is assigned immediately-before the vfio-pci pass-off and straight after for good measure. It now looks like this:
#!/bin/bash
# Hand each PCI device named on the command line over to vfio-pci, selecting
# the 'device_specific' reset method immediately before the device leaves its
# current driver and once more after its ID has been given to vfio-pci.
#
# Usage: <script> 0000:0b:00.0 [0000:0b:00.1 ...]

# Root of the PCI sysfs tree; overridable so the logic can be exercised
# against a scratch directory.
SYSFS_PCI="${SYSFS_PCI:-/sys/bus/pci}"

modprobe vfio-pci

#######################################
# Rebind the given devices to vfio-pci.
# Globals:   SYSFS_PCI (read)
# Arguments: PCI addresses, one per argument
# Outputs:   a warning on stderr for every address that does not exist
#######################################
bind_to_vfio() {
  local dev dev_dir vendor device
  for dev in "$@"; do
    dev_dir="${SYSFS_PCI}/devices/${dev}"
    # A mistyped address used to end up writing a blank ID pair to new_id.
    if [[ ! -d "${dev_dir}" ]]; then
      printf 'no such PCI device: %s\n' "${dev}" >&2
      continue
    fi
    vendor=$(<"${dev_dir}/vendor")
    device=$(<"${dev_dir}/device")
    if [[ -e "${dev_dir}/driver" ]]; then
      # Set the reset method while the old driver still owns the device,
      # then detach the device from that driver.
      echo 'device_specific' > "${dev_dir}/reset_method"
      echo "${dev}" > "${dev_dir}/driver/unbind"
    fi
    echo "${vendor} ${device}" > "${SYSFS_PCI}/drivers/vfio-pci/new_id"
    # Repeated after the hand-off "for good measure".
    echo 'device_specific' > "${dev_dir}/reset_method"
  done
}

bind_to_vfio "$@"
@cmdrkotori Hello, my VM OS starts successfully, but when I shut down the VM OS, my screen goes to sleep. How can I use vendor-reset?
modprobe -r vfio-pci modprobe vendor-reset echo 'device_specific' > /sys/bus/pci/devices/0000:26:00:00/reset_method
i can run this script when shutdown vm os? r7 3800x + rx 580, i study the github: https://github.com/ledisthebest/LEDs-single-gpu-passthrough
Would have been nice if Proxmox provided a hint on how to create those hook-scripts. I had to set the parameter -w, which I still don't know what it does and just stumbled over it on dozens of threads.
So here's a working PERL script - tested with windows 11 -> needs to be set for the desired VM. You can cut out even more code if you like. Reboot your system if you're done.
# create the file and save the code below into it (CTRL+X > Y)
nano /var/lib/vz/snippets/vendor-reset.pl
# make the file executable
chmod +x /var/lib/vz/snippets/vendor-reset.pl
# replace <vmid> with your desired vmid - e.g. 100 and replace the path if needed
# make sure you've got snippets activated for one of your storages
qm set <vmid> --hookscript local:snippets/vendor-reset.pl
#!/usr/bin/perl -w
# Proxmox VE guest hookscript: selects the 'device_specific' reset method for
# a passed-through GPU before the guest starts.
#
# Invoked as: vendor-reset.pl <vmid> <phase>
# Only the 'pre-start' phase does any work; the other guest lifecycle phases
# are accepted and ignored so the script does not fail when called for them.
use strict;
use warnings;

print "GUEST HOOK: " . join(' ', @ARGV). "\n";

my $vmid = shift;
my $phase = shift;

die "usage: $0 <vmid> <phase>\n" unless defined $vmid && defined $phase;

# sysfs attribute of the GPU; adjust the PCI address to match your card.
my $reset_method = '/sys/bus/pci/devices/0000:03:00.0/reset_method';

if ($phase eq 'pre-start') {
    print "$vmid is starting, doing preparations.\n";
    # Write the attribute directly instead of shelling out to echo, and report
    # a rejected write. As before, a failed write does not abort the start.
    if (open(my $fh, '>', $reset_method)) {
        print {$fh} "device_specific\n";
        close($fh)
            or warn "could not set reset method via $reset_method: $!\n";
    } else {
        warn "could not open $reset_method: $!\n";
    }
} elsif ($phase eq 'post-start' || $phase eq 'pre-stop' || $phase eq 'post-stop') {
    # Nothing to do in these phases.
} else {
    die "got unknown phase '$phase'\n";
}
exit(0);
Should this work on 5.17? I tried the libvirt hook above. I have a Vega 64 and have to suspend and wake before passing it through to VM.
@eararipe This solution works fine for me under 5.17 w/ Navi 10.
@nochristrequired Thanks for the confirmation. I'm going to see why it didn't work this weekend.
I loaded the module, updated initramfs, but lsmod shows it being used by 0. <- means nothing.
Update -> It worked! I needed to fix permissions on /etc/libvirt/hooks/qemu for the script to run. Now the VM starts without having to suspend the host first.
On the latest kernel I'm getting the log message
pci-stub 0000:0f:00.0: Unsupported reset method 'device_specific'
and passthrough doesn't work any more.
qm set <vmid> local:snippets/vendor-reset.pl
@Pandiora Thanks for your proxmox snippet. I've tried it out but it fails.
$ qm set <vmid> -hookscript local:snippets/vendor-reset.pl
GUEST HOOK: 103 pre-start
103 is starting, doing preparations.
sh: 1: echo: echo: I/O error
kvm: ../hw/pci/pci.c:1487: pci_irq_handler: Assertion `0 <= irq_num && irq_num < PCI_NUM_PINS' failed.
TASK ERROR: start failed: QEMU exited with code 1
I'm getting echo: write error: Invalid argument
on 5.15.59-gentoo-dist
@EsmailELBoBDev2 Same, I can't change the reset method to any of the reset methods in pci.c, it is stuck on `bus`. Trying all possible values throws the same write error and produces this result in dmesg:
[ 689.396614] vfio-pci 0000:0f:00.0: Unsupported reset method 'device_specific'
[ 709.101147] vfio-pci 0000:0f:00.0: Unsupported reset method 'acpi'
[ 715.200579] vfio-pci 0000:0f:00.0: Unsupported reset method 'flr'
[ 724.961395] vfio-pci 0000:0f:00.0: Unsupported reset method 'af_flr'
[ 729.669243] vfio-pci 0000:0f:00.0: Unsupported reset method 'pm'
[ 733.137040] vfio-pci 0000:0f:00.0: Unsupported reset method 'bus'
So the project has to be rewritten to use the default bus reset method.
In addition, trying to run qemu in this state produces the expected error
qemu-system-x86_64: vfio_err_notifier_handler(0000:0f:00.0) Unrecoverable error detected. Please collect any data possible and then kill the guest
It also appears that @gnif isn't watching this issue tracker anymore.
@cmdrkotori
The module is still working for me, even on 5.19.8 (Gentoo) using an /etc/local.d script, so that it handles it on boot:
echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method
Does this not work for you?
Edit: if you've changed kernels, you'll likely need to rebuild the module. I have a hook to rebuild it every time I deploy a new kernel version.
The module was working for me recently. See this comment above. But now I simply cannot change to device_specific
to activate it, it is not accepting any reset method, even when I try to set to what it already is.
The module is still working for me, even on 5.19.8 (Gentoo) using an /etc/local.d script, so that it handles it on boot:
echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method
Does this not work for you?
Edit: if you've changed kernels, you'll likely need to rebuild the module. I have a hook to rebuild it every time I deploy a new kernel version.
what how,
gentoo /home/esmailelbob # echo 'device_specific' > /sys/bus/pci/devices/0000\:09\:00.0/reset_method
bash: echo: write error: Invalid argument
gentoo /home/esmailelbob # uname -r
5.15.59-gentoo-dist
@EsmailELBoBDev2 Same, I can't change the reset method to any of the reset methods in pci.c, it is stuck on
bus
. Trying all possible values throws the same write error and produces this result in dmesg[ 689.396614] vfio-pci 0000:0f:00.0: Unsupported reset method 'device_specific' [ 709.101147] vfio-pci 0000:0f:00.0: Unsupported reset method 'acpi' [ 715.200579] vfio-pci 0000:0f:00.0: Unsupported reset method 'flr' [ 724.961395] vfio-pci 0000:0f:00.0: Unsupported reset method 'af_flr' [ 729.669243] vfio-pci 0000:0f:00.0: Unsupported reset method 'pm' [ 733.137040] vfio-pci 0000:0f:00.0: Unsupported reset method 'bus'
So the project has to be rewritten to use the default bus reset method.
In addition, trying to run qemu in this state produces the expected error
qemu-system-x86_64: vfio_err_notifier_handler(0000:0f:00.0) Unrecoverable error detected. Please collect any data possible and then kill the guest
what do you mean with rewrite?
what how,
gentoo /home/esmailelbob # echo 'device_specific' > /sys/bus/pci/devices/0000\:09\:00.0/reset_method bash: echo: write error: Invalid argument gentoo /home/esmailelbob # uname -r 5.15.59-gentoo-dist
You're getting write errors because of the escape characters when you're doing tab completion:
descartes ~ # echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method
is successful
descartes ~ # echo 'device_specific' > /sys/bus/pci/devices/0000\:0c\:00.0/reset_method
-bash: echo: write error: Invalid argument
is unsuccessful
descartes ~ # uname -r
5.19.8-gentoo-enso
what how,
gentoo /home/esmailelbob # echo 'device_specific' > /sys/bus/pci/devices/0000\:09\:00.0/reset_method bash: echo: write error: Invalid argument gentoo /home/esmailelbob # uname -r 5.15.59-gentoo-dist
You're getting write errors because of the escape characters when you're doing tab completion:
descartes ~ # echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method descartes ~ # echo 'device_specicic' > /sys/bus/pci/devices/0000\:0c\:00.0/reset_method -bash: echo: write error: Invalid argument descartes ~ # uname -r 5.19.8-gentoo-enso
Removed \ part and still :(
echo -n "device_specific" > /sys/bus/pci/devices/0000:09:00.0/reset_method
bash: echo: write error: Invalid argument
echo 'device_specific' > /sys/bus/pci/devices/0000:09:00.0/reset_method
bash: echo: write error: Invalid argument
what how,
gentoo /home/esmailelbob # echo 'device_specific' > /sys/bus/pci/devices/0000\:09\:00.0/reset_method bash: echo: write error: Invalid argument gentoo /home/esmailelbob # uname -r 5.15.59-gentoo-dist
You're getting write errors because of the escape characters when you're doing tab completion:
descartes ~ # echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method descartes ~ # echo 'device_specicic' > /sys/bus/pci/devices/0000\:0c\:00.0/reset_method -bash: echo: write error: Invalid argument descartes ~ # uname -r 5.19.8-gentoo-enso
Removed \ part and still :(
echo -n "device_specific" > /sys/bus/pci/devices/0000:09:00.0/reset_method bash: echo: write error: Invalid argument echo 'device_specific' > /sys/bus/pci/devices/0000:09:00.0/reset_method bash: echo: write error: Invalid argument
Do you have the module loaded? I just rmmod vendor-reset and tried the echo and it gives the same error (write error: Invalid argument).
Try:
modprobe vendor-reset
echo 'device_specific' > /sys/bus/pci/devices/0000:09:00.0/reset_method
Edit: also provide dmesg output when issuing the echo, please.
what how,
gentoo /home/esmailelbob # echo 'device_specific' > /sys/bus/pci/devices/0000\:09\:00.0/reset_method bash: echo: write error: Invalid argument gentoo /home/esmailelbob # uname -r 5.15.59-gentoo-dist
You're getting write errors because of the escape characters when you're doing tab completion:
descartes ~ # echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method descartes ~ # echo 'device_specicic' > /sys/bus/pci/devices/0000\:0c\:00.0/reset_method -bash: echo: write error: Invalid argument descartes ~ # uname -r 5.19.8-gentoo-enso
Removed \ part and still :(
echo -n "device_specific" > /sys/bus/pci/devices/0000:09:00.0/reset_method bash: echo: write error: Invalid argument echo 'device_specific' > /sys/bus/pci/devices/0000:09:00.0/reset_method bash: echo: write error: Invalid argument
Do you have the module loaded? I just rmmod vendor-reset and tried the echo and it gives the same error (write error: Invalid argument).
Try:
modprobe vendor-reset echo 'device_specific' > /sys/bus/pci/devices/0000:09:00.0/reset_method
Edit: also provide dmesg output when issuing the echo, please.
oh, I did not know I must load the module first. Well, I have problems with my kernel modules right now, mainly with emerge --config sys-kernel/gentoo-kernel-bin
command (because I have a filled up /boot/ dir) so I will try to solve it first then come back
oh it worked. it worked, IT WORKED
so I had to load the module first. I got no error now thank god
echo -n "device_specific" > /sys/bus/pci/devices/0000:09:00.0/reset_method
gentoo sometimes is hard :sob: (because of filled up /boot/ partition) and thanks for the tip!
@EsmailELBoBDev2
Awesome! Glad it worked for you and I'm happy to help. I'm also using Gentoo, and this might be of some help to you. This is how I set it all up automatically on boot:
descartes ~ # cat /etc/modules-load.d/vendor-reset.conf
vendor-reset
descartes ~ # cat /etc/local.d/navi10_reset_method.start
echo "Setting Navi10 device to device specific reset method..."
echo 'device_specific' > /sys/bus/pci/devices/0000:0c:00.0/reset_method
Make /etc/local.d/navi10_reset_method.start (or whatever you name it) executable with chmod +x, and it should all behave and set correctly on boot. I spent a good bit of time ironing out my VFIO setup on Gentoo, which I now use daily. I think it's essentially perfect now. Good luck!
Alright, Followed your auto-start tutorial, thanks again for your tips <3 :D
I've fixed my problems by upgrading my rx560 to an rx6500xt. I highly recommend this solution. Plus at the moment they're cheap as.
I've fixed my problems by upgrading my rx560 to an rx6500xt
Jeff Brazos, is that you?
I've fixed my problems by upgrading my rx560 to an rx6500xt. I highly recommend this solution. Plus at the moment they're cheap as.
LOL, "fix your software problem by buying new hardware" isn't really an option for many people right now. Even if some of the products are getting cheaper, inflation is up massively worldwide and many of us are more worried about paying basic utilities next month instead of getting a new GPU.
However, if someone would like to buy me a GPU newer than my watercooled 5700 XT, I'll be glad to accept it!
Consider trying the udev rules merged in #64
These do not need to be configured as hooks per-VM. It's system-wide.
Can confirm that this workaround somewhat works with RX 460 on Arch with kernel 5.15.7-zen1-1-zen. But if you reboot the guest it's performing reset about a thousand times, clogging dmesg before finally giving up and shutting down a VM. Starting the VM again seems to work.
I can confirm this behaviour. I added the udev rules as recommended by @xperia64, but now my host can't even boot the VM anymore because it keeps performing resets until the process gets killed.
kernel 5.15.7-zen1-1-zen
on my gentoo dist kernel I had a bug on 5.15 and moved to kernel 5.19 in order to let amdgpu (the foss driver one) work
I finally got it working on Proxmox PVE 5.15.30-2-pve (Debian) with a Sapphire Radeon RX 5700 XT by writing `device_specific` to `/sys/bus/pci/devices/0000:03:00.0/reset_method`. Thanks @pluser!
I found out it's important to do this when the host is in a clean state, so right after boot. If you try to use the GPU before applying this, the vendor-reset won't kick in.
I've made a simple boot script to perform this change using systemd. Do the following:
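1. Create the script `/var/lib/vz/gpu-vendor-reset-method.sh`.
2. Put the `echo` command as contents of the script (see above).
3. Create the unit file `/etc/systemd/system/gpu-vendor-reset-method.service`.
4. Add the following contents to the unit file: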
# Runs the boot script that selects the GPU reset method.
[Unit]
Description=Set the AMD GPU reset method to 'device_specific'
After=default.target

[Service]
# The script runs once and exits. Type=oneshot makes systemd wait for it and
# mark the unit failed if it exits non-zero; RemainAfterExit keeps the unit
# reported as active afterwards.
Type=oneshot
RemainAfterExit=yes
ExecStart=/bin/bash /var/lib/vz/gpu-vendor-reset-method.sh

[Install]
WantedBy=default.target
5. Enable the systemd service to execute the script at startup with `systemctl enable gpu-vendor-reset-method.service`
6. Reboot
7. Check the systemd service status with `systemctl status gpu-vendor-reset-method.service` or check the actual reset_method of your GPU with `cat "/sys/bus/pci/devices/0000:03:00.0/reset_method"`
Since kernel 5.15 I can't start the VM again after it has been shut down. The output stays black until I reboot my host, which indicates that the vendor-reset module is not working properly anymore. I booted up 5.14 and I could boot the VM as often as I wanted without the need for a host reboot.
I am running Debian testing and the module gets loaded (with kernel 5.15): "dmesg | grep vendor" shows "... vendor_reset_hook: installed"
Has anyone else experienced issues with this kernel or could it be a Debian only problem? Thanks in advance.