oracle/qemu

migration fails at destination with "Unable to write to socket: Bad file descriptor" #6

Open · tmakatos opened 3 years ago

tmakatos commented 3 years ago

When trying to migrate an SPDK NVMf/vfio-user target that exposes an NVMe controller with one namespace in the guest (/dev/nvme0n1), the destination QEMU fails with:

Unable to write to socket: Bad file descriptor

Using the following:

Debugging further, the failure happens here:

#0  0x0000555555cb8eff in qio_channel_socket_writev (ioc=0x5555568a2400, iov=0x7fffec35da90, niov=1, fds=0x555557314934, nfds=3, errp=0x7fffec35da70) at ../io/channel-socket.c:571
#1  0x0000555555cb2627 in qio_channel_writev_full (ioc=0x5555568a2400, iov=0x7fffec35da90, niov=1, fds=0x555557314934, nfds=3, errp=0x7fffec35da70) at ../io/channel.c:86
#2  0x0000555555c812d5 in vfio_user_send_locked (proxy=0x5555575af7e0, msg=0x55555747eb70, fds=0x7fffec35db40) at ../hw/vfio/user.c:278
#3  0x0000555555c815c9 in vfio_user_send_recv (proxy=0x5555575af7e0, msg=0x55555747eb70, fds=0x7fffec35db40, rsize=0) at ../hw/vfio/user.c:351
#4  0x0000555555c82c38 in vfio_user_set_irqs (vbasedev=0x5555575a9c70, irq=0x555557314920) at ../hw/vfio/user.c:898
#5  0x0000555555c6b79d in vfio_enable_vectors (vdev=0x5555575a9370, msix=true) at ../hw/vfio/pci.c:413
#6  0x0000555555c6bb4c in vfio_msix_vector_do_use (pdev=0x5555575a9370, nr=3, msg=0x0, handler=0x0) at ../hw/vfio/pci.c:516
#7  0x0000555555c6be8c in vfio_msix_enable (vdev=0x5555575a9370) at ../hw/vfio/pci.c:615
#8  0x0000555555c70b0b in vfio_pci_load_config (vbasedev=0x5555575a9c70, f=0x5555568f5af0) at ../hw/vfio/pci.c:2528
#9  0x0000555555bab3df in vfio_load_device_config_state (f=0x5555568f5af0, opaque=0x5555575a9c70) at ../hw/vfio/migration.c:382
#10 0x0000555555babbe2 in vfio_load_state (f=0x5555568f5af0, opaque=0x5555575a9c70, version_id=1) at ../hw/vfio/migration.c:649
#11 0x00005555558a5cb9 in vmstate_load (f=0x5555568f5af0, se=0x555556964df0) at ../migration/savevm.c:908
#12 0x00005555558a8dec in qemu_loadvm_section_start_full (f=0x5555568f5af0, mis=0x5555568cec70) at ../migration/savevm.c:2433
#13 0x00005555558a944a in qemu_loadvm_state_main (f=0x5555568f5af0, mis=0x5555568cec70) at ../migration/savevm.c:2619
#14 0x00005555558a95c5 in qemu_loadvm_state (f=0x5555568f5af0) at ../migration/savevm.c:2698
#15 0x00005555558e437d in process_incoming_migration_co (opaque=0x0) at ../migration/migration.c:555
#16 0x0000555555e28cb6 in coroutine_trampoline (i0=1457783792, i1=21845) at ../util/coroutine-ucontext.c:173
#17 0x00007ffff75a4b50 in __correctly_grouped_prefixwc (begin=0x7fffec35da70 L"\x56965b50啕\003", end=0x0, thousands=-175363960 L'\xf58c2888', grouping=0x555556650010 "") at grouping.c:171
#18 0x0000000000000000 in  ()
(gdb) p errno
$2 = 9
(gdb) p sioc->fd
$3 = 13
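errno 9 is EBADF. Note from frame #0 that the failing write carries nfds=3 ancillary file descriptors (sent as SCM_RIGHTS), so the error may be about the fds attached to the message rather than the socket itself.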

Looking at the FD:

# ls -lh /proc/1816/fd/13
lrwx------ 1 root root 64 Jun  8 11:43 /proc/1816/fd/13 -> 'socket:[30949]'
# cat /proc/1816/fdinfo/13
pos:    0
flags:  02000002
mnt_id: 10
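flags 02000002 is octal for O_RDWR | O_CLOEXEC, so the socket fd itself is open and valid; the EBADF must come from somewhere else, most likely one of the three fds being passed along.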

The source QEMU is run as follows:

/opt/qemu/bin/qemu-system-x86_64 -smp 4 -nographic -m 2G -object memory-backend-file,id=mem0,size=2G,mem-path=/dev/hugepages,share=on,prealloc=yes, -numa node,memdev=mem0 -kernel bionic-server-cloudimg-amd64-vmlinuz-generic -initrd bionic-server-cloudimg-amd64-initrd-generic -append console=ttyS0 root=/dev/sda1 single intel_iommu=on -hda bionic-server-cloudimg-amd64-0.raw -hdb nvme.img -nic user,model=virtio-net-pci -machine pc-q35-3.1 -device vfio-user-pci,socket=/var/run/vfio-user.sock,x-enable-migration=on -D qemu.out -trace enable=vfio*

and destination QEMU:

/opt/qemu/bin/qemu-system-x86_64 -smp 4 -nographic -m 2G -object memory-backend-file,id=mem0,size=2G,mem-path=/dev/hugepages,share=on,prealloc=yes, -numa node,memdev=mem0 -kernel bionic-server-cloudimg-amd64-vmlinuz-generic -initrd bionic-server-cloudimg-amd64-initrd-generic -append console=ttyS0 root=/dev/sda1 single intel_iommu=on -hda bionic-server-cloudimg-amd64-0.raw -hdb nvme.img -nic user,model=virtio-net-pci -machine pc-q35-3.1 -device vfio-user-pci,socket=/var/run/vfio-user.sock,x-enable-migration=on -D qemu.out -trace enable=vfio* -incoming tcp:0:4444

I migrate using:

migrate -d tcp:<IP address>:4444
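(This is the HMP migrate command on the source monitor; -d detaches, so the monitor prompt returns while the migration runs in the background.)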

In the source QEMU log:

vfio_msi_interrupt  (VFIO user </var/run/vfio-user.sock>) vector 2 0xfee04004/0x4023
vfio_get_dirty_bitmap container fd=-1, iova=0x0 size= 0xa0000 bitmap_size=0x18 start=0x0
vfio_get_dirty_bitmap container fd=-1, iova=0xc0000 size= 0xb000 bitmap_size=0x8 start=0xc0000
vfio_get_dirty_bitmap container fd=-1, iova=0xcb000 size= 0x3000 bitmap_size=0x8 start=0xcb000
vfio_get_dirty_bitmap container fd=-1, iova=0xce000 size= 0x1e000 bitmap_size=0x8 start=0xce000
vfio_msi_interrupt  (VFIO user </var/run/vfio-user.sock>) vector 2 0xfee04004/0x4023
vfio_get_dirty_bitmap container fd=-1, iova=0xec000 size= 0x4000 bitmap_size=0x8 start=0xec000
vfio_get_dirty_bitmap container fd=-1, iova=0xf0000 size= 0x10000 bitmap_size=0x8 start=0xf0000
vfio_get_dirty_bitmap container fd=-1, iova=0x100000 size= 0x7ff00000 bitmap_size=0xffe0 start=0x100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfd000000 size= 0x1000000 bitmap_size=0x200 start=0x80080000
vfio_get_dirty_bitmap container fd=-1, iova=0xfebd1000 size= 0x1000 bitmap_size=0x8 start=0x81100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfffc0000 size= 0x40000 bitmap_size=0x8 start=0x80000000
vfio_update_pending  (VFIO user </var/run/vfio-user.sock>) pending 0x8000
vfio_save_pending  (VFIO user </var/run/vfio-user.sock>) precopy 0x1195000 postcopy 0x0 compatible 0x0
vfio_migration_set_state  (VFIO user </var/run/vfio-user.sock>) state 2
vfio_vmstate_change  (VFIO user </var/run/vfio-user.sock>) running 0 reason finish-migrate device state 2
vfio_get_dirty_bitmap container fd=-1, iova=0x0 size= 0xa0000 bitmap_size=0x18 start=0x0
vfio_get_dirty_bitmap container fd=-1, iova=0xc0000 size= 0xb000 bitmap_size=0x8 start=0xc0000
vfio_get_dirty_bitmap container fd=-1, iova=0xcb000 size= 0x3000 bitmap_size=0x8 start=0xcb000
vfio_get_dirty_bitmap container fd=-1, iova=0xce000 size= 0x1e000 bitmap_size=0x8 start=0xce000
vfio_get_dirty_bitmap container fd=-1, iova=0xec000 size= 0x4000 bitmap_size=0x8 start=0xec000
vfio_get_dirty_bitmap container fd=-1, iova=0xf0000 size= 0x10000 bitmap_size=0x8 start=0xf0000
vfio_get_dirty_bitmap container fd=-1, iova=0x100000 size= 0x7ff00000 bitmap_size=0xffe0 start=0x100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfd000000 size= 0x1000000 bitmap_size=0x200 start=0x80080000
vfio_get_dirty_bitmap container fd=-1, iova=0xfebd1000 size= 0x1000 bitmap_size=0x8 start=0x81100000
vfio_get_dirty_bitmap container fd=-1, iova=0xfffc0000 size= 0x40000 bitmap_size=0x8 start=0x80000000
vfio_migration_set_state  (VFIO user </var/run/vfio-user.sock>) state 2
vfio_update_pending  (VFIO user </var/run/vfio-user.sock>) pending 0x8000
vfio_save_buffer  (VFIO user </var/run/vfio-user.sock>) Offset 0x1000 size 0x8000 pending 0x8000
vfio_update_pending  (VFIO user </var/run/vfio-user.sock>) pending 0x8000
vfio_save_buffer  (VFIO user </var/run/vfio-user.sock>) Offset 0x9000 size 0x0 pending 0x8000
vfio_migration_set_state  (VFIO user </var/run/vfio-user.sock>) state 0
vfio_save_complete_precopy  (VFIO user </var/run/vfio-user.sock>)
vfio_save_device_config_state  (VFIO user </var/run/vfio-user.sock>)
vfio_region_unmap Region migration mmaps[0] unmap [0x1000 - 0x8fff]
vfio_save_cleanup  (VFIO user </var/run/vfio-user.sock>)
vfio_migration_state_notifier  (VFIO user </var/run/vfio-user.sock>) state completed

And in the destination QEMU:

...
vfio_region_mmap Region migration mmaps[0] [0x1000 - 0x8fff]
vfio_migration_set_state  (VFIO user </var/run/vfio-user.sock>) state 4
vfio_load_state  (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100003
vfio_load_state  (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100004
vfio_load_state_device_data  (VFIO user </var/run/vfio-user.sock>) Offset 0x1000 size 0x8000
vfio_load_state  (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100004
vfio_listener_region_del region_del 0xc0000 - 0xdffff
vfio_listener_region_add_ram region_add [ram] 0xc0000 - 0xcafff [0x7fa250200000]
vfio_listener_region_add_ram region_add [ram] 0xcb000 - 0xcdfff [0x7fa2506cb000]
vfio_listener_region_add_ram region_add [ram] 0xce000 - 0xdffff [0x7fa25020e000]
vfio_listener_region_add_skip SKIPPING region_add 0xb0000000 - 0xbfffffff
vfio_listener_region_del region_del 0xc0000 - 0xcafff
vfio_listener_region_del region_del 0xce000 - 0xdffff
vfio_listener_region_del region_del 0xe0000 - 0xfffff
vfio_listener_region_add_ram region_add [ram] 0xc0000 - 0xcafff [0x7fa2506c0000]
vfio_listener_region_add_ram region_add [ram] 0xce000 - 0xebfff [0x7fa2506ce000]
vfio_listener_region_add_ram region_add [ram] 0xec000 - 0xeffff [0x7fa2506ec000]
vfio_listener_region_add_ram region_add [ram] 0xf0000 - 0xfffff [0x7fa2506f0000]
vfio_listener_region_add_skip SKIPPING region_add 0xfed1c000 - 0xfed1ffff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd7000 - 0xfebd7fff
vfio_listener_region_add_ram region_add [ram] 0xfd000000 - 0xfdffffff [0x7fa241400000]
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4000 - 0xfebd43ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4400 - 0xfebd441f
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4420 - 0xfebd44ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4500 - 0xfebd4515
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4516 - 0xfebd45ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4600 - 0xfebd4607
vfio_listener_region_add_skip SKIPPING region_add 0xfebd4608 - 0xfebd4fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe000000 - 0xfe000fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe001000 - 0xfe001fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe002000 - 0xfe002fff
vfio_listener_region_add_skip SKIPPING region_add 0xfe003000 - 0xfe003fff
vfio_load_state  (VFIO user </var/run/vfio-user.sock>) data 0xffffffffef100002
vfio_listener_region_add_skip SKIPPING region_add 0xfebd0000 - 0xfebd0fff
vfio_listener_region_add_ram region_add [ram] 0xfebd1000 - 0xfebd1fff [0x7fa35db96000]
vfio_listener_region_add_skip SKIPPING region_add 0xfebd0000 - 0xfebd0fff
vfio_listener_region_add_ram region_add [ram] 0xfebd1000 - 0xfebd1fff [0x7fa35db96000]
vfio_listener_region_add_skip SKIPPING region_add 0xfebd2000 - 0xfebd3fff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd5000 - 0xfebd53ff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd5400 - 0xfebd5fff
vfio_listener_region_add_skip SKIPPING region_add 0xfebd6000 - 0xfebd6fff
vfio_pci_write_config  (VFIO user </var/run/vfio-user.sock>, @0x4, 0x507, len=0x2)
vfio_listener_region_add_skip SKIPPING region_add 0xfebd2000 - 0xfebd3fff
vfio_region_mmaps_set_enabled Region VFIO user </var/run/vfio-user.sock> BAR 0 mmaps enabled: 1
vfio_region_mmaps_set_enabled Region VFIO user </var/run/vfio-user.sock> BAR 4 mmaps enabled: 1
vfio_region_mmaps_set_enabled Region VFIO user </var/run/vfio-user.sock> BAR 5 mmaps enabled: 1
vfio_intx_disable  (VFIO user </var/run/vfio-user.sock>)
vfio_msix_vector_do_use  (VFIO user </var/run/vfio-user.sock>) vector 3 used
tmakatos commented 3 years ago

Using the gpio sample instead of SPDK works, so this may be something specific to SPDK.

tmakatos commented 3 years ago

@john-johnson-git suggested that this happens because -1 is sent for some of the file descriptors, which cannot work with sendmsg(), and this turns out to be the case. Leaving this with @john-johnson-git.
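
For reference, this is easy to reproduce outside QEMU. Below is a minimal standalone sketch (my own code, not from QEMU or SPDK) showing that sendmsg() fails with EBADF as soon as the SCM_RIGHTS payload contains a -1:

/* fdpass-repro.c: send three fds over a Unix socketpair, one of them -1.
 * All names here are my own; this is not QEMU code. */
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/socket.h>
#include <sys/uio.h>

int main(void)
{
    int sv[2];
    if (socketpair(AF_UNIX, SOCK_STREAM, 0, sv) < 0) {
        perror("socketpair");
        return 1;
    }

    char byte = 'x';
    struct iovec iov = { .iov_base = &byte, .iov_len = 1 };

    /* Three fds, as in the nfds=3 frame above; the middle one is bogus. */
    int fds[3] = { sv[0], -1, sv[1] };

    union {
        char buf[CMSG_SPACE(sizeof(fds))];
        struct cmsghdr align;
    } u;
    memset(&u, 0, sizeof(u));

    struct msghdr msg = {
        .msg_iov = &iov,
        .msg_iovlen = 1,
        .msg_control = u.buf,
        .msg_controllen = sizeof(u.buf),
    };
    struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
    cmsg->cmsg_level = SOL_SOCKET;
    cmsg->cmsg_type = SCM_RIGHTS;
    cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
    memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));

    if (sendmsg(sv[0], &msg, 0) < 0) {
        /* Prints: sendmsg: Bad file descriptor (errno 9) */
        printf("sendmsg: %s (errno %d)\n", strerror(errno), errno);
        return 1;
    }
    return 0;
}

The kernel validates every fd in the SCM_RIGHTS array up front, so a single -1 fails the whole sendmsg() with EBADF, which matches the "Unable to write to socket: Bad file descriptor" error seen at the destination; the sender needs to avoid putting -1 into the array in the first place.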