pugs / vfio-linux-2.6

Linux 2.6 plus VFIO driver
Other
5 stars 2 forks source link

Hang in vfio_dma_map_common when mapping more than 196K in vfio-v7 branch #1

Open ian2 opened 13 years ago

ian2 commented 13 years ago

Hello,

Somehow I managed to run into a kernel bug when using VFIO.

I use the UIOMMU and VFIO drivers to map a small DMA-able buffer for a custom device with the following C code:

*****snip---------------------------------------------------------------------------------------------------------------------------

    sprintf(vfname, "%s%i", "/dev/vfio", vf);
    vfio_fd = open(vfname, O_RDWR);

    if (vfio_fd < 0) {
            /* If a vfio file is not there we just skip it. */
            return -1;
    }

    if (uiommu_fd == 0) {
            uiommu_fd = open("/dev/uiommu", O_RDWR);

            if (uiommu_fd < 0) {
                    perror("uiommu open:");
                    return errno;
            }
    }

    /* Connect UIOMMU to VF */
    rc = ioctl(vfio_fd, VFIO_DOMAIN_SET, &uiommu_fd);

    if (rc != 0) {
            close(vfio_fd);
            return -1;
    }

    /* DMA Mapping */
    dma_map_virt = malloc(0x10000);
    // alignment
    dma_map_virt = (void *)((uint64_t)dma_map_virt & (~(0x1000 - 1)));
    struct vfio_dma_map dma_map;
    dma_map.vaddr = (uint64_t) dma_map_virt;
    dma_map.size = dma_map_size;
    dma_map.flags = VFIO_FLAG_WRITE;
    dma_map.dmaaddr = (uint64_t)dma_adr;
    rc = ioctl(vfio_fd, VFIO_DMA_MAP_IOVA, &dma_map);

    if(rc < 0){
            perror("ioctl(VFIO_DMA_MAP_IOVA)");
            return errno;
    }
    printf("dma_map.vaddr %016llx\n", dma_map.vaddr);
    printf("dma_map.dmaaddr %016llx\n", dma_map.dmaaddr);
    printf("dma_map.size %016llx\n", dma_map.size);
    printf("dma_map.flags %016llx\n", dma_map.flags);

*****snip---------------------------------------------------------------------------------------------------------------------------

When I supply dma_map.size <= 0x31000:

    ./vfdmamap 0 10000 31000
    dma_map.vaddr 0000000001dde000
    dma_map.dmaaddr 0000000000010000
    dma_map.size 0000000000031000
    dma_map.flags 0000000000000001

DMA from and to this buffer works just fine.

However: Anything larger than that (e.g. ./vfdmamap 0 10000 32000) Just freezes up the ioctl. dmesg: [ 433.154727] VFIO - User Level PCI meta-driver version: 0.1 [ 434.696624] vfio 0000:14:00.0: restoring config space at offset 0x1 (was 0x100400, writing 0x100047) [ 434.696649] vfio 0000:14:00.0: BAR 7: set to [mem 0xc2904000-0xc2906fff 64bit](PCI address [0xc2904000-0xc2906fff]) [ 434.696658] vfio 0000:14:00.0: BAR 9: set to [mem 0xc2907000-0xc2909fff 64bit](PCI address [0xc2907000-0xc2909fff]) [ 434.797278] vfio 0000:14:00.0: PCI INT A -> GSI 30 (level, low) -> IRQ 30 [ 459.324129] vfio 0000:14:00.0: PCI INT A disabled [ 462.108689] vfio 0000:14:00.0: restoring config space at offset 0x1 (was 0x100400, writing 0x100043) [ 462.108712] vfio 0000:14:00.0: BAR 7: set to [mem 0xc2904000-0xc2906fff 64bit](PCI address [0xc2904000-0xc2906fff]) [ 462.108721] vfio 0000:14:00.0: BAR 9: set to [mem 0xc2907000-0xc2909fff 64bit](PCI address [0xc2907000-0xc2909fff]) [ 462.209406] vfio 0000:14:00.0: PCI INT A -> GSI 30 (level, low) -> IRQ 30 [ 462.262680] vfio_dma_map_common: get_user_pages_fast returns 49, not 50 [ 462.262691] BUG: unable to handle kernel paging request at ffffeae38039fec8 [ 462.262695] IP: [] virt_to_head_page+0x23/0x31 [ 462.262704] PGD 0 [ 462.262707] Oops: 0000 [#1] SMP [ 462.262710] last sysfs file: /sys/devices/system/cpu/cpu23/cache/index2/shared_cpu_map [ 462.262714] CPU 1 [ 462.262715] Modules linked in: vfio uiommu sunrpc cpufreq_ondemand acpi_cpufreq freq_table mperf ip6t_REJECT nf_conntrack_ipv6 nf_defrag_ipv6 ip6table_filter ip6_tables ipv6 uinput ioatdma cdc_ether igb usbnet mii matroxfb_base matroxfb_DAC1064 matroxfb_accel i2c_i801 matroxfb_Ti3026 dca i2c_core matroxfb_g450 serio_raw g450_pll matroxfb_misc shpchp i7core_edac edac_core iTCO_wdt iTCO_vendor_support pcspkr microcode [last unloaded: scsi_wait_scan] [ 462.262751] [ 462.262755] Pid: 1938, comm: vfdmamap Tainted: G W 2.6.37 #2 69Y3701 /IBM System X iDataPlex dx360 M3 Server -[6391AC1]- 
[ 462.262759] RIP: 0010:[] [] virt_to_head_page+0x23/0x31 [ 462.262765] RSP: 0018:ffff880078297d58 EFLAGS: 00010282 [ 462.262768] RAX: ffffeae38039fec8 RBX: ffffc9001091f000 RCX: 0000000000002663 [ 462.262771] RDX: ffffea0000000000 RSI: 0000000000000096 RDI: ffffc9001091f000 [ 462.262774] RBP: ffff880078297d58 R08: 0000000000000002 R09: 00000000fffffffe [ 462.262777] R10: ffff8800f8297cc7 R11: 0000000000000000 R12: 0000000000000032 [ 462.262780] R13: ffffffffa000e8d5 R14: ffff880078297e38 R15: 0000000000000001 [ 462.262784] FS: 00007fa9c2280720(0000) GS:ffff88007c220000(0000) knlGS:0000000000000000 [ 462.262788] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 462.262791] CR2: ffffeae38039fec8 CR3: 00000000755ca000 CR4: 00000000000006e0 [ 462.262794] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 462.262797] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 462.262800] Process vfdmamap (pid: 1938, threadinfo ffff880078296000, task ffff880075dc0000) [ 462.262803] Stack: [ 462.262805] ffff880078297d98 ffffffff8110e98d ffff880078297e38 ffff880037a13d00 [ 462.262810] 0000000000000032 00000000fffffff2 ffff880078297e38 0000000000000001 [ 462.262815] ffff880078297e18 ffffffffa000e8d5 ffff880078297db8 ffffffff8103e925 [ 462.262820] Call Trace: [ 462.262825] [] kfree+0x5a/0xd8 [ 462.262832] [] vfio_dma_map_common+0x193/0x3c7 [vfio] [ 462.262838] [] ? need_resched+0x23/0x2d [ 462.262843] [] ? 
_cond_resched+0xe/0x22 [ 462.262848] [] vfio_unl_ioctl+0xfe/0x44f [vfio] [ 462.262855] [] do_vfs_ioctl+0x4b1/0x4f2 [ 462.262860] [] sys_ioctl+0x56/0x7a [ 462.262865] [] system_call_fastpath+0x16/0x1b [ 462.262867] Code: 02 00 00 0f 4f c2 c9 c3 55 48 89 e5 0f 1f 44 00 00 e8 2e b1 f2 ff 48 c1 e8 0c 48 ba 00 00 00 00 00 ea ff ff 48 6b c0 38 48 01 d0 <48> 8b 10 80 e6 80 74 04 48 8b 40 10 c9 c3 55 48 89 e5 41 57 41 [ 462.262905] RIP [] virt_to_head_page+0x23/0x31 [ 462.262910] RSP [ 462.262912] CR2: ffffeae38039fec8 [ 462.262915] ---[ end trace 8803c078f2032f02 ]--- [ 462.262920] BUG: sleeping function called from invalid context at kernel/rwsem.c:21 [ 462.262923] in_atomic(): 0, irqs_disabled(): 1, pid: 1938, name: vfdmamap [ 462.262927] Pid: 1938, comm: vfdmamap Tainted: G D W 2.6.37 #2 [ 462.262929] Call Trace: [ 462.262934] [] might_sleep+0xeb/0xf0 [ 462.262939] [] down_read+0x24/0x3b [ 462.262943] [] acct_collect+0x4d/0x185 [ 462.262950] [] do_exit+0x1fc/0x71c [ 462.262954] [] ? _raw_spin_unlock_irqrestore+0x17/0x19 [ 462.262959] [] oops_end+0xbf/0xc7 [ 462.262964] [] no_context+0x1f9/0x208 [ 462.262969] [] bad_area_nosemaphore+0x192/0x1b5 [ 462.262975] [] ? number.clone.1+0x138/0x24c [ 462.262979] [] bad_area_nosemaphore+0x13/0x15 [ 462.262984] [] do_page_fault+0x187/0x35a [ 462.262989] [] ? vt_console_print+0xae/0x2bf [ 462.262993] [] ? vsnprintf+0x83/0x44c [ 462.263000] [] ? arch_local_irq_save+0x18/0x1e [ 462.263004] [] ? _raw_spin_unlock_irqrestore+0x17/0x19 [ 462.263008] [] ? _raw_spin_unlock_irqrestore+0x17/0x19 [ 462.263014] [] ? vfio_dma_map_common+0x193/0x3c7 [vfio] [ 462.263018] [] page_fault+0x25/0x30 [ 462.263024] [] ? vfio_dma_map_common+0x193/0x3c7 [vfio] [ 462.263029] [] ? virt_to_head_page+0x23/0x31 [ 462.263034] [] ? virt_to_head_page+0xe/0x31 [ 462.263037] [] kfree+0x5a/0xd8 [ 462.263042] [] vfio_dma_map_common+0x193/0x3c7 [vfio] [ 462.263046] [] ? need_resched+0x23/0x2d [ 462.263050] [] ? 
_cond_resched+0xe/0x22 [ 462.263055] [] vfio_unl_ioctl+0xfe/0x44f [vfio] [ 462.263060] [] do_vfs_ioctl+0x4b1/0x4f2 [ 462.263065] [] sys_ioctl+0x56/0x7a [ 462.263069] [] system_call_fastpath+0x16/0x1b

Some more data:

uname -a Linux **** 2.6.37 #2 SMP Fri Jul 22 11:44:15 CEST 2011 x86_64 x86_64 x86_64 GNU/Linux

ulimit -l unlimited

getconf PAGESIZE 4096

This shouldn't happen, right?

Regards, Jan Kunigk

awilliam commented 13 years ago

An anonymous mmap is probably a better way to allocate a page aligned DMA buffer. As it is, you're allocating it, aligning it, then passing the new aligned start and original size, which is only valid if no re-alignment happened. Also, Tom's tree is deprecated. Current development for VFIO is happening here: https://github.com/awilliam/linux-vfio