Xilinx / dma_ip_drivers

Xilinx QDMA IP Drivers
https://xilinx.github.io/dma_ip_drivers/
573 stars 420 forks source link

qdata->xcdev can be destroyed even if the queue isn't removed #27

Open hmaarrfk opened 5 years ago

hmaarrfk commented 5 years ago

https://github.com/Xilinx/dma_ip_drivers/blob/f3828c8604126fa1ab149d5969d4b4c85509991f/QDMA/linux-kernel/drv/qdma_mod.c#L1553-L1560

in the case that removing the device false (say the queue was still in the "start" state) the qdata->xcdev can be freed.

[  111.122426] qdma:qdma_cdev_destroy: destroying cdev 00000000495e0073
[  111.122640] qdma:qdma_device_get_pair_descq_by_id: qdma01000-p0000:01:00.0, idx 0x0, q 0x00000000abafbfce state invalid.
[  111.122644] qdma:qdma_queue_remove: queue qdma01000-MM-0, id 0 cannot be deleted. Invalid q state
[  111.122649] qdma:xnl_q_del: xpdev_queue_delete() failed: -8
[  116.347548] qdma:xnl_dump_attrs: snd_seq 0x5ce33d9f, portid 0x29c009c5.
[  116.347554] qdma:xnl_dump_attrs: nlhdr: len 28, type 29, flags 0x1, seq 0x5ce33d9f, pid 700451269.
[  116.347555] qdma:xnl_dump_attrs: genlhdr: cmd 0x1 DEV_INFO, version 0, reserved 0x0.
[  116.347557] qdma:xnl_dump_attrs: attr DEV_IDX, u32 0x1000.
[  116.347576] qdma:xnl_dump_attrs: snd_seq 0x5ce33da0, portid 0x29c009c5.
[  116.347578] qdma:xnl_dump_attrs: nlhdr: len 52, type 29, flags 0x1, seq 0x5ce33da0, pid 700451269.
[  116.347579] qdma:xnl_dump_attrs: genlhdr: cmd 0xb Q_DEL, version 0, reserved 0x0.
[  116.347580] qdma:xnl_dump_attrs: attr DEV_IDX, u32 0x1000.
[  116.347581] qdma:xnl_dump_attrs: attr CMPT_CNTR_IDX, u32 0x0.
[  116.347583] qdma:xnl_dump_attrs: attr CMPT_TRIG_MODE, u32 0x1.
[  116.347583] qdma:xnl_dump_attrs: attr RANGE_START, u32 0x4.
[  116.347588] qdma:qdma_cdev_destroy: destroying cdev 00000000495e0073
[  116.347625] ------------[ cut here ]------------
[  116.347626] kernel BUG at /build/linux-3btXxq/linux-4.15.0/mm/slub.c:296!
[  116.347632] invalid opcode: 0000 [#1] SMP PTI
[  116.347634] Modules linked in: uvcvideo videobuf2_vmalloc videobuf2_memops videobuf2_v4l2 videobuf2_core videodev snd_usb_audio snd_usbmidi_lib media nls_iso8859_1 snd_hda_codec_hdmi intel_rapl snd_hda_codec_realtek x86_pkg_temp_thermal snd_hda_codec_generic intel_powerclamp coretemp snd_hda_intel kvm_intel snd_hda_codec kvm snd_hda_core snd_hwdep irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel snd_pcm pcbc snd_seq_midi snd_seq_midi_event snd_rawmidi aesni_intel aes_x86_64 snd_seq crypto_simd glue_helper cryptd intel_cstate input_leds joydev snd_seq_device intel_rapl_perf snd_timer cdc_acm qdma(OE) i915 snd soundcore qdma_vf(OE) drm_kms_helper lpc_ich wmi drm i2c_algo_bit fb_sys_fops syscopyarea ie31200_edac sysfillrect sysimgblt mei_me shpchp mei mac_hid intel_smartconnect video sch_fq_codel
[  116.347677]  parport_pc ppdev lp parport ip_tables x_tables autofs4 hid_generic usbhid hid ahci r8169 libahci mii
[  116.347697] CPU: 0 PID: 2501 Comm: dmactl Tainted: G           OE    4.15.0-50-generic #54-Ubuntu
[  116.347698] Hardware name: MSI MS-7758/Z77A-G43 (MS-7758), BIOS V2.13 03/07/2014
[  116.347712] RIP: 0010:__slab_free+0x17a/0x2c0
[  116.347714] RSP: 0018:ffffadc7099bf7a0 EFLAGS: 00010246
[  116.347717] RAX: ffff92b4b92fe900 RBX: ffff92b4b92fe900 RCX: 0000000180100004
[  116.347719] RDX: ffff92b4b92fe900 RSI: ffffd49ddfe4bf80 RDI: ffff92b4bec03200
[  116.347721] RBP: ffffadc7099bf840 R08: 0000000000000001 R09: ffffffffc06920fb
[  116.347723] R10: ffffadc7099bf858 R11: 0000000000000004 R12: ffff92b4bec03200
[  116.347725] R13: ffffd49ddfe4bf80 R14: ffff92b4b92fe900 R15: ffff92b4b6f60060
[  116.347727] FS:  00007f0f12987f40(0000) GS:ffff92b4df200000(0000) knlGS:0000000000000000
[  116.347730] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  116.347732] CR2: 0000564551570000 CR3: 0000000782070001 CR4: 00000000001606f0
[  116.347734] Call Trace:
[  116.347739]  ? invalid_op+0x1b/0x40
[  116.347750]  ? qdma_cdev_destroy+0x5b/0x80 [qdma]
[  116.347754]  kfree+0x165/0x180
[  116.347757]  ? kfree+0x165/0x180
[  116.347763]  qdma_cdev_destroy+0x5b/0x80 [qdma]
[  116.347770]  xpdev_queue_delete+0x119/0x130 [qdma]
[  116.347776]  xnl_q_del.part.12+0x163/0x260 [qdma]
[  116.347781]  ? skb_queue_tail+0x43/0x50
[  116.347785]  ? __netlink_sendskb+0x44/0x70
[  116.347788]  ? netlink_unicast+0x20c/0x240
[  116.347795]  ? xnl_dev_info+0x236/0x300 [qdma]
[  116.347802]  xnl_q_del+0x16/0x20 [qdma]
[  116.347805]  genl_family_rcv_msg+0x1fe/0x3f0
[  116.347808]  ? __netlink_sendskb+0x44/0x70
[  116.347812]  genl_rcv_msg+0x4c/0x90
[  116.347814]  ? genl_family_rcv_msg+0x3f0/0x3f0
[  116.347818]  netlink_rcv_skb+0x54/0x130
[  116.347821]  genl_rcv+0x28/0x40
[  116.347824]  netlink_unicast+0x19e/0x240
[  116.347827]  netlink_sendmsg+0x2d1/0x3d0
[  116.347831]  sock_sendmsg+0x3e/0x50
[  116.347835]  ___sys_sendmsg+0x2a0/0x2f0
[  116.347840]  ? lru_cache_add_active_or_unevictable+0x36/0xb0
[  116.347844]  ? handle_pte_fault+0x4f7/0xce0
[  116.347848]  ? __handle_mm_fault+0x478/0x5c0
[  116.347852]  __sys_sendmsg+0x54/0x90
[  116.347855]  ? __sys_sendmsg+0x54/0x90
[  116.347859]  SyS_sendmsg+0x12/0x20
[  116.347863]  do_syscall_64+0x73/0x130
[  116.347866]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[  116.347869] RIP: 0033:0x7f0f12085d04
[  116.347871] RSP: 002b:00007fff6137e568 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  116.347874] RAX: ffffffffffffffda RBX: 000056455156d990 RCX: 00007f0f12085d04
[  116.347876] RDX: 0000000000000000 RSI: 00007fff6137e5a0 RDI: 0000000000000003
[  116.347877] RBP: 000056455156dc20 R08: 0000000000000014 R09: 000056455156ed20
[  116.347879] R10: 000056455156d010 R11: 0000000000000246 R12: 000056455156d8a0
[  116.347881] R13: 00007fff6137e5a0 R14: 0000000000000000 R15: 0000000000000000
[  116.347884] Code: 0f 84 ee fe ff ff 44 0f b6 7d 8b 80 7d ab 00 79 05 45 84 ff 74 61 48 83 c4 70 5b 41 5a 41 5c 41 5d 41 5e 41 5f 5d 49 8d 62 f8 c3 <0f> 0b 4c 89 d0 4c 89 d7 45 89 fa 48 85 c0 44 0f b6 7d 8b 74 cb 
[  116.347923] RIP: __slab_free+0x17a/0x2c0 RSP: ffffadc7099bf7a0
[  116.347933] ---[ end trace f34e0c4b49b7dff0 ]---
hmaarrfk commented 5 years ago

Proposed patch:

From a432b7a86a14eefed708ca08669fdfcae9a76c66 Mon Sep 17 00:00:00 2001
From: Mark Harfouche <mark.harfouche@gmail.com>
Date: Mon, 20 May 2019 20:07:59 -0400
Subject: [PATCH] attemp to fixup early free of resources in xpdev_queue_delete

---
 QDMA/linux-kernel/drv/qdma_mod.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/QDMA/linux-kernel/drv/qdma_mod.c b/QDMA/linux-kernel/drv/qdma_mod.c
index 1557dc3..6552899 100644
--- a/QDMA/linux-kernel/drv/qdma_mod.c
+++ b/QDMA/linux-kernel/drv/qdma_mod.c
@@ -1547,7 +1547,7 @@ int xpdev_queue_delete(struct xlnx_pci_dev *xpdev, unsigned int qidx, bool c2h,
 {
    struct xlnx_qdata *qdata = xpdev_queue_get(xpdev, qidx, c2h, 1, ebuf,
                        ebuflen);
-   int rv = 0;
+   int rv = 1;

    if (!qdata)
        return -EINVAL;
@@ -1555,12 +1555,6 @@ int xpdev_queue_delete(struct xlnx_pci_dev *xpdev, unsigned int qidx, bool c2h,
    if(!qdata->xcdev)
        return -EINVAL;

-   spin_lock(&xpdev->cdev_lock);
-   qdata->xcdev->dir_init &= ~(1 << (c2h ? 1 : 0));
-   if (qdata->xcdev && !qdata->xcdev->dir_init)
-       qdma_cdev_destroy(qdata->xcdev);
-   spin_unlock(&xpdev->cdev_lock);
-
    if (qdata->qhndl != QDMA_QUEUE_IDX_INVALID)
        rv = qdma_queue_remove(xpdev->dev_hndl, qdata->qhndl,
                    ebuf, ebuflen);
@@ -1570,6 +1564,13 @@ int xpdev_queue_delete(struct xlnx_pci_dev *xpdev, unsigned int qidx, bool c2h,
    if (rv < 0)
        goto exit;

+       // Optimistically remvoe the direction flag
+   spin_lock(&xpdev->cdev_lock);
+   qdata->xcdev->dir_init &= ~(1 << (c2h ? 1 : 0));
+   if (qdata->xcdev && !qdata->xcdev->dir_init)
+       qdma_cdev_destroy(qdata->xcdev);
+   spin_unlock(&xpdev->cdev_lock);
+
    memset(qdata, 0, sizeof(*qdata));
 exit:
    return rv;
-- 
2.21.0

comments are appreciated. Maybe we need to optimistically hide resources from other threads attempt to delete the queue, then revert if we fail????