openzfs / zfs

OpenZFS on Linux and FreeBSD
https://openzfs.github.io/openzfs-docs
Other
10.65k stars 1.75k forks source link

PANIC: VERIFY(avl_find(tree, new_node, &where) == NULL) failed #16685

Open click0 opened 3 weeks ago

click0 commented 3 weeks ago

System information

Type Version/Name
Distribution Name FreeBSD
Distribution Version 14.1-STABLE
Kernel Version f5fabf3dc SUPPORT-14-1 1401501
Architecture x86_64
OpenZFS Version zfs-2.2.4-FreeBSD_g256659204
-- zfs-kmod-2.2.4-FreeBSD_g256659204

Describe the problem you're observing

I assume scanning a ZFS partition with many small files for synchronization with another server.

Describe how to reproduce the problem

Include any warning/errors/backtraces from the system logs

# zpool status
  pool: tank3
 state: ONLINE
  scan: scrub repaired 0B in 02:27:16 with 0 errors on Wed Nov 22 15:01:45 2023
config:

        NAME            STATE     READ WRITE CKSUM
        tank3           ONLINE       0     0     0
          mirror-0      ONLINE       0     0     0
            gpt/disk-5  ONLINE       0     0     0
            gpt/disk-4  ONLINE       0     0     0

errors: No known data errors

  pool: zroot
 state: ONLINE
  scan: scrub repaired 0B in 02:20:03 with 0 errors on Mon Jul  8 19:16:03 2024
config:

        NAME                  STATE     READ WRITE CKSUM
        zroot                 ONLINE       0     0     0
          mirror-0            ONLINE       0     0     0
            gpt/system-ada0   ONLINE       0     0     0
            gpt/system-ada17  ONLINE       0     0     0

errors: No known data errors
# kgdb /boot/kernel/kernel /var/crash/vmcore.3
...
Reading symbols from /boot/kernel/kernel...
Reading symbols from /usr/lib/debug//boot/kernel/kernel.debug...

Unread portion of the kernel message buffer:
panic: VERIFY(avl_find(tree, new_node, &where) == NULL) failed

cpuid = 3
time = 1729751226
KDB: stack backtrace:
db_trace_self_wrapper() at 0xffffffff804c959b = db_trace_self_wrapper+0x2b/frame 0xfffffe00b46a8230
vpanic() at 0xffffffff80c24011 = vpanic+0x131/frame 0xfffffe00b46a8360
spl_panic() at 0xffffffff828b3c1a = spl_panic+0x3a/frame 0xfffffe00b46a83c0
avl_add() at 0xffffffff8287c7c8 = avl_add+0x138/frame 0xfffffe00b46a83f0
dsl_deadlist_load_tree() at 0xffffffff82947192 = dsl_deadlist_load_tree+0x122/frame 0xfffffe00b46a8580
dsl_deadlist_move_bpobj() at 0xffffffff829491de = dsl_deadlist_move_bpobj+0x6e/frame 0xfffffe00b46a8680
dsl_destroy_snapshot_sync_impl() at 0xffffffff82959bec = dsl_destroy_snapshot_sync_impl+0x57c/frame 0xfffffe00b46a8750
dsl_destroy_snapshot_sync() at 0xffffffff8295a35e = dsl_destroy_snapshot_sync+0x4e/frame 0xfffffe00b46a8790
zcp_synctask_destroy() at 0xffffffff82a4793a = zcp_synctask_destroy+0xba/frame 0xfffffe00b46a87d0
zcp_synctask_wrapper() at 0xffffffff82a4780b = zcp_synctask_wrapper+0xfb/frame 0xfffffe00b46a8820
luaD_precall() at 0xffffffff8289aa41 = luaD_precall+0x311/frame 0xfffffe00b46a88f0
luaV_execute() at 0xffffffff828aae4d = luaV_execute+0xe0d/frame 0xfffffe00b46a8960
luaD_call() at 0xffffffff8289af8b = luaD_call+0xcb/frame 0xfffffe00b46a89a0
luaD_rawrunprotected() at 0xffffffff8289a313 = luaD_rawrunprotected+0x53/frame 0xfffffe00b46a8a40
luaD_pcall() at 0xffffffff8289b5c8 = luaD_pcall+0x38/frame 0xfffffe00b46a8a90
lua_pcallk() at 0xffffffff82893596 = lua_pcallk+0xa6/frame 0xfffffe00b46a8ad0
zcp_eval_impl() at 0xffffffff82a439eb = zcp_eval_impl+0xbb/frame 0xfffffe00b46a8b00
dsl_sync_task_sync() at 0xffffffff8296c2b4 = dsl_sync_task_sync+0xb4/frame 0xfffffe00b46a8b30
dsl_pool_sync() at 0xffffffff8295cebb = dsl_pool_sync+0x43b/frame 0xfffffe00b46a8bb0
spa_sync() at 0xffffffff829927d2 = spa_sync+0xa72/frame 0xfffffe00b46a8df0
txg_sync_thread() at 0xffffffff829aadb8 = txg_sync_thread+0x358/frame 0xfffffe00b46a8ef0
fork_exit() at 0xffffffff80bdc06f = fork_exit+0x7f/frame 0xfffffe00b46a8f30
fork_trampoline() at 0xffffffff8123b78e = fork_trampoline+0xe/frame 0xfffffe00b46a8f30
--- trap 0, rip = 0, rsp = 0, rbp = 0 ---
KDB: enter: panic

Reading symbols from /boot/kernel/zfs.ko...
Reading symbols from /usr/lib/debug//boot/kernel/zfs.ko.debug...
Reading symbols from /boot/kernel/acpi_wmi.ko...
Reading symbols from /usr/lib/debug//boot/kernel/acpi_wmi.ko.debug...
Reading symbols from /boot/kernel/uhid.ko...
Reading symbols from /usr/lib/debug//boot/kernel/uhid.ko.debug...
Reading symbols from /boot/kernel/wmt.ko...
Reading symbols from /usr/lib/debug//boot/kernel/wmt.ko.debug...
Reading symbols from /boot/kernel/mac_ntpd.ko...
Reading symbols from /usr/lib/debug//boot/kernel/mac_ntpd.ko.debug...
Reading symbols from /boot/kernel/fdescfs.ko...
Reading symbols from /usr/lib/debug//boot/kernel/fdescfs.ko.debug...
__curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:57
57              return (td);
(kgdb) bt
#0  __curthread () at /usr/src/sys/amd64/include/pcpu_aux.h:57
#1  doadump (textdump=0) at /usr/src/sys/kern/kern_shutdown.c:405
#2  0xffffffff804c69b8 in db_fncall_generic (nargs=0, args=0xfffffe00b46a7c60, addr=<optimized out>, rv=<optimized out>)
    at /usr/src/sys/ddb/db_command.c:626
#3  db_fncall (dummy1=<optimized out>, dummy2=<optimized out>, dummy3=<optimized out>, dummy4=<optimized out>) at /usr/src/sys/ddb/db_command.c:674
#4  0xffffffff804c642d in db_command (last_cmdp=<optimized out>, cmd_table=<optimized out>, dopager=false) at /usr/src/sys/ddb/db_command.c:504
#5  0xffffffff804c6576 in db_command_script (command=command@entry=0xffffffff8201a722 <db_recursion_data+82> "call doadump")
    at /usr/src/sys/ddb/db_command.c:569
#6  0xffffffff804cb718 in db_script_exec (scriptname=scriptname@entry=0xfffffe00b46a7e30 "kdb.enter.panic", warnifnotfound=warnifnotfound@entry=0)
    at /usr/src/sys/ddb/db_script.c:302
#7  0xffffffff804cb612 in db_script_kdbenter (eventname=<optimized out>) at /usr/src/sys/ddb/db_script.c:324
#8  0xffffffff804c96e1 in db_trap (type=<optimized out>, code=<optimized out>) at /usr/src/sys/ddb/db_main.c:267
#9  0xffffffff80c73236 in kdb_trap (type=type@entry=3, code=code@entry=0, tf=tf@entry=0xfffffe00b46a8170) at /usr/src/sys/kern/subr_kdb.c:790
#10 0xffffffff81263978 in trap (frame=0xfffffe00b46a8170) at /usr/src/sys/amd64/amd64/trap.c:608
#11 <signal handler called>
#12 kdb_enter (why=<optimized out>, msg=<optimized out>) at /usr/src/sys/kern/subr_kdb.c:556
#13 0xffffffff80c24042 in vpanic (fmt=0xffffffff82afbb96 "%s", ap=ap@entry=0xfffffe00b46a83a0) at /usr/src/sys/kern/kern_shutdown.c:955
#14 0xffffffff828b3c1a in spl_panic (file=<optimized out>, func=<optimized out>, line=-2090756609,
    fmt=0x4a746756e138b8b9 <error: Cannot access memory at address 0x4a746756e138b8b9>)
    at /usr/src/sys/contrib/openzfs/module/os/freebsd/spl/spl_misc.c:103
#15 0xffffffff8287c7c8 in spl_assert (line=624, buf=<optimized out>, file=<optimized out>, func=<optimized out>)
    at /usr/src/sys/contrib/openzfs/include/os/freebsd/spl/sys/debug.h:69
#16 avl_add (tree=0xfffff8011eceb170, new_node=0xfffff8010b8a6480) at /usr/src/sys/contrib/openzfs/module/avl/avl.c:624
#17 0xffffffff82947192 in dsl_deadlist_load_tree (dl=dl@entry=0xfffff8011eceb160) at /usr/src/sys/contrib/openzfs/module/zfs/dsl_deadlist.c:179
#18 0xffffffff829491de in dsl_deadlist_move_bpobj (dl=0xfffff8011eceb160, bpo=bpo@entry=0xfffff800047180d8, mintxg=1235257081,
    tx=tx@entry=0xfffff801b6e86700) at /usr/src/sys/contrib/openzfs/module/zfs/dsl_deadlist.c:936
#19 0xffffffff82959bec in dsl_destroy_snapshot_sync_impl (ds=0xfffff800c0079000, defer=defer@entry=0, tx=tx@entry=0xfffff801b6e86700)
    at /usr/src/sys/contrib/openzfs/module/zfs/dsl_destroy.c:414
#20 0xffffffff8295a35e in dsl_destroy_snapshot_sync (arg=<optimized out>, tx=0xfffff801b6e86700)
    at /usr/src/sys/contrib/openzfs/module/zfs/dsl_destroy.c:582
#21 0xffffffff82a4793a in zcp_sync_task (state=0xfffff801da343808, arg=0xfffffe00b46a87a0, sync=1,
    err_dsname=0xfffff80127907460 "[zroot@2024-10-18_09.00.00--6d](mailto:zroot@2024-10-18_09.00.00--6d)", checkfunc=<optimized out>, syncfunc=<optimized out>)
    at /usr/src/sys/contrib/openzfs/module/zfs/zcp_synctask.c:100
#22 zcp_synctask_destroy (state=0xfffff801da343808, sync=1, err_details=<optimized out>) at /usr/src/sys/contrib/openzfs/module/zfs/zcp_synctask.c:156
#23 0xffffffff82a4780b in zcp_synctask_wrapper (state=state@entry=0xfffff801da343808) at /usr/src/sys/contrib/openzfs/module/zfs/zcp_synctask.c:541
#24 0xffffffff8289aa41 in luaD_precall (L=0xfffff801da343808, func=<optimized out>, nresults=<optimized out>)
    at /usr/src/sys/contrib/openzfs/module/lua/ldo.c:402
#25 0xffffffff828aae4d in luaV_execute (L=L@entry=0xfffff801da343808) at /usr/src/sys/contrib/openzfs/module/lua/lvm.c:772
#26 0xffffffff8289af8b in luaD_call (L=0xfffff801da343808, func=0xfffff8019656e428, nResults=-1, allowyield=0)
    at /usr/src/sys/contrib/openzfs/module/lua/ldo.c:490
#27 0xffffffff8289a313 in luaD_rawrunprotected (L=L@entry=0xfffff801da343808, f=0xffffffff82893660 <f_call>, ud=ud@entry=0xfffffe00b46a8aa8)
    at /usr/src/sys/contrib/openzfs/module/lua/ldo.c:214
#28 0xffffffff8289b5c8 in luaD_pcall (L=0xffffffff81caf7a8 <vt_conswindow+16>, L@entry=0xfffff801da343808, func=0xfffffe00b46a80f0,
    u=0xffffffff836195ff, u@entry=0xfffffe00b46a8aa8, old_top=5365026678948542649, ef=16) at /usr/src/sys/contrib/openzfs/module/lua/ldo.c:691
#29 0xffffffff82893596 in lua_pcallk (L=L@entry=0xfffff801da343808, nargs=nargs@entry=1, nresults=nresults@entry=-1, errfunc=errfunc@entry=1,
    ctx=ctx@entry=0, k=0x10, k@entry=0x0) at /usr/src/sys/contrib/openzfs/module/lua/lapi.c:947
#30 0xffffffff82a439eb in zcp_eval_impl (tx=<optimized out>, ri=0xfffffe00d640e8d0) at /usr/src/sys/contrib/openzfs/module/zfs/zcp.c:856
#31 0xffffffff8296c2b4 in dsl_sync_task_sync (dst=0xfffffe00d640e7f8, tx=tx@entry=0xfffff801b6e86700)
    at /usr/src/sys/contrib/openzfs/module/zfs/dsl_synctask.c:248
#32 0xffffffff8295cebb in dsl_pool_sync (dp=dp@entry=0xfffff80004718000, txg=txg@entry=1235385697)
    at /usr/src/sys/contrib/openzfs/module/zfs/dsl_pool.c:850
--Type <RET> for more, q to quit, c to continue without paging--
#33 0xffffffff829927d2 in spa_sync_iterate_to_convergence (spa=0xfffffe00b2a51000, tx=0xfffff8018c717400)
    at /usr/src/sys/contrib/openzfs/module/zfs/spa.c:9647
#34 spa_sync (spa=spa@entry=0xfffffe00b2a51000, txg=txg@entry=1235385697) at /usr/src/sys/contrib/openzfs/module/zfs/spa.c:9880
#35 0xffffffff829aadb8 in txg_sync_thread (arg=arg@entry=0xfffff80004718000) at /usr/src/sys/contrib/openzfs/module/zfs/txg.c:600
#36 0xffffffff80bdc06f in fork_exit (callout=0xffffffff829aaa60 <txg_sync_thread>, arg=0xfffff80004718000, frame=0xfffffe00b46a8f40)
    at /usr/src/sys/kern/kern_fork.c:1164
#37 <signal handler called>
mtelka commented 1 week ago

I faced panic with similar stack two years ago at illumos: https://www.illumos.org/issues/15306