Closed KonssnoK closed 10 months ago
Crash n.1
#0 block_is_free (block=0xdeadbeef) at /opt/esp-idf/components/heap/heap_tlsf_block_functions.h:90
#1 block_merge_prev (block=0xdeadbeef, control=0x3d800014) at /opt/esp-idf/components/heap/heap_tlsf.c:344
#2 tlsf_free (tlsf=0x3d800014, ptr=0x3da28dc8) at /opt/esp-idf/components/heap/heap_tlsf.c:966
#3 0x4038c48d in multi_heap_free_impl (heap=0x3d800000, p=0x3da28dc8) at /opt/esp-idf/components/heap/multi_heap.c:212
#4 0x40376b0d in heap_caps_free (ptr=0x3da28dc8) at /opt/esp-idf/components/heap/heap_caps.c:382
#5 0x4038d2dc in free (ptr=0x3da28dc8) at /opt/esp-idf/components/newlib/heap.c:39
#6 0x420c5034 in mem_free (rmem=0x3da28dc8) at /opt/esp-idf/components/lwip/lwip/src/core/mem.c:264
#7 0x420c594d in pbuf_free (p=0x3da28dc8) at /opt/esp-idf/components/lwip/lwip/src/core/pbuf.c:789
#8 0x420c66f0 in tcp_seg_free (seg=0x3da2c8cc) at /opt/esp-idf/components/lwip/lwip/src/core/tcp.c:1628
#9 0x420c7ba7 in tcp_free_acked_segments (pcb=0x3da2a730, seg_list=0x0, dbg_list_name=<optimized out>, dbg_other_seg_list=0x0) at /opt/esp-idf/components/lwip/lwip/src/core/tcp_in.c:1120
#10 0x420c7e24 in tcp_receive (pcb=0x3da2a730) at /opt/esp-idf/components/lwip/lwip/src/core/tcp_in.c:1289
#11 0x420c8ca4 in tcp_process (pcb=0x3da2a730) at /opt/esp-idf/components/lwip/lwip/src/core/tcp_in.c:987
#12 0x420c93ac in tcp_input (p=<optimized out>, inp=<optimized out>) at /opt/esp-idf/components/lwip/lwip/src/core/tcp_in.c:438
#13 0x420ce25d in ip4_input (p=0x3da2d3c4, inp=0x3da2095c) at /opt/esp-idf/components/lwip/lwip/src/core/ipv4/ip4.c:803
#14 0x420cfa20 in ethernet_input (p=0x3da2d3c4, netif=0x3da2095c) at /opt/esp-idf/components/lwip/lwip/src/netif/ethernet.c:186
#15 0x420c1ed0 in tcpip_thread_handle_msg (msg=0x3da2d43c) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:180
#16 0x420c1f53 in tcpip_thread (arg=0x0) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:154
#17 0x40388805 in vPortTaskWrapper (pxCode=0x420c1f18 <tcpip_thread>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
Crash n.2
#0 insert_free_block (sl=29, fl=23, block=0x3da204d8, control=0x3d800014) at /opt/esp-idf/components/heap/heap_tlsf.c:237
#1 block_insert (block=0x3da204d8, control=0x3d800014) at /opt/esp-idf/components/heap/heap_tlsf.c:263
#2 tlsf_free (tlsf=0x3d800014, ptr=<optimized out>) at /opt/esp-idf/components/heap/heap_tlsf.c:968
#3 0x4038c48d in multi_heap_free_impl (heap=0x3d800000, p=0x3da2054c) at /opt/esp-idf/components/heap/multi_heap.c:212
#4 0x40376b0d in heap_caps_free (ptr=0x3da2054c) at /opt/esp-idf/components/heap/heap_caps.c:382
#5 0x4037f594 in esp_mbedtls_mem_free (ptr=0x3da2054c) at /opt/esp-idf/components/mbedtls/port/esp_mem.c:46
#6 0x420ee871 in mbedtls_free (ptr=0x3da2054c) at /opt/esp-idf/components/mbedtls/mbedtls/library/platform.c:66
#7 0x420ec366 in mbedtls_md_free (ctx=0x3fcf6f70) at /opt/esp-idf/components/mbedtls/mbedtls/library/md.c:330
#8 0x420be376 in pbkdf2_sha1 (passphrase=<optimized out>, ssid=0x3fcb62a4 <s_wifi_nvs+8> \"\", ssid_len=14, iterations=4096, buf=<optimized out>, buflen=32) at /opt/esp-idf/components/wpa_supplicant/src/crypto/crypto_mbedtls.c:687
#9 0x42146920 in wpa_set_passphrase (passphrase=<optimized out>, ssid=<optimized out>, ssid_len=<optimized out>) at /opt/esp-idf/components/wpa_supplicant/src/rsn_supp/wpa.c:2492
#10 0x42146db8 in wpa_set_bss (macddr=0x3fcf70a0 \"\254\\027T\", bssid=<optimized out>, pairwise_cipher=<optimized out>, group_cipher=<optimized out>, passphrase=0x3fcb62cb <s_wifi_nvs+47> \"\", ssid=0x3fcb62a4 <s_wifi_nvs+8> \"\", ssid_len=14) at /opt/esp-idf/components/wpa_supplicant/src/rsn_supp/wpa.c:2462
#11 0x4213f1f1 in wpa_config_bss (bssid=0x3fcb696c <g_cnxMgr+12> \"\") at /opt/esp-idf/components/wpa_supplicant/esp_supplicant/src/esp_wpa_main.c:92
#12 0x4213f207 in wpa_sta_connect (bssid=0x3fcb696c <g_cnxMgr+12> \"\") at /opt/esp-idf/components/wpa_supplicant/esp_supplicant/src/esp_wpa_main.c:174
#13 0x42132049 in ?? ()
#14 0x4213253d in ?? ()
#15 0x421332fe in ?? ()
#16 0x4213337b in cnx_start_handoff_cb ()
#17 0x4212d01f in ?? ()
#18 0x4212d496 in ?? ()
#19 0x42131a2e in chm_end_op ()
#20 0x42131a4d in chm_end_op_timeout_process ()
#21 0x421315af in ?? ()
#22 0x42131844 in ieee80211_timer_do_process ()
#23 0x4038d5b4 in ppTask ()
#24 0x40388805 in vPortTaskWrapper (pxCode=0x4038d50c <ppTask>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
Crash n.3 (internal 5)
#0 0x403763bd in panic_abort (details=0x3fcf6d8b \"assert failed: remove_free_block heap_tlsf.c:205 (prev && \\\"prev_free field can not be null\\\")\") at /opt/esp-idf/components/esp_system/panic.c:408
#1 0x40384588 in esp_system_abort (details=0x3fcf6d8b \"assert failed: remove_free_block heap_tlsf.c:205 (prev && \\\"prev_free field can not be null\\\")\") at /opt/esp-idf/components/esp_system/esp_system.c:137
#2 0x4038d2ac in __assert_func (file=0x3c1874ed \"heap_tlsf.c\", line=<optimized out>, func=<optimized out>, expr=0x3c187764 \"prev && \\\"prev_free field can not be null\\\"\") at /opt/esp-idf/components/newlib/assert.c:85
#3 0x4038ba2a in remove_free_block (sl=<optimized out>, fl=<optimized out>, block=0x3da2c5f0, control=0x3d800014) at /opt/esp-idf/components/heap/heap_tlsf.c:205
#4 block_remove (block=0x3da2c5f0, control=0x3d800014) at /opt/esp-idf/components/heap/heap_tlsf.c:255
#5 block_merge_next (block=<optimized out>, control=0x3d800014) at /opt/esp-idf/components/heap/heap_tlsf.c:361
#6 tlsf_free (tlsf=0x3d800014, ptr=<optimized out>) at /opt/esp-idf/components/heap/heap_tlsf.c:967
#7 0x4038c48d in multi_heap_free_impl (heap=0x3d800000, p=0x3da2c58c) at /opt/esp-idf/components/heap/multi_heap.c:212
#8 0x40376b0d in heap_caps_free (ptr=0x3da2c58c) at /opt/esp-idf/components/heap/heap_caps.c:382
#9 0x4037f594 in esp_mbedtls_mem_free (ptr=0x3da2c58c) at /opt/esp-idf/components/mbedtls/port/esp_mem.c:46
#10 0x420ee871 in mbedtls_free (ptr=0x3da2c58c) at /opt/esp-idf/components/mbedtls/mbedtls/library/platform.c:66
#11 0x420ec352 in mbedtls_md_free (ctx=0x3fcf6f70) at /opt/esp-idf/components/mbedtls/mbedtls/library/md.c:324
#12 0x420be376 in pbkdf2_sha1 (passphrase=<optimized out>, ssid=0x3fcb62a4 <s_wifi_nvs+8> \"\", ssid_len=11, iterations=4096, buf=<optimized out>, buflen=32) at /opt/esp-idf/components/wpa_supplicant/src/crypto/crypto_mbedtls.c:687
#13 0x42146920 in wpa_set_passphrase (passphrase=<optimized out>, ssid=<optimized out>, ssid_len=<optimized out>) at /opt/esp-idf/components/wpa_supplicant/src/rsn_supp/wpa.c:2492
#14 0x42146db8 in wpa_set_bss (macddr=0x3fcf70a0 \"\254\\027T\", bssid=<optimized out>, pairwise_cipher=<optimized out>, group_cipher=<optimized out>, passphrase=0x3fcb62cb <s_wifi_nvs+47> \"\", ssid=0x3fcb62a4 <s_wifi_nvs+8> \"\", ssid_len=11) at /opt/esp-idf/components/wpa_supplicant/src/rsn_supp/wpa.c:2462
#15 0x4213f1f1 in wpa_config_bss (bssid=0x3fcb696c <g_cnxMgr+12> \"\") at /opt/esp-idf/components/wpa_supplicant/esp_supplicant/src/esp_wpa_main.c:92
#16 0x4213f207 in wpa_sta_connect (bssid=0x3fcb696c <g_cnxMgr+12> \"\") at /opt/esp-idf/components/wpa_supplicant/esp_supplicant/src/esp_wpa_main.c:174
#17 0x42132049 in ?? ()
i think we have more, we'll continue to check.
Crash n.4 (internal id 9)
#0 block_is_free (block=0x5378430) at /opt/esp-idf/components/heap/heap_tlsf_block_functions.h:90
#1 block_merge_prev (block=0x5378430, control=0x3d800014) at /opt/esp-idf/components/heap/heap_tlsf.c:344
#2 tlsf_free (tlsf=0x3d800014, ptr=0x3da292e0) at /opt/esp-idf/components/heap/heap_tlsf.c:966
#3 0x4038c48d in multi_heap_free_impl (heap=0x3d800000, p=0x3da292e0) at /opt/esp-idf/components/heap/multi_heap.c:212
#4 0x40376b0d in heap_caps_free (ptr=0x3da292e0) at /opt/esp-idf/components/heap/heap_caps.c:382
#5 0x4038d2dc in free (ptr=0x3da292e0) at /opt/esp-idf/components/newlib/heap.c:39
#6 0x4038d7c0 in esf_buf_recycle ()
#7 0x4003f3ce in ?? ()
#8 0x4003a280 in ?? ()
#9 0x4038d068 in sta_input ()
#10 0x4038d0a4 in sta_rx_cb ()
#11 0x4003f4c0 in ?? ()
#12 0x4038d5e1 in ppTask ()
#13 0x40388805 in vPortTaskWrapper (pxCode=0x4038d50c <ppTask>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
Crash n.5 (internal id 10) @zhangyanjiaoesp this one seems related to mesh
#0 0x403763bd in panic_abort (details=0x3fcea71b \"assert failed: tlsf_free heap_tlsf.c:964 (!block_is_free(block) && \\\"block already marked as free\\\")\") at /opt/esp-idf/components/esp_system/panic.c:408
#1 0x40384588 in esp_system_abort (details=0x3fcea71b \"assert failed: tlsf_free heap_tlsf.c:964 (!block_is_free(block) && \\\"block already marked as free\\\")\") at /opt/esp-idf/components/esp_system/esp_system.c:137
#2 0x4038d2ac in __assert_func (file=0x3c1874ed \"heap_tlsf.c\", line=<optimized out>, func=<optimized out>, expr=0x3c187998 \"!block_is_free(block) && \\\"block already marked as free\\\"\") at /opt/esp-idf/components/newlib/assert.c:85
#3 0x4038b83a in tlsf_free (tlsf=0x3d800014, ptr=0x3da2759c) at /opt/esp-idf/components/heap/heap_tlsf.c:964
#4 0x4038c48d in multi_heap_free_impl (heap=0x3d800000, p=0x3da2759c) at /opt/esp-idf/components/heap/multi_heap.c:212
#5 0x40376b0d in heap_caps_free (ptr=0x3da2759c) at /opt/esp-idf/components/heap/heap_caps.c:382
#6 0x4038d2dc in free (ptr=0x3da2759c) at /opt/esp-idf/components/newlib/heap.c:39
#7 0x4211af7e in mesh_free ()
#8 0x4211b02c in esp_mesh_free_context ()
#9 0x420f97ec in esp_mesh_send ()
#10 0x42074f94 in mesh_driver_transmit_from_node_sta (h=<optimized out>, buffer=0x3da27562, len=54) at ../components/mesh/mesh_driver.c:148
#11 0x42074fb8 in mesh_driver_transmit_from_node_sta_wrap (h=0x3d817c7c, buffer=0x3da27562, len=54, netstack_buf=0x3da27548) at ../components/mesh/mesh_driver.c:157
#12 0x4216b1b0 in esp_netif_transmit_wrap (esp_netif=0x3d817bf4, data=0x3da27562, len=54, pbuf=0x3da27548) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:961
#13 0x420d25a6 in low_level_output (netif=<optimized out>, p=0x3da27548) at /opt/esp-idf/components/lwip/port/esp32/netif/wlanif.c:137
#14 0x420cfaad in ethernet_output (netif=0x3da25c80, p=0x3da27548, src=<optimized out>, dst=0x3fcb4aec <arp_table+36>, eth_type=<optimized out>) at /opt/esp-idf/components/lwip/lwip/src/netif/ethernet.c:312
#15 0x420cd124 in etharp_output_to_arp_index (netif=0x3da25c80, q=0x3da27548, arp_idx=<optimized out>) at /opt/esp-idf/components/lwip/lwip/src/core/ipv4/etharp.c:785
#16 0x420cd425 in etharp_output (netif=0x3da25c80, q=0x3da27548, ipaddr=<optimized out>) at /opt/esp-idf/components/lwip/lwip/src/core/ipv4/etharp.c:884
#17 0x420ce514 in ip4_output_if_opt_src (p=0x3da27548, src=<optimized out>, dest=0x3da2d5b8, ttl=255 '\\377', tos=0 '\\000', proto=<optimized out>, netif=netif@entry=0x3da25c80, ip_options=ip_options@entry=0x0, optlen=optlen@entry=0) at /opt/esp-idf/components/lwip/lwip/src/core/ipv4/ip4.c:1108
#18 0x420ce568 in ip4_output_if_opt (p=0x3da27548, src=0x3da2d5b4, dest=0x3da2d5b8, ttl=255 '\\377', tos=0 '\\000', proto=<optimized out>, netif=netif@entry=0x3da25c80, ip_options=ip_options@entry=0x0, optlen=optlen@entry=0) at /opt/esp-idf/components/lwip/lwip/src/core/ipv4/ip4.c:909
#19 0x420ce590 in ip4_output_if (p=0x3da27548, src=0x3da2d5b4, dest=0x3da2d5b8, ttl=255 '\\377', tos=0 '\\000', proto=6 '\\006', netif=netif@entry=0x3da25c80) at /opt/esp-idf/components/lwip/lwip/src/core/ipv4/ip4.c:886
#20 0x420c9b45 in tcp_output_control_segment (pcb=0x3da2d5b4, p=0x3da27548, src=0x3da2d5b4, dst=0x3da2d5b8) at /opt/esp-idf/components/lwip/lwip/src/core/tcp_out.c:1951
#21 0x420ca4b8 in tcp_send_empty_ack (pcb=0x3da2d5b4) at /opt/esp-idf/components/lwip/lwip/src/core/tcp_out.c:2057
#22 0x420ca534 in tcp_output (pcb=0x3da2d5b4) at /opt/esp-idf/components/lwip/lwip/src/core/tcp_out.c:1277
#23 0x420c7194 in tcp_fasttmr () at /opt/esp-idf/components/lwip/lwip/src/core/tcp.c:1497
#24 0x420c7702 in tcp_tmr () at /opt/esp-idf/components/lwip/lwip/src/core/tcp.c:237
#25 0x420cab3e in tcpip_tcp_timer (arg=0x0) at /opt/esp-idf/components/lwip/lwip/src/core/timeouts.c:161
#26 0x420cac00 in sys_check_timeouts () at /opt/esp-idf/components/lwip/lwip/src/core/timeouts.c:411
#27 0x420c1e7d in tcpip_timeouts_mbox_fetch (mbox=0x3fcb4544 <tcpip_mbox>, msg=0x3fceac40) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:115
#28 0x420c1f39 in tcpip_thread (arg=0x0) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:148
#29 0x40388805 in vPortTaskWrapper (pxCode=0x420c1f18 <tcpip_thread>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
@KonssnoK all the posted backtraces do indicate that the program aborts due to a heap corruption. Please follow the recommendations listed in the docs to narrow down the root cause of heap corruption: https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-reference/system/heap_debug.html#heap-corruption-detection
Even though the backtrace originates from a Wi-Fi related function, this typically only means that the heap corruption has already occurred and that the next allocation has ran into the corrupted heap structure. Enabling heap debugging options can help detect the issue sooner and hopefully identify which part of the code is responsible for it.
@KonssnoK all the posted backtraces do indicate that the program aborts due to a heap corruption. Please follow the recommendations listed in the docs to narrow down the root cause of heap corruption: https://docs.espressif.com/projects/esp-idf/en/latest/esp32/api-reference/system/heap_debug.html#heap-corruption-detection
Even though the backtrace originates from a Wi-Fi related function, this typically only means that the heap corruption has already occurred and that the next allocation has ran into the corrupted heap structure. Enabling heap debugging options can help detect the issue sooner and hopefully identify which part of the code is responsible for it.
Hi @igrr , sadly there are a few issues:
Therefore we need:
@igrr another wifi crash, not related to heap
Guru Meditation Error: Core 1 panic'ed (LoadProhibited). Exception was unhandled.
Core 1 register dump:
PC : 0x42123db8 PS : 0x00060630 A0 : 0x82124b7e A1 : 0x3fcf71c0
0x42123db8: ieee80211_set_max_rate at ??:?
A2 : 0x00000000 A3 : 0x3d816ed0 A4 : 0x00000032 A5 : 0x00000000
A6 : 0x00000000 A7 : 0x00000018 A8 : 0x00000035 A9 : 0x0000001d
A10 : 0x00000020 A11 : 0x3fcf7264 A12 : 0x00000006 A13 : 0x3fcf72c6
A14 : 0x00000001 A15 : 0x00000000 SAR : 0x00000010 EXCCAUSE: 0x0000001c
EXCVADDR: 0x00000019 LBEG : 0x40056f5c LEND : 0x40056f72 LCOUNT : 0xffffffff
Backtrace: 0x42123db5:0x3fcf71c0 0x42124b7b:0x3fcf72c0 0x42123b51:0x3fcf7320 0x4003f4dd:0x3fcf7340 |<-CORRUPTED
0x42123db5: ieee80211_set_max_rate at ??:?
0x42124b7b: ieee80211_parse_rsn at ??:?
0x42123b51: ieee80211_decap at ??:?
@igrr another wifi crash, not related to heap
Guru Meditation Error: Core 1 panic'ed (LoadProhibited). Exception was unhandled. Core 1 register dump: PC : 0x42123db8 PS : 0x00060630 A0 : 0x82124b7e A1 : 0x3fcf71c0 0x42123db8: ieee80211_set_max_rate at ??:? A2 : 0x00000000 A3 : 0x3d816ed0 A4 : 0x00000032 A5 : 0x00000000 A6 : 0x00000000 A7 : 0x00000018 A8 : 0x00000035 A9 : 0x0000001d A10 : 0x00000020 A11 : 0x3fcf7264 A12 : 0x00000006 A13 : 0x3fcf72c6 A14 : 0x00000001 A15 : 0x00000000 SAR : 0x00000010 EXCCAUSE: 0x0000001c EXCVADDR: 0x00000019 LBEG : 0x40056f5c LEND : 0x40056f72 LCOUNT : 0xffffffff Backtrace: 0x42123db5:0x3fcf71c0 0x42124b7b:0x3fcf72c0 0x42123b51:0x3fcf7320 0x4003f4dd:0x3fcf7340 |<-CORRUPTED 0x42123db5: ieee80211_set_max_rate at ??:? 0x42124b7b: ieee80211_parse_rsn at ??:? 0x42123b51: ieee80211_decap at ??:?
@KonssnoK Can you provide the .elf
file on this failure test?
@KonssnoK About the heap corruption issue, can you provide more logs before the problem happen? This will help us analyze in what scenario does this happen.
Hi @zhangyanjiaoesp ,
sadly the crashes are happening on field devices, so we don't really know what is going on. we do have logs but they are related to the device functionality, not the network itself. We are investigating ways to increase repeatability and understand more!
Sorry i can't share the elf :( but that crash happened on one of our testing devices, so i might be able to reproduce it. I'll post more when we'll have more data available.
@zhangyanjiaoesp we are still working on this, we are bit puzzled because it seems that the heap poisoning works only for free blocks, ignoring allocated ones. we are working trying to trigger the checks in a way that can be used on the field.
Hello @zhangyanjiaoesp , while we are still working on isolating the heap corruptions we found another crash related to the MTX task. Sadly we cannot look at it because it's part of the closed source code.
Please note that the whole mutex is handled internally by espressif, so our code is not involved
==================== CURRENT THREAD STACK =====================
#0 0x403763e9 in panic_abort (details=0x3fccd2cb \"assert failed: spinlock_acquire spinlock.h:123 ((result == SPINLOCK_FREE) == (lock->count == 0))\") at /opt/esp-idf/components/esp_system/panic.c:408
#1 0x40384688 in esp_system_abort (details=0x3fccd2cb \"assert failed: spinlock_acquire spinlock.h:123 ((result == SPINLOCK_FREE) == (lock->count == 0))\") at /opt/esp-idf/components/esp_system/esp_system.c:137
#2 0x4038d3b4 in __assert_func (file=0x3c181b47 \"spinlock.h\", line=<optimized out>, func=<optimized out>, expr=0x3c182a44 \"(result == SPINLOCK_FREE) == (lock->count == 0)\") at /opt/esp-idf/components/newlib/assert.c:85
#3 0x40388b2c in spinlock_acquire (timeout=<optimized out>, lock=0x3d819d44) at /opt/esp-idf/components/esp_hw_support/include/soc/spinlock.h:123
#4 xPortEnterCriticalTimeout (mux=0x3d819d44, timeout=<optimized out>) at /opt/esp-idf/components/freertos/port/xtensa/port.c:301
#5 0x403858f0 in vPortEnterCritical (mux=0x3d819d44) at /opt/esp-idf/components/freertos/port/xtensa/include/freertos/portmacro.h:578
#6 xQueueSemaphoreTake (xQueue=0x3d819cf0, xTicksToWait=<optimized out>) at /opt/esp-idf/components/freertos/queue.c:1563
#7 0x40385a53 in xQueueTakeMutexRecursive (xMutex=0x3d819cf0, xTicksToWait=4294967295) at /opt/esp-idf/components/freertos/queue.c:731
#8 0x40376780 in mutex_lock_wrapper (mutex=0x3d819cf0) at /opt/esp-idf/components/esp_wifi/esp32s3/esp_adapter.c:390
#9 0x4211e402 in mesh_mutex_lock ()
#10 0x4211be87 in mesh_tx_task_main ()
#11 0x4038890d in vPortTaskWrapper (pxCode=0x4211bae8 <mesh_tx_task_main>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
Attached the dump
v3_dump_FFFFAC17540004AC_1690286435.txt
We currently work on top of 4fc8964ec36c44cd5b959b6b30f4ffb7ccbdcf91
No idea on how to reproduce.
Another mesh crash we cannot investigate
==================== CURRENT THREAD STACK =====================
#0 0x42106922 in esp_mesh_scan_done ()
#1 0x4210cb2d in mesh_nwk_task_main ()
#2 0x4038890d in vPortTaskWrapper (pxCode=0x4210c1fc <mesh_nwk_task_main>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
@KonssnoK Can you provide the .elf
file and the backtrace info ?
@zhangyanjiaoesp the elf i would send it via mail, if you can provide it. For the backtrace, do you mean that the file generated by coredump.py is not enough?
hi @KonssnoK you can send it to the following email sales@espressif.com and include your elf file. Please make sure to mention the GitHub link and provide a brief description in the email. Alternatively, you can notify me (@mention) after sending the email, that we can get the information as soon as possible
@KonssnoK coredump.py is not enough, can you provide more context for the logs?
@zhangyanjiaoesp sadly no. the coredumps are retrieved from devices in the field placed in customers houses, so this is all we can see... We are also trying to trigger those crashes but no luck for now
What kind of context would you need?
@KonssnoK We want to get the runtime logs until the crash, it may help us to analyze the crash scenario. It will help us also if you can provide the .elf
file. The mail is as @Xiehanxin mentioned in the comment.
sorry, somehow i missed the @Xiehanxin comment!
I sent an initial email with one of the most recent crashes related to
==================== CURRENT THREAD STACK =====================
#0 0x42135e1d in ?? ()
#1 0x421364be in cnx_csa_fn_process ()
#2 0x42135063 in ?? ()
#3 0x4213520c in ieee80211_timer_do_process ()
#4 0x4038dbe4 in ppTask ()
#5 0x40388f05 in vPortTaskWrapper (pxCode=0x4038db3c <ppTask>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
let's start with one, we'll see the next ones.
Meanwhile, by tomorrow we should have some results of our heap-checking enhanced firmware.
Hello, @zhangyanjiaoesp , @Xiehanxin.
I have a question regarding one other crash
================== CURRENT THREAD REGISTERS ===================
exccause 0x9 (LoadStoreAlignmentCause)
excvaddr 0x4037ea59
epc1 0x420b815b
Apparently a misaligned access to the memory occurs in the WIFI task.
By looking at who is doing the access tho, it is still the allocation.
==================== CURRENT THREAD STACK =====================
#0 insert_free_block (sl=0, fl=24, block=0x3df24894, control=0x3df00014) at /opt/esp-idf/components/heap/heap_tlsf.c:237
#1 block_insert (block=0x3df24894, control=0x3df00014) at /opt/esp-idf/components/heap/heap_tlsf.c:263
#2 block_trim_free (size=<optimized out>, block=<optimized out>, control=0x3df00014) at /opt/esp-idf/components/heap/heap_tlsf.c:377
#3 block_prepare_used (size=<optimized out>, block=<optimized out>, control=0x3df00014) at /opt/esp-idf/components/heap/heap_tlsf.c:454
#4 tlsf_malloc (tlsf=0x3df00014, size=<optimized out>) at /opt/esp-idf/components/heap/heap_tlsf.c:861
#5 0x4038c78c in multi_heap_malloc_impl (heap=0x3df00000, size=24) at /opt/esp-idf/components/heap/multi_heap.c:187
#6 0x4038c944 in multi_heap_malloc (heap=0x3df00000, size=8) at /opt/esp-idf/components/heap/multi_heap_poisoning.c:289
#7 0x403769d0 in heap_caps_malloc_base (size=8, caps=5120) at /opt/esp-idf/components/heap/heap_caps.c:175
#8 0x40376b22 in heap_caps_malloc_prefer (size=8, num=1) at /opt/esp-idf/components/heap/heap_caps.c:290
#9 0x403766c0 in wifi_malloc (size=8) at /opt/esp-idf/components/esp_wifi/esp32s3/esp_adapter.c:71
#10 0x421352aa in ieee80211_timer_process ()
#11 0x421235cc in ?? ()
#12 0x4200a075 in timer_process_alarm (dispatch_method=ESP_TIMER_TASK) at /opt/esp-idf/components/esp_timer/src/esp_timer.c:396
#13 0x4200a0c8 in timer_task (arg=0x0) at /opt/esp-idf/components/esp_timer/src/esp_timer.c:422
#14 0x40388f05 in vPortTaskWrapper (pxCode=0x4200a0b4 <timer_task>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
Could it be that an aligned flag is missing in the wifi function allocating the memory? Or a non-aligned malloc is never permitted?
Thanks
@KonssnoK All of these crash issues occurred on the same test version and code?
@KonssnoK All of these crash issues occurred on the same test version and code?
@zhangyanjiaoesp we currently have 2 versions on the field
What i'm currently doing is looking for crashes on devices in 1.1.4 and then placing 23073101. Once the crash occurs again, i collect the logs.
to give you an overview these are the amount of crashes in the last 7 days on a subset of (150) devices in 1.1.4. as you can se we improved a bit by solving one on the 25th, but still.
@zhangyanjiaoesp at some point i would like to move from
4fc8964ec36c44cd5b959b6b30f4ffb7ccbdcf91 to cc71308e2fdce1d6d27fc52d39943f5d0fb83f35
as codebase for esp-idf, but i don't know if you have other pending changes related to the WIFI component. Do you have any idea if there are pending items that will land on the public repo soon?
@KonssnoK You can update to https://github.com/espressif/esp-idf/commit/cc71308e2fdce1d6d27fc52d39943f5d0fb83f35.
Since the location of your crash problems are different each time, we want to make sure that the version tested is the same each time.
According to the elf file you provided last time, we still could not find the root cause. And you said that you could not get the log information of the test device, so we could not add debug logs to help analyze the problem. Still need you try to find the way to reproduce the issue locally.
@zhangyanjiaoesp reproducing locally it's really difficult... we have no idea what is the underlying cause that triggers the issue... i hope the latest version of the heap tracker will help.
one thing that might be related, is the fact that some devices move from layer to layer (like from 3 to 2, or viceversa).. but i have no idea if related
hello @zhangyanjiaoesp i have the first buffer to be checked by your side.
Could you please check that 128B buffer IE, allocated and released in
void esp_set_assoc_ie(uint8_t *bssid, const u8 *ies, size_t ies_len, bool mdie)
is correctly handled by function
esp_wifi_set_appie_internal(WIFI_APPIE_ASSOC_REQ, ie, ASSOC_IE_LEN - len, 0);
(the pointer is not passed around considering it gets freed right after) ?
Our tracing system reported: ALLOCATION: 128 bytes (@ 0x3df370e8) allocated CPU 1 ccount 0xb4e1d3c4 caller 0x4214af12:0x4214a62d:0x421429f9:0x42142a0f freed by 0x4037fc44:0x420f20dd:0x420ef8c2:0x420bee6a
which i translated to:
wpa_sta_connect:wpa_config_bss:wpa_set_bss:esp_set_assoc_ie freed by esp_mbedtls_mem_free:mbedtls_free:mbedtls_md_free:pbkdf2_sha1
the second part looks strange, such buffer is actually released in the same function it's called.
void esp_set_assoc_ie(uint8_t *bssid, const u8 *ies, size_t ies_len, bool mdie)
{
#define ASSOC_IE_LEN 128
uint8_t *ie, *pos;
size_t len = ASSOC_IE_LEN, ie_len;
ie = os_malloc(ASSOC_IE_LEN + ies_len);
if (!ie) {
wpa_printf(MSG_ERROR, "failed to allocate ie");
return;
}
pos = ie;
ie_len = get_extended_caps_ie(pos, len);
pos += ie_len;
len -= ie_len;
#ifdef CONFIG_WPA_11KV_SUPPORT
ie_len = get_rm_enabled_ie(pos, len);
pos += ie_len;
len -= ie_len;
#ifdef CONFIG_MBO
ie_len = get_operating_class_ie(pos, len);
pos += ie_len;
len -= ie_len;
ie_len = get_mbo_oce_assoc_ie(pos, len);
pos += ie_len;
len -= ie_len;
#endif /* CONFIG_MBO */
#endif
if (ies_len) {
os_memcpy(pos, ies, ies_len);
pos += ies_len;
len -= ies_len;
}
esp_wifi_unset_appie_internal(WIFI_APPIE_ASSOC_REQ);
esp_wifi_set_appie_internal(WIFI_APPIE_ASSOC_REQ, ie, ASSOC_IE_LEN - len, 0);
os_free(ie);
#undef ASSOC_IE_LEN
}
Thanks
@KonssnoK Have checked the esp_wifi_set_appie_internal()
function, it will malloc a new block of memory and then memcpy the contents of ie
to the new memory. So it is ok to release ie
after that.
mmm i have 2 more crashes pointing at the same buffer from different devices... wait, let me improve the logging so that i understand if it's a canary overflow or what. Maybe it's just overflowing.
ALLOCATION: 128 bytes (@ 0x3df2ef28) allocated CPU 1 ccount 0xdeeca8b0 caller 0x4214af12:0x4214a62d:0x421429f9:0x42142a0f freed by 0x4037fc44:0x420f20dd:0x420ef8c2:0x420bee6a
ALLOCATION: 128 bytes (@ 0x3df2ed2c) allocated CPU 1 ccount 0xa51a0dec caller 0x4214af12:0x4214a62d:0x421429f9:0x42142a0f freed by 0x4037fc44:0x420f20dd:0x420ef8c2:0x420bee6a
@KonssnoK This branch contains an assoc ie related memory leak fix, you can test based on this branch to see the if the crash issue has improved.
@zhangyanjiaoesp please push the branch to the public github :P
@KonssnoK Sorry, wrong link, this is the right branch 3b570fd9cd761511ad75efd5126eda4496d575a5
ah ok, we are already rebasing on top of these changes in our latest release, we'll monitor the crashes. (we currently had to stop the dedicated heap tracing due to some test devices going offline)
Initial logs suggest the assoc ie crash is solved. ( 🎉 ). We continue to monitor.
We currently got a netif crash v3_dump_FFFFAC17540002FC_1691734017.txt
==================== CURRENT THREAD STACK =====================
#0 0x40376421 in panic_abort (details=0x3fccbc6b \"assert failed: vQueueDelete queue.c:2131 (pxQueue)\") at /opt/esp-idf/components/esp_system/panic.c:408
#1 0x40384828 in esp_system_abort (details=0x3fccbc6b \"assert failed: vQueueDelete queue.c:2131 (pxQueue)\") at /opt/esp-idf/components/esp_system/esp_system.c:137
#2 0x4038d554 in __assert_func (file=0x3c189a8d \"queue.c\", line=<optimized out>, func=<optimized out>, expr=0x3c189ae0 \"pxQueue\") at /opt/esp-idf/components/newlib/assert.c:85
#3 0x40385d3d in vQueueDelete (xQueue=0x0) at /opt/esp-idf/components/freertos/queue.c:2131
#4 0x420bbe82 in esp_netif_list_unlock () at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:60
#5 0x420bbfac in esp_netif_remove_from_list (netif=<optimized out>) at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:107
#6 0x420bd08f in esp_netif_destroy (esp_netif=0x3d8061f4) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:648
#7 0x42073590 in mesh_netif_interface_clear (netif_p=0x3fcabe58 <self_priv+8>) at ../components/mesh/mesh_netif.c:76
#8 0x420735a1 in mesh_netif_interface_wifi () at ../components/mesh/mesh_netif.c:84
#9 0x420739e3 in mesh_netifs_stop () at ../components/mesh/mesh_netif.c:285
#10 0x4207429f in mesh_handover_parent_disconnected (forced=false) at ../components/mesh/mesh_handover.c:489
#11 0x420728fc in handle_message (param=<optimized out>, event=<optimized out>) at ../components/mesh/mesh.c:342
#12 0x420a0c12 in thread_process_events (handlers=0x3fccbf80, size=6, events=33554432, param=0x0) at ../components/core/utils/thread_utils.c:143
#13 0x420723ed in mesh_task (arg=0x0) at ../components/mesh/mesh.c:618
#14 0x40388aad in vPortTaskWrapper (pxCode=0x420723c0 <mesh_task>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
and an esp_modem related one (not attached)
We continue to investigate and monitor for more crashes (latest firmware is only on 100 devices, we are going to push to 2000 soon)
We currently got a netif crash v3_dump_FFFFAC17540002FC_1691734017.txt
==================== CURRENT THREAD STACK ===================== #0 0x40376421 in panic_abort (details=0x3fccbc6b \"assert failed: vQueueDelete queue.c:2131 (pxQueue)\") at /opt/esp-idf/components/esp_system/panic.c:408 #1 0x40384828 in esp_system_abort (details=0x3fccbc6b \"assert failed: vQueueDelete queue.c:2131 (pxQueue)\") at /opt/esp-idf/components/esp_system/esp_system.c:137 #2 0x4038d554 in __assert_func (file=0x3c189a8d \"queue.c\", line=<optimized out>, func=<optimized out>, expr=0x3c189ae0 \"pxQueue\") at /opt/esp-idf/components/newlib/assert.c:85 #3 0x40385d3d in vQueueDelete (xQueue=0x0) at /opt/esp-idf/components/freertos/queue.c:2131 #4 0x420bbe82 in esp_netif_list_unlock () at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:60 #5 0x420bbfac in esp_netif_remove_from_list (netif=<optimized out>) at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:107 #6 0x420bd08f in esp_netif_destroy (esp_netif=0x3d8061f4) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:648 #7 0x42073590 in mesh_netif_interface_clear (netif_p=0x3fcabe58 <self_priv+8>) at ../components/mesh/mesh_netif.c:76 #8 0x420735a1 in mesh_netif_interface_wifi () at ../components/mesh/mesh_netif.c:84 #9 0x420739e3 in mesh_netifs_stop () at ../components/mesh/mesh_netif.c:285 #10 0x4207429f in mesh_handover_parent_disconnected (forced=false) at ../components/mesh/mesh_handover.c:489 #11 0x420728fc in handle_message (param=<optimized out>, event=<optimized out>) at ../components/mesh/mesh.c:342 #12 0x420a0c12 in thread_process_events (handlers=0x3fccbf80, size=6, events=33554432, param=0x0) at ../components/core/utils/thread_utils.c:143 #13 0x420723ed in mesh_task (arg=0x0) at ../components/mesh/mesh.c:618 #14 0x40388aad in vPortTaskWrapper (pxCode=0x420723c0 <mesh_task>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
@KonssnoK This may related to your code, the assert is due to the configASSERT( pxQueue );
fail in vQueueDelete()
function, that is to say the s_list_lock
is equal to NULL. Please check the related code.
@zhangyanjiaoesp after the update to the latest v4 branch ( 8aa1395db58e30f821757e73baa1e18fad54e26f ) we haven't seen anymore the crash related to ie. Good!
the total number of crashes has decreased significantly.
We still have some which are a bit strange, like
==================== CURRENT THREAD STACK =====================
#0 0x40376421 in panic_abort (details=0x3fcea9bb \"assert failed: spinlock_acquire spinlock.h:123 ((result == SPINLOCK_FREE) == (lock->count == 0))\") at /opt/esp-idf/components/esp_system/panic.c:408
#1 0x40384828 in esp_system_abort (details=0x3fcea9bb \"assert failed: spinlock_acquire spinlock.h:123 ((result == SPINLOCK_FREE) == (lock->count == 0))\") at /opt/esp-idf/components/esp_system/esp_system.c:137
#2 0x4038d554 in __assert_func (file=0x3c181b47 \"spinlock.h\", line=<optimized out>, func=<optimized out>, expr=0x3c182a44 \"(result == SPINLOCK_FREE) == (lock->count == 0)\") at /opt/esp-idf/components/newlib/assert.c:85
#3 0x40388ccc in spinlock_acquire (timeout=<optimized out>, lock=0x3da22154) at /opt/esp-idf/components/esp_hw_support/include/soc/spinlock.h:123
#4 xPortEnterCriticalTimeout (mux=0x3da22154, timeout=<optimized out>) at /opt/esp-idf/components/freertos/port/xtensa/port.c:301
#5 0x40385a90 in vPortEnterCritical (mux=0x3da22154) at /opt/esp-idf/components/freertos/port/xtensa/include/freertos/portmacro.h:578
#6 xQueueSemaphoreTake (xQueue=0x3da22100, xTicksToWait=<optimized out>) at /opt/esp-idf/components/freertos/queue.c:1563
#7 0x420bbf6d in esp_netif_list_lock () at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:51
#8 0x420bc14e in esp_netif_is_netif_listed (esp_netif=0x3da21c04) at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:156
#9 0x420bc6c4 in esp_netif_is_active (arg=0x3da21c04) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:189
#10 0x420bc6d4 in esp_netif_ip_lost_timer (arg=0x3da21c04) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:1031
#11 0x420cbe40 in sys_check_timeouts () at /opt/esp-idf/components/lwip/lwip/src/core/timeouts.c:411
#12 0x420c4cfd in tcpip_timeouts_mbox_fetch (mbox=0x3fcb69d0 <tcpip_mbox>, msg=0x3fceac40) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:104
#13 0x420c4dcd in tcpip_thread (arg=0x0) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:148
#14 0x40388aad in vPortTaskWrapper (pxCode=0x420c4dac <tcpip_thread>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
v3_dump_FFFFAC1754000690_1692168828.txt
Have you ever seen something like this?
We continue to investigate, we are still waiting to recover some devices which were triggering multiple issues.
i confirm that the main remaining crash is what i posted above
==================== CURRENT THREAD STACK =====================
#0 0x40376421 in panic_abort (details=0x3fccbbeb \"assert failed: vQueueDelete queue.c:2131 (pxQueue)\") at /opt/esp-idf/components/esp_system/panic.c:408
#1 0x40384828 in esp_system_abort (details=0x3fccbbeb \"assert failed: vQueueDelete queue.c:2131 (pxQueue)\") at /opt/esp-idf/components/esp_system/esp_system.c:137
#2 0x4038d554 in __assert_func (file=0x3c18425d \"queue.c\", line=<optimized out>, func=<optimized out>, expr=0x3c1842b0 \"pxQueue\") at /opt/esp-idf/components/newlib/assert.c:85
#3 0x40385d3d in vQueueDelete (xQueue=0x0) at /opt/esp-idf/components/freertos/queue.c:2131
#4 0x420bbfb6 in esp_netif_list_unlock () at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:60
#5 0x420bc0e0 in esp_netif_remove_from_list (netif=<optimized out>) at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:107
#6 0x420bd1c3 in esp_netif_destroy (esp_netif=0x3d806280) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:648
#7 0x420736c8 in mesh_netif_interface_clear (netif_p=0x3fcabde0 <self_priv+8>) at ../components/mesh/mesh_netif.c:76
#8 0x420736d9 in mesh_netif_interface_wifi () at ../components/mesh/mesh_netif.c:84
#9 0x42073b1b in mesh_netifs_stop () at ../components/mesh/mesh_netif.c:285
#10 0x420743d7 in mesh_handover_parent_disconnected (forced=false) at ../components/mesh/mesh_handover.c:489
#11 0x42072a34 in handle_message (param=<optimized out>, event=<optimized out>) at ../components/mesh/mesh.c:342
#12 0x420a0d46 in thread_process_events (handlers=0x3fccbf00, size=6, events=33554432, param=0x0) at ../components/core/utils/thread_utils.c:143
#13 0x42072525 in mesh_task (arg=0x0) at ../components/mesh/mesh.c:618
#14 0x40388aad in vPortTaskWrapper (pxCode=0x420724f8 <mesh_task>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
considering we don't handle the queues in netif, it must be some call ordering that creates the issue
@KonssnoK OK, we will try to debug it according to your logs
@zhangyanjiaoesp after the update to the latest v4 branch ( 8aa1395 ) we haven't seen anymore the crash related to ie. Good!
the total number of crashes has decreased significantly.
We still have some which are a bit strange, like
==================== CURRENT THREAD STACK ===================== #0 0x40376421 in panic_abort (details=0x3fcea9bb \"assert failed: spinlock_acquire spinlock.h:123 ((result == SPINLOCK_FREE) == (lock->count == 0))\") at /opt/esp-idf/components/esp_system/panic.c:408 #1 0x40384828 in esp_system_abort (details=0x3fcea9bb \"assert failed: spinlock_acquire spinlock.h:123 ((result == SPINLOCK_FREE) == (lock->count == 0))\") at /opt/esp-idf/components/esp_system/esp_system.c:137 #2 0x4038d554 in __assert_func (file=0x3c181b47 \"spinlock.h\", line=<optimized out>, func=<optimized out>, expr=0x3c182a44 \"(result == SPINLOCK_FREE) == (lock->count == 0)\") at /opt/esp-idf/components/newlib/assert.c:85 #3 0x40388ccc in spinlock_acquire (timeout=<optimized out>, lock=0x3da22154) at /opt/esp-idf/components/esp_hw_support/include/soc/spinlock.h:123 #4 xPortEnterCriticalTimeout (mux=0x3da22154, timeout=<optimized out>) at /opt/esp-idf/components/freertos/port/xtensa/port.c:301 #5 0x40385a90 in vPortEnterCritical (mux=0x3da22154) at /opt/esp-idf/components/freertos/port/xtensa/include/freertos/portmacro.h:578 #6 xQueueSemaphoreTake (xQueue=0x3da22100, xTicksToWait=<optimized out>) at /opt/esp-idf/components/freertos/queue.c:1563 #7 0x420bbf6d in esp_netif_list_lock () at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:51 #8 0x420bc14e in esp_netif_is_netif_listed (esp_netif=0x3da21c04) at /opt/esp-idf/components/esp_netif/esp_netif_objects.c:156 #9 0x420bc6c4 in esp_netif_is_active (arg=0x3da21c04) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:189 #10 0x420bc6d4 in esp_netif_ip_lost_timer (arg=0x3da21c04) at /opt/esp-idf/components/esp_netif/lwip/esp_netif_lwip.c:1031 #11 0x420cbe40 in sys_check_timeouts () at /opt/esp-idf/components/lwip/lwip/src/core/timeouts.c:411 #12 0x420c4cfd in tcpip_timeouts_mbox_fetch (mbox=0x3fcb69d0 <tcpip_mbox>, msg=0x3fceac40) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:104 #13 0x420c4dcd in tcpip_thread (arg=0x0) at /opt/esp-idf/components/lwip/lwip/src/api/tcpip.c:148 #14 0x40388aad in vPortTaskWrapper (pxCode=0x420c4dac <tcpip_thread>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
v3_dump_FFFFAC1754000690_1692168828.txt
Have you ever seen something like this?
We continue to investigate, we are still waiting to recover some devices which were triggering multiple issues.
@KonssnoK From this crash logs, we suspect that netif was deleted somewhere, please check the code where esp_netif_destroy
is called.
interface is destroyed only in the mesh_netif_interface_clear
function.
i guess in some cases there are pending netif tasks which are not finished when we destroy the interface.
something like:
which would mean that the lwip library is not protected
interface is destroyed only in the
mesh_netif_interface_clear
function. i guess in some cases there are pending netif tasks which are not finished when we destroy the interface.something like:
* start lwip timeouts check * destroy the netif * sys_check_timeouts
which would mean that the lwip library is not protected
@KonssnoK The mesh_netif_interface_clear
function is your own code, can you show us how you called the netif related function? Something like the following (code from idf/examples/mesh/ip_internal_network/main/mesh_netif.c
) :
@zhangyanjiaoesp we'll try to check where we call it. Meanwhile, we got the following
==================== CURRENT THREAD STACK =====================
#0 0x421365d5 in ?? ()
#1 0x42136c86 in cnx_csa_fn_process ()
#2 0x4213581b in ?? ()
#3 0x421359c4 in ieee80211_timer_do_process ()
#4 0x4038d870 in ppTask ()
#5 0x40388aad in vPortTaskWrapper (pxCode=0x4038d7c8 <ppTask>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
@zhangyanjiaoesp we'll try to check where we call it. Meanwhile, we got the following
==================== CURRENT THREAD STACK ===================== #0 0x421365d5 in ?? () #1 0x42136c86 in cnx_csa_fn_process () #2 0x4213581b in ?? () #3 0x421359c4 in ieee80211_timer_do_process () #4 0x4038d870 in ppTask () #5 0x40388aad in vPortTaskWrapper (pxCode=0x4038d7c8 <ppTask>, pvParameters=0x0) at /opt/esp-idf/components/freertos/port/xtensa/port.c:142
Do you have the elf
file of this crash issue?
@zhangyanjiaoesp mmm i could send it to the esp mail
@zhangyanjiaoesp who should i reference the mail to?
https://github.com/espressif/esp-idf/issues/11713#issuecomment-1652855935
@KonssnoK This comment has given the contact email.
@zhangyanjiaoesp done
@zhangyanjiaoesp well.. i had to resend the mail twice because :
Error: 550 5.0.350 One or more of the attachments in your email is of a file type that is NOT allowed by the recipient's organization. Message rejected by: TYZPR04MB5654.apcprd04.prod.outlook.com | Error: | 550 5.0.350 One or more of the attachments in your email is of a file type that is NOT allowed by the recipient's organization. | Message rejected by: | TYZPR04MB5654.apcprd04.prod.outlook.com
-- | -- | -- | -- | --
Error: | 550 5.0.350 One or more of the attachments in your email is of a file type that is NOT allowed by the recipient's organization.
how should i attach the files? i tried a
Answers checklist.
General issue report
Hello, we are going to use this issue to report multiple WIFI crashes our field devices are reporting. v4.4 based on 3cec3a0026098d1b027f2103ec154a15baf97318
Since then, only one commit on the WIFI library, so these errors should be all still there.
Our application uses