espressif / esp-idf

Espressif IoT Development Framework. Official development framework for Espressif SoCs.
Apache License 2.0
13.45k stars 7.25k forks source link

Coredump failed on overflowed tasks due to excessive stack usage of panic handler (IDFGH-12163) #13219

Open andylinpersonal opened 7 months ago

andylinpersonal commented 7 months ago

Title

Core dump failed on overflowed tasks due to excessive stack usage of panic handler

IDF version.

release/v5.1 7380f96 release/v5.2 93ea06f master c460e1c

Espressif SoC revision.

esp32s3: v0.1 esp32c3: v0.4 esp32c6: v0.0

Operating System used.

Linux

How did you build your project?

Command line with idf.py

Development Kit.

esp32c3: LuatOS CORE-ESP32-C3 (custom esp32-c3 with DIO mode flash) esp32c6: esp32-c6-devkitc-1-n8 esp32s3: esp32-s3-devkitc-1-n32r8v

Power Supply used.

USB

What is the expected behavior?

A core dump should be performed reliably, even for tasks whose stack has overflowed.

Following is the normal core dump flow:

  1. [Exception occurred]
  2. Run the exception handler on the current stack.
  3. Fatal exception -> invoke panic handler.
  4. If core dump is enabled, invoke the core dump function.
  5. If core dump stack is enabled, swap the stack to the pre-allocated one.
  6. Perform real core dump stuff.
  7. Swap back the stack to the panic handler's one.
  8. Reset.

What is the actual behavior?

Steps to reproduce.

Debug Logs.

More Information.

main/main.c

#include <driver/usb_serial_jtag.h>
#include <esp_vfs_dev.h>
#include <esp_idf_version.h>

#include <freertos/FreeRTOS.h>
#include <freertos/task.h>

#include <stdio.h>
#include <string.h>

/*
 * Set to 1 to pick a non-zero RESERVED_BYTES from the table below;
 * set to 0 to force RESERVED_BYTES to 0.
 */
#define ENABLE_RESERVED_SPACE 1

/*
 * RESERVED_BYTES lookup table, keyed by:
 *   - IDF version (5.1, 5.2 or 5.3),
 *   - target chip (CONFIG_IDF_TARGET_*),
 *   - compiler optimization level (CONFIG_COMPILER_OPTIMIZATION_*).
 *
 * The 5.1 section tests CONFIG_COMPILER_OPTIMIZATION_DEFAULT where the 5.2 and 5.3
 * sections test CONFIG_COMPILER_OPTIMIZATION_DEBUG.
 *
 * NOTE(review): any combination that is not listed (for example ESP32-C6 on 5.1,
 * ESP32-C3 on 5.2, or any other IDF version) leaves RESERVED_BYTES undefined while
 * ENABLE_RESERVED_SPACE is 1.
 */
#if ENABLE_RESERVED_SPACE
#if ESP_IDF_VERSION_MAJOR == 5 && ESP_IDF_VERSION_MINOR == 1

#ifdef CONFIG_IDF_TARGET_ESP32C3
#if CONFIG_COMPILER_OPTIMIZATION_NONE
#define RESERVED_BYTES 896
#elif CONFIG_COMPILER_OPTIMIZATION_DEFAULT
#define RESERVED_BYTES 640
#elif CONFIG_COMPILER_OPTIMIZATION_SIZE
#define RESERVED_BYTES 640
#endif
#endif

#ifdef CONFIG_IDF_TARGET_ESP32S3
#if CONFIG_COMPILER_OPTIMIZATION_NONE
// Gets stuck during core dump
#define RESERVED_BYTES 1536
#elif CONFIG_COMPILER_OPTIMIZATION_DEFAULT
#define RESERVED_BYTES 1024
#elif CONFIG_COMPILER_OPTIMIZATION_SIZE
#define RESERVED_BYTES 1024
#endif
#endif

#elif ESP_IDF_VERSION_MAJOR == 5 && ESP_IDF_VERSION_MINOR == 2

#if CONFIG_IDF_TARGET_ESP32C6
#if CONFIG_COMPILER_OPTIMIZATION_NONE
#define RESERVED_BYTES 896
#elif CONFIG_COMPILER_OPTIMIZATION_DEBUG
#define RESERVED_BYTES 640
#elif CONFIG_COMPILER_OPTIMIZATION_SIZE
#define RESERVED_BYTES 640
#endif
#endif

#ifdef CONFIG_IDF_TARGET_ESP32S3
#if CONFIG_COMPILER_OPTIMIZATION_NONE
// Gets stuck during core dump
#define RESERVED_BYTES 1536
#elif CONFIG_COMPILER_OPTIMIZATION_DEBUG
// Gets stuck during core dump
#define RESERVED_BYTES 1024
#elif CONFIG_COMPILER_OPTIMIZATION_SIZE
// Gets stuck during core dump
#define RESERVED_BYTES 1024
#endif
#endif

#elif ESP_IDF_VERSION_MAJOR == 5 && ESP_IDF_VERSION_MINOR == 3

#if CONFIG_IDF_TARGET_ESP32C3
#if CONFIG_COMPILER_OPTIMIZATION_NONE
#define RESERVED_BYTES 896
#elif CONFIG_COMPILER_OPTIMIZATION_DEBUG
#define RESERVED_BYTES 640
#elif CONFIG_COMPILER_OPTIMIZATION_SIZE
#define RESERVED_BYTES 640
#endif
#endif

#if CONFIG_IDF_TARGET_ESP32C6
#if CONFIG_COMPILER_OPTIMIZATION_NONE
#define RESERVED_BYTES 896
#elif CONFIG_COMPILER_OPTIMIZATION_DEBUG
#define RESERVED_BYTES 640
#elif CONFIG_COMPILER_OPTIMIZATION_SIZE
#define RESERVED_BYTES 640
#endif
#endif

#ifdef CONFIG_IDF_TARGET_ESP32S3
#if CONFIG_COMPILER_OPTIMIZATION_NONE
// Gets stuck during core dump
#define RESERVED_BYTES 1536
#elif CONFIG_COMPILER_OPTIMIZATION_DEBUG
// Gets stuck during core dump
#define RESERVED_BYTES 1024
#elif CONFIG_COMPILER_OPTIMIZATION_SIZE
// Gets stuck during core dump
#define RESERVED_BYTES 1024
#endif
#endif

#endif // IDF version selection (5.1 / 5.2 / 5.3)
#else  // !ENABLE_RESERVED_SPACE
#define RESERVED_BYTES 0
#endif // ENABLE_RESERVED_SPACE

/* Size passed to esp_cpu_set_watchpoint() in task_func(); also used as the alignment of task_stack. */
#define CANARY_WP_SIZE 64
/* Stack depth handed to xTaskCreateStaticPinnedToCore() for the test task. */
#define STACK_BYTES 3072

/* Statically allocated task control block and handle for the test task created in app_main(). */
StaticTask_t task_tcb;
TaskHandle_t task_handle = NULL;
/**
 * Backing storage for the test task's stack: STACK_BYTES for the task itself plus RESERVED_BYTES extra.
 *
 * app_main() hands FreeRTOS the region starting at task_stack[RESERVED_BYTES], so the first
 * RESERVED_BYTES elements lie outside the task's own stack. That extra space is reserved for the
 * panic handler and espcoredump to use before the stack is replaced.
 * COREDUMP_DRAM_ATTR is added so that the whole stack is saved to the core dump.
 *
 * NOTE(review): the element counts are named "bytes", which assumes StackType_t is one byte wide — confirm.
 */
COREDUMP_DRAM_ATTR __attribute__((aligned(CANARY_WP_SIZE))) StackType_t task_stack[STACK_BYTES + RESERVED_BYTES];

/*
 * Calls itself unconditionally with no base case, so it never returns normally.
 * task_func() invokes it once the "crash" command has been read; presumably each call consumes
 * more of the task stack until it overflows — confirm against the code generated for the
 * optimization level in use.
 * The surrounding pragmas silence GCC's -Winfinite-recursion diagnostic for this definition only.
 */
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Winfinite-recursion"
__attribute__((noreturn)) NOINLINE_ATTR void crashme(void) { crashme(); }
#pragma GCC diagnostic pop

/**
 * Entry point of the test task.
 *
 * Prints the caller's return address and the current stack pointer, then polls stdin once per
 * second until a line containing "crash" is read. It then optionally arms a store watchpoint
 * on the first CANARY_WP_SIZE bytes of task_stack and calls crashme(), which does not return.
 *
 * @param arg Unused task parameter (app_main() passes NULL).
 */
void task_func(void *arg) {
  // The parameter is named because an unnamed parameter in a definition is only valid from C23 on.
  (void)arg;

  fprintf(stderr, "Previous frame: %p:%p\n", __builtin_extract_return_addr(__builtin_return_address(0)), esp_cpu_get_sp());

  // `{0}` instead of `{}`: the empty initializer is a C23 / GNU extension.
  char buf[32] = {0};
  fputs("$ \n", stderr);
  while (true) {
    if (fgets(buf, sizeof(buf), stdin)) {
      // Echo the line back so the operator can see what was received.
      fprintf(stderr, "$ %s", buf);
      if (strnstr(buf, "crash", sizeof(buf))) {
        break;
      }
    }

    memset(buf, 0, sizeof(buf));
    vTaskDelay(pdMS_TO_TICKS(1000));
  }

  // Should be triggered on S3 when the reserved space is exhausted
#if RESERVED_BYTES >= CANARY_WP_SIZE && ENABLE_RESERVED_SPACE
  // Keep the call outside assert(): with assertions compiled out (NDEBUG) the whole
  // assert expression disappears, and the watchpoint would never be armed.
  const int wp_rc = esp_cpu_set_watchpoint(0, &task_stack, CANARY_WP_SIZE, ESP_CPU_WATCHPOINT_STORE);
  assert(wp_rc == ESP_OK);
  (void)wp_rc;
#endif
  crashme();
}

/**
 * Application entry point.
 *
 * Switches stdin to unbuffered mode, installs the USB-SERIAL-JTAG driver and routes the VFS
 * through it, prints the address range of task_stack, and creates the statically allocated
 * test task on core 0. The task is given the STACK_BYTES region that starts at
 * task_stack[RESERVED_BYTES].
 */
void app_main(void) {
  // Disable buffering on stdin
  setvbuf(stdin, NULL, _IONBF, 0);

  // Install USB-SERIAL-JTAG driver for interrupt-driven reads and writes.
  // The call is kept outside assert(): with assertions compiled out (NDEBUG) the whole
  // assert expression disappears, and the driver would never be installed.
  usb_serial_jtag_driver_config_t usb_serial_jtag_config =
      USB_SERIAL_JTAG_DRIVER_CONFIG_DEFAULT();
  const int install_rc = usb_serial_jtag_driver_install(&usb_serial_jtag_config);
  assert(install_rc == ESP_OK);
  (void)install_rc;

  // Tell vfs to use usb-serial-jtag driver
  esp_vfs_usb_serial_jtag_use_driver();

  // %p expects void *, so both ends of the range are cast explicitly.
  fprintf(stderr, "stack@%p-%p\n", (void *)&task_stack[0], (void *)((char *)&task_stack[0] + sizeof(task_stack)));

  task_handle = xTaskCreateStaticPinnedToCore(
      task_func, "outer", STACK_BYTES, NULL, CONFIG_PTHREAD_TASK_PRIO_DEFAULT, &task_stack[RESERVED_BYTES], &task_tcb, 0);
}

main/CMakeLists.txt

# Register this component with main.c as its only source file.
idf_component_register(SRCS "main.c")

CMakeLists.txt

# Top-level project file for the reproducer.
cmake_minimum_required(VERSION 3.16)
# Pull in the ESP-IDF project build logic from the tree that the IDF_PATH environment variable points to.
include($ENV{IDF_PATH}/tools/cmake/project.cmake)
# Project name.
project(tester)

sdkconfig.defaults

# Default configuration for the reproducer.
CONFIG_ESP_CONSOLE_USB_SERIAL_JTAG=y
CONFIG_FREERTOS_WATCHPOINT_END_OF_STACK=y

CONFIG_ESP32_ENABLE_COREDUMP_TO_FLASH=y

# Optionally enable this to print more details.
# CONFIG_LOG_DEFAULT_LEVEL_DEBUG=y

# Compiler optimization level: keep exactly one of the options below uncommented.
# -O0
CONFIG_COMPILER_OPTIMIZATION_NONE=y
# -Og (Default)
# CONFIG_COMPILER_OPTIMIZATION_DEBUG=y
# CONFIG_COMPILER_OPTIMIZATION_DEFAULT=y
# -Os
# CONFIG_COMPILER_OPTIMIZATION_SIZE=y

# CONFIG_COMPILER_OPTIMIZATION_ASSERTIONS_ENABLE=y

# Enable and allocate a dedicated stack for coredump code to handle the overflowed stacks safely.
# NOTE(review): the size is set under two option names, presumably so it applies across
# IDF releases that spell the option differently — confirm.
CONFIG_ESP_COREDUMP_STACK_SIZE=1536
CONFIG_ESP32_CORE_DUMP_STACK_SIZE=1536
andylinpersonal commented 7 months ago

Extra log files: logs.tar.gz

andylinpersonal commented 7 months ago

Additional note for ESP32-S3 with -O0:

  1. The core dump cannot proceed even with an enlarged stack.
    • Judging by a few added esp_rom_printf calls, the S3 appears to hang near esp_core_dump_replace_sp() and xthal_window_spill(). At the very least, esp_rom_printf prints nothing further after xthal_window_spill().
andylinpersonal commented 6 months ago

test.tar.gz

andylinpersonal commented 6 months ago

Any update? Thanks.

erhankur commented 6 months ago

@andylinpersonal Thanks for reporting the issue. We have some findings regarding stack replacement on both xtensa and riscv chips. I will wrap up what we did and explain here next week.

Regarding the overflow scenarios, I'm sorry, I haven't found time to investigate yet. I will check soon.

AxelLin commented 3 months ago

@erhankur Any follow-up on https://github.com/espressif/esp-idf/issues/13219#issuecomment-2027991578 ?