espressif / esp-iot-solution

Espressif IoT Library. IoT Device Drivers, Documentations And Solutions.
Apache License 2.0
1.88k stars 754 forks source link

使用usb_cdc_4g_module会死机 (AEGHB-395) #295

Closed Szeroy closed 11 months ago

Szeroy commented 12 months ago

IDF 版本是 5.1,使用 usb_cdc_4g_module 的例程去连接 4G 模块时会死机,模块用的是合宙 Air780E。我把例程里面的代码写在了自己的任务里面:

//! Modem itf: IN Addr:0x84, OUT Addr:0x02
void air780_task(void *arg)
{
    uint16_t retry = 0, time_out = 0;
    uint32_t get_type = 0;
    /* Initialize modem board. Dial-up internet */
    modem_config_t modem_config = MODEM_DEFAULT_CONFIG();
    /* Modem init flag, used to control init process */
#ifndef CONFIG_EXAMPLE_ENTER_PPP_DURING_INIT
    /* if Not enter ppp, modem will enter command mode after init */
    modem_config.flags |= MODEM_FLAGS_INIT_NOT_ENTER_PPP;
    /* if Not waiting for modem ready, just return after modem init */
    modem_config.flags |= MODEM_FLAGS_INIT_NOT_BLOCK;
#endif
    modem_board_init(&modem_config);
    vTaskDelay(15000 / portTICK_PERIOD_MS);

    esp_err_t err = ESP_FAIL;
    while (err == ESP_FAIL)
    {
        err = esp_modem_dce_generic_command(s_dce, "AT+CGNSPWR=1\r", MODEM_COMMAND_TIMEOUT_DEFAULT, esp_modem_open_gps, NULL);
    }
    err = ESP_FAIL;
    if (get_user_config()->imei[0] != 0)
    {
        ESP_LOGI(TAG, "imei = %s\n", get_user_config()->imei);
    }
    else
    {
        while (err == ESP_FAIL)
        {
            err = esp_modem_dce_generic_command(s_dce, "AT+CGSN\r", MODEM_COMMAND_TIMEOUT_DEFAULT, esp_modem_get_imei, NULL);
            vTaskDelay(2000 / portTICK_PERIOD_MS);
        }
    }

    int ber = 0;
    modem_board_get_signal_quality(&get_equipment_data()->rssi, &ber);
    while (1)
    {
        if (get_equipment_data_queue != NULL)
        {
            // gps_data_update = 1;
            xQueueReceive(get_equipment_data_queue, &get_type, portMAX_DELAY);
            if (get_type == GET_GPS_DATA)
            {
                modem_board_ppp_auto_connect(false);
                while (modem_board_ppp_stop(10000) != ESP_OK)
                {
                    vTaskDelay(1000 / portTICK_PERIOD_MS);
                }
                modem_board_get_signal_quality(&get_equipment_data()->rssi, &ber);
                vTaskDelay(200 / portTICK_PERIOD_MS);
                esp_err_t err = esp_modem_dce_generic_command(s_dce, "AT+CGNSINF\r", MODEM_COMMAND_TIMEOUT_DEFAULT, esp_modem_dce_handle_get_gps_data, NULL);
                if (err != ESP_OK)
                {
                    retry++;
                    if (retry == 20)
                    {
                        modem_board_force_reset();
                        ESP_LOGE(TAG, "get gps data failed,modem reset");
                        break;
                    }
                    else
                    {
                        ESP_LOGW(TAG, "get gps data failed,retry = %d", retry);
                    }
                    vTaskDelay(1000 / portTICK_PERIOD_MS);
                }
                if ((get_equipment_data()->latitude != 0) && (get_equipment_data()->longitude != 0))
                {
                    gps_data_update = 1;
                }
            }
        }

        if (get_type == START_PPP)
        {
            modem_board_ppp_auto_connect(true);
            modem_board_ppp_start(500);
        }
    }
}

但是执行到这两步的时候会死机:

 case STAGE_CHECK_SIGNAL:
    /* Signal-quality stage: when the check does not pass, schedule another
     * attempt in 3000 ms and count the retry; otherwise move on to the
     * registration stage. */
    if (_check_signal_quality() != true)
    {
        retry_after_ms = 3000;
        ++stage_retry_times;
    }
    else
    {
        modem_stage = STAGE_CHECK_REGIST;
        goto _stage_succeed;
    }
    break;
case STAGE_CHECK_REGIST:
    /* Network-registration stage: same retry policy as above; on success the
     * next stage is STAGE_START_PPP. */
    if (_check_network_registration() != true)
    {
        retry_after_ms = 3000;
        ++stage_retry_times;
    }
    else
    {
        modem_stage = STAGE_START_PPP;
        goto _stage_succeed;
    }
    break;

有时候加上printf或者把esp_modem_process_command_done挪到字符串解析之后,就不会死机了,这个是原来的代码,没有改

/**
 * @brief Line handler for the response to the signal-quality query (+CSQ).
 *
 * A line containing "+CSQ" has its "<rssi>,<ber>" values stored into the
 * context referenced by dce->handle_line_ctx. A line containing the success
 * or error result code reports command completion through
 * esp_modem_process_command_done().
 *
 * The values are written BEFORE completion is reported. A single line may
 * carry both the "+CSQ" payload and the result code; reporting completion
 * first would let the waiting side continue while this handler is still
 * writing through handle_line_ctx.
 * NOTE(review): the context is presumably owned by the task that issued the
 * command and may stop being valid once completion is reported - confirm.
 *
 * @param dce  Modem DCE object; handle_line_ctx must reference an
 *             esp_modem_dce_csq_ctx_t.
 * @param line Response line received from the modem.
 * @return ESP_OK when the line contained "+CSQ"; otherwise the result of
 *         esp_modem_process_command_done() for a result-code line, or
 *         ESP_FAIL when the line matched nothing.
 */
static esp_err_t esp_modem_dce_common_handle_csq(esp_modem_dce_t *dce, const char *line)
{
    esp_err_t err = ESP_FAIL;
    const char *csq_pos = strstr(line, "+CSQ");

    if (csq_pos != NULL) {
        /* store value of rssi and ber */
        esp_modem_dce_csq_ctx_t *csq = dce->handle_line_ctx;
        /* +CSQ: <rssi>,<ber> */
        sscanf(csq_pos, "%*s%d,%d", &csq->rssi, &csq->ber);
    }

    /* Report completion only after the context has been updated. */
    if (strstr(line, MODEM_RESULT_CODE_SUCCESS)) {
        err = esp_modem_process_command_done(dce, ESP_MODEM_STATE_SUCCESS);
    } else if (strstr(line, MODEM_RESULT_CODE_ERROR)) {
        err = esp_modem_process_command_done(dce, ESP_MODEM_STATE_FAIL);
    }

    if (csq_pos != NULL) {
        /* A "+CSQ" line is always reported as handled. */
        err = ESP_OK;
    }
    return err;
}

有没有可能存在任务优先级的问题,在这里面看到任务优先级加3,但是好像没看到哪里调用了CONFIG_USBH_TASK_BASE_PRIORITY 这个宏定义:

/* Default initializer for a modem_config_t: 15 KiB RX and TX buffers, a
 * 1600-byte line buffer, and an event task with a 3072 stack size running at
 * CONFIG_USBH_TASK_BASE_PRIORITY + 3. */
#define MODEM_DEFAULT_CONFIG()                                          \
    {                                                                   \
        .event_task_stack_size = 3072,                                  \
        .event_task_priority   = CONFIG_USBH_TASK_BASE_PRIORITY + 3,    \
        .line_buffer_size      = 1600,                                  \
        .tx_buffer_size        = 15 * 1024,                             \
        .rx_buffer_size        = 15 * 1024,                             \
    }

只烧录例程时是正常的,但用在项目中就会死机,会不会跟项目里多创建了几个任务有关系?

leeebo commented 12 months ago

@Szeroy 建议提供完整 LOG

Szeroy commented 11 months ago

> @Szeroy 建议提供完整 LOG

基本上只在查询信号和运营商的时候会死机,如果直接注释掉这两个步骤,就过去了。


I (11120) modem_board: reconnect after 5s...
I (12120) modem_board: reconnect after 4s...
I (13120) modem_board: reconnect after 3s...
I (14120) modem_board: reconnect after 2s...
I (15120) modem_board: reconnect after 1s...
I (15120) modem_board: Modem state STAGE_SYNC, Start
I (15149) modem_board: Network Auto reconnecting ...
I (15149) modem_board: Modem state STAGE_SYNC, Success!
W (15149) network_4g: [ Modem Board Event ]: Network disconnected
I (15249) modem_board: Modem state STAGE_CHECK_SIM, Start
I (15253) modem_board: SIM Card Ready
I (15253) modem_board: Modem state STAGE_CHECK_SIM, Success!
I (15253) network_4g: [ Modem Board Event ]: SIM Card Connected
I (15354) modem_board: Modem state STAGE_CHECK_SIGNAL, Start
Guru Meditation Error: Core  1 panic'ed (LoadProhibited). Exception was unhandled.

Core 1 register dump: PC : 0x400556c0 PS : 0x00060230 A0 : 0x8206d03e A1 : 0x3fcac770 0x400556c0: strlen in ROM

A2 : 0x00000016 A3 : 0x00000014 A4 : 0x000000ff A5 : 0x0000ff00 A6 : 0x00ff0000 A7 : 0xff000000 A8 : 0x820755be A9 : 0x00000000 A10 : 0x00000000 A11 : 0x00000005 A12 : 0x3fcac9b4 A13 : 0x00000000 A14 : 0x3fcac984 A15 : 0x00000001 SAR : 0x00000004 EXCCAUSE: 0x0000001c EXCVADDR: 0x00000014 LBEG : 0x40056fc5 LEND : 0x40056fe7 LCOUNT : 0xffffffff 0x40056fc5: memcpy in ROM

0x40056fe7: memcpy in ROM

Backtrace: 0x400556bd:0x3fcac770 |<-CORRUPTED 0x400556bd: strlen in ROM

ELF file SHA256: a4ed21ddf8cbd491

Rebooting... ESP-ROM:esp32s3-20210327 Build:Mar 27 2021 rst:0x3 (RTC_SW_SYS_RST),boot:0x18 (SPI_FAST_FLASH_BOOT) Saved PC:0x4037a102 0x4037a102: esp_cpu_wait_for_intr at E:/espV5.1/.espressif/frameworks/esp-idf-v5.1/components/esp_hw_support/cpu.c:121

SPIWP:0xee mode:DIO, clock div:1 load:0x3fce3818,len:0x16e4 load:0x403c9700,len:0x4 load:0x403c9704,len:0xc00 load:0x403cc700,len:0x2eb0 entry 0x403c9908 I (31) boot: ESP-IDF v5.1-dirty 2nd stage bootloader I (31) boot: compile time Sep 20 2023 10:17:18 I (31) boot: Multicore bootloader I (35) boot: chip revision: v0.1 I (39) boot.esp32s3: Boot SPI Speed : 80MHz I (43) boot.esp32s3: SPI Mode : DIO I (48) boot.esp32s3: SPI Flash Size : 4MB I (53) boot: Enabling RNG early entropy source... I (58) boot: Partition Table: I (62) boot: ## Label Usage Type ST Offset Length I (69) boot: 0 nvs WiFi data 01 02 00009000 00006000 I (77) boot: 1 phy_init RF data 01 01 0000f000 00001000 I (84) boot: 2 factory factory app 00 00 00010000 0016e360 I (92) boot: End of partition table I (96) esp_image: segment 0: paddr=00010020 vaddr=3c090020 size=302c0h (197312) map I (140) esp_image: segment 1: paddr=000402e8 vaddr=3fc98200 size=02b24h ( 11044) load I (142) esp_image: segment 2: paddr=00042e14 vaddr=40374000 size=0d204h ( 53764) load I (157) esp_image: segment 3: paddr=00050020 vaddr=42000020 size=8325ch (537180) map I (254) esp_image: segment 4: paddr=000d3284 vaddr=40381204 size=06f60h ( 28512) load I (269) boot: Loaded app from partition at offset 0x10000 I (269) boot: Disabling RNG early entropy source... I (280) cpu_start: Multicore app I (281) esp_psram: Found 2MB PSRAM device I (281) esp_psram: Speed: 40MHz I (282) cpu_start: Pro cpu up. I (285) cpu_start: Starting app cpu, entry point is 0x403758d8

Szeroy commented 11 months ago

@Szeroy 建议提供完整 LOG

请问有找到这个问题吗

leeebo commented 11 months ago

LoadProhibited EXCVADDR: 0x00000014 意思是内存加载错误,地址是 0x14。大概率是在读一个结构体空指针,触发异常。

example 代码你有更改吗?

Szeroy commented 11 months ago

LoadProhibited EXCVADDR: 0x00000014 意思是内存加载错误,地址是 0x14。大概率是在读一个结构体空指针,触发异常。

example 代码你有更改吗?

之前example是可以的,example初始化是在main里面的,现在我是把初始化新建了个单独任务,这个会不会存在问题。 现在比较怀疑的是,有没有可能存在发送at指令,没有等收到就直接去解析,类似于事件和任务的优先级问题。 目前把这个库里面_modem_daemon_task的优先级再往上加一点就不会死机了,现在不太确定具体是不是这个导致的。

/**
 * @brief Initialize the modem board and start the modem daemon task.
 *
 * Creates the modem event group, optionally disables PPP auto-connect, and
 * creates the "modem_daemon" task. Unless MODEM_FLAGS_INIT_NOT_ENTER_PPP or
 * MODEM_FLAGS_INIT_NOT_BLOCK is set, the call blocks until
 * PPP_NET_CONNECT_BIT is set in the event group.
 *
 * Creation failures are checked explicitly instead of with assert(), so they
 * are still detected when assertions are compiled out. On task-creation
 * failure the event group is released again, so a later call can retry.
 *
 * @param config Modem configuration. Buffer sizes, event task priority and
 *               event task stack size must be non-zero.
 *               NOTE(review): the pointer is handed to the daemon task as its
 *               argument, so it presumably has to stay valid after this call
 *               returns - confirm against _modem_daemon_task.
 * @return ESP_OK on success,
 *         ESP_ERR_INVALID_STATE when already initialized,
 *         ESP_ERR_INVALID_ARG for a NULL or incomplete configuration,
 *         ESP_ERR_NO_MEM when the event group or the task cannot be created.
 */
esp_err_t modem_board_init(modem_config_t *config)
{
    MODEM_CHECK(s_modem_evt_hdl == NULL, "Modem already initialized", ESP_ERR_INVALID_STATE);
    ESP_LOGI(TAG, "iot_usbh_modem, version: %d.%d.%d", IOT_USBH_MODEM_VER_MAJOR, IOT_USBH_MODEM_VER_MINOR, IOT_USBH_MODEM_VER_PATCH);
    MODEM_CHECK(config != NULL && config->line_buffer_size && config->rx_buffer_size && config->tx_buffer_size, "Buffer size can not be 0", ESP_ERR_INVALID_ARG);
    MODEM_CHECK(config != NULL && config->event_task_priority && config->event_task_stack_size, "Task stack size can not be 0", ESP_ERR_INVALID_ARG);
    s_modem_evt_hdl = xEventGroupCreate();
    if (s_modem_evt_hdl == NULL)
    {
        ESP_LOGE(TAG, "Create modem event group failed");
        return ESP_ERR_NO_MEM;
    }
    // if set not enter ppp mode, daemon task will suspend
    if (config->flags & MODEM_FLAGS_INIT_NOT_ENTER_PPP)
    {
        modem_board_ppp_auto_connect(false);
    }
    /* Create Modem Daemon task */
    TaskHandle_t daemon_task_handle = NULL;
    BaseType_t task_created = xTaskCreate(_modem_daemon_task, "modem_daemon", config->event_task_stack_size, config, config->event_task_priority+3, &daemon_task_handle);
    if (task_created != pdPASS || daemon_task_handle == NULL)
    {
        ESP_LOGE(TAG, "Create modem daemon task failed");
        /* Undo the partial initialization so init can be called again. */
        vEventGroupDelete(s_modem_evt_hdl);
        s_modem_evt_hdl = NULL;
        return ESP_ERR_NO_MEM;
    }
    xTaskNotifyGive(daemon_task_handle);
    // If auto enter ppp and block until ppp got ip
    if (((config->flags & MODEM_FLAGS_INIT_NOT_ENTER_PPP) == 0) && ((config->flags & MODEM_FLAGS_INIT_NOT_BLOCK) == 0))
    {
        xEventGroupWaitBits(s_modem_evt_hdl, PPP_NET_CONNECT_BIT, pdFALSE, pdTRUE, portMAX_DELAY);
    }
    return ESP_OK;
}
leeebo commented 11 months ago

@Szeroy 我已经在 IDF 5.x 版本上复现了这个问题,IDF 4.x 版本下正常,具体原因正在调查。

Szeroy commented 11 months ago

> @Szeroy 我已经在 IDF 5.x 版本上复现了这个问题,IDF 4.x 版本下正常,具体原因正在调查。

好的,因为有问题改库稍微有点麻烦

leeebo commented 11 months ago

@Szeroy The bug has been fixed by https://github.com/espressif/esp-iot-solution/commit/62959339d02da792b1a86f5d39ac9017495132f3 (please upgrade to version v0.2.0 if you using idf component manager), and we also added air780e to the support list, you can choose it through menuconfig directly.

Szeroy commented 11 months ago

@Szeroy The bug has been fixed by https://github.com/espressif/esp-iot-solution/commit/62959339d02da792b1a86f5d39ac9017495132f3 (please upgrade to version v0.2.0 if you using idf component manager), and we also added air780e to the support list, you can choose it through menuconfig directly.

谢谢,我试一下