espressif / esp-idf

Espressif IoT Development Framework. Official development framework for Espressif SoCs.
Apache License 2.0
13.79k stars 7.31k forks source link

Nimble BLE Cached access crash (IDFGH-4052) #5923

Closed Francois-Belanger closed 1 month ago

Francois-Belanger commented 4 years ago

Environment

Problem Description

If I enable BLE Nimble and access the Flash a lot, I get this crash pretty quickly.

Without the BLE enabled, everything runs fine. When Enable the BLE is only advertising for now, no connection.

Code for BLE

Init:

 ESP_ERROR_CHECK(esp_nimble_hci_and_controller_init());

    nimble_port_init();
    // Initialize the NimBLE host configuration
    ble_hs_cfg.sync_cb = ble_on_sync;
    ble_hs_cfg.reset_cb = ble_on_reset;
    ble_hs_cfg.gatts_register_cb = gatt_svr_register_cb;
    int rc = gatt_svr_init();
    assert(rc == 0);

    rc = ble_svc_gap_device_name_set(device_name.c_str());  // Set the default device name
    assert(rc == 0);

    ble_att_set_preferred_mtu(256);

    // Start the task
    nimble_port_freertos_init(ble_host_task);

The rest that is important:

void BLETASK::ble_advertise(void) {
    struct ble_gap_adv_params adv_params;
    struct ble_hs_adv_fields fields;
    int rc;

    if (bleAdvertising) {
        return;
    }
    memset(&fields, 0, sizeof(fields));
    fields.flags = BLE_HS_ADV_F_DISC_GEN | BLE_HS_ADV_F_BREDR_UNSUP;
    fields.tx_pwr_lvl_is_present = 1;
    fields.tx_pwr_lvl = BLE_HS_ADV_TX_PWR_LVL_AUTO;

    fields.name = (uint8_t *)device_name.c_str();
    fields.name_len = device_name.size();
    fields.name_is_complete = 1;

    rc = ble_gap_adv_set_fields(&fields);
    if (rc != 0) {
        ESP_LOGE(TAG, "error setting advertisement data; rc=%d", rc);
        return;
    }

    /* Begin advertising */
    memset(&adv_params, 0, sizeof(adv_params));
    adv_params.conn_mode = BLE_GAP_CONN_MODE_UND;
    adv_params.disc_mode = BLE_GAP_DISC_MODE_GEN;
    rc = ble_gap_adv_start(ble_addr_type, NULL, BLE_HS_FOREVER, &adv_params, ble_gap_event, NULL);
    if (rc != 0) {
        ESP_LOGE(TAG, "error enabling advertisement; rc=%d", rc);
        return;
    }
    bleAdvertising = true;
}

int BLETASK::ble_gap_event(struct ble_gap_event *event, void *arg) {
    switch (event->type) {
        case BLE_GAP_EVENT_CONNECT:
            /* A new connection was established or a connection attempt failed */
            ESP_LOGI(TAG, "connection %s; status=%d, handle: %d", event->connect.status == 0 ? "established" : "failed", event->connect.status, event->connect.conn_handle);

            if (event->connect.status != 0) {
                /* Connection failed; resume advertising */
                bleTask->ble_advertise();
                bleTask->bleConnectedToHost = false;
            } else {
                bleTask->bleConnectedToHost = true;
                bleTask->bleAdvertising = false;
            }
            bleTask->conn_handle = event->connect.conn_handle;
            bleTask->g_spp_conn_handle = event->connect.conn_handle;
            break;

        case BLE_GAP_EVENT_DISCONNECT:
            ESP_LOGI(TAG, "disconnect; reason=%d", event->disconnect.reason);
            bleTask->conn_handle = -1;
            /* Connection terminated; resume advertising */
            bleTask->bleConnectedToHost = false;
            bleTask->currentMTUsize = 20;
            bleTask->ble_advertise();
            break;

        case BLE_GAP_EVENT_ADV_COMPLETE:
            ESP_LOGI(TAG, "adv complete");
            bleTask->ble_advertise();
            break;

        case BLE_GAP_EVENT_NOTIFY_TX:
            // ESP_LOGI(TAG, "BLE_GAP_EVENT_NOTIFY_TX, STATUS: %d", event->notify_tx.status);
            bleTask->notify(TX_DONE);
            break;

        case BLE_GAP_EVENT_MTU:
            ESP_LOGI(TAG, "mtu update event; conn_handle=%d mtu=%d", event->mtu.conn_handle, event->mtu.value);
            if (event->mtu.value > 20) {
                bleTask->currentMTUsize = event->mtu.value - 10;
            } else {
                bleTask->currentMTUsize = event->mtu.value;
            }
            break;
    }

    return 0;
}

void BLETASK::ble_on_sync(void) {
    int rc;

    rc = ble_hs_id_infer_auto(0, &bleTask->ble_addr_type);
    assert(rc == 0);

    uint8_t addr_val[6] = {0};
    rc = ble_hs_id_copy_addr(bleTask->ble_addr_type, addr_val, NULL);

    // ESP_LOGI(TAG, "Device Address: ");
    // print_addr(addr_val);
    // ESP_LOGI(TAG, "");

    /* Begin advertising */
    bleTask->ble_advertise();
}

void BLETASK::ble_on_reset(int reason) {
    bleTask->bleConnectedToHost = false;
    MODLOG_DFLT(ERROR, "Resetting state; reason=%d", reason);
}

void BLETASK::ble_host_task(void *param) {
    ESP_LOGI(TAG, "BLE Host Task Started");
    /* This function will return only when nimble_port_stop() is executed */
    nimble_port_run();

    nimble_port_freertos_deinit();
}

int BLETASK::gatt_svr_chr_access_uart_write(uint16_t conn_handle, uint16_t attr_handle, struct ble_gatt_access_ctxt *ctxt, void *arg) {
    struct os_mbuf *om = ctxt->om;

    switch (ctxt->op) {
        case BLE_GATT_ACCESS_OP_WRITE_CHR:
            while (om) {
                // ets_printf("gatt_svr_chr_access_uart_write : ");
                for (size_t i = 0; i < om->om_len; i++) {
                    // ets_printf("%02X ", om->om_data[i]);
                    if (bleTask->cali.process_byte(om->om_data[i])) {
                        // Payload is valid...
                    }
                }
                // ets_printf("\n");
                om = SLIST_NEXT(om, om_next);
            }
            return 0;
        default:
            assert(0);
            return BLE_ATT_ERR_UNLIKELY;
    }
}

void BLETASK::gatt_svr_register_cb(struct ble_gatt_register_ctxt *ctxt, void *arg) {
    char buf[BLE_UUID_STR_LEN];

    switch (ctxt->op) {
        case BLE_GATT_REGISTER_OP_SVC:
            ESP_LOGI(TAG, "registered service %s with handle=%d", ble_uuid_to_str(ctxt->svc.svc_def->uuid, buf), ctxt->svc.handle);
            break;

        case BLE_GATT_REGISTER_OP_CHR:
            ESP_LOGI(TAG,
                     "registering characteristic %s with "
                     "def_handle=%d val_handle=%d",
                     ble_uuid_to_str(ctxt->chr.chr_def->uuid, buf),
                     ctxt->chr.def_handle,
                     ctxt->chr.val_handle);
            break;

        case BLE_GATT_REGISTER_OP_DSC:
            ESP_LOGI(TAG, "registering descriptor %s with handle=%d", ble_uuid_to_str(ctxt->dsc.dsc_def->uuid, buf), ctxt->dsc.handle);
            break;

        default:
            assert(0);
            break;
    }
}

int BLETASK::gatt_svr_init(void) {
    int rc;

    extern ble_uuid128_t gatt_svr_svc_uart_uuid;
    extern ble_uuid128_t gatt_svr_chr_uart_write_uuid;
    extern ble_uuid128_t gatt_svr_chr_uart_read_uuid;

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wmissing-field-initializers"

    const struct ble_gatt_chr_def gatt_chr[] = {
        {
            .uuid = &gatt_svr_chr_uart_read_uuid.u,
            .access_cb = gatt_svr_chr_access_uart_write,
            .flags = BLE_GATT_CHR_F_NOTIFY,
            .val_handle = &g_bleuart_attr_read_handle,
        },
        {
            /* Characteristic: Write */
            .uuid = &gatt_svr_chr_uart_write_uuid.u,
            .access_cb = gatt_svr_chr_access_uart_write,
            .flags = BLE_GATT_CHR_F_WRITE | BLE_GATT_CHR_F_WRITE_NO_RSP,
            .val_handle = &g_bleuart_attr_write_handle,
        },
        {
            0, /* No more characteristics in this service */
        },
    };

    const struct ble_gatt_svc_def gatt_svr_svcs[] = {
        {
            /* Service: uart */
            .type = BLE_GATT_SVC_TYPE_PRIMARY,
            .uuid = &gatt_svr_svc_uart_uuid.u,
            .characteristics = gatt_chr,
        },
        {
            0, /* No more services */
        },
    };
#pragma GCC diagnostic pop

    ble_svc_gap_init();
    ble_svc_gatt_init();

    rc = ble_gatts_count_cfg(gatt_svr_svcs);
    if (rc != 0) {
        return rc;
    }

    rc = ble_gatts_add_svcs(gatt_svr_svcs);
    if (rc != 0) {
        return rc;
    }

    return 0;
}

Debug Logs

Re-enable cpu cache.
Guru Meditation Error: Core  0 panic'ed (Cache disabled but cached memory region accessed). 

Core  0 register dump:
PC      : 0x4009772c  PS      : 0x00060c34  A0      : 0x80082230  A1      : 0x3ffbe610  
0x4009772c: xQueueReceiveFromISR at /esp-idf/components/freertos/queue.c:1622 (discriminator 1)

A2      : 0x3f940e50  A3      : 0x00000000  A4      : 0x3ffbe650  A5      : 0xbad00bad  
A6      : 0x00000001  A7      : 0x00000000  A8      : 0x01eb1365  A9      : 0x3ff5f078  
A10     : 0x3ff5f07c  A11     : 0x3ffb2dec  A12     : 0x00000655  A13     : 0x3f42ff10  
A14     : 0x40087bf0  A15     : 0x3ffc59d0  SAR     : 0x0000001f  EXCCAUSE: 0x00000007  
0x40087bf0: r_lld_evt_schedule at ??:?

EXCVADDR: 0x00000000  LBEG    : 0x400847c5  LEND    : 0x400847cd  LCOUNT  : 0x00000026  
0x400847c5: esp_timer_impl_get_counter_reg at /esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:126 (discriminator 2)

0x400847cd: esp_timer_impl_get_counter_reg at /esp-idf/components/esp_timer/src/esp_timer_impl_lac.c:136 (discriminator 1)

Backtrace:0x40097729:0x3ffbe610 0x4008222d:0x3ffbe630 0x4008ce4a:0x3ffbe650 0x4008d1cc:0x3ffbe690 0x4008cdde:0x3ffbe6d0 0x4008223d:0x3ffbe6f0 0x40087eae:0x3ffbe710 0x40085a7e:0x3ffbe750 0x4008a2bd:0x3ffbe770 0x4008acc7:0x3ffbe790 0x40084cbd:0x3ffbe7b0 0x40098096:0x3ffbd3f0 0x400902de:0x3ffbd410 0x4008bf25:0x3ffbd430 0x40123621:0x3ffbd470 0x400850dd:0x3ffbd490 0x4011eccd:0x3ffbd4b0 0x4011faf7:0x3ffbd4e0 0x40120ca2:0x3ffbd530 0x400ddb71:0x3ffbd570 0x400dcc66:0x3ffbd590 0x400dce7e:0x3ffbd5c0 0x400dd041:0x3ffbd5f0 0x400db05a:0x3ffbd660
0x40097729: xQueueReceiveFromISR at /esp-idf/components/freertos/queue.c:1622 (discriminator 1)

0x4008222d: semphr_take_from_isr_wrapper at/esp-idf/components/esp_wifi/esp32/esp_adapter.c:271

0x4008ce4a: coex_bb_reset_unlock at ??:?

0x4008d1cc: coex_core_request at ??:?

0x4008cdde: coex_bt_request at ??:?

0x4008223d: coex_bt_request_wrapper at /esp-idf/components/esp_wifi/esp32/esp_adapter.c:501

0x40087eae: r_lld_evt_schedule at ??:?

0x40085a7e: r_ea_finetimer_isr at ??:?

0x4008a2bd: r_rwbt_isr at ??:?

0x4008acc7: r_rwbtdm_isr_wrapper at intc.c:?

0x40084cbd: _xt_lowint1 at /esp-idf/components/freertos/xtensa/xtensa_vectors.S:1105

0x40098096: spi_flash_ll_set_cs_pin at /esp-idf/components/hal/esp32/include/hal/spi_flash_ll.h:243
 (inlined by) spi_flash_hal_device_config at /esp-idf/components/hal/spi_flash_hal_common.inc:40

0x400902de: spiflash_start_default at /esp-idf/components/spi_flash/esp_flash_api.c:106

0x4008bf25: esp_flash_read at /esp-idf/components/spi_flash/esp_flash_api.c:576

0x40123621: esp_partition_read at /esp-idf/components/spi_flash/partition.c:347

0x400850dd: flash_read at /build/../main/esp_littlefs.c:51

0x4011eccd: lfs_bd_read at /build/../components/littlefs-master/lfs.c:84
 (inlined by) lfs_bd_read at /build/../components/littlefs-master/lfs.c:27

0x4011faf7: lfs_ctz_find at /build/../components/littlefs-master/lfs.c:2161

0x40120ca2: lfs_file_read at /build/../components/littlefs-master/lfs.c:2774

0x400ddb71: esp_lfs_file_read at /build/../main/esp_littlefs.c:257

0x400dcc66: AUDIOTASK::updateSeq(AUDIOSEQ&) at /build/../main/audio_Task.cpp:216

0x400dce7e: AUDIOTASK::tick() at /build/../main/audio_Task.cpp:291 (discriminator 2)

0x400dd041: AUDIOTASK::run(void*) at /build/../main/audio_Task.cpp:45

0x400db05a: Task::runTask(void*) at /build/../main/Task.cpp:60

sdkconfig.zip

prasad-alatkar commented 4 years ago

Hi @Francois-Belanger Can you please take a look at similar issues https://github.com/espressif/esp-idf/issues/1299 and https://github.com/espressif/arduino-esp32/issues/855 for solution? Please let me know if you still face any issue.

Francois-Belanger commented 4 years ago

Hi @prasad-alatkar I don't use any interrupt in my code. Does the Nimble use any on the client code ? In the Nimble Example I didn't see anything with IRAM so I guess all callback are not in interrupt ? Thanks

Francois-Belanger commented 4 years ago

Hummm issue #5924 or #5834 ? Looks similar. I didn't have these issues before, this is from the 4.3 rev.

Francois-Belanger commented 4 years ago

Ok, if I disable Software controls WiFi/Bluetooth coexistence everything looks stable. @prasad-alatkar can you help please. Looks like task priority ?

Thanks

prasad-alatkar commented 4 years ago

@Francois-Belanger, Can you please try with disabling SPI_MASTER_ISR_IN_IRAM (make menuconfig → Component config → Driver configurations → SPI configuration) from menuconfig once ? I could see from the sdkconfig CONFIG_BT_NIMBLE_MEM_ALLOC_MODE_EXTERNAL is enabled in your case and following up few different threads related to the similar issues looks like disabling SPI_MASTER_ISR_IN_IRAM may help you here (Reference: https://www.esp32.com/viewtopic.php?t=7684 ).

if I disable Software controls WiFi/Bluetooth coexistence everything looks stable

I will look into it and update/explain you accordingly. Meanwhile please let me know if the above suggestion helps you overcome the issue.

Francois-Belanger commented 4 years ago

Hi @prasad-alatkar I've tried disabling SPI_MASTER_ISR_IN_IRAM, no improvements, still crash with coex in software :-( What is the penalty in speed to use the hardware coex ?

Thanks

prasad-alatkar commented 4 years ago

As per my understanding, with hardware coex control BLE performance won't be affected at all, there is chance of little degradation in wifi performance but given that your BLE application is only advertising, I believe it should give enough window for wifi operations. You can refer detailed explanation on this topic here: WIFI/BLE Simultaneously and here: ESP32 FAQ (Refer 5.3.2)

To narrow down the issue further, Can you please try to enable CONFIG_NIMBLE_MEM_ALLOC_MODE_INTERNAL (and disable CONFIG_NIMBLE_MEM_ALLOC_MODE_EXTERNAL) and see if you still observe the issue ? Please share the results.

Francois-Belanger commented 4 years ago

Hi @prasad-alatkar putting Nimble Internal Memory and putting Coex Soft crash again. :-(

blueMoodBHD commented 2 months ago

@Francois-Belanger, Sorry for the late reply. Does this issue still exist? Please update the IDF and try again.

Alvin1Zhang commented 1 month ago

Thanks for reporting, will close due to short of feedback, feel free to reopen with more updates.