zephyrproject-rtos / zephyr

Primary Git Repository for the Zephyr Project. Zephyr is a new generation, scalable, optimized, secure RTOS for multiple hardware architectures.
https://docs.zephyrproject.org
Apache License 2.0
10.55k stars 6.46k forks source link

Bluetooth stops responding when calling k_delayed_work_submit. v2.3.0 #27482

Closed ghost closed 4 years ago

ghost commented 4 years ago

Describe the bug

Using k_delayed_work causes Bluetooth to stop responding.

k_delayed_work_init(&leds_update_work, leds_update);
k_delayed_work_submit(&leds_update_work, K_MSEC(LEDS_UPDATE_DELAY));
*** Booting Zephyr OS build zephyr-v2.3.0  ***
Bluetooth initialized
Advertising successfully started
[00:00:00.405,029] <inf> bt_hci_core: HW Platform: Nordic Semiconductor (0x0002)
[00:00:00.405,029] <inf> bt_hci_core: HW Variant: nRF52x (0x0002)
[00:00:00.405,059] <inf> bt_hci_core: Firmware: Standard Bluetooth controller (0x00) Version 2.3 Build 0
[00:00:00.405,670] <inf> bt_hci_core: Identity: ce:d0:45:2a:a4:65 (random)
[00:00:00.405,670] <inf> bt_hci_core: HCI: version 5.2 (0x0b) revision 0x0000, manufacturer 0x05f1
[00:00:00.405,670] <inf> bt_hci_core: LMP: version 5.2 (0x0b) subver 0xffff
Connected
[00:00:18.570,587] <wrn> bt_att: Ignoring unexpected request
[00:00:29.640,686] <err> bt_conn: not connected!
[00:00:29.640,716] <err> os: ***** BUS FAULT *****
[00:00:29.640,716] <err> os:   Imprecise data bus error
[00:00:29.640,747] <err> os: r0/a1:  0xfffffffc  r1/a2:  0x00000008  r2/a3:  0x0001c439
[00:00:29.640,747] <err> os: r3/a4:  0x20002c64 r12/ip:  0xffffffff r14/lr:  0x0001c3ab
[00:00:29.640,747] <err> os:  xpsr:  0x21000000
[00:00:29.640,747] <err> os: Faulting instruction address (r15/pc): 0x0001c3b8
[00:00:29.640,747] <err> os: >>> ZEPHYR FATAL ERROR 0: CPU exception on CPU 0
[00:00:29.640,747] <err> os: Current thread: 0x20001218 (unknown)
[00:00:30.267,547] <err> os: Halting system

It works fine unless you use k_delayed_work_init and k_delayed_work_submit.

Environment (please complete the following information):

Best regards.

carlescufi commented 4 years ago

@github-deden could you please share the contents of the leds_update_work function? you are not blocking there are you? Also, what are you using on the other side, is it Zephyr too?

Vudentz commented 4 years ago

Can't find any leds_update_work, is that some out of tree code? In that case that is obviously not a bug as it can't be reproduced upstream.

What can be happening is k_delayed_work_init when the work has already been scheduled causing the struct to be cleared, note that k_delayed_work_init is supposed to be called only once not everytime k_delayed_work_submit is called.

ghost commented 4 years ago

@carlescufi The other side is using iOS(LightBlue apps). @Vudentz leds_update_work added by myself.

It seemed to affect Bluetooth by calling k_delayed_work_init and k_delayed_work_submit. But If you're told it's not a big deal, it might be.

##########
# Bluetooth Config

CONFIG_BT=y
CONFIG_BT_DEBUG_LOG=y
CONFIG_BT_SMP=y
CONFIG_BT_PERIPHERAL=y
CONFIG_BT_DEVICE_NAME="Nordic Blue"
CONFIG_BT_DEVICE_APPEARANCE=833
/* main.c - Application main entry point */

/*
 * Copyright (c) 2015-2016 Intel Corporation
 *
 * SPDX-License-Identifier: Apache-2.0
 */

#include <zephyr/types.h>
#include <stddef.h>
#include <string.h>
#include <errno.h>
#include <sys/printk.h>
#include <sys/byteorder.h>
#include <zephyr.h>
#include <drivers/gpio.h>
#include <soc.h>

#include <bluetooth/bluetooth.h>
#include <bluetooth/hci.h>
#include <bluetooth/conn.h>
#include <bluetooth/uuid.h>
#include <bluetooth/gatt.h>

#include <settings/settings.h>

#include "services/ble_mpu.h"

//bt
static struct bt_conn* p_conn = NULL;

//BLE Advertise
static const struct bt_data ad[] = {
    BT_DATA_BYTES(BT_DATA_FLAGS, (BT_LE_AD_GENERAL | BT_LE_AD_NO_BREDR)),
    BT_DATA_BYTES(BT_DATA_UUID128_ALL, IMU_SERVICE_UUID),
};

/* LED */
/* The devicetree node identifier for the "led0" alias. */
#define LED0_NODE DT_ALIAS(led0)

#if DT_NODE_HAS_STATUS(LED0_NODE, okay)
#define LED0    DT_GPIO_LABEL(LED0_NODE, gpios)
#define PIN    DT_GPIO_PIN(LED0_NODE, gpios)
#if DT_PHA_HAS_CELL(LED0_NODE, gpios, flags)
#define FLAGS    DT_GPIO_FLAGS(LED0_NODE, gpios)
#endif
#else
/* A build error here means your board isn't set up to blink an LED. */
#error "Unsupported board: led0 devicetree alias is not defined"
#define LED0    ""
#define PIN    0
#endif

#ifndef FLAGS
#define FLAGS    0
#endif

static struct device* led_dev;

/* Interval in milliseconds between each time status LEDs are updated. */
#define LEDS_UPDATE_DELAY            500

/* Structures for work */
static struct k_delayed_work leds_update_work;

/**@brief Update LEDs state. */

static void led_one_shot(u8_t ms)
{
    gpio_pin_set(led_dev, PIN, 1);
    k_sleep(K_MSEC(ms));
    gpio_pin_set(led_dev, PIN, 0);
}

static void leds_update(struct k_work *work)
{

    // flash  LED
    while(true){
      led_one_shot(25);
      k_sleep(K_MSEC(5000));
    }

}

static void received_cb(struct bt_conn *conn,
                          const u8_t *const data, u16_t len)
{
    ;;
}

static struct bt_gatt_mpu_cb mpu_cb = {
     .received_cb = received_cb,
};

static void connected(struct bt_conn *conn, u8_t err)
{
    if (!err) {
        printk("Connected\n");
        p_conn = bt_conn_ref(conn);
    }
}

static void disconnected(struct bt_conn *conn, u8_t reason)
{
    printk("Disconnected (reason 0x%02x)\n", reason);

    if(p_conn){
        bt_conn_unref(p_conn);
        p_conn = NULL;
    }
}

static struct bt_conn_cb conn_callbacks = {
    .connected = connected,
    .disconnected = disconnected,
};

static void bt_ready(void)
{
    int err;
    printk("Bluetooth initialized\n");

    err = ble_mpu_init(&mpu_cb);

    if (err) {
        printk("Failed to init IMUS (err:%d)\n", err);
        return;
    }

    if (IS_ENABLED(CONFIG_SETTINGS)) {
         settings_load();
    }

    err = bt_le_adv_start(BT_LE_ADV_CONN_NAME, ad, ARRAY_SIZE(ad), NULL, 0);

    if (err) {
        printk("Advertising failed to start (err %d)\n", err);
        return;
    }

    printk("Advertising successfully started\n");
}

static void work_init(void)
{

    k_delayed_work_init(&leds_update_work, leds_update);
    k_delayed_work_submit(&leds_update_work, K_MSEC(LEDS_UPDATE_DELAY));
}

/**@brief Initializes buttons and LEDs, using the DK buttons and LEDs
 * library.
 */
static void leds_init(void)
{
    int ret;

    led_dev = device_get_binding(LED0);
    if (led_dev == NULL) {
        return;
    }

    ret = gpio_pin_configure(led_dev, PIN, GPIO_OUTPUT_ACTIVE | FLAGS);
    if (ret < 0) {
        return;
    }
}

static void button_init(void)
{
    ;;
}

void main(void)
{

//    k_sleep(K_SECONDS(10));

    button_init();
    leds_init();

    led_one_shot(200);

    // It works by commenting out here.
    work_init();

    int err;
    err = bt_enable(NULL);

    if(err){
        printk("Bluetooth init failed (err %d)\n", err);
        return;
    }

    bt_ready();
    bt_conn_cb_register(&conn_callbacks);

    u8_t val = 0x00;

    while ( true ) {
        k_sleep(K_SECONDS(1));
        if(p_conn != NULL){
            if(ble_mpu_is_notify()){
                val++;
                ble_mpu_notify(p_conn, &val, sizeof(val));
            }
        }
    }

}
Vudentz commented 4 years ago

You can't have a endless loop, or anything that blocks for long periods, on a system workqueue as the name suggests it is used by the system to schedule all kind of works.

That said, and now Im probably going into the solution space, you don't actually need to do what you are doing in the callback, you can reschedule the work to run every 5000 ms, with an intemediate 25 ms to toggle the pins using static variable for tracking the pin state:

static bool pin_state;

if (!pin_state) {
  gpio_pin_set(led_dev, PIN, 1);
  pin_state = true;
  k_delayed_work_submit(work, K_MSEC(25));
} else {
  gpio_pin_set(led_dev, PIN, 0);
  pin_state = false;
  k_delayed_work_submit(work, K_MSEC(5000));
}
carlescufi commented 4 years ago

Thanks @Vudentz. Indeed the issue is with:

static void leds_update(struct k_work *work)
{

    // flash  LED
    while(true){
      led_one_shot(25);
      k_sleep(K_MSEC(5000));
    }

}

Closing as this is clearly an issue with the user's code.