espressif / esp-idf

Espressif IoT Development Framework. Official development framework for Espressif SoCs.
Apache License 2.0
13.31k stars 7.2k forks source link

MQTT memory leaks while reconnecting Wi-Fi (IDFGH-13446) #14352

Open sohaibqamar opened 1 month ago

sohaibqamar commented 1 month ago

Answers checklist.

General issue report

Environment

Development Kit: ESP32-S3 Espressif IoT Development Framework (ESP-IDF) 5.1.0 Project Configuration Development Env: VScode Operating System: Windows Power Supply: External, stable (checked on oscilloscope)

Problem Description:

The code below is my network_manager.c is used to deinitialize and initialize the wifi task based on the network_change_countervalue. The idea is to switch between wifi interface and observe if there is any memory leakage between a wifi connectivity cycle. I'm having almost 800-900 bytes of memory leakage. The problem seems to be the MQTT component if I do not start the MQTT, there is no memory leakage when we initialize, deinitialize, and again initialize the wifi interface.

network_manager.c

unsigned int network_change_counter = 0;
void network_task(void *pvParameter)
{
    network_change_counter = 0;
    while (1) 
    {
        if(flag_wifi_connected)
        {
            network_change_counter++;
            ESP_LOGW(TAG, "Connected Counter: %d.\n", network_change_counter);
            if(network_change_counter > 12 && network_change_counter < 14)
            {
                ESP_LOGW(TAG, "Deinitializing Wi-Fi mode.\n");
                // network_change_counter = 0;
                set_current_network_mode(NETWORK_MODE_NO_NETWORK);
                // set_change_network_mode_flag();
            }
        }
        if(!flag_wifi_connected && get_current_network_mode() == NETWORK_MODE_NO_NETWORK) // switching to Wi-Fi
        {
            network_change_counter++;
            ESP_LOGW(TAG, "Not connected Counter: %d.\n", network_change_counter);
            if(network_change_counter > 14 && network_change_counter < 16)
            {
                ESP_LOGW(TAG, "Reinitializing Network mode to: %d.\n", (int)get_current_network_mode());
                network_change_counter = 0;
                set_current_network_mode(NETWORK_MODE_ONLY_WIFI);
                set_change_network_mode_flag();
            }
        }
        if(get_change_network_mode_flag() && !flag_wifi_connected)
        {
            ESP_LOGI(TAG, "Changing Network mode to: %d.\n", (int)get_current_network_mode());

            if(get_current_network_mode() == NETWORK_MODE_ONLY_WIFI) // switching to Wi-Fi
            {
                // xTaskCreatePinnedToCore(&lte_bridge_task, "lte_bridge_task", 1024*4, NULL, 21, NULL, 0);
                xTaskCreate(&wifi_task, "wifi_task", 1024*6, NULL, 20, NULL);
            }
            reset_change_network_mode_flag();
        }
        vTaskDelay(3000 / portTICK_PERIOD_MS);
    }
}

Below is my wifi_manager.c code

wifi_manager.c

//headers

static const char *TAG = "[Wi-Fi]";

wifi_config_t wifi_config;
char esp_def_wifi_ssid[SSID_MAX_LEN] = "testWifi";
char esp_def_wifi_pass[PASS_MAX_LEN] = "12345678";

#define ESP_WIFI_SAE_MODE WPA3_SAE_PWE_HUNT_AND_PECK
#define EXAMPLE_H2E_IDENTIFIER ""
#define ESP_WIFI_SCAN_AUTH_MODE_THRESHOLD WIFI_AUTH_WPA2_PSK

wifi_cred_t wifi_cred;
char wifi_ip[16];
bool flag_wifi_connected = false;
bool flag_wifi_cred_changed = false;

uint32_t wifi_retry_count = 0;

TimerHandle_t wifi_retry_timer = NULL;
static esp_event_handler_instance_t instance_any_id;
static esp_event_handler_instance_t instance_got_ip;
static bool wifi_initialized = false;
extern modem_wifi_config_t s_modem_wifi_config;
extern char hub_id[9];

extern esp_err_t mqtt_pause(void);
extern esp_err_t mqtt_resume(void);

static void stop_wifi_retry_timer(void)
{
    if (wifi_retry_timer != NULL)
    {
        ESP_LOGI(TAG, "Stopping wifi retry timer.");
        xTimerStop(wifi_retry_timer, 0);
        xTimerDelete(wifi_retry_timer, 0);
        wifi_retry_timer = NULL;
        wifi_retry_count = 0;
    }
}

static void wifi_retry_timer_callback(TimerHandle_t xTimer)
{
    if (wifi_retry_count < WIFI_RETRY_MAX_COUNT)
    {
        esp_wifi_connect();
        wifi_retry_count++;
        ESP_LOGI(TAG, "Retry to connect to the AP: %s (Attempt %ld)", wifi_cred.ssid, wifi_retry_count);
    }
    else
    {
        // Switch to LTE mode
        //set LTE mode logic
        stop_wifi_retry_timer();
    }
}

static void start_wifi_retry_timer(void)
{
    if (wifi_retry_timer == NULL)
    {
        wifi_retry_timer = xTimerCreate("WifiRetryTimer", pdMS_TO_TICKS(5000), pdTRUE, (void *)0, wifi_retry_timer_callback);
        if (wifi_retry_timer == NULL)
        {
            ESP_LOGE(TAG, "Failed to create retry timer!");
        }
        else
        {
            if (xTimerStart(wifi_retry_timer, 0) != pdPASS)
            {
                ESP_LOGE(TAG, "Failed to start retry timer!");
            }
            else
            {
                ESP_LOGI(TAG, "Wi-Fi retry timer created and running successfully.");
            }
        }
    }
    else
    {
        xTimerStart(wifi_retry_timer, 0);
    }
}

static void wifi_event_handler(void* arg, esp_event_base_t event_base, int32_t event_id, void* event_data)
{
    static bool processing_disconnection = false;
    if (event_base == WIFI_EVENT) 
    {
        switch (event_id) 
        {
            case WIFI_EVENT_STA_START:
                esp_wifi_connect();
                break;
            case WIFI_EVENT_STA_CONNECTED:
                if(!flag_wifi_connected)
                {
                    ESP_LOGI(TAG, "Wi-Fi Connected");
                    stop_wifi_retry_timer();
                    flag_wifi_connected = true;
                    processing_disconnection = false;
                }
                break;
            case WIFI_EVENT_STA_DISCONNECTED:
                if (!processing_disconnection) // Process disconnection only if not already processing
                {
                    ESP_LOGI(TAG, "Wi-Fi Disconnected");
                    flag_wifi_connected = false;
                    processing_disconnection = true;
                    if (get_current_network_mode() == NETWORK_MODE_ONLY_WIFI && wifi_retry_timer == NULL)
                    {
                        start_wifi_retry_timer();
                    }
                    mqtt_pause();
                }
                break;
            case WIFI_EVENT_STA_STOP:
                ESP_LOGI(TAG, "Wi-Fi STA Stopped");
                processing_disconnection = false;
                break;
            default:
                break;
        }
    }
    else if (event_base == IP_EVENT && event_id == IP_EVENT_STA_GOT_IP) 
    {
        ip_event_got_ip_t* event = (ip_event_got_ip_t*) event_data;
        sprintf(wifi_ip, IPSTR, IP2STR(&event->ip_info.ip));
        ESP_LOGI(TAG, "got ip:%s", wifi_ip);
        wifi_retry_count = 0;
        flag_wifi_connected = true;
        processing_disconnection = false;
        mqtt_resume();
    }
}

void wifi_init_sta(void)
{
    if (wifi_initialized)
    {
        ESP_LOGW(TAG, "Wi-Fi already initialized, skipping initialization.");
        return;
    }
    // Check if the default Wi-Fi interface already exists
    esp_netif_t *sta_netif = esp_netif_get_handle_from_ifkey("WIFI_STA_DEF");
    if (sta_netif != NULL) 
    {
        ESP_LOGW(TAG, "Default Wi-Fi interface already exists");
    } 
    else 
    {
        // Create the default Wi-Fi interface
        sta_netif = esp_netif_create_default_wifi_sta();
        if (sta_netif == NULL) 
        {
            ESP_LOGE(TAG, "Failed to create default Wi-Fi interface");
            return;
        }
    }

    wifi_init_config_t cfg = WIFI_INIT_CONFIG_DEFAULT();

    ESP_ERROR_CHECK(esp_wifi_init(&cfg));    

    ESP_ERROR_CHECK(esp_event_handler_instance_register(WIFI_EVENT,
                                                        ESP_EVENT_ANY_ID,
                                                        &wifi_event_handler,
                                                        NULL,
                                                        &instance_any_id));
    ESP_ERROR_CHECK(esp_event_handler_instance_register(IP_EVENT,
                                                        IP_EVENT_STA_GOT_IP,
                                                        &wifi_event_handler,
                                                        NULL,
                                                        &instance_got_ip));

    get_wifi_credentials_file(wifi_cred.ssid, wifi_cred.pass);
    ESP_LOGI(TAG, "Got credentails as SSID:%s Password:%s", wifi_cred.ssid, wifi_cred.pass);
    bzero(&wifi_config, sizeof(wifi_config_t));
    memcpy(wifi_config.sta.ssid, wifi_cred.ssid, sizeof(wifi_config.sta.ssid));
    memcpy(wifi_config.sta.password, wifi_cred.pass, sizeof(wifi_config.sta.password));

    ESP_ERROR_CHECK(esp_wifi_set_mode(WIFI_MODE_STA));
    ESP_ERROR_CHECK(esp_wifi_set_config(WIFI_IF_STA, &wifi_config));
    ESP_ERROR_CHECK(esp_wifi_start());

    ESP_LOGI(TAG, "wifi_init_sta finished.");
    wifi_initialized = true;
}

void wifi_deinit_sta(void)
{
    if (!wifi_initialized)
    {
        ESP_LOGW(TAG, "Wi-Fi not initialized, skipping deinitialization.");
        return;
    }

    ESP_LOGE(TAG, "Disconnecting from Wi-Fi");

    if (wifi_retry_timer != NULL)
    {
        stop_wifi_retry_timer();
    }

    esp_wifi_disconnect();
    esp_wifi_stop();
    esp_wifi_deinit();

    ESP_ERROR_CHECK(esp_event_handler_instance_unregister(WIFI_EVENT, ESP_EVENT_ANY_ID, instance_any_id));
    ESP_ERROR_CHECK(esp_event_handler_instance_unregister(IP_EVENT, IP_EVENT_STA_GOT_IP, instance_got_ip));

    // Destroy default Wi-Fi netif 
    esp_netif_t *netifConf = esp_netif_get_handle_from_ifkey("WIFI_STA_DEF");
    if (netifConf != NULL)
    {
        esp_netif_destroy_default_wifi(netifConf);
    }
    // Deinitialize the network interface
    // esp_netif_deinit();
    wifi_initialized = false;
}

void wifi_task(void *pvParameter)
{
    ESP_LOGI(TAG, "Wi-Fi task created successfully");
    wifi_init_sta();

    while (1) 
    {
        if (get_current_network_mode() != NETWORK_MODE_ONLY_WIFI)
        {
            wifi_deinit_sta();
            break;
        }
        vTaskDelay(1000 / portTICK_PERIOD_MS);
    }
    ESP_LOGW(TAG, "Deleting wifi_task");
    // Delete the task
    vTaskDelete(NULL);
}

Below is the code for mqtt_manager.c which is being initialized in the start using mqtt_app_start function

mqtt_manager.c

// headers 

static const char *TAG = "[MQTT_MANAGER]";

esp_mqtt_client_handle_t client = NULL;

mqtt_topic_t hub_sub_topic_request;
mqtt_topic_t hub_pub_topic_event;

extern const uint8_t client_cert_pem_start[] asm("_binary_client_crt_start");
extern const uint8_t client_cert_pem_end[] asm("_binary_client_crt_end");
extern const uint8_t client_key_pem_start[] asm("_binary_client_key_start");
extern const uint8_t client_key_pem_end[] asm("_binary_client_key_end");
extern const uint8_t server_cert_pem_start[] asm("_binary_mosquitto_org_crt_start");
extern const uint8_t server_cert_pem_end[] asm("_binary_mosquitto_org_crt_end");

char *json_resp;
char *json_err;

static void log_error_if_nonzero(const char *message, int error_code)
{
    if (error_code != 0) {
        ESP_LOGE(TAG, "Last error %s: 0x%x", message, error_code);
    }
}

esp_err_t mqtt_pause(void) 
{
    return esp_mqtt_client_stop(client);
}

esp_err_t mqtt_resume(void)
{
    return esp_mqtt_client_start(client);
}

static void mqtt_event_handler(void *handler_args, esp_event_base_t base, int32_t event_id, void *event_data)
{
    ESP_LOGD(TAG, "Event dispatched from event loop base=%s, event_id=%" PRIi32, base, event_id);

    esp_mqtt_event_handle_t event = event_data;
    esp_mqtt_client_handle_t client = event->client;

    int msg_id = -1;

    switch ((esp_mqtt_event_id_t)event_id) 
    {
    case MQTT_EVENT_CONNECTED:
        ESP_LOGI(TAG, "MQTT_EVENT_CONNECTED");

        msg_id = esp_mqtt_client_subscribe(client, hub_sub_topic_request.topic, 0);
        ESP_LOGI(TAG, "sent subscribe successful, msg_id=%d", msg_id);

        msg_id = esp_mqtt_client_publish(client, hub_pub_topic_event.topic, get_hub_online_string(), 0, 0, 0);
        ESP_LOGI(TAG, "sent publish successful, msg_id=%d", msg_id);

        break;

    case MQTT_EVENT_DISCONNECTED:
        ESP_LOGI(TAG, "MQTT_EVENT_DISCONNECTED");
        break;

    case MQTT_EVENT_SUBSCRIBED:
        ESP_LOGI(TAG, "MQTT_EVENT_SUBSCRIBED, msg_id=%d", event->msg_id);
        break;

    case MQTT_EVENT_UNSUBSCRIBED:
        ESP_LOGI(TAG, "MQTT_EVENT_UNSUBSCRIBED, msg_id=%d", event->msg_id);
        break;

    case MQTT_EVENT_PUBLISHED:
        ESP_LOGI(TAG, "MQTT_EVENT_PUBLISHED, msg_id=%d", event->msg_id);
        break;

    case MQTT_EVENT_DATA:
        ESP_LOGI(TAG, "MQTT_EVENT_DATA");
        break;

    case MQTT_EVENT_ERROR:
        ESP_LOGI(TAG, "MQTT_EVENT_ERROR");
        if (event->error_handle->error_type == MQTT_ERROR_TYPE_TCP_TRANSPORT) 
        {
            log_error_if_nonzero("reported from esp-tls", event->error_handle->esp_tls_last_esp_err);
            log_error_if_nonzero("reported from tls stack", event->error_handle->esp_tls_stack_err);
            log_error_if_nonzero("captured as transport's socket errno",  event->error_handle->esp_transport_sock_errno);
            ESP_LOGI(TAG, "Last errno string (%s)", strerror(event->error_handle->esp_transport_sock_errno));

        }
        break;

    default:
        ESP_LOGI(TAG, "Other event id:%d", event->event_id);
        break;
    }
}

void mqtt_app_start(char *hub_id)
{
    esp_err_t err = esp_netif_init();
    if (err != ESP_OK)
    {
        ESP_LOGE(TAG, "Failed to initialize TCP/IP stack: %s", esp_err_to_name(err));
        return;
    }
    // Create the default event loop
    err = esp_event_loop_create_default();
    if (err != ESP_OK) 
    {
        ESP_LOGE(TAG, "Failed to create default event loop: %s", esp_err_to_name(err));
        esp_netif_deinit();
        return;
    }

    ESP_LOGI(TAG, "Starting MQTT process");
    esp_err_t esp_mqtt_err;
    sprintf(hub_sub_topic_request.topic,"header/%s/hub/request",hub_id);

    sprintf(hub_pub_topic_event.topic,"header/%s/hub/event",hub_id);

    const esp_mqtt_client_config_t mqtt_cfg = 
    {
        .broker.address.uri = "mqtts://urlxxxx.iot.us-east-1.amazonaws.com:8883",
        .broker.verification.certificate = (const char *)server_cert_pem_start,
        .credentials = 
        {
            .authentication = 
            {
                .certificate = (const char *)client_cert_pem_start,
                .key = (const char *)client_key_pem_start,
            },
        }
    };

    client = esp_mqtt_client_init(&mqtt_cfg);
    if (client == NULL) 
    {
        ESP_LOGE(TAG, "Failed to initialize MQTT client");
        return;
    }
    // The last argument may be used to pass data to the event handler, in this example mqtt_event_handler
    esp_mqtt_err = esp_mqtt_client_register_event(client, ESP_EVENT_ANY_ID, mqtt_event_handler, NULL);
    if (esp_mqtt_err != ESP_OK)
    {
        ESP_LOGE(TAG, "Failed to register MQTT event handler: %s", esp_err_to_name(esp_mqtt_err));
    }
    /* esp_mqtt_err = esp_mqtt_client_start(client);
    if (esp_mqtt_err != ESP_OK)
    {
        ESP_LOGE(TAG, "Failed to start MQTT client: %s", esp_err_to_name(esp_mqtt_err));
    }*/
}

I'm printing the Internal Free heap size using esp_get_free_internal_heap_size() and every time I get connected to the MQTT broker and publish a message, there is a decrease in available heap size by almost 800-900 bytes.

euripedesrocha commented 1 month ago

Hi @sohaibqamar from the code you are sharing you are only calling esp_mqtt_client_stop this will not clean all data from the mqtt client. That is why you can call esp_mqtt_client_start again to re-start the client. To completely clean all data you need to call esp_mqtt_client_destroy but you will need to re init the client.

Regarding the publishing, if you are using QoS > 1, you are likely seeing the outbox growing. Could you verify the outbox size?