bertmelis / espMqttClient

MQTT 3.1.1 client library for the Espressif devices ESP8266 and ESP32 on the Arduino framework.
https://www.emelis.net/espMqttClient/
MIT License
100 stars 21 forks source link

Client keeps disconnecting while trying to connect to AWS #52

Closed Bernard314 closed 1 year ago

Bernard314 commented 1 year ago

Hi I am trying to connect my esp32 to my AWS. I have checked that my endpoint and certificates are correct. I have used the pubsubclient library before but i am using this as i want qos1 messages. Here is my code:

#include <WiFi.h>
#include <Ticker.h>
#include <espMqttClient.h>

#define WIFI_SSID "wifiname"
#define WIFI_PASSWORD "wifipassword"

#define MQTT_PORT 8883
#define MQTT_USER "username"
#define MQTT_PASS "password"

const char* awsEndpoint = "awsendpoint-ats.iot.us-west-2.amazonaws.com";

//End initialise AWS var

// xxxxxxxxxx-certificate.pem.crt
const char certificate_pem_crt []= \

"-----BEGIN CERTIFICATE-----\n"\
"sdfwerfiuweewkndwlekmdnewklmoiejdodjlemdklmwe*(Y&(*)LKmewpofmwpeomfweferreL\n"\
"BQAwTTFLMEksdfkmweoifwef*&BIjknjem9uIFdlYiBTZXJ2aWNlcyBPPUFtYXpvbi5jb20g\n"\
"wefiuwefowief208927492dneklndqowiIUhuygtybiohufuybkjoih*yuygjKKBKJBDSFEnlknkjl\n"\
"spfiubgerupifberpijbrepijbnrepinireniorenvrenvernvk;jnerr;kjvnev;kjnre;kjvnrek;vjner;kvnerk;jvn\n"\
"-----END CERTIFICATE-----\n";

// xxxxxxxxxx-private.pem.key
const char private_pem_key[] = \
"-----BEGIN RSA PRIVATE KEY-----\n"\
"ewnpifh3043jk4fn348fn34f3498fhn34fpierfjVESRGTrtgiu45bgg938gbf3o4fwwewe\n"\
"wefewiunfjerngjierbgoierbfij34nfiup34h89fh34iufnewjnwenkjwenfkjeanfkjafen\n"\
"sdfoigneroigfnerionglrkenglkerngklernglknerklgnerlknklernklernklernlkernlken\n"\
"-----END RSA PRIVATE KEY-----\n";

/* root CA can be downloaded in:
  https://www.symantec.com/content/en/us/enterprise/verisign/roots/VeriSign-Class%203-Public-Primary-Certification-Authority-G5.pem
*/
const char rootCA[]= \

"-----BEGIN CERTIFICATE-----\n" \
"ewnpifh3043jk4fn348fn34f3498fhn34fpierfjVESRGTrtgiu45bgg938gbf3o4fwwewe\n"\
"wefewiunfjerngjierbgoierbfij34nfiup34h89fh34iufnewjnwenkjwenfkjeanfkjafen\n"\
"sdfoigneroigfnerionglrkenglkerngklernglknerklgnerlknklernklernklernlkernlken\n"\
"-----END CERTIFICATE-----\n";

const char* clientID = "testingesppolicy";
const char* topicchar = "testtopic";
const char* payloadchar = "testpayload";

espMqttClientSecure mqttClient;
Ticker reconnectTimer;

void connectToWiFi() {
  Serial.println("Connecting to Wi-Fi...");
  WiFi.begin(WIFI_SSID, WIFI_PASSWORD);
}

void connectToMqtt() {
  Serial.println("Connecting to MQTT...");
  // mqttClient.setClientId(clientID);
  mqttClient.connect();

}

void WiFiEvent(WiFiEvent_t event) {
  Serial.printf("[WiFi-event] event: %d\n", event);
  switch(event) {
    case SYSTEM_EVENT_STA_GOT_IP:
      Serial.println("WiFi connected");
      Serial.println("IP address: ");
      Serial.println(WiFi.localIP());
      connectToMqtt();
      break;
    case SYSTEM_EVENT_STA_DISCONNECTED:
      Serial.println("WiFi lost connection");
      reconnectTimer.once(5, connectToWiFi);
      break;
    default:
      break;
  }
}

void onMqttConnect(bool sessionPresent) {
  Serial.println("Connected to MQTT.");
  Serial.print("Session present: ");
  Serial.println(sessionPresent);

  uint16_t packetIdPub0 = mqttClient.publish(topicchar, 0, false, payloadchar);
  Serial.println("Publishing at QoS 0, packetId: ");
  Serial.println(packetIdPub0);
}

void onMqttDisconnect(espMqttClientTypes::DisconnectReason reason) {
  Serial.printf("Disconnected from MQTT: %u.\n", static_cast<uint8_t>(reason));

  if (WiFi.isConnected()) {
    reconnectTimer.once(5, connectToMqtt);
  }
}

void onMqttMessage(const espMqttClientTypes::MessageProperties& properties, const char* topic, const uint8_t* payload, size_t len, size_t index, size_t total) {
  Serial.println("Publish received.");
  Serial.print("  topic: ");
  Serial.println(topic);
  Serial.print("  qos: ");
  Serial.println(properties.qos);
  Serial.print("  dup: ");
  Serial.println(properties.dup);
  Serial.print("  retain: ");
  Serial.println(properties.retain);
  Serial.print("  len: ");
  Serial.println(len);
  Serial.print("  index: ");
  Serial.println(index);
  Serial.print("  total: ");
  Serial.println(total);
}

void onMqttPublish(uint16_t packetId) {
  Serial.println("Publish acknowledged.");
  Serial.print("  packetId: ");
  Serial.println(packetId);
}

void setup() {
  Serial.begin(115200);
  Serial.println();
  Serial.println();

  WiFi.onEvent(WiFiEvent);

  mqttClient.setInsecure();
  mqttClient.setCACert(rootCA);
  mqttClient.setCertificate(certificate_pem_crt);
  mqttClient.setPrivateKey(private_pem_key);
  mqttClient.setCredentials(MQTT_USER, MQTT_PASS);
  mqttClient.onConnect(onMqttConnect);
  mqttClient.onDisconnect(onMqttDisconnect);
  mqttClient.onMessage(onMqttMessage);
  mqttClient.onPublish(onMqttPublish);
  mqttClient.setServer(awsEndpoint, MQTT_PORT);
  mqttClient.setCleanSession(true);

  connectToWiFi();
}

void loop() {
  delay(1000);
  static uint32_t lastMillis = 0;
  if (millis() - lastMillis > 5000) {
    lastMillis = millis();
    Serial.printf("heap: %u\n", ESP.getFreeHeap());
  }
  mqttClient.publish(topicchar, 0, false, payloadchar);

}

I only copied and pasted the example in this library and pasted my own certs but i get this in the Serial monitor:

ELF file SHA256: 7e4ab9d6b66e846e

Rebooting...
ets Jun  8 2016 00:22:57

rst:0xc (SW_CPU_RESET),boot:0x13 (SPI_FAST_FLASH_BOOT)
configsip: 0, SPIWP:0xee
clk_drv:0x00,q_drv:0x00,d_drv:0x00,cs0_drv:0x00,hd_drv:0x00,wp_drv:0x00
mode:DIO, clock div:1
load:0x3fff0030,len:1344
WiFi connected
IP address: 
192.168.1.17
Connecting to MQTT...
Disconnected from MQTT: 7.
heap: 233476
Connecting to MQTT...
Disconnected from MQTT: 7.
heap: 233488
Connecting to MQTT...
Disconnected from MQTT: 7.
heap: 233488
Connecting to MQTT...
Disconnected from MQTT: 7.
heap: 233488
Connecting to MQTT...

and keeps on repeating.... i read somewhere that subscribing to the same topic can cause problems so i removed the subscribing part of the code. i also use the endpoint of the oergon region where i have no registered devices.....what is going wrong?

bertmelis commented 1 year ago

Which IDE are you using to compile (and upload) the firmware to your esp32?

Could you please fetch the Serial output again, but with enabling debug output? See https://docs.espressif.com/projects/arduino-esp32/en/latest/guides/tools_menu.html#core-debug-level for Arduino IDE or add this to your platformio.ini:

build_flags =
  -D CORE_DEBUG_LEVEL=ARDUHAL_LOG_LEVEL_VERBOSE
  -D LOG_LEVEL=LOG_LEVEL_VERBOSE
build_type = debug
monitor_filters = esp32_exception_decoder

espMqttClient doesn't have access to the underlying error codes of the TCP client. By enabling debug logging, they will show up in the monitor though.

bertmelis commented 1 year ago

By the way, I can't connect to the url at the given port. Why are you using the certificate from Symantec? Amazon publishes them here: https://www.amazontrust.com/repository/

Bernard314 commented 1 year ago

Which IDE are you using to compile (and upload) the firmware to your esp32?

I am using Arduino.

Here is what i got after enabling debug output


E (336) esp_core_dump_fl�.K�ɕ�dump data check failed:
Calculated checksum='7d2b2092'
Image checksum='17bd52fc'
[    28][D][esp32-hal-cpu.c:244] setCpuFrequencyMhz(): PLL: 480 / 2 = 240 Mhz, APB: 80000000 Hz
E (237) psram: PSRAM ID read error: 0xffffffff
[    38][W][esp32-hal-psram.c:71] psramInit(): PSRAM init failed!

Connecting to Wi-Fi...
[    73][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 0 - WIFI_READY
[WiFi-event] event: 0
[   169][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 2 - STA_START
[WiFi-event] event: 2
[   304][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 4 - STA_CONNECTED
[WiFi-event] event: 4
[   496][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 7 - STA_GOT_IP
[   497][D][WiFiGeneric.cpp:996] _eventCallback(): STA IP: 192.168.1.17, MASK: 255.255.255.0, GW: 192.168.1.1
[WiFi-event] event: 7
WiFi connected
IP address: 
192.168.1.17
Connecting to MQTT...
[   513][I][Packet.cpp:322] _allocate(): Alloc (l:50)
[   515][I][MqttClient.cpp:267] loop(): Stack usage: 556/5000
[  1177][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  2178][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  3010][D][ssl_client.cpp:287] start_ssl_client(): Protocol is TLSv1.2 Ciphersuite is TLS-ECDHE-RSA-WITH-AES-128-GCM-SHA256
[  3010][D][ssl_client.cpp:289] start_ssl_client(): Record expansion is 29
[  3017][I][MqttClient.cpp:267] loop(): Stack usage: 3100/5000
[  3023][I][MqttClient.cpp:313] _checkOutgoing(): tx 50/50 (10)
[  3028][I][MqttClient.cpp:313] _checkOutgoing(): tx 24/24 (30)
[  3034][I][MqttClient.cpp:313] _checkOutgoing(): tx 24/24 (30)
[  3178][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  3180][I][MqttClient.cpp:313] _checkOutgoing(): tx 24/24 (30)
[  3321][I][MqttClient.cpp:640] _clearQueue(): clearing queue (clear session: false)
Disconnected from MQTT: 7.
[  4178][I][Packet.cpp:322] _allocate(): Alloc (l:24)
heap: 232920
[  5178][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  6178][I][Packet.cpp:322] _allocate(): Alloc (l:24)
Bernard314 commented 1 year ago

By the way, I can't connect to the url at the given port. Why are you using the certificate from Symantec? Amazon publishes them here: https://www.amazontrust.com/repository/

Oh is it ok to post my endpoint? im sorry i wasnt aware so i just changed my endpoint to a sample one. I just tried the arduino-mqtt library (https://github.com/256dpi/arduino-mqtt) with the same certs and endpoint and was able to publish messages

Why are you using the certificate from Symantec

Oh i'm sorry i must have left that comment accidentally.....I am using the certs that were provided by aws at the time of my thing creation

bertmelis commented 1 year ago

Strange. I have to look into this.

It is quite odd because there is a portion of the logging missing: The last log output you posted from the connection is here. but that function doesn't stop there so there should be more logging.

Anyway, because the client starts sending things, I suppose the connection has been made. Afterwards, the broker disconnects you. Are you using the same username/password for multiple devices? Are you setting a clientID? (if not, try to set to a unique value).

Bernard314 commented 1 year ago

I think i should have mentioned that the log doesn't really end. Like this is an endless loop basically (like it will start from the first line again) (Thousand apologies, here is the proper loop)

Connecting to Wi-Fi...
[    73][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 0 - WIFI_READY
[WiFi-event] event: 0
[   166][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 2 - STA_START
[WiFi-event] event: 2
[   221][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 4 - STA_CONNECTED
[WiFi-event] event: 4
[   454][D][WiFiGeneric.cpp:931] _eventCallback(): Arduino Event: 7 - STA_GOT_IP
[   454][D][WiFiGeneric.cpp:996] _eventCallback(): STA IP: 192.168.1.17, MASK: 255.255.255.0, GW: 192.168.1.1
[WiFi-event] event: 7
WiFi connected
IP address: 
192.168.1.17
Connecting to MQTT...
[   470][I][Packet.cpp:322] _allocate(): Alloc (l:50)
[   472][I][MqttClient.cpp:267] loop(): Stack usage: 556/5000
[  1173][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  2174][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  2805][D][ssl_client.cpp:287] start_ssl_client(): Protocol is TLSv1.2 Ciphersuite is TLS-ECDHE-RSA-WITH-AES-128-GCM-SHA256
[  2806][D][ssl_client.cpp:289] start_ssl_client(): Record expansion is 29
[  2813][I][MqttClient.cpp:267] loop(): Stack usage: 3084/5000
[  2819][I][MqttClient.cpp:313] _checkOutgoing(): tx 50/50 (10)
[  2824][I][MqttClient.cpp:313] _checkOutgoing(): tx 24/24 (30)
[  2830][I][MqttClient.cpp:313] _checkOutgoing(): tx 24/24 (30)
[  3174][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  3176][I][MqttClient.cpp:313] _checkOutgoing(): tx 24/24 (30)
[  3187][I][MqttClient.cpp:640] _clearQueue(): clearing queue (clear session: false)
Disconnected from MQTT: 7.
[  4174][I][Packet.cpp:322] _allocate(): Alloc (l:24)
heap: 233280
[  5174][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  6174][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  7174][I][Packet.cpp:322] _allocate(): Alloc (l:24)
[  8174][I][Packet.cpp:322] _allocate(): Alloc (l:24)
Connecting to MQTT...

Are you using the same username/password for multiple devices? Are you setting a clientID? (if not, try to set to a unique value).

I only have 1 more device set up that too in other region of aws (N.Virginia) and i created this device for Oregon just to make sure im not making that error. The ClientID and username/passwords are unique too

bertmelis commented 1 year ago

Sure, but there are some lines "missing":

[  2806][D][ssl_client.cpp:289] start_ssl_client(): Record expansion is 29
--> HERE SHOULD BE MORE <--
[  2813][I][MqttClient.cpp:267] loop(): Stack usage: 3084/5000
[  2819][I][MqttClient.cpp:313] _checkOutgoing(): tx 50/50 (10)

It's probably a Serial buffer thing. Anyways, let's move on why the broker disconnects.

Could you try again but without publishing something in your loop? Maybe AWS doesn't allow publishing without being fully connected. (or check first with mqttClient.connected())

Bernard314 commented 1 year ago

Correct! That works. Thanks! I added a delay of 10 seconds at the end of my void setup() and it works perfectly!

bertmelis commented 1 year ago

Nice.

I'm still gonna mark this as a bug. I have an option to disallow publishing before connecting but as it is currently, it doest cover all scenarios.

Bernard314 commented 1 year ago

Just wanted to add that although it works after adding a delay at the end of setup, the same problem occurs when the device loses wifi connection and reconnects

bertmelis commented 1 year ago

That's why it is still a bug. It's one of the scenarios I was referring to in my last answer.

I'm working on a fix.

bertmelis commented 1 year ago

I made a branch: https://github.com/bertmelis/espMqttClient/tree/wait-for-connack

By default, it should wait for connection acknowledgment before starting to send other packets (unless the broker doesn't respond, but then we want to terminate the connection anyway). That part should work. I'm reviewing the changes to see if I didn't introduce new bugs though. Could you check too?