adafruit / Adafruit_CircuitPython_ESP32SPI

ESP32 as wifi with SPI interface
MIT License
103 stars 75 forks source link

wrong connection error messages from connect_AP method #189

Open mMerlin opened 8 months ago

mMerlin commented 8 months ago

ESP_SPIcontrol.connect_AP is not reporting connection failures correctly for the actual failure causes. Testing is pointing to a possible failure or limitation in the NINA fw. The problem could also be the way, and which, commands are being sent to the fw.

Observed: When specifying an SSID that exists, but using the wrong password, connect_AP reports 'No such ssid'. When specifying an SSID that does not exist, connect_AP reports 'Failed to connect to ssid'

Expected: The reverse.

To demonstrate, I took a copy of the simple_test.py example, cut it down, modified it, and added to it. As part of that, I created a subclass of ESP_SPIcontrol to add a testing version of connect_AP. That version monitors and reports the status changes more closely, but raises the same exceptions. The result is below, along with the output I get. In that, I noticed that the status never goes back to idle.

# SPDX-FileCopyrightText: 2024 µMerlin
# SPDX-License-Identifier: MIT

"""testing connection failure statuses"""

import sys
import os
import time
# pylint:disable=import-error
import board
import busio
from digitalio import DigitalInOut
# pylint:enable=import-error
from adafruit_esp32spi import adafruit_esp32spi as wl_const
from adafruit_esp32spi.adafruit_esp32spi import ESP_SPIcontrol

class ExtendedSPIControl(ESP_SPIcontrol):
    """Override individual method for testing"""

    def testing_connect(self, ssid:str, password:str, timeout_s:int=10) -> int:
        """emulate connect_AP() to add debug tracing"""
        print(f'{type(self).__name__ =} in testing version of connect_ap()')
        stat_changes = []
        time_pt0 = time.monotonic()
        stat_changes.append((self.status, time_pt0))
        if isinstance(ssid, str):
            ssid = bytes(ssid, "utf-8")
        if password:
            if isinstance(password, str):
                password = bytes(password, "utf-8")
            self.wifi_set_passphrase(ssid, password)
            time_pt1 = time.monotonic()
        else:
            self.wifi_set_network(ssid)
            time_pt1 = time.monotonic()
        stat_changes.append((self.status, time_pt1))
        while (time.monotonic() - time_pt1) < timeout_s:  # wait until connected or timeout
            stat = self.status
            if stat != stat_changes[-1][0]:
                stat_changes.append((stat, time.monotonic()))
            if stat == wl_const.WL_CONNECTED:
                break
            time.sleep(0.05)
        stat_changes.append((stat, time.monotonic()))
        print(f'set_passphrase elapsed: {time_pt1 - time_pt0}')
        prev_tm = time_pt0
        for idx, stamp in enumerate(stat_changes):
            print(f'{idx:3}: {stamp[0]} at {stamp[1] - prev_tm}')
            prev_tm = stamp[1]
        if stat in (wl_const.WL_CONNECT_FAILED, wl_const.WL_CONNECTION_LOST,
                    wl_const.WL_DISCONNECTED):
            raise ConnectionError("Failed to connect to ssid", ssid)
        if stat == wl_const.WL_NO_SSID_AVAIL:
            raise ConnectionError("No such ssid", ssid)
        if stat != wl_const.WL_CONNECTED:
            raise OSError(f"Unknown error {stat:02X}")
        return stat

secrets = {
    "ssid": os.getenv("CIRCUITPY_WIFI_SSID"),
    "password": os.getenv("CIRCUITPY_WIFI_PASSWORD"),
}
if secrets == {"ssid": None, "password": None}:
    raise OSError('no credentials found in settings.toml')

print("ESP32 SPI AP connection test")
print(f"  Board ID: {getattr(board, 'board_id', 'Unknown')}")
print(f'  Implementation: {sys.implementation}')
print(f'  Platform: {sys.platform}')

# If you are using a board with pre-defined ESP32 Pins:
esp32_cs = DigitalInOut(board.ESP_CS)
esp32_ready = DigitalInOut(board.ESP_BUSY)
esp32_reset = DigitalInOut(board.ESP_RESET)

# Secondary (SCK1) SPI used to connect to WiFi board on Arduino Nano Connect RP2040
if "SCK1" in dir(board):
    spi = busio.SPI(board.SCK1, board.MOSI1, board.MISO1)
else:
    spi = busio.SPI(board.SCK, board.MOSI, board.MISO)
esp = ExtendedSPIControl(spi, esp32_cs, esp32_ready, esp32_reset)

if esp.status == wl_const.WL_IDLE_STATUS:
    print("ESP32 found and in idle mode")
print(f"  Firmware vers. {esp.firmware_version.decode('utf-8'):11}")
MAC = ':'.join(f'{byte:02X}' for byte in esp.MAC_address)
print(f"  MAC addr: {MAC}")

print("Connecting to AP...")
esp.connect_AP(secrets["ssid"], secrets["password"])
print(f'{esp.status =}')
print("Connected to", str(esp.ssid, "utf-8"), "\tRSSI:", esp.rssi)
print("My IP address is", esp.pretty_ip(esp.ip_address))

esp.disconnect()
time.sleep(1.0)

try:
    esp.connect_AP(secrets["ssid"], "BAD PASSWORD")
except ConnectionError as exc:
    print(f'Error when SSID exists, but using wrong password: {exc}')
print(f'{esp.status =}')
time.sleep(1.0)

try:
    esp.connect_AP("NO SUCH SSID HERE", secrets["password"])
except ConnectionError as exc:
    print(f'Error when SSID does not exist: {exc}')
print(f'{esp.status =}')
time.sleep(1.0)

esp.connect_AP(secrets["ssid"], secrets["password"])
print(f'{esp.status =}')
print("Connected to", str(esp.ssid, "utf-8"), "\tRSSI:", esp.rssi)
print("My IP address is", esp.pretty_ip(esp.ip_address))
esp.disconnect()
time.sleep(1.0)

print("\nrepeat with test version of connect_AP\n")

print("Connecting to AP...")
esp.testing_connect(secrets["ssid"], secrets["password"])
print(f'{esp.status =}')
print("Connected to", str(esp.ssid, "utf-8"), "\tRSSI:", esp.rssi)
print("My IP address is", esp.pretty_ip(esp.ip_address))

esp.disconnect()
time.sleep(1.0)

try:
    esp.testing_connect(secrets["ssid"], "BAD PASSWORD")
except ConnectionError as exc:
    print(f'Error when SSID exists, but using wrong password: {exc}')
print(f'{esp.status =}')
time.sleep(1.0)

try:
    esp.testing_connect("NO SUCH SSID HERE", secrets["password"])
except ConnectionError as exc:
    print(f'Error when SSID does not exist: {exc}')
print(f'{esp.status =}')
time.sleep(1.0)

esp.testing_connect(secrets["ssid"], secrets["password"])
print(f'{esp.status =}')
print("Connected to", str(esp.ssid, "utf-8"), "\tRSSI:", esp.rssi)
print("My IP address is", esp.pretty_ip(esp.ip_address))

print("Done!")
main.py output:
ESP32 SPI AP connection test
  Board ID: pyportal
  Implementation: (name='circuitpython', version=(8, 2, 10), mpy=517)
  Platform: MicroChip SAMD51
ESP32 found and in idle mode
  Firmware vers. 1.7.7     
  MAC addr: 14:48:57:12:CF:A4
Connecting to AP...
esp.status =3
Connected to Dungeon    RSSI: -44
My IP address is 192.168.2.25
Error when SSID exists, but using wrong password: ('No such ssid', b'Dungeon')
esp.status =1
Error when SSID does not exist: ('Failed to connect to ssid', b'NO SUCH SSID HERE')
esp.status =4
esp.status =3
Connected to Dungeon    RSSI: -45
My IP address is 192.168.2.25

repeat with test version of connect_AP

Connecting to AP...
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.193848
  0: 6 at 0.0
  1: 1 at 0.193848
  2: 3 at 1.98193
  3: 3 at 0.0
esp.status =3
Connected to Dungeon    RSSI: -45
My IP address is 192.168.2.25
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.217285
  0: 6 at 0.0
  1: 1 at 0.217285
  2: 1 at 10.0059
Error when SSID exists, but using wrong password: ('No such ssid', b'Dungeon')
esp.status =1
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.20166
  0: 1 at 0.0
  1: 6 at 0.20166
  2: 4 at 1.93311
  3: 4 at 8.07324
Error when SSID does not exist: ('Failed to connect to ssid', b'NO SUCH SSID HERE')
esp.status =4
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.222168
  0: 4 at 0.0
  1: 1 at 0.222168
  2: 3 at 2.13477
  3: 3 at 0.0
esp.status =3
Connected to Dungeon    RSSI: -45
My IP address is 192.168.2.25
Done!
anecdata commented 8 months ago

The ESP32SPI library sends the command to connect to the NINA firmware, and handles any errors here: https://github.com/adafruit/Adafruit_CircuitPython_ESP32SPI/blob/3042d197e9c59ba907e614e1fdd6171b2dbd2028/adafruit_esp32spi/adafruit_esp32spi.py#L604

In the ESP32SPI library, code (1) is No SSID, code (4) is Connection failed, (3) is Connected - these seem to match NINA / Arduino codes: https://github.com/adafruit/nina-fw/blob/b1145b466d65eee8d510b4a0cd7dab7783eb29c5/arduino/libraries/WiFi/src/WiFi.h#L32

The NINA firmware command handler calls the Arduino function WiFi.begin here: https://github.com/adafruit/nina-fw/blob/b1145b466d65eee8d510b4a0cd7dab7783eb29c5/main/CommandHandler.cpp#L78

Pretty much everything at that level or above is just passing along commands and returning responses.

The actual connect logic is in the Arduino WiFi component included in the NINA firmware: https://github.com/adafruit/nina-fw/blob/b1145b466d65eee8d510b4a0cd7dab7783eb29c5/arduino/libraries/WiFi/src/WiFi.cpp#L169 and in the event handler: https://github.com/adafruit/nina-fw/blob/b1145b466d65eee8d510b4a0cd7dab7783eb29c5/arduino/libraries/WiFi/src/WiFi.cpp#L594

I'm not sure which aspects of the WiFi.begin function would trigger one or more events that the event handler is catching, although if a station becomes disconnected, the event handler will report WL_CONNECT_FAILED for No AP Found and Auth Fail (which doesn't seem entirely accurate though a Disconnect might imply a prior connection and therefore the SSID did exist ...a little murky).

mMerlin commented 8 months ago

The ESP32SPI library sends the command to connect to the NINA firmware, and handles any errors here:

I am familiar with that block of code. I reproduced a variation of it as part of the testing that went into the original post. Note though that the 'handling' that does is only after the 10 second timeout. It looks at the final status. The test code that I included monitors changes in the status while the while loop is still happening. My code is not a solution. Just research to get a better idea of what is happening. That shows that:

Analysis: Whatever setting the passphrase does, it results in a new status. That is typically 1 (WL_NO_SSID_AVAIL). The exception was the case when the ssid used does not exist (in range). That case could be interaction with the previous failed connection attempt. To verify, I changed the timeout from 10 seconds to 20 seconds, and reran just the testing_connect portion of the sample code.

main.py output:
ESP32 SPI AP connection test
  Board ID: pyportal
  Implementation: (name='circuitpython', version=(8, 2, 10), mpy=517)
  Platform: MicroChip SAMD51
ESP32 found and in idle mode
  Firmware vers. 1.7.7     
  MAC addr: 14:48:57:12:CF:A4
With test version of connect_AP
Connecting to AP...
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.202026
  0: 0 at 0.0
  1: 1 at 0.202026
  2: 3 at 7.315
  3: 3 at 0.0
esp.status =3
Connected to Dungeon    RSSI: -62
My IP address is 192.168.2.25
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.21698
  0: 6 at 0.0
  1: 1 at 0.21698
  2: 6 at 13.104
  3: 6 at 6.90503
Error when SSID exists, but using wrong password: ('Failed to connect to ssid', b'Dungeon')
esp.status =6
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.192993
  0: 6 at 0.0
  1: 1 at 0.192993
  2: 4 at 1.93201
  3: 4 at 18.077
Error when SSID does not exist: ('Failed to connect to ssid', b'NO SUCH SSID HERE')
esp.status =4
type(self).__name__ =ExtendedSPIControl in testing version of connect_ap()
set_passphrase elapsed: 0.204956
  0: 4 at 0.0
  1: 1 at 0.204956
  2: 3 at 1.93103
  3: 3 at 0.0
esp.status =3
Connected to Dungeon    RSSI: -65
My IP address is 192.168.2.25

The important difference in that is that the bad password case detects a status change to 6 (WL_DISCONNECTED) 13.104 seconds after the passphrase is set, AND the following case where the ssid does not exist goes to status 1 (WL_NO_SSID_AVAIL) when setting the passphrase.

It takes more than 10 seconds for an attempt to connect using a bad passphrase to timeout and disconnect (on the esp32 side). Analysing the pattern of status changes, WL_NO_SSID_AVAIL is NEVER a final status for a connection attempt. That status means that some operation is still pending.

With the longer timeout, which prevents interaction with the failed (still failing) previous connection attempt, setting the passphrase always results in a status of 1 (WL_NO_SSID_AVAIL). The actual result status is whatever that changes to, whenever it changes. The case for the AP not being in range can be sped up significantly by watching for that status change, instead of waiting for the timeout. In the existing structure, timing out (with a status of 1) actually means that an attempt is still in progress.

Assessment: Looking over the patterns from those tests, here is a naive, shell for a connect_AP replacement that handles those cases. It is likely not sufficient as is, since I do not know enough about what other conditions might be encountered. Simply adding WL_CONNECTION_LOST to the unable to connect case might cover it.

    def fixed_connect_AP(self, ssid:str, password:str, timeout_s:int=10) -> int:  # pylint:disable=invalid-name
        """alternate structure"""
        # converting bytes to bytes does not change anything. No need to test before forcing.
        if self.status == wl_const.WL_NO_SSID_AVAIL:
            print('Could be a previous operation still in progress. Handle it')
            # send spi command to abort operation? and wait for 'better' status?
            raise ConnectionError("Operation pending: aborting connection to ssid", ssid)
        if isinstance(ssid, str):
            ssid = bytes(ssid, "utf-8")
        if password:
            if isinstance(password, str):
                password = bytes(password, "utf-8")
            self.wifi_set_passphrase(ssid, password)
        else:
            self.wifi_set_network(ssid)
        stat = self.status
        if stat != wl_const.WL_NO_SSID_AVAIL:
            print('Something strange: status should be "NO_SSID_AVAIL" immediately after '
                  'setting credentials')
            raise OSError(f'{stat:02X} status detected setting WiFi Credentials. ' +
                          f'Expecting only {wl_const.WL_NO_SSID_AVAIL:02X}')
        times = time.monotonic()
        while (time.monotonic() - times) < timeout_s:  # wait until have result or timeout
            stat = self.status
            if stat != wl_const.WL_NO_SSID_AVAIL:
                # the state has changed, we have some sort of result
                break
            time.sleep(0.05)
        if stat == wl_const.WL_CONNECTED:
            return stat
        if stat == wl_const.WL_NO_SSID_AVAIL:
            # possible bad password, weak signal, something else causing slow response
            raise ConnectionError("Timeout attempting to connect to ssid", ssid)
        if stat == wl_const.WL_DISCONNECTED:
            raise ConnectionError("Unable to connect to ssid", ssid)
        if stat == wl_const.WL_CONNECT_FAILED:
            raise ConnectionError("Unable to locate ssid", ssid)
        raise OSError(f"Unknown error {stat:02X}")

Using that version in my test code, with a 20 seoncd timout gives:

With test version of connect_AP
Connecting to AP...
esp.status =3
Connected to Dungeon    RSSI: -57
My IP address is 192.168.2.25
Error when SSID exists, but using wrong password: ('Unable to connect to ssid', b'Dungeon')
esp.status =6
Error when SSID does not exist: ('Unable to locate ssid', b'NO SUCH SSID HERE')
esp.status =4
esp.status =3
Connected to Dungeon    RSSI: -52
My IP address is 192.168.2.25
Done!

With a 10 second timeout:

With test version of connect_AP
Connecting to AP...
esp.status =3
Connected to Dungeon    RSSI: -52
My IP address is 192.168.2.25
Error when SSID exists, but using wrong password: ('Timeout attempting to connect to ssid', b'Dungeon')
esp.status =1
Could be a previous operation still in progress. Handle it
Error when SSID does not exist: ('Operation pending: aborting connection to ssid', 'NO SUCH SSID HERE')
esp.status =1
Could be a previous operation still in progress. Handle it
Traceback (most recent call last):
  File "main.py", line 183, in <module>
  File "main.py", line 65, in fixed_connect_AP
ConnectionError: ('Operation pending: aborting connection to ssid', 'Dungeon')

With a 10 second timeout and increasing the sleep time between connection attempts to 5 seconds:

With test version of connect_AP
Connecting to AP...
esp.status =3
Connected to Dungeon    RSSI: -60
My IP address is 192.168.2.25
Error when SSID exists, but using wrong password: ('Timeout attempting to connect to ssid', b'Dungeon')
esp.status =1
Error when SSID does not exist: ('Unable to locate ssid', b'NO SUCH SSID HERE')
esp.status =4
esp.status =3
Connected to Dungeon    RSSI: -59
My IP address is 192.168.2.25
Done!