earlephilhower / arduino-pico

Raspberry Pi Pico Arduino core, for all RP2040 and RP2350 boards
GNU Lesser General Public License v2.1
1.98k stars 412 forks source link

LwIP Integration questioned #2430

Closed HamzaHajeir closed 1 week ago

HamzaHajeir commented 1 week ago

Hi

I've asked a question on Discussions while ago about an issue regarding arduino-pico integration which ended up with no conclusion.

However, I've managed to reproduce the issue with raw LwIP APIs, exterminating any other dependency to create an MCVE to replicate the issue.

MCVE:

#include <string>

#include <lwip/tcp.h>

#include <Arduino.h>
#include <WiFi.h>

#define BURST_SIZE 20
#define MSG_SIZE TCP_SND_BUF / BURST_SIZE
#define YOUR_SSID "XXXXXXXX"
#define YOUR_PWD "XXXXXXXX"

struct tcp_pcb* pcb;

IPAddress ip(192,168,100,4);
#define REMOTE_PORT     8007
bool cnx=false;
bool issueSend = false;
uint32_t freeHeap() { return rp2040.getFreeHeap(); }
uint32_t maxBlock() { return freeHeap()/2; }
template<int I,typename... Args>
void _PRINTF(const char* fmt, Args... args) {
    Serial.printf(std::string(std::string("H4AT:% 7lu: H=%u M=%u ")+fmt).c_str(),millis(),freeHeap(),maxBlock(),args...);
}

#define PRINTF(...) _PRINTF<0>(__VA_ARGS__)

struct Data {
    uint8_t* data;
    uint16_t len;
};
std::queue<Data> rxQueue;

uint8_t msg[MSG_SIZE];

void popQueue(std::queue<Data>& Q) {
  if (Q.size()) {
    auto &d = Q.front();
    free(d.data);
    Q.pop();
  }
}
void _tcp_error(void* arg, err_t err) {
    PRINTF("err=%d\n", err);
    cnx=false;
}

err_t _tcp_recv(void *arg, struct tcp_pcb *tpcb, struct pbuf *p, err_t err) {
    PRINTF("_tcp_recv %p %p %d\n", tpcb, p, err);
    if (p){ 
        PRINTF("data %p tot_len %d\n", p->payload, p->tot_len);

        auto cpydata=static_cast<uint8_t*>(malloc(p->tot_len));
        if (cpydata) {
            pbuf_copy_partial(p,cpydata,p->tot_len,0);
            rxQueue.push({cpydata,p->tot_len});
        }

        tcp_recved(tpcb, p->tot_len);
        pbuf_free(p);

    } else {
        PRINTF("Closed!\n");
        cnx=false;
    }
    return ERR_OK;
}
err_t _tcp_sent(void* arg, struct tcp_pcb* p, u16_t len) {
    Serial.printf("_tcp_sent %d\n", len);
    return ERR_OK;
}

err_t _tcp_connected(void* arg, void* tpcb, err_t err){
    Serial.printf("Connected to echoserver\n");
    cnx = true;
    issueSend = true;
    tcp_nagle_enable(pcb);
    tcp_recv(pcb, _tcp_recv);
    tcp_sent(pcb, _tcp_sent);
    return ERR_OK;
}

void sendData(const uint8_t* data,size_t len,bool copy){
    size_t  bytesSent=0;
    size_t  bytesRemaining=len;

    while(bytesRemaining){
        size_t available=tcp_sndbuf(pcb);
        PRINTF("H=%lu available=%d\n", rp2040.getFreeHeap(), available);
        if(available && (tcp_sndqueuelen(pcb) < TCP_SND_QUEUELEN )){ // if there is any space at all (and space in Q!), use it
            auto chunk=std::min(bytesRemaining,available); // send as much as will fit
            auto err=tcp_write(pcb,data+bytesSent,chunk,TCP_WRITE_FLAG_COPY);
            // error handling deliberately omitted
            PRINTF("(e=%d) AVAILABLE=%d SENT=%d REMAINING=%d QL=%d\n",err,available,bytesSent,bytesRemaining,tcp_sndqueuelen(pcb));
            err=tcp_output(pcb);
            if(err) PRINTF("ERROR! tcp_output returns %d\n",err);
            else {
              bytesSent+=chunk;
              bytesRemaining-=chunk;
            }
        }
    }
}

void setup(){
    Serial.begin(921600);
    WiFi.mode(WIFI_STA);
    WiFi.beginNoBlock(YOUR_SSID, YOUR_PWD);
    while (WiFi.waitForConnectResult() != WL_CONNECTED){
        Serial.println("Connection Failed! Rebooting...");
        Serial.print(".");
        delay(5000);
        rp2040.reboot();
    }
    Serial.printf("\nIP: %s\n", WiFi.localIP().toString().c_str());
    pcb=tcp_new();
    tcp_err(pcb,_tcp_error);
    Serial.printf("MSS=%d SND_BUF=%d WND=%d SND_QUEUELEN=%d\n",TCP_MSS,TCP_SND_BUF,TCP_WND,TCP_SND_QUEUELEN);

    ip_addr_t ipt;
    ip_addr_set_ip4_u32(&ipt, ip);
    err_t err = tcp_connect(pcb, &ipt, REMOTE_PORT,(tcp_connected_fn)&_tcp_connected);
}

void loop(){
    static uint32_t prevmillis = millis();
    if (millis() - prevmillis >= 500){
        issueSend = true;
        prevmillis = millis();
    }
    if (issueSend){
#define SEND_EACH_LOOP 1
#if SEND_EACH_LOOP
        static int I = 0;
        if (I<BURST_SIZE) {
            sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
            I++;
        } else {
            issueSend=false;
            I=0;
        }
#else
        for (int i = 0; i < BURST_SIZE; i++)
            sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
        issueSend = false;
#endif
    }
    if (rxQueue.size()){
        auto d = rxQueue.front();

        /* process data on loop */
        Serial.printf("Processing Data %p %d\n", d.data, d.len);

        /* Free it */
        popQueue(rxQueue);
    }
}

platformio.ini:

[common]
platform = https://github.com/maxgerhardt/platform-raspberrypi.git#f519f650cbc787f5f58660b6e1bcf71c5083ecb8
platform_packages =
   framework-arduinopico@https://github.com/earlephilhower/arduino-pico.git#3.9.5
framework = arduino
board_build.core = earlephilhower
monitor_speed = 921600
monitor_eol = LF
lib_ldf_mode = deep+
board_build.filesystem_size = 0.5m
upload_port = F:
debug_build_flags = -Og -ggdb3 -g3
    ; -DPIO_FRAMEWORK_ARDUINO_NO_USB

debug_tool = cmsis-dap

[env:pico]
extends = common
board = rpipicow

build_flags = ${common.build_flags}
    '-DARDUINO_BOARD="RASPBERRY_PI_PICO_W"'

ECHO SERVER:

import asyncore
import socket

class EchoHandler(asyncore.dispatcher_with_send):

    def handle_read(self):
        data = self.recv(32768)
        print("RX ",len(data))
        #print(data)

        if(data):
            print("TX ",len(data))
            self.send(data)

class EchoServer(asyncore.dispatcher):

    def __init__(self, host, port):
        asyncore.dispatcher.__init__(self)
        self.create_socket(socket.AF_INET, socket.SOCK_STREAM)
        self.set_reuse_addr()
        self.bind((host, port))
        self.listen(5)

    def handle_accept(self):
        pair = self.accept()
        if pair is not None:
            sock, addr = pair
            print ('Incoming connection from %s' % repr(addr))
            handler = EchoHandler(sock)

server = EchoServer('0.0.0.0', 8007)
print("Start")
asyncore.loop()

Symptoms:

The issue appears as an infinite loop inside mallinfo which calculates the memory information, including used heap size. After a while (1 minute - 30 minutes mostly)

Inserting this code into the body of _tcp_recv callback accelerates the issue appearance:

        for (int i=0; i<10; i++) {
            auto d = static_cast<uint8_t*>(malloc(p->tot_len));
            rxQueue.push({d,p->tot_len});
            delay(10);
        }

CALL STACK:

``` isr_hardfault@0x100030c4 (Unknown Source:98) @0xfffffff9 (Unknown Source:0) __malloc_update_mallinfo@0x1001cabe (\__malloc_update_mallinfo.dbgasm:32) _mallinfo_r@0x1001cae4 (\_mallinfo_r.dbgasm:7) mallinfo@0x10018380 (\mallinfo.dbgasm:7) RP2040::getUsedHeap@0x100033c4 (c:\Users\Hamza\.platformio\packages\framework-arduinopico\cores\rp2040\RP2040Support.h:238) RP2040::getFreeHeap@0x100033c4 (c:\Users\Hamza\.platformio\packages\framework-arduinopico\cores\rp2040\RP2040Support.h:234) sendData@0x10003a5a (c:\Users\Hamza\Documents\PlatformIO\Projects\RPi Pico\src\main.cpp:456) loop@0x10003de2 (c:\Users\Hamza\Documents\PlatformIO\Projects\RPi Pico\src\main.cpp:534) main@0x10006302 (c:\Users\Hamza\.platformio\packages\framework-arduinopico\cores\rp2040\main.cpp:150) ``` Note: the ignore line numbers, as the project is being reused for multiple sketches using preprocessors.

__malloc_update_mallinfo.dbgasm:

``` 0x1001ca80: 03 21 movs r1, #3 0x1001ca82: 12 4b ldr r3, [pc, #72] @ (0x1001cacc <__malloc_update_mallinfo+76>) 0x1001ca84: f0 b5 push {r4, r5, r6, r7, lr} 0x1001ca86: 9a 68 ldr r2, [r3, #8] 0x1001ca88: 52 68 ldr r2, [r2, #4] 0x1001ca8a: 8a 43 bics r2, r1 0x1001ca8c: 14 00 movs r4, r2 0x1001ca8e: 02 39 subs r1, #2 0x1001ca90: 0f 2a cmp r2, #15 0x1001ca92: 00 dc bgt.n 0x1001ca96 <__malloc_update_mallinfo+22> 0x1001ca94: 00 21 movs r1, #0 0x1001ca96: fe 20 movs r0, #254 @ 0xfe 0x1001ca98: 03 27 movs r7, #3 0x1001ca9a: 80 00 lsls r0, r0, #2 0x1001ca9c: 1e 18 adds r6, r3, r0 0x1001ca9e: 58 69 ldr r0, [r3, #20] 0x1001caa0: 08 33 adds r3, #8 0x1001caa2: 98 42 cmp r0, r3 0x1001caa4: 0b d1 bne.n 0x1001cabe <__malloc_update_mallinfo+62> 0x1001caa6: 86 42 cmp r6, r0 0x1001caa8: 07 d1 bne.n 0x1001caba <__malloc_update_mallinfo+58> 0x1001caaa: 09 4b ldr r3, [pc, #36] @ (0x1001cad0 <__malloc_update_mallinfo+80>) 0x1001caac: 59 60 str r1, [r3, #4] 0x1001caae: 19 68 ldr r1, [r3, #0] 0x1001cab0: 1a 62 str r2, [r3, #32] 0x1001cab2: 89 1a subs r1, r1, r2 0x1001cab4: d9 61 str r1, [r3, #28] 0x1001cab6: 5c 62 str r4, [r3, #36] @ 0x24 0x1001cab8: f0 bd pop {r4, r5, r6, r7, pc} 0x1001caba: 03 00 movs r3, r0 0x1001cabc: ef e7 b.n 0x1001ca9e <__malloc_update_mallinfo+30> |>>>> 0x1001cabe: 45 68 ldr r5, [r0, #4] 0x1001cac0: 01 31 adds r1, #1 0x1001cac2: bd 43 bics r5, r7 0x1001cac4: c0 68 ldr r0, [r0, #12] 0x1001cac6: 52 19 adds r2, r2, r5 0x1001cac8: eb e7 b.n 0x1001caa2 <__malloc_update_mallinfo+34> 0x1001caca: c0 46 nop @ (mov r8, r8) 0x1001cacc: c4 13 asrs r4, r0, #15 0x1001cace: 00 20 movs r0, #0 0x1001cad0: 70 1c adds r0, r6, #1 0x1001cad2: 00 20 movs r0, #0 ```

The application asks periodically for the heap size for internal management. Therefor more chances of hanging happens.

Running With Arduino Pico 4.0.1:

Running with the newest release shows a different breaking point: Breaks in free() call.

CALL STACK:

``` isr_svcall@0x100030c6 (Unknown Source:89) @0xfffffff9 (Unknown Source:0) _free_r@0x1001b5d4 (\_free_r.dbgasm:62) free@0x100175ce (\free.dbgasm:6) __wrap_free@0x10005e66 (c:\Users\Hamza\.platformio\packages\framework-arduinopico\cores\rp2040\malloc-lock.cpp:103) popQueue@0x10003904 (c:\Users\Hamza\Documents\PlatformIO\Projects\RPi Pico\src\main.cpp:317) loop@0x100039b8 (c:\Users\Hamza\Documents\PlatformIO\Projects\RPi Pico\src\main.cpp:440) main@0x10005d7a (c:\Users\Hamza\.platformio\packages\framework-arduinopico\cores\rp2040\main.cpp:150) ```

_free_r.dbgasm:

``` 0x1001b554: f8 b5 push {r3, r4, r5, r6, r7, lr} 0x1001b556: 04 00 movs r4, r0 0x1001b558: 0d 1e subs r5, r1, #0 0x1001b55a: 2c d0 beq.n 0x1001b5b6 <_free_r+98> 0x1001b55c: fc f7 50 fa bl 0x10017a00 <__malloc_lock> 0x1001b560: 29 00 movs r1, r5 0x1001b562: 08 39 subs r1, #8 0x1001b564: 4f 68 ldr r7, [r1, #4] 0x1001b566: 01 26 movs r6, #1 0x1001b568: 3b 00 movs r3, r7 0x1001b56a: 03 22 movs r2, #3 0x1001b56c: b3 43 bics r3, r6 0x1001b56e: c8 18 adds r0, r1, r3 0x1001b570: 45 68 ldr r5, [r0, #4] 0x1001b572: 95 43 bics r5, r2 0x1001b574: 4e 4a ldr r2, [pc, #312] @ (0x1001b6b0 <_free_r+348>) 0x1001b576: 96 68 ldr r6, [r2, #8] 0x1001b578: b4 46 mov r12, r6 0x1001b57a: 01 26 movs r6, #1 0x1001b57c: 37 40 ands r7, r6 0x1001b57e: 84 45 cmp r12, r0 0x1001b580: 1a d1 bne.n 0x1001b5b8 <_free_r+100> 0x1001b582: 5b 19 adds r3, r3, r5 0x1001b584: 00 2f cmp r7, #0 0x1001b586: 06 d1 bne.n 0x1001b596 <_free_r+66> 0x1001b588: 08 68 ldr r0, [r1, #0] 0x1001b58a: 09 1a subs r1, r1, r0 0x1001b58c: 8d 68 ldr r5, [r1, #8] 0x1001b58e: 1b 18 adds r3, r3, r0 0x1001b590: c8 68 ldr r0, [r1, #12] 0x1001b592: e8 60 str r0, [r5, #12] 0x1001b594: 85 60 str r5, [r0, #8] 0x1001b596: 01 20 movs r0, #1 0x1001b598: 18 43 orrs r0, r3 0x1001b59a: 48 60 str r0, [r1, #4] 0x1001b59c: 91 60 str r1, [r2, #8] 0x1001b59e: 45 4a ldr r2, [pc, #276] @ (0x1001b6b4 <_free_r+352>) 0x1001b5a0: 12 68 ldr r2, [r2, #0] 0x1001b5a2: 9a 42 cmp r2, r3 0x1001b5a4: 04 d8 bhi.n 0x1001b5b0 <_free_r+92> 0x1001b5a6: 44 4b ldr r3, [pc, #272] @ (0x1001b6b8 <_free_r+356>) 0x1001b5a8: 20 00 movs r0, r4 0x1001b5aa: 19 68 ldr r1, [r3, #0] 0x1001b5ac: ff f7 84 ff bl 0x1001b4b8 <_malloc_trim_r> 0x1001b5b0: 20 00 movs r0, r4 0x1001b5b2: fc f7 2d fa bl 0x10017a10 <__malloc_unlock> 0x1001b5b6: f8 bd pop {r3, r4, r5, r6, r7, pc} 0x1001b5b8: 00 26 movs r6, #0 0x1001b5ba: 45 60 str r5, [r0, #4] 0x1001b5bc: b4 46 mov r12, r6 0x1001b5be: b7 42 cmp r7, r6 0x1001b5c0: 09 d1 bne.n 0x1001b5d6 <_free_r+130> 0x1001b5c2: 0f 68 ldr r7, [r1, #0] 0x1001b5c4: 3d 4e ldr r6, [pc, #244] @ (0x1001b6bc <_free_r+360>) 0x1001b5c6: c9 1b subs r1, r1, r7 0x1001b5c8: db 19 adds r3, r3, r7 0x1001b5ca: 8f 68 ldr r7, [r1, #8] 0x1001b5cc: b7 42 cmp r7, r6 0x1001b5ce: 17 d0 beq.n 0x1001b600 <_free_r+172> 0x1001b5d0: ce 68 ldr r6, [r1, #12] 0x1001b5d2: fe 60 str r6, [r7, #12] |>>>> 0x1001b5d4: b7 60 str r7, [r6, #8] 0x1001b5d6: 46 19 adds r6, r0, r5 0x1001b5d8: 77 68 ldr r7, [r6, #4] 0x1001b5da: 01 26 movs r6, #1 0x1001b5dc: 37 42 tst r7, r6 0x1001b5de: 15 d1 bne.n 0x1001b60c <_free_r+184> 0x1001b5e0: 67 46 mov r7, r12 0x1001b5e2: 5b 19 adds r3, r3, r5 0x1001b5e4: 85 68 ldr r5, [r0, #8] 0x1001b5e6: 00 2f cmp r7, #0 0x1001b5e8: 0d d1 bne.n 0x1001b606 <_free_r+178> 0x1001b5ea: 34 4f ldr r7, [pc, #208] @ (0x1001b6bc <_free_r+360>) 0x1001b5ec: bd 42 cmp r5, r7 0x1001b5ee: 0a d1 bne.n 0x1001b606 <_free_r+178> 0x1001b5f0: 1e 43 orrs r6, r3 0x1001b5f2: 51 61 str r1, [r2, #20] 0x1001b5f4: 11 61 str r1, [r2, #16] 0x1001b5f6: cd 60 str r5, [r1, #12] 0x1001b5f8: 8d 60 str r5, [r1, #8] 0x1001b5fa: 4e 60 str r6, [r1, #4] 0x1001b5fc: cb 50 str r3, [r1, r3] 0x1001b5fe: d7 e7 b.n 0x1001b5b0 <_free_r+92> 0x1001b600: 01 26 movs r6, #1 0x1001b602: b4 46 mov r12, r6 0x1001b604: e7 e7 b.n 0x1001b5d6 <_free_r+130> 0x1001b606: c0 68 ldr r0, [r0, #12] 0x1001b608: e8 60 str r0, [r5, #12] 0x1001b60a: 85 60 str r5, [r0, #8] 0x1001b60c: 01 25 movs r5, #1 0x1001b60e: 28 00 movs r0, r5 0x1001b610: 18 43 orrs r0, r3 0x1001b612: 48 60 str r0, [r1, #4] 0x1001b614: 60 46 mov r0, r12 0x1001b616: cb 50 str r3, [r1, r3] 0x1001b618: 00 28 cmp r0, #0 0x1001b61a: c9 d1 bne.n 0x1001b5b0 <_free_r+92> 0x1001b61c: 80 20 movs r0, #128 @ 0x80 0x1001b61e: 80 00 lsls r0, r0, #2 0x1001b620: 83 42 cmp r3, r0 0x1001b622: 0d d2 bcs.n 0x1001b640 <_free_r+236> 0x1001b624: d8 08 lsrs r0, r3, #3 0x1001b626: 5b 09 lsrs r3, r3, #5 0x1001b628: 9d 40 lsls r5, r3 0x1001b62a: 53 68 ldr r3, [r2, #4] 0x1001b62c: 1d 43 orrs r5, r3 0x1001b62e: c3 00 lsls r3, r0, #3 0x1001b630: 9b 18 adds r3, r3, r2 0x1001b632: 55 60 str r5, [r2, #4] 0x1001b634: 9a 68 ldr r2, [r3, #8] 0x1001b636: cb 60 str r3, [r1, #12] 0x1001b638: 8a 60 str r2, [r1, #8] 0x1001b63a: 99 60 str r1, [r3, #8] 0x1001b63c: d1 60 str r1, [r2, #12] 0x1001b63e: b7 e7 b.n 0x1001b5b0 <_free_r+92> 0x1001b640: 5d 0a lsrs r5, r3, #9 0x1001b642: 04 2d cmp r5, #4 0x1001b644: 12 d8 bhi.n 0x1001b66c <_free_r+280> 0x1001b646: 98 09 lsrs r0, r3, #6 0x1001b648: 38 30 adds r0, #56 @ 0x38 0x1001b64a: c6 00 lsls r6, r0, #3 0x1001b64c: b6 18 adds r6, r6, r2 0x1001b64e: b5 68 ldr r5, [r6, #8] 0x1001b650: 03 27 movs r7, #3 0x1001b652: ae 42 cmp r6, r5 0x1001b654: 25 d1 bne.n 0x1001b6a2 <_free_r+334> 0x1001b656: 01 23 movs r3, #1 0x1001b658: 80 10 asrs r0, r0, #2 0x1001b65a: 83 40 lsls r3, r0 0x1001b65c: 50 68 ldr r0, [r2, #4] 0x1001b65e: 03 43 orrs r3, r0 0x1001b660: 53 60 str r3, [r2, #4] 0x1001b662: ce 60 str r6, [r1, #12] 0x1001b664: 8d 60 str r5, [r1, #8] 0x1001b666: b1 60 str r1, [r6, #8] 0x1001b668: e9 60 str r1, [r5, #12] 0x1001b66a: a1 e7 b.n 0x1001b5b0 <_free_r+92> 0x1001b66c: 14 2d cmp r5, #20 0x1001b66e: 02 d8 bhi.n 0x1001b676 <_free_r+290> 0x1001b670: 28 00 movs r0, r5 0x1001b672: 5b 30 adds r0, #91 @ 0x5b 0x1001b674: e9 e7 b.n 0x1001b64a <_free_r+246> 0x1001b676: 54 2d cmp r5, #84 @ 0x54 0x1001b678: 02 d8 bhi.n 0x1001b680 <_free_r+300> 0x1001b67a: 18 0b lsrs r0, r3, #12 0x1001b67c: 6e 30 adds r0, #110 @ 0x6e 0x1001b67e: e4 e7 b.n 0x1001b64a <_free_r+246> 0x1001b680: aa 20 movs r0, #170 @ 0xaa 0x1001b682: 40 00 lsls r0, r0, #1 0x1001b684: 85 42 cmp r5, r0 0x1001b686: 02 d8 bhi.n 0x1001b68e <_free_r+314> 0x1001b688: d8 0b lsrs r0, r3, #15 0x1001b68a: 77 30 adds r0, #119 @ 0x77 0x1001b68c: dd e7 b.n 0x1001b64a <_free_r+246> 0x1001b68e: 0c 4e ldr r6, [pc, #48] @ (0x1001b6c0 <_free_r+364>) 0x1001b690: 7e 20 movs r0, #126 @ 0x7e 0x1001b692: b5 42 cmp r5, r6 0x1001b694: d9 d8 bhi.n 0x1001b64a <_free_r+246> 0x1001b696: 98 0c lsrs r0, r3, #18 0x1001b698: 7c 30 adds r0, #124 @ 0x7c 0x1001b69a: d6 e7 b.n 0x1001b64a <_free_r+246> 0x1001b69c: ad 68 ldr r5, [r5, #8] 0x1001b69e: ae 42 cmp r6, r5 0x1001b6a0: 03 d0 beq.n 0x1001b6aa <_free_r+342> 0x1001b6a2: 6a 68 ldr r2, [r5, #4] 0x1001b6a4: ba 43 bics r2, r7 0x1001b6a6: 9a 42 cmp r2, r3 0x1001b6a8: f8 d8 bhi.n 0x1001b69c <_free_r+328> 0x1001b6aa: ee 68 ldr r6, [r5, #12] 0x1001b6ac: d9 e7 b.n 0x1001b662 <_free_r+270> 0x1001b6ae: c0 46 nop @ (mov r8, r8) 0x1001b6b0: 0c 10 asrs r4, r1, #32 0x1001b6b2: 00 20 movs r0, #0 0x1001b6b4: 08 10 asrs r0, r1, #32 0x1001b6b6: 00 20 movs r0, #0 0x1001b6b8: f4 18 adds r4, r6, r3 0x1001b6ba: 00 20 movs r0, #0 0x1001b6bc: 14 10 asrs r4, r2, #32 0x1001b6be: 00 20 movs r0, #0 0x1001b6c0: 54 05 lsls r4, r2, #21 0x1001b6c2: 00 00 movs r0, r0 ```

Analysis:

The issue presents when a memory is being allocated within _tcp_recv callback by malloc and freed on main loop, even without pbuf_copy_partial().

Hypothesis:

The arduino integration wraps the raw LwIP APIs to lock the async_context that runs lwip context. Where all of these are of calling from app down to LwIP context.

The wrapping file does not show any other-way around wrapping, the LwIP core asserts the input calls to be processed with locked context tcp_input() ip4_input() ethernet_input() netif_input().

This alone does not explain the root cause, but could contribute to.

earlephilhower commented 1 week ago

Once I fixed the small syntax error in the sketch (std::queue<Data> vs std::queue<struct Data>) and converted it to run on my currently hooked up W5100 board using IRQ-based packet processing (same code paths as the Pico W in lwipEthernet) it's been running for 35 minutes so far without incident.

#include <string>
#include <lwip/tcp.h>
#include <Arduino.h>
#include <WiFi.h>
#include <W5100lwIP.h>

// To use Interrupt-driven mode, pass in an SPI object and an IRQ pin like so:
Wiznet5100lwIP eth(17, SPI, 21);

#define BURST_SIZE 20
#define MSG_SIZE TCP_SND_BUF / BURST_SIZE
#define YOUR_SSID "XXXXXXXX"
#define YOUR_PWD "XXXXXXXX"

struct tcp_pcb* pcb;

IPAddress ip(192,168,1,8);
#define REMOTE_PORT   8007
bool cnx=false;
bool issueSend = false;
uint32_t freeHeap() { return rp2040.getFreeHeap(); }
uint32_t maxBlock() { return freeHeap()/2; }
template<int I,typename... Args>
void _PRINTF(const char* fmt, Args... args) {
  Serial.printf(std::string(std::string("H4AT:% 7lu: H=%u M=%u ")+fmt).c_str(),millis(),freeHeap(),maxBlock(),args...);
}

#define PRINTF(...) _PRINTF<0>(__VA_ARGS__)

struct Data {
  uint8_t* data;
  uint16_t len;
} ;
std::queue<Data> rxQueue;

uint8_t msg[MSG_SIZE];

void popQueue(std::queue<struct Data>& Q) {
  if (Q.size()) {
  auto &d = Q.front();
  free(d.data);
  Q.pop();
  }
}
void _tcp_error(void* arg, err_t err) {
  PRINTF("err=%d\n", err);
  cnx=false;
}

err_t _tcp_recv(void *arg, struct tcp_pcb *tpcb, struct pbuf *p, err_t err) {
  PRINTF("_tcp_recv %p %p %d\n", tpcb, p, err);
  if (p){ 
    PRINTF("data %p tot_len %d\n", p->payload, p->tot_len);

    auto cpydata=static_cast<uint8_t*>(malloc(p->tot_len));
    if (cpydata) {
      pbuf_copy_partial(p,cpydata,p->tot_len,0);
      rxQueue.push({cpydata,p->tot_len});
    }

    tcp_recved(tpcb, p->tot_len);
    pbuf_free(p);

  } else {
    PRINTF("Closed!\n");
    cnx=false;
  }
  return ERR_OK;
}
err_t _tcp_sent(void* arg, struct tcp_pcb* p, u16_t len) {
  Serial.printf("_tcp_sent %d\n", len);
  return ERR_OK;
}

err_t _tcp_connected(void* arg, void* tpcb, err_t err){
  Serial.printf("Connected to echoserver\n");
  cnx = true;
  issueSend = true;
  tcp_nagle_enable(pcb);
  tcp_recv(pcb, _tcp_recv);
  tcp_sent(pcb, _tcp_sent);
  return ERR_OK;
}

void sendData(const uint8_t* data,size_t len,bool copy){
    size_t  bytesSent=0;
    size_t  bytesRemaining=len;

    while(bytesRemaining){
        size_t available=tcp_sndbuf(pcb);
    PRINTF("H=%lu available=%d\n", rp2040.getFreeHeap(), available);
        if(available && (tcp_sndqueuelen(pcb) < TCP_SND_QUEUELEN )){ // if there is any space at all (and space in Q!), use it
            auto chunk=std::min(bytesRemaining,available); // send as much as will fit
            auto err=tcp_write(pcb,data+bytesSent,chunk,TCP_WRITE_FLAG_COPY);
            // error handling deliberately omitted
            PRINTF("(e=%d) AVAILABLE=%d SENT=%d REMAINING=%d QL=%d\n",err,available,bytesSent,bytesRemaining,tcp_sndqueuelen(pcb));
            err=tcp_output(pcb);
            if(err) PRINTF("ERROR! tcp_output returns %d\n",err);
            else {
              bytesSent+=chunk;
              bytesRemaining-=chunk;
            }
        }
    }
}

void setup(){
  Serial.begin(921600);
  delay(5000);
  // Start the Ethernet port
  if (!eth.begin()) {
    Serial.println("No wired Ethernet hardware detected. Check pinouts, wiring.");
    while (1) {
      delay(1000);
    }
  }

  while (!eth.connected()) {
    Serial.print(".");
    delay(500);
  }  Serial.println("");
  Serial.println("Ethernet connected");
  Serial.println("IP address: ");
  Serial.println(eth.localIP());

  //WiFi.mode(WIFI_STA);
  //WiFi.beginNoBlock(YOUR_SSID, YOUR_PWD);
  //while (WiFi.waitForConnectResult() != WL_CONNECTED){
//    Serial.println("Connection Failed! Rebooting...");
    //Serial.print(".");
    //delay(5000);
    //rp2040.reboot();
  //}
  //Serial.printf("\nIP: %s\n", WiFi.localIP().toString().c_str());
  pcb=tcp_new();
  tcp_err(pcb,_tcp_error);
  Serial.printf("MSS=%d SND_BUF=%d WND=%d SND_QUEUELEN=%d\n",TCP_MSS,TCP_SND_BUF,TCP_WND,TCP_SND_QUEUELEN);

  ip_addr_t ipt;
  ip_addr_set_ip4_u32(&ipt, ip);
  err_t err = tcp_connect(pcb, &ipt, REMOTE_PORT,(tcp_connected_fn)&_tcp_connected);
}

void loop(){
  static uint32_t prevmillis = millis();
  if (millis() - prevmillis >= 500){
    issueSend = true;
    prevmillis = millis();
  }
  if (issueSend){
#define SEND_EACH_LOOP 1
#if SEND_EACH_LOOP
    static int I = 0;
    if (I<BURST_SIZE) {
      sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
      I++;
    } else {
      issueSend=false;
      I=0;
    }
#else
    for (int i = 0; i < BURST_SIZE; i++)
      sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
    issueSend = false;
#endif
  }
  if (rxQueue.size()){
    auto d = rxQueue.front();

    /* process data on loop */
    Serial.printf("Processing Data %p %d\n", d.data, d.len);

    /* Free it */
    popQueue(rxQueue);
  }
}

Giving output of the following over USB:

....
Processing Data 0x20011058 584
H4AT:2155185: H=191784 M=95892 H=191840 available=11680
H4AT:2155185: H=191760 M=95880 (e=0) AVAILABLE=11680 SENT=0 REMAINING=584 QL=1
Processing Data 0x200112a8 584
H4AT:2155206: H=191784 M=95892 H=191840 available=11680
H4AT:2155207: H=191760 M=95880 (e=0) AVAILABLE=11680 SENT=0 REMAINING=584 QL=1
Processing Data 0x20011058 584
....

Note that I have built using git master not 3.9.5 release, since I've got other things going on RP2350, and I used the IDE because I could never get P.IO to take my active git tree as the current version to use.

earlephilhower commented 1 week ago

Update: It's at 58 mins runtime without incident so far. I'll leave it sit overnight and if it's still good try and find an OG PicoW to replicate the experiment on. If that dies where this runs, then the issue is inside the CYW43 driver and not the infrastructure here which would be interesting.

HamzaHajeir commented 1 week ago

I think you can get PIO to work atop of git master, but required to remove the present one under .platformio/packages DIR [framework-arduinopico], specify the git URL in platform_packages in platformio.ini as follows:

platform_packages =
   framework-arduinopico@https://github.com/earlephilhower/arduino-pico.git#master

Then PIO is enforced to download it [IDK how they run the cache regarding branches...].

However, it's sometimes hard to get it right away, that's why I now run a version to accelerate the incident, it gets the issue here in less a minute:

BTW I don't have an ethernet board so couldn't run the provided.

#include <string>
#include <lwip/tcp.h>
#include <Arduino.h>
#include <WiFi.h>

#define BURST_SIZE 20
#define MSG_SIZE TCP_SND_BUF / BURST_SIZE
#define YOUR_SSID "XXXXXXXX"
#define YOUR_PWD "XXXXXXXX"

struct tcp_pcb* pcb;

IPAddress ip(192,168,100,4);
#define REMOTE_PORT     8007
bool cnx=false;
bool issueSend = false;
uint32_t freeHeap() { return rp2040.getFreeHeap(); }
uint32_t maxBlock() { return freeHeap()/2; }
template<int I,typename... Args>
void _PRINTF(const char* fmt, Args... args) {
    Serial.printf(std::string(std::string("H4AT:% 7lu: H=%u M=%u ")+fmt).c_str(),millis(),freeHeap(),maxBlock(),args...);
}

#define PRINTF(...) _PRINTF<0>(__VA_ARGS__)

struct Data {
    uint8_t* data;
    uint16_t len;
};
std::queue<struct Data> rxQueue;

uint8_t msg[MSG_SIZE];

void popQueue(std::queue<struct Data>& Q) {
  if (Q.size()) {
    auto &d = Q.front();
    free(d.data);
    Q.pop();
  }
}
void _tcp_error(void* arg, err_t err) {
    PRINTF("err=%d\n", err);
    cnx=false;
}

err_t _tcp_recv(void *arg, struct tcp_pcb *tpcb, struct pbuf *p, err_t err) {
    PRINTF("_tcp_recv %p %p %d\n", tpcb, p, err);
    if (p){ 
        PRINTF("data %p tot_len %d\n", p->payload, p->tot_len);

        auto cpydata=static_cast<uint8_t*>(malloc(p->tot_len));
        if (cpydata) {
            // pbuf_copy_partial(p,cpydata,p->tot_len,0);
            rxQueue.push({cpydata,p->tot_len});
        }

        for (int i=0; i<10; i++) {
            auto d = static_cast<uint8_t*>(malloc(p->tot_len));
            rxQueue.push({d,p->tot_len});
            delay(10);
        }

        tcp_recved(tpcb, p->tot_len);
        pbuf_free(p);

    } else {
        PRINTF("Closed!\n");
        cnx=false;
    }
    return ERR_OK;
}
err_t _tcp_sent(void* arg, struct tcp_pcb* p, u16_t len) {
    Serial.printf("_tcp_sent %d\n", len);
    return ERR_OK;
}

err_t _tcp_connected(void* arg, void* tpcb, err_t err){
    Serial.printf("Connected to echoserver\n");
    cnx = true;
    issueSend = true;
    tcp_nagle_enable(pcb);
    tcp_recv(pcb, _tcp_recv);
    tcp_sent(pcb, _tcp_sent);
    return ERR_OK;
}

void sendData(const uint8_t* data,size_t len,bool copy){
    size_t  bytesSent=0;
    size_t  bytesRemaining=len;

    while(bytesRemaining){
        size_t available=tcp_sndbuf(pcb);
        PRINTF("H=%lu available=%d\n", rp2040.getFreeHeap(), available);
        if(available && (tcp_sndqueuelen(pcb) < TCP_SND_QUEUELEN )){ // if there is any space at all (and space in Q!), use it
            auto chunk=std::min(bytesRemaining,available); // send as much as will fit
            auto err=tcp_write(pcb,data+bytesSent,chunk,TCP_WRITE_FLAG_COPY);
            // error handling deliberately omitted
            PRINTF("(e=%d) AVAILABLE=%d SENT=%d REMAINING=%d QL=%d\n",err,available,bytesSent,bytesRemaining,tcp_sndqueuelen(pcb));
            err=tcp_output(pcb);
            if(err) PRINTF("ERROR! tcp_output returns %d\n",err);
            else {
              bytesSent+=chunk;
              bytesRemaining-=chunk;
            }
        }
    }
}

void setup(){
    Serial.begin(921600);
    WiFi.mode(WIFI_STA);
    WiFi.beginNoBlock(YOUR_SSID, YOUR_PWD);
    while (WiFi.waitForConnectResult() != WL_CONNECTED){
        Serial.println("Connection Failed! Rebooting...");
        Serial.print(".");
        delay(5000);
        rp2040.reboot();
    }
    Serial.printf("\nIP: %s\n", WiFi.localIP().toString().c_str());
    pcb=tcp_new();
    tcp_err(pcb,_tcp_error);
    Serial.printf("MSS=%d SND_BUF=%d WND=%d SND_QUEUELEN=%d\n",TCP_MSS,TCP_SND_BUF,TCP_WND,TCP_SND_QUEUELEN);

    ip_addr_t ipt;
    ip_addr_set_ip4_u32(&ipt, ip);
    err_t err = tcp_connect(pcb, &ipt, REMOTE_PORT,(tcp_connected_fn)&_tcp_connected);
}

void loop(){
    static uint32_t prevmillis = millis();
    if (millis() - prevmillis >= 500){
        issueSend = true;
        prevmillis = millis();
    }
    if (issueSend){
#define SEND_EACH_LOOP 1
#if SEND_EACH_LOOP
        static int I = 0;
        if (I<BURST_SIZE) {
            sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
            I++;
        } else {
            issueSend=false;
            I=0;
        }
#else
        for (int i = 0; i < BURST_SIZE; i++)
            sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
        issueSend = false;
#endif
    }
    if (rxQueue.size()){
        auto &d = rxQueue.front();

        /* process data on loop */
        Serial.printf("Processing Data %p %d\n", d.data, d.len);

        /* Free it */
        popQueue(rxQueue);
    }
}

Per struct in cpp, AFAIK it's technically the same to class, but differs in default accessibility (public for struct).

But anyway I've used struct Data instead of Data.

earlephilhower commented 1 week ago

The sample here ran for > 12hrs so far, so killing. Will try w/a Pico W instead, soon. If that fails then the issue is pinpointed to something related to the PicoSDK CYW43 implementation.

earlephilhower commented 1 week ago

I have been running the same example using a PicoW for 60 mins so far, no issues seen. Still dumping

....
H4AT:3276075: H=188944 M=94472 _tcp_recv 0x2000d3f0 0x2000c7e0 0
H4AT:3276076: H=188944 M=94472 data 0x2000c826 tot_len 584
H4AT:3276074: H=189000 M=94500 H=189056 available=10512
H4AT:3276077: H=188384 M=94192 (e=0) AVAILABLE=10512 SENT=0 REMAINING=584 QL=2
_tcp_sent 584
....

Qualitatively, the throughput of log messages seems lower vs. the wired test but I do not have an exact characterization there. It's running at a good rate, just that the wired connection felt like it was always writing. With WiFi it's maybe .5s between log dumps.

I'll keep it going for now, but I would suggest trying git master at your site and see if you can still repro the failure. Something in the RP2350B update/SDK 2.0 move may have unbroken a weird corner case or something...

#include <string>

#include <lwip/tcp.h>

#include <Arduino.h>
#include <WiFi.h>

#define BURST_SIZE 20
#define MSG_SIZE TCP_SND_BUF / BURST_SIZE
#define YOUR_SSID XXXX
#define YOUR_PWD YYYY

struct tcp_pcb* pcb;

IPAddress ip(192,168,1,8);
#define REMOTE_PORT   8007
bool cnx=false;
bool issueSend = false;
uint32_t freeHeap() { return rp2040.getFreeHeap(); }
uint32_t maxBlock() { return freeHeap()/2; }
template<int I,typename... Args>
void _PRINTF(const char* fmt, Args... args) {
  Serial.printf(std::string(std::string("H4AT:% 7lu: H=%u M=%u ")+fmt).c_str(),millis(),freeHeap(),maxBlock(),args...);
}

#define PRINTF(...) _PRINTF<0>(__VA_ARGS__)

typedef struct Data {
  uint8_t* data;
  uint16_t len;
} ;
std::queue<Data> rxQueue;

uint8_t msg[MSG_SIZE];

void popQueue(std::queue<struct Data>& Q) {
  if (Q.size()) {
  auto &d = Q.front();
  free(d.data);
  Q.pop();
  }
}
void _tcp_error(void* arg, err_t err) {
  PRINTF("err=%d\n", err);
  cnx=false;
}

err_t _tcp_recv(void *arg, struct tcp_pcb *tpcb, struct pbuf *p, err_t err) {
  PRINTF("_tcp_recv %p %p %d\n", tpcb, p, err);
  if (p){ 
    PRINTF("data %p tot_len %d\n", p->payload, p->tot_len);

    auto cpydata=static_cast<uint8_t*>(malloc(p->tot_len));
    if (cpydata) {
      pbuf_copy_partial(p,cpydata,p->tot_len,0);
      rxQueue.push({cpydata,p->tot_len});
    }

    tcp_recved(tpcb, p->tot_len);
    pbuf_free(p);

  } else {
    PRINTF("Closed!\n");
    cnx=false;
  }
  return ERR_OK;
}
err_t _tcp_sent(void* arg, struct tcp_pcb* p, u16_t len) {
  Serial.printf("_tcp_sent %d\n", len);
  return ERR_OK;
}

err_t _tcp_connected(void* arg, void* tpcb, err_t err){
  Serial.printf("Connected to echoserver\n");
  cnx = true;
  issueSend = true;
  tcp_nagle_enable(pcb);
  tcp_recv(pcb, _tcp_recv);
  tcp_sent(pcb, _tcp_sent);
  return ERR_OK;
}

void sendData(const uint8_t* data,size_t len,bool copy){
    size_t  bytesSent=0;
    size_t  bytesRemaining=len;

    while(bytesRemaining){
        size_t available=tcp_sndbuf(pcb);
    PRINTF("H=%lu available=%d\n", rp2040.getFreeHeap(), available);
        if(available && (tcp_sndqueuelen(pcb) < TCP_SND_QUEUELEN )){ // if there is any space at all (and space in Q!), use it
            auto chunk=std::min(bytesRemaining,available); // send as much as will fit
            auto err=tcp_write(pcb,data+bytesSent,chunk,TCP_WRITE_FLAG_COPY);
            // error handling deliberately omitted
            PRINTF("(e=%d) AVAILABLE=%d SENT=%d REMAINING=%d QL=%d\n",err,available,bytesSent,bytesRemaining,tcp_sndqueuelen(pcb));
            err=tcp_output(pcb);
            if(err) PRINTF("ERROR! tcp_output returns %d\n",err);
            else {
              bytesSent+=chunk;
              bytesRemaining-=chunk;
            }
        }
    }
}

void setup(){
  Serial.begin(921600);
  delay(5000);

  WiFi.mode(WIFI_STA);
  WiFi.beginNoBlock(YOUR_SSID, YOUR_PWD);
  while (WiFi.waitForConnectResult() != WL_CONNECTED){
    Serial.println("Connection Failed! Rebooting...");
    Serial.print(".");
    delay(5000);
    rp2040.reboot();
  }
  Serial.printf("\nIP: %s\n", WiFi.localIP().toString().c_str());
  pcb=tcp_new();
  tcp_err(pcb,_tcp_error);
  Serial.printf("MSS=%d SND_BUF=%d WND=%d SND_QUEUELEN=%d\n",TCP_MSS,TCP_SND_BUF,TCP_WND,TCP_SND_QUEUELEN);

  ip_addr_t ipt;
  ip_addr_set_ip4_u32(&ipt, ip);
  err_t err = tcp_connect(pcb, &ipt, REMOTE_PORT,(tcp_connected_fn)&_tcp_connected);
}

void loop(){
  static uint32_t prevmillis = millis();
  if (millis() - prevmillis >= 500){
    issueSend = true;
    prevmillis = millis();
  }
  if (issueSend){
#define SEND_EACH_LOOP 1
#if SEND_EACH_LOOP
    static int I = 0;
    if (I<BURST_SIZE) {
      sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
      I++;
    } else {
      issueSend=false;
      I=0;
    }
#else
    for (int i = 0; i < BURST_SIZE; i++)
      sendData(msg, MSG_SIZE, false); // 1x only has to be on main loop
    issueSend = false;
#endif
  }
  if (rxQueue.size()){
    auto d = rxQueue.front();

    /* process data on loop */
    Serial.printf("Processing Data %p %d\n", d.data, d.len);

    /* Free it */
    popQueue(rxQueue);
  }
}
HamzaHajeir commented 1 week ago

I've just seen a bug within the acceleration code regarding memory management, Therefore I need to make sure about the incident over v4.0.1 and further test on master.

I'll feedback the conclusion soon.

earlephilhower commented 1 week ago

The PicoW is at 3 hrs now and I need the USB port back so I'm going to stop it and replace it with a 2350. FWIW, no issue was seen using the latest git master. Will wait for your update...

earlephilhower commented 1 week ago

Any update w/the latest releases on this, @HamzaHajeir ?

HamzaHajeir commented 1 week ago

Hi Earle, I was just drafting this message.

Had the final conclusion: The bug disappears with the new release 4.0.1. And the incident within 4.0.1 was an issue with memory management.

HamzaHajeir commented 1 week ago

@earlephilhower

While the issue was replicated in 3.9.5, it was not replicated by any correct code in 4.0.1, including the continuous stress test modified from H4AsyncTCP library.

What's worth to add that I can confirm the upstack calls (lwip->app) are locked by the async_context (at least for 4.0.1).

Thank you for taking the issue seriously. Sometime soon I'll announce the support in Discussions.

earlephilhower commented 1 week ago

Great, glad to hear it's running. Thanks for the update!