uNetworking / uWebSockets.js

μWebSockets for Node.js back-ends :metal:
Apache License 2.0
7.86k stars 569 forks source link

BUG: uWebSockets and headers with lazy delivered ending #879

Closed webcarrot closed 1 year ago

webcarrot commented 1 year ago

This appears to be some kind of "bold timing assumptions" issue.

example.mjs

import { App } from 'uWebSockets.js';
import { Socket } from 'node:net';

const app = App();

const PAYLOAD = 'ABCDEFG';
// set to true to "FIX" issue on client side
const CORK = false;

app.any('/*', (res, req) => {
    console.log('u:Request');
    const headers = {};
    req.forEach((k, v) => {
        headers[k] = v;
    });
    let chunks = [];
    let total = 0;
    res.onAborted(() => {
        console.log('u:Aborted', { total });
        const INVALID = Buffer.concat(chunks, total);
        console.log({
            UTF8: INVALID.toString('utf-8'),
            HEX: INVALID.toString('hex'),
            headers: headers,
            chunks: chunks.map((el) => el.byteLength),
        });
    });
    const onDone = () => {
        res.end('hi');
        console.log('u:OK');
        chunks = [];
    };
    res.onData((chunk, isLast) => {
        const buffer = Buffer.from(new Uint8Array(chunk));
        chunks.push(buffer);
        const str = buffer.toString('utf-8');
        const isBroken = !isLast && str !== PAYLOAD;
        console.log(isBroken ? 'u:BrokenChunk' : 'u:Chunk', { str });
        total += chunk.byteLength;
        if (isLast) {
            onDone();
        }
    });
});

await new Promise((r) => app.listen(5900, r));

const data = Buffer.from(PAYLOAD, 'utf-8');

while (true) {
    try {
        console.log('n:request');
        await new Promise((resolve, reject) => {
            const socket = new Socket({ readable: true, writable: true, allowHalfOpen: true });
            socket.connect(5900, '127.0.0.1', () => {
                console.log('n:connect');

                socket.on('ready', () => {
                    console.log('n:ready');
                    if (CORK) socket.cork();
                    socket.write(`POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\n`, 'ascii');
                    socket.write(`transfer-encoding: chunked\r\n`, 'ascii');
                    socket.write(`\r\n${data.byteLength.toString(16)}\r\n`, 'ascii');
                    socket.write(data);
                    socket.write(`\r\n0\r\n`, 'ascii');
                    if (CORK) socket.uncork();
                });

                let dataReceived = false;
                socket.on('data', () => {
                    dataReceived = true;
                    console.log('n:data');
                    socket.end();
                });

                socket.on('error', () => {
                    console.log('n:error');
                    reject(err);
                });
                socket.on('end', () => {
                    console.log('n:end', dataReceived);
                    dataReceived ? resolve() : reject(new Error('broken'));
                });
            });
        });
    } catch (err) {
        console.log(err.message);
        break;
    }
}

Example output:

[...✂️...]
n:request
n:connect
n:ready
u:Request
u:Chunk { str: 'ABCDEFG' }
u:Chunk { str: '' }
u:OK
n:data
n:end true
n:request
n:connect
n:ready
u:Request
u:BrokenChunk { str: '7\r\nABCDEFG\r\n0\r\n' }
u:Aborted { total: 15 }
{
  UTF8: '7\r\nABCDEFG\r\n0\r\n',
  HEX: '370d0a414243444546470d0a300d0a',
  headers: {
    host: '127.0.0.1',
    connection: 'close',
    'transfer-encoding': 'chunked'
  },
  chunks: [ 15 ]
}
n:end false
broken
webcarrot commented 1 year ago

Something like this also "fix" the issue:

socket.cork();
socket.write(`POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\n`, 'ascii');
socket.write(`transfer-encoding: chunked\r\n`, 'ascii');
socket.write(`\r\n${data.byteLength.toString(16)}\r\n`, 'ascii');
socket.uncork()
socket.write(data);
socket.write(`\r\n0\r\n`, 'ascii');

Looks like uWebSockets expecting that information about first chunk length will be a part of headers payload (whatever it is).

webcarrot commented 1 year ago

For example undici send information about first chunk length after sending headers: https://github.com/nodejs/undici/blob/main/lib/client.js#L1684

Looks OK as chunks are part of the body.

webcarrot commented 1 year ago

To always reproduce:

// headers
socket.cork();
socket.write(`POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\n`, 'ascii');
socket.write(`transfer-encoding: chunked\r\n`, 'ascii');
socket.uncork();
setTimeout(() => {
  // and after some time first chunk - with headers ending...
  socket.write(`\r\n${data.byteLength.toString(16)}\r\n`, 'ascii');
  socket.write(data);
  socket.write(`\r\n0\r\n`, 'ascii');
}, 1);
webcarrot commented 1 year ago

Hmm maybe it's an undici bug.

uNetworkingAB commented 1 year ago

Interesting. Will have a look.

webcarrot commented 1 year ago

Interesting. Will have a look.

@uNetworkingAB Thanks!

I created: https://github.com/nodejs/undici/issues/1981

I have no idea which behaviour is correct (undici vs uWebSockets).

webcarrot commented 1 year ago

In fact is all about missing proper ending of headers section in content of first(?) (TCP?) packet/chunk.

In the same time I was thinking that TCP packages are not trustworthy as firewalls/proxies etc can potentially repack them in unexpected way.

uNetworkingAB commented 1 year ago

This is not an issue re. general TCP, it's most likely an issue with the chunked encoding parser specifically. The library does no such assumptions about TCP (that would be seriously broken). So consider this isolated to chunked encoding - you can try by removing transfer-encoding: chunked from your reproducer.

uNetworkingAB commented 1 year ago

I cannot reproduce this, see my test:

async function checkReportedBug() {
    const connection = await Deno.connect({
    port: 3000,
    hostname: "localhost",
    });

    {
        const request = new TextEncoder().encode(
        "POST / HTTP/1.1\r\nHost: example.com\r\nTransfer-Encoding: chunked\r\n\r\n",
        );
        const _bytesWritten = await connection.write(request);
    }
    await new Promise(resolve => setTimeout(resolve, 500));
    {
        const request = new TextEncoder().encode(
        "5\r\nHello\r\n0\r\n",
        );
        const _bytesWritten = await connection.write(request);
    }

    const buffer = new Uint8Array(256);
    await connection.read(buffer);
    connection.close();

    console.log("Responded with: " + new TextDecoder().decode(buffer));
}

This sends the body as a separate TCP segment as can be seen with strace on the server:

recvfrom(8, "POST / HTTP/1.1\r\nHost: example.c"..., 524288, 0, NULL, NULL) = 66

[...]

recvfrom(8, "5\r\nHello\r\n0\r\n", 524288, 0, NULL, NULL) = 13

This is tested with the Crc32 example in uWebSockets repo. It returns the correct CRC32 for "Hello": f7d18982

Are you sure you have the latest version? Can you try against the Crc32 example?

uNetworkingAB commented 1 year ago

To always reproduce:

// headers
socket.cork();
socket.write(`POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\n`, 'ascii');
socket.write(`transfer-encoding: chunked\r\n`, 'ascii');
socket.uncork();
setTimeout(() => {
  // and after some time first chunk - with headers ending...
  socket.write(`\r\n${data.byteLength.toString(16)}\r\n`, 'ascii');
  socket.write(data);
  socket.write(`\r\n0\r\n`, 'ascii');
}, 1);

This case is wrong; you have too much "\r\n" before the chunk

webcarrot commented 1 year ago

@uNetworkingAB

uWebSockets.js v20.19.0

IMHO request is OK.

In this 👇🏻 example I split HTTP request into 2 TCP packages.

From my observation it looks like uWebSockets start to have troubles when:

import { App } from 'uWebSockets.js';
import { Socket } from 'node:net';

const app = App();

app.any('/*', (res, req) => {
    const headers = {};
    req.forEach((k, v) => {
        headers[k] = v;
    });
    const chunks = [];
    let total = 0;
    res.onAborted(() => {
        console.log('u:Aborted', {
            data: Buffer.concat(chunks, total).toString('ascii'),
            headers: headers,
            chunks: chunks.map((el) => el.byteLength),
        });
    });
    const onDone = () => {
        res.end('hi');
    };
    res.onData((chunk, isLast) => {
        chunks.push(new Uint8Array(new Uint8Array(chunk)));
        total += chunk.byteLength;
        if (isLast) onDone();
    });
});

await new Promise((r) => app.listen(5900, r));

const REQUEST = Buffer.from(
    `POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\ntransfer-encoding: chunked\r\n\r\n7\r\nABCDEFG\r\n0\r\n`,
    'ascii',
);

/** @description This one works fine */
// const REQUEST = Buffer.from(
//  `POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\ncontent-length: 7\r\n\r\nABCDEFG`,
//  'ascii',
// );

async function makeRequest(lastChunkOffset) {
    const firstChunk = REQUEST.subarray(0, lastChunkOffset);
    const lastChunk = REQUEST.subarray(lastChunkOffset);
    try {
        await new Promise((resolve, reject) => {
            const socket = new Socket({ readable: true, writable: true, allowHalfOpen: true });
            socket.connect(5900, '127.0.0.1', () => {
                socket.on('ready', () => {
                    socket.write(firstChunk);
                    if (lastChunk.byteLength) {
                        setTimeout(() => {
                            socket.write(lastChunk);
                        }, 1);
                    }
                });

                let dataReceived = false;
                socket.on('data', () => {
                    dataReceived = true;
                    socket.end();
                });

                socket.on('error', (err) => {
                    reject(err);
                });
                socket.on('end', () => {
                    dataReceived ? resolve() : reject(new Error('broken'));
                });
            });
        });
        console.info('ok', lastChunkOffset);
    } catch (err) {
        console.error('Broken for', {
            lastChunkOffset,
            firstChunk: firstChunk.toString('ascii'),
            lastChunk: lastChunk.toString('ascii'),
        });
    }
}

for (let lastChunkOffset = REQUEST.byteLength; lastChunkOffset > 0; lastChunkOffset--) {
    await makeRequest(lastChunkOffset);
}

process.exit(0);
uNetworkingAB commented 1 year ago

Okay can you make this reproduce against the Crc32 example posted? Then I can include it in CI testing (there's already similar tests in CI that can be extended). You just clone the main repo uWebSockets and hit "make" and you will have Crc32 binary to run - it takes HTTP data and sends back the crc32 of it, so it will be more strict than your "hi" test and I can check it without setting up uWebSockets.js (I don't have that set up right now)

uNetworkingAB commented 1 year ago

https://github.com/uNetworking/uWebSockets/blob/master/examples/Crc32.cpp

webcarrot commented 1 year ago

@uNetworkingAB Results are basically the same.

As a c++ script kiddie i modified: https://github.com/uNetworking/uWebSockets/blob/master/src/HttpParser.h#L474-L487 to (comment out CONSUME_MINIMALLY check):

//                if (!CONSUME_MINIMALLY) {
                    /* Go ahead and parse it (todo: better heuristics for emitting FIN to the app level) */
                    std::string_view dataToConsume(data, length);
                    for (auto chunk : uWS::ChunkIterator(&dataToConsume, &remainingStreamingBytes)) {
                        dataHandler(user, chunk, chunk.length() == 0);
                    }
                    if (isParsingInvalidChunkedEncoding(remainingStreamingBytes)) {
                        return {0, FULLPTR};
                    }
                    unsigned int consumed = (length - (unsigned int) dataToConsume.length());
                    data = (char *) dataToConsume.data();
                    length = (unsigned int) dataToConsume.length();
                    consumedTotal += consumed;
 //               }

... as it was a "difference" between working/not-working flow and this change "fixed" uWebSockets. Obviously it's not a solution (but tests are passing 😉 ).

Another bad option(?):

                if (!CONSUME_MINIMALLY || length > 0) {
webcarrot commented 1 year ago

Updated test script:

import assert from 'node:assert';
import { Socket } from 'node:net';

const REQUEST = Buffer.from(
    `POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\ntransfer-encoding: chunked\r\n\r\n7\r\nABCDEFG\r\n0\r\n`,
    'ascii',
);

/** @description This one works fine */
// const REQUEST = Buffer.from(
//  `POST / HTTP/1.1\r\nhost: 127.0.0.1\r\nconnection: close\r\ncontent-length: 7\r\n\r\nABCDEFG`,
//  'ascii',
// );

async function makeRequest(lastChunkOffset) {
    const firstChunk = REQUEST.subarray(0, lastChunkOffset);
    const lastChunk = REQUEST.subarray(lastChunkOffset);
    const time = Date.now();
    let output = '';
    try {
        output = await new Promise((resolve, reject) => {
            const socket = new Socket({ readable: true, writable: true, allowHalfOpen: true });
            socket.connect(3000, '127.0.0.1', () => {
                socket.on('ready', () => {
                    socket.write(firstChunk);
                    if (lastChunk.byteLength) {
                        setTimeout(() => {
                            socket.write(lastChunk);
                        }, 1);
                    }
                });
                const chunks = [];
                socket.on('data', (data) => {
                    chunks.push(data);
                    socket.end();
                });

                socket.on('error', (err) => {
                    reject(err);
                });
                socket.on('end', () => {
                    chunks.length
                        ? resolve(Buffer.concat(chunks).toString('ascii'))
                        : reject(new Error('No response'));
                });
            });
        });
        // check output - just in case - "normally" uWebSockets will abort and destroy connection so this should always pass
        assert(output.endsWith('Content-Length: 8\r\n\r\ne6f94bc\n'), 'Invalid output');
        console.log('Success', {
            timeMs: Date.now() - time,
            lastChunkOffset,
            output,
            firstChunk: firstChunk.toString('ascii'),
            lastChunk: lastChunk.toString('ascii'),
        });
    } catch (err) {
        console.log(err.message, {
            timeMs: Date.now() - time,
            lastChunkOffset,
            output,
            firstChunk: firstChunk.toString('ascii'),
            lastChunk: lastChunk.toString('ascii'),
        });
    }
}

for (let lastChunkOffset = REQUEST.byteLength; lastChunkOffset > 0; lastChunkOffset--) {
    await makeRequest(lastChunkOffset);
}

process.exit(0);
webcarrot commented 1 year ago

@uNetworkingAB OK now i know (🤣):

https://github.com/uNetworking/uWebSockets/blob/master/src/HttpParser.h#L538 vs https://github.com/uNetworking/uWebSockets/blob/master/src/HttpParser.h#L583

BUT: https://github.com/uNetworking/uWebSockets/blob/master/src/HttpParser.h#L527

webcarrot commented 1 year ago

@uNetworkingAB 👉🏻 https://github.com/uNetworking/uWebSockets/pull/1585

webcarrot commented 1 year ago

@uNetworkingAB What about uWebSockets.js release? 🙏🏻

webcarrot commented 1 year ago

Thanks! 🎉