awslabs / aws-crt-nodejs

NodeJS bindings for the AWS Common Runtime.
Apache License 2.0
38 stars 24 forks source link

Uncatchable MQTT error thrown #403

Closed samuelplexcelon closed 1 year ago

samuelplexcelon commented 1 year ago

While using the MQTT client, every once in a while the server crashes with an error I have not been able to catch in JavaScript, but seems to be thrown in the C lib (aws-crt v1.15.5).

I have not been able to replicate this on purpose, but it seemingly happens some hours after the server starts up.

The server is an EC2 instance running Amazon Linux 2.

I'm not sure if I'm doing something wrong here, or if this is an issue other have come across.

I've made a basic wrapper class for the clients (a stripped down version of the client in aws-iot-device-sdk-js-v2)

class ShadowClient {

    /* ... */

    subscribe(serialNumber, callback, qos = this.#QoS) {
        try {
            return this.#subClient.subscribe(
                `$aws/things/${serialNumber}/shadow/update/accepted`,
                qos,
                (topic, message, dup, qos, retain)=> {
                    message = JSON.parse(textDecoder.decode(message));
                    callback(message, topic);
                }
            );
        } catch(error) {
            console.log('Could not set up AwsShadowClient subscribe.', error);
            throw error;
        }
    }

    publish(serialNumber, payload, qos = this.#QoS) {
        try {
            return this.#pubClient.publish(
                `$aws/things/${serialNumber}/shadow/update`,
                payload,
                qos
            );
        } catch(error) {
            console.log('Could not publish AwsShadowClient', error);
            throw error;
        }
    }

    unsubscribe(serialNumber) {
        try {
            return this.#subClient.unsubscribe(`$aws/things/${serialNumber}/shadow/update`);
        }
        catch (error) {
            console.log('Could not unsubscribe AwsShadowClient', error);
            throw error;
        }
    }
}

Which is called in websocket-server.js

webSocketServer.on('connection', async (centralConnectionSocket) => {
    const UUID = uuid();
    let logTag = `[SN: ${UUID}]`;
    let deviceName = '';

/* ... Set deviceName among other things */

    centralConnectionSocket.on('close', (code, reason = '[empty]') => {
        console.log(logTag, `Disconnected from central server. Stopping active script.\nDisconnect info:\nCode: ${code}\nReason: '${reason}'`);

        stopEverything()
        .catch(() => {
            console.log(logTag, 'Failed to stop everything');
        })
        .finally(async ()=>{
            try {
                try {
                    // WARNING: We need to make sure the instrument closes the tunnel. Otherwise, everything breaks
                    const shadowClient = await AwsShadowClient.factory();

                    // Unsub to clear the previous disconnect and error handlers
                    await shadowClient.unsubscribe(deviceName);

                    await shadowClient.publish(deviceName, {
                        state: {
                            desired: {
                                uuid: UUID, myMsg: 'some msg'
                            }
                        }
                    });
                }
                catch (error) {
                        console.log(logTag, 'Failed in MQTT workflow. Forcing port release anyway.\n', error.message);
                    }
                    finally {
                        releasePort();
                    }
                }
                finally {
                    // ... Do some more clean up
                    }
                }
            });
        });
}

And this is what is written to the log file:

[SN: 0000-0000] Failed to stop everything
Signal received: 1742688112, errno: 32561
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
Signal received: 0, errno: 0
################################################################################
Stack trace:
################################################################################
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(+0x6e903) [0x7f31741df903]
/lib64/libpthread.so.0(+0x118e0) [0x7f31781578e0]
/lib64/libgcc_s.so.1(+0xe407) [0x7f3178372407]
/lib64/libgcc_s.so.1(_Unwind_Backtrace+0x62) [0x7f3178373e52]
/lib64/libc.so.6(backtrace+0x78) [0x7f3177e95728]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace+0x1b) [0x7f31743ec6db]
/var/app/current/node_modules/aws-crt/dist/bin/linux-x64/aws-crt-nodejs.node(aws_backtrace_print+0x41) [0x7f31743ec931]
bretambrose commented 1 year ago

It's hard to tell anything from that output unfortunately. You could try enabling CRT logging (https://github.com/awslabs/aws-crt-nodejs/blob/main/lib/native/io.ts#L86) and/or you could make a debug build of the CRT to get a better stack trace (install from source and override debug to be true: https://github.com/awslabs/aws-crt-nodejs/blob/main/scripts/build.js#L117). Feel free to attach the results to this thread in case they provide better information.

samuelplexcelon commented 1 year ago

I appreciate the feedback! I'll enable logs and debug and see what I come up with.

bretambrose commented 1 year ago

Also, for what it's worth, we have a low-level shadow client here: https://github.com/aws/aws-iot-device-sdk-js-v2/blob/main/lib/iotshadow/iotshadowclient.ts

You also might see better results with the mqtt5 client (https://www.github.com/awslabs/aws-crt-nodejs/blob/main/MQTT5-UserGuide.md) but I don't want to send you on a wild goose chase and even if you do, we still need to fix the problem in the 311 client.

samuelplexcelon commented 1 year ago

I've come across a seemingly separate bug(?) that I'll open another issue for. I haven't been able to replicate this.