Azure / azure-iot-sdk-node

A Node.js SDK for connecting devices to Microsoft Azure IoT services
https://docs.microsoft.com/en-us/azure/iot-hub/
Other
262 stars 227 forks source link

Device client fails on disconnect using AMQP/AMQP-WS (using NoRetry policy) #221

Closed wmmihaa closed 6 years ago

wmmihaa commented 6 years ago

Context

Description of the issue:

Device client fails on disconnect using AMQP/AMQP-WS

Code sample exhibiting the issue:

'use strict';

require('colors');
var DeviceClient = require('azure-iot-device').Client;
var Message = require('azure-iot-common').Message;;
var DeviceProtocol;
var connectionString = 'XXXXX';
var messageCount = 0;
var messageSentCount = 0;
if (process.argv.length !== 3) {
    console.log("Usage: node app [-amqp | -amqpws | -mqtt | -mqttws]".yellow);
    return;
}

console.log();

switch (process.argv[2].toLowerCase()) {
    case "-amqp":
    case "amqp":
        console.log('Using AMQP'.green);
        DeviceProtocol = require('azure-iot-device-amqp').Amqp;
        break;
    case "-amqpws":
    case "amqpws":
        console.log('Using AMQP-WS'.green);
        DeviceProtocol = require('azure-iot-device-amqp').AmqpWs;
        break;
    case "-mqtt":
    case "mqtt":
        console.log('Using MQTT'.green);
        DeviceProtocol = require('azure-iot-device-mqtt').Mqtt;
        break;
    case "-mqttws":
    case "mqttws":
        console.log('Using MQTT-WS'.green);
        DeviceProtocol = require('azure-iot-device-mqtt').MqttWs;
        break;
    default:
        console.log('Unsupported protocol'.red);
        console.log("Usage: node app [-amqp | -amqpws | -mqtt | -mqttws]".yellow);
        break;

}

var deviceClient = DeviceClient.fromConnectionString(connectionString, DeviceProtocol);
// Disable retry policy
let NoRetry = require('azure-iot-common').NoRetry;
deviceClient.setRetryPolicy(new NoRetry());

deviceClient.open(function (err, transport) {
    if (err) {
        console.log('ERROR: '.red + err.message.red);
        process.abort();
    }
    else {
        console.log('AZURE IoT: Connected'.green);
        deviceClient.on('disconnect', function () {
            console.log('AZURE IoT: Disconnected'.bgGreen.white);
        });
        deviceClient.on('error', function (err) {
            console.log('AZURE IoT: Error: '.red + err.message.red);
        });
        deviceClient.getTwin(function (err, twin) {
            if (err) {
                console.log('AZURE IoT: Could not get twin: '.red + err.message.red);
            }
            else {
                console.log("AZURE IoT: Device twin is ready".green);
                twin.on('properties.desired', function (desiredChange) {
                    console.log("AZURE IoT: Received new state".green);
                    console.log("");
                    sendEvents();
                });
            }
        });
    }
});

function sendEvents() {

    setInterval(function () {
        messageCount++;
        var message = new Message(JSON.stringify({ dt: new Date(), messageCount: messageCount }));
        deviceClient.sendEvent(message, function (err) {
            messageSentCount ++ ;
            if (err) {
                let m = 'Unable to send message to to Azure IoT Hub  ' + messageSentCount + '/(' + messageCount + ')';
                console.log(m.red);
            }
            else {
                let m = 'Event has been sent to Azure IoT Hub  ' + messageSentCount + '/(' + messageCount + ')';
                console.log(m.green);
            }

        });
    }, 5000);

}

Reproduce issue

  1. Open a console (don't rurn in VS Code)
  2. RUN: npm i azure-iot-common@1.3.0 azure-iot-device@1.3.0 azure-iot-device-amqp@1.3.0 azure-iot-device-mqtt@1.3.0 colors@1.1.2
  3. Type:
    npm app amqp
  4. Disable wireless network adapter

Console log of the issue:

λ node app.js amqpws

Using AMQP-WS
AZURE IoT: Connected
AZURE IoT: Device twin is ready
AZURE IoT: Received new state

Event has been sent to Azure IoT Hub  1/(1)
Unable to send message to to Azure IoT Hub  2/(5)
Unable to send message to to Azure IoT Hub  3/(5)
Unable to send message to to Azure IoT Hub  4/(5)
Unable to send message to to Azure IoT Hub  5/(5)
AZURE IoT: Disconnected
C:\temp\deviceConnectivityTest\node_modules\azure-iot-amqp-base\lib\amqp.js:109
                        callback();
                        ^

TypeError: callback is not a function
    at constructor.amqpError (C:\temp\deviceConnectivityTest\node_modules\azure-iot-amqp-base\lib\amqp.js:109:25)
    at constructor.handle (C:\temp\deviceConnectivityTest\node_modules\machina\lib\machina.js:613:25)
    at constructor.Fsm.(anonymous function) [as handle] (C:\temp\deviceConnectivityTest\node_modules\machina\lib\machina.js:466:63)
    at constructor.<anonymous> (C:\temp\deviceConnectivityTest\node_modules\machina\lib\machina.js:702:17)
    at C:\temp\deviceConnectivityTest\node_modules\machina\node_modules\lodash\index.js:2874:23
    at C:\temp\deviceConnectivityTest\node_modules\machina\node_modules\lodash\index.js:3049:15
    at Function.<anonymous> (C:\temp\deviceConnectivityTest\node_modules\machina\node_modules\lodash\index.js:3346:13)
    at constructor.processQueue (C:\temp\deviceConnectivityTest\node_modules\machina\lib\machina.js:701:6)
    at constructor.Fsm.(anonymous function) [as processQueue] (C:\temp\deviceConnectivityTest\node_modules\machina\lib\machina.js:466:63)
    at constructor.transition (C:\temp\deviceConnectivityTest\node_modules\machina\lib\machina.js:661:12)
    at constructor.Fsm.(anonymous function) [as transition] (C:\temp\deviceConnectivityTest\node_modules\machina\lib\machina.js:466:63)
    at C:\temp\deviceConnectivityTest\node_modules\azure-iot-amqp-base\lib\amqp.js:366:48
    at disconnect (C:\temp\deviceConnectivityTest\node_modules\azure-iot-amqp-base\lib\amqp.js:319:33)
    at C:\temp\deviceConnectivityTest\node_modules\azure-iot-amqp-base\lib\amqp.js:363:33
    at C:\temp\deviceConnectivityTest\node_modules\async\dist\async.js:473:16
    at iteratorCallback (C:\temp\deviceConnectivityTest\node_modules\async\dist\async.js:1050:13)
pierreca commented 6 years ago

@wmmihaa Looks like 2 issues here:

  1. we're calling a callback that doesn't exist or isn't a function - that's a simple code mistake
  2. we end up in a state where we shouldn't be getting errors, so there might be a deeper issue here.

In the interest of solving 2. I'd like to understand the setup that exposed the problem a little better:

[edit] It looks like what happened is that an error is emitted while the client is trying to disconnect. activating the debug traces (by setting the DEBUG environment variable to azure* or * to be super verbose) might help (please scrub the logs for sensitive information before posting them here)

pierreca commented 6 years ago

@wmmihaa I submitted #221 to prevent the exception you're seeing but I'm still worried about the second problem I describe in my first comment. If you come up with more information please let us know.

wmmihaa commented 6 years ago

Did you want me to with DEBUG setting and send you the log?

Let me know if there is anything I can do...

pierreca commented 6 years ago

@wmmihaa Yeah that'd be great - I'm not sure what error the amqp10 client emitted while we were disconnecting but it's suspicious: if we're only detaching/disconnecting, and we have callbacks on the detach/disconnect, why is it emitting errors? that's what i'd like to find out

wmmihaa commented 6 years ago

Here you go... BTW... were you not able to reproduce?

221.log

pierreca commented 6 years ago

@wmmihaa In order to repro i'd like to understand how you "disable the network adapter" on which OS - not clear to me if you're using WSL and Ubuntu on Windows, or Ubuntu and windows separately and both show the problem, and how you disable the network adapter (right click? ifup/down?)

pierreca commented 6 years ago

@wmmihaa I'm going to go out on a limb and ask - does the same problem happens when you're not using the twin (ie not getting or updating it in any way, so that these links stay detached)?

wmmihaa commented 6 years ago

@pierreca I am using WSL and Ubuntu on Windows + Windows 10, and They are not showing the same exception, although they both fail: WSL:

Using AMQP
AZURE IoT: Connected
AZURE IoT: Device twin is ready
AZURE IoT: Received new state

Event has been sent to Azure IoT Hub  1/(1)
/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/amqp10/lib/frames.js:64
  stream.write(buffer, callback);
        ^

TypeError: Cannot read property 'write' of null
    at Object.frames.writeFrame (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/amqp10/lib/frames.js:64:9)
    at Connection.sendFrame (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/amqp10/lib/connection.js:329:10)
    at SenderLink.Link._sendDetach (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/amqp10/lib/link.js:228:27)
    at Object.sendDetach (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/amqp10/lib/link.js:34:14)
    at Object.event [as sendDetach] (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/stately.js/Stately.js:203:67)
    at SenderLink.Link.detach (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/amqp10/lib/link.js:129:11)
    at constructor._onEnter (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/azure-iot-amqp-base/lib/sender_link.js:204:47)
    at constructor.transition (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/machina/lib/machina.js:653:28)
    at constructor.Fsm.(anonymous function) [as transition] (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/machina/lib/machina.js:466:63)
    at constructor.detach (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/azure-iot-amqp-base/lib/sender_link.js:156:69)
    at constructor.handle (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/machina/lib/machina.js:613:25)
    at constructor.Fsm.(anonymous function) [as handle] (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/machina/lib/machina.js:466:63)
    at SenderLink.detach (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/azure-iot-amqp-base/lib/sender_link.js:225:19)
    at /mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/azure-iot-device-amqp/lib/amqp.js:417:50
    at /mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/async/dist/async.js:3866:24
    at replenish (/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest/node_modules/async/dist/async.js:998:17)
wmmihaa@DESKTOP-6PS46DD:/mnt/c/temp/deviceConnectivityTest/deviceConnectivityTest$

And also, I'm just toggle of the WIFI. I can't run the script on the device without installing screen or a startup script (which I can't do on the ones at work), but if you find it important I can do that later today (CET) when I get home.

wmmihaa commented 6 years ago

I'm going to go out on a limb and ask - does the same problem happens when you're not using the twin (ie not getting or updating it in any way, so that these links stay detached)?

@pierreca No the issue seams related to the twin. If I don't enable twin state, it all works:

λ node app.js amqp

Using AMQP
AZURE IoT: Connected
Event has been sent to Azure IoT Hub  1/(1)
Event has been sent to Azure IoT Hub  2/(2) // ==> Network disabled
Unable to send message to to Azure IoT Hub  3/(6)
Unable to send message to to Azure IoT Hub  4/(6)
Unable to send message to to Azure IoT Hub  5/(6)
Unable to send message to to Azure IoT Hub  6/(6)
AZURE IoT: Disconnected
Unable to send message to to Azure IoT Hub  7/(7)
Unable to send message to to Azure IoT Hub  8/(8)
Unable to send message to to Azure IoT Hub  9/(9)
Unable to send message to to Azure IoT Hub  10/(10)
Event has been sent to Azure IoT Hub  11/(11) // ==> Network enabled
Event has been sent to Azure IoT Hub  12/(12)

BTW... this works even better as I don't loose any messages, compared to MQTT where I lost a few messages. - Still not an option though, as I need the state :(

pierreca commented 6 years ago

@wmmihaa FYI we released a new version of the packages that include a fix for the undefined callback (still smelling like there's an issue in the twin code though) https://github.com/Azure/azure-iot-sdk-node/releases/tag/2018-2-7

wmmihaa commented 6 years ago

image

I ran the same scenario again, with stage enabled. No unhandled exception thrown and managed events triggered. However... the device does not seem to receive state updates after re-connecting.

pierreca commented 6 years ago

@wmmihaa yes the Twin feature has recovery issues that we plan on fixing within the next couple of weeks. [edit] see my answer on #206

wmmihaa commented 6 years ago

Looking forward to it... ;)