Closed BillyTheFrog closed 1 year ago
The LL APIs are not thread safe. I would avoid using multi threading when calling these APIs. Consider using the non LL APIs are these are thread safe and implement locking in the SDK.
Also, for some reason you are calling IoTHubDeviceClient_LL_DoWork in multiple locations. This is very unusual and should not be required.
The expected pseudo implementation for LL apis is as follows:
main()
{
while()
{
run_application code();
send_any_iot_message();
IoTHubDeviceClient_LL_DoWork();
sleep(100ms);
}
}
Samples are located here: https://github.com/Azure/azure-iot-sdk-c/tree/main/iothub_client/samples
I read about these advices, either in the documentation or the iothub's examples.
To address the first point made, my use of thread is ONLY regarding signal handling, thus does not have any related impact to anything happening in the iothub's C sdk (no functions from the SDK will ever be called from that thread, nor shared resources will be accessed).
Regarding the several calls of the DoWork function, it is in a healthy way only called once per loop, as advised in your pseudo implementation. However in cases of errors before returning I might trigger forced calls in order to attempt sending all messages.
This code can be broken down to your pseudo implementation and then the problem subsists for me.
Flooding messages seems to -after some time- trigger an ill heap-use-after-free.
Any news ?
As stated above, this is probably a reentry issue calling non-locking APIs from another thread. If possible, please provide a C99 non-multithreaded sample that repos this issue.
As always, we welcome external contributions to this open source repo!
#if !defined(unix) || !defined(__unix) || !defined(__unix__)
#warning Please compile under any unix-like OS ; e.g. Linux
#endif
#include <sys/time.h>
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include "iothub_device_client_ll.h"
#include "iothub_client_options.h"
#include "iothubtransportmqtt.h"
#include "iothub.h"
#define CONNSTR "[ YOUR CONNECTION STRING GOES HERE ]"
typedef struct {
char const *_connStr;
char *_msg;
uint16_t _count;
IOTHUB_DEVICE_CLIENT_LL_HANDLE _deviceHandle;
} IoTHubLink;
typedef struct {
IoTHubLink *_link;
char *_msg;
} MessageCbData;
static char *GetTimeStamp(void) {
char *const ts = strdup("YYYY-mm-ddTHH:MM:SS.uuuZ__"), *str;
if (!ts)
return (NULL);
struct timeval tv;
struct tm *local;
time_t tm = time(NULL);
local = localtime(&tm);
if (!strftime(ts, 26, "%Y-%m-%dT%H:%M:%S.", local) || gettimeofday(&tv, NULL) || 0 >= sprintf(ts + 20, "%3.3d", tv.tv_usec)) {
24[ts] = 0;
return (ts);
}
23[ts] = 'Z';
24[ts] = 0;
return (ts);
}
static void IoTHubConnStatusCb(IOTHUB_CLIENT_CONNECTION_STATUS const status, IOTHUB_CLIENT_CONNECTION_STATUS_REASON const reason, void *vLink) {
IoTHubLink *link = vLink;
char const *reasonStr = NULL;
uint8_t ok = 0;
switch (reason) {
case IOTHUB_CLIENT_CONNECTION_EXPIRED_SAS_TOKEN:
reasonStr = "SAS token expired";
break;
case IOTHUB_CLIENT_CONNECTION_DEVICE_DISABLED:
reasonStr = "device disabled";
break;
case IOTHUB_CLIENT_CONNECTION_BAD_CREDENTIAL:
reasonStr = "bad credential";
break;
case IOTHUB_CLIENT_CONNECTION_RETRY_EXPIRED:
reasonStr = "retry expired";
break;
case IOTHUB_CLIENT_CONNECTION_NO_NETWORK:
reasonStr = "no network";
break;
case IOTHUB_CLIENT_CONNECTION_COMMUNICATION_ERROR:
reasonStr = "communication error";
break;
case IOTHUB_CLIENT_CONNECTION_OK:
reasonStr = "everything is fine";
ok = 1;
break;
case IOTHUB_CLIENT_CONNECTION_NO_PING_RESPONSE:
reasonStr = "no answer to ping";
break;
case IOTHUB_CLIENT_CONNECTION_QUOTA_EXCEEDED:
reasonStr = "quota exceeded";
break;
default:
abort();
}
switch (status) {
case IOTHUB_CLIENT_CONNECTION_AUTHENTICATED:
if (ok)
printf("IoTHub client authenticated successfully (%s)\n", reasonStr);
else
fprintf(stderr, "IoTHub client authenticated successfully but status does not match (%s)\n", reasonStr);
break;
case IOTHUB_CLIENT_CONNECTION_UNAUTHENTICATED:
fprintf(stderr, "IoTHub client disconnected (%s)\n", reasonStr);
break;
default:
abort();
}
}
static void IoTHubMsgStatusCb(IOTHUB_CLIENT_CONFIRMATION_RESULT const status, void *vData) {
MessageCbData *data = (MessageCbData *)vData;
switch (status) {
case IOTHUB_CLIENT_CONFIRMATION_OK:
printf("Sent message \"%s\" (everything is fine)\n", data->_msg);
break;
case IOTHUB_CLIENT_CONFIRMATION_BECAUSE_DESTROY:
fprintf(stderr, "Discarded message \"%s\" (destroyed)\n", data->_msg);
break;
case IOTHUB_CLIENT_CONFIRMATION_MESSAGE_TIMEOUT:
fprintf(stderr, "Discarded message \"%s\" (timed out)\n", data->_msg);
break;
case IOTHUB_CLIENT_CONFIRMATION_ERROR:
fprintf(stderr, "Discarded message \"%s\" (error)\n", data->_msg);
break;
default:
abort();
}
free(data->_msg);
free(data);
}
static int Send(IoTHubLink *link) {
IOTHUB_MESSAGE_HANDLE messageHandle;
char *index, *ts, *msg = strdup(link->_msg);;
MessageCbData *cbData;
if (!msg)
return (1);
if (link->_count && (!(index = strstr(msg, ":")) || 0 >= sprintf(index + 1, "%d}", link->_count++))) {
free(msg);
return (1);
}
if (!(messageHandle = IoTHubMessage_CreateFromString(msg))) {
free(msg);
return (1);
}
if (IOTHUB_CLIENT_OK != IoTHubMessage_SetContentTypeSystemProperty(messageHandle, "application%2fjson") || IOTHUB_CLIENT_OK != IoTHubMessage_SetContentEncodingSystemProperty(messageHandle, "utf-8")) {
IoTHubMessage_Destroy(messageHandle);
free(msg);
return (1);
}
ts = GetTimeStamp();
if (IOTHUB_CLIENT_OK != IoTHubMessage_SetMessageCreationTimeUtcSystemProperty(messageHandle, ts ? ts : "YYYY-mm-ddTHH:MM:SS.uuuZ") || !(cbData = malloc(sizeof(MessageCbData)))) {
IoTHubMessage_Destroy(messageHandle);
free(ts);
free(msg);
return (1);
}
cbData->_link = link;
cbData->_msg = msg;
if (IOTHUB_CLIENT_OK != IoTHubDeviceClient_LL_SendEventAsync(link->_deviceHandle, messageHandle, &IoTHubMsgStatusCb, cbData)) {
IoTHubMessage_Destroy(messageHandle);
free(ts);
free(msg);
return (1);
}
IoTHubMessage_Destroy(messageHandle);
free(ts);
return (0);
}
static int SendLoop(IoTHubLink *link, long int const periodicSending) {
int ret = 0;
uint32_t count = 0;
while (!ret) {
ret = Send(link);
if (!periodicSending || !(count++ % periodicSending))
IoTHubDeviceClient_LL_DoWork(link->_deviceHandle);
}
return (ret);
}
static IoTHubLink *IoTHubLinkConstructStrings(char const *const key, char const *const value, IoTHubLink *const link) {
link->_connStr = CONNSTR;
if (!(link->_msg = malloc(strlen(key) + strlen(value) + 8))) {
free(link);
IoTHub_Deinit();
return (NULL);
}
strcat(strcat(strcat(strcat(strcpy(link->_msg, "{\""), key), "\":\""), value), "\"}");
link->_count = strcmp(value, "[$count]") ? 0 : 1;
return (link);
}
static IoTHubLink *IoTHubLinkConstructIoTClient(IoTHubLink *const link) {
bool urlEncode = true;
if (!(link->_deviceHandle = IoTHubDeviceClient_LL_CreateFromConnectionString(link->_connStr, MQTT_Protocol))) {
free(link->_msg);
free(link);
IoTHub_Deinit();
return (NULL);
}
if (IOTHUB_CLIENT_OK != IoTHubDeviceClient_LL_SetOption(link->_deviceHandle, OPTION_AUTO_URL_ENCODE_DECODE, &urlEncode)) {
IoTHubDeviceClient_LL_Destroy(link->_deviceHandle);
free(link->_msg);
free(link);
IoTHub_Deinit();
return (NULL);
}
if (IOTHUB_CLIENT_OK != IoTHubDeviceClient_LL_SetConnectionStatusCallback(link->_deviceHandle, &IoTHubConnStatusCb, link)) {
IoTHubDeviceClient_LL_Destroy(link->_deviceHandle);
free(link->_msg);
free(link);
IoTHub_Deinit();
return (NULL);
}
return (link);
}
static IoTHubLink *IoTHubLinkConstruct(char const *const key, char const *const value) {
IoTHubLink *link;
if (IoTHub_Init())
return (NULL);
if (!(link = malloc(sizeof(IoTHubLink)))) {
IoTHub_Deinit();
return (NULL);
}
if (!IoTHubLinkConstructStrings(key, value, link) || !IoTHubLinkConstructIoTClient(link))
return (NULL);
return (link);
}
static void IoTHubLinkDestruct(IoTHubLink *link) {
IoTHubDeviceClient_LL_Destroy(link->_deviceHandle);
free(link->_msg);
free(link);
IoTHub_Deinit();
}
int main(int const ac, char const *const *const av) {
int ret = 0;
IoTHubLink *link = IoTHubLinkConstruct((2 < ac ? 2[av] : "Alive"), 3 < ac ? 3[av] : "[$count]");
if (!link)
return (1);
ret = SendLoop(link, atol(1 < ac ? 1[av] : "0"));
IoTHubLinkDestruct(link);
return (ret);
}
The above example is C99, non multithreaded, only calls do_work in one single case, and on my side on my machines reproduces the issue under 10' average (sometimes requires a few tries).
Logs uploaded :
OPTION_LOG_TRACE enabled on the LL API
Crash1 : compiled in debug Crash 2 : compiled in debug with an address sanitizer Crash 3 : compiled in debug with a thread sanitizer
Hi @BillyTheFrog , After looking into your (latest) sample I can tell you that the way you are using the Azure IoT SDK C is not the expected design for a production environment. Three things stand out:
I have modified one of our samples to have both functions above run in the same loop with no delay, and I'm running it under valgrind to verify if there are any crashes. I'll share details if we get any repro.
The variable "periodicSending" can modulate the sending frequency ; every X message sent IoTHubDeviceClient_LL_DoWork will be called. From my comment in this issue
To call 'IoTHubDeviceClient_LL_DoWork' less frequently, an argument can be given (arg. #1 of the compiled binary) to call the function every X messages passed to the SDK (putting 5 as first argument will pass 5 messages to the SDK before calling the function). By default the function is called after every message passed.
See 1.
I do not care, as I do not expect this to work, this test is testing the robustness of a C code that shouldn't crash (on what seems to be a freed node in a linked list) and it should only crash if I am doing something wrong with the library flow
"I have modified one of our samples to have both functions above run in the same loop with no delay" why not using my sample ? Does it have anything that seems bad in it ? In this case my sample could be faulty. But if everything seems compliant with both the library and ISO 9899 TC3, then I guess it's worth investigating the sample I provided.
Development Machine, OS, Compiler (and Other Relevant Toolchain Info) Bug reproduced on arch linux, debian, ubuntu, yocto, under both x86 and arm architectures.
SDK Version (Please Give Commit SHA if Manually Compiling) LTS_07_2022_Ref02
Protocol MQTT
Describe the Bug After a while of intensive sending (count between 1 and 2 mins), a crash occurs. It appears to be a double linked list issue, accessing a recently freed area. The crash tracks back to IoTHubDeviceClient_LL_DoWork. I also saw some errors in a thread sanitizer. To monitor this issue I used valgrind and google's sanitizers (address & thread). See attached logs below for details.
Here is a minimal example that managed to reproduce the issue for me. I checked the documentation and hopefully I didn't misuse the SDK.
Please replace '[ YOUR CONNECTION STRING GOES HERE ]' by your own connection string. Once compiled, running the program should reproduce the issue.
To call 'IoTHubDeviceClient_LL_DoWork' less frequently, an argument can be given (arg. #1 of the compiled binary) to call the function every X messages passed to the SDK (putting 5 as first argument will pass 5 messages to the SDK before calling the function). By default the function is called after every message passed.
The second and third arguments of the binary are also optional, they allow you to specify the key/value pair in the message sent. By default beeing "Alive" for the key (arg. #2) and "[$count]" for the value (arg. #3) the message will contain an increasing value (to keep track, as a message ID).
Console Logs Thread sanitizer log:
Memory sanitizer log: