getdnsapi / getdns

A modern asynchronous DNS API https://getdnsapi.net/
Other
468 stars 126 forks source link

Crash with filedescriptor > 1024 #527

Open clauderobi opened 2 years ago

clauderobi commented 2 years ago

Hi,

I am using the synchonous mode.

I have been using getdns for over a year on Linux but know I am porting my code to Windows and I am getting an error when the file descriptor is higher than 1024. This happens after a few successful requests.

I am aware of this ticket https://github.com/getdnsapi/getdns/issues/222, which says that poll is now the default for the event loop but I can tell that this is not the case; using gdb I was able to see the code going in functions part of the select_eventloop.c and use the select mechanism.

I tested with 1.7.2 and got the same behavior.

What is the solution?

PS. The code should be cleanly failing but it not the case; the request is still attempted but never returns. The result is a thread that consumes 100% of the CPU. Not good! PPS I am cross-compiling with mingw32-w64

wtoorop commented 2 years ago

Not good indeed,

I can see in CMakeLists.txt that the default eventloop based on select is chosen when a test trying to compile with poll fails:

# Event loop extension
set(DEFAULT_EVENTLOOP "select_eventloop")
if (ENABLE_POLL_EVENTLOOP)
  if (HAVE_SYS_POLL_H)
    set(TEST_CFLAG "-DHAVE_SYS_POLL_H=1")
  endif ()
  try_compile(USE_POLL_DEFAULT_EVENTLOOP
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/tests/test_poll.c
    COMPILE_DEFINITIONS "${TEST_CFLAG}"
    )
  if (USE_POLL_DEFAULT_EVENTLOOP)
    set(DEFAULT_EVENTLOOP "poll_eventloop")
  endif ()
endif ()

Could you share your config.h perhaps to see why it was not included. Also, I will try to reproduce your issue.

wtoorop commented 2 years ago

And I agree that in case of a compile for windows and for some reason the select eventloop was choses, fds > 1024 should soft fail...

clauderobi commented 2 years ago

Here is my config.h file.

ifndef CONFIG_H

define CONFIG_H

define PACKAGE "getdns"

define PACKAGE_NAME "getdns"

define PACKAGE_VERSION "1.7.2"

define PACKAGE_URL "https://getdnsapi.net"

define PACKAGE_BUGREPORT "team@getdnsapi.net"

define PACKAGE_STRING "getdns 1.7.2"

define PACKAGE_TARNAME "getdns-1.7.2"

define HAVE_ASSERT_H 1

define HAVE_INTTYPES_H 1

define HAVE_LIMITS_H 1

/ #undef HAVE_SYS_LIMITS_H /

define HAVE_STDARG_H 1

define HAVE_STDINT_H 1

define HAVE_STDIO_H 1

define HAVE_STDLIB_H 1

define HAVE_STRING_H 1

define HAVE_TIME_H 1

define HAVE_UNISTD_H 1

define HAVE_FCNTL_H 1

define HAVE_SIGNAL_H 1

/ #undef HAVE_SYS_POLL_H / / #undef HAVE_POLL_H / / #undef HAVE_RESOURCE_H /

define HAVE_SYS_TYPES_H 1

define HAVE_SYS_STAT_H 1

/ #undef HAVE_ENDIAN_H / / #undef HAVE_NETDB_H / / #undef HAVE_ARPA_INET_H / / #undef HAVE_NETINET_IN_H / / #undef HAVE_NETINET_TCP_H / / #undef HAVE_SYS_SELECT_H / / #undef HAVE_SYS_SOCKET_H / / #undef HAVE_SYS_SYSCTL_H /

define HAVE_SYS_TIME_H 1

/ #undef HAVE_SYS_WAIT_H /

define HAVE_WINDOWS_H 1

define HAVE_WINSOCK_H 1

define HAVE_WINSOCK2_H 1

define HAVE_WS2TCPIP_H 1

define GETDNS_ON_WINDOWS 1

define USE_WINSOCK 1

define HAVE_SSL 1

/ #undef USE_DANESSL /

define HAVE_OPENSSL_SSL_H 1

define HAVE_OPENSSL_EVP_H 1

define HAVE_OPENSSL_ERR_H 1

define HAVE_OPENSSL_RAND_H 1

define HAVE_OPENSSL_CONF_H 1

define HAVE_OPENSSL_ENGINE_H 1

define HAVE_OPENSSL_BN_H 1

define HAVE_OPENSSL_DSA_H 1

define HAVE_OPENSSL_RSA_H 1

/ #undef HAVE_OPENSSL_PARAM_BUILD_H /

define HAVE_DSA_SIG_SET0 1

define HAVE_DSA_SET0_PQG 1

define HAVE_DSA_SET0_KEY 1

define HAVE_RSA_SET0_KEY 1

define HAVE_EVP_MD5 1

define HAVE_EVP_SHA1 1

define HAVE_EVP_SHA224 1

define HAVE_EVP_SHA256 1

define HAVE_EVP_SHA384 1

define HAVE_EVP_SHA512 1

/ #undef HAVE_EVP_DSS1 /

define HAVE_EVP_DIGESTVERIFY 1

define HAVE_EVP_MD_CTX_NEW 1

define HAVE_HMAC_CTX_NEW 1

/ #undef HAVE_NETTLE_GET_SECP_256R1 / / #undef HAVE_NETTLE_GET_SECP_384R1 /

define HAVE_TLS_CLIENT_METHOD 1

define HAVE_OPENSSL_VERSION_NUM 1

define HAVE_OPENSSL_VERSION 1

define HAVE_SSL_CTX_DANE_ENABLE 1

define HAVE_SSL_CTX_SET_CIPHERSUITES 1

define HAVE_SSL_SET_CIPHERSUITES 1

define HAVE_OPENSSL_INIT_CRYPTO 1

/ #undef HAVE_OSSL_PARAM_BLD_NEW /

define HAVE_SSL_DANE_ENABLE 1

define HAVE_DECL_SSL_CTX_SET1_CURVES_LIST 1

define HAVE_DECL_SSL_SET1_CURVES_LIST 1

define HAVE_DECL_SSL_SET_MIN_PROTO_VERSION 1

define HAVE_X509_GET_NOTAFTER 1

define HAVE_X509_GET0_NOTAFTER 1

define HAVE_PTHREAD 1

/ #undef HAVE_WINDOWS_THREADS /

/ #undef RUNSTATEDIR /

define TRUST_ANCHOR_FILE "/usr/local/etc/unbound/getdns-root.key"

define GETDNS_FN_RESOLVCONF "/etc/resolv.conf"

define GETDNS_FN_HOSTS "C:/Windows/System32/Drivers/etc/hosts"

define DNSSEC_ROADBLOCK_AVOIDANCE 1

/ #undef HAVE_MDNS_SUPPORT /

define STUB_NATIVE_DNSSEC 1

define MAXIMUM_UPSTREAM_OPTION_SPACE 3000

define EDNS_PADDING_OPCODE 12

define MAX_CNAME_REFERRALS 100

define DRAFT_RRTYPES 1

define EDNS_COOKIES 1

define EDNS_COOKIE_OPCODE 10

define EDNS_COOKIE_ROLLOVER_TIME (246060)

define UDP_MAX_BACKOFF 1000

/ #undef HAVE_DECL_GETENTROPY / / #undef HAVE_DECL_INET_PTON / / #undef HAVE_DECL_INET_NTOP / / #undef HAVE_WIN_DECL_INET_PTON / / #undef HAVE_WIN_DECL_INET_NTOP /

define HAVE_DECL_MKSTEMP 1

/ #undef HAVE_DECL_SIGEMPTYSET / / #undef HAVE_DECL_SIGFILLSET / / #undef HAVE_DECL_SIGADDSET / / #undef HAVE_DECL_STRPTIME /

/ #undef HAVE_DECL_TCP_FASTOPEN / / #undef HAVE_DECL_TCP_FASTOPEN_CONNECT / / #undef HAVE_DECL_MSG_FASTOPEN /

if defined(HAVE_DECL_INET_PTON) || defined(HAVE_WIN_DECL_INET_PTON)

undef HAVE_DECL_INET_PTON

define HAVE_DECL_INET_PTON 1

endif

if defined(HAVE_DECL_INET_NTOP) || defined(HAVE_WIN_DECL_INET_NTOP)

undef HAVE_DECL_INET_NTOP

define HAVE_DECL_INET_NTOP 1

endif

/ #undef HAVE_FCNTL /

define HAVE_GETTIMEOFDAY 1

define HAVE_IOCTLSOCKET 1

/ #undef HAVE_SIGEMPTYSET / / #undef HAVE_SIGFILLSET / / #undef HAVE_SIGADDSET / / #undef HAVE_STRPTIME /

/ #undef HAVE_SIGSET_T /

define HAVE__SIGSET_T 1

/ #undef HAVE_BSD_STDLIB_H / / #undef HAVE_BSD_STRING_H /

/ #undef HAVE_DECL_STRLCPY / / #undef HAVE_DECL_ARC4RANDOM / / #undef HAVE_DECL_ARC4RANDOM_UNIFORM / / #undef HAVE_BSD_DECL_STRLCPY / / #undef HAVE_BSD_DECL_ARC4RANDOM / / #undef HAVE_BSD_DECL_ARC4RANDOM_UNIFORM /

/ #undef HAVE_STRLCPY / / #undef HAVE_ARC4RANDOM / / #undef HAVE_ARC4RANDOM_UNIFORM /

/ #undef HAVE_LIBUNBOUND / / #undef HAVE_UNBOUND_EVENT_H / / #undef HAVE_UNBOUND_EVENT_API / / #undef HAVE_UB_CTX_SET_STUB /

/ #undef HAVE_LIBIDN / / #undef HAVE_LIBIDN2 /

/ #undef HAVE_NETTLE / / #undef HAVE_NETTLE_DSA_COMPAT_H / / #undef HAVE_NETTLE_EDDSA_H /

/ #undef HAVE_EVENT2_EVENT_H / / #undef HAVE_EVENT_BASE_NEW / / #undef HAVE_EVENT_BASE_FREE /

define DEFAULT_EVENTLOOP "select_eventloop"

/ #undef USE_POLL_DEFAULT_EVENTLOOP /

/ #undef STRPTIME_WORKS /

/ #undef FD_SETSIZE /

define REQ_DEBUG 1

define SCHED_DEBUG 1

define STUB_DEBUG 1

define DAEMON_DEBUG 1

define SEC_DEBUG 1

define SERVER_DEBUG 1

define ANCHOR_DEBUG 1

/ #undef KEEP_CONNECTIONS_OPEN_DEBUG /

define USE_SHA1 1

define USE_SHA2 1

define USE_GOST 1

define USE_ECDSA 1

define USE_DSA 1

define USE_ED25519 1

define USE_ED448 1

/ #undef USE_OSX_TCP_FASTOPEN /

/ #undef HAVE_DECL_TCP_USER_TIMEOUT /

/ #undef HAVE_NEW_UV_TIMER_CB /

define HAVE_TARGET_ENDIANNESS

/ #undef TARGET_IS_BIG_ENDIAN /

define HAVE_FUNC 1

ifdef HAVE_FUNC

define FUNC func

else

define FUNC FUNCTION

endif

ifdef GETDNS_ON_WINDOWS

/* On windows it is allowed to increase the FD_SETSIZE

/ the version of the windows API enabled /

ifndef WINVER

define WINVER 0x0600 // 0x0502

endif

ifndef _WIN32_WINNT

define _WIN32_WINNT 0x0600 // 0x0502

endif

ifdef HAVE_WS2TCPIP_H

include

endif

ifdef _MSC_VER

if _MSC_VER >= 1800

define PRIsz "zu"

else

define PRIsz "Iu"

endif

include

typedef SSIZE_T ssize_t;

else

define PRIsz "Iu"

endif

ifdef HAVE_WINSOCK2_H

include

endif

/ detect if we need to cast to unsigned int for FD_SET to avoid warnings /

ifdef HAVE_WINSOCK2_H

define FD_SET_T (u_int)

else

define FD_SET_T

endif

/ Windows wants us to use _strdup instead of strdup /

ifndef strdup

define strdup _strdup

endif

/ Windows doesn't have strcasecmp and strncasecmp. /

define strcasecmp _stricmp

define strncasecmp _strnicmp

else

define PRIsz "zu"

endif

ifdef HAVE_STDINT_H

include

endif

ifdef HAVE_STDIO_H

include

endif

ifdef HAVE_UNISTD_H

include

endif

ifdef HAVE_ASSERT_H

include

endif

ifdef HAVE_STRING_H

include

endif

ifdef __cplusplus

extern "C" {

endif

if STDC_HEADERS

include

include

endif

ifdef HAVE_BSD_STDLIB_H

include <bsd/stdlib.h>

endif

ifdef HAVE_BSD_STRING_H

include <bsd/string.h>

endif

if !defined(HAVE_STRLCPY) || !HAVE_DECL_STRLCPY || !defined(strlcpy)

size_t strlcpy(char dst, const char src, size_t siz);

else

ifndef __BSD_VISIBLE

define __BSD_VISIBLE 1

endif

endif

if !defined(HAVE_ARC4RANDOM) || !HAVE_DECL_ARC4RANDOM

uint32_t arc4random(void);

endif

if !defined(HAVE_ARC4RANDOM_UNIFORM) || !HAVE_DECL_ARC4RANDOM_UNIFORM

uint32_t arc4random_uniform(uint32_t upper_bound);

endif

ifndef HAVE_ARC4RANDOM

void explicit_bzero(void buf, size_t len); int getentropy(void buf, size_t len); void arc4random_buf(void* buf, size_t n); void _ARC4_LOCK(void); void _ARC4_UNLOCK(void);

endif

ifdef COMPAT_SHA512

ifndef SHA512_DIGEST_LENGTH

define SHA512_BLOCK_LENGTH 128

define SHA512_DIGEST_LENGTH 64

define SHA512_DIGEST_STRING_LENGTH (SHA512_DIGEST_LENGTH * 2 + 1)

typedef struct _SHA512_CTX { uint64_t state[8]; uint64_t bitcount[2]; uint8_t buffer[SHA512_BLOCK_LENGTH]; } SHA512_CTX;

endif / SHA512_DIGEST_LENGTH /

void SHA512_Init(SHA512_CTX); void SHA512_Update(SHA512_CTX, void, size_t); void SHA512_Final(uint8_t[SHA512_DIGEST_LENGTH], SHA512_CTX); unsigned char SHA512(void data, unsigned int data_len, unsigned char *digest);

endif / COMPAT_SHA512 /

ifdef USE_WINSOCK

ifndef _CUSTOM_VSNPRINTF

define _CUSTOM_VSNPRINTF

static inline int _gldns_custom_vsnprintf(char str, size_t size, const char format, va_list ap) { int r = vsnprintf(str, size, format, ap); return r == -1 ? _vscprintf(format, ap) : r; }

define vsnprintf _gldns_custom_vsnprintf

endif

endif

ifdef __cplusplus

}

endif

/* Use on-board gldns /

define USE_GLDNS 1

ifdef HAVE_SSL

define GLDNS_BUILD_CONFIG_HAVE_SSL 1

endif

ifdef HAVE_STDARG_H

include

endif

include

ifdef HAVE_SYS_SOCKET_H

include <sys/socket.h>

endif

ifdef HAVE_SYS_SELECT_H

include <sys/select.h>

endif

ifdef HAVE_SYS_TYPES_H

include <sys/types.h>

endif

ifdef HAVE_SYS_STAT_H

include <sys/stat.h>

endif

ifdef HAVE_NETINET_IN_H

include <netinet/in.h>

endif

ifdef HAVE_NETINET_TCP_H

include <netinet/tcp.h>

endif

ifdef HAVE_ARPA_INET_H

include <arpa/inet.h>

endif

ifdef HAVE_SIGNAL_H

include

endif

ifdef HAVE_SYS_TYPES_H

include <sys/types.h>

endif

ifdef HAVE_INTTYPES_H

include

endif

ifdef HAVE_LIMITS_H

include

endif

ifdef HAVE_SYS_LIMITS_H

include <sys/limits.h>

endif

ifdef PATH_MAX

define _GETDNS_PATH_MAX PATH_MAX

else

define _GETDNS_PATH_MAX 2048

endif

ifndef PRIu64

define PRIu64 "llu"

endif

ifdef HAVE_ATTR_FORMAT

define ATTR_FORMAT(archetype, string_index, first_to_check) \

__attribute__ ((format (archetype, string_index, first_to_check)))

else / !HAVE_ATTR_FORMAT /

define ATTR_FORMAT(archetype, string_index, first_to_check) / empty /

endif / !HAVE_ATTR_FORMAT /

if defined(DOXYGEN)

define ATTR_UNUSED(x) x

elif defined(__cplusplus)

define ATTR_UNUSED(x)

elif defined(GNUC)

define ATTR_UNUSED(x) x attribute((unused))

else / !HAVE_ATTR_UNUSED /

define ATTR_UNUSED(x) x

endif / !HAVE_ATTR_UNUSED /

ifdef TIME_WITH_SYS_TIME

include <sys/time.h>

include

else

ifdef HAVE_SYS_TIME_H

include <sys/time.h>

else

include

endif

endif

ifdef __cplusplus

extern "C" {

endif

if !defined(HAVE_STRPTIME) || !defined(STRPTIME_WORKS)

define strptime unbound_strptime

struct tm; char strptime(const char s, const char format, struct tm tm);

endif

if !defined(HAVE_SIGSET_T) && defined(HAVE__SIGSET_T)

typedef _sigset_t sigset_t;

endif

if !defined(HAVE_SIGEMPTYSET)

define sigemptyset(pset) (*(pset) = 0)

endif

if !defined(HAVE_SIGFILLSET)

define sigfillset(pset) (*(pset) = (sigset_t)-1)

endif

if !defined(HAVE_SIGADDSET)

define sigaddset(pset, num) (*(pset) |= (1L<<(num)))

endif

ifdef HAVE_LIBUNBOUND

include

ifdef HAVE_UNBOUND_EVENT_H

include

else

ifdef HAVE_UNBOUND_EVENT_API

ifndef _UB_EVENT_PRIMITIVES

define _UB_EVENT_PRIMITIVES

struct ub_event_base; struct ub_ctx ub_ctx_create_ub_event(struct ub_event_base base); typedef void (ub_event_callback_t)(void, int, void, int, int, char); int ub_resolve_event(struct ub_ctx ctx, const char name, int rrtype, int rrclass, void mydata, ub_event_callback_t callback, int async_id);

endif

endif

endif

endif

ifndef HAVE_DECL_INET_PTON

int inet_pton(int af, const char src, void dst);

endif

ifndef HAVE_DECL_INET_NTOP

const char inet_ntop(int af, const void src, char *dst, size_t size);

endif

ifndef HAVE_DECL_MKSTEMP

int mkstemp(char *template);

endif

ifndef HAVE_GETTIMEOFDAY

int gettimeofday(struct timeval tv, void tz);

endif

ifdef __cplusplus

}

endif

endif / CONFIG_H /`

Andersama commented 2 years ago

This is a significant problem for windows adoption because currently vcpkg builds version 1.7.0 with a hardcoded FD_SETSIZE of 1024.

Not sure how windows tends to handle file descriptors, but in my case, all file descriptors returned by upstream_find_for_netreq were well above 1024:

getdns_return_t
_getdns_submit_stub_request(getdns_network_req *netreq, uint64_t *now_ms)
{
    int fd = -1;
    getdns_dns_req *dnsreq;
    getdns_context *context;

    DEBUG_STUB("%s %-35s: MSG: %p TYPE: %d\n", STUB_DEBUG_ENTRY, __FUNC__,
               (void*)netreq, netreq->request_type);

    dnsreq = netreq->owner;
    context = dnsreq->context;

    /* This does a best effort to get a initial fd.
     * All other set up is done async*/
    fd = upstream_find_for_netreq(netreq);

The crash, or in my instance infinite loop comes from the fact the scheduling function fails and doesn't set I think what are the timeout callbacks. So when the handlers run in the future, it turns into an infinite loop.

static getdns_return_t
select_eventloop_schedule(getdns_eventloop *loop,
    int fd, uint64_t timeout, getdns_eventloop_event *event)
{
    _getdns_select_eventloop *select_loop  = (_getdns_select_eventloop *)loop;
    size_t i;

    DEBUG_SCHED( "%s(loop: %p, fd: %d, timeout: %"PRIu64", event: %p, FD_SETSIZE: %d)\n"
            , __FUNC__, (void *)loop, fd, timeout, (void *)event, FD_SETSIZE);

    if (!loop || !event)
        return GETDNS_RETURN_INVALID_PARAMETER;

    if (fd >= (int)FD_SETSIZE) {
        DEBUG_SCHED( "ERROR: fd %d >= FD_SETSIZE: %d!\n"
                   , fd, FD_SETSIZE);
        return GETDNS_RETURN_GENERIC_ERROR;
    }

I'd suspect there's a potential bug fix to make sure the GETDNS_SCHEDULE_EVENT macro under _getdns_submit_stub_request returns GETDNS_RETURN_GOOD only if the the macro succeeds in scheduling the event. The main fix would be to remove the FD_SETSIZE limit entirely, because file descriptors I think are handed out by the underlying operating system, a program shouldn't rely on the file descriptor it retrieves being in a certain range.

Andersama commented 2 years ago

It looks like there needs to be a bit of a rewrite such that fd_events and potentially a few other places are more like a dynamic array holding file descriptors for later use.