Open · clauderobi opened this issue 2 years ago
Not good indeed,
I can see in CMakeLists.txt that the select-based default eventloop is chosen when a test program using poll fails to compile:
# Event loop extension
set(DEFAULT_EVENTLOOP "select_eventloop")
if (ENABLE_POLL_EVENTLOOP)
  if (HAVE_SYS_POLL_H)
    set(TEST_CFLAG "-DHAVE_SYS_POLL_H=1")
  endif ()
  try_compile(USE_POLL_DEFAULT_EVENTLOOP
    ${CMAKE_CURRENT_BINARY_DIR}
    ${CMAKE_CURRENT_SOURCE_DIR}/cmake/tests/test_poll.c
    COMPILE_DEFINITIONS "${TEST_CFLAG}"
  )
  if (USE_POLL_DEFAULT_EVENTLOOP)
    set(DEFAULT_EVENTLOOP "poll_eventloop")
  endif ()
endif ()
Could you perhaps share your config.h, so we can see why it was not included? Also, I will try to reproduce your issue.
And I agree that if, for a Windows build, the select eventloop was chosen for some reason, fds > 1024 should soft-fail...
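For reference, the try_compile probe only has to compile and link; the actual cmake/tests/test_poll.c is not reproduced here, but a test of that kind typically looks something like this:

/* Sketch of a poll-availability probe of the kind try_compile uses;
 * the real cmake/tests/test_poll.c may differ. If poll() or its header
 * is missing, compilation fails and CMake keeps the select eventloop. */
#ifdef HAVE_SYS_POLL_H
#include <sys/poll.h>
#else
#include <poll.h>
#endif

int main(void)
{
	struct pollfd pfd;
	pfd.fd = 0;          /* stdin */
	pfd.events = POLLIN;
	return poll(&pfd, 1, 0) < 0; /* zero timeout: just exercise the call */
}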
Here is my config.h file.
/* #undef HAVE_SYS_LIMITS_H */
/* #undef HAVE_SYS_POLL_H */
/* #undef HAVE_POLL_H */
/* #undef HAVE_RESOURCE_H */
/* #undef HAVE_ENDIAN_H */
/* #undef HAVE_NETDB_H */
/* #undef HAVE_ARPA_INET_H */
/* #undef HAVE_NETINET_IN_H */
/* #undef HAVE_NETINET_TCP_H */
/* #undef HAVE_SYS_SELECT_H */
/* #undef HAVE_SYS_SOCKET_H */
/* #undef HAVE_SYS_SYSCTL_H */
/* #undef HAVE_SYS_WAIT_H */
/* #undef USE_DANESSL */
/* #undef HAVE_OPENSSL_PARAM_BUILD_H */
/* #undef HAVE_EVP_DSS1 */
/* #undef HAVE_NETTLE_GET_SECP_256R1 */
/* #undef HAVE_NETTLE_GET_SECP_384R1 */
/* #undef HAVE_OSSL_PARAM_BLD_NEW */
/* #undef HAVE_WINDOWS_THREADS */
/* #undef RUNSTATEDIR */
/* #undef HAVE_MDNS_SUPPORT */
/* #undef HAVE_DECL_GETENTROPY */
/* #undef HAVE_DECL_INET_PTON */
/* #undef HAVE_DECL_INET_NTOP */
/* #undef HAVE_WIN_DECL_INET_PTON */
/* #undef HAVE_WIN_DECL_INET_NTOP */
/* #undef HAVE_DECL_SIGEMPTYSET */
/* #undef HAVE_DECL_SIGFILLSET */
/* #undef HAVE_DECL_SIGADDSET */
/* #undef HAVE_DECL_STRPTIME */
/* #undef HAVE_DECL_TCP_FASTOPEN */
/* #undef HAVE_DECL_TCP_FASTOPEN_CONNECT */
/* #undef HAVE_DECL_MSG_FASTOPEN */
/* #undef HAVE_FCNTL */
/* #undef HAVE_SIGEMPTYSET */
/* #undef HAVE_SIGFILLSET */
/* #undef HAVE_SIGADDSET */
/* #undef HAVE_STRPTIME */
/* #undef HAVE_SIGSET_T */
/* #undef HAVE_BSD_STDLIB_H */
/* #undef HAVE_BSD_STRING_H */
/* #undef HAVE_DECL_STRLCPY */
/* #undef HAVE_DECL_ARC4RANDOM */
/* #undef HAVE_DECL_ARC4RANDOM_UNIFORM */
/* #undef HAVE_BSD_DECL_STRLCPY */
/* #undef HAVE_BSD_DECL_ARC4RANDOM */
/* #undef HAVE_BSD_DECL_ARC4RANDOM_UNIFORM */
/* #undef HAVE_STRLCPY */
/* #undef HAVE_ARC4RANDOM */
/* #undef HAVE_ARC4RANDOM_UNIFORM */
/* #undef HAVE_LIBUNBOUND */
/* #undef HAVE_UNBOUND_EVENT_H */
/* #undef HAVE_UNBOUND_EVENT_API */
/* #undef HAVE_UB_CTX_SET_STUB */
/* #undef HAVE_LIBIDN */
/* #undef HAVE_LIBIDN2 */
/* #undef HAVE_NETTLE */
/* #undef HAVE_NETTLE_DSA_COMPAT_H */
/* #undef HAVE_NETTLE_EDDSA_H */
/* #undef HAVE_EVENT2_EVENT_H */
/* #undef HAVE_EVENT_BASE_NEW */
/* #undef HAVE_EVENT_BASE_FREE */
/* #undef USE_POLL_DEFAULT_EVENTLOOP */
/* #undef STRPTIME_WORKS */
/* #undef FD_SETSIZE */
/* #undef KEEP_CONNECTIONS_OPEN_DEBUG */
/* #undef USE_OSX_TCP_FASTOPEN */
/* #undef HAVE_DECL_TCP_USER_TIMEOUT */
/* #undef HAVE_NEW_UV_TIMER_CB */
/* #undef TARGET_IS_BIG_ENDIAN */
/* On windows it is allowed to increase the FD_SETSIZE */
/* the version of the windows API enabled */
typedef SSIZE_T ssize_t;
/* detect if we need to cast to unsigned int for FD_SET to avoid warnings */
/* Windows wants us to use _strdup instead of strdup */
/* Windows doesn't have strcasecmp and strncasecmp. */
extern "C" {
size_t strlcpy(char *dst, const char *src, size_t siz);
uint32_t arc4random(void);
uint32_t arc4random_uniform(uint32_t upper_bound);
void explicit_bzero(void *buf, size_t len);
int getentropy(void *buf, size_t len);
void arc4random_buf(void *buf, size_t n);
void _ARC4_LOCK(void);
void _ARC4_UNLOCK(void);
typedef struct _SHA512_CTX {
	uint64_t state[8];
	uint64_t bitcount[2];
	uint8_t buffer[SHA512_BLOCK_LENGTH];
} SHA512_CTX;
void SHA512_Init(SHA512_CTX *);
void SHA512_Update(SHA512_CTX *, void *, size_t);
void SHA512_Final(uint8_t[SHA512_DIGEST_LENGTH], SHA512_CTX *);
unsigned char *SHA512(void *data, unsigned int data_len, unsigned char *digest);
static inline int _gldns_custom_vsnprintf(char *str, size_t size, const char *format, va_list ap)
{
	int r = vsnprintf(str, size, format, ap);
	return r == -1 ? _vscprintf(format, ap) : r;
}
}
/* Use on-board gldns */
__attribute__ ((format (archetype, string_index, first_to_check)))
extern "C" {
struct tm;
char *strptime(const char *s, const char *format, struct tm *tm);
typedef _sigset_t sigset_t;
struct ub_event_base;
struct ub_ctx *ub_ctx_create_ub_event(struct ub_event_base *base);
typedef void (*ub_event_callback_t)(void *, int, void *, int, int, char *);
int ub_resolve_event(struct ub_ctx *ctx, const char *name, int rrtype,
    int rrclass, void *mydata, ub_event_callback_t callback, int *async_id);
int inet_pton(int af, const char *src, void *dst);
const char *inet_ntop(int af, const void *src, char *dst, size_t size);
int mkstemp(char *template);
int gettimeofday(struct timeval *tv, void *tz);
}
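One thing the dump confirms: USE_POLL_DEFAULT_EVENTLOOP is #undef, so this build did fall back to the select eventloop. As a stopgap, note that on Winsock FD_SETSIZE is only a compile-time capacity (fd_set is a counted array of SOCKETs), so it can be enlarged by defining it before the headers; a sketch with an arbitrary value, and the library itself would have to be rebuilt with the same definition for its fd checks to agree:

/* Stopgap sketch: on Winsock, FD_SETSIZE is just the compile-time
 * capacity of fd_set, so defining a larger value before <winsock2.h>
 * enlarges the array. Both getdns and the application would need to
 * be built with the same value; 4096 here is arbitrary. */
#define FD_SETSIZE 4096
#include <winsock2.h>

int main(void)
{
	fd_set readfds;
	FD_ZERO(&readfds); /* this set can now hold up to 4096 sockets */
	return 0;
}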
This is a significant problem for Windows adoption, because vcpkg currently builds version 1.7.0 with a hardcoded FD_SETSIZE of 1024.
Not sure how Windows tends to hand out file descriptors, but in my case all file descriptors returned by upstream_find_for_netreq were well above 1024:
getdns_return_t
_getdns_submit_stub_request(getdns_network_req *netreq, uint64_t *now_ms)
{
	int fd = -1;
	getdns_dns_req *dnsreq;
	getdns_context *context;

	DEBUG_STUB("%s %-35s: MSG: %p TYPE: %d\n", STUB_DEBUG_ENTRY, __FUNC__,
	    (void*)netreq, netreq->request_type);

	dnsreq  = netreq->owner;
	context = dnsreq->context;

	/* This does a best effort to get a initial fd.
	 * All other set up is done async*/
	fd = upstream_find_for_netreq(netreq);
The crash, or in my case the infinite loop, comes from the fact that the scheduling function fails and, I believe, never sets the timeout callbacks. So when the handlers run later, it turns into an infinite loop.
static getdns_return_t
select_eventloop_schedule(getdns_eventloop *loop,
    int fd, uint64_t timeout, getdns_eventloop_event *event)
{
	_getdns_select_eventloop *select_loop = (_getdns_select_eventloop *)loop;
	size_t i;

	DEBUG_SCHED( "%s(loop: %p, fd: %d, timeout: %"PRIu64", event: %p, FD_SETSIZE: %d)\n"
	    , __FUNC__, (void *)loop, fd, timeout, (void *)event, FD_SETSIZE);

	if (!loop || !event)
		return GETDNS_RETURN_INVALID_PARAMETER;

	if (fd >= (int)FD_SETSIZE) {
		DEBUG_SCHED( "ERROR: fd %d >= FD_SETSIZE: %d!\n"
		    , fd, FD_SETSIZE);
		return GETDNS_RETURN_GENERIC_ERROR;
	}
I suspect one bug fix would be to make sure the GETDNS_SCHEDULE_EVENT macro under _getdns_submit_stub_request leads to GETDNS_RETURN_GOOD only if the macro actually succeeds in scheduling the event. The main fix, though, would be to remove the FD_SETSIZE limit entirely: file descriptors are handed out by the underlying operating system, so a program shouldn't rely on the descriptors it receives falling within a certain range.
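A sketch of that first fix, with caveats: GETDNS_SCHEDULE_EVENT is assumed here to evaluate to the getdns_return_t of the underlying schedule() call (as in select_eventloop_schedule above), and the cleanup step is hypothetical:

/* Sketch only, not getdns's actual code: propagate a scheduling
 * failure out of _getdns_submit_stub_request instead of swallowing it. */
getdns_return_t r = GETDNS_SCHEDULE_EVENT(
    dnsreq->loop, fd, timeout, &netreq->event); /* timeout as computed by the caller */
if (r != GETDNS_RETURN_GOOD) {
	netreq->state = NET_REQ_ERRORED; /* hypothetical cleanup step */
	return r; /* fail the request instead of leaving it spinning */
}
return GETDNS_RETURN_GOOD;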
It looks like this needs a bit of a rewrite, so that fd_events (and potentially a few other places) becomes something like a dynamic array holding file descriptors for later use; a sketch follows.
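A minimal, self-contained sketch of that idea, with hypothetical names (this is not getdns code): grow the per-fd event table on demand instead of fixing its size at FD_SETSIZE.

/* Hypothetical dynamic per-fd event table: the array is indexed by fd
 * and doubled whenever an fd beyond the current capacity arrives. */
#include <stdlib.h>
#include <string.h>

typedef struct my_fd_event { void *userarg; } my_fd_event;

typedef struct fd_table {
	my_fd_event **events; /* indexed by fd */
	size_t        cap;
} fd_table;

static int fd_table_set(fd_table *t, int fd, my_fd_event *ev)
{
	if ((size_t)fd >= t->cap) {
		size_t new_cap = t->cap ? t->cap * 2 : 64;
		while ((size_t)fd >= new_cap)
			new_cap *= 2;
		my_fd_event **p = realloc(t->events, new_cap * sizeof(*p));
		if (!p)
			return -1; /* out of memory; caller soft-fails */
		/* zero the newly added slots */
		memset(p + t->cap, 0, (new_cap - t->cap) * sizeof(*p));
		t->events = p;
		t->cap = new_cap;
	}
	t->events[fd] = ev;
	return 0;
}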
Hi,
I am using the synchronous mode.
I have been using getdns for over a year on Linux, but now I am porting my code to Windows and I am getting an error when the file descriptor is higher than 1024. This happens after a few successful requests.
I am aware of ticket https://github.com/getdnsapi/getdns/issues/222, which says that poll is now the default for the event loop, but I can tell that this is not the case here: using gdb I was able to see the code going into functions in select_eventloop.c and using the select mechanism.
I tested with 1.7.2 and got the same behavior.
What is the solution?
PS. The code should be failing cleanly, but that is not the case; the request is still attempted but never returns. The result is a thread that consumes 100% of the CPU. Not good!
PPS. I am cross-compiling with mingw32-w64.
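For anyone trying to reproduce: a minimal sketch of the synchronous pattern described above, using the public getdns API (the loop count and query name are arbitrary). On the affected Windows builds, one of these calls eventually never returns and the thread spins at 100% CPU once the socket value passes FD_SETSIZE:

/* Minimal reproducer sketch for the synchronous case. */
#include <stdio.h>
#include <getdns/getdns.h>

int main(void)
{
	getdns_context *ctx;
	if (getdns_context_create(&ctx, 1) != GETDNS_RETURN_GOOD)
		return 1;
	for (int i = 0; i < 5000; i++) {
		getdns_dict *resp = NULL;
		getdns_return_t r = getdns_general_sync(
		    ctx, "example.com", GETDNS_RRTYPE_A, NULL, &resp);
		if (r != GETDNS_RETURN_GOOD) {
			/* with a clean soft-fail, we would end up here
			 * instead of hanging inside the call above */
			fprintf(stderr, "query %d failed: %d\n", i, (int)r);
			break;
		}
		getdns_dict_destroy(resp);
	}
	getdns_context_destroy(ctx);
	return 0;
}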