bus1 / dbus-broker

Linux D-Bus Message Broker
https://github.com/bus1/dbus-broker/wiki
Apache License 2.0
667 stars 78 forks source link

EINVAL from recvmsg() makes dbus-broker fail #275

Closed poettering closed 2 years ago

poettering commented 2 years ago

I am running the systemd test suite from my session and it reliably makes dbus-broker abort with the following:

Okt 06 09:56:11 iota dbus-broker-launch[41659]: ERROR socket_recvmsg @ ../src/dbus/socket.c +577: Invalid argument
Okt 06 09:56:11 iota dbus-broker-launch[41659]:       connection_dispatch @ ../src/dbus/connection.c +231
Okt 06 09:56:11 iota dbus-broker-launch[41659]:       peer_dispatch_connection @ ../src/bus/peer.c +39
Okt 06 09:56:11 iota dbus-broker-launch[41659]:       peer_dispatch @ ../src/bus/peer.c +225
Okt 06 09:56:11 iota dbus-broker-launch[41659]:       dispatch_context_dispatch @ ../src/util/dispatch.c +344
Okt 06 09:56:11 iota dbus-broker-launch[41659]:       broker_run @ ../src/broker/broker.c +219
Okt 06 09:56:11 iota dbus-broker-launch[41659]:       run @ ../src/broker/main.c +261
Okt 06 09:56:11 iota dbus-broker-launch[41659]:       main @ ../src/broker/main.c +295

I don't know what precisely triggrs this, but it sucks hard, since dbus going down means my session going down, i.e. gnome and everything

Some of the tests in the systemd testuite do connect to the session bus, no idea (yet) which one would thta be.

dbus-broker-29-4.fc35.x86_64 kernel 5.15.0-0.rc3.25.fc36.x86_64

poettering commented 2 years ago

Seems to be test-build-track that triggers this.

Here's the final strace of dbus-broker right before dying:

clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=12864744}) = 0
sendmmsg(7<socket:[408071]>, [{msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\t\0\0\0\377\377\377\377?\0\0\0\5\1u\0\3\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\4\0\0\0:1.3\0\0\0\0\10\1g\0\1s\0\0", iov_len=80}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="\4\0\0\0:1.4\0", iov_len=9}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=89}], 1, MSG_DONTWAIT|MSG_NOSIGNAL) = 1
epoll_wait(3<anon_inode:[eventpoll]>, [{events=EPOLLIN|EPOLLOUT, data={u32=4242937248, u64=94497818416544}}], 7, 0) = 1
recvmsg(7<socket:[408071]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\224\0\0\0\4\0\0\0\177\0\0\0\1\1o\0\25\0\0\0/org/freedesktop/DBus\0\0\0\3\1s\0\10\0\0\0AddMatch\0\0\0\0\0\0\0\0\2\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\10\1g\0\1s\0\0\217\0\0\0type='signal',sender='org.freedesktop.DBus',path='/org/freedesktop/DBus',interface='org.freedesktop.DBus',member='NameOwnerChanged',arg0=':1.3'\0l\1\0\1\t\0\0\0\5\0\0\0\177\0\0\0\1\1o\0\25\0\0\0/org/freedesktop/DBus\0\0\0\3\1s\0\f\0\0\0GetNameOwner\0\0\0\0\2\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\10\1g\0\1s\0\0\4\0\0\0:1.3\0", iov_len=2048}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 445
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13088761}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13143991}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13198700}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13254015}) = 0
sendmmsg(7<socket:[408071]>, [{msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\0\0\0\0\377\377\377\377>\0\0\0\5\1u\0\4\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\4\0\0\0:1.3\0\0\0\0\10\1g\0\0\0\0\0", iov_len=80}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="", iov_len=0}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=80}, {msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\t\0\0\0\377\377\377\377?\0\0\0\5\1u\0\5\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\4\0\0\0:1.3\0\0\0\0\10\1g\0\1s\0\0", iov_len=80}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="\4\0\0\0:1.3\0", iov_len=9}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=89}], 2, MSG_DONTWAIT|MSG_NOSIGNAL) = 2
epoll_wait(3<anon_inode:[eventpoll]>, [{events=EPOLLOUT, data={u32=4242937248, u64=94497818416544}}, {events=EPOLLIN|EPOLLOUT|EPOLLERR|EPOLLHUP, data={u32=4242973984, u64=94497818453280}}], 7, 0) = 2
recvmsg(7<socket:[408071]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable)
recvmsg(11<socket:[408072]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 ECONNRESET (Connection reset by peer)
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13524271}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13582307}) = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 11<socket:[408072]>, NULL) = 0
close(11<socket:[408072]>)              = 0
epoll_wait(3<anon_inode:[eventpoll]>, [], 6, 0) = 0
sendmmsg(7<socket:[408071]>, [{msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\4\1\1\35\0\0\0\377\377\377\377\211\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\1\1o\0\25\0\0\0/org/freedesktop/DBus\0\0\0\2\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\3\1s\0\20\0\0\0NameOwnerChanged\0\0\0\0\0\0\0\0\10\1g\0\3sss\0\0\0\0\0\0\0\0", iov_len=160}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="\4\0\0\0:1.4\0\0\0\0\4\0\0\0:1.4\0\0\0\0\0\0\0\0\0", iov_len=29}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=189}], 1, MSG_DONTWAIT|MSG_NOSIGNAL) = 1
epoll_wait(3<anon_inode:[eventpoll]>, [{events=EPOLLIN|EPOLLOUT, data={u32=4242937248, u64=94497818416544}}], 6, -1) = 1
recvmsg(7<socket:[408071]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EINVAL (Invalid argument)
write(2<socket:[403358]>, "ERROR socket_recvmsg @ ../src/dbus/socket.c +577: Invalid argument\n", 67) = 67
write(2<socket:[403358]>, "      connection_dispatch @ ../src/dbus/connection.c +231\n", 58) = 58
write(2<socket:[403358]>, "      peer_dispatch_connection @ ../src/bus/peer.c +39\n", 55) = 55
write(2<socket:[403358]>, "      peer_dispatch @ ../src/bus/peer.c +225\n", 45) = 45
write(2<socket:[403358]>, "      dispatch_context_dispatch @ ../src/util/dispatch.c +344\n", 62) = 62
write(2<socket:[403358]>, "      broker_run @ ../src/broker/broker.c +219\n", 47) = 47
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 8<socket:[404150]>, NULL) = 0
close(8<socket:[404150]>)               = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 7<socket:[408071]>, NULL) = 0
close(7<socket:[408071]>)               = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 10<socket:[404152]>, NULL) = 0
close(10<socket:[404152]>)              = 0
memfd_create("dbus-broker-log", MFD_CLOEXEC|MFD_ALLOW_SEALING) = 7</memfd:dbus-broker-log (deleted)>
ftruncate(7</memfd:dbus-broker-log (deleted)>, 2097152) = 0
mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_SHARED, 7</memfd:dbus-broker-log (deleted)>, 0) = 0x7f571eebd000
sendto(4<socket:[403361]>, "DBUS_BROKER_METRICS_DISPATCH_COUNT=57\nDBUS_BROKER_METRICS_DISPATCH_MIN=4223\nDBUS_BROKER_METRICS_DISPATCH_MAX=407253\nDBUS_BROKER_METRICS_DISPATCH_AVG=46549\nDBUS_BROKER_METRICS_DISPATCH_STDDEV=58721\nPRIORITY=6\nSYSLOG_FACILITY=3\nSYSLOG_IDENTIFIER=dbus-broker\nERRNO=0\nCODE_FILE=../src/broker/broker.c\nCODE_LINE=184\nCODE_FUNC=broker_log_metrics\nDBUS_BROKER_LOG_DROPPED=0\nMESSAGE_ID=8af3357071af4153af414daae07d38e7\nMESSAGE=Dispatched 57 messages @ 46(\302\26159)\316\274s / message.\n", 466, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 466
rt_sigprocmask(SIG_SETMASK, [HUP INT TERM CHLD], NULL, 8) = 0
write(2<socket:[403358]>, "      run @ ../src/broker/main.c +261\n", 38) = 38
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 6<socket:[111708]>, NULL) = 0
close(6<socket:[111708]>)               = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 9<socket:[403373]>, NULL) = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 5<anon_inode:[signalfd]>, NULL) = 0
close(5<anon_inode:[signalfd]>)         = 0
close(3<anon_inode:[eventpoll]>)        = 0
munmap(0x7f571eebd000, 2097152)         = 0
close(7</memfd:dbus-broker-log (deleted)>) = 0
munmap(0x7f571f4d0000, 4096)            = 0
write(2<socket:[403358]>, "      main @ ../src/broker/main.c +295\n", 39) = 39
exit_group(1)                           = ?
+++ exited with 1 +++
poettering commented 2 years ago

No idea what might trigger recvmsg(7<socket:[408071]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EINVAL (Invalid argument)

Maybe a kernel issue?

The only interesting thing about test-bus-track that i see is that it calls shutdown() on the dbus socket it operates on, in order shut down traffic artificially without actually invalidating the fd.

poettering commented 2 years ago

Ah, yeah, if I skip that one test the test suite of systemd doesn't make my session die anymore. Yay.

So this might be caused by the rc kernel, dunno.

teg commented 2 years ago

Thanks for the report @poettering ! Are you able to reproduce with a non-rc kernel?

teg commented 2 years ago

Thanks for the report @poettering ! Are you able to reproduce with a non-rc kernel?

poettering commented 2 years ago

with 5.14.0-60.fc36.x86_64 the issue goes away. Let's consider this a kernel issue then. Should this resurface on non-rc kernels we can reopen this I guess.

teg commented 2 years ago

Thanks!