Closed poettering closed 2 years ago
Seems to be test-build-track
that triggers this.
Here's the final strace
of dbus-broker
right before dying:
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=12864744}) = 0
sendmmsg(7<socket:[408071]>, [{msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\t\0\0\0\377\377\377\377?\0\0\0\5\1u\0\3\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\4\0\0\0:1.3\0\0\0\0\10\1g\0\1s\0\0", iov_len=80}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="\4\0\0\0:1.4\0", iov_len=9}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=89}], 1, MSG_DONTWAIT|MSG_NOSIGNAL) = 1
epoll_wait(3<anon_inode:[eventpoll]>, [{events=EPOLLIN|EPOLLOUT, data={u32=4242937248, u64=94497818416544}}], 7, 0) = 1
recvmsg(7<socket:[408071]>, {msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\1\0\1\224\0\0\0\4\0\0\0\177\0\0\0\1\1o\0\25\0\0\0/org/freedesktop/DBus\0\0\0\3\1s\0\10\0\0\0AddMatch\0\0\0\0\0\0\0\0\2\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\10\1g\0\1s\0\0\217\0\0\0type='signal',sender='org.freedesktop.DBus',path='/org/freedesktop/DBus',interface='org.freedesktop.DBus',member='NameOwnerChanged',arg0=':1.3'\0l\1\0\1\t\0\0\0\5\0\0\0\177\0\0\0\1\1o\0\25\0\0\0/org/freedesktop/DBus\0\0\0\3\1s\0\f\0\0\0GetNameOwner\0\0\0\0\2\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\10\1g\0\1s\0\0\4\0\0\0:1.3\0", iov_len=2048}], msg_iovlen=1, msg_controllen=0, msg_flags=MSG_CMSG_CLOEXEC}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = 445
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13088761}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13143991}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13198700}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13254015}) = 0
sendmmsg(7<socket:[408071]>, [{msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\0\0\0\0\377\377\377\377>\0\0\0\5\1u\0\4\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\4\0\0\0:1.3\0\0\0\0\10\1g\0\0\0\0\0", iov_len=80}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="", iov_len=0}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=80}, {msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\2\1\1\t\0\0\0\377\377\377\377?\0\0\0\5\1u\0\5\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\6\1s\0\4\0\0\0:1.3\0\0\0\0\10\1g\0\1s\0\0", iov_len=80}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="\4\0\0\0:1.3\0", iov_len=9}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=89}], 2, MSG_DONTWAIT|MSG_NOSIGNAL) = 2
epoll_wait(3<anon_inode:[eventpoll]>, [{events=EPOLLOUT, data={u32=4242937248, u64=94497818416544}}, {events=EPOLLIN|EPOLLOUT|EPOLLERR|EPOLLHUP, data={u32=4242973984, u64=94497818453280}}], 7, 0) = 2
recvmsg(7<socket:[408071]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EAGAIN (Resource temporarily unavailable)
recvmsg(11<socket:[408072]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 ECONNRESET (Connection reset by peer)
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13524271}) = 0
clock_gettime(CLOCK_THREAD_CPUTIME_ID, {tv_sec=0, tv_nsec=13582307}) = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 11<socket:[408072]>, NULL) = 0
close(11<socket:[408072]>) = 0
epoll_wait(3<anon_inode:[eventpoll]>, [], 6, 0) = 0
sendmmsg(7<socket:[408071]>, [{msg_hdr={msg_name=NULL, msg_namelen=0, msg_iov=[{iov_base="l\4\1\1\35\0\0\0\377\377\377\377\211\0\0\0\7\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\1\1o\0\25\0\0\0/org/freedesktop/DBus\0\0\0\2\1s\0\24\0\0\0org.freedesktop.DBus\0\0\0\0\3\1s\0\20\0\0\0NameOwnerChanged\0\0\0\0\0\0\0\0\10\1g\0\3sss\0\0\0\0\0\0\0\0", iov_len=160}, {iov_base=NULL, iov_len=0}, {iov_base=NULL, iov_len=0}, {iov_base="\4\0\0\0:1.4\0\0\0\0\4\0\0\0:1.4\0\0\0\0\0\0\0\0\0", iov_len=29}], msg_iovlen=4, msg_controllen=0, msg_flags=0}, msg_len=189}], 1, MSG_DONTWAIT|MSG_NOSIGNAL) = 1
epoll_wait(3<anon_inode:[eventpoll]>, [{events=EPOLLIN|EPOLLOUT, data={u32=4242937248, u64=94497818416544}}], 6, -1) = 1
recvmsg(7<socket:[408071]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EINVAL (Invalid argument)
write(2<socket:[403358]>, "ERROR socket_recvmsg @ ../src/dbus/socket.c +577: Invalid argument\n", 67) = 67
write(2<socket:[403358]>, " connection_dispatch @ ../src/dbus/connection.c +231\n", 58) = 58
write(2<socket:[403358]>, " peer_dispatch_connection @ ../src/bus/peer.c +39\n", 55) = 55
write(2<socket:[403358]>, " peer_dispatch @ ../src/bus/peer.c +225\n", 45) = 45
write(2<socket:[403358]>, " dispatch_context_dispatch @ ../src/util/dispatch.c +344\n", 62) = 62
write(2<socket:[403358]>, " broker_run @ ../src/broker/broker.c +219\n", 47) = 47
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 8<socket:[404150]>, NULL) = 0
close(8<socket:[404150]>) = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 7<socket:[408071]>, NULL) = 0
close(7<socket:[408071]>) = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 10<socket:[404152]>, NULL) = 0
close(10<socket:[404152]>) = 0
memfd_create("dbus-broker-log", MFD_CLOEXEC|MFD_ALLOW_SEALING) = 7</memfd:dbus-broker-log (deleted)>
ftruncate(7</memfd:dbus-broker-log (deleted)>, 2097152) = 0
mmap(NULL, 2097152, PROT_READ|PROT_WRITE, MAP_SHARED, 7</memfd:dbus-broker-log (deleted)>, 0) = 0x7f571eebd000
sendto(4<socket:[403361]>, "DBUS_BROKER_METRICS_DISPATCH_COUNT=57\nDBUS_BROKER_METRICS_DISPATCH_MIN=4223\nDBUS_BROKER_METRICS_DISPATCH_MAX=407253\nDBUS_BROKER_METRICS_DISPATCH_AVG=46549\nDBUS_BROKER_METRICS_DISPATCH_STDDEV=58721\nPRIORITY=6\nSYSLOG_FACILITY=3\nSYSLOG_IDENTIFIER=dbus-broker\nERRNO=0\nCODE_FILE=../src/broker/broker.c\nCODE_LINE=184\nCODE_FUNC=broker_log_metrics\nDBUS_BROKER_LOG_DROPPED=0\nMESSAGE_ID=8af3357071af4153af414daae07d38e7\nMESSAGE=Dispatched 57 messages @ 46(\302\26159)\316\274s / message.\n", 466, MSG_DONTWAIT|MSG_NOSIGNAL, NULL, 0) = 466
rt_sigprocmask(SIG_SETMASK, [HUP INT TERM CHLD], NULL, 8) = 0
write(2<socket:[403358]>, " run @ ../src/broker/main.c +261\n", 38) = 38
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 6<socket:[111708]>, NULL) = 0
close(6<socket:[111708]>) = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 9<socket:[403373]>, NULL) = 0
epoll_ctl(3<anon_inode:[eventpoll]>, EPOLL_CTL_DEL, 5<anon_inode:[signalfd]>, NULL) = 0
close(5<anon_inode:[signalfd]>) = 0
close(3<anon_inode:[eventpoll]>) = 0
munmap(0x7f571eebd000, 2097152) = 0
close(7</memfd:dbus-broker-log (deleted)>) = 0
munmap(0x7f571f4d0000, 4096) = 0
write(2<socket:[403358]>, " main @ ../src/broker/main.c +295\n", 39) = 39
exit_group(1) = ?
+++ exited with 1 +++
No idea what might trigger recvmsg(7<socket:[408071]>, {msg_namelen=0}, MSG_DONTWAIT|MSG_CMSG_CLOEXEC) = -1 EINVAL (Invalid argument)
Maybe a kernel issue?
The only interesting thing about test-bus-track that i see is that it calls shutdown() on the dbus socket it operates on, in order shut down traffic artificially without actually invalidating the fd.
Ah, yeah, if I skip that one test the test suite of systemd doesn't make my session die anymore. Yay.
So this might be caused by the rc kernel, dunno.
Thanks for the report @poettering ! Are you able to reproduce with a non-rc kernel?
Thanks for the report @poettering ! Are you able to reproduce with a non-rc kernel?
with 5.14.0-60.fc36.x86_64
the issue goes away. Let's consider this a kernel issue then. Should this resurface on non-rc kernels we can reopen this I guess.
Thanks!
I am running the systemd test suite from my session and it reliably makes dbus-broker abort with the following:
I don't know what precisely triggrs this, but it sucks hard, since dbus going down means my session going down, i.e. gnome and everything
Some of the tests in the systemd testuite do connect to the session bus, no idea (yet) which one would thta be.
dbus-broker-29-4.fc35.x86_64 kernel 5.15.0-0.rc3.25.fc36.x86_64