openzipkin / brave

Java distributed tracing implementation compatible with Zipkin backend services.
Apache License 2.0
2.36k stars 713 forks source link

Blocking calls when used with WebFlux #1442

Open lukasznowak-viacom opened 1 month ago

lukasznowak-viacom commented 1 month ago

Recently we started moving towards reactive stack based on WebFlux. Unfortunately during the PoC tests we encountered performance degradation compared to the old solution based on MVC. After investigation we found that micrometer-tracing library with brave bridge is called by reactive stack in blocking way and because of that we see much more CPU usage + higher overall latencies. Some investigation results can be found there already -> https://github.com/micrometer-metrics/tracing/issues/777

We are using:

Please find stack trace from https://github.com/reactor/BlockHound used in our service:

[ERROR] [2024-07-09 08:09:01,148] [client-settings-service,, ,,] [reactor-http-epoll-11] r.n.h.s.HttpServerOperations: [9a3a060a-4034, L:/172.17.0.6:9304 - R:/10.242.106.249:50730] Error starting response. Replying error status-
reactor.blockhound.BlockingOperationError: Blocking call! jdk.internal.misc.Unsafe#park
    at java.base/jdk.internal.misc.Unsafe.park(Unknown Source) ~[na:na]
    at java.base/java.util.concurrent.locks.LockSupport.park(Unknown Source) ~[na:na]
    at java.base/java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(Unknown Source) ~[na:na]
    at java.base/java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(Unknown Source) ~[na:na]
    at java.base/java.util.concurrent.locks.ReentrantLock$Sync.lock(Unknown Source) ~[na:na]
    at java.base/java.util.concurrent.locks.ReentrantLock.lock(Unknown Source) ~[na:na]
    at java.base/java.lang.ref.ReferenceQueue.poll(Unknown Source) ~[na:na]
    at brave.internal.recorder.PendingSpans.expungeStaleEntries(PendingSpans.java:129) ~[brave-5.16.0.jar!/:na]
    at brave.internal.collect.WeakConcurrentMap.getIfPresent(WeakConcurrentMap.java:50) ~[brave-5.16.0.jar!/:na]
    at brave.internal.recorder.PendingSpans.get(PendingSpans.java:59) ~[brave-5.16.0.jar!/:na]
    at brave.internal.recorder.PendingSpans.getOrCreate(PendingSpans.java:64) ~[brave-5.16.0.jar!/:na]
    at brave.Tracer._toSpan(Tracer.java:410) ~[brave-5.16.0.jar!/:na]
    at brave.Tracer.nextSpan(Tracer.java:365) ~[brave-5.16.0.jar!/:na]
    at io.micrometer.tracing.brave.bridge.BraveSpanBuilder.span(BraveSpanBuilder.java:78) ~[micrometer-tracing-bridge-brave-1.2.5.jar!/:1.2.5]
    at io.micrometer.tracing.brave.bridge.BraveSpanBuilder.start(BraveSpanBuilder.java:166) ~[micrometer-tracing-bridge-brave-1.2.5.jar!/:1.2.5]
    at io.micrometer.tracing.handler.PropagatingReceiverTracingObservationHandler.onStart(PropagatingReceiverTracingObservationHandler.java:74) ~[micrometer-tracing-1.2.5.jar!/:1.2.5]
    at io.micrometer.tracing.handler.PropagatingReceiverTracingObservationHandler.onStart(PropagatingReceiverTracingObservationHandler.java:35) ~[micrometer-tracing-1.2.5.jar!/:1.2.5]
    at io.micrometer.observation.ObservationHandler$FirstMatchingCompositeObservationHandler.onStart(ObservationHandler.java:149) ~[micrometer-observation-1.12.5.jar!/:1.12.5]
    at io.micrometer.observation.SimpleObservation.notifyOnObservationStarted(SimpleObservation.java:214) ~[micrometer-observation-1.12.5.jar!/:1.12.5]
    at io.micrometer.observation.SimpleObservation.start(SimpleObservation.java:161) ~[micrometer-observation-1.12.5.jar!/:1.12.5]
    at org.springframework.web.server.adapter.HttpWebHandlerAdapter$ObservationSignalListener.doFirst(HttpWebHandlerAdapter.java:383) ~[spring-web-6.1.6.jar!/:6.1.6]
    at reactor.core.publisher.MonoTapRestoringThreadLocals.subscribe(MonoTapRestoringThreadLocals.java:58) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.Mono.subscribe(Mono.java:4568) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.MonoIgnoreThen$ThenIgnoreMain.subscribeNext(MonoIgnoreThen.java:265) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.MonoIgnoreThen.subscribe(MonoIgnoreThen.java:51) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.Mono.subscribe(Mono.java:4568) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.MonoIgnoreThen$ThenIgnoreMain.subscribeNext(MonoIgnoreThen.java:265) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.MonoIgnoreThen.subscribe(MonoIgnoreThen.java:51) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:76) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:76) ~[reactor-core-3.6.5.jar!/:3.6.5]
    at reactor.netty.http.server.HttpServer$HttpServerHandle.onStateChange(HttpServer.java:1176) ~[reactor-netty-http-1.1.18.jar!/:1.1.18]
    at reactor.netty.ReactorNetty$CompositeConnectionObserver.onStateChange(ReactorNetty.java:710) ~[reactor-netty-core-1.1.18.jar!/:1.1.18]
    at reactor.netty.transport.ServerTransport$ChildObserver.onStateChange(ServerTransport.java:481) ~[reactor-netty-core-1.1.18.jar!/:1.1.18]
    at reactor.netty.http.server.HttpServerOperations.onInboundNext(HttpServerOperations.java:652) ~[reactor-netty-http-1.1.18.jar!/:1.1.18]
    at reactor.netty.channel.ChannelOperationsHandler.channelRead(ChannelOperationsHandler.java:114) ~[reactor-netty-core-1.1.18.jar!/:1.1.18]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at reactor.netty.http.server.HttpTrafficHandler.channelRead(HttpTrafficHandler.java:240) ~[reactor-netty-http-1.1.18.jar!/:1.1.18]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:289) ~[netty-handler-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.CombinedChannelDuplexHandler$DelegatingChannelHandlerContext.fireChannelRead(CombinedChannelDuplexHandler.java:436) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:346) ~[netty-codec-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:318) ~[netty-codec-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.CombinedChannelDuplexHandler.channelRead(CombinedChannelDuplexHandler.java:251) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:801) ~[netty-transport-classes-epoll-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:501) ~[netty-transport-classes-epoll-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:399) ~[netty-transport-classes-epoll-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997) ~[netty-common-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) ~[netty-common-4.1.109.Final.jar!/:4.1.109.Final]
    at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) ~[netty-common-4.1.109.Final.jar!/:4.1.109.Final]
    at java.base/java.lang.Thread.run(Unknown Source) ~[na:na]