Recently we started moving towards reactive stack based on WebFlux. Unfortunately during the PoC tests we encountered performance degradation compared to the old solution based on MVC. After investigation we found that micrometer-tracing library with brave bridge is called by reactive stack in blocking way and because of that we see much more CPU usage + higher overall latencies. Some investigation results can be found there already -> https://github.com/micrometer-metrics/tracing/issues/777
[ERROR] [2024-07-09 08:09:01,148] [client-settings-service,, ,,] [reactor-http-epoll-11] r.n.h.s.HttpServerOperations: [9a3a060a-4034, L:/172.17.0.6:9304 - R:/10.242.106.249:50730] Error starting response. Replying error status-
reactor.blockhound.BlockingOperationError: Blocking call! jdk.internal.misc.Unsafe#park
at java.base/jdk.internal.misc.Unsafe.park(Unknown Source) ~[na:na]
at java.base/java.util.concurrent.locks.LockSupport.park(Unknown Source) ~[na:na]
at java.base/java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(Unknown Source) ~[na:na]
at java.base/java.util.concurrent.locks.AbstractQueuedSynchronizer.acquire(Unknown Source) ~[na:na]
at java.base/java.util.concurrent.locks.ReentrantLock$Sync.lock(Unknown Source) ~[na:na]
at java.base/java.util.concurrent.locks.ReentrantLock.lock(Unknown Source) ~[na:na]
at java.base/java.lang.ref.ReferenceQueue.poll(Unknown Source) ~[na:na]
at brave.internal.recorder.PendingSpans.expungeStaleEntries(PendingSpans.java:129) ~[brave-5.16.0.jar!/:na]
at brave.internal.collect.WeakConcurrentMap.getIfPresent(WeakConcurrentMap.java:50) ~[brave-5.16.0.jar!/:na]
at brave.internal.recorder.PendingSpans.get(PendingSpans.java:59) ~[brave-5.16.0.jar!/:na]
at brave.internal.recorder.PendingSpans.getOrCreate(PendingSpans.java:64) ~[brave-5.16.0.jar!/:na]
at brave.Tracer._toSpan(Tracer.java:410) ~[brave-5.16.0.jar!/:na]
at brave.Tracer.nextSpan(Tracer.java:365) ~[brave-5.16.0.jar!/:na]
at io.micrometer.tracing.brave.bridge.BraveSpanBuilder.span(BraveSpanBuilder.java:78) ~[micrometer-tracing-bridge-brave-1.2.5.jar!/:1.2.5]
at io.micrometer.tracing.brave.bridge.BraveSpanBuilder.start(BraveSpanBuilder.java:166) ~[micrometer-tracing-bridge-brave-1.2.5.jar!/:1.2.5]
at io.micrometer.tracing.handler.PropagatingReceiverTracingObservationHandler.onStart(PropagatingReceiverTracingObservationHandler.java:74) ~[micrometer-tracing-1.2.5.jar!/:1.2.5]
at io.micrometer.tracing.handler.PropagatingReceiverTracingObservationHandler.onStart(PropagatingReceiverTracingObservationHandler.java:35) ~[micrometer-tracing-1.2.5.jar!/:1.2.5]
at io.micrometer.observation.ObservationHandler$FirstMatchingCompositeObservationHandler.onStart(ObservationHandler.java:149) ~[micrometer-observation-1.12.5.jar!/:1.12.5]
at io.micrometer.observation.SimpleObservation.notifyOnObservationStarted(SimpleObservation.java:214) ~[micrometer-observation-1.12.5.jar!/:1.12.5]
at io.micrometer.observation.SimpleObservation.start(SimpleObservation.java:161) ~[micrometer-observation-1.12.5.jar!/:1.12.5]
at org.springframework.web.server.adapter.HttpWebHandlerAdapter$ObservationSignalListener.doFirst(HttpWebHandlerAdapter.java:383) ~[spring-web-6.1.6.jar!/:6.1.6]
at reactor.core.publisher.MonoTapRestoringThreadLocals.subscribe(MonoTapRestoringThreadLocals.java:58) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.Mono.subscribe(Mono.java:4568) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.MonoIgnoreThen$ThenIgnoreMain.subscribeNext(MonoIgnoreThen.java:265) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.MonoIgnoreThen.subscribe(MonoIgnoreThen.java:51) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.Mono.subscribe(Mono.java:4568) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.MonoIgnoreThen$ThenIgnoreMain.subscribeNext(MonoIgnoreThen.java:265) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.MonoIgnoreThen.subscribe(MonoIgnoreThen.java:51) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:76) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.MonoDeferContextual.subscribe(MonoDeferContextual.java:55) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.core.publisher.InternalMonoOperator.subscribe(InternalMonoOperator.java:76) ~[reactor-core-3.6.5.jar!/:3.6.5]
at reactor.netty.http.server.HttpServer$HttpServerHandle.onStateChange(HttpServer.java:1176) ~[reactor-netty-http-1.1.18.jar!/:1.1.18]
at reactor.netty.ReactorNetty$CompositeConnectionObserver.onStateChange(ReactorNetty.java:710) ~[reactor-netty-core-1.1.18.jar!/:1.1.18]
at reactor.netty.transport.ServerTransport$ChildObserver.onStateChange(ServerTransport.java:481) ~[reactor-netty-core-1.1.18.jar!/:1.1.18]
at reactor.netty.http.server.HttpServerOperations.onInboundNext(HttpServerOperations.java:652) ~[reactor-netty-http-1.1.18.jar!/:1.1.18]
at reactor.netty.channel.ChannelOperationsHandler.channelRead(ChannelOperationsHandler.java:114) ~[reactor-netty-core-1.1.18.jar!/:1.1.18]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:444) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at reactor.netty.http.server.HttpTrafficHandler.channelRead(HttpTrafficHandler.java:240) ~[reactor-netty-http-1.1.18.jar!/:1.1.18]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.handler.timeout.IdleStateHandler.channelRead(IdleStateHandler.java:289) ~[netty-handler-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.CombinedChannelDuplexHandler$DelegatingChannelHandlerContext.fireChannelRead(CombinedChannelDuplexHandler.java:436) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.handler.codec.ByteToMessageDecoder.fireChannelRead(ByteToMessageDecoder.java:346) ~[netty-codec-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.handler.codec.ByteToMessageDecoder.channelRead(ByteToMessageDecoder.java:318) ~[netty-codec-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.CombinedChannelDuplexHandler.channelRead(CombinedChannelDuplexHandler.java:251) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:442) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.fireChannelRead(AbstractChannelHandlerContext.java:412) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.DefaultChannelPipeline$HeadContext.channelRead(DefaultChannelPipeline.java:1410) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:440) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.AbstractChannelHandlerContext.invokeChannelRead(AbstractChannelHandlerContext.java:420) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.DefaultChannelPipeline.fireChannelRead(DefaultChannelPipeline.java:919) ~[netty-transport-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.epoll.AbstractEpollStreamChannel$EpollStreamUnsafe.epollInReady(AbstractEpollStreamChannel.java:801) ~[netty-transport-classes-epoll-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.epoll.EpollEventLoop.processReady(EpollEventLoop.java:501) ~[netty-transport-classes-epoll-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:399) ~[netty-transport-classes-epoll-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997) ~[netty-common-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74) ~[netty-common-4.1.109.Final.jar!/:4.1.109.Final]
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30) ~[netty-common-4.1.109.Final.jar!/:4.1.109.Final]
at java.base/java.lang.Thread.run(Unknown Source) ~[na:na]
Recently we started moving towards reactive stack based on WebFlux. Unfortunately during the PoC tests we encountered performance degradation compared to the old solution based on MVC. After investigation we found that micrometer-tracing library with brave bridge is called by reactive stack in blocking way and because of that we see much more CPU usage + higher overall latencies. Some investigation results can be found there already -> https://github.com/micrometer-metrics/tracing/issues/777
We are using:
Please find stack trace from https://github.com/reactor/BlockHound used in our service: