Closed oemergenc closed 10 months ago
Every now and then we are also getting this:
java.util.ConcurrentModificationException: null
at java.base/java.util.HashMap.computeIfAbsent(Unknown Source)
at io.micrometer.observation.Observation$Context.computeIfAbsent(Observation.java:1036)
at io.micrometer.tracing.handler.TracingObservationHandler.getTracingContext(TracingObservationHandler.java:179)
at io.micrometer.tracing.handler.DefaultTracingObservationHandler.onStart(DefaultTracingObservationHandler.java:45)
at io.micrometer.observation.ObservationHandler$FirstMatchingCompositeObservationHandler.onStart(ObservationHandler.java:149)
at io.micrometer.observation.SimpleObservation.notifyOnObservationStarted(SimpleObservation.java:222)
at io.micrometer.observation.SimpleObservation.start(SimpleObservation.java:169)
at org.springframework.security.web.server.ObservationWebFilterChainDecorator$AroundWebFilterObservation$SimpleAroundWebFilterObservation$ObservationReference.start(ObservationWebFilterChainDecorator.java:402)
at org.springframework.security.web.server.ObservationWebFilterChainDecorator$AroundWebFilterObservation$SimpleAroundWebFilterObservation.start(ObservationWebFilterChainDecorator.java:279)
at reactor.core.publisher.FluxPeekFuseable$PeekFuseableConditionalSubscriber.cancel(FluxPeekFuseable.java:445)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoFlatMap$FlatMapMain.cancel(MonoFlatMap.java:207)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoIgnoreThen$ThenIgnoreMain.cancel(MonoIgnoreThen.java:143)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoFlatMap$FlatMapMain.cancel(MonoFlatMap.java:199)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoContextWriteRestoringThreadLocals$ContextWriteRestoringThreadLocalsSubscriber.cancel(MonoContextWriteRestoringThreadLocals.java:164)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoContextWriteRestoringThreadLocals$ContextWriteRestoringThreadLocalsSubscriber.cancel(MonoContextWriteRestoringThreadLocals.java:164)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.FluxPeekFuseable$PeekFuseableConditionalSubscriber.cancel(FluxPeekFuseable.java:452)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.MonoContextWriteRestoringThreadLocals$ContextWriteRestoringThreadLocalsSubscriber.cancel(MonoContextWriteRestoringThreadLocals.java:164)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoFlatMap$FlatMapMain.cancel(MonoFlatMap.java:207)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.FluxDoOnEach$DoOnEachSubscriber.cancel(FluxDoOnEach.java:113)
at reactor.core.publisher.FluxPeek$PeekSubscriber.cancel(FluxPeek.java:153)
at reactor.core.publisher.MonoContextWriteRestoringThreadLocals$ContextWriteRestoringThreadLocalsSubscriber.cancel(MonoContextWriteRestoringThreadLocals.java:164)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoIgnoreThen$ThenIgnoreMain.cancel(MonoIgnoreThen.java:143)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.Operators.terminate(Operators.java:1277)
at reactor.netty.channel.ChannelOperations.terminate(ChannelOperations.java:481)
at io.netty.util.concurrent.AbstractEventExecutor.runTask(AbstractEventExecutor.java:174)
at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:167)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:470)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:413)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.base/java.lang.Thread.run(Unknown Source)
@chemicL any ideas what's going on?
So the first one looks like it happens during connection termination for some reason and the reactive chains get cancel
ed, which goes through the stack and somehow the cleanup works this way that it stops an Observation
, which was not yet started. The second one is less obvious to me, as it means that during cancel
ation of the chain (also when connection is terminated) a new Observation
is started and that leads to concurrent modification access on a map that is backing the Observation
's Context
.
I think the first is a logical mistake at some point above the raw context propagation theme. In the Spring Security's ObservationWebFilterChainDecorator
, it instructs the Observation
to be stopped upon receiving a cancel
signal. But, perhaps it's a valid assumption and the Observation
should be ok when that happens and just eliminate the attempt to stop.
The second one looks suspicious to me, but I can't speak for Spring Security on the reasons for it. I'd say this is most probably a mistake that the cancel signal triggers Observation::start
.
@oemergenc can you file an issue in Spring Security and reference this one there?
To summarise, my take would be:
WebFilterChain
I created this issue in spring security: https://github.com/spring-projects/spring-security/issues/13827
Anyone an idea what we can do to safely avoid the error in the meantime? This triggers our alerting system.
Theoretically you could create your own handlers that would set in context information about whether the observation got started / stopped and depending on that value delegate or not to default handlers. Does it make any sense?
Hi @marcingrzejszczak ,
could you guide me a little bit more here? Maybe there are some examples for custom handlers anywhere? That would help us a lot. Thanks
same place, ConcurrentModificationException
I think we can safely assume that this https://github.com/micrometer-metrics/micrometer/issues/3874 is the culprit. I'm about to close the root issue so I'll close this one too. If the problem repeats itself we can reopen this one
@marcingrzejszczak the problem is still there. We updated to implementation("io.micrometer:micrometer-observation:1.11.5")
, but still get errors like this one.
java.lang.IllegalStateException: Span wasn't started - an observation must be started (not only created)
at io.micrometer.tracing.handler.TracingObservationHandler.getRequiredSpan(TracingObservationHandler.java:202)
at io.micrometer.tracing.handler.DefaultTracingObservationHandler.onStop(DefaultTracingObservationHandler.java:50)
at io.micrometer.observation.ObservationHandler$FirstMatchingCompositeObservationHandler.onStop(ObservationHandler.java:197)
at io.micrometer.observation.SimpleObservation.lambda$notifyOnObservationStopped$0(SimpleObservation.java:276)
at java.base/java.util.ArrayDeque$DescendingIterator.forEachRemaining(Unknown Source)
at io.micrometer.observation.SimpleObservation.notifyOnObservationStopped(SimpleObservation.java:276)
at io.micrometer.observation.SimpleObservation.stop(SimpleObservation.java:196)
at org.springframework.security.web.server.ObservationWebFilterChainDecorator$AroundWebFilterObservation$SimpleAroundWebFilterObservation$ObservationReference.stop(ObservationWebFilterChainDecorator.java:426)
at org.springframework.security.web.server.ObservationWebFilterChainDecorator$AroundWebFilterObservation$SimpleAroundWebFilterObservation.stop(ObservationWebFilterChainDecorator.java:293)
at reactor.core.publisher.FluxPeekFuseable$PeekFuseableConditionalSubscriber.cancel(FluxPeekFuseable.java:445)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.MonoContextWriteRestoringThreadLocals$ContextWriteRestoringThreadLocalsSubscriber.cancel(MonoContextWriteRestoringThreadLocals.java:164)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.MonoFlatMap$FlatMapMain.cancel(MonoFlatMap.java:207)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.FluxDoOnEach$DoOnEachSubscriber.cancel(FluxDoOnEach.java:113)
at reactor.core.publisher.FluxPeek$PeekSubscriber.cancel(FluxPeek.java:153)
at reactor.core.publisher.MonoContextWriteRestoringThreadLocals$ContextWriteRestoringThreadLocalsSubscriber.cancel(MonoContextWriteRestoringThreadLocals.java:164)
at reactor.core.publisher.FluxOnAssembly$OnAssemblySubscriber.cancel(FluxOnAssembly.java:654)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drainLoop(Operators.java:2399)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.drain(Operators.java:2367)
at reactor.core.publisher.Operators$MultiSubscriptionSubscriber.cancel(Operators.java:2179)
at reactor.core.publisher.MonoIgnoreThen$ThenIgnoreMain.cancel(MonoIgnoreThen.java:143)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.MonoPeekTerminal$MonoTerminalPeekSubscriber.cancel(MonoPeekTerminal.java:144)
at reactor.core.publisher.Operators.terminate(Operators.java:1277)
at reactor.netty.channel.ChannelOperations.terminate(ChannelOperations.java:481)
at io.netty.util.concurrent.AbstractEventExecutor.runTask(AbstractEventExecutor.java:174)
at io.netty.util.concurrent.AbstractEventExecutor.safeExecute(AbstractEventExecutor.java:167)
at io.netty.util.concurrent.SingleThreadEventExecutor.runAllTasks(SingleThreadEventExecutor.java:470)
at io.netty.channel.epoll.EpollEventLoop.run(EpollEventLoop.java:413)
at io.netty.util.concurrent.SingleThreadEventExecutor$4.run(SingleThreadEventExecutor.java:997)
at io.netty.util.internal.ThreadExecutorMap$2.run(ThreadExecutorMap.java:74)
at io.netty.util.concurrent.FastThreadLocalRunnable.run(FastThreadLocalRunnable.java:30)
at java.base/java.lang.Thread.run(Unknown Source)
Can we reopen this one?
Do you happen to have a minimal java sample that can reproduce the issue?
Hey @oemergenc can you provide a reproducer please?
If you would like us to look at this issue, please provide the requested information. If the information is not provided within the next 7 days this issue will be closed.
Closing due to lack of requested feedback. If you would like us to look at this issue, please provide the requested information and we will re-open.
Hi,
I have a spring boot webflux application which uses this functionality in an autoconfiguration to enable tracing information in access logs:
Unfortunately we are running into problems and our service every few hours logs this error message and we are clueless why?
Error:
Anyone an idea what we can do? We are using Spring boot 3.1.1 with
micrometer-tracing-1.1.4