gradle / test-retry-gradle-plugin

Gradle plugin to retry tests that have failed to mitigate test flakiness.
Apache License 2.0
222 stars 50 forks source link

test-retry was unable to retry the following test methods #166

Closed mauriciovilela closed 5 months ago

mauriciovilela commented 1 year ago

Java 17 Spring boot 2.7.0 Gradle 7.4.2 org.gradle.test-retry 1.5.0 com.adarshr.test-logger 3.2.0

Caused by: java.lang.IllegalStateException: org.gradle.test-retry was unable to retry the following test methods, which is unexpected. Please file a bug report at https://github.com/gradle/test-retry-gradle-plugin/issues

at org.gradle.testretry.internal.executer.RetryTestExecuter.failWithNonRetriedTestsIfAny(RetryTestExecuter.java:144)
        at org.gradle.testretry.internal.config.TestTaskConfigurer$FinalizeTaskAction.execute(TestTaskConfigurer.java:170)
        at org.gradle.testretry.internal.config.TestTaskConfigurer$FinalizeTaskAction.execute(TestTaskConfigurer.java:164)
        at org.gradle.testretry.internal.config.TestTaskConfigurer$ConditionalTaskAction.execute(TestTaskConfigurer.java:159)
        at org.gradle.testretry.internal.config.TestTaskConfigurer$ConditionalTaskAction.execute(TestTaskConfigurer.java:144)
        at org.gradle.api.internal.AbstractTask$TaskActionWrapper.execute(AbstractTask.java:802)
        at org.gradle.api.internal.AbstractTask$TaskActionWrapper.execute(AbstractTask.java:775)
        at org.gradle.api.internal.tasks.execution.TaskExecution$3.run(TaskExecution.java:242)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$1.execute(DefaultBuildOperationRunner.java:29)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$1.execute(DefaultBuildOperationRunner.java:26)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$2.execute(DefaultBuildOperationRunner.java:66)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$2.execute(DefaultBuildOperationRunner.java:59)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.execute(DefaultBuildOperationRunner.java:157)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.execute(DefaultBuildOperationRunner.java:59)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.run(DefaultBuildOperationRunner.java:47)
        at org.gradle.internal.operations.DefaultBuildOperationExecutor.run(DefaultBuildOperationExecutor.java:68)
        at org.gradle.api.internal.tasks.execution.TaskExecution.executeAction(TaskExecution.java:227)
        at org.gradle.api.internal.tasks.execution.TaskExecution.executeActions(TaskExecution.java:210)
        at org.gradle.api.internal.tasks.execution.TaskExecution.executeWithPreviousOutputFiles(TaskExecution.java:193)
        at org.gradle.api.internal.tasks.execution.TaskExecution.execute(TaskExecution.java:171)
        at org.gradle.internal.execution.steps.ExecuteStep.executeInternal(ExecuteStep.java:89)
        at org.gradle.internal.execution.steps.ExecuteStep.access$000(ExecuteStep.java:40)
        at org.gradle.internal.execution.steps.ExecuteStep$1.call(ExecuteStep.java:53)
        at org.gradle.internal.execution.steps.ExecuteStep$1.call(ExecuteStep.java:50)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$CallableBuildOperationWorker.execute(DefaultBuildOperationRunner.java:204)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$CallableBuildOperationWorker.execute(DefaultBuildOperationRunner.java:199)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$2.execute(DefaultBuildOperationRunner.java:66)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$2.execute(DefaultBuildOperationRunner.java:59)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.execute(DefaultBuildOperationRunner.java:157)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.execute(DefaultBuildOperationRunner.java:59)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.call(DefaultBuildOperationRunner.java:53)
        at org.gradle.internal.operations.DefaultBuildOperationExecutor.call(DefaultBuildOperationExecutor.java:73)
        at org.gradle.internal.execution.steps.ExecuteStep.execute(ExecuteStep.java:50)
        at org.gradle.internal.execution.steps.ExecuteStep.execute(ExecuteStep.java:40)
        at org.gradle.internal.execution.steps.RemovePreviousOutputsStep.execute(RemovePreviousOutputsStep.java:68)
        at org.gradle.internal.execution.steps.RemovePreviousOutputsStep.execute(RemovePreviousOutputsStep.java:38)
        at org.gradle.internal.execution.steps.ResolveInputChangesStep.execute(ResolveInputChangesStep.java:48)
        at org.gradle.internal.execution.steps.ResolveInputChangesStep.execute(ResolveInputChangesStep.java:36)
        at org.gradle.internal.execution.steps.CancelExecutionStep.execute(CancelExecutionStep.java:41)
        at org.gradle.internal.execution.steps.TimeoutStep.executeWithoutTimeout(TimeoutStep.java:74)
        at org.gradle.internal.execution.steps.TimeoutStep.execute(TimeoutStep.java:55)
        at org.gradle.internal.execution.steps.CreateOutputsStep.execute(CreateOutputsStep.java:51)
        at org.gradle.internal.execution.steps.CreateOutputsStep.execute(CreateOutputsStep.java:29)
        at org.gradle.internal.execution.steps.CaptureStateAfterExecutionStep.execute(CaptureStateAfterExecutionStep.java:61)
        at org.gradle.internal.execution.steps.CaptureStateAfterExecutionStep.execute(CaptureStateAfterExecutionStep.java:42)
        at org.gradle.internal.execution.steps.BroadcastChangingOutputsStep.execute(BroadcastChangingOutputsStep.java:60)
        at org.gradle.internal.execution.steps.BroadcastChangingOutputsStep.execute(BroadcastChangingOutputsStep.java:27)
        at org.gradle.internal.execution.steps.BuildCacheStep.executeWithoutCache(BuildCacheStep.java:180)
        at org.gradle.internal.execution.steps.BuildCacheStep.lambda$execute$1(BuildCacheStep.java:75)
        at org.gradle.internal.Either$Right.fold(Either.java:175)
        at org.gradle.internal.execution.caching.CachingState.fold(CachingState.java:59)
        at org.gradle.internal.execution.steps.BuildCacheStep.execute(BuildCacheStep.java:73)
        at org.gradle.internal.execution.steps.BuildCacheStep.execute(BuildCacheStep.java:48)
        at org.gradle.internal.execution.steps.StoreExecutionStateStep.execute(StoreExecutionStateStep.java:36)
        at org.gradle.internal.execution.steps.StoreExecutionStateStep.execute(StoreExecutionStateStep.java:25)
        at org.gradle.internal.execution.steps.RecordOutputsStep.execute(RecordOutputsStep.java:36)
        at org.gradle.internal.execution.steps.RecordOutputsStep.execute(RecordOutputsStep.java:22)
        at org.gradle.internal.execution.steps.SkipUpToDateStep.executeBecause(SkipUpToDateStep.java:110)
        at org.gradle.internal.execution.steps.SkipUpToDateStep.lambda$execute$2(SkipUpToDateStep.java:56)
        at org.gradle.internal.execution.steps.SkipUpToDateStep.execute(SkipUpToDateStep.java:56)
        at org.gradle.internal.execution.steps.SkipUpToDateStep.execute(SkipUpToDateStep.java:38)
        at org.gradle.internal.execution.steps.ResolveChangesStep.execute(ResolveChangesStep.java:73)
        at org.gradle.internal.execution.steps.ResolveChangesStep.execute(ResolveChangesStep.java:44)
        at org.gradle.internal.execution.steps.legacy.MarkSnapshottingInputsFinishedStep.execute(MarkSnapshottingInputsFinishedStep.java:37)
        at org.gradle.internal.execution.steps.legacy.MarkSnapshottingInputsFinishedStep.execute(MarkSnapshottingInputsFinishedStep.java:27)
        at org.gradle.internal.execution.steps.ResolveCachingStateStep.execute(ResolveCachingStateStep.java:89)
        at org.gradle.internal.execution.steps.ResolveCachingStateStep.execute(ResolveCachingStateStep.java:50)
        at org.gradle.internal.execution.steps.ValidateStep.execute(ValidateStep.java:114)
        at org.gradle.internal.execution.steps.ValidateStep.execute(ValidateStep.java:57)
        at org.gradle.internal.execution.steps.CaptureStateBeforeExecutionStep.execute(CaptureStateBeforeExecutionStep.java:76)
        at org.gradle.internal.execution.steps.CaptureStateBeforeExecutionStep.execute(CaptureStateBeforeExecutionStep.java:50)
        at org.gradle.internal.execution.steps.SkipEmptyWorkStep.executeWithNoEmptySources(SkipEmptyWorkStep.java:249)
        at org.gradle.internal.execution.steps.SkipEmptyWorkStep.executeWithNoEmptySources(SkipEmptyWorkStep.java:204)
        at org.gradle.internal.execution.steps.SkipEmptyWorkStep.execute(SkipEmptyWorkStep.java:83)
        at org.gradle.internal.execution.steps.SkipEmptyWorkStep.execute(SkipEmptyWorkStep.java:54)
        at org.gradle.internal.execution.steps.RemoveUntrackedExecutionStateStep.execute(RemoveUntrackedExecutionStateStep.java:32)
        at org.gradle.internal.execution.steps.RemoveUntrackedExecutionStateStep.execute(RemoveUntrackedExecutionStateStep.java:21)
        at org.gradle.internal.execution.steps.legacy.MarkSnapshottingInputsStartedStep.execute(MarkSnapshottingInputsStartedStep.java:38)
        at org.gradle.internal.execution.steps.LoadPreviousExecutionStateStep.execute(LoadPreviousExecutionStateStep.java:43)
        at org.gradle.internal.execution.steps.LoadPreviousExecutionStateStep.execute(LoadPreviousExecutionStateStep.java:31)
        at org.gradle.internal.execution.steps.AssignWorkspaceStep.lambda$execute$0(AssignWorkspaceStep.java:40)
        at org.gradle.api.internal.tasks.execution.TaskExecution$4.withWorkspace(TaskExecution.java:287)
        at org.gradle.internal.execution.steps.AssignWorkspaceStep.execute(AssignWorkspaceStep.java:40)
        at org.gradle.internal.execution.steps.AssignWorkspaceStep.execute(AssignWorkspaceStep.java:30)
        at org.gradle.internal.execution.steps.IdentityCacheStep.execute(IdentityCacheStep.java:37)
        at org.gradle.internal.execution.steps.IdentityCacheStep.execute(IdentityCacheStep.java:27)
        at org.gradle.internal.execution.steps.IdentifyStep.execute(IdentifyStep.java:44)
        at org.gradle.internal.execution.steps.IdentifyStep.execute(IdentifyStep.java:33)
        at org.gradle.internal.execution.impl.DefaultExecutionEngine$1.execute(DefaultExecutionEngine.java:76)
        at org.gradle.api.internal.tasks.execution.ExecuteActionsTaskExecuter.executeIfValid(ExecuteActionsTaskExecuter.java:144)
        at org.gradle.api.internal.tasks.execution.ExecuteActionsTaskExecuter.execute(ExecuteActionsTaskExecuter.java:133)
        at org.gradle.api.internal.tasks.execution.CleanupStaleOutputsExecuter.execute(CleanupStaleOutputsExecuter.java:77)
        at org.gradle.api.internal.tasks.execution.FinalizePropertiesTaskExecuter.execute(FinalizePropertiesTaskExecuter.java:46)
        at org.gradle.api.internal.tasks.execution.ResolveTaskExecutionModeExecuter.execute(ResolveTaskExecutionModeExecuter.java:51)
        at org.gradle.api.internal.tasks.execution.SkipTaskWithNoActionsExecuter.execute(SkipTaskWithNoActionsExecuter.java:57)
        at org.gradle.api.internal.tasks.execution.SkipOnlyIfTaskExecuter.execute(SkipOnlyIfTaskExecuter.java:56)
        at org.gradle.api.internal.tasks.execution.CatchExceptionTaskExecuter.execute(CatchExceptionTaskExecuter.java:36)
        at org.gradle.api.internal.tasks.execution.EventFiringTaskExecuter$1.executeTask(EventFiringTaskExecuter.java:77)
        at org.gradle.api.internal.tasks.execution.EventFiringTaskExecuter$1.call(EventFiringTaskExecuter.java:55)
        at org.gradle.api.internal.tasks.execution.EventFiringTaskExecuter$1.call(EventFiringTaskExecuter.java:52)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$CallableBuildOperationWorker.execute(DefaultBuildOperationRunner.java:204)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$CallableBuildOperationWorker.execute(DefaultBuildOperationRunner.java:199)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$2.execute(DefaultBuildOperationRunner.java:66)
        at org.gradle.internal.operations.DefaultBuildOperationRunner$2.execute(DefaultBuildOperationRunner.java:59)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.execute(DefaultBuildOperationRunner.java:157)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.execute(DefaultBuildOperationRunner.java:59)
        at org.gradle.internal.operations.DefaultBuildOperationRunner.call(DefaultBuildOperationRunner.java:53)
        at org.gradle.internal.operations.DefaultBuildOperationExecutor.call(DefaultBuildOperationExecutor.java:73)
        at org.gradle.api.internal.tasks.execution.EventFiringTaskExecuter.execute(EventFiringTaskExecuter.java:52)
        at org.gradle.execution.plan.LocalTaskNodeExecutor.execute(LocalTaskNodeExecutor.java:74)
        at org.gradle.execution.taskgraph.DefaultTaskExecutionGraph$InvokeNodeExecutorsAction.execute(DefaultTaskExecutionGraph.java:333)
        at org.gradle.execution.taskgraph.DefaultTaskExecutionGraph$InvokeNodeExecutorsAction.execute(DefaultTaskExecutionGraph.java:320)
        at org.gradle.execution.taskgraph.DefaultTaskExecutionGraph$BuildOperationAwareExecutionAction.execute(DefaultTaskExecutionGraph.java:313)
        at org.gradle.execution.taskgraph.DefaultTaskExecutionGraph$BuildOperationAwareExecutionAction.execute(DefaultTaskExecutionGraph.java:299)
        at org.gradle.execution.plan.DefaultPlanExecutor$ExecutorWorker.lambda$run$0(DefaultPlanExecutor.java:143)
        at org.gradle.execution.plan.DefaultPlanExecutor$ExecutorWorker.execute(DefaultPlanExecutor.java:227)
        at org.gradle.execution.plan.DefaultPlanExecutor$ExecutorWorker.executeNextNode(DefaultPlanExecutor.java:218)
        at org.gradle.execution.plan.DefaultPlanExecutor$ExecutorWorker.run(DefaultPlanExecutor.java:140)
        at org.gradle.internal.concurrent.ExecutorPolicy$CatchAndRecordFailures.onExecute(ExecutorPolicy.java:64)
        at org.gradle.internal.concurrent.ManagedExecutorImpl$1.run(ManagedExecutorImpl.java:48)
marcphilipp commented 1 year ago

@mauriciovilela Could you please provide information about the test class in question (testing framework, structure, etc.)?

FelixGV commented 1 year ago

I just had the same issue happen on an open source project. Two GH Actions jobs of the same run failed this way:

https://github.com/linkedin/venice/actions/runs/4607456900/jobs/8142007549#step:6:489 https://github.com/linkedin/venice/actions/runs/4607456900/jobs/8142006729#step:6:546

This project uses:

Java 11.0.15 Gradle 6.9.3 TestNG 6.14.3 test-retry 1.5.0

Any clues what might cause this?

marcphilipp commented 1 year ago

First execution

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[0](false, false) STARTED
Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[0](false, false) PASSED (25.157 s)

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[2](true, false) STARTED

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[2](true, false) FAILED
    org.testng.internal.thread.ThreadTimeoutException: Method com.linkedin.venice.endToEnd.TestBatch.testZstdCompressingAvroRecordWhenFallbackAvailable() didn't finish within the time-out 120000
        at java.base@11.0.15/java.util.concurrent.ConcurrentHashMap.get(ConcurrentHashMap.java:946)
        at java.base@11.0.15/java.util.ResourceBundle.getBundleImpl(ResourceBundle.java:1629)
        at java.base@11.0.15/java.util.ResourceBundle.getBundleImpl(ResourceBundle.java:1593)
        at java.base@11.0.15/java.util.ResourceBundle.getBundle(ResourceBundle.java:1284)
        at app//org.apache.hadoop.mapreduce.util.ResourceBundles.getBundle(ResourceBundles.java:37)
        at app//org.apache.hadoop.mapreduce.util.ResourceBundles.getValue(ResourceBundles.java:56)
        at app//org.apache.hadoop.mapreduce.util.ResourceBundles.getCounterName(ResourceBundles.java:89)
        at app//org.apache.hadoop.mapreduce.counters.FileSystemCounterGroup$FSCounter.localizeCounterName(FileSystemCounterGroup.java:98)
        at app//org.apache.hadoop.mapreduce.counters.FileSystemCounterGroup$FSCounter.getDisplayName(FileSystemCounterGroup.java:94)
        at app//org.apache.hadoop.mapred.Counters$Counter.getDisplayName(Counters.java:170)
        at app//org.apache.hadoop.mapred.Counters.incrAllCounters(Counters.java:568)
        at app//org.apache.hadoop.mapred.Counters.sum(Counters.java:591)
        at app//org.apache.hadoop.mapred.LocalJobRunner$Job.getCurrentCounters(LocalJobRunner.java:640)
        at app//org.apache.hadoop.mapred.LocalJobRunner.getJobCounters(LocalJobRunner.java:773)
        at app//org.apache.hadoop.mapreduce.Job$7.run(Job.java:756)
        at app//org.apache.hadoop.mapreduce.Job$7.run(Job.java:753)
        at java.base@11.0.15/java.security.AccessController.doPrivileged(Native Method)
        at java.base@11.0.15/javax.security.auth.Subject.doAs(Subject.java:423)
        at app//org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1548)
        at app//org.apache.hadoop.mapreduce.Job.getCounters(Job.java:753)
        at app//org.apache.hadoop.mapred.JobClient$NetworkedJob.getCounters(JobClient.java:374)
        at app//com.linkedin.venice.hadoop.VenicePushJob.validateCountersAfterPush(VenicePushJob.java:1226)
        at app//com.linkedin.venice.hadoop.VenicePushJob.runJobAndUpdateStatus(VenicePushJob.java:1217)
        at app//com.linkedin.venice.hadoop.VenicePushJob.run(VenicePushJob.java:1089)
        at app//com.linkedin.venice.utils.TestWriteUtils.runPushJob(TestWriteUtils.java:963)
        at app//com.linkedin.venice.utils.TestWriteUtils.runPushJob(TestWriteUtils.java:957)
        at app//com.linkedin.venice.endToEnd.TestBatch.testBatchStore(TestBatch.java:854)
        at app//com.linkedin.venice.endToEnd.TestBatch.testBatchStore(TestBatch.java:811)
        at app//com.linkedin.venice.endToEnd.TestBatch.testBatchStore(TestBatch.java:801)
        at app//com.linkedin.venice.endToEnd.TestBatch.testZstdCompressingAvroRecordWhenFallbackAvailable(TestBatch.java:379)
        at java.base@11.0.15/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at java.base@11.0.15/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at java.base@11.0.15/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base@11.0.15/java.lang.reflect.Method.invoke(Method.java:566)
        at app//org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:124)
        at app//org.testng.internal.InvokeMethodRunnable.runOne(InvokeMethodRunnable.java:54)
        at app//org.testng.internal.InvokeMethodRunnable.run(InvokeMethodRunnable.java:44)
        at java.base@11.0.15/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
        at java.base@11.0.15/java.util.concurrent.FutureTask.run(FutureTask.java:264)
        at java.base@11.0.15/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at java.base@11.0.15/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base@11.0.15/java.lang.Thread.run(Thread.java:829)

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[3](false, true) STARTED

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[3](false, true) FAILED
    com.linkedin.venice.exceptions.VeniceException: Failed to connect to: d2://ChildController to query job status, after 15 attempts. Error: An error occurred during controller request. controller = http://fv-az448-743:38181, route = /job, params = [cluster_name=venice-cluster_294ff98e2b_9d3580e7, hostname=localhost, store_name=store_8ade22d65b_2ebcb4ea, version=1], timeout = 300000,Failed to get response for url: d2://ChildController/leader_controller?cluster_name=venice-cluster_294ff98e2b_9d3580e7&hostname=localhost with D2 Client
        at com.linkedin.venice.hadoop.VenicePushJob.pollStatusUntilComplete(VenicePushJob.java:2558)
        at com.linkedin.venice.hadoop.VenicePushJob.run(VenicePushJob.java:1112)
        at com.linkedin.venice.utils.TestWriteUtils.runPushJob(TestWriteUtils.java:963)
        at com.linkedin.venice.utils.TestWriteUtils.runPushJob(TestWriteUtils.java:957)
        at com.linkedin.venice.endToEnd.TestBatch.testBatchStore(TestBatch.java:854)
        at com.linkedin.venice.endToEnd.TestBatch.testBatchStore(TestBatch.java:811)
        at com.linkedin.venice.endToEnd.TestBatch.testBatchStore(TestBatch.java:768)
        at com.linkedin.venice.endToEnd.TestBatch.testBatchStore(TestBatch.java:760)
        at com.linkedin.venice.endToEnd.TestBatch.testZstdCompressingAvroRecordWhenFallbackAvailable(TestBatch.java:343)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
        at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
        at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
        at java.base/java.lang.reflect.Method.invoke(Method.java:566)
        at org.testng.internal.MethodInvocationHelper.invokeMethod(MethodInvocationHelper.java:124)
        at org.testng.internal.InvokeMethodRunnable.runOne(InvokeMethodRunnable.java:54)
        at org.testng.internal.InvokeMethodRunnable.run(InvokeMethodRunnable.java:44)
        at java.base/java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:515)
        at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
        at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[6](true, true) STARTED
Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[6](true, true) PASSED (28.75 s)

Second execution

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[0](false, false) PASSED (31.45 s)

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[2](true, false) STARTED
Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[2](true, false) PASSED (23.566 s)

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[4](false, true) STARTED
Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[4](false, true) PASSED (25.424 s)

Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[6](true, true) STARTED
Gradle suite > Gradle test > com.linkedin.venice.endToEnd.TestBatchForRocksDB > testZstdCompressingAvroRecordWhenFallbackAvailable[6](true, true) PASSED (23.779 s)
marcphilipp commented 1 year ago

@FelixGV Do you know how TestNG generates the number in the square brackets? It seems it uses 0, 2, 3, 6 in the first execution but 0, 2, 4, 6 in the second execution. That leads to testZstdCompressingAvroRecordWhenFallbackAvailable[3] which failed in the first execution not being present in the second run and therefore this exception.

ZacAttack commented 1 year ago

I'm having the same issue but for a different test case in the project @FelixGV has already mentioned.

https://github.com/linkedin/venice/actions/runs/4660198917/jobs/8247955553?pr=331

Not sure about the square bracket thing. My initial read on it was that it has something to do with the permutation number for the test generator?

FelixGV commented 1 year ago

Still hitting this and haven't puzzled it out yet. I wonder if the methodology here could be used to sidestep the issue (though I still don't know why it happens in the first place).

FelixGV commented 11 months ago

I've resumed trying to better understand how TestNG sets the number in the bracket... it is still quite mysterious but so far I have discovered that if the test has only a DataProvider set, the number in the square bracket increments monotonically from 0, whereas if it has both a DataProvider and a timeout specified, then the number is always even (0, 2, 4...).

I have tried to make tests fail in various ways but I cannot reproduce the situation where the an otherwise even sequence starts having an odd number in it as well, as you pointed out...

Moreover, upon closer inspection, I also see this error coming up in cases where there was a retry with exactly the precise name of the initial attempt. For example, this CI task failed with:

> org.gradle.test-retry was unable to retry the following test methods, which is unexpected. Please file a bug report at https://github.com/gradle/test-retry-gradle-plugin/issues
     com.linkedin.venice.fastclient.AvroStoreClientZstdEndToEndTest#testFastClientGet[2](false, false, true, true, true, false, 2, SERVER_BASED_METADATA)

And it did get retried with the exact same name, but only once though (we have the build set to 3 retries)...

Anyway, I'm not expecting anyone to pull a rabbit out of a hat here but I thought I would document my findings in case my current effort fizzles out and someone else might be able to use that progress and carry it further.

marcphilipp commented 11 months ago

Not a solution, but as a workaround you can use filtering or class-level retries for such cases.

pshevche commented 5 months ago

@FelixGV , I looked more into the issue. I know where the even iteration numbers are coming from, and I managed to reproduce the issue (although in a very sick way).

Inconsistent iteration numbers seem to be caused by a logical error on the TestNG side:

  1. When timeout is set, TestInvoker calls invokeWithTimeout, which ultimately calls InvokeMethodRunnable, which increments the iteration number here.
  2. Then, the method returns to TestInvoker which will increment the iteration number again here.

Only #2 is called if you don't have a timeout configured. This double increment is also where the error may be coming from.

There is a lot happening between the increments; for example, IInvokedMethodListener listeners are invoked (ref). If those fail, TestNG might not catch the exception and won't perform the second increment. If this failure is flaky, then the double-increment will be done only once, leading to inconsistent names reported to the retry plugin.

I checked your classes, and I don't see how this can happen in your setup. But maybe TestNG experienced an internal error which can also result in this issue.

I am afraid there is nothing we can do on the plugin side, as we simply don't have enough data to detect this case. TestNG needs to be more robust when handling the case. I leave it up to you to report this issue on the TestNG side.

pshevche commented 5 months ago

For future reference, this is how I reproduce the failure:

Test class:

import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;

import java.nio.file.Files;
import java.nio.file.Paths;

public class Issue166Test {

    @Test(timeOut = 1000L, dataProvider = "irrelevantProvider")
    public void failingTestWithDataProvider(boolean irrelevant) {
        if (!Files.exists(Paths.get("pshevche_listener_failed"))) {
            System.setProperty("pshevche_fail_listener", "true");
        }
        throw new RuntimeException("Boom!");
    }

    @DataProvider
    public static Object[][] irrelevantProvider() {
        return new Object[][]{
            {false},
            {true},
            {false},
            {true}
        };
    }
}

Invocation listener

import org.testng.IInvokedMethod;
import org.testng.IInvokedMethodListener;
import org.testng.ITestResult;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;

public class PshevcheInvocationListener implements IInvokedMethodListener {

    @Override
    public void beforeInvocation(IInvokedMethod method, ITestResult testResult) {
        if (Boolean.getBoolean("pshevche_fail_listener")) {
            System.clearProperty("pshevche_fail_listener");
            try {
                Files.createFile(Paths.get("pshevche_listener_failed"));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
            throw new RuntimeException("Superboom!");
        }
    }
}

These executions will be reported by TestNG:

Screenshot 2024-04-18 at 13 15 17

Flow:

  1. counter = 0
  2. Iteration 0 => counter++ => counter++ => trigger listener failure.
  3. Iteration 2 => counter++ => listener fails.
  4. Iteration 3 => counter++ => counter++.
  5. Iteration 5 => counter++ => counter++.

On the next run, the counter will always be incremented twice.