Open wikiselev opened 1 year ago
Looking in the logs, I can see this. Granted, the error should be handled better on the Tower side, but I don't understand what you mean by "Pipeline has failed on the node".
tower.log:Mar-09 11:47:41.999 [io-executor-thread-5013] ERROR i.s.t.c.GlobalErrorController - Oops... Unable to process request - Error ID: 7KbRlVktv3YFwhsR7oAXzk
tower.log-com.microsoft.azure.batch.protocol.models.BatchErrorException: Status code 409, {
tower.log- "odata.metadata":"https://alethiomics.uksouth.batch.azure.com/$metadata#Microsoft.Azure.Batch.Protocol.Entities.Container.errors/@Element","code":"TaskCompleted","message":{
tower.log- "lang":"en-US","value":"The specified task is already in a completed state.\nRequestId:c4f5fc32-356d-47c2-a2c2-789081828db5\nTime:2023-03-09T11:47:41.9926741Z"
tower.log- }
tower.log-}
tower.log- at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
tower.log- at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
tower.log- at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
tower.log- at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
tower.log- at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
tower.log- at com.microsoft.rest.ServiceResponseBuilder.build(ServiceResponseBuilder.java:122)
tower.log- at com.microsoft.rest.ServiceResponseBuilder.buildWithHeaders(ServiceResponseBuilder.java:151)
tower.log- at com.microsoft.azure.AzureResponseBuilder.buildWithHeaders(AzureResponseBuilder.java:82)
tower.log- at com.microsoft.azure.batch.protocol.implementation.TasksImpl.terminateDelegate(TasksImpl.java:2071)
tower.log- at com.microsoft.azure.batch.protocol.implementation.TasksImpl.access$700(TasksImpl.java:75)
tower.log- at com.microsoft.azure.batch.protocol.implementation.TasksImpl$45.call(TasksImpl.java:2058)
tower.log- at com.microsoft.azure.batch.protocol.implementation.TasksImpl$45.call(TasksImpl.java:2054)
tower.log- at rx.internal.operators.OnSubscribeMap$MapSubscriber.onNext(OnSubscribeMap.java:69)
tower.log- at retrofit2.adapter.rxjava.CallArbiter.deliverResponse(CallArbiter.java:120)
tower.log- at retrofit2.adapter.rxjava.CallArbiter.emitResponse(CallArbiter.java:102)
tower.log- at retrofit2.adapter.rxjava.CallExecuteOnSubscribe.call(CallExecuteOnSubscribe.java:46)
tower.log- at retrofit2.adapter.rxjava.CallExecuteOnSubscribe.call(CallExecuteOnSubscribe.java:24)
tower.log- at rx.Observable.unsafeSubscribe(Observable.java:10327)
tower.log- at rx.internal.operators.OnSubscribeMap.call(OnSubscribeMap.java:48)
tower.log- at rx.internal.operators.OnSubscribeMap.call(OnSubscribeMap.java:33)
tower.log- at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:48)
tower.log- at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:30)
tower.log- at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:48)
tower.log- at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:30)
tower.log- at rx.Observable.subscribe(Observable.java:10423)
tower.log- at rx.Observable.subscribe(Observable.java:10390)
tower.log- at rx.observables.BlockingObservable.blockForSingle(BlockingObservable.java:443)
tower.log- at rx.observables.BlockingObservable.single(BlockingObservable.java:340)
tower.log- at com.microsoft.azure.batch.protocol.implementation.TasksImpl.terminate(TasksImpl.java:1947)
tower.log- at com.microsoft.azure.batch.TaskOperations.terminateTask(TaskOperations.java:757)
tower.log- at com.microsoft.azure.batch.TaskOperations.terminateTask(TaskOperations.java:730)
tower.log- at io.seqera.tower.service.platform.azure.AzBatchPlatformProvider.terminate(AzBatchPlatformProvider.groovy:266)
tower.log- at io.seqera.tower.service.job.JobServiceImpl.cancelJob(JobServiceImpl.groovy:254)
tower.log- at io.seqera.tower.service.job.$JobServiceImpl$Definition$Intercepted.$$access$$cancelJob(Unknown Source)
tower.log- at io.seqera.tower.service.job.$JobServiceImpl$Definition$Exec.dispatch(Unknown Source)
tower.log- at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.lambda$intercept$3(TransactionalInterceptor.java:152)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousStateTransactionManager.execute(AbstractSynchronousStateTransactionManager.java:145)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousTransactionManager.execute(AbstractSynchronousTransactionManager.java:144)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.intercept(TransactionalInterceptor.java:147)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
tower.log- at io.seqera.tower.service.job.$JobServiceImpl$Definition$Intercepted.cancelJob(Unknown Source)
tower.log- at io.seqera.tower.service.job.JobServiceImpl.cancelJobByWorkflowId(JobServiceImpl.groovy:268)
tower.log- at io.seqera.tower.service.job.$JobServiceImpl$Definition$Intercepted.$$access$$cancelJobByWorkflowId(Unknown Source)
tower.log- at io.seqera.tower.service.job.$JobServiceImpl$Definition$Exec.dispatch(Unknown Source)
tower.log- at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.lambda$intercept$3(TransactionalInterceptor.java:152)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousStateTransactionManager.execute(AbstractSynchronousStateTransactionManager.java:145)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousTransactionManager.execute(AbstractSynchronousTransactionManager.java:144)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.intercept(TransactionalInterceptor.java:147)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
tower.log- at io.seqera.tower.service.job.$JobServiceImpl$Definition$Intercepted.cancelJobByWorkflowId(Unknown Source)
tower.log- at io.seqera.tower.service.workflow.WorkflowLaunchServiceImpl.cancelWorkflow(WorkflowLaunchServiceImpl.groovy:527)
tower.log- at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Intercepted.$$access$$cancelWorkflow(Unknown Source)
tower.log- at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Exec.dispatch(Unknown Source)
tower.log- at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.lambda$intercept$3(TransactionalInterceptor.java:152)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousStateTransactionManager.execute(AbstractSynchronousStateTransactionManager.java:145)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousTransactionManager.execute(AbstractSynchronousTransactionManager.java:144)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.intercept(TransactionalInterceptor.java:147)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
tower.log- at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Intercepted.cancelWorkflow(Unknown Source)
tower.log- at io.seqera.tower.controller.WorkflowExController.cancel(WorkflowExController.groovy:536)
tower.log- at io.seqera.tower.controller.$WorkflowExController$Definition$Intercepted.$$access$$cancel(Unknown Source)
tower.log- at io.seqera.tower.controller.$WorkflowExController$Definition$Exec.dispatch(Unknown Source)
tower.log- at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.lambda$intercept$3(TransactionalInterceptor.java:152)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousStateTransactionManager.execute(AbstractSynchronousStateTransactionManager.java:145)
tower.log- at io.micronaut.transaction.support.AbstractSynchronousTransactionManager.execute(AbstractSynchronousTransactionManager.java:144)
tower.log- at io.micronaut.transaction.interceptor.TransactionalInterceptor.intercept(TransactionalInterceptor.java:147)
tower.log- at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
tower.log- at io.seqera.tower.controller.$WorkflowExController$Definition$Intercepted.cancel(Unknown Source)
tower.log- at io.seqera.tower.controller.$WorkflowExController$Definition$Exec.dispatch(Unknown Source)
tower.log- at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
tower.log- at io.micronaut.context.DefaultBeanContext$4.invoke(DefaultBeanContext.java:594)
tower.log- at io.micronaut.web.router.AbstractRouteMatch.execute(AbstractRouteMatch.java:303)
tower.log- at io.micronaut.web.router.RouteMatch.execute(RouteMatch.java:111)
tower.log- at io.micronaut.http.context.ServerRequestContext.with(ServerRequestContext.java:103)
tower.log- at io.micronaut.http.server.RouteExecutor.lambda$executeRoute$14(RouteExecutor.java:659)
tower.log- at reactor.core.publisher.FluxDeferContextual.subscribe(FluxDeferContextual.java:49)
tower.log- at reactor.core.publisher.InternalFluxOperator.subscribe(InternalFluxOperator.java:62)
tower.log- at reactor.core.publisher.FluxSubscribeOn$SubscribeOnSubscriber.run(FluxSubscribeOn.java:194)
tower.log- at io.micronaut.reactive.reactor.instrument.ReactorInstrumentation.lambda$init$0(ReactorInstrumentation.java:62)
tower.log- at reactor.core.scheduler.WorkerTask.call(WorkerTask.java:84)
tower.log- at reactor.core.scheduler.WorkerTask.call(WorkerTask.java:37)
tower.log- at io.micrometer.core.instrument.composite.CompositeTimer.recordCallable(CompositeTimer.java:129)
tower.log- at io.micrometer.core.instrument.Timer.lambda$wrap$1(Timer.java:206)
tower.log- at io.micronaut.scheduling.instrument.InvocationInstrumenterWrappedCallable.call(InvocationInstrumenterWrappedCallable.java:53)
tower.log- at java.base/java.util.concurrent.FutureTask.run(FutureTask.java:264)
tower.log- at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1136)
tower.log- at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:635)
Thanks, Paolo! If the run is already completed, that is good! But it is not shown as `Completed` on the Tower interface. On the Tower interface it is still shown as `Submitted`:
I meant that the pipeline has definitely finished (it failed in this case) and I saw the pool shrinking, however the run status on Tower interface was not updated.
Um, not nice. What if you try to cancel it?
yeah, when I try to cancel I get that error as above:
Please copy and paste that error ID. Also include the workflow ID (you can find it on the details page).
Error ID: 6XIaWQ8igruqSneVMY64fI
Did you want workflow or workspace ID? I extracted this id from workflow URL (couldn't find it anywhere else): 256157969323532
I have another run with the same problem:
37Q7X6nfTDzM8chdYi9cyc
102494436285352
I see exactly the same problem.
Workspace ID: 236422758311365 Workflow ID: CPsEtCvnKAz1x
I see exactly the same problem.
Always with Azure?
Yep, using an Azure Batch compute env.
See below where to find the workflow Id
Sorry, I was confused by the workflow name. Here are my two runs:
5nopneQGyD26Vj
2J5M6wN3HP87Fe
It should be OK for you both now.
I still have 4 runs in the same state?
Mine were both cancelled successfully.
I still have 4 runs in the same state?
Please provide the ids
Workflow IDs: 5P5QGjzU1y14xl 1OfkC2HJNYt7FM 4WYm0amvCYnXsH hxCPjmlmy603L
I still have 4 runs in the same state?
should be ok now
I've just caught the same bug again... The workflow ID: 2UeJhfe8xg6zZo
I think I found where the problem comes from in my case. To reproduce:
[...] `Selected compute environment not available. The next available compute environment is selected.` [...] but will let you start it.
[...] `idle` state. After 5 mins the pool will scale down to 0.
[...] `Submitted` state and will not let you cancel it, failing with the following error: `Oops... Unable to process request - Error ID: XXXXXXXXXXXXXXXX`.
So, it feels like Tower does not like it when the original pipeline compute environment is deleted. When I add the same pipeline and don't delete the compute environment, it all works OK.
Thanks Vlad, this is useful. Let us look a bit more into this
@cbr7 Please have a look at the comment from @wikiselev (too many vlads in this thread! 😆)
The use case he is reporting causes this exception (to be tracked):
com.microsoft.azure.batch.protocol.models.BatchErrorException: Status code 404, {
"odata.metadata":"https://nextflowbatch02282023.eastus.batch.azure.com/$metadata#Microsoft.Azure.Batch.Protocol.Entities.Container.errors/@Element","code":"PoolNotFound","message":{
"lang":"en-US","value":"The specified pool does not exist.\nRequestId:18915cd7-6718-4855-841e-857bf5c9dcfa\nTime:2023-03-13T15:31:45.4973040Z"
}
}
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
at java.base/jdk.internal.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:77)
at java.base/jdk.internal.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
at java.base/java.lang.reflect.Constructor.newInstanceWithCaller(Constructor.java:499)
at java.base/java.lang.reflect.Constructor.newInstance(Constructor.java:480)
at com.microsoft.rest.ServiceResponseBuilder.build(ServiceResponseBuilder.java:122)
at com.microsoft.rest.ServiceResponseBuilder.buildWithHeaders(ServiceResponseBuilder.java:151)
at com.microsoft.azure.AzureResponseBuilder.buildWithHeaders(AzureResponseBuilder.java:82)
at com.microsoft.azure.batch.protocol.implementation.PoolsImpl.getDelegate(PoolsImpl.java:1824)
at com.microsoft.azure.batch.protocol.implementation.PoolsImpl.access$600(PoolsImpl.java:93)
at com.microsoft.azure.batch.protocol.implementation.PoolsImpl$47.call(PoolsImpl.java:1811)
at com.microsoft.azure.batch.protocol.implementation.PoolsImpl$47.call(PoolsImpl.java:1807)
at rx.internal.operators.OnSubscribeMap$MapSubscriber.onNext(OnSubscribeMap.java:69)
at retrofit2.adapter.rxjava.CallArbiter.deliverResponse(CallArbiter.java:120)
at retrofit2.adapter.rxjava.CallArbiter.emitResponse(CallArbiter.java:102)
at retrofit2.adapter.rxjava.CallExecuteOnSubscribe.call(CallExecuteOnSubscribe.java:46)
at retrofit2.adapter.rxjava.CallExecuteOnSubscribe.call(CallExecuteOnSubscribe.java:24)
at rx.Observable.unsafeSubscribe(Observable.java:10327)
at rx.internal.operators.OnSubscribeMap.call(OnSubscribeMap.java:48)
at rx.internal.operators.OnSubscribeMap.call(OnSubscribeMap.java:33)
at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:48)
at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:30)
at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:48)
at rx.internal.operators.OnSubscribeLift.call(OnSubscribeLift.java:30)
at rx.Observable.subscribe(Observable.java:10423)
at rx.Observable.subscribe(Observable.java:10390)
at rx.observables.BlockingObservable.blockForSingle(BlockingObservable.java:443)
at rx.observables.BlockingObservable.single(BlockingObservable.java:340)
at com.microsoft.azure.batch.protocol.implementation.PoolsImpl.get(PoolsImpl.java:1701)
at com.microsoft.azure.batch.PoolOperations.getPool(PoolOperations.java:222)
at com.microsoft.azure.batch.PoolOperations.getPool(PoolOperations.java:169)
at io.seqera.tower.service.platform.azure.AzBatchPlatformProvider.retrievePoolInfo(AzBatchPlatformProvider.groovy:221)
at io.seqera.tower.service.platform.azure.AzBatchPlatformProvider.createLaunchJobResources(AzBatchPlatformProvider.groovy:201)
at io.seqera.tower.service.platform.azure.AzBatchPlatformProvider.createLaunchJobResources(AzBatchPlatformProvider.groovy)
at io.seqera.tower.service.workflow.WorkflowLaunchServiceImpl.newLaunchRequest(WorkflowLaunchServiceImpl.groovy:349)
at io.seqera.tower.service.workflow.WorkflowLaunchServiceImpl.submitWorkflow(WorkflowLaunchServiceImpl.groovy:196)
at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Intercepted.$$access$$submitWorkflow(Unknown Source)
at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Exec.dispatch(Unknown Source)
at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
at io.micronaut.transaction.interceptor.TransactionalInterceptor.lambda$intercept$3(TransactionalInterceptor.java:152)
at io.micronaut.transaction.support.AbstractSynchronousStateTransactionManager.execute(AbstractSynchronousStateTransactionManager.java:145)
at io.micronaut.transaction.support.AbstractSynchronousTransactionManager.execute(AbstractSynchronousTransactionManager.java:144)
at io.micronaut.transaction.interceptor.TransactionalInterceptor.intercept(TransactionalInterceptor.java:147)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Intercepted.submitWorkflow(Unknown Source)
at io.seqera.tower.service.workflow.WorkflowLaunchServiceImpl.submitNewLaunch(WorkflowLaunchServiceImpl.groovy:157)
at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Intercepted.$$access$$submitNewLaunch(Unknown Source)
at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Exec.dispatch(Unknown Source)
at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
at io.micronaut.transaction.interceptor.TransactionalInterceptor.lambda$intercept$3(TransactionalInterceptor.java:152)
at io.micronaut.transaction.support.AbstractSynchronousStateTransactionManager.execute(AbstractSynchronousStateTransactionManager.java:145)
at io.micronaut.transaction.support.AbstractSynchronousTransactionManager.execute(AbstractSynchronousTransactionManager.java:144)
at io.micronaut.transaction.interceptor.TransactionalInterceptor.intercept(TransactionalInterceptor.java:147)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
at io.seqera.tower.service.workflow.$WorkflowLaunchServiceImpl$Definition$Intercepted.submitNewLaunch(Unknown Source)
at io.seqera.tower.controller.WorkflowExController.submitLaunch0(WorkflowExController.groovy:377)
at io.seqera.tower.controller.$WorkflowExController$Definition$Intercepted.$$access$$submitLaunch0(Unknown Source)
at io.seqera.tower.controller.$WorkflowExController$Definition$Exec.dispatch(Unknown Source)
at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
at io.micronaut.transaction.interceptor.TransactionalInterceptor.lambda$intercept$3(TransactionalInterceptor.java:152)
at io.micronaut.transaction.support.AbstractSynchronousStateTransactionManager.execute(AbstractSynchronousStateTransactionManager.java:145)
at io.micronaut.transaction.support.AbstractSynchronousTransactionManager.execute(AbstractSynchronousTransactionManager.java:144)
at io.micronaut.transaction.interceptor.TransactionalInterceptor.intercept(TransactionalInterceptor.java:147)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
at io.seqera.tower.controller.$WorkflowExController$Definition$Intercepted.submitLaunch0(Unknown Source)
at io.seqera.tower.controller.WorkflowExController.submitLaunch(WorkflowExController.groovy:356)
at io.seqera.tower.controller.$WorkflowExController$Definition$Intercepted.$$access$$submitLaunch(Unknown Source)
at io.seqera.tower.controller.$WorkflowExController$Definition$Exec.dispatch(Unknown Source)
at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
at io.seqera.tower.security.rule.AuthRequiredInterceptor.intercept(AuthRequiredInterceptor.groovy:43)
at io.seqera.tower.security.rule.$AuthRequiredInterceptor$Definition$Intercepted.$$access$$intercept(Unknown Source)
at io.seqera.tower.security.rule.$AuthRequiredInterceptor$Definition$Exec.dispatch(Unknown Source)
at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:128)
at io.seqera.tower.security.rule.$AuthRequiredInterceptor$Definition$Intercepted.intercept(Unknown Source)
at io.micronaut.aop.chain.MethodInterceptorChain.proceed(MethodInterceptorChain.java:137)
at io.seqera.tower.controller.$WorkflowExController$Definition$Intercepted.submitLaunch(Unknown Source)
at io.seqera.tower.controller.$WorkflowExController$Definition$Exec.dispatch(Unknown Source)
at io.micronaut.context.AbstractExecutableMethodsDefinition$DispatchedExecutableMethod.invoke(AbstractExecutableMethodsDefinition.java:371)
at io.micronaut.context.DefaultBeanContext$4.invoke(DefaultBeanContext.java:594)
at io.micronaut.web.router.AbstractRouteMatch.execute(AbstractRouteMatch.java:303)
at io.micronaut.web.router.RouteMatch.execute(RouteMatch.java:111)
at io.micronaut.http.context.ServerRequestContext.with(ServerRequestContext.java:103)
at io.micronaut.http.server.RouteExecutor.lambda$executeRoute$14(RouteExecutor.java:659)
at reactor.core.publisher.FluxDeferContextual.subscribe(FluxDeferContextual.java:49)
at reactor.core.publisher.InternalFluxOperator.subscribe(InternalFluxOperator.java:62)
at reactor.core.publisher.FluxSubscribeOn$SubscribeOnSubscriber.run(FluxSubscribeOn.java:194)
at io.micronaut.reactive.reactor.instrument.ReactorInstrumentation.lambda$init$0(ReactorInstrumentation.java:62)
at reactor.core.scheduler.WorkerTask.call(WorkerTask.java:84)
at reactor.core.scheduler.WorkerTask.call(WorkerTask.java:37)
at io.micrometer.core.instrument.composite.CompositeTimer.recordCallable(CompositeTimer.java:129)
at io.micrometer.core.instrument.Timer.lambda$wrap$1(Timer.java:206)
Bug report
Expected behavior and actual behavior
Expected: A completed run should end up in either the `Failed` or the `Succeeded` state.
Actual: A completed run is stuck in the `Submitted` state even though the compute pool corresponding to the run has been auto-scaled to 0.
Steps to reproduce the problem
I created a compute pool using Tower Forge and started a pipeline (private). This created a new run on Tower and auto scaled the newly created pool from 0 to 1 node. Pipeline has failed on the node.
Program output
The Tower run never received the output of the pipeline and got stuck in the `Submitted` state even though the compute pool has been scaled down to 0 nodes.
Environment
Tower
Additional context
I now have two runs like this (in the `Submitted` state). When I try to delete them myself via the Tower interface or via the Tower CLI (with the `--force` option), I get an error like:
If I get three more runs like this, I will hit a limit of concurrent tasks and won't be able to use Tower anymore... I would appreciate your help in deleting them. Thanks!