cloudscaleml / cloudrun-digits

MIT License
0 stars, 1 fork, source link

Experiment fails on Finalize #2

Open cwoj opened 5 years ago

cwoj commented 5 years ago

Run failed: Submitted script failed with a non-zero exit code; see the driver log file for details.

Stack Trace: ERROR:: OutputUpload failed. Output files can be accessed in the project working directory directly.. Exception Details:Traceback (most recent call last): File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/workspace_client.py", line 78, in _execute_with_arguments return self._call_api(func, *args_list, kwargs) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/clientbase.py", line 175, in _call_api return self._execute_with_base_arguments(func, *args, *kwargs) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/clientbase.py", line 229, in _execute_with_base_arguments back_off, total_retry, ssl_error_handler, self._logger, func, args, kwargs) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/clientbase.py", line 239, in _execute_func_internal return func(*args, kwargs) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/operations/run_metric_operations.py", line 161, in post_batch raise models.ErrorResponseException(self._deserialize, response) azureml._restclient.models.error_response.ErrorResponseException: (TransientError) Etag conflict on bf599197-4037-4550-b00b-cad3c2234f5a/e41436c7-c3bb-4b07-bfd3-06eebcf637f6 with etag . 
During handling of the above exception, another exception occurred: Traceback (most recent call last): File "/mnt/batch/tasks/shared/LS_root/jobs/glados/azureml/mnist_lab_1565029025_bd060128/mounts/workspacefilestore/azureml/mnist_lab_1565029025_bd060128/azureml-setup/context_managers.py", line 106, in exit self.history_context.exit(args) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_history/utils/context_managers.py", line 57, in exit return self._exit_stack.exit(args) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/contextlib.py", line 380, in exit raise exc_details[1] File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/contextlib.py", line 365, in exit if cb(exc_details): File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/contextlib.py", line 284, in _exit_wrapper return cm_exit(cm, exc_details) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_history/utils/context_managers.py", line 165, in exit _RunBase._kill(timeout=self._kill_timeout) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_run_impl/run_base.py", line 159, in _kill handler(timeout) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_run_impl/run_base.py", line 169, in _cleanup self._client.flush(timeout) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_run_impl/run_history_facade.py", line 520, in flush self.metrics.flush(timeout_seconds=timeout_seconds) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/metrics_client.py", line 202, in flush self._task_queue.flush(self.identity, timeout_seconds=timeout_seconds) File 
"/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_common/async_utils/batch_task_queue.py", line 89, in flush super(BatchTaskQueue, self).flush(*args, *kwargs) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_common/async_utils/task_queue.py", line 112, in flush self._results.extend((task.wait(awaiter_name=self.identity) for task in completed_tasks)) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_common/async_utils/task_queue.py", line 112, in self._results.extend((task.wait(awaiter_name=self.identity) for task in completed_tasks)) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_common/async_utils/async_task.py", line 59, in wait res = self._handler(self._future, self._logger) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_common/async_utils/async_task.py", line 16, in basic_handler return future.result() File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/concurrent/futures/_base.py", line 398, in result return self.__get_result() File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/concurrent/futures/_base.py", line 357, in __get_result raise self._exception File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/concurrent/futures/thread.py", line 55, in run result = self.fn(self.args, self.kwargs) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/metrics_client.py", line 231, in _log_batch is_async=is_async) File "/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/run_client.py", line 440, in _execute_with_run_arguments return self._execute_with_arguments(func, copy.deepcopy(self._run_arguments), *args, **kwargs) File 
"/azureml-envs/azureml_557e56e6c5351baea3351c518cef0f82/lib/python3.6/site-packages/azureml/_restclient/workspace_client.py", line 80, in _execute_with_arguments raise ServiceException(e) azureml._restclient.exceptions.ServiceException: ServiceException: Code: 409 Message: (TransientError) Etag conflict on bf599197-4037-4550-b00b-cad3c2234f5a/e41436c7-c3bb-4b07-bfd3-06eebcf637f6 with etag . Details: InnerException{ "additional_properties": {}, "error": { "additional_properties": {}, "code": "TransientError", "message": "Etag conflict on bf599197-4037-4550-b00b-cad3c2234f5a/e41436c7-c3bb-4b07-bfd3-06eebcf637f6 with etag .", "target": null, "details": [], "inner_error": null, "debug_info": { "additional_properties": {}, "type": "Microsoft.MachineLearning.Common.Core.EtagConflictException", "message": "Etag conflict on bf599197-4037-4550-b00b-cad3c2234f5a/e41436c7-c3bb-4b07-bfd3-06eebcf637f6 with etag .", "stack_trace": " at Microsoft.MachineLearning.Common.DocumentDb.DocumentDbStore2.UpsertInternalAsync(T document, Boolean ignoreEtag) in /home/vsts/work/1/s/src/azureml-api/src/Common/DocumentDb/DocumentDbStore.cs:line 362\n at Microsoft.MachineLearning.Common.DocumentDb.DocumentDbStore2.UpsertAsync(T document, Boolean ignoreEtag)\n at Polly.Policy.<>cDisplayClass181_0`1.<b0>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Polly.RetrySyntaxAsync.<>cDisplayClass9_1.<b1>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Polly.Retry.RetryEngine.ImplementationAsync[TResult](Func3 action, Context context, CancellationToken cancellationToken, IEnumerable1 shouldRetryExceptionPredicates, IEnumerable1 shouldRetryResultPredicates, Func1 policyStateFactory, Boolean continueOnCapturedContext)\n at Polly.Retry.RetryEngine.ImplementationAsync[TResult](Func3 action, Context context, CancellationToken cancellationToken, IEnumerable1 shouldRetryExceptionPredicates, IEnumerable1 
shouldRetryResultPredicates, Func1 policyStateFactory, Boolean continueOnCapturedContext)\n at Polly.Policy.ExecuteAsyncInternal[TResult](Func3 action, Context context, CancellationToken cancellationToken, Boolean continueOnCapturedContext)\n at Microsoft.MachineLearning.RunHistory.EntryPoints.Api.Controllers.RunMetricsController.RunMetricUpdateTasks(RunDocument runDocument, String experimentId, IReadOnlyCollection1 metricDtos, IReadOnlyCollection1 metricDocuments) in /home/vsts/work/1/s/src/azureml-api/src/RunHistory/EntryPoints/Api/Controllers/RunMetricsController.cs:line 329\n at Microsoft.MachineLearning.RunHistory.EntryPoints.Api.Controllers.RunMetricsController.PostRunMetrics(Guid subscriptionId, String resourceGroupName, String workspaceName, String experimentName, RunId runId, BatchMetricDto batchMetricDto) in /home/vsts/work/1/s/src/azureml-api/src/RunHistory/EntryPoints/Api/Controllers/RunMetricsController.cs:line 189\n at Microsoft.AspNetCore.Mvc.Internal.ControllerActionInvoker.InvokeActionMethodAsync()\n at Microsoft.AspNetCore.Mvc.Internal.ControllerActionInvoker.InvokeNextActionFilterAsync()\n at Microsoft.AspNetCore.Mvc.Internal.ControllerActionInvoker.Rethrow(ActionExecutedContext context)\n at Microsoft.AspNetCore.Mvc.Internal.ControllerActionInvoker.Next(State& next, Scope& scope, Object& state, Boolean& isCompleted)\n at Microsoft.AspNetCore.Mvc.Internal.ControllerActionInvoker.InvokeInnerFilterAsync()\n at Microsoft.AspNetCore.Mvc.Internal.ResourceInvoker.InvokeNextExceptionFilterAsync()", "inner_exception": { "additional_properties": {}, "type": "Microsoft.Azure.Documents.ConflictException", "message": "Message: {\"Errors\":[\"Resource with specified id or name already exists.\"]}\nActivityId: 0b8f717a-44c8-4217-9dc3-81ce0eba91dc, Request URI: /apps/9cb3298e-05bf-4cd7-90ee-3b80cc9eea69/services/8642c007-5bf9-433f-a3f9-56e7761bb28c/partitions/4db86029-263b-40b6-877c-a16bfe786a6c/replicas/132079578132306307p/, RequestStats: \nRequestStartTime: 
2019-08-05T18:18:53.8966782Z, Number of regions attempted: 1\n, SDK: Linux/9 documentdb-netcore-sdk/2.1.3", "stack_trace": " at Microsoft.Azure.Documents.TransportClient.ThrowIfFailed(String resourceAddress, StoreResponse storeResponse, Uri physicalAddress, Guid activityId)\n at Microsoft.Azure.Documents.Rntbd.TransportClient.InvokeStoreAsync(Uri physicalAddress, ResourceOperation resourceOperation, DocumentServiceRequest request)\n at Microsoft.Azure.Documents.ConsistencyWriter.WritePrivateAsync(DocumentServiceRequest request, TimeoutHelper timeout, Boolean forceRefresh)\n at Microsoft.Azure.Documents.ConsistencyWriter.WriteAsync(DocumentServiceRequest entity, TimeoutHelper timeout, Boolean forceRefresh)\n at Microsoft.Azure.Documents.ReplicatedResourceClient.InvokeAsync(DocumentServiceRequest request, TimeoutHelper timeout, Boolean isInRetry, Boolean forceRefresh)\n at Microsoft.Azure.Documents.ReplicatedResourceClient.<>c__DisplayClass21_0.<<InvokeAsync>b__0>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Microsoft.Azure.Documents.BackoffRetryUtility1.<>cDisplayClass4_01.<<ExecuteAsync>b__1>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Microsoft.Azure.Documents.BackoffRetryUtility1.ExecuteRetryAsync(Func1 callbackMethod, Func3 callShouldRetry, Func1 inBackoffAlternateCallbackMethod, TimeSpan minBackoffForInBackoffCallback, CancellationToken cancellationToken, Action1 preRetryCallback)\n at Microsoft.Azure.Documents.ShouldRetryResult.ThrowIfDoneTrying(ExceptionDispatchInfo capturedException)\n at Microsoft.Azure.Documents.BackoffRetryUtility1.ExecuteRetryAsync(Func1 callbackMethod, Func3 callShouldRetry, Func1 inBackoffAlternateCallbackMethod, TimeSpan minBackoffForInBackoffCallback, CancellationToken cancellationToken, Action1 preRetryCallback)\n at Microsoft.Azure.Documents.BackoffRetryUtility1.ExecuteAsync[TPolicyArg1](Func2 callbackMethod, IRetryPolicy1 
retryPolicy, Func2 inBackoffAlternateCallbackMethod, TimeSpan minBackoffForInBackoffCallback, CancellationToken cancellationToken, Action1 preRetryCallback)\n at Microsoft.Azure.Documents.ReplicatedResourceClient.InvokeAsync(DocumentServiceRequest request, Func`2 prepareRequestAsyncDelegate, CancellationToken cancellationToken)\n at Microsoft.Azure.Documents.StoreClient.<>c__DisplayClass17_0.<b0>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Microsoft.Azure.Documents.StoreClient.ProcessMessageAsync(DocumentServiceRequest request, CancellationToken cancellationToken, IRetryPolicy retryPolicy, Func2 prepareRequestAsyncDelegate)\n at Microsoft.Azure.Documents.ServerStoreModel.ProcessMessageAsync(DocumentServiceRequest request, CancellationToken cancellationToken)\n at Microsoft.Azure.Documents.Client.DocumentClient.CreateAsync(DocumentServiceRequest request, CancellationToken cancellationToken)\n at Microsoft.Azure.Documents.Client.DocumentClient.CreateDocumentPrivateAsync(String documentCollectionLink, Object document, RequestOptions options, Boolean disableAutomaticIdGeneration, IDocumentClientRetryPolicy retryPolicyInstance, CancellationToken cancellationToken)\n at Microsoft.Azure.Documents.BackoffRetryUtility1.<>cDisplayClass1_0.<b0>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Microsoft.Azure.Documents.BackoffRetryUtility1.ExecuteRetryAsync(Func1 callbackMethod, Func3 callShouldRetry, Func1 inBackoffAlternateCallbackMethod, TimeSpan minBackoffForInBackoffCallback, CancellationToken cancellationToken, Action1 preRetryCallback)\n at Microsoft.Azure.Documents.ShouldRetryResult.ThrowIfDoneTrying(ExceptionDispatchInfo capturedException)\n at Microsoft.Azure.Documents.BackoffRetryUtility1.ExecuteRetryAsync(Func1 callbackMethod, Func3 callShouldRetry, Func1 inBackoffAlternateCallbackMethod, TimeSpan minBackoffForInBackoffCallback, CancellationToken cancellationToken, 
Action1 preRetryCallback)\n at Microsoft.Azure.Documents.BackoffRetryUtility1.ExecuteAsync(Func1 callbackMethod, IRetryPolicy retryPolicy, CancellationToken cancellationToken, Action`1 preRetryCallback)\n at Microsoft.Azure.Documents.Client.DocumentClient.CreateDocumentInlineAsync(String documentsFeedOrDatabaseLink, Object document, RequestOptions options, Boolean disableAutomaticIdGeneration, CancellationToken cancellationToken)\n at Polly.Policy.<>cDisplayClass181_0`1.<b0>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Polly.RetrySyntaxAsync.<>c__DisplayClass25_1.<b__1>d.MoveNext()\n--- End of stack trace from previous location where exception was thrown ---\n at Polly.Retry.RetryEngine.ImplementationAsync[TResult](Func3 action, Context context, CancellationToken cancellationToken, IEnumerable1 shouldRetryExceptionPredicates, IEnumerable1 shouldRetryResultPredicates, Func1 policyStateFactory, Boolean continueOnCapturedContext)\n at Polly.Retry.RetryEngine.ImplementationAsync[TResult](Func3 action, Context context, CancellationToken cancellationToken, IEnumerable1 shouldRetryExceptionPredicates, IEnumerable1 shouldRetryResultPredicates, Func1 policyStateFactory, Boolean continueOnCapturedContext)\n at Polly.Policy.ExecuteAsyncInternal[TResult](Func3 action, Context context, CancellationToken cancellationToken, Boolean continueOnCapturedContext)\n at Microsoft.MachineLearning.Common.DocumentDb.DocumentClientWrapper.CreateDocumentAsync(Uri documentCollectionUri, Object document, RequestOptions options, Boolean disableAutomaticIdGeneration) in /home/vsts/work/1/s/src/azureml-api/src/Common/DocumentDb/DocumentClientWrapper.cs:line 55\n at Microsoft.MachineLearning.Common.DocumentDb.DocumentDbStore2.UpsertInternalAsync(T document, Boolean ignoreEtag) in /home/vsts/work/1/s/src/azureml-api/src/Common/DocumentDb/DocumentDbStore.cs:line 344", "inner_exception": null, "data": {}, "error_response": null }, "data": { 
"PartitionKey": "bf599197-4037-4550-b00b-cad3c2234f5a", "DocumentId": "e41436c7-c3bb-4b07-bfd3-06eebcf637f6", "Etag": null }, "error_response": null } }, "correlation": { "operation": "7ec2cd02051e4a48b1f20db2b7ae77b9", "request": "0q/UIMV2TGc=" }, "environment": "eastus", "location": "eastus", "time": {} }

cwoj commented 5 years ago

The issue can be worked around by renaming the experiment.