Notebooks to upload/download marine footage, connect to a citizen science project, train machine learning models and publish marine biological observations.
There seems to be a problem with Notebook 5 again; the error might originate in this cell, or it might come from further up in the notebook.
I was trying to get a quick example model to show in a tutorial PowerPoint.
To Reproduce (REQUIRED)
Input:
Project: KSO
Path: /data/album/kso/Xhoni_model1/Example_delete_later
Baseline: YoloV8 Baseline model
mlp.train_yolo(
exp_name=exp_name.value,
weights=weights.artifact_path,
project=mlp.project_name,
epochs=epochs.value,
batch_size=batch_size.value,
img_size=img_h.value, # this requires an int
)
Output:
/home/jupyter-kso-user/.local/lib/python3.10/site-packages/mlflow/data/dataset_source_registry.py:143: UserWarning: The specified dataset source can be interpreted in multiple ways: LocalArtifactDatasetSource, LocalArtifactDatasetSource. MLflow will assume that this is a LocalArtifactDatasetSource source.
return _dataset_source_registry.resolve(
/home/jupyter-kso-user/.local/lib/python3.10/site-packages/mlflow/data/dataset_source_registry.py:143: UserWarning: The specified dataset source can be interpreted in multiple ways: LocalArtifactDatasetSource, LocalArtifactDatasetSource. MLflow will assume that this is a LocalArtifactDatasetSource source.
return _dataset_source_registry.resolve(
---------------------------------------------------------------------------
MaxRetryError Traceback (most recent call last)
File /opt/tljh/user/lib/python3.10/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
485 try:
--> 486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
489 body=request.body,
490 headers=request.headers,
491 redirect=False,
492 assert_same_host=False,
493 preload_content=False,
494 decode_content=False,
495 retries=self.max_retries,
496 timeout=timeout,
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
File /opt/tljh/user/lib/python3.10/site-packages/urllib3/connectionpool.py:878, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
877 log.debug("Retry: %s", url)
--> 878 return self.urlopen(
879 method,
880 url,
881 body,
882 headers,
883 retries=retries,
884 redirect=redirect,
885 assert_same_host=assert_same_host,
886 timeout=timeout,
887 pool_timeout=pool_timeout,
888 release_conn=release_conn,
889 chunked=chunked,
890 body_pos=body_pos,
891 **response_kw
892 )
894 return response
File /opt/tljh/user/lib/python3.10/site-packages/urllib3/connectionpool.py:878, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
877 log.debug("Retry: %s", url)
--> 878 return self.urlopen(
879 method,
880 url,
881 body,
882 headers,
883 retries=retries,
884 redirect=redirect,
885 assert_same_host=assert_same_host,
886 timeout=timeout,
887 pool_timeout=pool_timeout,
888 release_conn=release_conn,
889 chunked=chunked,
890 body_pos=body_pos,
891 **response_kw
892 )
894 return response
[... skipping similar frames: HTTPConnectionPool.urlopen at line 878 (2 times)]
File /opt/tljh/user/lib/python3.10/site-packages/urllib3/connectionpool.py:878, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
877 log.debug("Retry: %s", url)
--> 878 return self.urlopen(
879 method,
880 url,
881 body,
882 headers,
883 retries=retries,
884 redirect=redirect,
885 assert_same_host=assert_same_host,
886 timeout=timeout,
887 pool_timeout=pool_timeout,
888 release_conn=release_conn,
889 chunked=chunked,
890 body_pos=body_pos,
891 **response_kw
892 )
894 return response
File /opt/tljh/user/lib/python3.10/site-packages/urllib3/connectionpool.py:868, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
867 try:
--> 868 retries = retries.increment(method, url, response=response, _pool=self)
869 except MaxRetryError:
File /opt/tljh/user/lib/python3.10/site-packages/urllib3/util/retry.py:592, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
591 if new_retry.is_exhausted():
--> 592 raise MaxRetryError(_pool, url, error or ResponseError(cause))
594 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
MaxRetryError: HTTPSConnectionPool(host='vendor.cloudina.org', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/1/ada46bd376b0417889a848873cbb159d/artifacts/input_datasets/Testing_12/01747002.mp4 (Caused by ResponseError('too many 502 error responses'))
During handling of the above exception, another exception occurred:
RetryError Traceback (most recent call last)
File ~/.local/lib/python3.10/site-packages/mlflow/utils/rest_utils.py:92, in http_request(host_creds, endpoint, method, max_retries, backoff_factor, extra_headers, retry_codes, timeout, **kwargs)
91 try:
---> 92 return _get_http_response_with_retries(
93 method,
94 url,
95 max_retries,
96 backoff_factor,
97 retry_codes,
98 headers=headers,
99 verify=host_creds.verify,
100 timeout=timeout,
101 **kwargs,
102 )
103 except requests.exceptions.Timeout as to:
File ~/.local/lib/python3.10/site-packages/mlflow/utils/request_utils.py:135, in _get_http_response_with_retries(method, url, max_retries, backoff_factor, retry_codes, **kwargs)
134 session = _get_request_session(max_retries, backoff_factor, retry_codes)
--> 135 return session.request(method, url, **kwargs)
File /opt/tljh/user/lib/python3.10/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File /opt/tljh/user/lib/python3.10/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
File /opt/tljh/user/lib/python3.10/site-packages/requests/adapters.py:510, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
509 if isinstance(e.reason, ResponseError):
--> 510 raise RetryError(e, request=request)
512 if isinstance(e.reason, _ProxyError):
RetryError: HTTPSConnectionPool(host='vendor.cloudina.org', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/1/ada46bd376b0417889a848873cbb159d/artifacts/input_datasets/Testing_12/01747002.mp4 (Caused by ResponseError('too many 502 error responses'))
During handling of the above exception, another exception occurred:
MlflowException Traceback (most recent call last)
Cell In[18], line 1
----> 1 mlp.train_yolo(
2 exp_name=exp_name.value,
3 weights=weights.artifact_path,
4 project=mlp.project_name,
5 epochs=epochs.value,
6 batch_size=batch_size.value,
7 img_size=img_h.value, # this requires an int
8 )
File ~/kso/kso_utils/project.py:1617, in MLProjectProcessor.train_yolo(self, exp_name, weights, project, epochs, batch_size, img_size)
1615 mlflow.log_input(train_dataset, context="training")
1616 mlflow.log_input(val_dataset, context="validation")
-> 1617 mlflow.log_artifacts(
1618 str(Path(self.data_path).parent), artifact_path="input_datasets"
1619 )
1620 try:
1621 if "yolov5" in weights:
File ~/.local/lib/python3.10/site-packages/mlflow/tracking/fluent.py:908, in log_artifacts(local_dir, artifact_path)
878 """
879 Log all the contents of a local directory as artifacts of the run. If no run is active,
880 this method will create a new active run.
(...)
905 mlflow.log_artifacts("data", artifact_path="states")
906 """
907 run_id = _get_or_start_run().info.run_id
--> 908 MlflowClient().log_artifacts(run_id, local_dir, artifact_path)
File ~/.local/lib/python3.10/site-packages/mlflow/tracking/client.py:1138, in MlflowClient.log_artifacts(self, run_id, local_dir, artifact_path)
1094 def log_artifacts(
1095 self, run_id: str, local_dir: str, artifact_path: Optional[str] = None
1096 ) -> None:
1097 """
1098 Write a directory of files to the remote ``artifact_uri``.
1099
(...)
1136 is_dir: True
1137 """
-> 1138 self._tracking_client.log_artifacts(run_id, local_dir, artifact_path)
File ~/.local/lib/python3.10/site-packages/mlflow/tracking/_tracking_service/client.py:463, in TrackingServiceClient.log_artifacts(self, run_id, local_dir, artifact_path)
456 def log_artifacts(self, run_id, local_dir, artifact_path=None):
457 """
458 Write a directory of files to the remote ``artifact_uri``.
459
460 :param local_dir: Path to the directory of files to write.
461 :param artifact_path: If provided, the directory in ``artifact_uri`` to write to.
462 """
--> 463 self._get_artifact_repo(run_id).log_artifacts(local_dir, artifact_path)
File ~/.local/lib/python3.10/site-packages/mlflow/store/artifact/http_artifact_repo.py:45, in HttpArtifactRepository.log_artifacts(self, local_dir, artifact_path)
41 artifact_dir = (
42 posixpath.join(artifact_path, rel_path) if artifact_path else rel_path
43 )
44 for f in filenames:
---> 45 self.log_artifact(os.path.join(root, f), artifact_dir)
File ~/.local/lib/python3.10/site-packages/mlflow/store/artifact/http_artifact_repo.py:28, in HttpArtifactRepository.log_artifact(self, local_file, artifact_path)
26 extra_headers = {"Content-Type": mime_type}
27 with open(local_file, "rb") as f:
---> 28 resp = http_request(
29 self._host_creds, endpoint, "PUT", data=f, extra_headers=extra_headers
30 )
31 augmented_raise_for_status(resp)
File ~/.local/lib/python3.10/site-packages/mlflow/utils/rest_utils.py:112, in http_request(host_creds, endpoint, method, max_retries, backoff_factor, extra_headers, retry_codes, timeout, **kwargs)
110 raise InvalidUrlException(f"Invalid url: {url}") from iu
111 except Exception as e:
--> 112 raise MlflowException(f"API request to {url} failed with exception {e}")
MlflowException: API request to https://vendor.cloudina.org/api/2.0/mlflow-artifacts/artifacts/1/ada46bd376b0417889a848873cbb159d/artifacts/input_datasets/Testing_12/01747002.mp4 failed with exception HTTPSConnectionPool(host='vendor.cloudina.org', port=443): Max retries exceeded with url: /api/2.0/mlflow-artifacts/artifacts/1/ada46bd376b0417889a848873cbb159d/artifacts/input_datasets/Testing_12/01747002.mp4 (Caused by ResponseError('too many 502 error responses'))
Expected behavior
A clear and concise description of what you expected to happen.
Environment
If applicable, add screenshots to help explain your problem.
OS: [e.g. Ubuntu]
GPU [e.g. 2080 Ti]
Additional context:
Possible cause? I can't remember, as it has been a while since I dealt with this notebook.
🐛 Bug
There seems to be a problem with Notebook 5 again; the error might originate in this cell, or it might come from further up in the notebook. I was trying to get a quick example model to show in a tutorial PowerPoint.
To Reproduce (REQUIRED)
Input: Project: KSO; Path: /data/album/kso/Xhoni_model1/Example_delete_later; Baseline: YoloV8 Baseline model
Output:
Expected behavior
A clear and concise description of what you expected to happen.
Environment
If applicable, add screenshots to help explain your problem.
Additional context:
Possible cause? I can't remember, as it has been a while since I dealt with this notebook.