Whenever I try to upload a large number of images to a local Label Studio instance (in my case I need to upload 10k images), I face two errors: a timeout error, and a validation error when a project with the same name already exists in Label Studio (which happens when I retry the upload after a failed attempt).
OS Platform and Distribution : Ryzen9 5900X, 64GB RAM, RTX 3090. Arch Linux, Kernel 6.9.1
Python version (python --version): Python 3.11
FiftyOne version (fiftyone --version): v0.23.8
FiftyOne installed from (pip or source): pip
Label Studio installation info: installed from pip using pipx, v1.12.1
Other info/logs
Timeout Error
---------------------------------------------------------------------------
TimeoutError Traceback (most recent call last)
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:537, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
536 try:
--> 537 response = conn.getresponse()
538 except (BaseSSLError, OSError) as e:
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/connection.py:466, in HTTPConnection.getresponse(self)
465 # Get the response from http.client.HTTPConnection
--> 466 httplib_response = super().getresponse()
468 try:
File ~/.pyenv/versions/3.11.9/lib/python3.11/http/client.py:1395, in HTTPConnection.getresponse(self)
1394 try:
-> 1395 response.begin()
1396 except ConnectionError:
File ~/.pyenv/versions/3.11.9/lib/python3.11/http/client.py:325, in HTTPResponse.begin(self)
324 while True:
--> 325 version, status, reason = self._read_status()
326 if status != CONTINUE:
File ~/.pyenv/versions/3.11.9/lib/python3.11/http/client.py:286, in HTTPResponse._read_status(self)
285 def _read_status(self):
--> 286 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
287 if len(line) > _MAXLINE:
File ~/.pyenv/versions/3.11.9/lib/python3.11/socket.py:706, in SocketIO.readinto(self, b)
705 try:
--> 706 return self._sock.recv_into(b)
707 except timeout:
TimeoutError: timed out
The above exception was the direct cause of the following exception:
ReadTimeoutError Traceback (most recent call last)
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/requests/adapters.py:564, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
563 try:
--> 564 resp = conn.urlopen(
565 method=request.method,
566 url=url,
567 body=request.body,
568 headers=request.headers,
569 redirect=False,
570 assert_same_host=False,
571 preload_content=False,
572 decode_content=False,
573 retries=self.max_retries,
574 timeout=timeout,
575 chunked=chunked,
576 )
578 except (ProtocolError, OSError) as err:
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:847, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
845 new_e = ProtocolError("Connection aborted.", new_e)
--> 847 retries = retries.increment(
848 method, url, error=new_e, _pool=self, _stacktrace=sys.exc_info()[2]
849 )
850 retries.sleep()
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/util/retry.py:470, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
469 if read is False or method is None or not self._is_method_retryable(method):
--> 470 raise reraise(type(error), error, _stacktrace)
471 elif read is not None:
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/util/util.py:39, in reraise(tp, value, tb)
38 raise value.with_traceback(tb)
---> 39 raise value
40 finally:
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:793, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
792 # Make the request on the HTTPConnection object
--> 793 response = self._make_request(
794 conn,
795 method,
796 url,
797 timeout=timeout_obj,
798 body=body,
799 headers=headers,
800 chunked=chunked,
801 retries=retries,
802 response_conn=response_conn,
803 preload_content=preload_content,
804 decode_content=decode_content,
805 **response_kw,
806 )
808 # Everything went great!
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:539, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
538 except (BaseSSLError, OSError) as e:
--> 539 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
540 raise
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/urllib3/connectionpool.py:370, in HTTPConnectionPool._raise_timeout(self, err, url, timeout_value)
369 if isinstance(err, SocketTimeout):
--> 370 raise ReadTimeoutError(
371 self, url, f"Read timed out. (read timeout={timeout_value})"
372 ) from err
374 # See the above comment about EAGAIN in Python 3.
ReadTimeoutError: HTTPConnectionPool(host='localhost', port=8081): Read timed out. (read timeout=180)
During handling of the above exception, another exception occurred:
ReadTimeout Traceback (most recent call last)
Cell In[8], line 11
2 anno_key = "labelstudio_basic_recipe1"
4 label_schema = {
5 "new_ground_truth": {
6 "type": "detections",
7 "classes": dataset.distinct("detections.detections.label"),
8 },
9 }
---> 11 annot_run = dataset.annotate(
12 anno_key,
13 backend="labelstudio",
14 label_schema=label_schema,
15 launch_editor=True,
16 url="http://localhost:8081/",
17 api_key="0e68ae17ba691fea9020b2813a89925116d65f9d"
18
19 )
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/core/collections.py:8759, in SampleCollection.annotate(self, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, **kwargs)
8629 def annotate(
8630 self,
8631 anno_key,
(...)
8646 **kwargs,
8647 ):
8648 """Exports the samples and optional label field(s) in this collection
8649 to the given annotation backend.
8650
(...)
8757 an :class:`fiftyone.utils.annotations.AnnnotationResults`
8758 """
-> 8759 return foua.annotate(
8760 self,
8761 anno_key,
8762 label_schema=label_schema,
8763 label_field=label_field,
8764 label_type=label_type,
8765 classes=classes,
8766 attributes=attributes,
8767 mask_targets=mask_targets,
8768 allow_additions=allow_additions,
8769 allow_deletions=allow_deletions,
8770 allow_label_edits=allow_label_edits,
8771 allow_index_edits=allow_index_edits,
8772 allow_spatial_edits=allow_spatial_edits,
8773 media_field=media_field,
8774 backend=backend,
8775 launch_editor=launch_editor,
8776 **kwargs,
8777 )
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/annotations.py:250, in annotate(samples, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, **kwargs)
245 # Don't allow overwriting an existing run with same `anno_key`, since we
246 # need the existing run in order to perform workflows like automatically
247 # cleaning up the backend's tasks
248 anno_backend.register_run(samples, anno_key, overwrite=False)
--> 250 results = anno_backend.upload_annotations(
251 samples, anno_key, launch_editor=launch_editor
252 )
254 anno_backend.save_run_results(samples, anno_key, results)
256 return results
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/labelstudio.py:145, in LabelStudioBackend.upload_annotations(self, samples, anno_key, launch_editor)
142 api = self.connect_to_api()
144 logger.info("Uploading media to Label Studio...")
--> 145 results = api.upload_samples(samples, anno_key, self)
146 logger.info("Upload complete")
148 if launch_editor:
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/labelstudio.py:458, in LabelStudioAnnotationAPI.upload_samples(self, samples, anno_key, backend)
452 # @todo can we add support for uploading tasks in batches?
453 tasks, predictions, id_map = self._prepare_tasks(
454 samples,
455 config.label_schema,
456 config.media_field,
457 )
--> 458 uploaded_tasks = self._upload_tasks(project, tasks, predictions)
460 return LabelStudioAnnotationResults(
461 samples,
462 config,
(...)
467 backend=backend,
468 )
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/labelstudio.py:335, in LabelStudioAnnotationAPI._upload_tasks(self, project, tasks, predictions)
329 files = [
330 (one["source_id"], open(one[one["media_type"]], "rb"))
331 for one in tasks
332 ]
334 # upload files first and get their upload ids
--> 335 upload_resp = self._client.make_request(
336 "POST",
337 f"/api/projects/{project.id}/import",
338 params={"commit_to_project": True},
339 files=files,
340 )
342 # create tasks out of the uploaded files
343 payload = json.dumps(
344 {
345 "file_upload_ids": upload_resp.json()["file_upload_ids"],
346 "files_as_tasks_list": False,
347 }
348 )
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/label_studio_sdk/client.py:436, in Client.make_request(self, method, url, *args, **kwargs)
433 raise_exceptions = kwargs.pop("raise_exceptions")
435 logger.debug(f"{method}: {url} with args={args}, kwargs={kwargs}")
--> 436 response = self.session.request(
437 method,
438 self.get_url(url),
439 headers=self.headers,
440 cookies=self.cookies,
441 *args,
442 **kwargs,
443 )
445 if raise_exceptions:
446 if response.status_code >= 400:
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/requests/adapters.py:610, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
608 raise SSLError(e, request=request)
609 elif isinstance(e, ReadTimeoutError):
--> 610 raise ReadTimeout(e, request=request)
611 elif isinstance(e, _InvalidHeader):
612 raise InvalidHeader(e, request=request)
ReadTimeout: HTTPConnectionPool(host='localhost', port=8081): Read timed out. (read timeout=180)
Validation Error when dataset already has a corresponding project in LabelStudio
--------------------------------------------
Request URL: http://localhost:8081/api/projects
Response status code: 400
Response content:
{
"id": "52b07b5c-84b8-4f11-b5eb-81d187221c7a",
"status_code": 400,
"version": "1.12.1",
"detail": "Validation error",
"exc_info": null,
"validation_errors": {
"title": [
"Ensure this field has no more than 50 characters."
]
}
}
---------------------------------------------------------------------------
HTTPError Traceback (most recent call last)
Cell In[6], line 11
2 anno_key = "labelstudio_basic_recipe1"
4 label_schema = {
5 "new_ground_truth": {
6 "type": "detections",
7 "classes": dataset.distinct("detections.detections.label"),
8 },
9 }
---> 11 annot_run = view.annotate(
12 anno_key,
13 backend="labelstudio",
14 label_schema=label_schema,
15 launch_editor=True,
16 url="http://localhost:8081/",
17 api_key="0e68ae17ba691fea9020b2813a89925116d65f9d"
18
19 )
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/core/collections.py:8759, in SampleCollection.annotate(self, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, **kwargs)
8629 def annotate(
8630 self,
8631 anno_key,
(...)
8646 **kwargs,
8647 ):
8648 """Exports the samples and optional label field(s) in this collection
8649 to the given annotation backend.
8650
(...)
8757 an :class:`fiftyone.utils.annotations.AnnnotationResults`
8758 """
-> 8759 return foua.annotate(
8760 self,
8761 anno_key,
8762 label_schema=label_schema,
8763 label_field=label_field,
8764 label_type=label_type,
8765 classes=classes,
8766 attributes=attributes,
8767 mask_targets=mask_targets,
8768 allow_additions=allow_additions,
8769 allow_deletions=allow_deletions,
8770 allow_label_edits=allow_label_edits,
8771 allow_index_edits=allow_index_edits,
8772 allow_spatial_edits=allow_spatial_edits,
8773 media_field=media_field,
8774 backend=backend,
8775 launch_editor=launch_editor,
8776 **kwargs,
8777 )
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/annotations.py:250, in annotate(samples, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, **kwargs)
245 # Don't allow overwriting an existing run with same `anno_key`, since we
246 # need the existing run in order to perform workflows like automatically
247 # cleaning up the backend's tasks
248 anno_backend.register_run(samples, anno_key, overwrite=False)
--> 250 results = anno_backend.upload_annotations(
251 samples, anno_key, launch_editor=launch_editor
252 )
254 anno_backend.save_run_results(samples, anno_key, results)
256 return results
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/labelstudio.py:145, in LabelStudioBackend.upload_annotations(self, samples, anno_key, launch_editor)
142 api = self.connect_to_api()
144 logger.info("Uploading media to Label Studio...")
--> 145 results = api.upload_samples(samples, anno_key, self)
146 logger.info("Upload complete")
148 if launch_editor:
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/labelstudio.py:448, in LabelStudioAnnotationAPI.upload_samples(self, samples, anno_key, backend)
435 """Uploads the given samples to Label Studio according to the given
436 backend's annotation and server configuration.
437
(...)
444 a :class:`LabelStudioAnnotationResults`
445 """
446 config = backend.config
--> 448 project = self._init_project(config, samples)
450 samples.compute_metadata()
452 # @todo can we add support for uploading tasks in batches?
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/fiftyone/utils/labelstudio.py:232, in LabelStudioAnnotationAPI._init_project(self, config, samples)
227 # generate label config
228 label_config = generate_labeling_config(
229 label_schema, samples.media_type
230 )
--> 232 project = self._client.start_project(
233 title=project_name, label_config=label_config
234 )
235 return project
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/label_studio_sdk/client.py:240, in Client.start_project(self, **kwargs)
232 from .project import Project
234 project = Project(
235 url=self.url,
236 api_key=self.api_key,
237 session=self.session,
238 versions=self.versions,
239 )
--> 240 project.start_project(**kwargs)
241 return project
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/label_studio_sdk/project.py:438, in Project.start_project(self, **kwargs)
377 def start_project(self, **kwargs):
378 """Create a new labeling project in Label Studio.
379
380 Parameters
(...)
436
437 """
--> 438 response = self.make_request("POST", "/api/projects", json=kwargs)
439 if response.status_code == 201:
440 self.params = response.json()
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/label_studio_sdk/client.py:448, in Client.make_request(self, method, url, *args, **kwargs)
446 if response.status_code >= 400:
447 self.log_response_error(response)
--> 448 response.raise_for_status()
450 return response
File ~/.cache/pypoetry/virtualenvs/dac2024-gpu-CzXcmR37-py3.11/lib/python3.11/site-packages/requests/models.py:1024, in Response.raise_for_status(self)
1019 http_error_msg = (
1020 f"{self.status_code} Server Error: {reason} for url: {self.url}"
1021 )
1023 if http_error_msg:
-> 1024 raise HTTPError(http_error_msg, response=self)
HTTPError: 400 Client Error: Bad Request for url: http://localhost:8081/api/projects
Willingness to contribute
The FiftyOne Community encourages bug fix contributions. Would you or another
member of your organization be willing to contribute a fix for this bug to the
FiftyOne codebase?
[ ] Yes. I can contribute a fix for this bug independently
[x] Yes. I would be willing to contribute a fix for this bug with guidance
from the FiftyOne community
[ ] No. I cannot contribute a bug fix at this time
Describe the problem
Whenever I try to upload a large number of images to a local Label Studio instance (in my case I need to upload 10k images), I face two errors: a timeout error, and a validation error when a project with the same name already exists in Label Studio (which happens when I retry the upload after a failed attempt).
Code to reproduce issue
System information
Python version (python --version): Python 3.11
FiftyOne version (fiftyone --version): v0.23.8
Other info/logs
Timeout Error
Validation Error when dataset already has a corresponding project in LabelStudio
Willingness to contribute
The FiftyOne Community encourages bug fix contributions. Would you or another member of your organization be willing to contribute a fix for this bug to the FiftyOne codebase?