voxel51 / fiftyone

Refine high-quality datasets and visual AI models
https://fiftyone.ai
Apache License 2.0
8.92k stars 567 forks source link

Unable to upload images to CVAT #3085

Open nsriniva03 opened 1 year ago

nsriniva03 commented 1 year ago

Describe the problem

Images are not being uploaded to CVAT. Instead I get the following error. There is nothing in the RQ queues.

Error info/logs

Computing metadata... 100% |█████████████████████| 1/1 [145.5ms elapsed, 0s remaining, 6.9 samples/s] Uploading samples to CVAT... Arguments the caused this error were: {'data': {'image_quality': 75, 'use_cache': True, 'use_zip_chunks': True}, 'files': {'client_files[0]': ('000000_0001eeaf4aed83f9.jpg', <_io.BufferedReader name='/home/nsrinivas/fiftyone/open-images-v6/validation/data/0001eeaf4aed83f9.jpg'>)}}

HTTPError Traceback (most recent call last) File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:6543, in CVATAnnotationAPI._validate(self, response, kwargs) 6542 try: -> 6543 response.raise_for_status() 6544 except:

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/requests/models.py:1021, in Response.raise_for_status(self) 1020 if http_error_msg: -> 1021 raise HTTPError(http_error_msg, response=self)

HTTPError: 500 Server Error: Internal Server Error for url: http://localhost:8080/api/tasks/6/data

During handling of the above exception, another exception occurred:

Exception Traceback (most recent call last) Cell In[4], line 4 1 anno_key = "segs_run_2" 3 # Upload the samples and launch CVAT ----> 4 anno_results = dataset.annotate( 5 anno_key, 6 label_field="segmentations", 7 label_type="instances", 8 classes=["person", "vehicle", "animal"], 9 launch_editor=False, 10 )

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/core/collections.py:8098, in SampleCollection.annotate(self, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, kwargs) 7969 def annotate( 7970 self, 7971 anno_key, (...) 7986 kwargs, 7987 ): 7988 """Exports the samples and optional label field(s) in this collection 7989 to the given annotation backend. 7990 (...) 8096 an :class:fiftyone.utils.annotations.AnnnotationResults 8097 """ -> 8098 return foua.annotate( 8099 self, 8100 anno_key, 8101 label_schema=label_schema, 8102 label_field=label_field, 8103 label_type=label_type, 8104 classes=classes, 8105 attributes=attributes, 8106 mask_targets=mask_targets, 8107 allow_additions=allow_additions, 8108 allow_deletions=allow_deletions, 8109 allow_label_edits=allow_label_edits, 8110 allow_index_edits=allow_index_edits, 8111 allow_spatial_edits=allow_spatial_edits, 8112 media_field=media_field, 8113 backend=backend, 8114 launch_editor=launch_editor, 8115 **kwargs, 8116 )

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/annotations.py:247, in annotate(samples, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, **kwargs) 242 # Don't allow overwriting an existing run with same anno_key, since we 243 # need the existing run in order to perform workflows like automatically 244 # cleaning up the backend's tasks 245 anno_backend.register_run(samples, anno_key, overwrite=False) --> 247 results = anno_backend.upload_annotations( 248 samples, anno_key, launch_editor=launch_editor 249 ) 251 anno_backend.save_run_results(samples, anno_key, results) 253 return results

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:3249, in CVATBackend.upload_annotations(self, samples, anno_key, launch_editor) 3247 def upload_annotations(self, samples, anno_key, launch_editor=False): 3248 api = self.connect_to_api() -> 3249 results = api.upload_samples(samples, anno_key, self) 3251 if launch_editor: 3252 results.launch_editor()

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:4344, in CVATAnnotationAPI.upload_samples(self, samples, anno_key, backend) 4337 if num_batches > 1: 4338 taskname += f"{idx + 1}" 4340 ( 4341 task_id, 4342 class_id_map, 4343 attr_id_map, -> 4344 ) = self._create_task_upload_data( 4345 config, 4346 idx, 4347 task_name, 4348 cvat_schema, 4349 project_id, 4350 samples_batch, 4351 task_ids, 4352 job_ids, 4353 frame_id_map, 4354 ) 4356 for label_field in label_schema.keys(): 4357 labels_task_map[label_field].append(task_id)

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:5117, in CVATAnnotationAPI._create_task_upload_data(self, config, idx, task_name, cvat_schema, project_id, samples_batch, task_ids, job_ids, frame_id_map) 5114 task_ids.append(task_id) 5116 # Upload media -> 5117 job_ids[task_id] = self.upload_data( 5118 task_id, 5119 samples_batch.values(media_field), 5120 image_quality=image_quality, 5121 use_cache=use_cache, 5122 use_zip_chunks=use_zip_chunks, 5123 chunk_size=chunk_size, 5124 job_assignees=_job_assignees, 5125 job_reviewers=_job_reviewers, 5126 ) 5127 self._verify_uploaded_frames(task_id, samples_batch) 5128 frame_id_map[task_id] = self._build_frame_id_map(samples_batch)

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:4151, in CVATAnnotationAPI.upload_data(self, task_id, paths, image_quality, use_cache, use_zip_chunks, chunk_size, job_assignees, job_reviewers) 4148 open_files.append(open_file) 4150 try: -> 4151 self.post(self.task_data_url(task_id), data=data, files=files) 4152 finally: 4153 for f in open_files:

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:3744, in CVATAnnotationAPI.post(self, url, kwargs) 3734 def post(self, url, kwargs): 3735 """Sends a POST request to the given CVAT API URL. 3736 3737 Args: (...) 3742 the request response 3743 """ -> 3744 return self._make_request(self._session.post, url, **kwargs)

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:3704, in CVATAnnotationAPI._make_request(self, request_method, url, print_error_info, kwargs) 3702 response = request_method(url, verify=False, kwargs) 3703 if print_error_info: -> 3704 self._validate(response, kwargs) 3705 else: 3706 response.raise_for_status()

File ~/anaconda3/envs/fiftyone/lib/python3.8/site-packages/fiftyone/utils/cvat.py:6548, in CVATAnnotationAPI._validate(self, response, kwargs) 6546 logger.info("Arguments the caused this error were:") 6547 logger.info(kwargs) -> 6548 raise Exception( 6549 "%d error for request %s to url %s with the reason %s. Error " 6550 "content: %s" 6551 % ( 6552 d["status_code"], 6553 d["request"], 6554 d["url"], 6555 d["reason"], 6556 d["_content"], 6557 ) 6558 )

Exception: 500 error for request <PreparedRequest [POST]> to url http://localhost:8080/api/tasks/6/data with the reason Internal Server Error. "Error content: b'\n<!doctype html>\n<html lang="en">\n<head>\n <title>Server Error (500)</title>\n</head>\n<body>\n <h1>Server Error (500)</h1><p></p>\n</body>\n</html>\n'"

I have not faced this issue with fiftyone version 0.16.0, but I am facing it with fiftyone version 0.20.1.

System information

Commands to reproduce

import fiftyone as fo
import fiftyone.zoo as foz

dataset = foz.load_zoo_dataset(
    "open-images-v6",
    split="validation",
    label_types=[],
    max_samples=1,
)

anno_key = "segs_run_2"

# Upload the samples and launch CVAT
anno_results = dataset.annotate(
    anno_key,
    label_field="segmentations",
    label_type="instances",
    classes=["person", "vehicle", "animal"],
    launch_editor=True,
)

Other info/logs

Include any logs or source code that would be helpful to diagnose the problem. If including tracebacks, please include the full traceback. Large logs and files should be attached. Please do not use screenshots for sharing text. Code snippets should be used instead when providing tracebacks, logs, etc.

What areas of FiftyOne does this bug affect?

Willingness to contribute

The FiftyOne Community encourages bug fix contributions. Would you or another member of your organization be willing to contribute a fix for this bug to the FiftyOne codebase?

nsriniva03 commented 1 year ago

CVAT server logs:

[Fri May 19 15:15:31.041672 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] ERROR - 2023-05-19 15:15:31,041 - log - Internal Server Error: /api/tasks/3/data [Fri May 19 15:15:31.041676 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] Traceback (most recent call last): [Fri May 19 15:15:31.041678 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/django/core/handlers/exception.py", line 47, in inner [Fri May 19 15:15:31.041680 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] response = get_response(request) [Fri May 19 15:15:31.041681 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/django/core/handlers/base.py", line 181, in _get_response [Fri May 19 15:15:31.041683 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] response = wrapped_callback(request, *callback_args, **callback_kwargs) [Fri May 19 15:15:31.041685 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/django/views/decorators/csrf.py", line 54, in wrapped_view [Fri May 19 15:15:31.041687 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] return view_func(*args, **kwargs) [Fri May 19 15:15:31.041688 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/rest_framework/viewsets.py", line 125, in view [Fri May 19 15:15:31.041690 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] return self.dispatch(request, *args, **kwargs) [Fri May 19 15:15:31.041692 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/rest_framework/views.py", line 509, in dispatch [Fri May 19 15:15:31.041694 2023] [wsgi:error] [pid 473:tid 140409627248384] 
[remote 172.23.0.3:52344] response = self.handle_exception(exc) [Fri May 19 15:15:31.041695 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/rest_framework/views.py", line 469, in handle_exception [Fri May 19 15:15:31.041697 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] self.raise_uncaught_exception(exc) [Fri May 19 15:15:31.041699 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/rest_framework/views.py", line 480, in raise_uncaught_exception [Fri May 19 15:15:31.041700 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] raise exc [Fri May 19 15:15:31.041702 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/opt/venv/lib/python3.8/site-packages/rest_framework/views.py", line 506, in dispatch [Fri May 19 15:15:31.041704 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] response = handler(request, *args, **kwargs) [Fri May 19 15:15:31.041705 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] File "/home/django/cvat/apps/engine/views.py", line 778, in data [Fri May 19 15:15:31.041707 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] return data_getter(request, self._object.data.start_frame, [Fri May 19 15:15:31.041709 2023] [wsgi:error] [pid 473:tid 140409627248384] [remote 172.23.0.3:52344] AttributeError: 'NoneType' object has no attribute 'start_frame'

johankrivez commented 4 months ago

+1, I have the same issue

johankrivez commented 4 months ago

Looks like it works fine on packs of 1-30 images, but bigger packs are likely to cause the error:

In [21]: view = dataset.filter_labels('classifications', F('label') == 'To Be Done').limit(30)
In [22]: view.annotate('test_1_7', label_field='classifications')
Uploading samples to CVAT...
Out[22]: <fiftyone.utils.cvat.CVATAnnotationResults at 0x7ff343219000>
In [23]: view = dataset.filter_labels('classifications', F('label') == 'To Be Done').limit(50)
In [24]: view.annotate('test_1_8', label_field='classifications')
Uploading samples to CVAT...

HTTPError: 500 Server Error: Internal Server Error for url: http://cvat-server:8080/api/tasks/22/data

During handling of the above exception, another exception occurred:

Exception                                 Traceback (most recent call last)
Cell In[24], line 1
----> 1 view.annotate('test_1_8', label_field='classifications')

File /usr/local/lib/python3.10/dist-packages/fiftyone/core/collections.py:8810, in SampleCollection.annotate(self, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, **kwargs)
   8680 def annotate(
   8681     self,
   8682     anno_key,
   (...)
   8697     **kwargs,
   8698 ):
   8699     """Exports the samples and optional label field(s) in this collection
   8700     to the given annotation backend.
   8701
   (...)
   8808         an :class:`fiftyone.utils.annotations.AnnnotationResults`
   8809     """
-> 8810     return foua.annotate(
   8811         self,
   8812         anno_key,
   8813         label_schema=label_schema,
   8814         label_field=label_field,
   8815         label_type=label_type,
   8816         classes=classes,
   8817         attributes=attributes,
   8818         mask_targets=mask_targets,
   8819         allow_additions=allow_additions,
   8820         allow_deletions=allow_deletions,
   8821         allow_label_edits=allow_label_edits,
   8822         allow_index_edits=allow_index_edits,
   8823         allow_spatial_edits=allow_spatial_edits,
   8824         media_field=media_field,
   8825         backend=backend,
   8826         launch_editor=launch_editor,
   8827         **kwargs,
   8828     )

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/annotations.py:250, in annotate(samples, anno_key, label_schema, label_field, label_type, classes, attributes, mask_targets, allow_additions, allow_deletions, allow_label_edits, allow_index_edits, allow_spatial_edits, media_field, backend, launch_editor, **kwargs)
    245 # Don't allow overwriting an existing run with same `anno_key`, since we
    246 # need the existing run in order to perform workflows like automatically
    247 # cleaning up the backend's tasks
    248 anno_backend.register_run(samples, anno_key, overwrite=False)
--> 250 results = anno_backend.upload_annotations(
    251     samples, anno_key, launch_editor=launch_editor
    252 )
    254 anno_backend.save_run_results(samples, anno_key, results)
    256 return results

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:3309, in CVATBackend.upload_annotations(self, samples, anno_key, launch_editor)
   3307 def upload_annotations(self, samples, anno_key, launch_editor=False):
   3308     api = self.connect_to_api()
-> 3309     results = api.upload_samples(samples, anno_key, self)
   3311     if launch_editor:
   3312         results.launch_editor()

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:4520, in CVATAnnotationAPI.upload_samples(self, samples, anno_key, backend)
   4513 if num_batches > 1:
   4514     task_name += f"_{idx + 1}"
   4516 (
   4517     task_id,
   4518     class_id_map,
   4519     attr_id_map,
-> 4520 ) = self._create_task_upload_data(
   4521     config,
   4522     idx,
   4523     task_name,
   4524     cvat_schema,
   4525     project_id,
   4526     samples_batch,
   4527     task_ids,
   4528     job_ids,
   4529     frame_id_map,
   4530     _frame_start,
   4531     _frame_stop,
   4532     _frame_step,
   4533 )
   4535 for label_field in label_schema.keys():
   4536     labels_task_map[label_field].append(task_id)

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:5382, in CVATAnnotationAPI._create_task_upload_data(self, config, idx, task_name, cvat_schema, project_id, samples_batch, task_ids, job_ids, frame_id_map, frame_start, frame_stop, frame_step)
   5379 task_ids.append(task_id)
   5381 # Upload media
-> 5382 job_ids[task_id] = self.upload_data(
   5383     task_id,
   5384     samples_batch.values(media_field),
   5385     image_quality=image_quality,
   5386     use_cache=use_cache,
   5387     use_zip_chunks=use_zip_chunks,
   5388     chunk_size=chunk_size,
   5389     job_assignees=_job_assignees,
   5390     job_reviewers=_job_reviewers,
   5391     frame_start=frame_start,
   5392     frame_stop=frame_stop,
   5393     frame_step=frame_step,
   5394 )
   5396 self._verify_uploaded_frames(
   5397     task_id, samples_batch, frame_start, frame_stop, frame_step
   5398 )
   5400 frame_id_map[task_id] = self._build_frame_id_map(samples_batch)

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:4262, in CVATAnnotationAPI.upload_data(self, task_id, paths, image_quality, use_cache, use_zip_chunks, chunk_size, job_assignees, job_reviewers, frame_start, frame_stop, frame_step)
   4260     self.post(self.task_data_url(task_id), data=data, files=files)
   4261 except Exception as e:
-> 4262     raise e
   4263 finally:
   4264     for f in open_files:

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:4260, in CVATAnnotationAPI.upload_data(self, task_id, paths, image_quality, use_cache, use_zip_chunks, chunk_size, job_assignees, job_reviewers, frame_start, frame_stop, frame_step)
   4257     data["sorting_method"] = "predefined"
   4259 try:
-> 4260     self.post(self.task_data_url(task_id), data=data, files=files)
   4261 except Exception as e:
   4262     raise e

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:3839, in CVATAnnotationAPI.post(self, url, **kwargs)
   3829 def post(self, url, **kwargs):
   3830     """Sends a POST request to the given CVAT API URL.
   3831
   3832     Args:
   (...)
   3837         the request response
   3838     """
-> 3839     return self._make_request(self._session.post, url, **kwargs)

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:3799, in CVATAnnotationAPI._make_request(self, request_method, url, print_error_info, **kwargs)
   3797 response = request_method(url, verify=False, **kwargs)
   3798 if print_error_info:
-> 3799     self._validate(response, kwargs)
   3800 else:
   3801     response.raise_for_status()

File /usr/local/lib/python3.10/dist-packages/fiftyone/utils/cvat.py:6863, in CVATAnnotationAPI._validate(self, response, kwargs)
   6861 logger.info("Arguments the caused this error were:")
   6862 logger.info(kwargs)
-> 6863 raise Exception(
   6864     "%d error for request %s to url %s with the reason %s. Error "
   6865     "content: %s"
   6866     % (
   6867         d["status_code"],
   6868         d["request"],
   6869         d["url"],
   6870         d["reason"],
   6871         d["_content"],
   6872     )
   6873 )

Exception: 500 error for request <PreparedRequest [POST]> to url http://cvat-server:8080/api/tasks/22/data with the reason Internal Server Error. Error content: b'\n<!doctype html>\n<html lang="en">\n<head>\n  <title>Server Error (500)</title>\n</head>\n<body>\n  <h1>Server Error (500)</h1><p></p>\n</body>\n</html>\n'

johankrivez commented 4 months ago

Okay, I've found the following error in the CVAT logs:

django.db.utils.IntegrityError: duplicate key value violates unique constraint "engine_clientfile_data_id_file_c9989a74_uniq"
DETAIL:  Key (data_id, file)=(23, /home/django/data/data/23/raw/page-27.jpg) already exists.

It looks like this happens when two (or more) files in one task share the same filename. I work with documents, so my images are stored like /fiftyone/loan_documents/{loan_id}/JohnDoeCreditPackage.pdf/page-1.jpg. Is it essential to keep the original filenames in tasks? Maybe it would be better to use f'{sample_id}.jpg' instead?