ocean-data-factory-sweden / kso

Notebooks to upload/download marine footage, connect to a citizen science project, train machine learning models and publish marine biological observations.
GNU General Public License v3.0
4 stars 12 forks source link

Tutorial 3 upload clips issue #238

Closed Bergylta closed 10 months ago

Bergylta commented 10 months ago

🐛 Bug

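Calling `pp.upload_zoo_subjects("clip")` in Tutorial 3 fails with a `ValueError`: the merge of the generated clips with the sites table on `site_id` is rejected by pandas because the column has `object` dtype in one DataFrame and `int64` dtype in the other.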

To Reproduce (REQUIRED)

Input:

pp.upload_zoo_subjects("clip")

Output:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[14], line 1
----> 1 pp.upload_zoo_subjects("clip")

File /usr/src/app/kso/kso_utils/kso_utils/project.py:692, in ProjectProcessor.upload_zoo_subjects(self, subject_type)
    684 """
    685 This function uploads clips or frames to Zooniverse, depending on the subject_type argument
    686 
   (...)
    689 :type subject_type: str
    690 """
    691 if subject_type == "clip":
--> 692     upload_df, sitename, created_on = zoo_utils.set_zoo_clip_metadata(
    693         project=self.project,
    694         generated_clipsdf=self.generated_clips,
    695         sitesdf=self.local_sites_csv,
    696         moviesdf=self.local_movies_csv,
    697     )
    698     zoo_utils.upload_clips_to_zooniverse(
    699         project=self.project,
    700         upload_to_zoo=upload_df,
    701         sitename=sitename,
    702         created_on=created_on,
    703     )
    704     # Clean up subjects after upload

File /usr/src/app/kso/kso_utils/kso_utils/zooniverse_utils.py:1303, in set_zoo_clip_metadata(project, generated_clipsdf, sitesdf, moviesdf)
   1301 # Combine site info to the generated_clips df
   1302 if "site_id" in generated_clipsdf.columns:
-> 1303     upload_to_zoo = generated_clipsdf.merge(sitesdf, on="site_id")
   1304     sitename = upload_to_zoo["#siteName"].unique()[0]
   1305 else:

File /usr/local/lib/python3.8/dist-packages/pandas/core/frame.py:9329, in DataFrame.merge(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
   9310 @Substitution("")
   9311 @Appender(_merge_doc, indents=2)
   9312 def merge(
   (...)
   9325     validate: str | None = None,
   9326 ) -> DataFrame:
   9327     from pandas.core.reshape.merge import merge
-> 9329     return merge(
   9330         self,
   9331         right,
   9332         how=how,
   9333         on=on,
   9334         left_on=left_on,
   9335         right_on=right_on,
   9336         left_index=left_index,
   9337         right_index=right_index,
   9338         sort=sort,
   9339         suffixes=suffixes,
   9340         copy=copy,
   9341         indicator=indicator,
   9342         validate=validate,
   9343     )

File /usr/local/lib/python3.8/dist-packages/pandas/core/reshape/merge.py:107, in merge(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)
     90 @Substitution("\nleft : DataFrame or named Series")
     91 @Appender(_merge_doc, indents=0)
     92 def merge(
   (...)
    105     validate: str | None = None,
    106 ) -> DataFrame:
--> 107     op = _MergeOperation(
    108         left,
    109         right,
    110         how=how,
    111         on=on,
    112         left_on=left_on,
    113         right_on=right_on,
    114         left_index=left_index,
    115         right_index=right_index,
    116         sort=sort,
    117         suffixes=suffixes,
    118         copy=copy,
    119         indicator=indicator,
    120         validate=validate,
    121     )
    122     return op.get_result()

File /usr/local/lib/python3.8/dist-packages/pandas/core/reshape/merge.py:704, in _MergeOperation.__init__(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)
    696 (
    697     self.left_join_keys,
    698     self.right_join_keys,
    699     self.join_names,
    700 ) = self._get_merge_keys()
    702 # validate the merge keys dtypes. We may need to coerce
    703 # to avoid incompatible dtypes
--> 704 self._maybe_coerce_merge_keys()
    706 # If argument passed to validate,
    707 # check if columns specified as unique
    708 # are in fact unique.
    709 if validate is not None:

File /usr/local/lib/python3.8/dist-packages/pandas/core/reshape/merge.py:1257, in _MergeOperation._maybe_coerce_merge_keys(self)
   1251     # unless we are merging non-string-like with string-like
   1252     elif (
   1253         inferred_left in string_types and inferred_right not in string_types
   1254     ) or (
   1255         inferred_right in string_types and inferred_left not in string_types
   1256     ):
-> 1257         raise ValueError(msg)
   1259 # datetimelikes must match exactly
   1260 elif needs_i8_conversion(lk.dtype) and not needs_i8_conversion(rk.dtype):

ValueError: You are trying to merge on object and int64 columns. If you wish to proceed you should use pd.concat

Expected behavior

The created clips should be uploaded to Zooniverse. Could the error be due to the large file size?

Environment

image image image image

jannesgg commented 10 months ago

Solved by commit 73cfc30