Closed Bergylta closed 9 months ago
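Calling `pp.upload_zoo_subjects("frame")` fails with an `IntCastingNaNError` instead of uploading the frames: `add_db_info_to_df` casts the id column used for the merge to int while that column contains non-finite (NA or inf) values.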
Input:
pp.upload_zoo_subjects("frame")
Output:
```
---------------------------------------------------------------------------
IntCastingNaNError                        Traceback (most recent call last)
Cell In[19], line 1
----> 1 pp.upload_zoo_subjects("frame")

File /usr/src/app/kso-dev/kso_utils/kso_utils/project.py:670, in ProjectProcessor.upload_zoo_subjects(self, subject_type)
    668 elif subject_type == "frame":
    669     species_list = []
--> 670     upload_df = zoo_utils.set_zoo_frame_metadata(
    671         project=self.project,
    672         db_connection=self.db_connection,
    673         df=self.generated_frames,
    674         species_list=self.species_of_interest,
    675         csv_paths=self.csv_paths,
    676     )
    677     zoo_utils.upload_frames_to_zooniverse(
    678         project=self.project,
    679         upload_to_zoo=upload_df,
    680         species_list=self.species_of_interest,
    681     )
    683 else:

File /usr/src/app/kso-dev/kso_utils/kso_utils/zooniverse_utils.py:1818, in set_zoo_frame_metadata(project, db_connection, df, species_list, csv_paths)
   1816 # Set project-specific metadata
   1817 if project.Zooniverse_number == 9747:
-> 1818     df = add_db_info_to_df(
   1819         project, db_connection, csv_paths, df, "sites", "id, siteName"
   1820     )
   1821     upload_to_zoo = df[
   1822         [
   1823             "frame_path",
   (...)
   1829         ]
   1830     ]
   1832 elif project_name == "SGU":

File /usr/src/app/kso-dev/kso_utils/kso_utils/db_utils.py:509, in add_db_info_to_df(project, conn, csv_paths, df, table_name, cols_interest)
    507 # Ensure id columns that are going to be used to merge are int
    508 if "id" in left_on_col:
--> 509     df[left_on_col] = df[left_on_col].astype(float).astype(int)
    511 # Combine the original and sqldf dfs
    512 comb_df = pd.merge(
    513     df, sql_df, how="left", left_on=left_on_col, right_on=right_on_col
    514 )

File /usr/local/lib/python3.8/dist-packages/pandas/core/generic.py:5920, in NDFrame.astype(self, dtype, copy, errors)
   5913     results = [
   5914         self.iloc[:, i].astype(dtype, copy=copy)
   5915         for i in range(len(self.columns))
   5916     ]
   5918 else:
   5919     # else, only a single dtype is given
-> 5920     new_data = self._mgr.astype(dtype=dtype, copy=copy, errors=errors)
   5921     return self._constructor(new_data).__finalize__(self, method="astype")
   5923 # GH 33113: handle empty frame or series

File /usr/local/lib/python3.8/dist-packages/pandas/core/internals/managers.py:419, in BaseBlockManager.astype(self, dtype, copy, errors)
    418 def astype(self: T, dtype, copy: bool = False, errors: str = "raise") -> T:
--> 419     return self.apply("astype", dtype=dtype, copy=copy, errors=errors)

File /usr/local/lib/python3.8/dist-packages/pandas/core/internals/managers.py:304, in BaseBlockManager.apply(self, f, align_keys, ignore_failures, **kwargs)
    302         applied = b.apply(f, **kwargs)
    303     else:
--> 304         applied = getattr(b, f)(**kwargs)
    305 except (TypeError, NotImplementedError):
    306     if not ignore_failures:

File /usr/local/lib/python3.8/dist-packages/pandas/core/internals/blocks.py:582, in Block.astype(self, dtype, copy, errors)
    564 """
    565 Coerce to the new dtype.
    566
   (...)
    578 Block
    579 """
    580 values = self.values
--> 582 new_values = astype_array_safe(values, dtype, copy=copy, errors=errors)
    584 new_values = maybe_coerce_values(new_values)
    585 newb = self.make_block(new_values)

File /usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/cast.py:1292, in astype_array_safe(values, dtype, copy, errors)
   1289     dtype = dtype.numpy_dtype
   1291 try:
-> 1292     new_values = astype_array(values, dtype, copy=copy)
   1293 except (ValueError, TypeError):
   1294     # e.g. astype_nansafe can fail on object-dtype of strings
   1295     # trying to convert to float
   1296     if errors == "ignore":

File /usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/cast.py:1237, in astype_array(values, dtype, copy)
   1234     values = values.astype(dtype, copy=copy)
   1236 else:
-> 1237     values = astype_nansafe(values, dtype, copy=copy)
   1239 # in pandas we don't store numpy str dtypes, so convert to object
   1240 if isinstance(dtype, np.dtype) and issubclass(values.dtype.type, str):

File /usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/cast.py:1148, in astype_nansafe(arr, dtype, copy, skipna)
   1145     raise TypeError(f"cannot astype a timedelta from [{arr.dtype}] to [{dtype}]")
   1147 elif np.issubdtype(arr.dtype, np.floating) and np.issubdtype(dtype, np.integer):
-> 1148     return astype_float_to_int_nansafe(arr, dtype, copy)
   1150 elif is_object_dtype(arr.dtype):
   1151
   1152     # work around NumPy brokenness, #1987
   1153     if np.issubdtype(dtype.type, np.integer):

File /usr/local/lib/python3.8/dist-packages/pandas/core/dtypes/cast.py:1193, in astype_float_to_int_nansafe(values, dtype, copy)
   1189 """
   1190 astype with a check preventing converting NaN to an meaningless integer value.
   1191 """
   1192 if not np.isfinite(values).all():
-> 1193     raise IntCastingNaNError(
   1194         "Cannot convert non-finite values (NA or inf) to integer"
   1195     )
   1196 return values.astype(dtype, copy=copy)

IntCastingNaNError: Cannot convert non-finite values (NA or inf) to integer
```
The expected behaviour is that this block of code uploads the frames to Zooniverse.
@Bergylta What is the project you are testing this with?
@victor-wildlife This is in the KSO project; I am trying to upload extracted frames of file clams to a new subject set.
🐛 Bug
A clear and concise description of what the bug is.
To Reproduce (REQUIRED)
Input:
Output:
Expected behavior
The expected behaviour is that this block of code uploads the frames to Zooniverse.