SauceCat / pydqc

python automatic data quality check toolkit
MIT License
285 stars 57 forks source link

ValueError: Cannot convert -#.##### to Excel #11

Open quantabox opened 5 years ago

quantabox commented 5 years ago

Cannot convert -#.##### to Excel

Here is the error log from trying to generate a data summary. It seems the spreadsheet writer is unable to write float values:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-35-b07dddd4375d> in <module>()
      1 get_ipython().magic(u'time')
      2 data_summary.data_summary(table_schema=df_schema, table=df, fname='csv', 
----> 3                           sample_size=1.0, output_root='dq/', keep_images=True, n_jobs=2)

<path>/lib/python2.7/site-packages/pydqc/data_summary.pyc in data_summary(table_schema, table, fname, sample_size, sample_rows, output_root, keep_images, n_jobs)
    466         ws = wb.create_sheet(title=u'numeric')
    467         # write the final result to work sheet
--> 468         _insert_numeric_results(numeric_results, ws, 35, img_dir)
    469 
    470     # string features

<path>/lib/python2.7/site-packages/pydqc/dqc_utils.pyc in _insert_numeric_results(numeric_results, ws, row_height, img_dir, date_flag)
    253         result_df = result['result_df']
    254         result_df = result_df[['feature', 'value', 'graph']]
--> 255         head_row = _insert_df(result_df, ws)
    256 
    257         # merge cells for the graph

/<path>/lib/python2.7/site-packages/pydqc/dqc_utils.pyc in _insert_df(result_df, ws, header, head_color, bold_first_column, head_style)
    202     max_col = result_df.shape[1]
    203     for r_idx, r in enumerate(dataframe_to_rows(result_df, index=False, header=header)):
--> 204         ws.append(r)
    205         for cell_idx, cell in enumerate(ws.iter_cols(max_col=max_col, min_row=ws.max_row, max_row=ws.max_row)):
    206             cell = cell[0]

<path>/lib/python2.7/site-packages/openpyxl/worksheet/worksheet.pyc in append(self, iterable)
    652                     cell.row = row_idx
    653                 else:
--> 654                     cell = Cell(self, row=row_idx, column=col_idx, value=content)
    655                 self._cells[(row_idx, col_idx)] = cell
    656 

<path>/lib/python2.7/site-packages/openpyxl/cell/cell.pyc in __init__(self, worksheet, row, column, value, style_array)
    118         self.data_type = 'n'
    119         if value is not None:
--> 120             self.value = value
    121         self._comment = None
    122 

<path>/lib/python2.7/site-packages/openpyxl/cell/cell.pyc in value(self, value)
    250     def value(self, value):
    251         """Set the value and infer type and display options."""
--> 252         self._bind_value(value)
    253 
    254     @property

<path>/lib/python2.7/site-packages/openpyxl/cell/cell.pyc in _bind_value(self, value)
    216 
    217         elif value is not None:
--> 218             raise ValueError("Cannot convert {0!r} to Excel".format(value))
    219 
    220         self._value = value

ValueError: Cannot convert -0.625969839 to Excel
npv3s commented 4 years ago

I had the same error, but in my case it was because the float came from sympy, so I just had to convert it to a built-in float with float().