Open bcpage opened 4 weeks ago
If you remove the line `df.index = pd.to_datetime(df.index)`, it is able to display the buckaroo rendered table.
Thanks for the bug report! Sorry I missed this initially. I'm currently digging into a fix.
This is a JSON serialization issue
buckaroo.BuckarooWidget(df)
gives the following stack trace
---------------------------------------------------------------------------
AttributeError Traceback (most recent call last)
Cell In[6], line 19
13 df.index = pd.to_datetime(df.index)
15 # remove localize
16 #df.index = df.index.tz_localize(None)
17
18 # Display df
---> 19 buckaroo.BuckarooWidget(df)
File ~/buckaroo/buckaroo/dataflow/dataflow.py:211, in CustomizableDataflow.__init__(self, df, debug, column_config_overrides, pinned_rows, extra_grid_config, component_config)
209 self.df_display_args = {}
210 self.setup_options_from_analysis()
--> 211 super().__init__(self.sampling_klass.pre_stats_sample(df))
213 self.populate_df_meta()
214 #self.raw_df = df
File ~/buckaroo/buckaroo/dataflow/dataflow.py:41, in DataFlow.__init__(self, raw_df)
39 self.raw_df = raw_df
40 except Exception:
---> 41 six.reraise(self.exception[0], self.exception[1], self.exception[2])
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/six.py:718, in reraise(tp, value, tb)
716 value = tp()
717 if value.__traceback__ is not tb:
--> 718 raise value.with_traceback(tb)
719 raise value
720 finally:
File ~/buckaroo/buckaroo/dataflow/dataflow_extras.py:93, in exception_protect.<locals>.wrapped_decorator.<locals>.wrapped(self, *args, **kwargs)
91 def wrapped(self, *args, **kwargs):
92 try:
---> 93 func(self, *args, **kwargs)
94 except Exception:
95 #sometimes useful for debugging tricky call order stuff
96 # if protect_name:
97 # print("protect handler", protect_name, self.exception)
98 if self.exception is None:
File ~/buckaroo/buckaroo/dataflow/dataflow.py:179, in DataFlow._widget_config(self, change)
173 @observe('merged_sd', 'style_method')
174 @exception_protect('widget_config-protector')
175 def _widget_config(self, change):
176 #how to control ordering of column_config???
177 # dfviewer_config = self._get_dfviewer_config(self.merged_sd, self.style_method)
178 # self.widget_args_tuple = [self.processed_df, self.merged_sd, dfviewer_config]
--> 179 self.widget_args_tuple = (id(self.processed_df), self.processed_df, self.merged_sd)
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/traitlets/traitlets.py:716, in TraitType.__set__(self, obj, value)
714 if self.read_only:
715 raise TraitError('The "%s" trait is read-only.' % self.name)
--> 716 self.set(obj, value)
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/traitlets/traitlets.py:706, in TraitType.set(self, obj, value)
702 silent = False
703 if silent is not True:
704 # we explicitly compare silent to True just in case the equality
705 # comparison above returns something other than True/False
--> 706 obj._notify_trait(self.name, old_value, new_value)
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/traitlets/traitlets.py:1513, in HasTraits._notify_trait(self, name, old_value, new_value)
1512 def _notify_trait(self, name: str, old_value: t.Any, new_value: t.Any) -> None:
-> 1513 self.notify_change(
1514 Bunch(
1515 name=name,
1516 old=old_value,
1517 new=new_value,
1518 owner=self,
1519 type="change",
1520 )
1521 )
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/ipywidgets/widgets/widget.py:701, in Widget.notify_change(self, change)
698 if name in self.keys and self._should_send_property(name, getattr(self, name)):
699 # Send new state to front-end
700 self.send_state(key=name)
--> 701 super().notify_change(change)
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/traitlets/traitlets.py:1525, in HasTraits.notify_change(self, change)
1523 def notify_change(self, change: Bunch) -> None:
1524 """Notify observers of a change event"""
-> 1525 return self._notify_observers(change)
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/traitlets/traitlets.py:1568, in HasTraits._notify_observers(self, event)
1565 elif isinstance(c, EventHandler) and c.name is not None:
1566 c = getattr(self, c.name)
-> 1568 c(event)
File ~/buckaroo/buckaroo/dataflow/dataflow.py:335, in CustomizableDataflow._handle_widget_change(self, change)
325 return
327 # df_data_dict is still hardcoded for now
328 # eventually processed_df will be able to add or alter values of df_data_dict
329 # correlation would be added, filtered would probably be altered
(...)
332 # postprcoessing could default to empty until that is
333 # selected, optionally
--> 335 self.df_data_dict = {'main': self._df_to_obj(processed_df),
336 'all_stats': self._sd_to_jsondf(merged_sd),
337 'empty': []}
339 temp_display_args = {}
340 for display_name, A_Klass in self.df_display_klasses.items():
File ~/buckaroo/buckaroo/dataflow/dataflow.py:314, in CustomizableDataflow._df_to_obj(self, df)
313 def _df_to_obj(self, df:pd.DataFrame):
--> 314 return pd_to_obj(self.sampling_klass.serialize_sample(df))
File ~/buckaroo/buckaroo/serialization_utils.py:87, in pd_to_obj(df)
86 def pd_to_obj(df:pd.DataFrame):
---> 87 obj = json.loads(df.to_json(orient='table', indent=2, default_handler=str))
89 if isinstance(df.index, pd.MultiIndex):
90 old_index = df.index
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/pandas/util/_decorators.py:333, in deprecate_nonkeyword_arguments.<locals>.decorate.<locals>.wrapper(*args, **kwargs)
327 if len(args) > num_allow_args:
328 warnings.warn(
329 msg.format(arguments=_format_argument_list(allow_args)),
330 FutureWarning,
331 stacklevel=find_stack_level(),
332 )
--> 333 return func(*args, **kwargs)
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/pandas/core/generic.py:2702, in NDFrame.to_json(self, path_or_buf, orient, date_format, double_precision, force_ascii, date_unit, default_handler, lines, compression, index, indent, storage_options, mode)
2699 config.is_nonnegative_int(indent)
2700 indent = indent or 0
-> 2702 return json.to_json(
2703 path_or_buf=path_or_buf,
2704 obj=self,
2705 orient=orient,
2706 date_format=date_format,
2707 double_precision=double_precision,
2708 force_ascii=force_ascii,
2709 date_unit=date_unit,
2710 default_handler=default_handler,
2711 lines=lines,
2712 compression=compression,
2713 index=index,
2714 indent=indent,
2715 storage_options=storage_options,
2716 mode=mode,
2717 )
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/pandas/io/json/_json.py:200, in to_json(path_or_buf, obj, orient, date_format, double_precision, force_ascii, date_unit, default_handler, lines, compression, index, indent, storage_options, mode)
197 else:
198 raise NotImplementedError("'obj' should be a Series or a DataFrame")
--> 200 s = writer(
201 obj,
202 orient=orient,
203 date_format=date_format,
204 double_precision=double_precision,
205 ensure_ascii=force_ascii,
206 date_unit=date_unit,
207 default_handler=default_handler,
208 index=index,
209 indent=indent,
210 ).write()
212 if lines:
213 s = convert_to_line_delimits(s)
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/pandas/io/json/_json.py:366, in JSONTableWriter.__init__(self, obj, orient, date_format, double_precision, ensure_ascii, date_unit, index, default_handler, indent)
359 msg = (
360 "Trying to write with `orient='table'` and "
361 f"`date_format='{date_format}'`. Table Schema requires dates "
362 "to be formatted with `date_format='iso'`"
363 )
364 raise ValueError(msg)
--> 366 self.schema = build_table_schema(obj, index=self.index)
368 # NotImplemented on a column MultiIndex
369 if obj.ndim == 2 and isinstance(obj.columns, MultiIndex):
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/pandas/io/json/_table_schema.py:304, in build_table_schema(data, index, primary_key, version)
302 fields.append(new_field)
303 else:
--> 304 fields.append(convert_pandas_type_to_json_field(data.index))
306 if data.ndim > 1:
307 for column, s in data.items():
File ~/anaconda3/envs/buckaroo-0.7-test/lib/python3.9/site-packages/pandas/io/json/_table_schema.py:151, in convert_pandas_type_to_json_field(arr)
148 field["tz"] = "UTC"
149 else:
150 # error: "tzinfo" has no attribute "zone"
--> 151 field["tz"] = dtype.tz.zone # type: ignore[attr-defined]
152 elif isinstance(dtype, ExtensionDtype):
153 field["extDtype"] = dtype.name
AttributeError: 'datetime.timezone' object has no attribute 'zone'
I'm digging in and figuring out a fix.
So this is a core bug in pandas; it comes up when calling df.to_json(orient='table') on a DataFrame whose index is timezone-aware.
I could special case datetime index handling, and will probably have to eventually because there's a lot with datetimes.
Long term, this specific bug will be solved by moving away from JSON and to Arrow serialization. I'm not sure when I will implement that feature, but it's one of the next big ones.
Is this bug critical for your workflow? I can work out some type of solution for you if it is.
Checks
How would you categorize this request? You can select multiple if not sure.
Performance
Enhancement Description
When trying to display a table that contains an index with datetime and time zone information, the buckaroo-style table does not display. The table reverts back to the pandas-style table. The reason for this happening is not clear to the user.
Suggestions for improvement
Pseudo Code Implementation
Prior Art
n/a