vega / altair

Declarative statistical visualization library for Python
https://altair-viz.github.io/
BSD 3-Clause "New" or "Revised" License
9.26k stars 793 forks source link

TypeError: FacetedEncoding.init() got multiple values for argument 'self' #3634

Open gaspardc-met opened 2 days ago

gaspardc-met commented 2 days ago

What happened?

Using a complex altair chart in streamlit, worked in 5.3.0, and broke with 5.4.0 and then 5.4.1. It seems to be an altair issue: TypeError: FacetedEncoding.init() got multiple values for argument 'self'

I tried to reproduce the error with the plot code and anonymized example data. The error didn't show up neither with altair alone nor streamlit+altair.

However, it might give you an idea of the plot I use. The error happens in the first call to .encode in the fonction .mark_bar.encode() Image

Long Code Block

```python import functools import altair as alt import numpy as np import pandas as pd import streamlit as st # Mocked dependencies and constants CMAP_BINS = [0, 0.25, 0.5, 0.75, 1.0] CMAP_COLORS = ["#f7fbff", "#c6dbef", "#6baed6", "#2171b5", "#08306b"] def custom_blues(): bins = [0, 0.25, 0.5, 0.75, 1.0] colors = ["#f7fbff", "#c6dbef", "#6baed6", "#2171b5", "#08306b"] return bins, colors def chronogram_legend(target="load", pump_toggle=False): legend = "Load Legend" short_legend = "Load" extra = None return legend, short_legend, extra def chronogram_processing(chronogram, timedelta="60min", filter_load=True): data = chronogram.copy() data["start_time"] = data.index data["end_time"] = data.index + pd.Timedelta(timedelta) return data.reset_index(drop=True) def get_machines_starts_and_stops(chronogram, timedelta="60min", separator_dt=None): starts_and_stops = alt.Chart(pd.DataFrame({"x": [], "y": []})) starts_and_stops_texts = alt.Chart(pd.DataFrame({"x": [], "y": []})) return starts_and_stops, starts_and_stops_texts def get_vertical_separator(separator_dt, labels_y="", y_field="machine"): separator = alt.Chart(pd.DataFrame({"x": [], "y": []})) separator_labels = alt.Chart(pd.DataFrame({"x": [], "y": []})) return separator, separator_labels @st.cache_data(show_spinner=False, ttl=60 * 60) # TTL in seconds def plot_chronogram( chronogram: pd.DataFrame, formatted=".0f", target="load", timedelta="60min", filter_load=True, expand: bool = False, pump_toggle: bool = False, display_starts_and_stops: bool = False, separator_dt: pd.Timestamp = None, ): legend, short_legend, _ = chronogram_legend(target=target, pump_toggle=pump_toggle) data = chronogram_processing(chronogram=chronogram, timedelta=timedelta, filter_load=filter_load) if expand: data = data.set_index("start_time").sort_index().reset_index() data.loc[28:, "end_time"] = data.loc[28:, "end_time"] + pd.Timedelta("45T") if target == "pressure": bins, colors = custom_blues() scale = alt.Scale(domain=bins, range=colors, type="ordinal") elif target == "load": scale = alt.Scale(domain=CMAP_BINS, range=CMAP_COLORS, type="threshold") else: scale = alt.Scale(scheme="blues") sort_order = [""] + data["machine"].sort_values().unique().tolist() chart = ( alt.Chart(data) .mark_bar() .encode( x=alt.X("start_time:T", title="Horizon Temporel"), x2=alt.X2("end_time:T"), y=alt.Y("machine:N", title="Utilisation: Groupes ou AFC", sort=sort_order), color=alt.Color( "load:Q", title=short_legend, scale=scale, legend=alt.Legend(title=legend), ), stroke=alt.value("white"), strokeWidth=alt.value(2), tooltip=[ alt.Tooltip("start_time:T", format="%Y-%m-%d", title="Date"), alt.Tooltip("start_time:T", format="%H:%M", title="Heure"), alt.Tooltip("load:Q", format=formatted, title=legend), ], ) ).properties( title=f"Chronogramme d'opération: {legend}", width=800, height=350, ) text = ( alt.Chart(data) .mark_text(dx=0, dy=0, color="white", fontSize=10) .encode( x=alt.X("mid_time:T"), y=alt.Y("machine:N", sort=None), text=alt.Text("load:Q", format=formatted), tooltip=[ alt.Tooltip("start_time:T", format="%Y-%m-%d", title="Date"), alt.Tooltip("start_time:T", format="%H:%M", title="Heure"), alt.Tooltip("load:Q", format=formatted, title=legend), ], ) .transform_calculate(mid_time="datum.start_time + (datum.end_time - datum.start_time)/2") .transform_filter((alt.datum.load >= 0.0) & (alt.datum.load != 0.01)) ) text_hot = ( alt.Chart(data) .mark_text(dx=0, dy=0, color="white", fontSize=10) .encode( x=alt.X("mid_time:T"), y=alt.Y("machine:N", sort=None), text=alt.value("Chaud"), tooltip=alt.value(None), ) .transform_calculate(mid_time="datum.start_time + (datum.end_time - datum.start_time)/2") .transform_filter(alt.datum.load == 0.01) ) all_charts = [chart, text, text_hot] if display_starts_and_stops: starts_and_stops, starts_and_stops_texts = get_machines_starts_and_stops( chronogram=chronogram, timedelta=timedelta, separator_dt=separator_dt, ) all_charts += [starts_and_stops, starts_and_stops_texts] display_separator = separator_dt is not None and separator_dt > chronogram.index.min() if display_separator: separator, separator_labels = get_vertical_separator(separator_dt=separator_dt, labels_y="", y_field="machine") all_charts += [separator, separator_labels] composed = ( functools.reduce(lambda a, b: a + b, all_charts) .configure_legend(orient="right", titleOrient="right") .configure_axis(labelFontSize=12, titleFontSize=12) ) return composed # Main block to generate and display the chart if __name__ == "__main__": # Generate fake data date_range = pd.date_range(start="2022-01-01", periods=48, freq="30min") data = pd.DataFrame(index=date_range) data["machine"] = np.random.choice(["A", "B", "C"], size=len(data)) data["load"] = np.random.rand(len(data)) # Call the plotting function chart = plot_chronogram(chronogram=data) st.altair_chart(chart) ```

What would you like to happen instead?

No response

Which version of Altair are you using?

altair: 5.4.1 python: 3.11

dangotbanned commented 2 days ago

Using a complex altair chart in streamlit, worked in 5.3.0, and broke with 5.4.0 and then 5.4.1. It seems to be an altair issue: TypeError: FacetedEncoding.init() got multiple values for argument 'self'

I tried to reproduce the error with the plot code and anonymized example data. The error didn't show up neither with altair alone nor streamlit+altair.

@gaspardc-met could you provide the traceback please?

I'm unsure what FacetedEncoding.init() is referring to, as that isn't a method on alt.FacetedEncoding:

Repro

import altair as alt

>>> alt.FacetedEncoding().init()

Traceback


AttributeError                            Traceback (most recent call last)
Cell In[17], line 3
      1 import altair as alt
----> 3 alt.FacetedEncoding().init()

File c:/../altair/altair/utils/schemapi.py:1063, in SchemaBase.__getattr__(self, attr)
   1061 except AttributeError:
   1062     _getattr = super().__getattribute__
-> 1063 return _getattr(attr)

AttributeError: 'FacetedEncoding' object has no attribute 'init'
gaspardc-met commented 1 day ago

Hey @dangotbanned ,

Sorry I clipped some of the error, it was the core.FacetedEncoding init at https://github.com/vega/altair/blob/02ad17d0f1f71ea5125a7c6c9b43a61fd80c4567/altair/vegalite/v5/schema/channels.py#L25276 :

Traceback (most recent call last):
    File "<project_root>/decorators.py", line 68, in wrapper
        result = main_func(*args, **kwargs)
    File "<project_root>/pages_stm/predictions.py", line 79, in main
        display_predictions_tab(...)
    File "<project_root>/pages_stm/predictions.py", line 229, in display_predictions_tab
        display_chronograms(...)
    File "<project_root>/predictions/plots.py", line 516, in display_chronograms
        plot_chronogram(...)
    File "<project_root>/venv/lib/python3.11/site-packages/streamlit/runtime/caching/cache_utils.py", line 212, in __call__
        return self._get_or_create_cached_value(args, kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/streamlit/runtime/caching/cache_utils.py", line 235, in _get_or_create_cached_value
        return self._handle_cache_miss(cache, value_key, func_args, func_kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/streamlit/runtime/caching/cache_utils.py", line 292, in _handle_cache_miss
        computed_value = self._info.func(*func_args, **func_kwargs)
    File "<project_root>/predictions/plots.py", line 331, in plot_chronogram
        .encode(...)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/vegalite/v5/schema/channels.py", line 31242, in encode
        copy.encoding = core.FacetedEncoding(**encoding)

Error: TypeError: FacetedEncoding.init() got multiple values for argument 'self'
gaspardc-met commented 1 day ago

Removing streamlit caching on the plotting function (plot_chronogram, last one called) reveals another error I encounter often:

Traceback (most recent call last):
    File "<project_root>/decorators.py", line 68, in wrapper
        result = main_func(*args, **kwargs)
    File "<project_root>/pages_stm/predictions.py", line 79, in main
        display_predictions_tab(...)
    File "<project_root>/pages_stm/predictions.py", line 229, in display_predictions_tab
        display_chronograms(...)
    File "<project_root>/predictions/plots.py", line 515, in display_chronograms
        plot_chronogram(...)
    File "<project_root>/predictions/plots.py", line 330, in plot_chronogram
        .encode(...)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/vegalite/v5/schema/channels.py", line 31233, in encode
        kwargs = _infer_encoding_types(args, kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/utils/core.py", line 964, in infer_encoding_types
        return cache.infer_encoding_types(kwargs)
    File "<project_root>/venv/lib/python3.11/site-packages/altair/utils/core.py", line 870, in infer_encoding_types
        return { ... }
    File "<project_root>/venv/lib/python3.11/site-packages/altair/utils/core.py", line 870, in <dictcomp>
        return { ... }

Error: RuntimeError: dictionary changed size during iteration
dangotbanned commented 6 hours ago

@gaspardc-met

Thanks for the additional traceback.

Issues

I'm trying to piece together all the issues you've raised related to altair in streamlit.

As you've seen in comment, removing streamlit caching has revealed this is actually the same issue as #3554.

Versions

Issue Working Broken
streamlit#8409 ??? 5.2.0
#3554 5.3.0 5.4.0
#3634 5.3.0 5.4.*

From this I can only conclude that #3444 is probably unrelated, since that was introduced in 5.4.0 but you had the same problem in 5.2.0

Looking at the previous functionality - you can see that this code has not changed in a meaningful way in any 5.* release. Therefore I suspect the underlying problem is something else.

Possible Fix

As you have not provided a reproducible example, I can only guess what might be the cause.

It could be possible that alt.Chart().encode() is not thread-safe, and this could be addressed as part of #3589.

Reading PEP 667, made me think the use of locals() might be related.

I'm able to make the change below without causing a regression in versions we currently support (python>=3.8,<3.13):

diff --git a/altair/vegalite/v5/schema/channels.py

```diff diff --git a/altair/vegalite/v5/schema/channels.py b/altair/vegalite/v5/schema/channels.py index c978330a..327f5cc9 100644 --- a/altair/vegalite/v5/schema/channels.py +++ b/altair/vegalite/v5/schema/channels.py @@ -11,6 +11,7 @@ from __future__ import annotations # However, we need these overloads due to how the propertysetter works # mypy: disable-error-code="no-overload-impl, empty-body, misc" import sys +import threading from typing import TYPE_CHECKING, Any, Literal, Sequence, TypedDict, Union, overload if sys.version_info >= (3, 10): @@ -25257,14 +25258,15 @@ class _EncodingMixin: Offset of y-position of the marks """ # Compat prep for `infer_encoding_types` signature - kwargs = locals() - kwargs.pop("self") - args = kwargs.pop("args") - if args: - kwargs = {k: v for k, v in kwargs.items() if v is not Undefined} - - # Convert args to kwargs based on their types. - kwargs = _infer_encoding_types(args, kwargs) + with threading.Lock(): + kwargs = locals().copy() + kwargs.pop("self") + args = kwargs.pop("args") + if args: + kwargs = {k: v for k, v in kwargs.items() if v is not Undefined} + + # Convert args to kwargs based on their types. + kwargs = _infer_encoding_types(args, kwargs) # get a copy of the dict representation of the previous encoding # ignore type as copy method comes from SchemaBase copy = self.copy(deep=["encoding"]) # type: ignore[attr-defined] ```

You would need to test this locally and report back if it fixes the issue.

However, I do not see this change getting merged into altair without at least one supporting test. For that, only you can help move this forward by taking the time to write a minimal repro. I cannot see much of an effort to do so since I last requested this in comment

Example

@gaspardc-met in description

However, it might give you an idea of the plot I use. The error happens in the first call to .encode in the function .mark_bar.encode()

I can't know exactly how much of the original code was related.

This is what a more minimal version would look like to me:

Code block

**NOTE**: Still not a minimal repro, since we need to be able to reproduce the error ```py import numpy as np import pandas as pd import streamlit as st # type: ignore import altair as alt # Mocked dependencies and constants SCALE = alt.Scale( domain=[0, 0.25, 0.5, 0.75, 1.0], range=["#f7fbff", "#c6dbef", "#6baed6", "#2171b5", "#08306b"], type="threshold", ) @st.cache_data(show_spinner=False, ttl=60 * 60) # TTL in seconds def plot_chronogram(data: pd.DataFrame) -> alt.Chart: legend = "Load Legend" y = alt.Y( "machine:N", title="Utilisation: Groupes ou AFC", sort=["", "A", "B", "C"] ) color = alt.Color( "load:Q", title="Load", scale=SCALE, legend=alt.Legend(title=legend) ) tooltip = ( alt.Tooltip("start_time:T", format="%Y-%m-%d", title="Date"), alt.Tooltip("start_time:T", format="%H:%M", title="Heure"), alt.Tooltip("load:Q", format=".0f", title=legend), ) chart = ( alt.Chart(data) .mark_bar() .encode( x=alt.X("start_time:T", title="Horizon Temporel"), x2=alt.X2("end_time:T"), y=y, color=color, stroke=alt.value("white"), strokeWidth=alt.value(2), tooltip=tooltip, ) ) return chart # Main block to generate and display the chart if __name__ == "__main__": # Generate fake data start = pd.date_range(start="2022-01-01", periods=48, freq="30min") data = pd.DataFrame( { "machine": np.random.choice(["A", "B", "C"], 48), # noqa: NPY002 "load": np.random.rand(48), # noqa: NPY002 "start_time": start, "end_time": start + pd.Timedelta("60min"), } ) chart = plot_chronogram(data) st.altair_chart(chart) ```

Disregarding the streamlit parts, you can see the output here

Image