plotly / plotly.py

The interactive graphing library for Python :sparkles: This project now includes Plotly Express!
https://plotly.com/python/
MIT License
16.38k stars 2.56k forks source link

Pandas import error #2433

Open alexcjohnson opened 4 years ago

alexcjohnson commented 4 years ago

A Dash user is seeing an error from pandas without ever directly importing pandas, just dash, plotly, and numpy. https://community.plotly.com/t/callback-error-when-plotting-multiple-graph-objects/38756

The error occurs inside plotly when the Dash app tries to render one of the plots in a callback:

Traceback (most recent call last):
  File "/Users/alex/plotly/fiddle/f.py", line 74, in update_dist_plot
    "data": [go.Bar(x=bins, y=counts)],
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/plotly/graph_objs/__init__.py", line 93149, in __init__
    self["x"] = x if x is not None else _v
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/plotly/basedatatypes.py", line 3490, in __setitem__
    self._set_prop(prop, value)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/plotly/basedatatypes.py", line 3772, in _set_prop
    val = validator.validate_coerce(val)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/_plotly_utils/basevalidators.py", line 385, in validate_coerce
    v = copy_to_readonly_numpy_array(v)
  File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/site-packages/_plotly_utils/basevalidators.py", line 93, in copy_to_readonly_numpy_array
    if pd and isinstance(v, (pd.Series, pd.Index)):
AttributeError: module 'pandas' has no attribute 'Series'

That's the error I see - the OP's error message is a little more extensive, which is a little funny because we both report pandas v1.0.3

AttributeError: partially initialized module ‘pandas’ has no attribute ‘Series’ (most likely due to a circular import)

I can reproduce locally with this app:

import dash_core_components as dcc
import dash_html_components as html
from dash import Dash
from dash.dependencies import Input, Output
import plotly.graph_objs as go
import numpy as np

# Dash application object; the callbacks defined further down register on it.
app = Dash(__name__)

# Radio selector whose value is the single input to every callback.
_options_panel = html.Div(
    [
        html.Br(),
        html.Label("Plotting Options"),
        dcc.RadioItems(
            id="trunk-angle-radio",
            options=[
                {"label": plane, "value": plane}
                for plane in ("Sagittal", "Lateral", "Twist")
            ],
            value="Sagittal",
        ),
    ]
)

# Two graphs, each styled at 48% width; the second also gets "float": "right".
_half_width = {"width": "48%", "display": "inline-block"}
_graphs_panel = html.Div(
    [
        html.Div([dcc.Graph(id="trunk-angle-plot")], style=dict(_half_width)),
        html.Div(
            [dcc.Graph(id="trunk-angle-dist")],
            style=dict(_half_width, float="right"),
        ),
    ]
)

# Heading plus the container that the statistics callback fills in.
_stats_panel = html.Div(
    [
        html.Label("Data Statistics"),
        html.Div(id="data-stats-div", style={"padding": 10}),
    ]
)

app.layout = html.Div([_options_panel, _graphs_panel, _stats_panel])

@app.callback(
    Output("trunk-angle-plot", "figure"), [Input("trunk-angle-radio", "value")]
)
def update_angle_plot(radio_option):
    """Return the time-series figure for the selected radio option.

    The figure is a plain dict holding one ``go.Scatter`` trace built from
    ``get_trunk_angles(radio_option)`` and a ``go.Layout`` with axis titles.
    """
    times, angles = get_trunk_angles(radio_option)
    trace = go.Scatter(x=times, y=angles, mode="lines+markers")
    layout = go.Layout(
        title="Trunk Angle Time Series Plot",
        xaxis={"title": "Time (sec)"},
        yaxis={"title": "Degrees"},
    )
    return {"data": [trace], "layout": layout}

@app.callback(
    Output("trunk-angle-dist", "figure"), [Input("trunk-angle-radio", "value")]
)
def update_dist_plot(radio_option):
    """Return a bar-chart figure of the angle histogram for the selected option.

    The y values from ``get_trunk_angles(radio_option)`` are histogrammed with
    bin edges ``range(-90, 91, 30)``. Bars are placed at the bin midpoints,
    which are also used as the x-axis tick values.
    """
    _, angles = get_trunk_angles(radio_option)
    counts, bins = np.histogram(angles, bins=range(-90, 91, 30))
    # np.histogram returns one more edge than counts. Shift only the left
    # edges by half a bin width so there is exactly one midpoint per count.
    bins = bins[:-1] + (bins[1] - bins[0]) / 2
    fig = {
        "data": [go.Bar(x=bins, y=counts)],
        "layout": go.Layout(
            title="Trunk Angle Distributions",
            xaxis={
                "title": "Bin midpoint (degrees)",
                "tickmode": "array",
                "tickvals": bins,
                "ticktext": [str(int(midpoint)) for midpoint in bins],
            },
            yaxis={"title": "Percentage of time"},
        ),
    }
    return fig

@app.callback(
    Output("data-stats-div", "children"), [Input("trunk-angle-radio", "value")]
)
def update_stats(radio_option):
    """Return a list of ``html.Div`` rows summarising the selected angle data.

    The rows report, in order, the minimum, maximum, mean, standard deviation
    and range of the y values from ``get_trunk_angles(radio_option)``.
    """
    _, angles = get_trunk_angles(radio_option)
    lowest = np.min(angles)
    highest = np.max(angles)
    # (format template, value, component id) for each row, in display order.
    rows = (
        ("Minimum: {}", lowest, "trunk-angle-dist-min"),
        ("Maximum: {}", highest, "trunk-angle-dist-max"),
        ("Mean: {:.2f}", np.mean(angles), "trunk-angle-dist-mean"),
        ("Standard Deviation: {:.2f}", np.std(angles), "trunk-angle-dist-std"),
        ("Range: {}", highest - lowest, "trunk-angle-dist-range"),
    )
    return [
        html.Div(template.format(value), id=div_id)
        for template, value, div_id in rows
    ]

def get_trunk_angles(radio_option):
    """Return dummy ``(x, y)`` trunk-angle data for the selected radio option.

    Every supported option produces data of the same kind: ``x`` is 101 evenly
    spaced values from 0 to 50, and ``y`` is 101 random integers drawn with
    ``np.random.randint(-90, 90, 101)``.

    Raises:
        ValueError: if ``radio_option`` is not "Sagittal", "Lateral" or
            "Twist". Previously this surfaced as an UnboundLocalError at the
            ``return`` statement.
    """
    if radio_option not in ("Sagittal", "Lateral", "Twist"):
        raise ValueError("Unknown radio option: {!r}".format(radio_option))

    dummy_x = np.linspace(0, 50, 101)
    dummy_y = np.random.randint(-90, 90, 101)

    return (dummy_x, dummy_y)

# Start the app's server with debug=True, but only when run as a script.
if __name__ == "__main__":
    app.run_server(debug=True)
alexcjohnson commented 4 years ago

I can make the error go away by adding import pandas at the top of the posted app. Then I just have to ignore the linter error about unused imports...

emmanuelle commented 4 years ago

@alexcjohnson when did you get the error, when running the script or when triggering a specific callback from the UI? I cannot reproduce (I tried changing the value of the RadioItems)

alexcjohnson commented 4 years ago

The error appears for me when first loading the app in the browser:

Screen Shot 2020-05-04 at 5 33 42 PM

If I reload the page everything is fine thereafter - so it's just the first time these callbacks are called after starting the app. Maybe the two callbacks are being called in parallel threads and this is a race condition? One callback starts importing pandas but doesn't finish... the second one sees pandas as already loaded and tries to use it prematurely?

emmanuelle commented 4 years ago

Interesting. Hmm, maybe this is indeed what happens. Probably if you initialize the dcc.Graph with an empty go.Figure() then the problem should disappear?

simaiden commented 4 years ago

I have the same error when I try to update a graph using callback

MCBoarder289 commented 4 years ago

Not sure if this is 100% related, but I put up a PR recently for a similar issue (https://github.com/plotly/plotly.py/issues/2391) involving ValueErrors thrown with numpy imports. Not a dash rendering issue, but I wonder if there's a more efficient way we can do type checks involving different imports with libraries like numpy/pandas?

Interesting that OP has a "partially initialized module error". That seems to support the race condition theory and why it might only fail on initialization.

MCBoarder289 commented 4 years ago

I'm not able to reproduce locally either, and wonder if it's dependent on the machine itself (perhaps low memory/cpu when running the app? so loading pandas is taking longer than it takes to make that initial callback)

atharva-2001 commented 3 years ago
import pandas
import numpy as np
import math
from plotly import graph_objects as go
# import plotly.express as px

# Segment end points, plus a third point on the line through them at
# parameter u (u = 0 gives `tip`, u = 1 gives `end`).
tip = [0, 0, 0]
end = [10, 10, 10]
distance = 10
u = distance
x_, y_, z_ = [(1 - u) * start + u * stop for start, stop in zip(tip, end)]

# NOTE(review): the traceback posted with this script shows it saved as
# tests/dis.py, with the standard library's inspect.py running `import dis`
# and landing back in this file. The file name looks like it shadows the
# stdlib `dis` module; confirm by renaming the file.
fig = go.Figure(
        data=go.Scatter3d(
                x=[x_, tip[0], end[0]],
                y=[y_, tip[1], end[1]],
                z=[z_, tip[2], end[2]],
        )
)
fig.show()

I was able to get the error with just this block of simple code. I tried fixing multiple python installations as well. Creating a virtual environment also doesn't help. Will be really grateful if you could fix this!

Here is the error log

  File "C:\Users\Administrator\Downloads\workspace\wna\wiki-nearby-articles\tests\dis.py", line 1, in <module>
    import pandas
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pandas\__init__.py", line 51, in <module>
    from pandas.core.api import (
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pandas\core\api.py", line 14, in <module>
    from pandas.core.algorithms import factorize, unique, value_counts
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pandas\core\algorithms.py", line 16, in <module>
    from pandas.util._decorators import doc
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pandas\util\__init__.py", line 1, in <module>
    from pandas.util._decorators import Appender, Substitution, cache_readonly  # noqa
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\pandas\util\_decorators.py", line 2, in <module>
    import inspect
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.9_3.9.496.0_x64__qbz5n2kfra8p0\lib\inspect.py", line 36, in <module>
    import dis
  File "C:\Users\Administrator\Downloads\workspace\wna\wiki-nearby-articles\tests\dis.py", line 24, in <module>
    data=go.Scatter3d(
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\plotly\graph_objs\_scatter3d.py", line 2401, in __init__
    self["x"] = _v
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\plotly\basedatatypes.py", line 4804, in __setitem__
    self._set_prop(prop, value)
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\plotly\basedatatypes.py", line 5143, in _set_prop
    val = validator.validate_coerce(val)
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\_plotly_utils\basevalidators.py", line 388, in validate_coerce
    elif is_homogeneous_array(v):
  File "C:\Users\Administrator\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\_plotly_utils\basevalidators.py", line 176, in is_homogeneous_array
    or (pd and isinstance(v, (pd.Series, pd.Index)))
AttributeError: partially initialized module 'pandas' has no attribute 'Series' (most likely due to a circular import)
MCBoarder289 commented 3 years ago

Still haven't been able to reproduce locally with my machine, with any of these provided examples... Makes me think this could potentially be hardware/machine specific and some timing issues loading the library.

Anyone have any other thoughts on how we might be able to reproduce better or locate a good place to start making some improvements?

mhwaage commented 3 years ago

I have a scenario that seems to reproduce this type of error fairly consistently:

from uuid import uuid4

import flask
import dash
from dash import html, dcc
from dash.dependencies import Output, Input

import plotly.graph_objects as go

# Flask server wrapped by the Dash app; the gunicorn command quoted after this
# script targets `server`.
server = flask.Flask('app')
app = dash.Dash('app', server=server)

n_comps = 10  # number of graph ids generated below
buttton_id = "click_me"  # id of the button whose n_clicks feeds every callback
n_clicks_counter_id = str(uuid4())  # random id for the click-counter Div
figids = [str(uuid4()) for _ in range(n_comps)]  # one random id per dcc.Graph

@app.callback(Output(n_clicks_counter_id, "children"), Input(buttton_id, "n_clicks"))
def update_count(n_clicks):
    """Return the button's ``n_clicks`` value as a string for the counter Div."""
    counter_text = str(n_clicks)
    return counter_text

# Register one figure callback per graph id; all of them listen to the same
# button.
for figid in figids:
    @app.callback(
        Output(figid, "figure"),
        Input(buttton_id, "n_clicks"),
    )
    def plot(val):
        """Return a bar-chart figure whose first x value is ``1 + val``."""
        # val can be None (presumably before the first click); treat it as 0.
        if val is None:
            val = 0
        figure = go.Figure(
            data=[go.Bar(x=[1+val, 2, 3], y=[1, 3, 2])],
            layout=go.Layout(
                title=go.layout.Title(text="A Figure Specified By A Graph Object")
            )
        )
        return figure

# Assign the layout once, after the loop. It was previously nested inside the
# loop, so it was rebuilt on every iteration and would never have been set
# for an empty `figids`.
app.layout = html.Div(
    id="main-title",
    children=[
        html.Button(id=buttton_id, children="click me"),
        html.Div(children="0", id=n_clicks_counter_id),
    ]
    + [dcc.Graph(id=figid) for figid in figids],
)

# Start the app's server only when run as a script; the reproduction quoted
# after this script starts it through gunicorn instead.
if __name__ == '__main__':
    app.run_server()

Instead of running the script directly, start the server with gunicorn set to do semi-frequent worker reboots; e.g. gunicorn --bind 0.0.0.0:5000 --keep-alive 120 --max-requests 40 --preload --workers 2 --worker-class gthread --threads 2 --timeout 1000000 "reproducing_module:server".

Then, click the button repeatedly; it should trigger an internal error fairly frequently. With dash 2.0 + orjson installed, it seems to first trigger a bad import of orjson:

Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/dash/dash.py", line 1336, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/dash/_callback.py", line 191, in add_context
    jsonResponse = to_json(response)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/dash/_utils.py", line 21, in to_json
    return to_json_plotly(value)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/plotly/io/_json.py", line 127, in to_json_plotly
    opts = orjson.OPT_SORT_KEYS | orjson.OPT_SERIALIZE_NUMPY
AttributeError: partially initialized module 'orjson' has no attribute 'OPT_SORT_KEYS' (most likely due to a circular import)

If orjson is pre-emptively imported, it will instead break on a numpy import:

Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 2073, in wsgi_app
    response = self.full_dispatch_request()
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 1518, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 1516, in full_dispatch_request
    rv = self.dispatch_request()
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/flask/app.py", line 1502, in dispatch_request
    return self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/dash/dash.py", line 1336, in dispatch
    response.set_data(func(*args, outputs_list=outputs_list))
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/dash/_callback.py", line 151, in add_context
    output_value = func(*func_args, **func_kwargs)  # %% callback invoked %%
  File "/home/mhwa/local/temp/temp/try_to_break.py", line 36, in plot
    data=[go.Bar(x=[1+val, 2, 3], y=[1, 3, 2])],
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/plotly/graph_objs/_bar.py", line 3174, in __init__
    self["x"] = _v
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/plotly/basedatatypes.py", line 4819, in __setitem__
    self._set_prop(prop, value)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/plotly/basedatatypes.py", line 5158, in _set_prop
    val = validator.validate_coerce(val)
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/_plotly_utils/basevalidators.py", line 400, in validate_coerce
    elif is_homogeneous_array(v):
  File "/home/mhwa/.cache/pypoetry/virtualenvs/temp-BbzQGPLE-py3.8/lib/python3.8/site-packages/_plotly_utils/basevalidators.py", line 187, in is_homogeneous_array
    and isinstance(v, np.ndarray)
AttributeError: partially initialized module 'numpy' has no attribute 'ndarray' (most likely due to a circular import)

I tried this on a couple of machines and it seems to reproduce consistently.