holoviz / panel

Panel: The powerful data exploration & web app framework for Python
https://panel.holoviz.org
BSD 3-Clause "New" or "Revised" License
4.69k stars 508 forks source link

Add Mosaic Widget #7358

Open MarcSkovMadsen opened 4 hours ago

MarcSkovMadsen commented 4 hours ago

Mosaic is a powerful dataviz library with 800 GitHub stars. It especially shines when used for cross-filtering. It's created as an AnyWidget.

It would be really nice to add a Mosaic pane to Panel with the same Python API as the AnyWidget. Alternatively, get it added to the mosaic package.

It would also test/showcase that the AnyWidgetComponent is compatible with AnyWidget.

Additional Context

The mosaic widget is defined in https://github.com/uwdata/mosaic/blob/main/packages/widget/mosaic_widget/__init__.py.

I tried turning it into a Panel component.

from __future__ import annotations

import logging
import time

import duckdb
import panel as pn
import param
import pyarrow as pa
import requests
from panel.custom import AnyWidgetComponent

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

SLOW_QUERY_THRESHOLD = 5000

@pn.cache
def _get_esm():
    """Download the Mosaic widget's JavaScript source from GitHub.

    The ``import './style.css';`` statement is removed from the downloaded
    text before it is returned.

    Returns:
        str: The module source with the CSS import stripped.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # NOTE(review): this URL tracks the ``main`` branch while the class import
    # map pins the 0.11.0 packages -- the two may drift apart; confirm.
    # NOTE(review): ``requests`` timeouts are expressed in seconds, so 1000 is
    # very generous -- confirm this is intended.
    response = requests.get(
        "https://raw.githubusercontent.com/uwdata/mosaic/refs/heads/main/packages/widget/src/index.js",
        timeout=1000,
    )
    # Fail loudly instead of handing an HTML/plain-text error page to the
    # browser as if it were the ES module.
    response.raise_for_status()
    return response.text.replace("import './style.css';", "")

@pn.cache
def _get_css():
    """Download the Mosaic widget's stylesheet from GitHub.

    Returns:
        str: The raw CSS text.

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # NOTE(review): ``requests`` timeouts are expressed in seconds, so 1000 is
    # very generous -- confirm this is intended.
    response = requests.get(
        "https://raw.githubusercontent.com/uwdata/mosaic/refs/heads/main/packages/widget/src/style.css",
        timeout=1000,
    )
    # Fail loudly instead of using an error page body as the stylesheet.
    response.raise_for_status()
    return response.text

class MosaicWidget(AnyWidgetComponent):
    """Mosaic widget implemented as a Panel ``AnyWidgetComponent``.

    Messages received from the frontend carry a ``type`` (``"arrow"``,
    ``"exec"`` or ``"json"``), a ``sql`` string and a ``uuid``. The SQL is run
    against the DuckDB connection held in ``self.con`` and the outcome (or an
    error) is sent back tagged with the same ``uuid``.
    """

    _esm = _get_esm()
    _stylesheets = [_get_css()]
    _importmap = {
        "imports": {
            "@uwdata/mosaic-core": "https://esm.sh/@uwdata/mosaic-core@0.11.0",
            "@uwdata/mosaic-spec": "https://esm.sh/@uwdata/mosaic-spec@0.11.0",
            "@uwdata/vgplot": "https://esm.sh/@uwdata/vgplot@0.11.0",
            "uuid": "https://esm.sh/uuid@10.0.0",
        }
    }

    # The Mosaic specification
    spec = param.Dict({})

    # The current params indexed by name
    params = param.Dict({})

    # Where data cube indexes should be created
    # NOTE(review): a schema location reads like text rather than bytes --
    # confirm ``param.Bytes`` is the intended parameter type.
    data_cube_schema = param.Bytes()

    def __init__(
        self,
        spec: dict | None = None,
        con: duckdb.DuckDBPyConnection | None = None,
        data: dict | None = None,
        *args,
        **kwargs,
    ):
        """Create a Mosaic widget.

        Args:
            spec (dict, optional): The initial Mosaic specification. Defaults to {}.
            con (connection, optional): A DuckDB connection.
                Defaults to duckdb.connect().
            data (dict, optional): DataFrames/Arrow objects to "register" with DuckDB.
                Defaults to {}. Keys are table names, values are objects to register as
                virtual tables (similar to SQL VIEWs). Supports pandas/polars DataFrames
                and other Arrow objects.
            *args: Forwarded unchanged to the parent constructor.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        if data is None:
            data = {}
        if spec is None:
            spec = {}
        if con is None:
            con = duckdb.connect()

        super().__init__(*args, **kwargs)
        self.spec = spec
        self.con = con
        for name, df in data.items():
            self.con.register(name, df)
        self.on_msg(self._handle_custom_msg)

    def _handle_custom_msg(self, data: dict, buffers: list | None = None):
        """Run the query described by a frontend message and send the reply.

        Args:
            data: Message payload; the ``uuid``, ``sql`` and ``type`` keys are read.
            buffers: Binary buffers accompanying the message. Only logged.
                Optional so the handler also works when the caller supplies
                the message payload alone.
        """
        # Lazy %-formatting: the (possibly large) payload is only rendered
        # when debug logging is actually enabled.
        logger.debug("data=%r, buffers=%r", data, buffers)
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments.
        start = time.perf_counter()

        # ``.get`` keeps a malformed message from raising before the ``try``;
        # it then falls through to the error reply below instead.
        uuid = data.get("uuid")
        sql = data.get("sql")
        command = data.get("type")

        try:
            if command == "arrow":
                result = self.con.query(sql).arrow()
                # Serialise the Arrow result into an in-memory IPC stream.
                sink = pa.BufferOutputStream()
                with pa.ipc.new_stream(sink, result.schema) as writer:
                    writer.write(result)
                buf = sink.getvalue()

                # NOTE(review): assumes ``send`` accepts a ``buffers`` keyword --
                # confirm against the AnyWidgetComponent API in use.
                self.send({"type": "arrow", "uuid": uuid}, buffers=[buf.to_pybytes()])
            elif command == "exec":
                self.con.execute(sql)
                self.send({"type": "exec", "uuid": uuid})
            elif command == "json":
                result = self.con.query(sql).df()
                records = result.to_dict(orient="records")
                self.send({"type": "json", "uuid": uuid, "result": records})
            else:
                raise ValueError(f"Unknown command {command}")
        except Exception as e:
            # Boundary handler: report the failure to the frontend rather than
            # letting it escape the message callback.
            logger.exception("Error processing query")
            self.send({"error": str(e), "uuid": uuid})

        total = round((time.perf_counter() - start) * 1_000)
        if total > SLOW_QUERY_THRESHOLD:
            logger.warning("DONE. Slow query %s took %s ms.\n%s", uuid, total, sql)
        else:
            logger.info("DONE. Query %s took %s ms.\n%s", uuid, total, sql)

if pn.state.served:
    # Demo app, built only when ``pn.state.served`` is truthy: loads the
    # Seattle weather CSV, registers it as the ``weather`` table and renders
    # a two-panel Mosaic specification.
    import pandas as pd

    weather = pd.read_csv(
        "https://uwdata.github.io/mosaic-datasets/data/seattle-weather.csv",
        parse_dates=["date"],
    )

    # Named values referenced from the plots below as $click/$domain/$colors.
    spec_params = {
        "click": {"select": "single"},
        "domain": ["sun", "fog", "drizzle", "rain", "snow"],
        "colors": ["#e7ba52", "#a7a7a7", "#aec7e8", "#1f77b4", "#9467bd"],
    }

    # Upper panel: dot plot of temp_max by date, filtered by $click and
    # carrying an intervalX selection published as $range.
    dot_mark = {
        "mark": "dot",
        "data": {"from": "weather", "filterBy": "$click"},
        "x": {"dateMonthDay": "date"},
        "y": "temp_max",
        "fill": "weather",
        "r": "precipitation",
        "opacity": 0.7,
    }
    scatter_panel = {
        "plot": [
            dot_mark,
            {"select": "intervalX", "as": "$range"},
            {"select": "highlight", "by": "$range", "fill": "#eee"},
            {"legend": "color", "as": "$click", "columns": 1},
        ],
        "xyDomain": "Fixed",
        "xTickFormat": "%b",
        "colorDomain": "$domain",
        "colorRange": "$colors",
        "rDomain": "Fixed",
        "rRange": [2, 10],
        "width": 800,
    }

    # Lower panel: grey background bars plus bars filtered by $range, with a
    # toggleY selection published as $click.
    background_bars = {
        "mark": "barX",
        "data": {"from": "weather"},
        "x": {"count": None},
        "y": "weather",
        "fill": "#f5f5f5",
    }
    filtered_bars = {
        "mark": "barX",
        "data": {"from": "weather", "filterBy": "$range"},
        "x": {"count": None},
        "y": "weather",
        "fill": "weather",
        "order": "weather",
    }
    bar_panel = {
        "plot": [
            background_bars,
            filtered_bars,
            {"select": "toggleY", "as": "$click"},
            {"select": "highlight", "by": "$click"},
        ],
        "xDomain": "Fixed",
        "yDomain": "$domain",
        "yLabel": None,
        "colorDomain": "$domain",
        "colorRange": "$colors",
        "width": 800,
    }

    spec = {
        "params": spec_params,
        "vconcat": [
            {"hconcat": [scatter_panel]},
            bar_panel,
        ],
    }

    widget = MosaicWidget(spec, data={"weather": weather})
    widget.servable()

Serve it with:

panel serve app.py --dev

The version above fails here:

image

MarcSkovMadsen commented 4 hours ago

I have also tried using the pre-built Mosaic .js and .css instead of the source:

from __future__ import annotations

import logging
import time
from pathlib import Path

import duckdb
import panel as pn
import param
import pyarrow as pa
import requests
from panel.custom import AnyWidgetComponent

logger = logging.getLogger(__name__)
logger.addHandler(logging.NullHandler())

SLOW_QUERY_THRESHOLD = 5000

@pn.cache
def _get_esm():
    """Locate the ``index.js`` file shipped inside the ``mosaic_widget`` package.

    Returns:
        Path: ``<mosaic_widget package directory>/static/index.js``.
    """
    import mosaic_widget

    package_dir = Path(mosaic_widget.__file__).parent
    return package_dir.joinpath("static", "index.js")

@pn.cache
def _get_css():
    """Locate the ``index.css`` file shipped inside the ``mosaic_widget`` package.

    Returns:
        list[Path]: A one-element list holding
        ``<mosaic_widget package directory>/static/index.css``.
    """
    import mosaic_widget

    static_dir = Path(mosaic_widget.__file__).parent.joinpath("static")
    stylesheet = static_dir / "index.css"
    return [stylesheet]

class MosaicWidget(AnyWidgetComponent):
    """Mosaic widget implemented as a Panel ``AnyWidgetComponent``.

    Messages received from the frontend carry a ``type`` (``"arrow"``,
    ``"exec"`` or ``"json"``), a ``sql`` string and a ``uuid``. The SQL is run
    against the DuckDB connection held in ``self.con`` and the outcome (or an
    error) is sent back tagged with the same ``uuid``.
    """

    _esm = _get_esm()
    _stylesheets = _get_css()
    _importmap = {
        "imports": {
            "@uwdata/mosaic-core": "https://esm.sh/@uwdata/mosaic-core@0.11.0",
            "@uwdata/mosaic-spec": "https://esm.sh/@uwdata/mosaic-spec@0.11.0",
            "@uwdata/vgplot": "https://esm.sh/@uwdata/vgplot@0.11.0",
            "uuid": "https://esm.sh/uuid@10.0.0",
        }
    }

    # The Mosaic specification
    spec = param.Dict({})

    # The current params indexed by name
    params = param.Dict({})

    # Where data cube indexes should be created
    # NOTE(review): a schema location reads like text rather than bytes --
    # confirm ``param.Bytes`` is the intended parameter type.
    data_cube_schema = param.Bytes()

    def __init__(
        self,
        spec: dict | None = None,
        con: duckdb.DuckDBPyConnection | None = None,
        data: dict | None = None,
        *args,
        **kwargs,
    ):
        """Create a Mosaic widget.

        Args:
            spec (dict, optional): The initial Mosaic specification. Defaults to {}.
            con (connection, optional): A DuckDB connection.
                Defaults to duckdb.connect().
            data (dict, optional): DataFrames/Arrow objects to "register" with DuckDB.
                Defaults to {}. Keys are table names, values are objects to register as
                virtual tables (similar to SQL VIEWs). Supports pandas/polars DataFrames
                and other Arrow objects.
            *args: Forwarded unchanged to the parent constructor.
            **kwargs: Forwarded unchanged to the parent constructor.
        """
        if data is None:
            data = {}
        if spec is None:
            spec = {}
        if con is None:
            con = duckdb.connect()

        super().__init__(*args, **kwargs)
        self.spec = spec
        self.con = con
        for name, df in data.items():
            self.con.register(name, df)
        self.on_msg(self._handle_custom_msg)

    def _handle_custom_msg(self, data: dict, buffers: list | None = None):
        """Run the query described by a frontend message and send the reply.

        Args:
            data: Message payload; the ``uuid``, ``sql`` and ``type`` keys are read.
            buffers: Binary buffers accompanying the message. Only logged.
                Optional so the handler also works when the caller supplies
                the message payload alone.
        """
        # Lazy %-formatting: the (possibly large) payload is only rendered
        # when debug logging is actually enabled.
        logger.debug("data=%r, buffers=%r", data, buffers)
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by wall-clock adjustments.
        start = time.perf_counter()

        # ``.get`` keeps a malformed message from raising before the ``try``;
        # it then falls through to the error reply below instead.
        uuid = data.get("uuid")
        sql = data.get("sql")
        command = data.get("type")

        try:
            if command == "arrow":
                result = self.con.query(sql).arrow()
                # Serialise the Arrow result into an in-memory IPC stream.
                sink = pa.BufferOutputStream()
                with pa.ipc.new_stream(sink, result.schema) as writer:
                    writer.write(result)
                buf = sink.getvalue()

                # NOTE(review): assumes ``send`` accepts a ``buffers`` keyword --
                # confirm against the AnyWidgetComponent API in use.
                self.send({"type": "arrow", "uuid": uuid}, buffers=[buf.to_pybytes()])
            elif command == "exec":
                self.con.execute(sql)
                self.send({"type": "exec", "uuid": uuid})
            elif command == "json":
                result = self.con.query(sql).df()
                records = result.to_dict(orient="records")
                self.send({"type": "json", "uuid": uuid, "result": records})
            else:
                raise ValueError(f"Unknown command {command}")
        except Exception as e:
            # Boundary handler: report the failure to the frontend rather than
            # letting it escape the message callback.
            logger.exception("Error processing query")
            self.send({"error": str(e), "uuid": uuid})

        total = round((time.perf_counter() - start) * 1_000)
        if total > SLOW_QUERY_THRESHOLD:
            logger.warning("DONE. Slow query %s took %s ms.\n%s", uuid, total, sql)
        else:
            logger.info("DONE. Query %s took %s ms.\n%s", uuid, total, sql)

if pn.state.served:
    # Demo app, built only when ``pn.state.served`` is truthy: loads the
    # Seattle weather CSV, registers it as the ``weather`` table and renders
    # a two-panel Mosaic specification.
    import pandas as pd

    weather = pd.read_csv(
        "https://uwdata.github.io/mosaic-datasets/data/seattle-weather.csv",
        parse_dates=["date"],
    )

    # Named values referenced from the plots below as $click/$domain/$colors.
    spec_params = {
        "click": {"select": "single"},
        "domain": ["sun", "fog", "drizzle", "rain", "snow"],
        "colors": ["#e7ba52", "#a7a7a7", "#aec7e8", "#1f77b4", "#9467bd"],
    }

    # Upper panel: dot plot of temp_max by date, filtered by $click and
    # carrying an intervalX selection published as $range.
    dot_mark = {
        "mark": "dot",
        "data": {"from": "weather", "filterBy": "$click"},
        "x": {"dateMonthDay": "date"},
        "y": "temp_max",
        "fill": "weather",
        "r": "precipitation",
        "opacity": 0.7,
    }
    scatter_panel = {
        "plot": [
            dot_mark,
            {"select": "intervalX", "as": "$range"},
            {"select": "highlight", "by": "$range", "fill": "#eee"},
            {"legend": "color", "as": "$click", "columns": 1},
        ],
        "xyDomain": "Fixed",
        "xTickFormat": "%b",
        "colorDomain": "$domain",
        "colorRange": "$colors",
        "rDomain": "Fixed",
        "rRange": [2, 10],
        "width": 800,
    }

    # Lower panel: grey background bars plus bars filtered by $range, with a
    # toggleY selection published as $click.
    background_bars = {
        "mark": "barX",
        "data": {"from": "weather"},
        "x": {"count": None},
        "y": "weather",
        "fill": "#f5f5f5",
    }
    filtered_bars = {
        "mark": "barX",
        "data": {"from": "weather", "filterBy": "$range"},
        "x": {"count": None},
        "y": "weather",
        "fill": "weather",
        "order": "weather",
    }
    bar_panel = {
        "plot": [
            background_bars,
            filtered_bars,
            {"select": "toggleY", "as": "$click"},
            {"select": "highlight", "by": "$click"},
        ],
        "xDomain": "Fixed",
        "yDomain": "$domain",
        "yLabel": None,
        "colorDomain": "$domain",
        "colorRange": "$colors",
        "width": 800,
    }

    spec = {
        "params": spec_params,
        "vconcat": [
            {"hconcat": [scatter_panel]},
            bar_panel,
        ],
    }

    widget = MosaicWidget(spec, data={"weather": weather})
    widget.servable()

But it also fails:

image

Maybe because the Mosaic widget is using older or undocumented AnyWidget functionality that the Panel AnyWidgetComponent does not support.