burnash / gspread

Google Sheets Python API
https://docs.gspread.org
MIT License
7.09k stars 948 forks source link

Pivot Table Implementation #824

Open ryan-pixel opened 4 years ago

ryan-pixel commented 4 years ago

I'm trying to see if it's possible to create a pivot table using gspread. I know that the Google Sheets API v4 can do this with the use of the batchUpdate method with a updateCells request and am trying to see if it's possible with gspread's batch update or update cells, but they seem to be different from one another.

shivavelingker commented 3 years ago

@ryan-pixel did you figure a workaround out? This is a feature I need as well

lomnes-atlast-food commented 11 months ago

@lavigne958 Here is a helper class that helps you define a pivot table using the google sheets API in conjunction with gspread. It likely is not complete and tested fully but perhaps this is helpful to anyone who wants this feature. An example of it being used is here. I suspect that there is a cleaner way to do this if it were to be added into the gspread API. Hopefully useful to someone!

from typing import Optional

from gspread import Spreadsheet, Worksheet

class PivotTableCreator:
    """helper class to create pivot tables with the Google Sheets API v4 designed to work with gspread
    see https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/pivot-tables
    and https://docs.gspread.org/en/latest/api/index.html
    """

    def __init__(self, spreadsheet: Spreadsheet):
        self.spreadsheet = spreadsheet
        self.pivot_config = {"rows": [], "columns": [], "values": [], "filterSpecs": []}
        self.source_range = None
        self.target_range = None

    def set_data_source_id(self, data_source_id: str):
        """Sets the data source ID for the pivot table when using data from a source external to the sheet.
        Examples can include sources like BigQuery. Either this method or .set_source()
        should be called, but not both.

        Args:
            data_source_id (str): The ID of the data source the pivot table is reading data from.

        Reference:
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets#datasource
        """
        self.pivot_config["dataSourceId"] = data_source_id

    def set_source(
        self,
        worksheet: Worksheet,
        start_row: int = 0,
        end_row: Optional[int] = None,
        start_col: int = 0,
        end_col: Optional[int] = None,
    ):
        """Defines the source data range for the pivot table when using source data in a google sheet.
        Either this method or .set_data_source_id() should be called, but not both.

        Args:
            worksheet (Worksheet): worksheet containing the source data
            start_row (int, optional): _description_. Defaults to 0.
            end_row (Optional[int], optional): _description_. Defaults to None.
            start_col (int, optional): _description_. Defaults to 0.
            end_col (Optional[int], optional): _description_. Defaults to None.
        Reference:
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/other#GridRange
        """
        self.source_range = {
            "sheetId": worksheet.id,
            "startRowIndex": start_row,
            "startColumnIndex": start_col,
            "endRowIndex": end_row or worksheet.row_count,
            "endColumnIndex": end_col or worksheet.col_count,
        }

    def set_target(
        self,
        worksheet: Optional[Worksheet] = None,
        start_row: int = 0,
        start_col: int = 0,
    ):
        """Defines the worksheet and range to place the pivot table. If no worksheet
        is provided, the pivot table will be placed in a new worksheet.

        Args:
            worksheet (Worksheet, optional): gspread worksheet object. Defaults to None.
            start_row (int, optional): starting row index. Defaults to 0.
            start_col (int, optional): starting column index. Defaults to 0.
        """
        if worksheet is None:
            worksheet = self.spreadsheet.add_worksheet(title="Pivot Table")

        self.target_range = {
            "sheetId": worksheet.id,
            "rowIndex": start_row,
            "columnIndex": start_col,
        }

    def add_row_group(
        self,
        source_column_offset: int,
        show_totals: bool = True,
        sort_order: str = "ASCENDING",
    ):
        """Adds a row group to the pivot table.  Use this method multiple times to add
        multiple row groups.  The order in which they are added is the order that they
        will appear in the pivot table.

        Args:
            source_column_offset (int): reference to the column index of the source data.
                                        The base index is 0.
            show_totals (bool, optional): show total rows. Defaults to True.
            sort_order (str, optional): "ASCENDING" or "DESCENDING". Defaults to "ASCENDING".

        Reference:
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/pivot-tables#PivotGroup
        """
        self.pivot_config["rows"].append(
            {
                "sourceColumnOffset": source_column_offset,
                "showTotals": show_totals,
                "sortOrder": sort_order,
            }
        )

    def add_column_group(
        self,
        source_column_offset: int,
        show_totals: bool = False,
        sort_order: str = "ASCENDING",
    ):
        """Adds a column group to the pivot table.  Use this method multiple times to add
        multiple column groups.  The order in which they are added is the order that they
        will appear in the pivot table.

        Args:
            source_column_offset (int): reference to the column index of the source data.
                                        The base index is 0.
            show_totals (bool, optional): show total columns. Defaults to False.
            sort_order (str, optional): "ASCENDING" or "DESCENDING". Defaults to "ASCENDING".
        Reference:
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/pivot-tables#PivotGroup
        """
        self.pivot_config["columns"].append(
            {
                "sourceColumnOffset": source_column_offset,
                "showTotals": show_totals,
                "sortOrder": sort_order,
            }
        )

    def add_value(self, source_column_offset: int, summarize_function="SUM"):
        """Defines the values for the pivot table.  Call this method multiple times to
        to add multiple values.  The order in which they are added is the order that
        they will appear in the pivot table.

        Args:
            source_column_offset (int): reference to the column index of the source data.
                                        The base index is 0.
            summarize_function (str, optional): Function to aggregate values. Defaults to "SUM".
                See documnentation for a complete list of options.

        Reference:
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/pivot-tables#PivotValue
        """
        self.pivot_config["values"].append(
            {
                "sourceColumnOffset": source_column_offset,
                "summarizeFunction": summarize_function,
            }
        )

    def add_filter_spec(
        self,
        column_offset_index: int,
        visible_values: list,
        condition_type: str,
        condition_values: list,
        visible_by_default: bool = True,
    ):
        """Add a filter to the pivot table.  Call this method multiple times to add
        multiple filters.  The order in which they are added is the order that they
        will appear in the pivot table.

        Args:
            column_offset_index (int): The index of the column to filter on
            visible_values (list): Values that should be included. Values not listed here are excluded.
            condition_type (str): _description_
            condition_values (list): _description_
        References:
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/pivot-tables#PivotFilterSpec
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/pivot-tables#PivotFilterCriteria
        """
        filter_criteria = {
            "visibleValues": visible_values,
            "condition": {
                "type": condition_type,
                "values": [{"userEnteredValue": str(val)} for val in condition_values],
            },
            "visibleByDefault": visible_by_default,
        }
        self.pivot_config["filterSpecs"].append(
            {
                "columnOffsetIndex": column_offset_index,
                "filterCriteria": filter_criteria,
            }
        )

    def set_value_layout(self, layout: str = "HORIZONTAL"):
        """The layout of pivot values. Either "HORIZONTAL" or "VERTICAL"
        Horizontal: Pivot Values are laid out across columns.
        Vertical: Pivot Values are laid out down rows.

        Args:
            layout (str, optional): "HORIZONTAL" or "VERTICAL. Defaults to "HORIZONTAL".

        Reference:
        https://developers.google.com/sheets/api/reference/rest/v4/spreadsheets/pivot-tables#PivotValueLayout
        """
        self.pivot_config["valueLayout"] = layout

    @property
    def request_body(self):
        return {
            "requests": [
                {
                    "updateCells": {
                        "rows": [
                            {
                                "values": [
                                    {
                                        "pivotTable": {
                                            **self.pivot_config,
                                            "source": self.source_range,
                                        }
                                    }
                                ]
                            }
                        ],
                        "start": self.target_range,
                        "fields": "pivotTable",
                    }
                }
            ]
        }

if __name__ == "__main__":
    # Example usage:
    import gspread
    from oauth2client.service_account import ServiceAccountCredentials

    scope = [
        "https://spreadsheets.google.com/feeds",
        "https://www.googleapis.com/auth/drive",
    ]
    cred_file = "path/to_your/credentials.json"
    credentials = ServiceAccountCredentials.from_json_keyfile_name(
        filename=cred_file, scopes=scope
    )
    gc = gspread.authorize(credentials)

    # Get the Spreadsheet and Worksheets
    sheet_name = "your_spreadsheet_title"
    ss = gc.open(sheet_name)
    ws_source = ss.get_worksheet(0)
    ws_target = ss.get_worksheet(1)

    # Initialize the pivot table creator
    pivot_table = PivotTableCreator(ss)

    # Configure source, target, rows, columns, values, filters, etc.
    # .set_source() or .set_data_source_id() but not both
    pivot_table.set_source(
        worksheet=ws_source, start_row=0, end_row=None, start_col=0, end_col=None
    )
    pivot_table.add_row_group(source_column_offset=2)
    pivot_table.add_row_group(
        source_column_offset=2, show_totals=False, sort_order="DESCENDING"
    )
    pivot_table.add_column_group(source_column_offset=12)
    pivot_table.add_value(source_column_offset=10)
    pivot_table.add_filter_spec(
        column_offset_index=12,
        visible_values=["2020", "2021", "2022"],
        condition_type="NUMBER_BETWEEN",
        condition_values=[2022, 2026],
    )
    pivot_table.set_value_layout("HORIZONTAL")
    pivot_table.set_target(worksheet=ws_target, start_row=0, start_col=0)

    # Execute the update
    ss.batch_update(pivot_table.request_body)