JanMarvin / openxlsx2

openxlsx2 - read, write and modify xlsx files
https://janmarvin.github.io/openxlsx2/
Other
115 stars 11 forks source link

custom Autofilter #129

Closed JanMarvin closed 2 days ago

JanMarvin commented 2 years ago

https://github.com/ycphs/openxlsx/issues/76

this should be simpler to do now. row_attr is a data frame, just needs a check which rows to hide

JanMarvin commented 1 year ago

See also https://stackoverflow.com/questions/50601438/add-a-filter-to-an-openxlsx-object-with-a-query-activated/75807739#75807739

JanMarvin commented 1 year ago

A working draft

library(openxlsx2)
wb <- wb_workbook()$
  add_worksheet()$
  add_data(x = mtcars)$
  add_filter(rows = 1, cols = seq_along(mtcars))

# filterColumn c-vector
# operator equal, greaterThan, greaterThanOrEqual, lessThan, lessThanOrEqual, notEqual
autofilter <- 
'<autoFilter ref="A1:K33">
  <filterColumn colId="0">
    <customFilters>
      <customFilter operator="greaterThan" val="20" />
    </customFilters>
  </filterColumn>
  <filterColumn colId="1">
    <filters>
      <filter val = "4"/>
    </filters>
  </filterColumn>
</autoFilter>'

wb$worksheets[[1]]$autoFilter <- autofilter
wb$worksheets[[1]]$sheetPr <- '<sheetPr filterMode="1" />'

# # not really required?
# wb$workbook$definedNames <- "<definedName name=\"_xlnm._FilterDatabase\" localSheetId=\"0\" hidden=\"1\">'Sheet 1'!$A$1:$K$33</definedName>"
dt <- wb_data(wb)
to_show <- rownames(dt[dt$mpg > 20 & dt$cyl == 4,])
to_hide <- rownames(dt)[!rownames(dt) %in% to_show]

# hide rows
rows <- wb$worksheets[[1]]$sheet_data$row_attr
rows[rownames(rows) %in% to_hide, "hidden"] <- "1"
wb$worksheets[[1]]$sheet_data$row_attr <- rows
JanMarvin commented 1 year ago

Needs to update sortState too

JanMarvin commented 1 day ago
  library(openxlsx2)

  wb <- write_xlsx(x = mtcars, as_table = TRUE)

  rows <- dims_to_dataframe(wb_dims(rows = which(mtcars$cyl != 6) + 1, cols = seq_along(mtcars)), fill = TRUE)$A %>% 
    gsub("\\D+", "", .) %>% 
    as.integer() %>% 
    .[complete.cases(.)]

  wb$set_row_heights(rows = rows, hidden = TRUE)

  auto_filter <- '<autoFilter ref="A1:K33">
  <filterColumn colId="1">
    <filters>
      <filter val="6" />
    </filters>
  </filterColumn>
</autoFilter>'

  wb$tables$tab_xml[1] <- gsub(
    pattern = '<autoFilter ref="A1:K33"/>',
    replacement = auto_filter,
    x = wb$tables$tab_xml[1]
  )

  if (interactive()) wb$open()
JanMarvin commented 17 hours ago

A more complete function to create data table filters. Allows for filters !=, <, <=, ==, %in%, >=, and >:

#' function to create the autofilter xml structure required for openxml
#' @param tabs a wbWorkbook wb$tables object
#' @param tab_name a table name in the workbook
#' @param conditions a named list with conditions to apply to the columns
prepare <- function(tabs, tab_name, conditions) {

  if (!tab_name %in% tabs$tab_name) stop("Table with this tab_name not found.")

  tabs <- tabs[tabs$tab_name == tab_name, , drop = FALSE]

  vars <- openxlsx2:::rbindlist(
    xml_attr(tabs$tab_xml, "table", "tableColumns", "tableColumn")
  )

  cond_vars <- names(conditions)
  if (any(!cond_vars %in% vars$name)) stop("Condtion variable not found in table.")

  ref <- openxlsx2:::rbindlist(xml_attr(tabs$tab_xml, "table"))$ref

  autoFilter <- xml_node_create("autoFilter", xml_attributes = c(ref = ref))

  vars$id <- as.integer(vars$id) - 1L

  aF <- NULL
  for (cond_var in cond_vars) {
    colId <- vars$id[vars$name == cond_var]

    condition <- conditions[[cond_var]]

    ## == default or %in% with multiple values
    if (condition$operator == "equal") {
      filter <- vapply(
        condition$val,
        function(x) {
          xml_node_create("filter", xml_attributes = c(val = openxlsx2:::as_xml_attr(x)))
        },
        FUN.VALUE = NA_character_
      )

      filters      <- xml_node_create("filters")
      filterColumn <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId)))

      filters      <- xml_add_child(filters, filter)
      filterColumn <- xml_add_child(filterColumn, filters)
    }

    ## != notEqual, < lessThan, <= lessThanOrEqual, > greaterThan, >= greaterThanOrEqual
    ## TODO provide replacement function
    ops <- c("notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual")
    if (condition$operator %in% ops) {
      customFilter <- vapply(
        condition$val,
        function(x) {
          xml_node_create(
            "customFilter",
            xml_attributes = c(operator = condition$operator,
                               val = openxlsx2:::as_xml_attr(x))
          )
        },
        FUN.VALUE = NA_character_
      )

      customFilters <- xml_node_create("customFilters")
      filterColumn  <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId)))

      customFilters <- xml_add_child(customFilters, customFilter)
      filterColumn  <- xml_add_child(filterColumn, customFilters)
    }

    aF <- c(aF, filterColumn)
  }

  autoFilter <- xml_add_child(autoFilter, aF)

  autoFilter
}

#' helper to parse the string into something that can be converted into openxml
#' column filters
#' @param filter_expr an expression like "x$cyl != 4"
create_conditions <- function(filter_expr) {
  conditions <- list()

  # Define a helper function to add conditions to the list
  add_condition <- function(column, operator, value) {
    conditions[[column]] <<- list(val = value, operator = operator)
  }

  # should only have &
  fltr_xprs <- strsplit(filter_expr, "&")[[1]]

  for (fltr_xpr in fltr_xprs) {

    # Extract conditions for "equal"
    if (grepl("%in%", fltr_xpr)) {
      column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
      values <- as.numeric(unlist(strsplit(gsub(".*%in% c\\(([^)]*)\\).*", "\\1", fltr_xpr), ", ")))
      add_condition(column_name, "equal", values)
    }

    # Extract conditions for "equal" using ==
    if (grepl("==", fltr_xpr)) {
      column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
      value <- as.numeric(sub(".*== (\\d+).*", "\\1", fltr_xpr))
      add_condition(column_name, "equal", c(value))
    }

    # Extract conditions for "notEqual"
    if (grepl("!=", fltr_xpr)) {
      column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
      value <- as.numeric(sub(".*!= (\\d+).*", "\\1", fltr_xpr))
      add_condition(column_name, "notEqual", c(value))
    }

    # Extract conditions for "lessThan"
    if (grepl("<[^=]", fltr_xpr)) {  # "<" but not "<="
      column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
      value <- as.numeric(sub(".*< (\\d+).*", "\\1", fltr_xpr))
      add_condition(column_name, "lessThan", c(value))
    }

    # Extract conditions for "lessThanOrEqual"
    if (grepl("<=", fltr_xpr)) {
      column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
      value <- as.numeric(sub(".*<= (\\d+).*", "\\1", fltr_xpr))
      add_condition(column_name, "lessThanOrEqual", c(value))
    }

    # Extract conditions for "greaterThan"
    if (grepl(">[^=]", fltr_xpr)) {  # ">" but not ">="
      column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
      value <- as.numeric(sub(".*> (\\d+).*", "\\1", fltr_xpr))
      add_condition(column_name, "greaterThan", c(value))
    }

    # Extract conditions for "greaterThanOrEqual"
    if (grepl(">=", fltr_xpr)) {
      column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
      value <- as.numeric(sub(".*>= (\\d+).*", "\\1", fltr_xpr))
      add_condition(column_name, "greaterThanOrEqual", c(value))
    }
  }

  conditions
}

#' get rows to hide (in _all but not in _sel)
#' @param wb a wbWorkbook
#' @param tab_len an index for a table object in the workbook
#' @param filter the filter as character string
rows_to_hide <- function(wb, tab_len, filter) {
  x <- wb_to_df(wb, sheet = wb$tables$tab_sheet[tab_len], dims = wb$tables$tab_ref[tab_len])
  rows_all <- rownames(x)

  x <- x[eval(parse(text = filter)), ]
  rows_sel <- rownames(x)

  out <- rows_all[!rows_all %in% rows_sel]
  as.integer(out)
}

#' function to apply data table filter
#' @param filter a base R statement to filter an object "x$cyl == 4"
#' @inheritParams openxlsx2::wb_add_data_table
wb_filter_data_table <- function(wb, sheet = current_sheet(), x, dims = "A1", filter = NULL, ...) {

  openxlsx2:::assert_workbook(wb)

  ## add the data table object
  wb$add_data_table(wb, sheet = sheet, x = x, dims = dims, ...)

  if (!is.null(filter)) {

    openxlsx2:::assert_class(filter, "character")

    ## assume that the table created is the last table added to the workbook
    tab_len <- length(wb$tables$tab_xml)

    ### prepare condition list & autofilter xml
    fltr           <- create_conditions(filter)
    new_autofilter <- prepare(tabs = wb$tables, tab_name = wb$tables$tab_name[tab_len], conditions = fltr)
    sel            <- rows_to_hide(wb, tab_len, filter)

    # hide rows
    wb$set_row_heights(rows = sel, hidden = TRUE)

    # add auto filter to table
    wb$tables$tab_xml[tab_len] <- gsub(
      pattern = xml_node(wb$tables$tab_xml[tab_len], "table", "autoFilter"),
      replacement = new_autofilter,
      x = wb$tables$tab_xml[tab_len]
    )
  }

  invisible(wb)
}

library(openxlsx2)

wb <- wb_workbook() %>%
  wb_add_worksheet() %>%
  wb_filter_data_table(x = mtcars, filter = "x$cyl %in% c(4, 8) & x$am != 1")

if (interactive()) wb$open()
JanMarvin commented 17 hours ago

This was not tested with characters or dates and not how it works with all the complex things like multiple tables next to each other on a sheet or a full page filter. Tested was only the case in the bottom of the function.

JanMarvin commented 16 hours ago

@jwhendy, since my initial post I've spent a couple of hours on the functions above. You might give this a try. It should work for basic numeric/integer data, but as stated probably for not much else.

The filter passed to wb_filter_data_table() must reference the object as x (because we have to run some internal evaluation of the filter string. If you try it out, please let me know what works. If we can bring it to at least a semi stable state, I might consider adding the filter to the wb_add_data_table() function.

wb_filter_data_table(x = mtcars, filter = "x$cyl %in% c(4, 8) & x$am != 1")

I've created a gist here: https://gist.github.com/JanMarvin/db0d4081c70f3a1c6982140c174a5642, you should be able to source it from R (this includes the example data).

source("https://gist.githubusercontent.com/JanMarvin/db0d4081c70f3a1c6982140c174a5642/raw/71ac5b47547150a38c899ed8dfeaab3a25b9609a/wb_filter_data_table.R")
JanMarvin commented 15 hours ago

I've updated the gist. To reflect what we already have in wb_add_pivot_table() with choose. So the syntax slightly changed and is now something like this:

wb_filter_data_table(x = mtcars, choose = c(cyl = "x %in% c(4, 8)", am = c("x != 1")))

Filtering works with numerics, characters and dates as well. So yay!