Closed JanMarvin closed 2 days ago
A working draft
library(openxlsx2)
wb <- wb_workbook()$
add_worksheet()$
add_data(x = mtcars)$
add_filter(rows = 1, cols = seq_along(mtcars))
# filterColumn c-vector
# operator equal, greaterThan, greaterThanOrEqual, lessThan, lessThanOrEqual, notEqual
autofilter <-
'<autoFilter ref="A1:K33">
<filterColumn colId="0">
<customFilters>
<customFilter operator="greaterThan" val="20" />
</customFilters>
</filterColumn>
<filterColumn colId="1">
<filters>
<filter val = "4"/>
</filters>
</filterColumn>
</autoFilter>'
wb$worksheets[[1]]$autoFilter <- autofilter
wb$worksheets[[1]]$sheetPr <- '<sheetPr filterMode="1" />'
# # not really required?
# wb$workbook$definedNames <- "<definedName name=\"_xlnm._FilterDatabase\" localSheetId=\"0\" hidden=\"1\">'Sheet 1'!$A$1:$K$33</definedName>"
dt <- wb_data(wb)
to_show <- rownames(dt[dt$mpg > 20 & dt$cyl == 4,])
to_hide <- rownames(dt)[!rownames(dt) %in% to_show]
# hide rows
rows <- wb$worksheets[[1]]$sheet_data$row_attr
rows[rownames(rows) %in% to_hide, "hidden"] <- "1"
wb$worksheets[[1]]$sheet_data$row_attr <- rows
Needs to update sortState
too
library(openxlsx2)
wb <- write_xlsx(x = mtcars, as_table = TRUE)
rows <- dims_to_dataframe(wb_dims(rows = which(mtcars$cyl != 6) + 1, cols = seq_along(mtcars)), fill = TRUE)$A %>%
gsub("\\D+", "", .) %>%
as.integer() %>%
.[complete.cases(.)]
wb$set_row_heights(rows = rows, hidden = TRUE)
auto_filter <- '<autoFilter ref="A1:K33">
<filterColumn colId="1">
<filters>
<filter val="6" />
</filters>
</filterColumn>
</autoFilter>'
wb$tables$tab_xml[1] <- gsub(
pattern = '<autoFilter ref="A1:K33"/>',
replacement = auto_filter,
x = wb$tables$tab_xml[1]
)
if (interactive()) wb$open()
A more complete function to create data table filters. Allows for filters !=
, <
, <=
, ==
, %in%
, >=
, and >
:
#' function to create the autofilter xml structure required for openxml
#' @param tabs a wbWorkbook wb$tables object
#' @param tab_name a table name in the workbook
#' @param conditions a named list with conditions to apply to the columns
prepare <- function(tabs, tab_name, conditions) {
if (!tab_name %in% tabs$tab_name) stop("Table with this tab_name not found.")
tabs <- tabs[tabs$tab_name == tab_name, , drop = FALSE]
vars <- openxlsx2:::rbindlist(
xml_attr(tabs$tab_xml, "table", "tableColumns", "tableColumn")
)
cond_vars <- names(conditions)
if (any(!cond_vars %in% vars$name)) stop("Condtion variable not found in table.")
ref <- openxlsx2:::rbindlist(xml_attr(tabs$tab_xml, "table"))$ref
autoFilter <- xml_node_create("autoFilter", xml_attributes = c(ref = ref))
vars$id <- as.integer(vars$id) - 1L
aF <- NULL
for (cond_var in cond_vars) {
colId <- vars$id[vars$name == cond_var]
condition <- conditions[[cond_var]]
## == default or %in% with multiple values
if (condition$operator == "equal") {
filter <- vapply(
condition$val,
function(x) {
xml_node_create("filter", xml_attributes = c(val = openxlsx2:::as_xml_attr(x)))
},
FUN.VALUE = NA_character_
)
filters <- xml_node_create("filters")
filterColumn <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId)))
filters <- xml_add_child(filters, filter)
filterColumn <- xml_add_child(filterColumn, filters)
}
## != notEqual, < lessThan, <= lessThanOrEqual, > greaterThan, >= greaterThanOrEqual
## TODO provide replacement function
ops <- c("notEqual", "lessThan", "lessThanOrEqual", "greaterThan", "greaterThanOrEqual")
if (condition$operator %in% ops) {
customFilter <- vapply(
condition$val,
function(x) {
xml_node_create(
"customFilter",
xml_attributes = c(operator = condition$operator,
val = openxlsx2:::as_xml_attr(x))
)
},
FUN.VALUE = NA_character_
)
customFilters <- xml_node_create("customFilters")
filterColumn <- xml_node_create("filterColumn", xml_attributes = c(colId = openxlsx2:::as_xml_attr(colId)))
customFilters <- xml_add_child(customFilters, customFilter)
filterColumn <- xml_add_child(filterColumn, customFilters)
}
aF <- c(aF, filterColumn)
}
autoFilter <- xml_add_child(autoFilter, aF)
autoFilter
}
#' helper to parse the string into something that can be converted into openxml
#' column filters
#' @param filter_expr an expression like "x$cyl != 4"
create_conditions <- function(filter_expr) {
conditions <- list()
# Define a helper function to add conditions to the list
add_condition <- function(column, operator, value) {
conditions[[column]] <<- list(val = value, operator = operator)
}
# should only have &
fltr_xprs <- strsplit(filter_expr, "&")[[1]]
for (fltr_xpr in fltr_xprs) {
# Extract conditions for "equal"
if (grepl("%in%", fltr_xpr)) {
column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
values <- as.numeric(unlist(strsplit(gsub(".*%in% c\\(([^)]*)\\).*", "\\1", fltr_xpr), ", ")))
add_condition(column_name, "equal", values)
}
# Extract conditions for "equal" using ==
if (grepl("==", fltr_xpr)) {
column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
value <- as.numeric(sub(".*== (\\d+).*", "\\1", fltr_xpr))
add_condition(column_name, "equal", c(value))
}
# Extract conditions for "notEqual"
if (grepl("!=", fltr_xpr)) {
column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
value <- as.numeric(sub(".*!= (\\d+).*", "\\1", fltr_xpr))
add_condition(column_name, "notEqual", c(value))
}
# Extract conditions for "lessThan"
if (grepl("<[^=]", fltr_xpr)) { # "<" but not "<="
column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
value <- as.numeric(sub(".*< (\\d+).*", "\\1", fltr_xpr))
add_condition(column_name, "lessThan", c(value))
}
# Extract conditions for "lessThanOrEqual"
if (grepl("<=", fltr_xpr)) {
column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
value <- as.numeric(sub(".*<= (\\d+).*", "\\1", fltr_xpr))
add_condition(column_name, "lessThanOrEqual", c(value))
}
# Extract conditions for "greaterThan"
if (grepl(">[^=]", fltr_xpr)) { # ">" but not ">="
column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
value <- as.numeric(sub(".*> (\\d+).*", "\\1", fltr_xpr))
add_condition(column_name, "greaterThan", c(value))
}
# Extract conditions for "greaterThanOrEqual"
if (grepl(">=", fltr_xpr)) {
column_name <- sub(".*\\$([a-zA-Z0-9_]+) .*", "\\1", fltr_xpr)
value <- as.numeric(sub(".*>= (\\d+).*", "\\1", fltr_xpr))
add_condition(column_name, "greaterThanOrEqual", c(value))
}
}
conditions
}
#' get rows to hide (in _all but not in _sel)
#' @param wb a wbWorkbook
#' @param tab_len an index for a table object in the workbook
#' @param filter the filter as character string
rows_to_hide <- function(wb, tab_len, filter) {
x <- wb_to_df(wb, sheet = wb$tables$tab_sheet[tab_len], dims = wb$tables$tab_ref[tab_len])
rows_all <- rownames(x)
x <- x[eval(parse(text = filter)), ]
rows_sel <- rownames(x)
out <- rows_all[!rows_all %in% rows_sel]
as.integer(out)
}
#' function to apply data table filter
#' @param filter a base R statement to filter an object "x$cyl == 4"
#' @inheritParams openxlsx2::wb_add_data_table
wb_filter_data_table <- function(wb, sheet = current_sheet(), x, dims = "A1", filter = NULL, ...) {
openxlsx2:::assert_workbook(wb)
## add the data table object
wb$add_data_table(wb, sheet = sheet, x = x, dims = dims, ...)
if (!is.null(filter)) {
openxlsx2:::assert_class(filter, "character")
## assume that the table created is the last table added to the workbook
tab_len <- length(wb$tables$tab_xml)
### prepare condition list & autofilter xml
fltr <- create_conditions(filter)
new_autofilter <- prepare(tabs = wb$tables, tab_name = wb$tables$tab_name[tab_len], conditions = fltr)
sel <- rows_to_hide(wb, tab_len, filter)
# hide rows
wb$set_row_heights(rows = sel, hidden = TRUE)
# add auto filter to table
wb$tables$tab_xml[tab_len] <- gsub(
pattern = xml_node(wb$tables$tab_xml[tab_len], "table", "autoFilter"),
replacement = new_autofilter,
x = wb$tables$tab_xml[tab_len]
)
}
invisible(wb)
}
library(openxlsx2)
wb <- wb_workbook() %>%
wb_add_worksheet() %>%
wb_filter_data_table(x = mtcars, filter = "x$cyl %in% c(4, 8) & x$am != 1")
if (interactive()) wb$open()
This was not tested with characters or dates and not how it works with all the complex things like multiple tables next to each other on a sheet or a full page filter. Tested was only the case in the bottom of the function.
@jwhendy, since my initial post I've spent a couple of hours on the functions above. You might give this a try. It should work for basic numeric/integer data, but as stated probably for not much else.
The filter passed to wb_filter_data_table()
must reference the object as x
(because we have to run some internal evaluation of the filter string. If you try it out, please let me know what works. If we can bring it to at least a semi stable state, I might consider adding the filter to the wb_add_data_table()
function.
wb_filter_data_table(x = mtcars, filter = "x$cyl %in% c(4, 8) & x$am != 1")
I've created a gist here: https://gist.github.com/JanMarvin/db0d4081c70f3a1c6982140c174a5642, you should be able to source it from R (this includes the example data).
source("https://gist.githubusercontent.com/JanMarvin/db0d4081c70f3a1c6982140c174a5642/raw/71ac5b47547150a38c899ed8dfeaab3a25b9609a/wb_filter_data_table.R")
I've updated the gist. To reflect what we already have in wb_add_pivot_table()
with choose
. So the syntax slightly changed and is now something like this:
wb_filter_data_table(x = mtcars, choose = c(cyl = "x %in% c(4, 8)", am = c("x != 1")))
Filtering works with numerics, characters and dates as well. So yay!
https://github.com/ycphs/openxlsx/issues/76
this should be simpler to do now. row_attr is a data frame, just needs a check which rows to hide