ropensci / visdat

Preliminary Exploratory Visualisation of Data
https://docs.ropensci.org/visdat/
Other
453 stars 47 forks source link

implement vis_dat_ly, extending from this code #36

Open njtierney opened 7 years ago

njtierney commented 7 years ago

vis_dat_ly is not working at the moment, for reasons that I don't fully understand, so I'm going to dump the code here so I don't forget it. I would like to avoid unused, untested code in visdat.

#' Produces an interactive visualisation of a data.frame to tell you what it contains.
#'
#' \code{vis_dat_ly} uses plotly to provide an interactive version of vis_dat, providing an at-a-glance plotly object of what is inside a dataframe. Cells are coloured according to what class they are and whether the values are missing.
#'
#' @param x a \code{data.frame}
#'
#' @return a \code{plotly} object
#'
#' @examples
#'
#' \dontrun{
#' # currently does not work, some problems with palletes and other weird messages.
#' vis_dat_ly(airquality)
#'
#'}
#'
#'
vis_dat_ly <- function(x) {

  # x = data.frame(x = 1L:10L,
  #                y = letters[1:10],
  #                z = runif(10))

  # apply the fingerprint function to get the class
  d <- x %>% purrr::dmap(fingerprint) %>% as.matrix()

  # heatmap fails due to not being a numeric matrix
  # heatmap(d)

  # plotly fails due to the number of colours being too many?
  plotly::plot_ly(z = d,
                  type = "heatmap")

}
njtierney commented 7 years ago

OK, here is the current progress

library(visdat)
library(magrittr)
x = data.frame(x = 1L:10L,
               y = letters[1:10],
               z = runif(10))

d <- x %>%
  purrr::map_df(visdat:::fingerprint) %>%
  dplyr::mutate(rows = seq_len(nrow(.))) %>%
  tidyr::gather_(key_col = "variable",
                 value_col = "valueType",
                 gather_cols = names(.)[-length(.)]) %>%
  # dplyr::mutate(value = vis_extract_value_(x))
  dplyr::mutate(value = dplyr::case_when(
    valueType == "integer" ~ 1L,
    valueType == "factor" ~ 2L,
    valueType == "numeric" ~ 3L
  ))

plotly::plot_ly(d,
                x = ~variable,
                y = ~rows,
                z = ~value) %>%
  plotly::add_heatmap()

From my experimentation, It appears that I need to provide a numeric number for the "class" - I can't use the categorical class. Unless @cpsievert has any thoughts?

Carson, some context: I'm working on making the vis_* family fully in plot_ly, as calling ggplot2::ggplotly is awesome, but slow for these kind of plots.

Note - taking examples from: https://plotly-book.cpsievert.me/d-frequencies.html

cpsievert commented 7 years ago

If it were me, I'd try using heatmapgl (for performance) with showscale=FALSE and a custom colorscale (see fig 2.5 here). Then, for a "legend", I'd use shapes & annotations

njtierney commented 6 years ago

See #25 for reference, closing that issue to avoid duplication

njtierney commented 6 years ago

Here is another attempt at this, I don't have time to fix this up for the 0.5.0 release.

library(visdat)
library(magrittr)
x <- data.frame(x = 1L:10L,
                y = letters[1:10],
                z = runif(10))
n <- nrow(x)
rows <- rep(1:nrow(x),ncol(x))
vars <- rep(colnames(x), each = n)

txt <- matrix(paste(sprintf("value = %s", as.matrix(x)),
                    sprintf("variable = %s", vars),
                    sprintf("row = %s", rows),
                    sep = "<br />"),
              nrow = nrow(x))

d <- x %>%
  purrr::map_df(visdat:::fingerprint) %>%
  dplyr::mutate(rows = seq_len(nrow(.))) %>%
  tidyr::gather_(key_col = "variable",
                 value_col = "valueType",
                 gather_cols = names(.)[-length(.)]) %>%
  # dplyr::mutate(value = vis_extract_value_(x))
  dplyr::mutate(value = dplyr::case_when(
    valueType == "integer" ~ 1L,
    valueType == "factor" ~ 2L,
    valueType == "numeric" ~ 3L
  ))

# get class++ - classes plus is it missing?
whatsit <- function(x){
  dplyr::if_else(condition = is.na(x),
                 true = "NA",
                 false = class(x))
}

whatsit_v <- Vectorize(whatsit)

what_is_it_really <- whatsit_v(x)

categories <- unique(as.character(what_is_it_really))

n_categories <- length(categories)

discretize_colorscale <- function(palette, granularity = 100) {
  n <- length(palette)
  colorscale <- data.frame(range = seq(0, n, length.out = n*granularity),
                           color = rep(palette, each = granularity))

  setNames(colorscale, NULL)
}

plotly::plot_ly(d,
                x = ~variable,
                text = txt,
                y = ~rows,
                z = ~value,
                colorscale = n_categories,
                type = "heatmap",
                colorscale = discretize_colorscale(
                  palette = viridisLite::viridis(n_categories),
                  granularity = 20000
                )
) %>%
  plotly::colorbar(tickmode = "array",
                   ticktext = c(categories),
                   tickvals = 1:3,
                   len = 0) %>%
  plotly::layout(xaxis = list(side = "top"),
                 yaxis = list(autorange = "reversed"),
                 legend = list(orientation = 'h')
  )

Created on 2018-06-04 by the reprex package (v0.2.0).

going to move this to version 0.6.0 for the moment - add a note to remove this function from release at #81

cpsievert commented 6 years ago

I think you want

range = seq(0, 1, length.out = n*granularity),

not

range = seq(0, n, length.out = n*granularity),

also, here is another way to do this with a legend instead of a colorbar:


library(plotly)
library(htmlwidgets)

pal <- viridisLite::viridis(n_categories)
cols <- discretize_colorscale(
  palette = pal,
  granularity = 20000
)

p <- plot_ly() 

for (i in seq_along(categories)) {
  p <- add_markers(
    p, x = names(x)[[1]], y = 1, color = I(pal[[i]]), 
    name = categories[[i]], hoverinfo = "none", symbol = I(15),
    visible = "legendonly"
  )
}

p <- add_heatmap(
    p, data = d,
    x = ~variable,
    text = txt,
    y = ~rows,
    z = ~value,
    colorscale = cols,
    showscale = F
  ) %>%
  layout(
    xaxis = list(side = "top"),
    yaxis = list(autorange = "reversed"),
    legend = list(orientation = "h")
  )

# disable legend clicking https://github.com/plotly/plotly.js/issues/665
onRender(p, "
  function(el, x) {
    el.on('plotly_legendclick', function(x) { return false; })
  }
")
njtierney commented 6 years ago

Thanks for that, Carson - really appreciate it!

This looks much better, although there are some issues with NA values not appearing on mouseover - I think that this would have to do with the code I wrote that creates txt.

I will come back to this at another time for version 0.6.0

library(plotly)
#> Loading required package: ggplot2
#> 
#> Attaching package: 'plotly'
#> The following object is masked from 'package:ggplot2':
#> 
#>     last_plot
#> The following object is masked from 'package:stats':
#> 
#>     filter
#> The following object is masked from 'package:graphics':
#> 
#>     layout
library(htmlwidgets)

# x <- data.frame(x = 1L:10L,
#                 y = letters[1:10],
#                 z = runif(10))

x <- airquality

n <- nrow(x)
rows <- rep(1:nrow(x),ncol(x))
vars <- rep(colnames(x), each = n)

# get class++ - classes plus is it missing?
whatsit <- function(x){
  dplyr::if_else(condition = is.na(x),
                 true = "NA",
                 false = class(x))
}

whatsit_v <- Vectorize(whatsit)

what_is_it_really <- whatsit_v(x)

categories <- unique(as.character(what_is_it_really))

n_categories <- length(categories)

pal <- viridisLite::viridis(n_categories)

discretize_colorscale <- function(palette, granularity = 100) {
  n <- length(palette)
  colorscale <- data.frame(range = seq(from = 0, 
                                       to = 1, 
                                       length.out = n*granularity),
                           color = rep(palette, each = granularity))

  setNames(colorscale, NULL)
}

cols <- discretize_colorscale(
  palette = pal,
  granularity = 20000
)

txt <- matrix(paste(sprintf("value = %s", as.matrix(x)),
                    sprintf("variable = %s", vars),
                    sprintf("row = %s", rows),
                    sep = "<br />"),
              nrow = nrow(x))

p <- plot_ly() 

for (i in seq_along(categories)) {
  p <- add_markers(
    p, x = names(x)[[1]], y = 1, color = I(pal[[i]]), 
    name = categories[[i]], hoverinfo = "none", symbol = I(15),
    visible = "legendonly"
  )
}

d <- x %>%
  purrr::map_df(visdat:::fingerprint) %>%
  dplyr::mutate(rows = seq_len(nrow(.))) %>%
  tidyr::gather_(key_col = "variable",
                 value_col = "valueType",
                 gather_cols = names(.)[-length(.)]) %>%
  # dplyr::mutate(value = vis_extract_value_(x))
  dplyr::mutate(value = dplyr::case_when(
    valueType == "integer" ~ 1L,
    valueType == "factor" ~ 2L,
    valueType == "numeric" ~ 3L
  ))

p <- add_heatmap(
  p, data = d,
  x = ~variable,
  text = txt,
  y = ~rows,
  z = ~value,
  colorscale = cols,
  showscale = F
) %>%
  layout(
    xaxis = list(side = "top"),
    yaxis = list(autorange = "reversed"),
    legend = list(orientation = "h")
  )

# disable legend clicking https://github.com/plotly/plotly.js/issues/665
onRender(p, "
  function(el, x) {
    el.on('plotly_legendclick', function(x) { return false; })
  }
")

Created on 2018-06-05 by the reprex package (v0.2.0).