nathaneastwood / poorman

A poor man's dependency free grammar of data manipulation
https://nathaneastwood.github.io/poorman/
Other
338 stars 15 forks source link

Implement pivot_wider and pivot_longer #98

Closed etiennebacher closed 2 years ago

etiennebacher commented 2 years ago

@nathaneastwood implementing this was faster than I expected. I essentially copied the code from the corresponding datawizard functions, changed the select helpers, and removed the other datawizard functions. Not all arguments are implemented yet, but I think it's a good start. Below are some examples checking that the poorman and tidyr outputs match (these are also included in the tests).

Things to note:

  1. One important difference from tidyr::pivot_wider() concerns non-standard evaluation (NSE). For example, the following works with tidyr but not with poorman:

    us_rent_income %>%
    pivot_wider(
      names_from = variable,
      names_sep = ".",
      values_from = c(estimate, moe)
    )

    To make it work with poorman, we need to use quoted names, i.e. names_from = "variable" and values_from = c("estimate", "moe"). Maybe I simply missed something about how you deal with NSE in your package.

  2. I added tidyr to Suggests and loaded it in the tests for the pivot_*() functions, but this has some side effects on other tests because of namespace collisions. I think it is useful to test that the tidyr and poorman outputs match, so I'd like to keep these tests, but I don't know how to deal with the side effects (I have never used tinytest before). What do you think?

  3. I'm not very familiar with your coding style so let me know if I should improve the syntax in some places (or you can just modify this PR directly).

Once everything is addressed, I can add some bullet points in NEWS and bump the package version (if necessary).

Close #47, close #48.


Examples for pivot_longer()

# Attach tidyr, dplyr and poorman with their startup messages silenced.
# The pivot_longer() calls below are namespace-qualified (tidyr:: / poorman::),
# so attach order does not decide which implementation each of them runs.
suppressPackageStartupMessages({
  library(tidyr)
  library(dplyr)
  library(poorman)
})

##### Example 1
# Pivot every column except `religion` into "income" (former column name) and
# "count" (cell value) with both implementations, then compare the results.

tidyr1 <- relig_income %>% 
  tidyr::pivot_longer(!religion, names_to = "income", values_to = "count")

poorman1 <- relig_income %>% 
  poorman::pivot_longer(!religion, names_to = "income", values_to = "count")

# identical() is a strict comparison; TRUE means the two outputs match exactly.
identical(poorman1, tidyr1)
#> [1] TRUE

##### Example 2
# Select the columns to pivot with the starts_with("wk") helper: their names go
# to "week" and their values to "rank". Both calls use the same arguments.

tidyr2 <- billboard %>% 
  tidyr::pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    values_to = "rank"
  )

poorman2 <- billboard %>% 
  poorman::pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    values_to = "rank"
  )

identical(poorman2, tidyr2)
#> [1] TRUE

##### Example 3
# Same pivot as Example 2, with values_drop_na = TRUE added to both calls so
# that the handling of missing values is compared as well.

tidyr3 <- billboard %>% 
  tidyr::pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    values_to = "rank",
    values_drop_na = TRUE
  )

poorman3 <- billboard %>% 
  poorman::pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    values_to = "rank",
    values_drop_na = TRUE
  )

identical(poorman3, tidyr3)
#> [1] TRUE

##### Example 4
# Same pivot as Example 3, additionally passing names_prefix = "wk" to both
# calls, which targets the "wk" prefix of the pivoted column names.

tidyr4 <- billboard %>% 
  tidyr::pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    names_prefix = "wk",
    values_to = "rank",
    values_drop_na = TRUE
  )

poorman4 <- billboard %>% 
  poorman::pivot_longer(
    cols = starts_with("wk"), 
    names_to = "week", 
    names_prefix = "wk",
    values_to = "rank",
    values_drop_na = TRUE
  )

identical(poorman4, tidyr4)
#> [1] TRUE

##### Example 5
# Pivot columns 5 to 60 of `who` (selected by position) and split each column
# name on "_" into three new columns: diagnosis, gender and age.

tidyr5 <- who |> 
  tidyr::pivot_longer(
    cols = 5:60,
    names_to = c("diagnosis", "gender", "age"),
    names_sep = "_",
    values_to = "count"
  )
# tidyr warns that some names split into fewer than 3 pieces and are padded
# with NA:
#> Warning: Expected 3 pieces. Missing pieces filled with `NA` in 14 rows [43, 44,
#> 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56].

# No warning is recorded for the poorman call in this transcript, yet the
# resulting data is identical (see below).
poorman5 <- who %>% 
  poorman::pivot_longer(
    cols = 5:60,
    names_to = c("diagnosis", "gender", "age"),
    names_sep = "_",
    values_to = "count"
  )

identical(poorman5, tidyr5)
#> [1] TRUE

##### Example 6
# Split the pivoted column names with a regular expression instead of a
# separator: the three capture groups in names_pattern fill diagnosis, gender
# and age respectively.

tidyr6 <- who |> 
  tidyr::pivot_longer(
    cols = new_sp_m014:newrel_f65,
    names_to = c("diagnosis", "gender", "age"),
    names_pattern = "new_?(.*)_(.)(.*)",
    values_to = "count"
  )

# The poorman call selects columns by position (5:60) rather than by the name
# range used above -- presumably the same columns, given the identical result.
poorman6 <- who %>% 
  poorman::pivot_longer(
    cols = 5:60,
    names_to = c("diagnosis", "gender", "age"),
    names_pattern = "new_?(.*)_(.)(.*)",
    values_to = "count"
  )

identical(poorman6, tidyr6)
#> [1] TRUE

Created on 2022-07-28 by the reprex package (v2.0.1)

Examples for pivot_wider()

# Attach tidyr, dplyr and poorman with their startup messages silenced.
# The pivot_wider() calls below are namespace-qualified (tidyr:: / poorman::),
# so attach order does not decide which implementation each of them runs.
suppressPackageStartupMessages({
  library(tidyr)
  library(dplyr)
  library(poorman)
})

##### Example 1
# Spread `station` into columns holding the `seen` values, passing
# values_fill = 0 to both implementations. Quoted column names are used for
# both calls.

tidyr1 <- fish_encounters |> 
  tidyr::pivot_wider(
    names_from = "station", 
    values_from = "seen", 
    values_fill = 0
  )

poorman1 <- fish_encounters %>% 
  poorman::pivot_wider(
    names_from = "station", 
    values_from = "seen",
    values_fill = 0
  )

identical(poorman1, tidyr1)
#> [1] FALSE
# Not identical because the classes of the columns differ between the two
# outputs (identical() is strict about types).

##### Example 2
# Build a test table from all product/country/year combinations, keep product
# "A" only for country "AI" (all rows for product "B"), and attach random
# production values. `.` is the magrittr placeholder for the piped data frame.
# No seed is set in this script, so the printed values differ between runs;
# both pivots read the same `production` object, so the comparison is
# unaffected.

production <- expand_grid(
  product = c("A", "B"),
  country = c("AI", "EI"),
  year = 2000:2014
) %>%
  filter((product == "A" & country == "AI") | product == "B") %>%
  mutate(production = rnorm(nrow(.)))

production
#> # A tibble: 45 × 4
#>    product country  year production
#>    <chr>   <chr>   <int>      <dbl>
#>  1 A       AI       2000    -0.224 
#>  2 A       AI       2001    -0.702 
#>  3 A       AI       2002     0.0199
#>  4 A       AI       2003     1.33  
#>  5 A       AI       2004     1.68  
#>  6 A       AI       2005     0.454 
#>  7 A       AI       2006    -0.427 
#>  8 A       AI       2007    -2.17  
#>  9 A       AI       2008     1.78  
#> 10 A       AI       2009     1.08  
#> # … with 35 more rows
#> # ℹ Use `print(n = ...)` to see more rows

# Widen using two name columns at once. tidyr receives bare column names,
# poorman receives the same columns as quoted strings.
tidyr2 <- production %>%
  tidyr::pivot_wider(
    names_from = c(product, country),
    values_from = production
  )

poorman2 <- production %>%
  poorman::pivot_wider(
    names_from = c("product", "country"),
    values_from = "production"
  )

identical(poorman2, tidyr2)
#> [1] TRUE

##### Example 3
# Widen with two value columns (estimate and moe) spread by `variable`.
# tidyr receives bare column names, poorman the same columns as quoted strings.

tidyr3 <- us_rent_income %>%
  tidyr::pivot_wider(
    names_from = variable,
    values_from = c(estimate, moe)
  )

poorman3 <- us_rent_income %>%
  poorman::pivot_wider(
    names_from = "variable",
    values_from = c("estimate", "moe")
  )

identical(poorman3, tidyr3)
#> [1] TRUE

##### Example 4
# Same pivot as Example 3, with names_sep = "." passed to both calls to check
# that a custom separator for the generated column names is handled the same.

tidyr4 <- us_rent_income %>%
  tidyr::pivot_wider(
    names_from = variable,
    names_sep = ".",
    values_from = c(estimate, moe)
  )

poorman4 <- us_rent_income %>%
  poorman::pivot_wider(
    names_from = "variable",
    names_sep = ".",
    values_from = c("estimate", "moe")
  )

identical(poorman4, tidyr4)
#> [1] TRUE

##### Example 5
# Field/value pairs listed person after person. Not every person has every
# field (e.g. only one "email" row), so the widened table has missing cells.

contacts <- tribble(
  ~field, ~value,
  "name", "Jiena McLellan",
  "company", "Toyota", 
  "name", "John Smith", 
  "company", "google", 
  "email", "john@google.com",
  "name", "Huxley Ratcliffe"
) %>% 
  mutate(
    # The running count of "name" rows increases at each new person, giving
    # every row an id that ties it to the person it belongs to.
    person_id = cumsum(field == "name")
  )

# Widen `field` into columns; tidyr receives bare names, poorman quoted names.
tidyr5 <- contacts %>% 
  tidyr::pivot_wider(names_from = field, values_from = value)

poorman5 <- contacts %>% 
  poorman::pivot_wider(names_from = "field", values_from = "value")

identical(poorman5, tidyr5)
#> [1] TRUE

Created on 2022-07-28 by the reprex package (v2.0.1)