tidyverse / purrr

A functional programming toolkit for R
https://purrr.tidyverse.org/
Other
1.28k stars 272 forks source link

map2() call in dplyr::mutate() error while standalone map2() call works #541

Closed leungi closed 6 years ago

leungi commented 6 years ago

Please refer to reprex below.

library(tidyverse)

data <- tibble::tribble(
  ~phase,      ~fc_dt,        ~oil,          ~q,        ~gas,  ~t,  ~mean,
  "water",  "4/1/2017", 4.602535714,     1.70775, 73.55432143,  1L, 10.60021233,
  "water",  "5/1/2017", 4.414064516, 5.845258065, 48.88796774,  2L,  7.86293583,
  "water",  "6/1/2017", 3.215866667, 4.149533333, 43.78976667,  3L,  6.47610122
)

data %>% 
  nest(-phase) -> nest_data

nest_data %>%
  mutate(test = map2(data, phase, ~ rename(.x, !!.y := mean)))
#> Error in quos(...): object '.y' not found

map2(nest_data$data, nest_data$phase, ~ rename(.x, !!.y := mean))
#> [[1]]
#> # A tibble: 3 x 6
#>   fc_dt      oil     q   gas     t water
#>   <chr>    <dbl> <dbl> <dbl> <int> <dbl>
#> 1 4/1/2017  4.60  1.71  73.6     1 10.6 
#> 2 5/1/2017  4.41  5.85  48.9     2  7.86
#> 3 6/1/2017  3.22  4.15  43.8     3  6.48

Created on 2018-08-28 by the reprex package (v0.2.0).

batpigandme commented 6 years ago

So, with that mutate() call, I assume you're trying to do the equivalent of assigning the output of the map2() at the end to a variable in the nest_data dataframe called test, right?

library(tidyverse)

data <- tibble::tribble(
  ~phase,      ~fc_dt,        ~oil,          ~q,        ~gas,  ~t,  ~mean,
  "water",  "4/1/2017", 4.602535714,     1.70775, 73.55432143,  1L, 10.60021233,
  "water",  "5/1/2017", 4.414064516, 5.845258065, 48.88796774,  2L,  7.86293583,
  "water",  "6/1/2017", 3.215866667, 4.149533333, 43.78976667,  3L,  6.47610122
)

nest_data <- data %>% 
  nest(-phase)

nest_data
#> # A tibble: 1 x 2
#>   phase data            
#>   <chr> <list>          
#> 1 water <tibble [3 × 6]>

nested2 <- nest_data %>%
  mutate(test = map2(data, phase, ~ rename(.x, !!expr(.y) := mean)))

nest_data$test <- map2(nest_data$data, nest_data$phase, ~ rename(.x, !!.y := mean))

str(nest_data)
#> Classes 'tbl_df', 'tbl' and 'data.frame':    1 obs. of  3 variables:
#>  $ phase: chr "water"
#>  $ data :List of 1
#>   ..$ :Classes 'tbl_df', 'tbl' and 'data.frame': 3 obs. of  6 variables:
#>   .. ..$ fc_dt: chr  "4/1/2017" "5/1/2017" "6/1/2017"
#>   .. ..$ oil  : num  4.6 4.41 3.22
#>   .. ..$ q    : num  1.71 5.85 4.15
#>   .. ..$ gas  : num  73.6 48.9 43.8
#>   .. ..$ t    : int  1 2 3
#>   .. ..$ mean : num  10.6 7.86 6.48
#>  $ test :List of 1
#>   ..$ :Classes 'tbl_df', 'tbl' and 'data.frame': 3 obs. of  6 variables:
#>   .. ..$ fc_dt: chr  "4/1/2017" "5/1/2017" "6/1/2017"
#>   .. ..$ oil  : num  4.6 4.41 3.22
#>   .. ..$ q    : num  1.71 5.85 4.15
#>   .. ..$ gas  : num  73.6 48.9 43.8
#>   .. ..$ t    : int  1 2 3
#>   .. ..$ water: num  10.6 7.86 6.48
str(nested2)
#> Classes 'tbl_df', 'tbl' and 'data.frame':    1 obs. of  3 variables:
#>  $ phase: chr "water"
#>  $ data :List of 1
#>   ..$ :Classes 'tbl_df', 'tbl' and 'data.frame': 3 obs. of  6 variables:
#>   .. ..$ fc_dt: chr  "4/1/2017" "5/1/2017" "6/1/2017"
#>   .. ..$ oil  : num  4.6 4.41 3.22
#>   .. ..$ q    : num  1.71 5.85 4.15
#>   .. ..$ gas  : num  73.6 48.9 43.8
#>   .. ..$ t    : int  1 2 3
#>   .. ..$ mean : num  10.6 7.86 6.48
#>  $ test :List of 1
#>   ..$ :Classes 'tbl_df', 'tbl' and 'data.frame': 3 obs. of  6 variables:
#>   .. ..$ fc_dt: chr  "4/1/2017" "5/1/2017" "6/1/2017"
#>   .. ..$ oil  : num  4.6 4.41 3.22
#>   .. ..$ q    : num  1.71 5.85 4.15
#>   .. ..$ gas  : num  73.6 48.9 43.8
#>   .. ..$ t    : int  1 2 3
#>   .. ..$ .y   : num  10.6 7.86 6.48

Created on 2018-08-28 by the reprex package (v0.2.0.9000).

leungi commented 6 years ago

Thanks for the prompt help, @batpigandme.

You're very close, except the column name desired is the one corresponding to phase (which is water) in this case.

In nested2, the column is instead named with the literal .y.

markdly commented 6 years ago

In case it helps, I think the expected_output can be achieved with a non-tidyeval workaround:

library(tidyverse)

data <- tibble::tribble(
  ~phase,      ~fc_dt,        ~oil,          ~q,        ~gas,  ~t,  ~mean,
  "water",  "4/1/2017", 4.602535714,     1.70775, 73.55432143,  1L, 10.60021233,
  "water",  "5/1/2017", 4.414064516, 5.845258065, 48.88796774,  2L,  7.86293583,
  "water",  "6/1/2017", 3.215866667, 4.149533333, 43.78976667,  3L,  6.47610122
)

nest_data <- data %>% nest(-phase)

expected_output <- nest_data %>%
  mutate(test = map2(data, phase, function(x, y) {
    names(x) <- str_replace(names(x), "^mean$", y)
    x
  })) 

expected_output %>% unnest(test)
#> # A tibble: 3 x 7
#>   phase fc_dt      oil     q   gas     t water
#>   <chr> <chr>    <dbl> <dbl> <dbl> <int> <dbl>
#> 1 water 4/1/2017  4.60  1.71  73.6     1 10.6 
#> 2 water 5/1/2017  4.41  5.85  48.9     2  7.86
#> 3 water 6/1/2017  3.22  4.15  43.8     3  6.48

Created on 2018-08-29 by the reprex package (v0.2.0).

cderv commented 6 years ago

There is something going on with !! when it is used directly inside mutate() and map2().

If you use a function defined beforehand, it works: the unquoted argument evaluates to the correct value.

library(tidyverse)

data <- tibble::tribble(
  ~phase,      ~fc_dt,        ~oil,          ~q,        ~gas,  ~t,  ~mean,
  "water",  "4/1/2017", 4.602535714,     1.70775, 73.55432143,  1L, 10.60021233,
  "water",  "5/1/2017", 4.414064516, 5.845258065, 48.88796774,  2L,  7.86293583,
  "water",  "6/1/2017", 3.215866667, 4.149533333, 43.78976667,  3L,  6.47610122
)

data %>% 
  nest(-phase) -> nest_data

# Create a named helper so that the `!!` unquoting lives inside the function
# body rather than in the expression handed to mutate().
#
# tab:  object passed as the first argument to rename(); the call refers to a
#       column called `mean`, so `tab` is presumably a data frame that has one.
# name: value unquoted with `!!` on the left-hand side of `:=`, i.e. the new
#       name given to the `mean` column.
#
# Returns whatever rename() returns for these arguments.
rename_custom <-function(tab, name) {
  rename(tab, !!name := mean)
}

# working
nest_data %>%
  mutate(test = map2(data, phase, rename_custom)) %>%
  pull(test)
#> [[1]]
#> # A tibble: 3 x 6
#>   fc_dt      oil     q   gas     t water
#>   <chr>    <dbl> <dbl> <dbl> <int> <dbl>
#> 1 4/1/2017  4.60  1.71  73.6     1 10.6 
#> 2 5/1/2017  4.41  5.85  48.9     2  7.86
#> 3 6/1/2017  3.22  4.15  43.8     3  6.48

Created on 2018-08-29 by the reprex package (v0.2.0).

I think mutate() calls dplyr:::named_quos() on the whole expression and tries to unquote the !!.y at that point. However, this happens before purrr's map2() has run, so .y does not exist yet; it is not found, hence the error. When using the named function created outside, there is no !! in the expression passed to mutate(), so rename_custom is only evaluated when map2() runs, and it works.

dplyr:::named_quos(test = map2(data, phase, rename_custom))
#> $test
#> <quosure>
#>   expr: ^map2(data, phase, rename_custom)
#>   env:  global
dplyr:::named_quos(test = map2(data, phase, ~ {rename(.x, !!.y := mean)}))
#> Error in quos(...): objet '.y' introuvable
.y <- 3
dplyr:::named_quos(test = map2(data, phase, ~ {rename(.x, !!.y := mean)}))
#> $test
#> <quosure>
#>   expr: ^map2(data, phase, ~{
#>           rename(.x, 3 := mean)
#>         })
#>   env:  global

Hope it helps isolate the issue.

leungi commented 6 years ago

@markdly, @cderv, thanks for the solutions!

The explanation by @cderv is especially useful :) Good to know mutate()'s behaviour; I'm wondering whether this is by design or whether an enhancement is in order.