tidyverse / lubridate

Make working with dates in R just that little bit easier
https://lubridate.tidyverse.org
GNU General Public License v3.0
728 stars 207 forks source link

A/a in orders argument produces unexpected failures interacting with presence of B/b #1104

Closed jmobrien closed 1 year ago

jmobrien commented 1 year ago

When "A"/"a" are explicitly included in the `orders` argument, parsing can sometimes fail even though the specification is otherwise correct:

While I realize that, in practice, neither "A" nor "a" actually needs to be present in the default case, it does seem strange that including them causes such failures.

# Attach with library() rather than require(): library() stops with an error
# if a package is missing, whereas require() only returns FALSE and lets the
# script fail later on the first unresolved function.
library(tidyverse, quietly = TRUE)
library(lubridate, quietly = TRUE)
#> 
#> Attaching package: 'lubridate'
#> The following objects are masked from 'package:base':
#> 
#>     date, intersect, setdiff, union

# Build the inputs for the test grid.

# One date string per weekday-name style (full name vs. abbreviation):
full <- "Wednesday Apr 24 13:45:07 GMT-0500 2019"
abbrev <- "Wed Apr 24 13:45:07 GMT-0500 2019"
entries <- data.frame(
  type = c("fullname", "abbrev"),
  datestring = c(full, abbrev)
)

# Order options. Each distinct specification is written once and repeated
# through `times`, which yields the same 16-element vector (duplicates
# included) as spelling every entry out by hand.
orders <- rep(
  c(
    # "A" & "a" (which docs say should behave identically to each other),
    # plus "B"
    "aBdHMSzY", "ABdHMSzY",
    # Same, but with "b" instead (which, again, should behave identically
    # to "B")
    "abdHMSzY", "AbdHMSzY",
    # Versions without A/a
    "BdHMSzY", "bdHMSzY"
  ),
  times = c(2L, 2L, 2L, 2L, 4L, 4L)
)

# Testing df: every date string crossed with every order specification.
testing_df <-
  expand_grid(entries, orders)

# Testing what works for parsing or not.
#
# map2_dbl() returns a plain double vector, so the POSIXct class of each
# parse_date_time() result is dropped and only the underlying number of
# seconds since the Unix epoch remains. The class is restored afterwards with
# as.POSIXct(), which must therefore use origin = "1970-01-01"; any other
# origin shifts every parsed date by the distance between the two origins.
#
# NOTE: the printed output recorded below was generated with
# origin = "1960-01-01", which is why its dates read 2009-04-23 instead of
# 2019-04-24. Which rows are NA (the subject of this report) is unaffected.
test_parse <-
  testing_df |>
  mutate(
    date_parse =
      # Failed parses emit a warning and yield NA; only the NA matters here.
      suppressWarnings(map2_dbl(datestring, orders, parse_date_time)) |>
      as.POSIXct(origin = "1970-01-01")
  )

print(test_parse, n = Inf)
#> # A tibble: 32 × 4
#>    type     datestring                              orders   date_parse         
#>    <chr>    <chr>                                   <chr>    <dttm>             
#>  1 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 aBdHMSzY NA                 
#>  2 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 aBdHMSzY NA                 
#>  3 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 ABdHMSzY NA                 
#>  4 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 ABdHMSzY NA                 
#>  5 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 abdHMSzY 2009-04-23 13:45:07
#>  6 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 abdHMSzY 2009-04-23 13:45:07
#>  7 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 AbdHMSzY 2009-04-23 13:45:07
#>  8 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 AbdHMSzY 2009-04-23 13:45:07
#>  9 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 BdHMSzY  2009-04-23 13:45:07
#> 10 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 BdHMSzY  2009-04-23 13:45:07
#> 11 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 BdHMSzY  2009-04-23 13:45:07
#> 12 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 BdHMSzY  2009-04-23 13:45:07
#> 13 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 bdHMSzY  2009-04-23 13:45:07
#> 14 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 bdHMSzY  2009-04-23 13:45:07
#> 15 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 bdHMSzY  2009-04-23 13:45:07
#> 16 fullname Wednesday Apr 24 13:45:07 GMT-0500 2019 bdHMSzY  2009-04-23 13:45:07
#> 17 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       aBdHMSzY NA                 
#> 18 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       aBdHMSzY NA                 
#> 19 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       ABdHMSzY NA                 
#> 20 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       ABdHMSzY NA                 
#> 21 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       abdHMSzY 2009-04-23 13:45:07
#> 22 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       abdHMSzY 2009-04-23 13:45:07
#> 23 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       AbdHMSzY NA                 
#> 24 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       AbdHMSzY NA                 
#> 25 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       BdHMSzY  2009-04-23 13:45:07
#> 26 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       BdHMSzY  2009-04-23 13:45:07
#> 27 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       BdHMSzY  2009-04-23 13:45:07
#> 28 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       BdHMSzY  2009-04-23 13:45:07
#> 29 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       bdHMSzY  2009-04-23 13:45:07
#> 30 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       bdHMSzY  2009-04-23 13:45:07
#> 31 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       bdHMSzY  2009-04-23 13:45:07
#> 32 abbrev   Wed Apr 24 13:45:07 GMT-0500 2019       bdHMSzY  2009-04-23 13:45:07

# Testing what the guessing tool gives back.

# Step 1: for every (date string, order) pair, collect the candidate formats
# returned by guess_formats() in a list-column.
guesses_nested <- mutate(
  testing_df,
  guess = map2(datestring, orders, guess_formats)
)

# Step 2: drop the raw date string and spread the candidates out so that each
# guessed format gets its own row.
guesses_long <- unnest(select(guesses_nested, -datestring), guess)

# Step 3: number the guesses within each combination of the remaining columns
# (everything except `guess`), placing the counter just before `guess`.
test_guess <- ungroup(
  mutate(
    group_by(guesses_long, across(-guess)),
    count = row_number(),
    .before = guess
  )
)

print(test_guess, n = Inf)
#> # A tibble: 66 × 4
#>    type     orders   count guess                              
#>    <chr>    <chr>    <int> <chr>                              
#>  1 fullname aBdHMSzY     1 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#>  2 fullname aBdHMSzY     2 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#>  3 fullname ABdHMSzY     1 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#>  4 fullname ABdHMSzY     2 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#>  5 fullname abdHMSzY     1 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#>  6 fullname abdHMSzY     2 %A %b %d %H:%M:%S GMT%Oz %Y        
#>  7 fullname abdHMSzY     3 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#>  8 fullname abdHMSzY     4 %A %b %d %H:%M:%S GMT%Oz %Y        
#>  9 fullname AbdHMSzY     1 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 10 fullname AbdHMSzY     2 %A %b %d %H:%M:%S GMT%Oz %Y        
#> 11 fullname AbdHMSzY     3 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 12 fullname AbdHMSzY     4 %A %b %d %H:%M:%S GMT%Oz %Y        
#> 13 fullname BdHMSzY      1 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 14 fullname BdHMSzY      2 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 15 fullname BdHMSzY      3 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 16 fullname BdHMSzY      4 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 17 fullname BdHMSzY      5 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 18 fullname BdHMSzY      6 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 19 fullname BdHMSzY      7 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 20 fullname BdHMSzY      8 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 21 fullname bdHMSzY      1 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 22 fullname bdHMSzY      2 %A %b %d %H:%M:%S GMT%Oz %Y        
#> 23 fullname bdHMSzY      3 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 24 fullname bdHMSzY      4 Wednesday %b %d %H:%M:%S GMT%Oz %Y 
#> 25 fullname bdHMSzY      5 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 26 fullname bdHMSzY      6 %A %b %d %H:%M:%S GMT%Oz %Y        
#> 27 fullname bdHMSzY      7 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 28 fullname bdHMSzY      8 Wednesday %b %d %H:%M:%S GMT%Oz %Y 
#> 29 fullname bdHMSzY      9 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 30 fullname bdHMSzY     10 %A %b %d %H:%M:%S GMT%Oz %Y        
#> 31 fullname bdHMSzY     11 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 32 fullname bdHMSzY     12 Wednesday %b %d %H:%M:%S GMT%Oz %Y 
#> 33 fullname bdHMSzY     13 %A %Ob %d %H:%M:%S GMT%Oz %Y       
#> 34 fullname bdHMSzY     14 %A %b %d %H:%M:%S GMT%Oz %Y        
#> 35 fullname bdHMSzY     15 Wednesday %Ob %d %H:%M:%S GMT%Oz %Y
#> 36 fullname bdHMSzY     16 Wednesday %b %d %H:%M:%S GMT%Oz %Y 
#> 37 abbrev   aBdHMSzY     1 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 38 abbrev   aBdHMSzY     2 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 39 abbrev   abdHMSzY     1 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 40 abbrev   abdHMSzY     2 %a %b %d %H:%M:%S GMT%Oz %Y        
#> 41 abbrev   abdHMSzY     3 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 42 abbrev   abdHMSzY     4 %a %b %d %H:%M:%S GMT%Oz %Y        
#> 43 abbrev   BdHMSzY      1 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 44 abbrev   BdHMSzY      2 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 45 abbrev   BdHMSzY      3 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 46 abbrev   BdHMSzY      4 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 47 abbrev   BdHMSzY      5 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 48 abbrev   BdHMSzY      6 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 49 abbrev   BdHMSzY      7 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 50 abbrev   BdHMSzY      8 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 51 abbrev   bdHMSzY      1 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 52 abbrev   bdHMSzY      2 %a %b %d %H:%M:%S GMT%Oz %Y        
#> 53 abbrev   bdHMSzY      3 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 54 abbrev   bdHMSzY      4 Wed %b %d %H:%M:%S GMT%Oz %Y       
#> 55 abbrev   bdHMSzY      5 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 56 abbrev   bdHMSzY      6 %a %b %d %H:%M:%S GMT%Oz %Y        
#> 57 abbrev   bdHMSzY      7 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 58 abbrev   bdHMSzY      8 Wed %b %d %H:%M:%S GMT%Oz %Y       
#> 59 abbrev   bdHMSzY      9 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 60 abbrev   bdHMSzY     10 %a %b %d %H:%M:%S GMT%Oz %Y        
#> 61 abbrev   bdHMSzY     11 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 62 abbrev   bdHMSzY     12 Wed %b %d %H:%M:%S GMT%Oz %Y       
#> 63 abbrev   bdHMSzY     13 %a %Ob %d %H:%M:%S GMT%Oz %Y       
#> 64 abbrev   bdHMSzY     14 %a %b %d %H:%M:%S GMT%Oz %Y        
#> 65 abbrev   bdHMSzY     15 Wed %Ob %d %H:%M:%S GMT%Oz %Y      
#> 66 abbrev   bdHMSzY     16 Wed %b %d %H:%M:%S GMT%Oz %Y

<sup>Created on 2022-12-15 by the [reprex package](https://reprex.tidyverse.org) (v2.0.1)</sup>
vspinu commented 1 year ago

Thanks for the comprehensive report. Fixed!