Open MichaelChirico opened 2 months ago
This is an opportunity to extend the functionality of seq.Date()
, which is a pure R function.
Working on this with @shannonpileggi
Here are some examples and expected outputs
to = as.Date("2024-08-15")
# by various days
seq(to = to, by = 'day', length.out = 3)
# [1] "2024-08-13" "2024-08-14" "2024-08-15"
seq(to = to, by = '2 days', length.out = 3)
# [1] "2024-08-11" "2024-08-13" "2024-08-15"
seq(to = to, by = 3, length.out = 3)
# [1] "2024-08-09" "2024-08-12" "2024-08-15"
# backwards
seq(to = to, by = "-4 days", length.out=3)
# [1] "2024-08-23" "2024-08-19" "2024-08-15"
# non-integer 'by'
seq(to = to, by = 1.5, length.out = 3)
# [1] "2024-08-12" "2024-08-13" "2024-08-15"
# other by=STRING
seq(to = to, by = "2 weeks", length.out = 3)
[1] "2024-07-18" "2024-08-01" "2024-08-15"
# NB: month,quarter,year are "irregular" in terms of the underlying count of days
seq(to = to, by = "month", length.out = 3)
[1] "2024-06-15" "2024-07-15" "2024-08-15"
seq(to = to, by = "quarter", length.out = 3)
[1] "2024-02-15" "2024-05-15" "2024-08-15"
seq(to = to, by = "year", length.out = 3)
[1] "2022-08-15" "2023-08-15" "2024-08-15"
# by=difftime()
seq(to = to, by = as.difftime(1, units='days'), length.out = 3)
[1] "2024-08-13" "2024-08-14" "2024-08-15"
seq(to = to, by = as.difftime(-1, units='weeks'), length.out = 3)
[1] "2024-08-29" "2024-08-22" "2024-08-15"
One tangential note:
almost surely, this code should be replaced by dispatching:
if (inherits(by, "difftime")) {
by <- switch(attr(by,"units"), secs = 1/86400, mins = 1/1440,
hours = 1/24, days = 1, weeks = 7) * unclass(by)
}
Can be replaced by:
if (inherits(by, "difftime")) by <- as.double(by, units="days")
This is shorter & doesn't hardcode the logic for this conversion in a second place.
For the record, the logic is handled by units<-.difftime
:
One branch in seq.Date()
covers an old error on this test case:
seq(as.Date("2011-01-07"), as.Date("2011-03-01"), by = "month")
# [1] "2011-01-07" "2011-02-07"
I think this error only applies when both from=
and to=
are present; we may just need to edit the branching logic there to correspond.
Note that this matches the similar seq.default()
behavior:
seq(1, 3, by=5)
# [1] 1
Current status:
The logic for the cases of by %in% c("months", "quarters", "years")
(which results in non-arithmetic sequences of days) overlaps a lot with the corresponding cases for seq.POSIXt()
(the difference being the former denominates entries in days, the latter in seconds):
Of course, there's no reason to support inferring from=
in seq.Date()
but not in seq.POSIXt()
, so ideally we can also handle the POSIXt
case in the same patch.
Given the substantial overlap of logic of the two functions, we discussed some options with @lawremi:
Dispatch to seq.POSIXt
from seq.Date
, and only maintain the logic in seq.POSIXt
. The downside here is there's some overhead to duplicating the argument validation that was done in seq.Date()
, again in seq.POSIXt()
.
We'll go ahead with the last option, and evaluate later if the overhead is noticeable / should be reduced with some smarter approach (e.g., we can check if by
requires re-dispatch higher up in seq.Date
).
not sure if this helps!
seq.POSIXt <-
function(from, to, by, length.out = NULL, along.with = NULL, ...)
{
if (!missing(along.with)) {
length.out <- length(along.with)
} else if (!is.null(length.out)) {
if (length(length.out) != 1L) stop("'length.out' must be of length 1")
length.out <- ceiling(length.out)
}
if (!missing(by) & length(by) != 1L) stop(gettextf("'%s' must be of length 1", "by"), domain=NA)
status <- which(c(missing(to), missing(from), missing(by), is.null(length.out)))
if(length(status) != 1L)
stop("exactly three of 'to', 'from', 'by' and 'length.out' / 'along.with' must be specified")
if (!missing(to)){
if (!inherits(to, "POSIXt")) stop(gettextf("'%s' must be a \"POSIXt\" object", "to"), domain=NA)
if (length(as.Date(to)) != 1L) stop(gettextf("'%s' must be of length 1", "to"), domain=NA)
cto <- as.POSIXct(to)
tz <- attr(cto , "tzone")
}
if (!missing(from)){
if (!inherits(from, "POSIXt")) stop(gettextf("'%s' must be a \"POSIXt\" object", "from"), domain=NA)
if (length(as.Date(from)) != 1L) stop(gettextf("'%s' must be of length 1", "from"), domain=NA)
cfrom <- as.POSIXct(from)
tz <- attr(cfrom , "tzone")
}
valid <- 0L
if (inherits(by, "difftime")) {
by <- switch(attr(by,"units"), secs = 1, mins = 60, hours = 3600,
days = 86400, weeks = 7*86400) * unclass(by)
} else if(is.character(by)) {
by2 <- strsplit(by, " ", fixed = TRUE)[[1L]]
if(length(by2) > 2L || length(by2) < 1L)
stop("invalid 'by' string")
valid <- pmatch(by2[length(by2)],
c("secs", "mins", "hours", "days", "weeks",
"months", "years", "DSTdays", "quarters"))
if(is.na(valid)) stop("invalid string for 'by'")
if(valid <= 5L) {
by <- c(1, 60, 3600, 86400, 7*86400)[valid]
if (length(by2) == 2L) by <- by * as.integer(by2[1L])
} else
by <- if(length(by2) == 2L) as.integer(by2[1L]) else 1
} else if(!is.numeric(by)) stop("invalid mode for 'by'")
if(is.na(by)) stop("'by' is NA")
# if one of secs, mins, hours, days, or weeks
if(valid <= 5L) { # days or weeks
res <- switch(status,
seq.int(from = unclass(from), by = by, length.out = length.out), # missing(to)
seq.int(to = unclass(to), by = by, length.out = length.out), # missing(from)
seq.int(from = unclass(from), to = unclass(to), length.out = length.out), # missing(by)
seq.int(from = unclass(from), to = unclass(to), by = by) # is.null(length.out)
)
res <- .POSIXct(as.numeric(res), tz)
}
if(valid <= 5L) { # secs, mins, hours, days, weeks
from <- unclass(as.POSIXct(from))
if(!is.null(length.out))
res <- seq.int(from, by = by, length.out = length.out)
else {
to0 <- unclass(as.POSIXct(to))
## defeat test in seq.default
res <- seq.int(0, to0 - from, by) + from
}
return(.POSIXct(res, tz))
} else { # months or years or DSTdays or quarters
r1 <- as.POSIXlt(from)
if(valid == 7L) { # years
if(missing(to)) { # years
yr <- seq.int(r1$year, by = by, length.out = length.out)
} else {
to <- as.POSIXlt(to)
yr <- seq.int(r1$year, to$year, by)
}
r1$year <- yr
} else if(valid %in% c(6L, 9L)) { # months or quarters
if (valid == 9L) by <- by * 3
if(missing(to)) {
mon <- seq.int(r1$mon, by = by, length.out = length.out)
} else {
to0 <- as.POSIXlt(to)
mon <- seq.int(r1$mon, 12*(to0$year - r1$year) + to0$mon, by)
}
r1$mon <- mon
} else if(valid == 8L) { # DSTdays
if(!missing(to)) {
## We might have a short day, so need to over-estimate.
length.out <- 2L + floor((unclass(as.POSIXct(to)) -
unclass(as.POSIXct(from)))/(by * 86400))
}
r1$mday <- seq.int(r1$mday, by = by, length.out = length.out)
}
r1$isdst <- -1L
res <- as.POSIXct(r1)
## now shorten if necessary.
if(!missing(to)) {
to <- as.POSIXct(to)
res <- if(by > 0) res[res <= to] else res[res >= to]
}
res
}
}
Work on this issue has also exposed another (very minor) bug(ish): seq.POSIXt()
can return an object of type integer in some cases (it's usually, and should be, double):
Linking the preliminary patch on r-svn:
https://github.com/r-devel/r-svn/pull/177
I have some tests (added to tests/datetime3.R), but feel like I came up short on other possibilities to check for regression. @shannonpileggi any suggestions for new cases to check?
To bolster some more confidence in the patch, I found some 33 CRAN packages relying on seq()
methods for base time objects & ran their R CMD check
under the patched r-devel with this script:
https://gist.github.com/MichaelChirico/aedcc59a07d49800bcce3be71400cee1
Seems this could be ready to post back on Bugzilla?
Yep, I was waiting a few days to let things simmer. Posted now: https://bugs.r-project.org/show_bug.cgi?id=17672
Bug 17672 - seq.Date should accept to,by,length.out (i.e., no need for from
The idea is for this to "just work":
seq.default()
is perfectly capable of doing so:The workaround is far less readable:
And requires us to do a bunch of arithmetic to figure out
from
in the generalSys.Date(to = t, length.out=n, by='k days')
.