schochastics / timeless

A general purpose date(time) parser for R
https://schochastics.github.io/timeless/
Other
19 stars 2 forks source link

Cleaning #13

Closed chainsawriot closed 6 months ago

chainsawriot commented 6 months ago

The current implementation does not return early on the happy path (no NA values), so it still runs the wasteful parse_datetime call and several rounds of POSIX parsing even when everything has already been parsed. The following version seems to work (the unit tests won't work anyway).

# Parse a character vector of dates/datetimes by trying a chain of parsers.
#
# Each stage is only applied to the elements that the previous stages left
# unparsed, and the function returns as soon as nothing is left to parse, so
# fully parseable input never reaches the slower fallbacks.
#
# Arguments:
#   x          character vector to parse.
#   formats    forwarded to parse_datetime() and parse_date(); NULL by default.
#   tz         forwarded to .return_parsed() as `tz`.
#   out_format one of "datetime", "date" or "character" (checked with
#              match.arg()); forwarded to .return_parsed() as `format`.
#
# Returns whatever .return_parsed() produces for the parsed values.
#
# NOTE(review): the stages assume parse_guess_rs() marks failures with the
# string "not found" and that the other parsers return NA on failure --
# confirm against those helpers.
#' @export
chronos.character <- function(x, formats = NULL, tz = "", out_format = "datetime") {
    out_format <- match.arg(out_format, c("datetime", "date", "character"))

    # Stage 1: fast guess. Elements it could not handle come back as "not found".
    res <- parse_guess_rs(x)
    idx <- res == "not found"
    if (!any(idx)) {
        return(.return_parsed(res, tz = tz, format = out_format))
    }

    # Stage 2: datetime formats for whatever stage 1 missed.
    res[idx] <- parse_datetime(x[idx], formats)
    idx <- is.na(res)
    if (!any(idx)) {
        return(.return_parsed(res, tz = tz, format = out_format))
    }

    # Stage 3 (date output only): date formats go before the epoch parser.
    if (out_format == "date") {
        res[idx] <- parse_date(x[idx], formats)
        idx <- is.na(res)
        if (!any(idx)) {
            return(.return_parsed(res, tz = tz, format = out_format))
        }
    }

    # Stage 4: epoch values.
    res[idx] <- parse_epoch(x[idx])
    idx <- is.na(res)
    if (!any(idx)) {
        return(.return_parsed(res, tz = tz, format = out_format))
    }

    # Stage 5 (non-date output): last resort, parse as a date and pad with
    # midnight so the value carries a time component.
    if (out_format != "date") {
        tmp <- parse_date(x[idx], formats)
        stamped <- paste(tmp, "00:00:00")
        # paste() turns NA into the literal text "NA", which would yield
        # "NA 00:00:00"; restore NA for the entries parse_date() did not parse.
        stamped[is.na(tmp)] <- NA_character_
        res[idx] <- stamped
    }
    res[is.na(res)] <- NA_character_
    .return_parsed(res, tz = tz, format = out_format)
}

And the performance is much better.

library(chronos)

# Benchmark variant 1: chronos() with the "datetime" output format.
chronos1 <- function(x) {
    parsed <- chronos(x, out_format = "datetime")
    parsed
}

# Benchmark variant 2: chronos() to character, then fasttime::fastPOSIXct().
chronos2 <- function(x) {
    as_text <- chronos(x, out_format = "character")
    fasttime::fastPOSIXct(as_text)
}

# Benchmark variant 3: chronos() with its default output, then
# anytime::anytime().
chronos3 <- function(x) {
    parsed <- chronos(x)
    anytime::anytime(parsed)
}

# Benchmark variant 4: chronos() to character, then anytime::anytime().
chronos4 <- function(x) {
    as_text <- chronos(x, out_format = "character")
    anytime::anytime(as_text)
}

# Benchmark variant 5: chronos() to "datetime", then as.POSIXct().
chronos5 <- function(x) {
    parsed <- chronos(x, out_format = "datetime")
    as.POSIXct(parsed)
}

# Time the five chronos wrappers against plain anytime::anytime().
# The expressions are left unnamed so the result table labels them by their
# code; check = FALSE turns off bench::mark()'s comparison of the results.
bench::mark(
    chronos1(bench_date),
    chronos2(bench_date),
    chronos3(bench_date),
    chronos4(bench_date),
    chronos5(bench_date),
    anytime::anytime(bench_date),
    check = FALSE
)
#> # A tibble: 6 × 6
#>   expression                        min   median `itr/sec` mem_alloc `gc/sec`
#>   <bch:expr>                   <bch:tm> <bch:tm>     <dbl> <bch:byt>    <dbl>
#> 1 chronos1(bench_date)          309.5µs    323µs    2997.   876.88KB     2.03
#> 2 chronos2(bench_date)          147.8µs    153µs    6406.    15.63KB     2.02
#> 3 chronos3(bench_date)          318.1µs    327µs    2997.     5.55MB     2.02
#> 4 chronos4(bench_date)          324.7µs    334µs    2886.    19.18KB     2.02
#> 5 chronos5(bench_date)          311.8µs    322µs    3032.    12.51KB     2.01
#> 6 anytime::anytime(bench_date)   19.9ms     20ms      50.0    3.27KB     0

Created on 2024-02-28 with reprex v2.1.0

schochastics commented 6 months ago

Nice, just in time for the first CRAN release. Thanks, this did indeed speed up the "happy path".