spsanderson / healthyR.ts

A time-series companion package to healthyR
https://www.spsanderson.com/healthyR.ts/
Other
19 stars 3 forks source link

Update `auto_stationarize()` #483

Closed spsanderson closed 1 year ago

spsanderson commented 1 year ago

Change the ndiffs section of the auto_stationarize function to the following:

#' Attempt to make a time series stationary (v2)
#'
#' Tries a fixed sequence of transformations and returns the first one whose
#' result passes `ts_adf_test()` at the 5% level (p-value < 0.05):
#' 1. log, 2. difference, 3. double difference, 4. difference of log,
#' 5. double difference of log. Log-based steps are only attempted when every
#' observation is strictly positive.
#'
#' @param .time_series A numeric vector or `ts` object.
#'
#' @return
#' - `.time_series` itself, unchanged, if it already passes the test.
#' - Otherwise a list with `stationary_ts` (the transformed series), `ndiffs`
#'   (0 for the plain log transform, else the value passed as the second
#'   argument of the inner `diff()` call) and `adf_stats` (the
#'   `ts_adf_test()` result for `stationary_ts`).
#' - `NULL` if no transformation produced a passing series.
#'
#' @details
#' NOTE(review): `diff_order` is passed as the second positional argument of
#' `diff()`, which base R names `lag` (not `differences`) -- confirm that a
#' lag search bounded by the series frequency is what is intended.
auto_stationarizev2 <- function(.time_series) {

  # Variables
  time_series <- .time_series
  freq <- frequency(.time_series)
  min_x <- min(.time_series)
  # log() is only defined for strictly positive data
  can_log <- min_x > 0

  # Smallest order in 1..freq whose transformed series passes the ADF test;
  # returns freq + 1 when none does. `&&` short-circuits, so no test is run
  # once the order exceeds freq.
  first_passing_order <- function(.transform) {
    ord <- 1
    while (ord <= freq && ts_adf_test(.transform(ord))$p_value >= 0.05) {
      ord <- ord + 1
    }
    ord
  }

  # Common shape of every successful (transformed) return value
  stationary_result <- function(.stationary_ts, .ndiffs) {
    list(
      stationary_ts = .stationary_ts,
      ndiffs = .ndiffs,
      adf_stats = ts_adf_test(.stationary_ts)
    )
  }

  # Check if the time series is already stationary
  if (ts_adf_test(time_series)$p_value < 0.05) {
    cat("The time series is already stationary via ts_adf_test().\n")
    return(time_series)
  }
  cat("The time series is not stationary. Attempting to make it stationary...\n")

  # Transformation (e.g., logarithmic)
  if (can_log) {
    log_ts <- log(time_series)
    if (ts_adf_test(log_ts)$p_value < 0.05) {
      cat("Logarithmic transformation made the time series stationary.\n")
      return(stationary_result(log_ts, 0))
    }
  } else {
    cat("The minimum value of the .time_series is", min_x, "skipping logrithmic transform.\n")
  }

  # Single Differencing
  diff_order <- first_passing_order(function(ord) diff(time_series, ord))
  if (diff_order <= freq) {
    cat("Differencing of order", diff_order, "made the time series stationary.\n")
    return(stationary_result(diff(time_series, diff_order), diff_order))
  }
  cat("Data requires more single differencing than it's frequency, trying double differencing.\n")

  # Double Differencing
  diff_order <- first_passing_order(function(ord) diff(diff(time_series, ord)))
  if (diff_order <= freq) {
    cat("Double Differencing with and inner Difference order", diff_order, "made the time series stationary.\n")
    return(stationary_result(diff(diff(time_series, diff_order)), diff_order))
  }
  cat("Data requires more differencing than it's frequency, trying Diff Log.\n")

  # The remaining attempts take log(); skip them entirely for non-positive
  # data instead of reporting a NaN-filled series as stationary.
  if (can_log) {
    log_ts <- log(time_series)

    # Diff of Log
    diff_order <- first_passing_order(function(ord) diff(log_ts, ord))
    if (diff_order <= freq) {
      cat("Differencing of log with order", diff_order, "made the time series stationary.\n")
      return(stationary_result(diff(log_ts, diff_order), diff_order))
    }
    cat("Data requires more differencing than it's frequency. Tyring Double Difference Log.\n")

    # Double Diff Log
    diff_order <- first_passing_order(function(ord) diff(diff(log_ts, ord)))
    if (diff_order <= freq) {
      cat("Double Difference Log with an inner Difference order", diff_order, "made the time series stationary.\n")
      return(stationary_result(diff(diff(log_ts, diff_order)), diff_order))
    }
  }

  cat("Could not automatically stationarize data.")
  NULL
}

Example:

> auto_stationarize(BJsales)
The time series is not stationary. Attempting to make it stationary...
Differencing of order 1 made the time series stationary.
$stationary_ts
Time Series:
Start = 2 
End = 150 
Frequency = 1 
  [1] -0.6 -0.1 -0.5  0.1  1.2 -1.6  1.4  0.3  0.9  0.4 -0.1  0.0  2.0  1.4  2.2  3.4  0.0 -0.7
 [19] -1.0  0.7  3.7  0.5  1.4  3.6  1.1  0.7  3.3 -1.0  1.0 -2.1  0.6 -1.5 -1.4  0.7  0.5 -1.7
 [37] -1.1 -0.1 -2.7  0.3  0.6  0.8  0.0  1.0  1.0  4.2  2.0 -2.7 -1.5 -0.7 -1.3 -1.7 -1.1 -0.1
 [55] -1.7 -1.8  1.6  0.7 -1.0 -1.5 -0.7  1.7 -0.2  0.4 -1.8  0.8  0.7 -2.0 -0.3 -0.6  1.3 -1.4
 [73] -0.3 -0.9  0.0  0.0  1.8  1.3  0.9 -0.3  2.3  0.5  2.2  1.3  1.9  1.5  4.5  1.7  4.8  2.5
 [91]  1.4  3.5  3.2  1.5  0.7  0.3  1.4 -0.1  0.2  1.6 -0.4  0.9  0.6  1.0 -2.5 -1.4  1.2  1.6
[109]  0.3  2.3  0.7  1.3  1.2 -0.2  1.4  3.0 -0.4  1.3 -0.9  1.2 -0.8 -1.0 -0.8 -0.1 -1.5  0.3
[127]  0.2 -0.5 -0.1  0.3  1.3 -1.1 -0.1 -0.5  0.3 -0.7  0.7 -0.5  0.6 -0.3  0.2  2.1  1.5  1.8
[145]  0.4 -0.5 -1.0  0.4  0.5

$ndiffs
[1] 1

> ts_adf_test(diff(BJsales))
$test_stat
[1] -3.348524

$p_value
[1] 0.065846

We see that the differenced series actually fails `ts_adf_test()`: its p-value (0.065846) is above 0.05.

New Examples

> auto_stationarizev2(BJsales)
The time series is not stationary. Attempting to make it stationary...
Data requires more single differencing than it's frequency, trying double differencing.
Double Differencing with and inner Difference order 1 made the time series stationary.
$stationary_ts
Time Series:
Start = 3 
End = 150 
Frequency = 1 
  [1]  0.5 -0.4  0.6  1.1 -2.8  3.0 -1.1  0.6 -0.5 -0.5  0.1  2.0 -0.6  0.8  1.2 -3.4 -0.7 -0.3
 [19]  1.7  3.0 -3.2  0.9  2.2 -2.5 -0.4  2.6 -4.3  2.0 -3.1  2.7 -2.1  0.1  2.1 -0.2 -2.2  0.6
 [37]  1.0 -2.6  3.0  0.3  0.2 -0.8  1.0  0.0  3.2 -2.2 -4.7  1.2  0.8 -0.6 -0.4  0.6  1.0 -1.6
 [55] -0.1  3.4 -0.9 -1.7 -0.5  0.8  2.4 -1.9  0.6 -2.2  2.6 -0.1 -2.7  1.7 -0.3  1.9 -2.7  1.1
 [73] -0.6  0.9  0.0  1.8 -0.5 -0.4 -1.2  2.6 -1.8  1.7 -0.9  0.6 -0.4  3.0 -2.8  3.1 -2.3 -1.1
 [91]  2.1 -0.3 -1.7 -0.8 -0.4  1.1 -1.5  0.3  1.4 -2.0  1.3 -0.3  0.4 -3.5  1.1  2.6  0.4 -1.3
[109]  2.0 -1.6  0.6 -0.1 -1.4  1.6  1.6 -3.4  1.7 -2.2  2.1 -2.0 -0.2  0.2  0.7 -1.4  1.8 -0.1
[127] -0.7  0.4  0.4  1.0 -2.4  1.0 -0.4  0.8 -1.0  1.4 -1.2  1.1 -0.9  0.5  1.9 -0.6  0.3 -1.4
[145] -0.9 -0.5  1.4  0.1

$ndiffs
[1] 1

$adf_stats
$adf_stats$test_stat
[1] -6.562008

$adf_stats$p_value
[1] 0.01

> auto_stationarizev2(cumsum(rnorm(150)))
The time series is not stationary. Attempting to make it stationary...
The minimum value of the .time_series is -9.864393 skipping logrithmic transform.
Differencing of order 1 made the time series stationary.
$stationary_ts
  [1] -0.758345417 -1.035892884  0.948159303  0.914158734 -1.298731995  0.424378795 -1.112545320
  [8] -1.051073226  0.525412448 -0.686024000  0.993479982  0.038523599  0.536148976 -0.523626698
 [15] -1.151221335  0.914752241  0.238071492 -0.239067759  0.069235327  1.325908343 -0.698166635
 [22] -0.749408444 -0.619615053 -1.584991268  0.819628138  0.192369647  0.207171974 -0.043347354
 [29] -0.510160441 -0.823418614  0.851856403 -1.426184673  0.440298942 -0.792611651  0.282310215
 [36] -0.740690522 -0.523341683  1.769365917  0.668282619 -2.144897024  0.126412416 -0.451812936
 [43] -1.136626188  0.209785890  0.129965516 -0.328506573  1.972703567 -2.248690067  0.838219387
 [50] -0.289023270  0.656513411 -0.453997701 -0.593864562 -1.710379666 -0.209448428  2.478745801
 [57]  0.989702208  1.675572156  0.914965318  1.144262708  0.902876414  0.475392432 -0.582528774
 [64] -0.532934737 -1.600839996 -0.005817714  0.899355676  1.031922557  0.095132704 -0.547627617
 [71]  3.290517443  0.736685531  1.420575305 -0.337680641 -0.037957627  0.448607098  1.676522312
 [78] -0.311474545  0.853615667 -2.094814634 -0.507254434 -1.292009077  1.113362717 -0.164453088
 [85] -0.390374082  1.369099846  1.116272858 -0.898021203  0.427866488 -1.228444569 -0.475615024
 [92]  1.616577637  1.450127951  1.109018755 -0.570903886 -1.881431470 -1.175698184  0.952556525
 [99] -0.290567886 -2.162608146 -0.180187488  1.410239221  0.643468641 -0.821258544 -1.545916652
[106] -0.826547226  0.034527671  0.888073701 -1.939940155  1.023201755  0.005457727  0.569778970
[113] -1.653255563 -0.666654380 -0.448234189  1.043891348  1.028174047  0.435090459  1.604212182
[120] -0.515411200  1.012537194 -0.035940030 -0.667342096  0.923380038  1.381100331  0.878250416
[127] -0.509403455 -0.469787634  1.377675847  0.352826406  0.829573979 -0.338701984  1.261034936
[134] -0.808755145  0.625351521 -0.817174966 -2.462575017 -1.342957511  0.136295199  0.882922750
[141] -1.751302083 -1.251424469  1.764545997 -0.433899350  0.505700132 -0.526935321 -0.298582885
[148]  0.087244207  0.010961843

$ndiffs
[1] 1

$adf_stats
$adf_stats$test_stat
[1] -4.794686

$adf_stats$p_value
[1] 0.01

Plots:

# Draw the random walk once so both plots show the same series; calling
# rnorm() a second time would advance the RNG and plot a different sample.
set.seed(123)
random_walk <- cumsum(rnorm(150))
plot.ts(random_walk)
plot.ts(auto_stationarizev2(.time_series = random_walk)$stationary_ts)

image

image

Edit the function to use internal helpers, one per transformation type, so that it is a little easier to read.

spsanderson commented 1 year ago

Function Update (want to keep original)

#' Attempt to make a time series stationary (helper-based version)
#'
#' Returns `.time_series` unchanged when `ts_adf_test()` reports a p-value
#' below 0.05. Otherwise calls the transformation helpers in order (log,
#' single difference, double difference, difference of log, double difference
#' of log) and returns the result of the first helper whose `ret` element is
#' `TRUE`.
#'
#' @param .time_series The series to stationarize.
#'
#' @return The input series, the list returned by the first successful
#'   helper, or (invisibly) `NULL` when no helper reports success.
auto_stationarizev2 <- function(.time_series) {

  # Variables
  time_series <- .time_series
  freq <- frequency(.time_series)
  min_x <- min(.time_series)

  # Nothing to do when the raw series already passes the ADF test
  already_stationary <- ts_adf_test(time_series)$p_value < 0.05
  if (!already_stationary) {
    cat("The time series is not stationary. Attempting to make it stationary...\n")
  } else {
    cat("The time series is already stationary via ts_adf_test().\n")
    return(time_series)
  }

  # Transformation helpers, in the order they are attempted
  attempts <- list(
    util_log_ts,            # Transformation (e.g., logarithmic)
    util_singlediff_ts,     # Single Differencing
    util_doublediff_ts,     # Double Differencing
    util_difflog_ts,        # Diff of Log
    util_doubledifflog_ts   # Double Diff Log
  )

  # Stop at the first helper that reports success
  for (attempt in attempts) {
    outcome <- attempt(time_series)
    if (outcome$ret == TRUE) {
      return(outcome)
    }
  }
}

Examples:

> auto_stationarizev2(AirPassengers)
The time series is already stationary via ts_adf_test().
     Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
1949 112 118 132 129 121 135 148 148 136 119 104 118
1950 115 126 141 135 125 149 170 170 158 133 114 140
1951 145 150 178 163 172 178 199 199 184 162 146 166
1952 171 180 193 181 183 218 230 242 209 191 172 194
1953 196 196 236 235 229 243 264 272 237 211 180 201
1954 204 188 235 227 234 264 302 293 259 229 203 229
1955 242 233 267 269 270 315 364 347 312 274 237 278
1956 284 277 317 313 318 374 413 405 355 306 271 306
1957 315 301 356 348 355 422 465 467 404 347 305 336
1958 340 318 362 348 363 435 491 505 404 359 310 337
1959 360 342 406 396 420 472 548 559 463 407 362 405
1960 417 391 419 461 472 535 622 606 508 461 390 432
> auto_stationarizev2(BJsales)
The time series is not stationary. Attempting to make it stationary...
Logrithmic Transformation Failed.
Data requires more single differencing than it's frequency, trying double differencing
Double Differencing of order 1 made the time series stationary
$stationary_ts
Time Series:
Start = 3 
End = 150 
Frequency = 1 
  [1]  0.5 -0.4  0.6  1.1 -2.8  3.0 -1.1  0.6 -0.5 -0.5  0.1  2.0 -0.6  0.8  1.2 -3.4 -0.7 -0.3
 [19]  1.7  3.0 -3.2  0.9  2.2 -2.5 -0.4  2.6 -4.3  2.0 -3.1  2.7 -2.1  0.1  2.1 -0.2 -2.2  0.6
 [37]  1.0 -2.6  3.0  0.3  0.2 -0.8  1.0  0.0  3.2 -2.2 -4.7  1.2  0.8 -0.6 -0.4  0.6  1.0 -1.6
 [55] -0.1  3.4 -0.9 -1.7 -0.5  0.8  2.4 -1.9  0.6 -2.2  2.6 -0.1 -2.7  1.7 -0.3  1.9 -2.7  1.1
 [73] -0.6  0.9  0.0  1.8 -0.5 -0.4 -1.2  2.6 -1.8  1.7 -0.9  0.6 -0.4  3.0 -2.8  3.1 -2.3 -1.1
 [91]  2.1 -0.3 -1.7 -0.8 -0.4  1.1 -1.5  0.3  1.4 -2.0  1.3 -0.3  0.4 -3.5  1.1  2.6  0.4 -1.3
[109]  2.0 -1.6  0.6 -0.1 -1.4  1.6  1.6 -3.4  1.7 -2.2  2.1 -2.0 -0.2  0.2  0.7 -1.4  1.8 -0.1
[127] -0.7  0.4  0.4  1.0 -2.4  1.0 -0.4  0.8 -1.0  1.4 -1.2  1.1 -0.9  0.5  1.9 -0.6  0.3 -1.4
[145] -0.9 -0.5  1.4  0.1

$ndiffs
[1] 1

$adf_stats
$adf_stats$test_stat
[1] -6.562008

$adf_stats$p_value
[1] 0.01

$trans_type
[1] "double_diff"

$ret
[1] TRUE

> auto_stationarizev2(BJsales.lead)
The time series is not stationary. Attempting to make it stationary...
Logrithmic Transformation Failed.
Differencing of order 1 made the time series stationary
$stationary_ts
Time Series:
Start = 2 
End = 150 
Frequency = 1 
  [1]  0.06  0.25 -0.57  0.58 -0.20  0.23 -0.04 -0.19  0.03  0.42  0.04  0.24  0.34 -0.46 -0.18
 [16] -0.08  0.29  0.56 -0.37  0.20  0.54 -0.31  0.03  0.52 -0.70  0.35 -0.63  0.44 -0.38 -0.01
 [31]  0.22  0.10 -0.50  0.01  0.30 -0.76  0.52  0.15  0.06 -0.10  0.21 -0.01  0.70 -0.22 -0.76
 [46]  0.06  0.02 -0.17 -0.08  0.01  0.11 -0.39  0.01  0.50 -0.02 -0.37 -0.13  0.05  0.54 -0.46
 [61]  0.25 -0.52  0.44  0.02 -0.47  0.11  0.06  0.25 -0.35  0.00 -0.06  0.21 -0.09  0.36  0.09
 [76] -0.04 -0.20  0.44 -0.23  0.40 -0.01  0.17  0.08  0.58 -0.27  0.79 -0.21  0.02  0.30  0.28
 [91] -0.27 -0.01  0.03  0.16 -0.28  0.15  0.26 -0.36  0.32 -0.11  0.22 -0.65  0.00  0.47  0.16
[106] -0.19  0.48 -0.26  0.21  0.00 -0.20  0.35  0.38 -0.48  0.20 -0.32  0.43 -0.50  0.12 -0.17
[121]  0.15 -0.36  0.35 -0.03 -0.18  0.16  0.07  0.21 -0.50  0.23 -0.13  0.14 -0.15  0.19 -0.24
[136]  0.26 -0.22  0.17  0.37 -0.06  0.29 -0.34 -0.12 -0.16  0.25  0.08 -0.07  0.26 -0.37

$ndiffs
[1] 1

$adf_stats
$adf_stats$test_stat
[1] -4.838625

$adf_stats$p_value
[1] 0.01

$trans_type
[1] "diff"

$ret
[1] TRUE

BJsales image

BJsales stationarized image

BJsales.lead image

BJsales.lead stationarized image