joshuaulrich / TTR

Technical analysis and other functions to construct technical trading rules with R
GNU General Public License v2.0
326 stars 102 forks source link

run* functions optionally return all NA if nrow(x) < n #68

Open joshuaulrich opened 6 years ago

joshuaulrich commented 6 years ago

Michael Ohlrogge commented on my answer to "Moving variance in R" that it could be useful for the run* functions to return a vector of NA the same length as the input when n is greater than the number of non-NA observations in the input object.

Need to investigate what zoo::rollapply() and friends do in this case.

ethanbsmith commented 5 years ago

I would wholeheartedly support this approach. I think it would improve the consistency of many of these functions.

take a simple example:

> SMA(1:10, n = 3)
 [1] NA NA  2  3  4  5  6  7  8  9

here, I think we all agree that the ramp-up until row >=n properly returns NA. So any code using these results already has to handle the leading NA in the output. returning properly structured and named output that had all NA values would greatly simplify downstream processing. in fact, I generally consider the case where nrow(x) < n to be a degenerate form of ramp-up.

Also, selfishly, it would allow me to get rid of my ever-growing (and very poorly implemented) library of wrappers to handle this, so I'd be willing to help on the project if it gets greenlighted:

ATR <- function(HLC, n = 14, maType, ...) {
    HLC <- as.xts(HLC, error = as.matrix)
    if (n > (z <- NROW(HLC))) {
        r <- matrix(nrow = z, ncol = 4)
        r <- reclass(r, HLC)
        if (!is.null(dim(r))) colnames(r) <- c('tr', 'atr', 'trueHigh', 'trueLow')
    } else {
        r <- TTR::ATR(HLC, n = n, maType, ...)
    }
    return(r)
}

ROC <- function(x, n = 1, type = c("continuous", "discrete"), na.pad = TRUE) {
    #return NA if not enough values
    if ((n) > (z <- NROW(x)) || (n > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
    } else {
        r <- TTR::ROC(x, n = n, type = type, na.pad = na.pad)
    }
    return(r)

}

SMA <- function(x, n = 10, ...) {
    #return NA if not enough values
    if ((n) > (z <- NROW(x)) || (n > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
        if (!is.null(dim(r))) colnames(r) <- "SMA"
    } else {
        r <- TTR::SMA(x, n, ...)
    }
    return(r)
}

EMA <- function(x, n = 10, wilder = FALSE, ratio = NULL, ...) {
    #return NA if not enough values
    if ((n + 1L) > (z <- NROW(x)) || ((n + 1L) > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
        if (!is.null(dim(r))) colnames(r) <- "EMA"
    } else {
        r <- TTR::EMA(x, n, wilder, ratio, ...)
    }
    return(r)
}

WMA <- function(x, n = 10, wts = 1:n, ...) {
    #return NA if not enough values
    if ((n) > (z <- NROW(x)) || ((n) > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
        if (!is.null(dim(r))) colnames(r) <- "WMA"
    } else {
        r <- TTR::WMA(x, n = n, wts = wts, ...)
    }
    return(r)
}

DEMA <- function(x, n = 10, v = 1, wilder = FALSE, ratio = NULL) {
    #return NA if not enough values
    if ((n * 2L) >= (z <- NROW(x)) || ((n * 2L) >= sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
        if (!is.null(dim(r))) colnames(r) <- "DEMA"
    } else {

        r <- TTR::DEMA(x, n, v, wilder, ratio)
    }
    return(r)
}

runMax <- function(x, n = 10, cumulative = FALSE) {
    #return NA if not enough values
    if ((n) > (z <- NROW(x)) || ((n) > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
    } else {
        r <- TTR::runMax(x, n, cumulative)
    }
    return(r)
}

runMin <- function(x, n = 10, cumulative = FALSE) {
    #return NA if not enough values
    if ((n) > (z <- NROW(x)) || ((n) > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
    } else {
        r <- TTR::runMin(x, n, cumulative)
    }
    return(r)
}

runMean <- function(x, n = 10, cumulative = FALSE) {
    #return NA if not enough values
    if ((n) > (z <- NROW(x)) || ((n) > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
    } else {
        r <- TTR::runMean(x, n, cumulative)
    }
    return(r)
}

runMedian <- function(x, n = 10, non.unique = "mean", cumulative = FALSE) {
    #return NA if not enough values
    if ((n) > (z <- NROW(x)) || ((n) > sum(!is.na(x)))) {
        r <- reclass(rep(NA_real_, z), x)
    } else {
        r <- TTR::runMedian(x, n, non.unique, cumulative)
    }
    return(r)
}

stoch <- function(HLC, nFastK = 50, nFastD = round(nFastK / 5), nSlowD = round(nFastK / 5), maType = 'NWMA', bounded = TRUE, smooth = 1, ...) {
    if (max(nFastK,nFastD) > (z <- NROW(HLC))) {
        r <- matrix(nrow = z, ncol = 3)
        r <- reclass(r, x)
        if (!is.null(dim(r))) colnames(r) <- c('fastK', 'fastD', 'slowD')
    } else {
        r <- TTR::stoch(HLC, nFastK = nFastK, nFastD = nFastD, nSlowD = nSlowD, maType = maType, bounded = TRUE, smooth = 1, ...)
    }
    return(r)
}
joshuaulrich commented 5 years ago

Thanks for the comment! It's good to know that you created wrapper functions to deal with this. That means you encounter it enough for those function to be useful for you, which means it probably affects others similarly.

I also think it's worth considering throwing a warning when the function will return all NA, since it's likely possible the user didn't expect that. Maybe with a global option to suppress the warning? Thoughts?

ethanbsmith commented 5 years ago

Agree on a warning, and that a global option is probably better than per function parameters as I suspect most people would want this functionality to be consistent (i.e. always warn, or never warn)

Also, I think it probably needs a transitional global option to support existing functionality for backward comparability as some people may have existing try/catch blocks that depend on the error

ethanbsmith commented 5 years ago

rethinking this a bit, another option is to add a na.pad=FALSE parameter to the function signature. this would have the following advantages: