xKDR / CRRao.jl

MIT License
34 stars 21 forks source link

Bartels rank test for randomness of residuals for linear regression #99

Open sourish-cmi opened 1 year ago

sourish-cmi commented 1 year ago

The Bartels rank test for randomness can be used to check the randomness assumption of residuals in Regression.

This test should be part of HypothesisTest.jl

We should do PR in HypothesisTest.jl

Here is the R implementtation.

> getAnywhere(bartels.rank.test)
A single object matching ‘bartels.rank.test’ was found
It was found in the following places
  package:randtests
  namespace:randtests
with value

function (x, alternative = "two.sided", pvalue = "normal") 
{
    dname <- deparse(substitute(x))
    x <- na.omit(x)
    stopifnot(is.numeric(x))
    n <- length(x)
    if (alternative == "t") {
        alternative <- "two.sided"
    }
    if (alternative == "l") {
        alternative <- "left.sided"
    }
    if (alternative == "r") {
        alternative <- "right.sided"
    }
    if (alternative != "two.sided" & alternative != "left.sided" & 
        alternative != "right.sided") {
        stop("must give a valid alternative")
    }
    if (n < 3) {
        stop("sample size must be greater than 2")
    }
    rk <- rank(x)
    d <- diff(rk)
    nm <- sum(rk^2)
    d.rank <- nm - n * (mean(rk)^2)
    RVN <- sum(d^2)/d.rank
    mu <- 2
    vr <- (4 * (n - 2) * (5 * n^2 - 2 * n - 9))/(5 * n * (n + 
        1) * (n - 1)^2)
    if (pvalue == "auto") {
        pvalue <- ifelse(n <= 100, "beta", "normal")
        if (n < 10) 
            pvalue <- "exact"
    }
    if (pvalue == "exact") {
        pv <- pbartelsrank(q = c(nm - 1, nm), n)
        pv0 <- pv[2]
        pv1 <- pv[1]
    }
    if (pvalue == "beta") {
        btp <- (5 * n * (n + 1) * (n - 1)^2)/(2 * (n - 2) * (5 * 
            n^2 - 2 * n - 9)) - 1/2
        pv0 <- pbeta(RVN/4, shape1 = btp, shape2 = btp)
        pv1 <- 1 - pv0
    }
    if (pvalue == "normal") 
        pv0 <- pnorm((RVN - mu)/sqrt(vr))
    if (pvalue == "beta" | pvalue == "normal") 
        pv1 <- 1 - pv0
    if (alternative == "two.sided") {
        pv <- 2 * min(pv0, pv1)
        alternative <- "nonrandomness"
    }
    if (alternative == "left.sided") {
        pv <- pv0
        alternative <- "trend"
    }
    if (alternative == "right.sided") {
        pv <- pv1
        alternative <- "systematic oscillation"
    }
    test <- (RVN - mu)/sqrt(vr)
    rval <- list(statistic = c(statistic = test), nm = sum(d^2), 
        rvn = RVN, mu = mu, var = vr, p.value = pv, method = "Bartels Ratio Test", 
        data.name = dname, parameter = c(n = n), n = n, alternative = alternative)
    class(rval) <- "htest"
    return(rval)
}
sourish-cmi commented 1 year ago

pbartelsrank is Probability function for the distribution of the Bartels Rank statistic NM, for a sample of size n.

> getAnywhere(pbartelsrank)
A single object matching ‘pbartelsrank’ was found
It was found in the following places
  package:randtests
  namespace:randtests
with value

function (q, n, lower.tail = TRUE, log.p = FALSE) 
{
    stopifnot(is.numeric(q) & n > 0)
    tmp <- permut(x = 1:n, m = n, FUN = randtests.aux, method = "bartels")[, 
        1]
    yr <- rep(0, length(q))
    for (i in 1:length(q)) {
        yr[i] <- ifelse(lower.tail, sum(tmp <= q[i]), sum(tmp > 
            q[i]))
    }
    r0 <- yr/factorial(n)
    ifelse(log.p, return(log(r0)), return(r0))
}
sourish-cmi commented 1 year ago

dbartelsrank is distribution function for the distribution of the Bartels Rank statistic NM, for a sample of size n.


> getAnywhere(dbartelsrank)
A single object matching ‘dbartelsrank’ was found
It was found in the following places
  package:randtests
  namespace:randtests
with value

function (x, n, log = FALSE) 
{
    stopifnot(is.numeric(x) & n > 0)
    tmp <- permut(x = 1:n, m = n, FUN = randtests.aux, method = "bartels")[, 
        1]
    yr <- rep(0, length(x))
    for (i in 1:length(x)) {
        yr[i] <- sum(tmp == x[i])
    }
    r0 <- yr/factorial(n)
    ifelse(log, return(log(r0)), return(r0))
}

permut function generate all permutations of mm elements of a vector

> getAnywhere(permut)
A single object matching ‘permut’ was found
It was found in the following places
  package:randtests
  namespace:randtests
with value

function (x, m = length(x), FUN = NULL, ...) 
{
    n <- length(x)
    X <- NULL
    if (m == 1) 
        X <- matrix(x, n, 1)
    else if (n == 1) 
        X <- matrix(x, 1, m)
    else if (n == 2 & m == 2) 
        X <- matrix(c(x, x[2:1]), 2, 2)
    else if (n == 4 & m == 4 & is.null(FUN)) {
        idx <- c(1:4, c(1:2, 4:3), c(1, 3, 2, 4), c(1, 3, 4, 
            2), c(1, 4, 2, 3), c(1, 4, 3, 2))
        X <- rbind(X, matrix(x[idx], nrow = 6, ncol = 4, byrow = T))
        X <- rbind(X, matrix((x[c(2, 1, 3:4)])[idx], nrow = 6, 
            ncol = 4, byrow = T))
        X <- rbind(X, matrix((x[c(3, 1, 2, 4)])[idx], nrow = 6, 
            ncol = 4, byrow = T))
        X <- rbind(X, matrix((x[c(4, 1:3)])[idx], nrow = 6, ncol = 4, 
            byrow = T))
    }
    else if (n == 5 & m == 5 & is.null(FUN)) {
        idx <- c(1:5, 1:3, 5, 4, 1, 2, 4, 3, 5, 1, 2, 4, 5, 3, 
            1, 2, 5, 3, 4, 1, 2, 5, 4, 3, 1, 3, 2, 4, 5, 1, 3, 
            2, 5, 4, 1, 3, 4, 2, 5, 1, 3, 4, 5, 2, 1, 3, 5, 2, 
            4, 1, 3, 5, 4, 2, 1, 4, 2, 3, 5, 1, 4, 2, 5, 3, 1, 
            4, 3, 2, 5, 1, 4, 3, 5, 2, 1, 4, 5, 2, 3, 1, 4, 5, 
            3, 2, 1, 5, 2, 3, 4, 1, 5, 2, 4, 3, 1, 5, 3, 2, 4, 
            1, 5, 3, 4, 2, 1, 5, 4, 2, 3, 1, 5, 4, 3, 2)
        X <- rbind(X, matrix(x[idx], nrow = 24, ncol = 5, byrow = T))
        X <- rbind(X, matrix((x[c(2, 1, 3:5)])[idx], nrow = 24, 
            ncol = 5, byrow = T))
        X <- rbind(X, matrix((x[c(3, 1, 2, 4, 5)])[idx], nrow = 24, 
            ncol = 5, byrow = T))
        X <- rbind(X, matrix((x[c(4, 1, 2, 3, 5)])[idx], nrow = 24, 
            ncol = 5, byrow = T))
        X <- rbind(X, matrix((x[c(5, 1, 2, 3, 4)])[idx], nrow = 24, 
            ncol = 5, byrow = T))
    }
    else {
        for (i in 1:n) {
            if (is.null(FUN)) {
                X <- rbind(X, cbind(x[i], Recall(x[-i], m - 1)))
            }
            else {
                y <- apply(cbind(x[i], Recall(x[-i], m - 1)), 
                  1, FUN, ...)
                X <- rbind(X, matrix(y))
            }
        }
    }
    return(X)
}
sourish-cmi commented 1 year ago

References

  1. Bartels, R. (1982). The Rank Version of von Neumann's Ratio Test for Randomness, Journal of the American Statistical Association, 77(377), 40–46.

  2. Gibbons, J.D. and Chakraborti, S. (2003). Nonparametric Statistical Inference, 4th ed. (pp. 97–98). URL: http://books.google.pt/books?id=dPhtioXwI9cC&lpg=PA97&ots=ZGaQCmuEUq

  3. von Neumann, J. (1941). Distribution of the Ratio of the Mean Square Successive Difference to the Variance. The Annals of Mathematical Statistics 12(4), 367–395. doi:10.1214/aoms/1177731677. https://projecteuclid.org/journals/annals-of-mathematical-statistics/volume-12/issue-4/Distribution-of-the-Ratio-of-the-Mean-Square-Successive-Difference/10.1214/aoms/1177731677.full