benjaminrich / table1

79 stars 26 forks source link

Creating multiple extra.cols with extra.col #125

Open CKuhneHMRI opened 5 months ago

CKuhneHMRI commented 5 months ago

Hi,

I am currently using a function to create an extra column of p-values, however, I would love to extend this to creating two extra columns, the second for Bayesian analyses.

Current: extra.col = list(P-value = pvalue_3plus_diagnostics)

Ideal: extra.col = list((P-value = pvalue , BF10 = bf_func )

Is this possible?

pvalue <- function(x, ...) {

Construct vectors of data y, and groups (strata) g

y <- unlist(x) g <- factor(rep(1:length(x), times = sapply(x, length)))

tests_used_list <- list() test_used <- "" p_value_formatted <- ""

if (is.numeric(y)) {

For numeric variables, perform a Kruskal-Wallis test

p <- kruskal.test(y ~ g)$p.value
test_used <- "Kruskal"

p_value_formatted <- format_p_value(p)
p_value_formatted <- paste0(p_value_formatted,{supsc('1')})

} else {

For categorical variables

# Construct contingency table
contingency_table <- table(y, g)

# Calculate expected frequencies
expected <- chisq.test(contingency_table, simulate.p.value = TRUE)$expected

# Calculate the percentage of cells with expected frequencies < 5
percent_cells_below_5 <- sum(expected < 5) / length(expected)

# If more than 20% of cells have expected frequencies < 5, use Fisher's exact test
if (percent_cells_below_5 > 0.20 || min(expected) < 1) {
  p <- fisher.test(contingency_table, workspace = 2e8)$p.value
  test_used <- "Fishers"

  p_value_formatted <- format_p_value(p)
  p_value_formatted <- paste0(p_value_formatted,{supsc('2')})

} else {
  p <- chisq.test(contingency_table)$p.value
  test_used <- "Chi"

  p_value_formatted <- format_p_value(p)
  p_value_formatted <- paste0(p_value_formatted,{supsc('3')})

}

}

if (!exists("tests_used_list", envir = .GlobalEnv)) { .GlobalEnv$tests_used_list <- list() } .GlobalEnv$tests_used_list <- c(.GlobalEnv$tests_used_list, test_used)

Return the formatted p-value

The initial empty string places the output on the line below the variable label.

c("", sub("<", "<", p_value_formatted)) }

bf_func <- function(x, ...) {

Construct vectors of data y, and groups (strata) g

y <- unlist(x) g <- factor(rep(1:length(x), times = sapply(x, length)))

tests_used_list_bf <- list() test_used <- "" bf <- ""

if (is.numeric(y)) {

tmp <- data.frame(y = y,g= g)
tmp <- tmp %>% filter(!is.na(y))

test <- anovaBF(y ~ g, data = tmp, whichRandom = NULL)
test_result <- extractBF(test)[[1]]
test_used <- "Bayesian ANOVA"

bf <- BF_arrange(BF=test_result)

} else {

For categorical variables

# Construct contingency table
contingency_table <- table(y, g)

  test <- contingencyTableBF(contingency_table, sampleType = "poisson")
  test_result <- extractBF(test)[[1]]
  test_used <- "Bayesian Contingency"
  bf <- BF_arrange(BF = test_result)

}

if (!exists("tests_used_list_bf", envir = .GlobalEnv)) { .GlobalEnv$tests_used_list_bf <- list() } .GlobalEnv$tests_used_list_bf <- c(.GlobalEnv$tests_used_list_bf, test_used)

Return the formatted bf

The initial empty string places the output on the line below the variable label.

c("", sub("<", "<", bf)) }

Function to interpret and print out BF

BF_arrange <- function(BF) { interpretation = bfactor_interpret(BF) if(interpretation == "Negative") { Null_BF = 1/BF Null_BF = round(Null_BF,2) return(paste0("1/", Null_BF)) } else { return(as.character(round(BF, 2))) }

}

Thanks!

benjaminrich commented 4 months ago

It should work. Did you try it?