Open JB0207 opened 7 months ago
Hi!
tab_last_sig_cpct
works only on percents. When you apply it on the table with counts, as in your case, it gives incorrect results.
As for count of NA:
library(expss)
data("mtcars")
mtcars$am[1:3] = NA
var_lab(mtcars$am) = 'Transmission'
count_na = function(x, weight = NULL){
if(NCOL(x)>1){
# for multiple choice
vec = rowSums(!is.na(x))==0
} else {
vec = is.na(x)
}
w_sum(vec, weight = weight)
}
mtcars %>%
tab_cols(total(), vs) %>%
tab_cells(am) %>%
tab_stat_cpct() %>%
tab_stat_fun(count_na, label = 'NA') %>%
tab_pivot()
Thank you for your reply, clarification on tab_last_sig_cpct
and your suggestion for a solution counting NAs. Unfortunately, the function does not work correctly in combination with subtotal. Example code for error replication:
# set digits
expss_digits(digits = 2)
# set output option
expss_output_viewer()
# for replication
set.seed(123)
test_df <- data.frame(
Y1 = sample(1:3, 100, replace = TRUE),
Q1 = sample(0:1, 100, replace = TRUE),
Q3 = sample(c(1:5, NA), 100, replace = TRUE, prob = c(rep(1, 5), 0.1)),
Q4 = sample(c(1:5, NA), 100, replace = TRUE, prob = c(rep(1, 5), 0.1))
)
var_lab(test_df$Q1) = "Test1"
var_lab(test_df$Q3) = "Test2"
var_lab(test_df$Q4) = "Test3"
test_df %>%
tab_cols(
total(),
Y1
) %>%
tab_cells(
Q1,
subtotal(Q3 %to% Q4, "Bottom 2 Box" = 1:2, "Top 2 Box" = 4:5, position = "bottom")
) %>%
tab_stat_cpct(label="%", total_statistic = "w_cpct", total_label = "#Total cases") %>%
tab_stat_fun(count_na, label = 'NA') %>%
tab_pivot()
But when I specified NA within the subtotal command, it actually worked, but the count of NAs is displayed twice as category 6 (not wanted) and as defined by me with "NA". With hide
I could not hide the unwanted category 6 in the output. Is there any way to do this, or is it possible to avoid the automatic setting of a value label?
Code:
test_df %>%
tab_cols(total(),
Y1) %>%
tab_cells(Q1,
subtotal(Q3 %to% Q4, "Bottom 2 Box" = 1:2, "Top 2 Box" = 4:5, "NA" = NA, position = "bottom")
) %>%
tab_stat_cases(label = "Cases", total_row_position = "below") %>%
tab_stat_cpct(label = "%", total_statistic = "w_cpct", total_label = "#Total cases") %>%
tab_pivot(stat_position = "inside_columns")
Which version of the expss dou you use? hide
works for me:
test_df %>%
tab_cols(total(),
Y1) %>%
tab_cells(Q1,
subtotal(Q3 %to% Q4, "Bottom 2 Box" = 1:2, "Top 2 Box" = 4:5, "NA" = hide(NA), position = "bottom")
) %>%
tab_stat_cases(label = "Cases", total_row_position = "below") %>%
tab_stat_cpct(label = "%", total_statistic = "w_cpct", total_label = "#Total cases") %>%
tab_pivot(stat_position = "inside_columns")
# | | | #Total | | Y1 | | | | | |
# | | | Cases | % | 1 | | 2 | | 3 | |
# | | | | | Cases | % | Cases | % | Cases | % |
# | ----- | ------------ | ------ | --- | ----- | ------ | ----- | ------ | ----- | ------ |
# | Test1 | 0 | 44 | 44 | 16 | 48.48 | 11 | 34.38 | 17 | 48.57 |
# | | 1 | 56 | 56 | 17 | 51.52 | 21 | 65.62 | 18 | 51.43 |
# | | #Total cases | 100 | 100 | 33 | 100.00 | 32 | 100.00 | 35 | 100.00 |
# | Test2 | 1 | 16 | 16 | 5 | 15.15 | 9 | 28.12 | 2 | 5.71 |
# | | 2 | 23 | 23 | 7 | 21.21 | 7 | 21.88 | 9 | 25.71 |
# | | 3 | 20 | 20 | 5 | 15.15 | 7 | 21.88 | 8 | 22.86 |
# | | 4 | 21 | 21 | 7 | 21.21 | 5 | 15.62 | 9 | 25.71 |
# | | 5 | 19 | 19 | 9 | 27.27 | 4 | 12.50 | 6 | 17.14 |
# | | Bottom 2 Box | 39 | 39 | 12 | 36.36 | 16 | 50.00 | 11 | 31.43 |
# | | Top 2 Box | 40 | 40 | 16 | 48.48 | 9 | 28.12 | 15 | 42.86 |
# | | NA | 1 | 1 | | | | | 1 | 2.86 |
# | | #Total cases | 100 | 100 | 33 | 100.00 | 32 | 100.00 | 35 | 100.00 |
# | Test3 | 1 | 25 | 25 | 5 | 15.15 | 11 | 34.38 | 9 | 25.71 |
# | | 2 | 19 | 19 | 6 | 18.18 | 6 | 18.75 | 7 | 20.00 |
# | | 3 | 15 | 15 | 9 | 27.27 | 5 | 15.62 | 1 | 2.86 |
# | | 4 | 19 | 19 | 8 | 24.24 | 5 | 15.62 | 6 | 17.14 |
# | | 5 | 17 | 17 | 4 | 12.12 | 4 | 12.50 | 9 | 25.71 |
# | | Bottom 2 Box | 44 | 44 | 11 | 33.33 | 17 | 53.12 | 16 | 45.71 |
# | | Top 2 Box | 36 | 36 | 12 | 36.36 | 9 | 28.12 | 15 | 42.86 |
# | | NA | 5 | 5 | 1 | 3.03 | 1 | 3.12 | 3 | 8.57 |
# | | #Total cases | 100 | 100 | 33 | 100.00 | 32 | 100.00 | 35 | 100.00 |
Ah, I was using hide
incorrectly. Now it works! Is there a way to remove the NA count from the total number of cases and the percentage column so that for Variable Q3 ("Test2") n=99 is used as the base and for Variable Q4 ("Test3") n=95 is used as the base?
Sorry for delay. Below the code - it is not very nice but works. The idea is to reorder rows according to variable labels.
# set digits
library(expss)
expss_digits(digits = 2)
# set output option
expss_output_default()
count_na = function(x, weight = NULL){
if(NCOL(x)>1){
# for multiple choice
vec = rowSums(!is.na(x))==0
} else {
vec = is.na(x)
}
w_sum(vec, weight = weight)
}
tab_reorder = function(data){
to_split = gsub('^([^|]+).+$', "\\1", data$row_labels)
keep_order = unique(to_split)
res = split(data, to_split)
do.call(add_rows, res[keep_order])
}
# for replication
set.seed(123)
test_df <- data.frame(
Y1 = sample(1:3, 100, replace = TRUE),
Q1 = sample(0:1, 100, replace = TRUE),
Q3 = sample(c(1:5, NA), 100, replace = TRUE, prob = c(rep(1, 5), 0.1)),
Q4 = sample(c(1:5, NA), 100, replace = TRUE, prob = c(rep(1, 5), 0.1))
)
var_lab(test_df$Q1) = "Test1"
var_lab(test_df$Q3) = "Test2"
var_lab(test_df$Q4) = "Test3"
test_df %>%
tab_cols(
total(),
Y1
) %>%
tab_cells(
Q1,
subtotal(Q3 %to% Q4, "Bottom 2 Box" = 1:2, "Top 2 Box" = 4:5, position = "bottom")
) %>%
tab_stat_cpct(label="%", total_statistic = "w_cpct", total_label = "#Total cases") %>%
tab_cells(Q1, Q3 %to% Q4) %>%
tab_stat_fun(count_na, label = 'NA') %>%
tab_pivot() %>%
tab_reorder()
Dear @gdemin,
I have two questions:
tab_last_sig_cpct
isprop.test
. But when comparing the results, I can not reproduce the result oftab_last_sig_cpct
(e.g. red boxes in picture). Am I conducting the significance tests wrong?Results:
My Code:
Many thanks in advance.