Closed mattansb closed 3 years ago
I just realized I haven't followed tidyverse development for quite a while, so I must first translate group_by(across(all_of(c(collapse_re, "x"))))
into base R or so, to better understand what this code does ;-)
Here with base R:
library(ggeffects)
library(lme4)
#> Loading required package: Matrix
library(patchwork)
library(ggplot2)
set.seed(1)
data("stroop", package = "afex")
stroop1 <- subset(stroop, study == 1 & acc == 1)
stroop1 <- stroop1[sample(nrow(stroop1), 1000), ]
stroop1$pno <- factor(stroop1$pno)
m <- lmer(rt ~ condition + congruency +
(congruency | pno) + ( 1 | trialnum),
data = stroop1)
terms <- "congruency"
collapse_re <- "pno"
gge <- ggeffect(m, terms)
# Make the data
rt1 <- aggregate(stroop1[,"rt"], stroop1[c(collapse_re, terms)], mean)
# comapre plots
p1 <- plot(gge, add.data = TRUE)
p2 <- plot(gge) +
geom_point(aes(as.numeric(congruency), x), data = rt1,
alpha = 0.2, color = "red",
position = position_jitter(0.1))
p1 + p2 & coord_cartesian(ylim = c(0, 1.6))
# Make the data
rt2 <- residualize_over_grid(gge, m)
rt2 <- aggregate(rt2[,"predicted"], stroop1[c(collapse_re, terms)], mean)
# comapre plots
p3 <- plot(gge, residuals = TRUE)
p4 <- plot(gge) +
geom_point(aes(as.numeric(factor(congruency)), x), data = rt2,
alpha = 0.2,
position = position_jitter(0.1))
p3 + p4 & coord_cartesian(ylim = c(0, 1.6))
Created on 2021-01-19 by the reprex package (v0.3.0)
(that can only accept one if there are more than 1 in the model).
What do you exactly mean here? I was just re-visiting this issue, thinking about how to implement it.
In the example above we have two random grouping variable: pno
and trialnum
. I think in most cases people are only interested in collapsing, visually, across one random grouping variable, no? Like, I want to see how subjects vary, not how subjects and trials vary...
@strengejacke Anything I can do to help make this a reality?
Yes, make days having more than 24 hours ;-)
Does this help?
collaps_re_data <- function(grid, model, collaps_re = NULL, residuals = FALSE) {
data <- insight::get_data(model)
if (is.null(collaps_re)) {
collaps_re <- insight::find_random(model, flatten = TRUE)
}
if (length(collaps_re) > 1) {
collaps_re <- collaps_re[1]
warning("More than one random grouping variable found.",
"\nUsing `", collaps_re, "`.", call. = FALSE)
}
data <- insight::get_data(m)
if (!collaps_re %in% colnames(data)) {
stop("Could not find `", collaps_re, "` column.", call. = FALSE)
}
if (residuals) {
rawdata <- residualize_over_grid(grid, model, protect_names = TRUE)
y_name <- "predicted"
} else {
rawdata <- attr(grid, "rawdata", exact = TRUE)
y_name <- "response"
if (any(sapply(rawdata[-(1:2)], Negate(is.factor))) ||
attr(grid, "x.is.factor", exact = TRUE) == "0") {
warning("Collapsing usually not informative across a continious variable.",
call. = FALSE)
}
}
rawdata$random <- factor(data[[collaps_re]])
agg_data <- aggregate(rawdata[[y_name]],
by = rawdata[colnames(rawdata) != y_name],
FUN = mean)
colnames(agg_data)[ncol(agg_data)] <- y_name
return(agg_data)
}
Okay, that was the actual function.
Here are some examples:
# Make some data ----
library(dplyr)
data("stroop", package = "afex")
set.seed(42)
some_stroop_data <- stroop %>%
filter(study == 1 & acc == 1, trialnum < 60) %>%
sample_n(1000) %>%
mutate(
pno = factor(pno),
level1_cov = rnorm(n())
) %>%
group_by(pno) %>%
mutate(level2_cov = rnorm(1)) %>%
ungroup()
# Make a model ----
m <- lme4::lmer(rt ~ condition + congruency + level1_cov + level2_cov +
(congruency | pno) + ( 1 | trialnum),
data = some_stroop_data)
# Make ggeffects grid ----
library(ggeffects)
#> Warning: package 'ggeffects' was built under R version 4.0.5
# Collapsed.... ----
## Data ----
library(ggplot2)
library(patchwork)
### Factors ----
gge <- ggemmeans(m, c("congruency","condition"))
#> Loading required namespace: emmeans
data <- collaps_re_data(gge, m)
#> Warning: More than one random grouping variable found.
#> Using `pno`.
p1 <- plot(gge, add.data = TRUE)
p2a <- plot(gge) +
geom_point(aes(x, response, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2,
position = position_jitterdodge(0.2, 0.2, 0.25))
p1 + p2a
# Or
data <- collaps_re_data(gge, m, collaps_re = "trialnum")
p2b <- plot(gge) +
geom_point(aes(x, response, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2,
position = position_jitterdodge(0.2, 0.2, 0.25))
p1 + p2a + p2b
### Level 1 cov ----
gge <- ggemmeans(m, c("level1_cov", "condition"))
data <- collaps_re_data(gge, m)
#> Warning: More than one random grouping variable found.
#> Using `pno`.
#> Warning: Collapsing usually not informative across a continious variable.
p1 <- plot(gge, add.data = TRUE, jitter = 0)
p2 <- plot(gge) +
geom_point(aes(x, response, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2)
p1 + p2 # the same
### Level 2 cov ----
gge <- ggemmeans(m, c("level2_cov","condition"))
data <- collaps_re_data(gge, m)
#> Warning: More than one random grouping variable found.
#> Using `pno`.
#> Warning: Collapsing usually not informative across a continious variable.
p1 <- plot(gge, add.data = TRUE, jitter = 0)
p2 <- plot(gge) +
geom_point(aes(x, response, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2)
p1 + p2 # not the same!
## Residuals ----
### Factors ----
gge <- ggemmeans(m, c("congruency","condition"))
data <- collaps_re_data(gge, m, residuals = TRUE)
#> Warning: More than one random grouping variable found.
#> Using `pno`.
p1 <- plot(gge, add.data = TRUE)
p2a <- plot(gge) +
geom_point(aes(as.numeric(factor(x)), predicted, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2,
position = position_jitterdodge(0.2, 0.2, 0.25))
p1 + p2a
# Or
data <- collaps_re_data(gge, m, collaps_re = "trialnum", residuals = TRUE)
p2b <- plot(gge) +
geom_point(aes(as.numeric(factor(x)), predicted, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2,
position = position_jitterdodge(0.2, 0.2, 0.25))
p1 + p2a + p2b
### Level 1 cov ----
gge <- ggemmeans(m, c("level1_cov [n=10]", "condition"))
data <- collaps_re_data(gge, m, residuals = TRUE)
#> Warning: More than one random grouping variable found.
#> Using `pno`.
p1 <- plot(gge, add.data = TRUE, jitter = 0)
p2 <- plot(gge) +
geom_point(aes(x, predicted, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2)
p1 + p2 # Not the same!
### Level 2 cov ----
gge <- ggemmeans(m, c("level2_cov [n=10]","condition"))
data <- collaps_re_data(gge, m, residuals = TRUE)
#> Warning: More than one random grouping variable found.
#> Using `pno`.
p1 <- plot(gge, add.data = TRUE, jitter = 0)
p2 <- plot(gge) +
geom_point(aes(x, predicted, color = group), data = data,
inherit.aes = FALSE,
alpha = 0.35, shape = 16, size = 2)
p1 + p2 # not the same!
Created on 2021-04-21 by the reprex package (v1.0.0)
two issues:
residuals = TRUE
does not yet work.library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggeffects)
data("stroop", package = "afex")
set.seed(42)
some_stroop_data <- stroop %>%
filter(study == 1 & acc == 1, trialnum < 60) %>%
sample_n(1000) %>%
mutate(
pno = factor(pno),
level1_cov = rnorm(n())
) %>%
group_by(pno) %>%
mutate(level2_cov = rnorm(1)) %>%
ungroup()
# Make a model ----
m <- lme4::lmer(rt ~ condition + congruency + level1_cov + level2_cov +
(congruency | pno) + ( 1 | trialnum),
data = some_stroop_data)
gge <- ggemmeans(m, c("congruency","condition"))
#> Loading required namespace: emmeans
plot(gge)
#> Loading required namespace: ggplot2
plot(gge, add.data = TRUE)
plot(gge, grouplevel.data = "pno", jitter = .05)
plot(gge, grouplevel.data = "trialnum", jitter = .05)
Created on 2021-04-23 by the reprex package (v2.0.0)
Arg name is a bit opaque, no? What's wrong with collapse.group?
Also, seems like you need position jitter + dodge, no?
collapse.group
makes more sense in combination with add.data = TRUE
, so I could combine those. If add.data = TRUE
and collapse.group
not NULL
, data is collapsed by RE groups.
Also, seems like you need position jitter + dodge, no?
What do you mean?
So collapse.data can be
Dodge + jitter - If you look at the first plot you make, the dots are jittered but also dodged. In the other plots, they are only jittered, but not dodged.
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(ggeffects)
data("stroop", package = "afex")
set.seed(42)
some_stroop_data <- stroop %>%
filter(study == 1 & acc == 1, trialnum < 60) %>%
sample_n(1000) %>%
mutate(
pno = factor(pno),
level1_cov = rnorm(n())
) %>%
group_by(pno) %>%
mutate(level2_cov = rnorm(1)) %>%
ungroup()
# Make a model ----
m <- lme4::lmer(rt ~ condition + congruency + level1_cov + level2_cov +
(congruency | pno) + ( 1 | trialnum),
data = some_stroop_data)
gge <- ggemmeans(m, c("congruency","condition"))
#> Loading required namespace: emmeans
plot(gge)
#> Loading required namespace: ggplot2
plot(gge, add.data = TRUE)
plot(gge, collapse.group = TRUE)
#> Warning: More than one random grouping variable found.
#> Using `pno`.
plot(gge, collapse.group = "trialnum")
Created on 2021-04-23 by the reprex package (v2.0.0)
@mattansb do you think this can be closed? I might submit an update the next days...
I think? Did you add the option for residuals also?
should work now.
Could be useful to have an option to collapse the raw / decidualized data points by random effect group, maybe with a
collapse_re = <GROUPING VAR>
(that can only accept one if there are more than 1 in the model).This would be useful since often (especially in psych) the single trial data is not the main focus.
Example:
Raw data
residualized
Created on 2021-01-15 by the reprex package (v0.3.0)