Closed JesseJvdw closed 1 year ago
Can you give me small reprex here that illustrates the problem for MCAR? Then I customise a solution to your exact code.
So much for "short" but here is the smallest runnable code.
library(mice) # for imputation and amputation
#>
#> Attaching package: 'mice'
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following objects are masked from 'package:base':
#>
#> cbind, rbind
library(purrr) # for functional programming
#> Warning: package 'purrr' was built under R version 4.1.3
library(furrr) # for functional futures
#> Warning: package 'furrr' was built under R version 4.1.3
#> Loading required package: future
#> Warning: package 'future' was built under R version 4.1.3
library(magrittr) # for pipes
#> Warning: package 'magrittr' was built under R version 4.1.3
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
library(dplyr) # for data manipulation
#> Warning: package 'dplyr' was built under R version 4.1.3
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
library(tibble) # for tibbles
#> Warning: package 'tibble' was built under R version 4.1.3
library(remstats) # for REM statistics
library(remify) # for converting
library(data.table)
#> Warning: package 'data.table' was built under R version 4.1.3
#>
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#>
#> between, first, last
#> The following object is masked from 'package:purrr':
#>
#> transpose
library(survival) # for REM analysis
#> Warning: package 'survival' was built under R version 4.1.3
#>
#> Attaching package: 'survival'
#> The following object is masked from 'package:future':
#>
#> cluster
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.1.3
#> Warning: package 'ggplot2' was built under R version 4.1.3
#> Warning: package 'tidyr' was built under R version 4.1.3
#> Warning: package 'readr' was built under R version 4.1.3
#> Warning: package 'stringr' was built under R version 4.1.3
#> Warning: package 'forcats' was built under R version 4.1.3
library(datapasta)
#> Warning: package 'datapasta' was built under R version 4.1.3
set.seed(123)
##### Load data
load("UUsummerschool.rdata")
apollo <- as_tibble(PartOfApollo_13)
##### renaming columns to work with remify
setnames(apollo, old = c('sender','receiver'),
new = c('actor1','actor2'))
whichcol <- c("", "actor2", "actor1")
names(whichcol) <- colnames(apollo)
# use the custom pmm method
method <- make.method(apollo)
method[c(2,3)] <- "pmm.conditional"
##### Creating missing data
##### Missing pattern
pattern <- matrix(c(1,0,1,1,1,0), nrow=2, byrow=TRUE)
##### predictor matrix
predictormatrix <- matrix(c(0,0,0,0,0,1,0,1,0), nrow=3, byrow=TRUE)
##### Model-based finite populations
mbased_finite_apollo <- list(
MCAR = furrr::future_map(1:5, ~ {
apollo %>%
ampute(prop = .5,
mech = "MCAR",
patterns = pattern) %>% .$amp %>%
mice(m = 5,
maxit = 5,
method = method,
whichcolumn = whichcol,
predictorMatrix = predictormatrix,
print = FALSE)
}, .options = furrr_options(seed = 123)))
####
effects <- ~ -1 + outdegreeReceiver() + indegreeSender() + reciprocity()
rehObject <- remify(apollo, model = "tie")
statsObject <- remstats(rehObject, tie_effects = effects)
### get the risk sets
risk_sets <- attr(statsObject, "riskset")
risk_sets <- risk_sets %>% select(-'id')
### Get the times
time <- apollo$time
### merge riskset with each timepoint
combined <- merge(risk_sets, time, by = NULL)
combined <- combined %>% rename("time" = "y")
combined <- lapply(combined, as.numeric)
combined <- as.data.frame(combined)
### Create matrices for subtraction to make a status column for coxph
combined_matrix <- data.matrix(combined)
matrix_rows <- nrow(combined)
repeated_df <- apollo[rep(seq_len(nrow(apollo)), each = 240), ] %>% rename("sender" = "actor1", "receiver" = "actor2")
repeated_df <- repeated_df[, c(2,3,1)]
apollo_matrix <- data.matrix(repeated_df)
status_matrix <- apollo_matrix - combined_matrix
### create a status column
status <- as.integer(rowSums(status_matrix == 0) == ncol(status_matrix))
status <- as.data.frame(status)
### Add status to the combined set
combined <- cbind(combined, status)
###### REM analysis on completed data sets of MCAR and MAR
run_coxph <- function(df) {
rehObject <- remify(as.data.frame(df), model = "tie")
statsObject <- remstats(rehObject, tie_effects = effects)
reciprocity <- as.vector(statsObject[,,3])
outdegreeReceiver <- as.vector(statsObject[,,1])
indegreeSender <- as.vector(statsObject[,,2])
data <- cbind(combined, reciprocity, outdegreeReceiver, indegreeSender)
}
###### running the function on all completed datasets
Results <- mbased_finite_apollo %>%
map(~.x %>% # for every missingness mechanism
map(~.x %>% # for every simulated multiple imputation.... # create a list of completed data sets
complete("all") %>%
map(~.x %$% # for every completed data set....
with(.x, run_coxph(.x))
)
)
)
#> Error in `map()`:
#> i In index: 1.
#> i With name: MCAR.
#> Caused by error in `map()`:
#> i In index: 3.
#> Caused by error in `map()`:
#> i In index: 1.
#> i With name: 1.
#> Caused by error in `data.frame()`:
#> ! arguments imply differing number of rows: 931680, 815220
#> Backtrace:
#> x
#> 1. +-mbased_finite_apollo %>% ...
#> 2. +-purrr::map(...)
#> 3. | \-purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 4. | +-purrr:::with_indexed_errors(...)
#> 5. | | \-base::withCallingHandlers(...)
#> 6. | +-purrr:::call_with_cleanup(...)
#> 7. | \-.f(.x[[i]], ...)
#> 8. | \-.x %>% ...
#> 9. +-purrr::map(...)
#> 10. | \-purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 11. | +-purrr:::with_indexed_errors(...)
#> 12. | | \-base::withCallingHandlers(...)
#> 13. | +-purrr:::call_with_cleanup(...)
#> 14. | \-.f(.x[[i]], ...)
#> 15. | \-.x %>% complete("all") %>% ...
#> 16. +-purrr::map(., ~.x %$% with(.x, run_coxph(.x)))
#> 17. | \-purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 18. | +-purrr:::with_indexed_errors(...)
#> 19. | | \-base::withCallingHandlers(...)
#> 20. | +-purrr:::call_with_cleanup(...)
#> 21. | \-.f(.x[[i]], ...)
#> 22. | \-.x %$% with(.x, run_coxph(.x))
#> 23. +-base::with(., with(.x, run_coxph(.x)))
#> 24. +-base::with.default(., with(.x, run_coxph(.x)))
#> 25. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 26. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 27. | +-base::with(.x, run_coxph(.x))
#> 28. | \-base::with.default(.x, run_coxph(.x))
#> 29. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 30. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 31. | \-global run_coxph(.x)
#> 32. | \-mice::cbind(combined, reciprocity, outdegreeReceiver, indegreeSender)
#> 33. | \-base::cbind(...)
#> 34. | \-base::cbind(deparse.level, ...)
#> 35. | \-base::data.frame(..., check.names = FALSE)
#> 36. | \-base::stop(...)
#> 37. \-base::.handleSimpleError(...)
#> 38. \-purrr (local) h(simpleError(msg, call))
#> 39. \-cli::cli_abort(...)
#> 40. \-rlang::abort(...)
Created on 2023-06-01 with reprex v2.0.2
;-) Give me some time.
I believe you have used the wrong pipe operator. Changing the exposition pipe %$%
to the regular pipe %>%
or |>
in the line where you call run_coxph(.x)
should do the trick:
library(mice, warn.conflicts = FALSE) # for imputation and amputation
library(purrr, warn.conflicts = FALSE) # for functional programming
library(furrr, warn.conflicts = FALSE) # for functional futures
#> Loading required package: future
library(magrittr, warn.conflicts = FALSE) # for pipes
library(dplyr, warn.conflicts = FALSE) # for data manipulation
library(tibble, warn.conflicts = FALSE) # for tibbles
library(remstats, warn.conflicts = FALSE) # for REM statistics
library(remify, warn.conflicts = FALSE) # for converting
library(data.table, warn.conflicts = FALSE)
library(survival, warn.conflicts = FALSE) # for REM analysis
library(tidyverse, warn.conflicts = FALSE)
library(datapasta, warn.conflicts = FALSE)
set.seed(123)
##### Load data
con <- url("https://github.com/mshafieek/ADS-Missing-data-social-network/raw/main/literature_%20REM/Tutorial_REM_REH_DATA/UUsummerschool.Rdata")
load(con)
apollo <- as_tibble(PartOfApollo_13)
##### renaming columns to work with remify
setnames(apollo, old = c('sender','receiver'),
new = c('actor1','actor2'))
whichcol <- c("", "actor2", "actor1")
names(whichcol) <- colnames(apollo)
# use the custom pmm method
method <- make.method(apollo)
method[c(2,3)] <- "pmm.conditional"
##### Creating missing data
##### Missing pattern
pattern <- matrix(c(1,0,1,1,1,0), nrow=2, byrow=TRUE)
##### predictor matrix
predictormatrix <- matrix(c(0,0,0,0,0,1,0,1,0), nrow=3, byrow=TRUE)
##### Model-based finite populations
mbased_finite_apollo <- list(
MCAR = furrr::future_map(1:5, ~ {
apollo %>%
ampute(prop = .5,
mech = "MCAR",
patterns = pattern) %>% .$amp %>%
mice(m = 5,
maxit = 5,
method = method,
whichcolumn = whichcol,
predictorMatrix = predictormatrix,
print = FALSE)
}, .options = furrr_options(seed = 123)))
####
effects <- ~ -1 + outdegreeReceiver() + indegreeSender() + reciprocity()
rehObject <- remify(apollo, model = "tie")
statsObject <- remstats(rehObject, tie_effects = effects)
### get the risk sets
risk_sets <- attr(statsObject, "riskset")
risk_sets <- risk_sets %>% select(-'id')
### Get the times
time <- apollo$time
### merge riskset with each timepoint
combined <- merge(risk_sets, time, by = NULL)
combined <- combined %>% rename("time" = "y")
combined <- lapply(combined, as.numeric)
combined <- as.data.frame(combined)
### Create matrices for subtraction to make a status column for coxph
combined_matrix <- data.matrix(combined)
matrix_rows <- nrow(combined)
repeated_df <- apollo[rep(seq_len(nrow(apollo)), each = 240), ] %>% rename("sender" = "actor1", "receiver" = "actor2")
repeated_df <- repeated_df[, c(2,3,1)]
apollo_matrix <- data.matrix(repeated_df)
status_matrix <- apollo_matrix - combined_matrix
### create a status column
status <- as.integer(rowSums(status_matrix == 0) == ncol(status_matrix))
status <- as.data.frame(status)
### Add status to the combined set
combined <- cbind(combined, status)
###### REM analysis on completed data sets of MCAR and MAR
run_coxph <- function(df) {
rehObject <- remify(as.data.frame(df), model = "tie")
statsObject <- remstats(rehObject, tie_effects = effects)
reciprocity <- as.vector(statsObject[,,3])
outdegreeReceiver <- as.vector(statsObject[,,1])
indegreeSender <- as.vector(statsObject[,,2])
data <- cbind(combined, reciprocity, outdegreeReceiver, indegreeSender)
}
###### running the function on all completed datasets
Results <- mbased_finite_apollo %>%
map(~.x %>% # for every missingness mechanism
map(~.x %>% # for every simulated multiple imputation.... # create a list of completed data sets
complete("all") %>%
map(~.x %>% # for every completed data set....
with(.x, run_coxph(.x))
)
)
)
str(Results)
#> List of 1
#> $ MCAR:List of 5
#> ..$ :List of 5
#> .. ..$ 1:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 19 17 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 2 ...
#> .. ..$ 2:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 19 2 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 2 ...
#> .. ..$ 3:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 19 2 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 18 17 19 17 18 ...
#> .. ..$ 4:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 19 17 19 18 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 2 ...
#> .. ..$ 5:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 19 2 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 2 ...
#> ..$ :List of 5
#> .. ..$ 1:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 18 19 17 19 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 19 ...
#> .. ..$ 2:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 19 17 2 2 17 19 17 19 17 ...
#> .. .. ..$ actor2: num [1:3882] 17 18 2 18 17 2 17 19 17 19 ...
#> .. ..$ 3:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 17 19 2 19 19 17 19 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 19 ...
#> .. ..$ 4:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 17 17 2 17 19 17 19 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 19 ...
#> .. ..$ 5:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 17 2 2 17 19 17 19 17 ...
#> .. .. ..$ actor2: num [1:3882] 7 18 2 18 17 2 17 19 17 19 ...
#> ..$ :List of 5
#> .. ..$ 1:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 17 2 18 2 2 17 19 17 2 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 19 2 18 17 2 17 19 17 19 ...
#> .. ..$ 2:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 19 2 18 17 2 17 2 17 2 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 17 2 18 17 2 17 19 17 19 ...
#> .. ..$ 3:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 2 17 2 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 19 17 19 ...
#> .. ..$ 4:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 19 2 18 2 2 17 2 17 2 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 19 2 18 17 2 17 19 17 19 ...
#> .. ..$ 5:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 18 17 2 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 17 2 18 17 2 17 19 17 19 ...
#> ..$ :List of 5
#> .. ..$ 1:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 2 17 19 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 18 17 19 ...
#> .. ..$ 2:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 2 17 19 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 2 17 19 ...
#> .. ..$ 3:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 2 17 19 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 2 17 19 ...
#> .. ..$ 4:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 2 17 19 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 19 17 2 17 19 ...
#> .. ..$ 5:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 2 17 19 2 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 17 2 17 19 ...
#> ..$ :List of 5
#> .. ..$ 1:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 19 17 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 2 2 17 19 ...
#> .. ..$ 2:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 19 2 18 2 2 17 19 17 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 2 2 17 19 ...
#> .. ..$ 3:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 17 2 18 2 2 17 19 17 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 2 2 17 19 ...
#> .. ..$ 4:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 19 17 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 2 2 17 19 ...
#> .. ..$ 5:'data.frame': 3882 obs. of 3 variables:
#> .. .. ..$ time : num [1:3882] 11849 11854 11885 11890 12232 ...
#> .. .. ..$ actor1: num [1:3882] 18 2 18 2 2 17 19 17 2 17 ...
#> .. .. ..$ actor2: num [1:3882] 2 18 2 18 17 2 2 18 17 19 ...
sessionInfo()
#> R version 4.3.0 (2023-04-21)
#> Platform: aarch64-apple-darwin20 (64-bit)
#> Running under: macOS Ventura 13.4
#>
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
#>
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#>
#> time zone: Europe/Amsterdam
#> tzcode source: internal
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> other attached packages:
#> [1] datapasta_3.1.0 lubridate_1.9.2 forcats_1.0.0 stringr_1.5.0
#> [5] readr_2.1.4 tidyr_1.3.0 ggplot2_3.4.2 tidyverse_2.0.0
#> [9] survival_3.5-5 data.table_1.14.8 remify_3.0.0 remstats_3.1.0
#> [13] tibble_3.2.1 dplyr_1.1.2 magrittr_2.0.3 furrr_0.3.1
#> [17] future_1.32.0 purrr_1.0.1 mice_3.15.3
#>
#> loaded via a namespace (and not attached):
#> [1] utf8_1.2.3 generics_0.1.3 stringi_1.7.12 lattice_0.21-8
#> [5] hms_1.1.3 listenv_0.9.0 digest_0.6.31 timechange_0.2.0
#> [9] evaluate_0.21 grid_4.3.0 fastmap_1.1.1 Matrix_1.5-4
#> [13] backports_1.4.1 fansi_1.0.4 scales_1.2.1 codetools_0.2-19
#> [17] cli_3.6.1 rlang_1.1.1 parallelly_1.36.0 munsell_0.5.0
#> [21] splines_4.3.0 reprex_2.0.2 withr_2.5.0 yaml_2.3.7
#> [25] tools_4.3.0 parallel_4.3.0 tzdb_0.4.0 colorspace_2.1-0
#> [29] globals_0.16.2 broom_1.0.4 vctrs_0.6.2 R6_2.5.1
#> [33] lifecycle_1.0.3 fs_1.6.2 pkgconfig_2.0.3 pillar_1.9.0
#> [37] gtable_0.3.3 glue_1.6.2 Rcpp_1.0.10 xfun_0.39
#> [41] tidyselect_1.2.0 rstudioapi_0.14 knitr_1.43 htmltools_0.5.5
#> [45] rmarkdown_2.21 compiler_4.3.0
Created on 2023-06-01 with reprex v2.0.2
That didn't fix the problem. When I do that it doesn't run the coxph function to the data (which I left out in the reprex) Below is the code with the coxph in my own made function. When I run this it works fine right up to 1 complete data set where it gives this error: ! arguments imply differing number of rows: 931680, 815220. This happens because when the completed dataset gets fed to the remify function there are only 210 risk sets instead of the original 240. Below is the unedited which prints the results of the coxph without pooling. Maybe some actors are left out during imputation.
library(mice, warn.conflicts = FALSE) # for imputation and amputation
library(purrr, warn.conflicts = FALSE) # for functional programming
#> Warning: package 'purrr' was built under R version 4.1.3
library(furrr, warn.conflicts = FALSE) # for functional futures
#> Warning: package 'furrr' was built under R version 4.1.3
#> Loading required package: future
#> Warning: package 'future' was built under R version 4.1.3
#> Loading required package: future
library(magrittr, warn.conflicts = FALSE) # for pipes
#> Warning: package 'magrittr' was built under R version 4.1.3
library(dplyr, warn.conflicts = FALSE) # for data manipulation
#> Warning: package 'dplyr' was built under R version 4.1.3
library(tibble, warn.conflicts = FALSE) # for tibbles
#> Warning: package 'tibble' was built under R version 4.1.3
library(remstats, warn.conflicts = FALSE) # for REM statistics
library(remify, warn.conflicts = FALSE) # for converting
library(data.table, warn.conflicts = FALSE)
#> Warning: package 'data.table' was built under R version 4.1.3
library(survival, warn.conflicts = FALSE) # for REM analysis
#> Warning: package 'survival' was built under R version 4.1.3
library(tidyverse, warn.conflicts = FALSE)
#> Warning: package 'tidyverse' was built under R version 4.1.3
#> Warning: package 'ggplot2' was built under R version 4.1.3
#> Warning: package 'tidyr' was built under R version 4.1.3
#> Warning: package 'readr' was built under R version 4.1.3
#> Warning: package 'stringr' was built under R version 4.1.3
#> Warning: package 'forcats' was built under R version 4.1.3
library(datapasta, warn.conflicts = FALSE)
#> Warning: package 'datapasta' was built under R version 4.1.3
set.seed(123)
##### Load data
con <- url("https://github.com/mshafieek/ADS-Missing-data-social-network/raw/main/literature_%20REM/Tutorial_REM_REH_DATA/UUsummerschool.Rdata")
load(con)
apollo <- as_tibble(PartOfApollo_13)
##### renaming columns to work with remify
setnames(apollo, old = c('sender','receiver'),
new = c('actor1','actor2'))
whichcol <- c("", "actor2", "actor1")
names(whichcol) <- colnames(apollo)
# use the custom pmm method
method <- make.method(apollo)
method[c(2,3)] <- "pmm.conditional"
##### Creating missing data
##### Missing pattern
pattern <- matrix(c(1,0,1,1,1,0), nrow=2, byrow=TRUE)
##### predictor matrix
predictormatrix <- matrix(c(0,0,0,0,0,1,0,1,0), nrow=3, byrow=TRUE)
##### Model-based finite populations
mbased_finite_apollo <- list(
MCAR = furrr::future_map(1:5, ~ {
apollo %>%
ampute(prop = .5,
mech = "MCAR",
patterns = pattern) %>% .$amp %>%
mice(m = 5,
maxit = 5,
method = method,
whichcolumn = whichcol,
predictorMatrix = predictormatrix,
print = FALSE)
}, .options = furrr_options(seed = 123)))
####
effects <- ~ -1 + outdegreeReceiver() + indegreeSender() + reciprocity()
rehObject <- remify(apollo, model = "tie")
statsObject <- remstats(rehObject, tie_effects = effects)
### get the risk sets
risk_sets <- attr(statsObject, "riskset")
risk_sets <- risk_sets %>% select(-'id')
### Get the times
time <- apollo$time
### merge riskset with each timepoint
combined <- merge(risk_sets, time, by = NULL)
combined <- combined %>% rename("time" = "y")
combined <- lapply(combined, as.numeric)
combined <- as.data.frame(combined)
### Create matrices for subtraction to make a status column for coxph
combined_matrix <- data.matrix(combined)
matrix_rows <- nrow(combined)
repeated_df <- apollo[rep(seq_len(nrow(apollo)), each = 240), ] %>% rename("sender" = "actor1", "receiver" = "actor2")
repeated_df <- repeated_df[, c(2,3,1)]
apollo_matrix <- data.matrix(repeated_df)
status_matrix <- apollo_matrix - combined_matrix
### create a status column
status <- as.integer(rowSums(status_matrix == 0) == ncol(status_matrix))
status <- as.data.frame(status)
### Add status to the combined set
combined <- cbind(combined, status)
###### REM analysis on completed data sets of MCAR and MAR
run_coxph <- function(df) {
rehObject <- remify(as.data.frame(df), model = "tie")
statsObject <- remstats(rehObject, tie_effects = effects)
reciprocity <- as.vector(statsObject[,,3])
outdegreeReceiver <- as.vector(statsObject[,,1])
indegreeSender <- as.vector(statsObject[,,2])
data <- cbind(combined, reciprocity, outdegreeReceiver, indegreeSender)
fit <- coxph(Surv(time, status) ~ reciprocity + indegreeSender + outdegreeReceiver, data = data)
return(fit)
}
###### running the function on all completed datasets
Results <- mbased_finite_apollo %>%
map(~.x %>% # for every missingness mechanism
map(~.x %>% # for every simulated multiple imputation.... # create a list of completed data sets
complete("all") %>%
map(~.x %$% # for every completed data set....
print(with(.x, run_coxph(.x))
)
)
))
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -5.470e-05 9.999e-01 4.376e-04 -0.125 0.901
#> indegreeSender 3.923e-04 1.000e+00 7.542e-05 5.201 1.98e-07
#> outdegreeReceiver -5.963e-05 9.999e-01 7.626e-05 -0.782 0.434
#>
#> Likelihood ratio test=28.84 on 3 df, p=2.425e-06
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -1.204e-04 9.999e-01 4.523e-04 -0.266 0.790
#> indegreeSender 4.440e-04 1.000e+00 7.680e-05 5.781 7.43e-09
#> outdegreeReceiver -6.127e-05 9.999e-01 7.559e-05 -0.811 0.418
#>
#> Likelihood ratio test=34.83 on 3 df, p=1.325e-07
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -1.375e-05 1.000e+00 4.243e-04 -0.032 0.974
#> indegreeSender 5.057e-04 1.001e+00 7.603e-05 6.651 2.9e-11
#> outdegreeReceiver -6.634e-05 9.999e-01 7.428e-05 -0.893 0.372
#>
#> Likelihood ratio test=46.56 on 3 df, p=4.318e-10
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -1.082e-04 9.999e-01 4.431e-04 -0.244 0.807
#> indegreeSender 4.041e-04 1.000e+00 7.688e-05 5.257 1.47e-07
#> outdegreeReceiver -5.584e-05 9.999e-01 7.473e-05 -0.747 0.455
#>
#> Likelihood ratio test=28.83 on 3 df, p=2.428e-06
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -5.048e-05 9.999e-01 4.499e-04 -0.112 0.911
#> indegreeSender 3.241e-04 1.000e+00 7.494e-05 4.325 1.53e-05
#> outdegreeReceiver -5.843e-05 9.999e-01 7.542e-05 -0.775 0.438
#>
#> Likelihood ratio test=20.51 on 3 df, p=0.0001328
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -4.434e-05 1.000e+00 4.331e-04 -0.102 0.918
#> indegreeSender 4.319e-04 1.000e+00 7.434e-05 5.810 6.24e-09
#> outdegreeReceiver -6.534e-05 9.999e-01 7.545e-05 -0.866 0.386
#>
#> Likelihood ratio test=35.9 on 3 df, p=7.858e-08
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -4.870e-05 1.000e+00 4.423e-04 -0.110 0.912
#> indegreeSender 4.909e-04 1.000e+00 7.458e-05 6.582 4.65e-11
#> outdegreeReceiver -7.241e-05 9.999e-01 7.579e-05 -0.955 0.339
#>
#> Likelihood ratio test=46 on 3 df, p=5.664e-10
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -5.458e-05 9.999e-01 4.302e-04 -0.127 0.899
#> indegreeSender 4.147e-04 1.000e+00 7.404e-05 5.601 2.13e-08
#> outdegreeReceiver -6.408e-05 9.999e-01 7.527e-05 -0.851 0.395
#>
#> Likelihood ratio test=33.38 on 3 df, p=2.672e-07
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -3.972e-05 1.000e+00 4.250e-04 -0.093 0.926
#> indegreeSender 4.946e-04 1.000e+00 7.335e-05 6.743 1.55e-11
#> outdegreeReceiver -6.490e-05 9.999e-01 7.506e-05 -0.865 0.387
#>
#> Likelihood ratio test=47.73 on 3 df, p=2.433e-10
#> n= 931680, number of events= 3882
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = data)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -4.477e-05 1.000e+00 4.686e-04 -0.096 0.923882
#> indegreeSender 2.891e-04 1.000e+00 8.129e-05 3.557 0.000376
#> outdegreeReceiver -5.661e-05 9.999e-01 7.636e-05 -0.741 0.458500
#>
#> Likelihood ratio test=13.87 on 3 df, p=0.003087
#> n= 931680, number of events= 3882
#> Error in `map()`:
#> i In index: 1.
#> i With name: MCAR.
#> Caused by error in `map()`:
#> i In index: 3.
#> Caused by error in `map()`:
#> i In index: 1.
#> i With name: 1.
#> Caused by error in `data.frame()`:
#> ! arguments imply differing number of rows: 931680, 815220
#> Backtrace:
#> x
#> 1. +-mbased_finite_apollo %>% ...
#> 2. +-purrr::map(...)
#> 3. | \-purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 4. | +-purrr:::with_indexed_errors(...)
#> 5. | | \-base::withCallingHandlers(...)
#> 6. | +-purrr:::call_with_cleanup(...)
#> 7. | \-.f(.x[[i]], ...)
#> 8. | \-.x %>% ...
#> 9. +-purrr::map(...)
#> 10. | \-purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 11. | +-purrr:::with_indexed_errors(...)
#> 12. | | \-base::withCallingHandlers(...)
#> 13. | +-purrr:::call_with_cleanup(...)
#> 14. | \-.f(.x[[i]], ...)
#> 15. | \-.x %>% complete("all") %>% ...
#> 16. +-purrr::map(., ~.x %$% print(with(.x, run_coxph(.x))))
#> 17. | \-purrr:::map_("list", .x, .f, ..., .progress = .progress)
#> 18. | +-purrr:::with_indexed_errors(...)
#> 19. | | \-base::withCallingHandlers(...)
#> 20. | +-purrr:::call_with_cleanup(...)
#> 21. | \-.f(.x[[i]], ...)
#> 22. | \-.x %$% print(with(.x, run_coxph(.x)))
#> 23. +-base::with(., print(with(.x, run_coxph(.x))))
#> 24. +-base::with.default(., print(with(.x, run_coxph(.x))))
#> 25. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 26. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 27. | +-base::print(with(.x, run_coxph(.x)))
#> 28. | +-base::with(.x, run_coxph(.x))
#> 29. | \-base::with.default(.x, run_coxph(.x))
#> 30. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 31. | \-base::eval(substitute(expr), data, enclos = parent.frame())
#> 32. | \-global run_coxph(.x)
#> 33. | \-mice::cbind(combined, reciprocity, outdegreeReceiver, indegreeSender)
#> 34. | \-base::cbind(...)
#> 35. | \-base::cbind(deparse.level, ...)
#> 36. | \-base::data.frame(..., check.names = FALSE)
#> 37. | \-base::stop(...)
#> 38. \-base::.handleSimpleError(...)
#> 39. \-purrr (local) h(simpleError(msg, call))
#> 40. \-cli::cli_abort(...)
#> 41. \-rlang::abort(...)
Created on 2023-06-01 with reprex v2.0.2
Hi Jesse,
@mshafieek an I figured out where the error comes from. One model has
events actors dyads
3882 16 240
while the other model has
events actors dyads
3882 15 210
exactly what you identified. [n=15 actors * n-1 = 14 actors] = 210
, 16*15=240
. The missingness generated is the cause for this. With MCAR, this happens not so often. with MAR, there is a higher likelihood of this happening, depending on the MAR mechanism (left, right, tail, mid. There are three solutions:
tryCatch()
may help here, or only generating missingness in a part of the data, where the other part always includes the necessary actors (much simpler!*). 13*12
, only focus all simulation on the subset of those 13*12
. combined <- cbind(combined, status)
and all required code into your run_coxph()
evaluation function, and make the evaluation of each simulation independent of the others.
I would do (1) or (2), as it is the most robust and ensures equal evaluation of analysis models over the sims. This may avoid problems later on.
Hope this helps. Let me know if you need me.
I want to do the first solution but I have no idea where to start.
I would do something along the lines of this:
library(mice, warn.conflicts = FALSE) # for imputation and amputation
library(purrr, warn.conflicts = FALSE) # for functional programming
library(furrr, warn.conflicts = FALSE) # for functional futures
#> Loading required package: future
library(magrittr, warn.conflicts = FALSE) # for pipes
library(dplyr, warn.conflicts = FALSE) # for data manipulation
library(tibble, warn.conflicts = FALSE) # for tibbles
library(remstats, warn.conflicts = FALSE) # for REM statistics
library(remify, warn.conflicts = FALSE) # for converting
library(data.table, warn.conflicts = FALSE)
library(survival, warn.conflicts = FALSE) # for REM analysis
library(tidyverse, warn.conflicts = FALSE)
library(datapasta, warn.conflicts = FALSE)
##### Load data
con <- url("https://github.com/mshafieek/ADS-Missing-data-social-network/raw/main/literature_%20REM/Tutorial_REM_REH_DATA/UUsummerschool.Rdata")
load(con)
apollo <- as_tibble(PartOfApollo_13)
##### renaming columns to work with remify
setnames(apollo, old = c('sender','receiver'),
new = c('actor1','actor2'))
whichcol <- c("", "actor2", "actor1")
names(whichcol) <- colnames(apollo)
#### set with sufficient actors & dyads
set.seed(123) # fix seed to realize a sufficient set
indic <- sample(1:nrow(apollo), 1500)
remify(apollo[indic, ], model = "tie") %>% dim()
#>
#> Warning: the `time` variable is not sorted. Sorting will be forced.
#> events actors dyads
#> 1500 16 240
#### Combine the sufficient set and the incomplete set
make_missing <- function(x, indic){
sufficient <- x[indic, ]
miss <- x[-c(indic), ] |>
ampute(prop = .8,
mech = "MCAR",
patterns = matrix(c(1,0,1,
1,1,0),
nrow=2,
byrow=TRUE)) %>%
.$amp
combined <- rbind(sufficient,
miss)
return(combined[order(combined$time), ]) # sort it all like apollo
}
#### Test function
test <- make_missing(apollo, indic)
dim(test)
#> [1] 3882 3
all(apollo$time == test$time)
#> [1] TRUE
head(apollo, n=10); head(test, n=10)
#> # A tibble: 10 × 3
#> time actor1 actor2
#> <dbl> <dbl> <dbl>
#> 1 11849. 18 2
#> 2 11854. 2 18
#> 3 11885. 18 2
#> 4 11890. 2 18
#> 5 12232. 2 17
#> 6 12342. 17 2
#> 7 12345. 19 17
#> 8 12348. 17 19
#> 9 12370. 19 17
#> 10 12382. 17 19
#> # A tibble: 10 × 3
#> time actor1 actor2
#> <dbl> <dbl> <dbl>
#> 1 11849. 18 2
#> 2 11854. 2 18
#> 3 11885. NA 2
#> 4 11890. 2 18
#> 5 12232. 2 NA
#> 6 12342. 17 NA
#> 7 12345. 19 NA
#> 8 12348. 17 19
#> 9 12370. 19 17
#> 10 12382. 17 19
tail(apollo, n=10); tail(test, n=10)
#> # A tibble: 10 × 3
#> time actor1 actor2
#> <dbl> <dbl> <dbl>
#> 1 49980. 9 7
#> 2 49983. 2 19
#> 3 49993. 4 7
#> 4 49994. 7 4
#> 5 49996. 19 2
#> 6 49997. 4 7
#> 7 50001. 2 19
#> 8 50002. 7 4
#> 9 50013. 7 4
#> 10 50015. 4 7
#> # A tibble: 10 × 3
#> time actor1 actor2
#> <dbl> <dbl> <dbl>
#> 1 49980. 9 7
#> 2 49983. 2 NA
#> 3 49993. 4 7
#> 4 49994. 7 4
#> 5 49996. 19 2
#> 6 49997. 4 7
#> 7 50001. NA 19
#> 8 50002. NA 4
#> 9 50013. 7 4
#> 10 50015. 4 7
#### missing data pattern
md.pattern(test)
#> time actor1 actor2
#> 1950 1 1 1 0
#> 966 1 1 0 1
#> 966 1 0 1 1
#> 0 966 966 1932
Created on 2023-06-02 with reprex v2.0.2
This will yield approx 1500+.2*2382 = 1976
observed cases and .8*2382 = 1906
missing cases. Play with the proportion in ampute()
to get to the desired missingness proportion, if needed. You can use the make_missing()
function in your future, as long as you specify a grand (i.e. for all sims the same) indic
outside of the future.
That did the trick. Thank you.
Not sure if it was the intention to do both MCAR and MAR but with MAR I've found an issue where when it's calculating the remstats for the MAR imputation it only gives me 210 risk sets instead of 240. So I can't combine the statistics with the coxph data because the rows are not equal. I've checked the completed MAR set and the contain the 19 actors so it's weird that it only gives 210 risk sets.
The code regarding this is all the way at the bottom of my rmd.
EDIT : I see that when I do multiple simulations it also happens for MCAR. So for some imputations it creates only 210 risk sets instead of 240.