Closed LisanneLageweg closed 1 year ago
I also ran the code on the 1500 set and did not get any significant p-values.
library(mice)
#>
#> Attaching package: 'mice'
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following objects are masked from 'package:base':
#>
#> cbind, rbind
require(lattice)
#> Loading required package: lattice
library(tidyverse)
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> The following object is masked from 'package:tidyr':
#>
#> extract
library(dplyr)
library(purrr)
library(furrr)
#> Loading required package: future
library(relevent)
#> Loading required package: trust
#> Loading required package: sna
#> Loading required package: statnet.common
#>
#> Attaching package: 'statnet.common'
#> The following objects are masked from 'package:base':
#>
#> attr, order
#> Loading required package: network
#>
#> 'network' 1.18.1 (2023-01-24), part of the Statnet Project
#> * 'news(package="network")' for changes since last version
#> * 'citation("network")' for citation information
#> * 'https://statnet.org' for help, support, and other information
#> sna: Tools for Social Network Analysis
#> Version 2.7-1 created on 2023-01-24.
#> copyright (c) 2005, Carter T. Butts, University of California-Irvine
#> For citation information, type citation("sna").
#> Type help(package="sna") to get started.
#> Loading required package: coda
#> relevent: Relational Event Models
#> Version 1.2-1 created on 2023-01-24.
#> copyright (c) 2007, Carter T. Butts, University of California-
#> Irvine
#> For citation information, type citation("relevent").
#> Type help(package="relevent") to get started.
library(broom.mixed)
library(rem)
library(ggplot2)
library(survival)
#>
#> Attaching package: 'survival'
#> The following object is masked from 'package:future':
#>
#> cluster
library(remify)
library(remstats)
library(devtools)
#> Loading required package: usethis
#library(remstimate)
library(tibble)
library(reprex)
library(styler)
## 1. Read the data
load("UUsummerschool.Rdata")
rm(Twitter_data_rem3, WTCPoliceCalls, ClassIntercept, ClassIsFemale,
ClassIsTeacher, WTCPoliceIsICR, Class)
## 2. Ampute and then impute the data
# renaming dataset for later convenience
apollo.renamed <- PartOfApollo_13
apollo.renamed <- apollo.renamed %>%
rename(
actor1 = sender,
actor2 = receiver
)
#### set with sufficient actors & dyads
set.seed(123) # for some reason the seed 123 was just not working for me, it
# keeps giving me 210 dyads, even with size 1600
# even the seed, it was giving me 210 dyads, so I changed the sample to 1600
fixed.sample <- apollo.renamed[sample(1:nrow(apollo.renamed), 1500),]
remify(fixed.sample, model = "tie") %>% dim()
#>
#> Warning: the `time` variable is not sorted. Sorting will be forced.
#> events actors dyads
#> 1500 16 240
# creating functions
# Define effects
effects <- ~ -1 + reciprocity(scaling = ("std")) +
indegreeSender() + outdegreeReceiver()
# Prepare event history
fixed.reh <- remify(edgelist = fixed.sample,
model = "tie")
#>
#> Warning: the `time` variable is not sorted. Sorting will be forced.
# calculate stats
stats <- remstats(tie_effects = effects,
reh = fixed.reh)
risk_sets <- attr(stats, "riskset") %>% select(-'id')
# creating one set with all risksets for each time point
combined <- merge(risk_sets, fixed.sample$time, by=NULL) %>%
rename(time = y) %>%
.[, c("time", "sender", "receiver")] %>%
mutate(sender = as.numeric(sender),
receiver = as.numeric(receiver))
# GV: Calculate divergence
diff <- fixed.sample[rep(seq_len(nrow(fixed.sample)), each = 240), ] %>%
data.matrix() %>%
.[, 1:3] - combined
# GV: identify non-divergence
combined$status <-
rowSums(diff == 0) == ncol(diff)
#combining the dataset with riskset to the statistic
combined$reciprocity <- c(stats[,,1])
combined$indegreeSender <- c(stats[,,2])
combined$outdegreeReceiver <- c(stats[,,3])
fixed.cox.fit <- coxph(Surv(time, status) ~ reciprocity + indegreeSender +
outdegreeReceiver,
data=combined)
fixed.cox.fit
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = combined)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity -0.0312374 0.9692455 0.0286696 -1.090 0.276
#> indegreeSender 0.0003728 1.0003729 0.0003083 1.209 0.227
#> outdegreeReceiver -0.0003139 0.9996861 0.0003019 -1.040 0.298
#>
#> Likelihood ratio test=4.1 on 3 df, p=0.2509
#> n= 360000, number of events= 1500
Created on 2023-06-21 with reprex v2.0.2
And if you run it on one imputed set, as if the imputed set were observed data?
I hope I understood you correctly, I didn't do any simulations and changed the number of imputations to 1. After analysing just a single imputed dataset I still did not get all significant p-values. Only one is significant, exactly like in the fully observed dataset.
library(mice)
#>
#> Attaching package: 'mice'
#> The following object is masked from 'package:stats':
#>
#> filter
#> The following objects are masked from 'package:base':
#>
#> cbind, rbind
require(lattice)
#> Loading required package: lattice
library(tidyverse)
library(magrittr)
#>
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#>
#> set_names
#> The following object is masked from 'package:tidyr':
#>
#> extract
library(dplyr)
library(purrr)
library(furrr)
#> Loading required package: future
library(relevent)
#> Loading required package: trust
#> Loading required package: sna
#> Loading required package: statnet.common
#>
#> Attaching package: 'statnet.common'
#> The following objects are masked from 'package:base':
#>
#> attr, order
#> Loading required package: network
#>
#> 'network' 1.18.1 (2023-01-24), part of the Statnet Project
#> * 'news(package="network")' for changes since last version
#> * 'citation("network")' for citation information
#> * 'https://statnet.org' for help, support, and other information
#> sna: Tools for Social Network Analysis
#> Version 2.7-1 created on 2023-01-24.
#> copyright (c) 2005, Carter T. Butts, University of California-Irvine
#> For citation information, type citation("sna").
#> Type help(package="sna") to get started.
#> Loading required package: coda
#> relevent: Relational Event Models
#> Version 1.2-1 created on 2023-01-24.
#> copyright (c) 2007, Carter T. Butts, University of California-
#> Irvine
#> For citation information, type citation("relevent").
#> Type help(package="relevent") to get started.
library(broom.mixed)
library(rem)
library(ggplot2)
library(survival)
#>
#> Attaching package: 'survival'
#> The following object is masked from 'package:future':
#>
#> cluster
library(remify)
library(remstats)
library(devtools)
#> Loading required package: usethis
#library(remstimate)
library(tibble)
library(reprex)
library(styler)
## 1. Read the data
load("UUsummerschool.Rdata")
rm(Twitter_data_rem3, WTCPoliceCalls, ClassIntercept, ClassIsFemale,
ClassIsTeacher, WTCPoliceIsICR, Class)
## 2. Ampute and then impute the data
# renaming dataset for later convenience
apollo.renamed <- PartOfApollo_13
apollo.renamed <- apollo.renamed %>%
rename(
actor1 = sender,
actor2 = receiver
)
# making the dataset a tibble
apollo.renamed <- as_tibble(apollo.renamed)
# determine which column to condition on
whichcol <- c("", "actor2", "actor1")
names(whichcol) <- colnames(apollo.renamed)
# create predictor matrix for imoutations
pred <- make.predictorMatrix(apollo.renamed)
pred[ ,"time"] <- 0
pred
#> time actor1 actor2
#> time 0 1 1
#> actor1 0 0 1
#> actor2 0 1 0
# use the custom pmm method
method <- make.method(apollo.renamed)
method[c(2,3)] <- "pmm.conditional"
MCAR_finite <- apollo.renamed %>%
ampute(prop = .2,
patterns = c(1,0,1),
mech = "MCAR") %>% .$amp %>%
mice(m = 1,
maxit = 5,
method = method,
pred=pred,
whichcolumn = whichcol,
print = F)
one.imp <- complete(MCAR_finite)
# creating functions
# Define effects
effects <- ~ -1 + reciprocity(scaling = ("std")) +
indegreeSender() + outdegreeReceiver()
# Prepare event history
single.reh <- remify(edgelist = one.imp,
model = "tie")
# calculate stats
single.stats <- remstats(tie_effects = effects,
reh = single.reh)
risk_sets <- attr(single.stats, "riskset") %>% select(-'id')
# creating one set with all risksets for each time point
combined <- merge(risk_sets, PartOfApollo_13$time, by=NULL) %>%
rename(time = y) %>%
.[, c("time", "sender", "receiver")] %>%
mutate(sender = as.numeric(sender),
receiver = as.numeric(receiver))
# GV: Calculate divergence
diff <- PartOfApollo_13[rep(seq_len(nrow(PartOfApollo_13)), each = 240), ] %>%
data.matrix() %>%
.[, 1:3] - combined
# GV: identify non-divergence
combined$status <-
rowSums(diff == 0) == ncol(diff)
#combining the dataset with riskset to the statistic
combined$reciprocity <- c(single.stats[,,1])
combined$indegreeSender <- c(single.stats[,,2])
combined$outdegreeReceiver <- c(single.stats[,,3])
single.cox.fit <- coxph(Surv(time, status) ~ reciprocity + indegreeSender +
outdegreeReceiver,
data=combined)
single.cox.fit
#> Call:
#> coxph(formula = Surv(time, status) ~ reciprocity + indegreeSender +
#> outdegreeReceiver, data = combined)
#>
#> coef exp(coef) se(coef) z p
#> reciprocity 2.433e-02 1.025e+00 1.873e-02 1.299 0.194
#> indegreeSender 4.305e-04 1.000e+00 7.399e-05 5.818 5.95e-09
#> outdegreeReceiver -8.757e-05 9.999e-01 7.282e-05 -1.203 0.229
#>
#> Likelihood ratio test=41.29 on 3 df, p=5.664e-09
#> n= 931680, number of events= 3882
Created on 2023-06-22 with reprex v2.0.2
I think it has to do with the exclusion of the 1500 cases, which artificially deflates the between variance. I'll look into it, but don't worry too much about it 😉
Hai @gerkovink ,
Ik heb, zoals je zei, analyse met die 1500 gedaan op onderstaande manier. Dan kom ik op dezelfde p-values uit als met de original dataset. Heb ik het goed gedaan? En zo ja, en de p-values zijn dus inderdaad lager door het imputation proces. Wat voor verklaring kan hieraan ten grondslag liggen?
indic <- as.data.frame(indic)
class(indic)
true.reh <- remify(edgelist = indic, model = "tie")
calculate stats
stats <- remstats(tie_effects = effects, reh = true.reh)
use the function to create the correct format of the dataframe
true.cox.set <- prepare_coxph_data(stats, PartOfApollo_13)
fit cox model
true.cox.fit <- coxph(Surv(time, status) ~ reciprocity + indegreeSender + outdegreeReceiver, data=true.cox.set) true <- coefficients(true.cox.fit)