mshafieek / ADS-Missing-data-social-network

ADS master thesis
MIT License
0 stars 1 forks source link

Time not sorted - warning #19

Closed dominic-comerford closed 5 months ago

dominic-comerford commented 6 months ago

After Gerko's suggestion to use 'norm' as initial method to impute time rather than 'pmm' or others, now the warning 'time is not sorted, sorted will be forced' persists, but the model does run.

# Load packages
packages_to_install <- c("purrr", "furrr", "magrittr", "dplyr", 
                         "tibble", "data.table", "survival", "tidyverse", 
                         "devtools", "reprex", "styler")

for (pkg in packages_to_install) {
  if (!require(pkg, character.only = TRUE)) {
    # If not, install the package
    install.packages(pkg)
  }
}
#> Loading required package: purrr
#> Loading required package: furrr
#> Loading required package: future
#> Loading required package: magrittr
#> 
#> Attaching package: 'magrittr'
#> The following object is masked from 'package:purrr':
#> 
#>     set_names
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> Loading required package: tibble
#> Loading required package: data.table
#> 
#> Attaching package: 'data.table'
#> The following objects are masked from 'package:dplyr':
#> 
#>     between, first, last
#> The following object is masked from 'package:purrr':
#> 
#>     transpose
#> Loading required package: survival
#> 
#> Attaching package: 'survival'
#> The following object is masked from 'package:future':
#> 
#>     cluster
#> Loading required package: tidyverse
#> Loading required package: devtools
#> Loading required package: usethis
#> Loading required package: reprex
#> Loading required package: styler

library(purrr, warn.conflicts = FALSE)    # for functional programming
library(furrr, warn.conflicts = FALSE)    # for functional futures
library(magrittr, warn.conflicts = FALSE) # for pipes
library(dplyr, warn.conflicts = FALSE)    # for data manipulation
library(tibble, warn.conflicts = FALSE)     # for tibbles
library(data.table, warn.conflicts = FALSE)   
library(survival, warn.conflicts = FALSE) # for REM analysis
library(tidyverse, warn.conflicts = FALSE)  
library(tidyr, warn.conflicts = FALSE)
library(reprex, warn.conflicts = FALSE)   # reproducible code
library(styler)

devtools::install_github("TilburgNetworkGroup/remify")
#> Using GitHub PAT from the git credential store.
#> Skipping install of 'remify' from a github remote, the SHA1 (fbd8e53e) has not changed since last install.
#>   Use `force = TRUE` to force installation
devtools::install_github("gerkovink/mice@match_conditional_current") 
#> Using GitHub PAT from the git credential store.
#> Skipping install of 'mice' from a github remote, the SHA1 (776d92b4) has not changed since last install.
#>   Use `force = TRUE` to force installation
devtools::install_github("TilburgNetworkGroup/remstats")
#> Using GitHub PAT from the git credential store.
#> Skipping install of 'remstats' from a github remote, the SHA1 (1fd890e1) has not changed since last install.
#>   Use `force = TRUE` to force installation

library(mice, warn.conflicts = FALSE)     # for imputation and amputation
library(remstats, warn.conflicts = FALSE) # for REM statistics
library(remify, warn.conflicts = FALSE)   # for converting  

##### Load data
con <- url("https://github.com/mshafieek/ADS-Missing-data-social-network/raw/main/literature_%20REM/Tutorial_REM_REH_DATA/UUsummerschool.Rdata")
load(con)
apollo <- as_tibble(PartOfApollo_13) %>% 
  rename(
    actor1 = sender,
    actor2 = receiver
  )

rm(Class, PartOfApollo_13, Twitter_data_rem3, WTCPoliceCalls, ClassIntercept, 
   ClassIsFemale, ClassIsTeacher, WTCPoliceIsICR, con, pkg)

set.seed(123) # fix seed to realize a sufficient set
indic <- sample(1:nrow(apollo), 1500)

remify(apollo[indic, ], model = "tie") %>% dim()
#> Warning: Unknown or uninitialised column: `type`.
#> Warning: Unknown or uninitialised column: `weight`.
#> Warning in remify(apollo[indic, ], model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.
#> events actors  dyads 
#>   1500     16    240
#### Combine the sufficient set and the incomplete set
make_missing <- function(x, indic){
  sufficient <- x[indic, ]
  miss <- x[-c(indic), ] |>
    # prop is set to .4 to get close to .2 missingness since almost half of the dataset is used for the sufficient set
    ampute(prop = .5, 
           mech = "MCAR",
           patterns = matrix(c(1,1,0, # missing in actor 2
                               1,0,1, # missing in actor 1
                               0,1,1, # missing in time
                               0,0,0, # missing in all
                               1,0,0, # missing in actor 1 + 2
                               0,1,0, # missing in time + actor 2
                               0,0,1  # missing in time + actor 1
                               ),
                             nrow=7,  # change to number of missingness patterns above
                             byrow=TRUE)) %>% 
    .$amp
  combined <- rbind(sufficient, miss)
  return(combined[order(combined$time), ]) # sort it all like apollo
}

whichcol <- c("", "actor2", "actor1") # Ensure that actor 1 != actor 2 in imputations
names(whichcol) <- colnames(apollo)

## predictor matrix
pred <- make.predictorMatrix(apollo)
pred[ ,"time"] <- 0 # using time for imputation creates loops
## use the pmm.conditional method
method <- make.method(apollo)
method[1] <- "norm" # imputation of time correct? Something Like Last Obs Carried Forward?
method[c(2,3)] <- "pmm.conditional"

#### Model-based finite populations
mbased_finite_apollo <-
  furrr::future_map(1:10, ~ {   # start of with 10 simulations for testing
    make_missing(apollo, indic) %>%
      mice(m = 5, 
           maxit = 5,
           method = method,
           whichcolumn = whichcol,
           predictorMatrix = pred,
           print = FALSE)
  }, .options = furrr_options(seed = 123))

# Defining effects for the relational event model
effects <- ~ -1 + reciprocity(scaling = ("std")) + indegreeSender() + outdegreeReceiver()

##### Function to get the statistics of the previously defined effects.
stats_function <- function(data) {
  # remify the data
  reh <- remify::remify(edgelist = data, model = "tie")
  # calculate effect statistics
  statsObject_imp <- remstats(reh = reh, tie_effects = effects)
  # Return the statistics
  return(statsObject_imp)
}

#Function for making the data compatible with coxph()
prepare_coxph_data <- function(statsObject, apollo) {
  risk_sets <- attr(statsObject, "riskset")
  risk_sets <- risk_sets %>% select(-'id')

  # Get the times
  time <- apollo$time

  # merge riskset with each timepoint
  combined <- merge(risk_sets, time, by = NULL)

  combined <- combined %>% rename("time" = "y")
  combined <- lapply(combined, as.numeric)
  combined <- as.data.frame(combined)

  # Create matrices for subtraction to make a status column for coxph
  combined_matrix <- data.matrix(combined)
  matrix_rows <- nrow(combined)

  repeated_df <- apollo[rep(seq_len(nrow(apollo)), each = 240), ] 
  repeated_df <- repeated_df[, c(2,3,1)]
  apollo_matrix <- data.matrix(repeated_df)

  status_matrix <- apollo_matrix - combined_matrix

  # create a status column
  status <- as.integer(rowSums(status_matrix == 0) == ncol(status_matrix))
  status <- as.data.frame(status)

  # Add status to the combined set
  combined <- cbind(combined, status)

  # Extract statistics and add them to the dataframe
  reciprocity <- statsObject[,,1]
  indegreeSender <- statsObject[,,2]
  outdegreeReceiver <- statsObject[,,3]

  combined$reciprocity <- c(reciprocity)
  combined$indegreeSender <- c(indegreeSender)
  combined$outdegreeReceiver <- c(outdegreeReceiver)

  return(combined)
}

# TRUE ANALYSIS
true.reh <- remify(edgelist = apollo, 
                   model = "tie")
#> Warning: Unknown or uninitialised column: `type`.
#> Warning: Unknown or uninitialised column: `weight`.
# calculate stats
stats <- remstats(tie_effects = effects, 
                  reh = true.reh)
# use the function to create the correct format of the dataframe
true.cox.set <- prepare_coxph_data(stats, apollo)
# fit cox model 
true.cox.fit <- coxph(Surv(time, status) ~ reciprocity + indegreeSender + 
                        outdegreeReceiver, 
                      data=true.cox.set)
true <- coefficients(true.cox.fit)

# Running the REM on  simulations
Results1 <- 
  mbased_finite_apollo[1:5] %>% 
  map(~.x %>% # for every simulation
        complete("all") %>% 
        map(~.x %>% # for every imputation
              stats_function() %>% # do stats function
              prepare_coxph_data(apollo = apollo) %$% # prepare cox ph
              coxph(Surv(time, status) ~ 
                      reciprocity + 
                      indegreeSender + 
                      outdegreeReceiver)) %>%
        pool(custom.t = ".data$b + .data$b / .data$m") %>% # pool coefficients
        .$pooled %>% # extract table of pooled coefficients
          mutate(true = true, # add true
                 df = m-1,  # correct df
                 riv = Inf, # correct riv
                 std.error = sqrt(t), # standard error
                 statistic = estimate / std.error, # test statistic
                 p.value = 2 * (pt(abs(statistic), 
                                   pmax(df, 0.001), 
                                   lower.tail = FALSE)), # correct p.value
                 `2.5 %` = estimate - qt(.975, df) * std.error, # lower bound CI
                 `97.5 %` = estimate + qt(.975, df) * std.error, # upper bound CI
                 cov = `2.5 %` < true & true < `97.5 %`, # coverage
                 bias = estimate - true) %>% # bias
          select(term, m, true, estimate, std.error, statistic, p.value, 
                 riv, `2.5 %`, `97.5 %`, cov, bias) %>% 
          column_to_rownames("term") # `term` as rownames
    )
#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.
#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.

#> Warning in remify::remify(edgelist = data, model = "tie"): 
#> Warning: the `time` variable is not sorted. Sorting will be forced.
Created on 2024-04-18 with [reprex v2.1.0](https://reprex.tidyverse.org/)

Created on 2024-04-18 with reprex v2.1.0