library(tidyverse)
# Path to the downloaded CSV extract. Replace the placeholder text below with
# the real location before running; it is kept as a quoted string so that the
# script parses.
file_source <- "<enter location of downloaded CSV>"
# Fail early with a clear message if the path has not been filled in correctly.
if (!file.exists(file_source)) {
  stop("CSV file not found: ", file_source, call. = FALSE)
}
data <- read.csv(file = file_source)
#### Columns/variables
# "registered"
# "age"
# "sex"
# "ethnicity"
# "patient_id"
# Convert the three event-date columns from "YYYY-MM-DD" text to Date objects
# in a single pass.
data <- data %>%
  mutate(
    across(
      c(
        injury_poisoning_undet_intent_date,
        intentional_self_harm_date,
        sequelae_self_harm_injury_poisoning_date
      ),
      ~ as.Date(.x, format = "%Y-%m-%d")
    )
  )
# Shorten the date and flag column names to a uniform
# <category>_ICD_date / <category>_ICD_flag scheme.
data <- rename(
  data,
  intentional_ICD_flag = intentional_self_harm,
  intentional_ICD_date = intentional_self_harm_date,
  undetermined_ICD_flag = injury_poisoning_undet_intent,
  undetermined_ICD_date = injury_poisoning_undet_intent_date,
  sequelae_ICD_flag = sequelae_self_harm_injury_poisoning,
  sequelae_ICD_date = sequelae_self_harm_injury_poisoning_date
)
# Binary age indicator used as a filter: 1 when age is 16 or over, 0 otherwise
# (NA when age is missing).
data <- data %>%
  mutate(age16 = ifelse(age >= 16, 1, 0))
# Keep registered patients aged 16+ for whom at least one of the three ICD
# flags equals 1. Comma-separated conditions in filter() are combined with AND.
data2 <- data %>%
  filter(
    registered == 1,
    age16 == 1,
    intentional_ICD_flag == 1 |
      undetermined_ICD_flag == 1 |
      sequelae_ICD_flag == 1
  )
# Row-wise latest of the three ICD event dates. Without na.rm = TRUE, pmax()
# returns NA for any row where at least one of the dates is missing; with it,
# missing dates are ignored and the result is NA only when all three are NA.
data2$deathdate <- pmax(
  data2$intentional_ICD_date,
  data2$undetermined_ICD_date,
  data2$sequelae_ICD_date,
  na.rm = TRUE
)
# There seem to be multiple entries for attempted suicide, which suggests
# that a person can make more than one suicide attempt.
# Should we look at the latest suicide attempt only, on the presumption
# that it is the one that led to death?
# Started writing R code for this.