Writing R code - Githubissues

Started writing R code for this.
library(tidyverse)

# Path to the downloaded CSV extract. Replace the placeholder below with the
# real location before running the script.
file_source <- "<enter location of downloaded CSV>"

# Stop early with a readable message if the path has not been filled in
# (or points at a file that does not exist).
if (!file.exists(file_source)) {
  stop("CSV file not found: ", file_source, call. = FALSE)
}

# Load the extract into a data frame used by the rest of the script.
data <- read.csv(file = file_source)

#### Columns/variables
# "registered"
# "age"
# "sex"
# "ethnicity"
# "patient_id"

# Parse the three event-date columns from "YYYY-MM-DD" text into Date objects.
icd_date_columns <- c(
  "injury_poisoning_undet_intent_date",
  "intentional_self_harm_date",
  "sequelae_self_harm_injury_poisoning_date"
)
data[icd_date_columns] <- lapply(
  data[icd_date_columns],
  as.Date,
  format = "%Y-%m-%d"
)

# Give the date and flag columns shorter, parallel names
# (<group>_ICD_date / <group>_ICD_flag) in a single rename step.
data <- rename(
  data,
  # date columns
  intentional_ICD_date = intentional_self_harm_date,
  undetermined_ICD_date = injury_poisoning_undet_intent_date,
  sequelae_ICD_date = sequelae_self_harm_injury_poisoning_date,
  # flag columns
  intentional_ICD_flag = intentional_self_harm,
  undetermined_ICD_flag = injury_poisoning_undet_intent,
  sequelae_ICD_flag = sequelae_self_harm_injury_poisoning
)

# Binary indicator used as a filter below: 1 when age is 16 or over,
# 0 when under 16 (missing ages stay NA).
data <- data %>%
  mutate(age16 = ifelse(age < 16, 0, 1))

# Keep rows that are registered, aged 16+, and have at least one of the
# three ICD flags set. Comma-separated conditions are combined with AND.
data2 <- filter(
  data,
  registered == 1,
  age16 == 1,
  intentional_ICD_flag == 1 |
    undetermined_ICD_flag == 1 |
    sequelae_ICD_flag == 1
)

# Row-wise latest of the three ICD dates. By default pmax() returns NA as soon
# as any input is NA; na.rm = TRUE ignores missing dates, so the result is NA
# only when all three dates are missing for that row.
data2$deathdate <- pmax(
  data2$intentional_ICD_date,
  data2$undetermined_ICD_date,
  data2$sequelae_ICD_date,
  na.rm = TRUE
)

# There seem to be multiple entries for attempted suicide, which suggests
# that suicide attempts can be made more than once.
# Should we look at the latest suicide attempt only, on the presumption
# that it is the one that led to death?
opensafely / suicide-deaths-pcc

Writing R code #8