dashaasienga / Statistics-Senior-Honors-Thesis

0 stars 0 forks source link

SQL Scripts for COMPAS data #28

Closed dashaasienga closed 3 months ago

dashaasienga commented 4 months ago

@katcorr

Shell script that returns 19075 rows:

select * from compas
inner join people
on compas.person_id = people.id
and compas.first = people.first
and compas.last = people.last
and compas.decile_score = people.decile_score

Note that some participants have 3 duplicate rows for the 3 types of assessments: risk of recidivism, risk of violence, and risk of failure to appear.

Data set saved as compas_data1 and we can revise this based on what columns we need, etc.

dashaasienga commented 4 months ago

Updated with documentation here: https://github.com/dashaasienga/Statistics-Senior-Honors-Thesis/blob/main/Data%20Sets/COMPAS/SQL%20Script%20to%20Pull%20COMPAS%20Data.pdf

Data saved as compas_data.

Code:

select 

people.id,
compas.compas_person_id,
people.name,
people.first,
people.last,
people.sex,
people.race, 
people.age,
people.age_cat,
compas.marital_status,
compas.custody_status,
people.juv_fel_count,
people.juv_misd_count,
people.juv_other_count,
people.priors_count,
people.days_b_screening_arrest,
people.c_days_from_compas,
people.c_charge_degree,
people.c_charge_desc,
compas.type_of_assessment,
compas.raw_score,
people.decile_score,
compas.score_text,
people.is_violent_recid,
people.num_vr_cases,
people.is_recid,
people.num_r_cases,

sum(
  round(
     julianday(jailhistory.out_custody) - julianday(jailhistory.in_custody)
  )
) as days_in_jail,
sum(
   coalesce(
      julianday(prisonhistory.out_custody) - julianday(prisonhistory.in_custody),
   0)
) as days_in_prison

from compas

inner join people
on compas.person_id = people.id
and compas.first = people.first
and compas.last = people.last
and compas.decile_score = people.decile_score

left join jailhistory
on jailhistory.person_id = people.id
and jailhistory.first = people.first
and jailhistory.last = people.last

left join prisonhistory
on prisonhistory.person_id = people.id
and prisonhistory.first = people.first
and prisonhistory.last = people.last

where type_of_assessment = 'Risk of Recidivism'

group by 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27
dashaasienga commented 4 months ago

Updated to fix a discrepancy in how I was calculating days_in_jail and days_in_prison.

Code:

select 

people.id,
compas.compas_person_id,
people.name,
people.first,
people.last,
people.sex,
people.race, 
people.age,
people.age_cat,
compas.marital_status,
compas.custody_status,
people.juv_fel_count,
people.juv_misd_count,
people.juv_other_count,
people.priors_count,
people.days_b_screening_arrest,
people.c_days_from_compas,
people.c_charge_degree,
people.c_charge_desc,
compas.type_of_assessment,
compas.raw_score,
people.decile_score,
compas.score_text,
people.is_violent_recid,
people.num_vr_cases,
people.is_recid,
people.num_r_cases,
round(jail_agg.days_in_jail) as days_in_jail,
round(prison_agg.days_in_prison) as days_in_prison

from compas

inner join people
on compas.person_id = people.id
and compas.first = people.first
and compas.last = people.last
and compas.decile_score = people.decile_score

left join  (

select 

jailhistory.person_id,
jailhistory.first,
jailhistory.last,
jailhistory.out_custody,
jailhistory.in_custody,
sum(distinct(julianday(jailhistory.out_custody) - julianday(jailhistory.in_custody))) as days_in_jail

from jailhistory

inner join compas 
on compas.person_id = jailhistory.person_id
and compas.first = jailhistory.first
and compas.last = jailhistory.last
and jailhistory.in_custody <= compas.screening_date

group by 1,2,3

) as jail_agg 
on jail_agg.person_id = people.id
and jail_agg.first = people.first
and jail_agg.last = people.last

left join  (

select 

prisonhistory.person_id,
prisonhistory.first,
prisonhistory.last,
prisonhistory.out_custody,
prisonhistory.in_custody,
sum(distinct(julianday(prisonhistory.out_custody) - julianday(prisonhistory.in_custody))) as days_in_prison

from prisonhistory

inner join compas 
on compas.person_id = prisonhistory.person_id
and compas.first = prisonhistory.first
and compas.last = prisonhistory.last
and prisonhistory.in_custody <= compas.screening_date

group by 1,2,3

) as prison_agg 
on prison_agg.person_id = people.id
and prison_agg.first = people.first
and prison_agg.last = people.last

where type_of_assessment = 'Risk of Recidivism'