Hi guys! My EDA graphics are too large. When I download them as PNG files, they are not that big...
Code for creating overlapping histograms for the different categories
# Columns to plot, mapped to the axis title shown for each one.
# Every column listed here is encoded as quantitative and binned below.
# NOTE(review): "diag_1" is encoded as quantitative too -- confirm the column
# actually holds numeric values.
var_dict = {
    "num_medications": "Number of Medications",
    "num_lab_procedures": "Number of Lab Procedures",
    "num_procedures": "Number of Procedures other than lab",
    "diag_1": "Primary Diagnosis",
}

# One small chart per column: semi-transparent step areas of the record count
# per bin, coloured by the `readmitted` category.
for column, axis_title in var_dict.items():
    # Binned x axis (at most 10 bins), titled with the human-readable label.
    x_axis = alt.X(
        column,
        type="quantitative",
        bin=alt.Bin(maxbins=10),
        title=axis_title,
    )
    # stack=None keeps the per-category areas unstacked so they overlap.
    y_axis = alt.Y("count()", stack=None)
    colour = alt.Color("readmitted:N", title="Readmitted Status")

    chart = (
        alt.Chart(diabetes_clean)
        .mark_area(opacity=0.3, interpolate="step")
        .encode(x_axis, y_axis, colour)
        .properties(width=100, height=100)
    )
    chart.display()
Code for investigating the race column
Hi guys! My EDA graphics are too large. When I download them as PNG files, they are not that big...
Code for creating overlapping histograms for the different categories
# Build the cleaned frame: the raw frame minus the columns listed here.
dropped_columns = [
    "encounter_id",
    "patient_nbr",
    "weight",
    "payer_code",
    "medical_specialty",
    "examide",
    "citoglipton",
]
diabetes_clean = diabetes_csv.drop(columns=dropped_columns)

# Left panel: bar chart counting `readmitted` values per race, with the bars
# ordered by the y encoding.
race_plot_a = (
    alt.Chart(diabetes_clean)
    .mark_bar()
    .encode(
        x=alt.X("race", sort="y", title=None),
        y=alt.Y("count(readmitted)"),
    )
)

# Explicit left-to-right order of the race categories on the heatmap x axis.
sort_list = ["Asian", "Hispanic", "Other", "NaN", "AfricanAmerican", "Caucasian"]

# Right panel: race x readmitted grid whose cell colour is the record count.
race_plot_b = (
    alt.Chart(diabetes_clean)
    .mark_rect()
    .encode(
        y=alt.Y("readmitted", title="Readmitted Status"),
        x=alt.X("race:N", title=None, sort=sort_list),
        color="count()",
    )
    .properties(width=100, height=300)
)

# Side-by-side composition under a shared title. Kept as a bare trailing
# expression; presumably relied on for notebook cell output -- confirm.
(race_plot_a | race_plot_b).properties(
    title="Distribution of race and comparison across the readmitted status"
)