library(rio)
library(skimr) # get overview of data
library(gtsummary) # summary statistics and tests
library(rstatix) # summary statistics and statistical tests
library(janitor) # adding totals and percents to tables
library(scales) # easily convert proportions to percents
library(flextable) # table layout and formatting
This old thread has been automatically locked. If you think you have found something related to this, please open a new issue and link to this old issue if necessary.
# Attach the packages used by this script (one call per line so that each
# library() is parsed as its own statement).
library(rio)
library(skimr)     # get overview of data
library(gtsummary) # summary statistics and tests
library(rstatix)   # summary statistics and statistical tests
library(janitor)   # adding totals and percents to tables
library(scales)    # easily convert proportions to percents
library(flextable) # table layout and formatting
# import the linelist
# (path is relative to the current working directory)
linelist <- import("./data4R/linelist_cleaned.rds")

# get information about each variable in a dataset
skim(linelist)
# Cross-tabulate age category by gender and render the result as a flextable.
# Each step is on its own line so that the trailing comments no longer
# comment out the rest of the pipeline.
linelist %>%
  tabyl(age_cat, gender) %>%                 # counts: age_cat rows x gender cols
  adorn_totals(where = "col") %>%            # add a totals column
  adorn_percentages(denominator = "col") %>% # proportions within each column
  adorn_pct_formatting() %>%                 # display proportions as percents
  adorn_ns(position = "front") %>%           # put the counts before the percents
  adorn_title(
    row_name = "Age Category",
    col_name = "Gender",
    placement = "combined"
  ) %>%                                      # this is necessary to print as image
  flextable::flextable() %>%                 # convert to pretty image
  flextable::autofit()                       # format to one line per row
Output for the above code snippet:
── Variable type: character ───────────────────────────────────────────────────────────────────────────────────── skim_variable n_missing complete_rate min max empty n_unique whitespace 1 case_id 0 1 6 6 0 5888 0 2 outcome 1323 0.775 5 7 0 2 0 3 gender 278 0.953 1 1 0 2 0 4 age_unit 0 1 5 6 0 2 0 5 hospital 0 1 5 36 0 6 0 6 infector 2088 0.645 6 6 0 2697 0 7 source 2088 0.645 5 7 0 2 0 8 fever 249 0.958 2 3 0 2 0 9 chills 249 0.958 2 3 0 2 0 10 cough 249 0.958 2 3 0 2 0 11 aches 249 0.958 2 3 0 2 0 12 vomit 249 0.958 2 3 0 2 0 13 time_admission 765 0.870 5 5 0 1072 0
── Variable type: Date ────────────────────────────────────────────────────────────────────────────────────────── skim_variable n_missing complete_rate min max median n_unique 1 date_infection 2087 0.646 2014-03-19 2015-04-27 2014-10-11 359 2 date_onset 256 0.957 2014-04-07 2015-04-30 2014-10-23 367 3 date_hospitalisation 0 1 2014-04-17 2015-04-30 2014-10-23 363 4 date_outcome 936 0.841 2014-04-19 2015-06-04 2014-11-01 371
── Variable type: factor ──────────────────────────────────────────────────────────────────────────────────────── skim_variable n_missing complete_rate ordered n_unique top_counts
1 age_cat 86 0.985 FALSE 8 0-4: 1095, 5-9: 1095, 20-: 1073, 10-: 941 2 age_cat5 86 0.985 FALSE 17 0-4: 1095, 5-9: 1095, 10-: 941, 15-: 743
── Variable type: numeric ─────────────────────────────────────────────────────────────────────────────────────── skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist 1 generation 0 1 16.6 5.79 0 13 16 20 37 ▁▆▇▂▁ 2 age 86 0.985 16.1 12.6 0 6 13 23 84 ▇▅▁▁▁ 3 age_years 86 0.985 16.0 12.6 0 6 13 23 84 ▇▅▁▁▁ 4 lon 0 1 -13.2 0.0194 -13.3 -13.3 -13.2 -13.2 -13.2 ▅▃▃▆▇ 5 lat 0 1 8.47 0.0113 8.45 8.46 8.47 8.48 8.49 ▅▇▇▇▆ 6 wt_kg 0 1 52.6 18.6 -11 41 54 66 111 ▁▃▇▅▁ 7 ht_cm 0 1 125. 49.5 4 91 129 159 295 ▂▅▇▂▁ 8 ct_blood 0 1 21.2 1.69 16 20 22 22 26 ▁▃▇▃▁ 9 temp 149 0.975 38.6 0.977 35.2 38.2 38.8 39.2 40.8 ▁▂▂▇▁ 10 bmi 0 1 46.9 55.4 -1200 24.6 32.1 50.0 1250 ▁▁▇▁▁ 11 days_onset_hosp 256 0.957 2.06 2.26 0 1 1 3 22 ▇▁▁▁▁
sessionInfo() output:
Matrix products: default
locale: [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8
[3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C
[5] LC_TIME=English_United States.utf8
time zone: America/Denver tzcode source: internal
attached base packages: [1] stats graphics grDevices utils datasets methods base
other attached packages: [1] flextable_0.9.6 scales_1.3.0 janitor_2.2.0 rstatix_0.7.2 gtsummary_1.7.2 skimr_2.1.5
[7] rio_1.0.1 librarian_1.8.1
loaded via a namespace (and not attached): [1] xfun_0.44 vctrs_0.6.5 tools_4.4.0 generics_0.1.3
[5] curl_5.2.1 tibble_3.2.1 fansi_1.0.6 pkgconfig_2.0.3
[9] R.oo_1.26.0 data.table_1.15.4 uuid_1.2-0 gt_0.10.1
[13] lifecycle_1.0.4 compiler_4.4.0 stringr_1.5.1 textshaping_0.3.7
[17] munsell_0.5.1 repr_1.1.7 carData_3.0-5 snakecase_0.11.1
[21] httpuv_1.6.15 fontquiver_0.2.1 fontLiberation_0.1.0 htmltools_0.5.8.1
[25] pillar_1.9.0 later_1.3.2 car_3.1-2 crayon_1.5.2
[29] tidyr_1.3.1 gfonts_0.2.0 R.utils_2.12.3 broom.helpers_1.15.0
[33] openssl_2.2.0 abind_1.4-5 mime_0.12 fontBitstreamVera_0.1.1
[37] zip_2.3.1.9000 tidyselect_1.2.1 digest_0.6.35 stringi_1.8.4
[41] dplyr_1.1.4 purrr_1.0.2 grid_4.4.0 fastmap_1.2.0
[45] colorspace_2.1-0 cli_3.6.2 magrittr_2.0.3 base64enc_0.1-3
[49] crul_1.4.2 utf8_1.2.4 broom_1.0.6 withr_3.0.0
[53] gdtools_0.3.7 promises_1.3.0 backports_1.4.1 lubridate_1.9.3
[57] timechange_0.3.0 rmarkdown_2.27 officer_0.6.6 ragg_1.3.2
[61] askpass_1.2.0 R.methodsS3_1.8.2 evaluate_0.23 shiny_1.8.1.1
[65] knitr_1.46 rlang_1.1.3 Rcpp_1.0.12 xtable_1.8-4
[69] glue_1.7.0 httpcode_0.3.0 xml2_1.3.6 rstudioapi_0.16.0
[73] jsonlite_1.8.8 R6_2.5.1 systemfonts_1.1.0