Open kenarab opened 4 years ago
#install.packages("devtools") #devtools::install_github("ROpenStats/COVID19analytics") library(COVID19analytics) #> Warning: replacing previous import 'ggplot2::Layout' by 'lgr::Layout' when #> loading 'COVID19analytics' #> Warning: replacing previous import 'dplyr::intersect' by 'lubridate::intersect' #> when loading 'COVID19analytics' #> Warning: replacing previous import 'dplyr::union' by 'lubridate::union' when #> loading 'COVID19analytics' #> Warning: replacing previous import 'dplyr::setdiff' by 'lubridate::setdiff' when #> loading 'COVID19analytics' #> Warning: replacing previous import 'readr::col_factor' by 'scales::col_factor' #> when loading 'COVID19analytics' #> Warning: replacing previous import 'magrittr::equals' by 'testthat::equals' when #> loading 'COVID19analytics' #> Warning: replacing previous import 'magrittr::not' by 'testthat::not' when #> loading 'COVID19analytics' #> Warning: replacing previous import 'magrittr::is_less_than' by #> 'testthat::is_less_than' when loading 'COVID19analytics' #> Warning: replacing previous import 'dplyr::matches' by 'testthat::matches' when #> loading 'COVID19analytics' #> Warning: replacing previous import 'testthat::matches' by 'tidyr::matches' when #> loading 'COVID19analytics' #> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when #> loading 'COVID19analytics' library(dplyr) #> #> Attaching package: 'dplyr' #> The following objects are masked from 'package:stats': #> #> filter, lag #> The following objects are masked from 'package:base': #> #> intersect, setdiff, setequal, union library(ggplot2) # Generate daily plots processor <- COVID19DataProcessor$new(provider.id = "JohnsHopkingsUniversity", missing.values.model.id = "imputation") dummy <- processor$setupData() #> INFO [17:21:29.236] {stage: processor-setup} #> INFO [17:21:29.263] Checking required downloaded {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE} #> INFO [17:21:29.353] Checking required downloaded {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE} #> INFO [17:21:29.374] Checking required downloaded {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE} #> INFO [17:21:29.417] {stage: data loaded} #> INFO [17:21:29.419] {stage: data-setup} dummy <- processor$transform() #> INFO [17:21:29.421] Executing transform #> INFO [17:21:29.421] Executing consolidate #> INFO [17:21:30.758] {stage: consolidated} #> INFO [17:21:30.759] Executing standarize #> INFO [17:21:30.814] gathering DataModel #> INFO [17:21:30.815] {stage: datamodel-setup} dummy <- processor$curate() #> INFO [17:21:30.818] {stage: loading-aggregated-data-model} #> Warning in countrycode(x, origin = "country.name", destination = "continent"): Some values were not matched unambiguously: MS Zaandam #> INFO [17:21:32.584] {stage: calculating-rates} #> INFO [17:21:32.727] {stage: making-data-comparison} #> INFO [17:21:33.832] {stage: applying-missing-values-method} #> INFO [17:21:33.833] {stage: Starting first imputation} #> INFO [17:21:33.837] {stage: calculating-rates} #> INFO [17:21:34.066] {stage: making-data-comparison-2} #> INFO [17:21:35.068] {stage: calculating-top-countries} #> INFO [17:21:35.084] {stage: processed} data.significative <- processor$data.agg %>% filter(confirmed >= 1000) data.country.avg <- data.significative %>% group_by(country) %>% summarize(confirmed = max(confirmed), fatality.rate.min.mean = mean(fatality.rate.min), fatality.rate.min.cv = sd(fatality.rate.min)/fatality.rate.min.mean, fatality.rate.max.mean =mean(fatality.rate.max), fatality.rate.max.cv = sd(fatality.rate.max)/fatality.rate.max.mean) %>% arrange(fatality.rate.min.mean) data.country.avg #> # A tibble: 108 x 6 #> country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m… #> <chr> <int> <dbl> <dbl> <dbl> #> 1 Qatar 48947 0.00118 0.646 0.00223 #> 2 Singap… 32876 0.00170 0.750 0.00304 #> 3 Bahrain 9692 0.00261 0.439 0.00403 #> 4 Djibou… 2697 0.00309 0.451 0.00439 #> 5 Maldiv… 1457 0.00338 0.0925 0.00646 #> 6 Uzbeki… 3369 0.00408 0.115 0.00569 #> 7 Oman 8373 0.00480 0.0814 0.00840 #> 8 Iceland 1805 0.00488 0.236 0.00575 #> 9 Guinea… 1195 0.00545 0.110 0.0107 #> 10 Guinea 3275 0.00563 0.104 0.00908 #> # … with 98 more rows, and 1 more variable: fatality.rate.max.cv <dbl> ggplot(data.country.avg) + geom_histogram(aes(x = fatality.rate.min.mean), bins = 60)
least.letality <- data.country.avg %>% arrange(fatality.rate.min.mean) %>% filter(fatality.rate.min.mean <= 0.05 & confirmed >= 30000) %>% arrange(desc(confirmed)) least.letality #> # A tibble: 17 x 6 #> country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m… #> <chr> <int> <dbl> <dbl> <dbl> #> 1 US 1699176 0.0452 0.358 0.0828 #> 2 Russia 370680 0.00868 0.164 0.0160 #> 3 Germany 181524 0.0262 0.654 0.0328 #> 4 Turkey 159797 0.0241 0.154 0.0387 #> 5 India 158086 0.0314 0.0748 0.0543 #> 6 Peru 135905 0.0285 0.134 0.0463 #> 7 China 84106 0.0409 0.288 0.0488 #> 8 Chile 82289 0.0103 0.323 0.0163 #> 9 Saudi … 78541 0.00831 0.397 0.0146 #> 10 Pakist… 59151 0.0187 0.238 0.0328 #> 11 Qatar 48947 0.00118 0.646 0.00223 #> 12 Belarus 38956 0.00704 0.264 0.0127 #> 13 Bangla… 38292 0.0220 0.425 0.0415 #> 14 Singap… 32876 0.00170 0.750 0.00304 #> 15 United… 31969 0.00774 0.236 0.0134 #> 16 Portug… 31292 0.0331 0.309 0.0612 #> 17 Switze… 30776 0.0431 0.454 0.0541 #> # … with 1 more variable: fatality.rate.max.cv <dbl> most.letality <- data.country.avg %>% arrange(fatality.rate.min.mean) %>% filter(fatality.rate.min.mean > 0.05 & confirmed >= 30000) %>% arrange(desc(confirmed)) most.letality #> # A tibble: 11 x 6 #> country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m… #> <chr> <int> <dbl> <dbl> <dbl> #> 1 Brazil 411821 0.0561 0.289 0.0903 #> 2 United… 268619 0.124 0.311 0.231 #> 3 Spain 236259 0.0941 0.315 0.137 #> 4 Italy 231139 0.112 0.313 0.175 #> 5 France 183067 0.109 0.465 0.173 #> 6 Iran 141591 0.0590 0.195 0.0806 #> 7 Mexico 78023 0.0842 0.294 0.117 #> 8 Belgium 57592 0.117 0.474 0.190 #> 9 Nether… 45970 0.102 0.325 0.192 #> 10 Ecuador 38103 0.0532 0.376 0.0971 #> 11 Sweden 35088 0.0874 0.491 0.157 #> # … with 1 more variable: fatality.rate.max.cv <dbl> compared.countries <- unique(c(least.letality$country, "Argentina", "Brazil", "Chile", "US", "Japan", "Korea, South", "Germany", "Japan")) compared.countries #> [1] "US" "Russia" "Germany" #> [4] "Turkey" "India" "Peru" #> [7] "China" "Chile" "Saudi Arabia" #> [10] "Pakistan" "Qatar" "Belarus" #> [13] "Bangladesh" "Singapore" "United Arab Emirates" #> [16] "Portugal" "Switzerland" "Argentina" #> [19] "Brazil" "Japan" "Korea, South" rg <- ReportGeneratorEnhanced$new(data.processor = processor) ggplot <- rg$ggplotCountriesLines(included.countries = compared.countries, min.confirmed = 100, field.description = "Death Rates min", field = "fatality.rate.min", countries.text = "Compared Countries", log.scale = FALSE) ggplot
ggplot <- rg$ggplotCrossSection(included.countries = compared.countries, field.x = "confirmed", field.y = "fatality.rate.min", plot.description = "Cross section Confirmed vs Death rate min", log.scale.x = TRUE, log.scale.y = FALSE) ggplot
ggplot <- rg$ggplotCrossSection(included.countries = most.letality$country, field.x = "confirmed", field.y = "fatality.rate.min", plot.description = "Cross section Confirmed vs Death rate min", log.scale.x = TRUE, log.scale.y = FALSE) ggplot
Created on 2020-05-28 by the reprex package (v0.3.0)
Created on 2020-05-28 by the reprex package (v0.3.0)