rOpenStats / COVID19analyticsBak2109

31 stars 9 forks source link

Sweden R0 and fatality rate analytics #11

Open kenarab opened 4 years ago

kenarab commented 4 years ago
#install.packages("devtools")
#devtools::install_github("ROpenStats/COVID19analytics")

library(COVID19analytics)
#> Warning: replacing previous import 'ggplot2::Layout' by 'lgr::Layout' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::intersect' by 'lubridate::intersect'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::union' by 'lubridate::union' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::setdiff' by 'lubridate::setdiff' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::col_factor' by 'scales::col_factor'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::equals' by 'testthat::equals' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::not' by 'testthat::not' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::is_less_than' by
#> 'testthat::is_less_than' when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::matches' by 'testthat::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'testthat::matches' by 'tidyr::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'COVID19analytics'
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(ggplot2)
# Generate daily plots
processor <- COVID19DataProcessor$new(provider.id = "JohnsHopkingsUniversity", missing.values.model.id = "imputation")
dummy <- processor$setupData()
#> INFO  [17:21:29.236]  {stage: processor-setup}
#> INFO  [17:21:29.263] Checking required downloaded  {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO  [17:21:29.353] Checking required downloaded  {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO  [17:21:29.374] Checking required downloaded  {downloaded.max.date: 2020-05-27, daily.update.time: 21:00:00, current.datetime: 2020-05-28 1.., download.flag: FALSE}
#> INFO  [17:21:29.417]  {stage: data loaded}
#> INFO  [17:21:29.419]  {stage: data-setup}
dummy <- processor$transform()
#> INFO  [17:21:29.421] Executing transform 
#> INFO  [17:21:29.421] Executing consolidate 
#> INFO  [17:21:30.758]  {stage: consolidated}
#> INFO  [17:21:30.759] Executing standarize 
#> INFO  [17:21:30.814] gathering DataModel 
#> INFO  [17:21:30.815]  {stage: datamodel-setup}
dummy <- processor$curate()
#> INFO  [17:21:30.818]  {stage: loading-aggregated-data-model}
#> Warning in countrycode(x, origin = "country.name", destination = "continent"): Some values were not matched unambiguously: MS Zaandam
#> INFO  [17:21:32.584]  {stage: calculating-rates}
#> INFO  [17:21:32.727]  {stage: making-data-comparison}
#> INFO  [17:21:33.832]  {stage: applying-missing-values-method}
#> INFO  [17:21:33.833]  {stage: Starting first imputation}
#> INFO  [17:21:33.837]  {stage: calculating-rates}
#> INFO  [17:21:34.066]  {stage: making-data-comparison-2}
#> INFO  [17:21:35.068]  {stage: calculating-top-countries}
#> INFO  [17:21:35.084]  {stage: processed}

data.significative <- processor$data.agg %>% filter(confirmed >= 1000)
data.country.avg <- data.significative %>%
 group_by(country) %>%
 summarize(confirmed = max(confirmed),
           fatality.rate.min.mean = mean(fatality.rate.min),
           fatality.rate.min.cv   = sd(fatality.rate.min)/fatality.rate.min.mean,
           fatality.rate.max.mean =mean(fatality.rate.max),
           fatality.rate.max.cv   = sd(fatality.rate.max)/fatality.rate.max.mean) %>%
 arrange(fatality.rate.min.mean)
data.country.avg
#> # A tibble: 108 x 6
#>    country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#>    <chr>       <int>            <dbl>            <dbl>            <dbl>
#>  1 Qatar       48947          0.00118           0.646           0.00223
#>  2 Singap…     32876          0.00170           0.750           0.00304
#>  3 Bahrain      9692          0.00261           0.439           0.00403
#>  4 Djibou…      2697          0.00309           0.451           0.00439
#>  5 Maldiv…      1457          0.00338           0.0925          0.00646
#>  6 Uzbeki…      3369          0.00408           0.115           0.00569
#>  7 Oman         8373          0.00480           0.0814          0.00840
#>  8 Iceland      1805          0.00488           0.236           0.00575
#>  9 Guinea…      1195          0.00545           0.110           0.0107 
#> 10 Guinea       3275          0.00563           0.104           0.00908
#> # … with 98 more rows, and 1 more variable: fatality.rate.max.cv <dbl>

ggplot(data.country.avg) + geom_histogram(aes(x = fatality.rate.min.mean), bins = 60)


least.letality <- data.country.avg %>%
                    arrange(fatality.rate.min.mean) %>%
                    filter(fatality.rate.min.mean <= 0.05 & confirmed >= 30000) %>%
                    arrange(desc(confirmed))
least.letality
#> # A tibble: 17 x 6
#>    country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#>    <chr>       <int>            <dbl>            <dbl>            <dbl>
#>  1 US        1699176          0.0452            0.358           0.0828 
#>  2 Russia     370680          0.00868           0.164           0.0160 
#>  3 Germany    181524          0.0262            0.654           0.0328 
#>  4 Turkey     159797          0.0241            0.154           0.0387 
#>  5 India      158086          0.0314            0.0748          0.0543 
#>  6 Peru       135905          0.0285            0.134           0.0463 
#>  7 China       84106          0.0409            0.288           0.0488 
#>  8 Chile       82289          0.0103            0.323           0.0163 
#>  9 Saudi …     78541          0.00831           0.397           0.0146 
#> 10 Pakist…     59151          0.0187            0.238           0.0328 
#> 11 Qatar       48947          0.00118           0.646           0.00223
#> 12 Belarus     38956          0.00704           0.264           0.0127 
#> 13 Bangla…     38292          0.0220            0.425           0.0415 
#> 14 Singap…     32876          0.00170           0.750           0.00304
#> 15 United…     31969          0.00774           0.236           0.0134 
#> 16 Portug…     31292          0.0331            0.309           0.0612 
#> 17 Switze…     30776          0.0431            0.454           0.0541 
#> # … with 1 more variable: fatality.rate.max.cv <dbl>

most.letality <- data.country.avg %>%
  arrange(fatality.rate.min.mean) %>%
  filter(fatality.rate.min.mean > 0.05 & confirmed >= 30000) %>%
  arrange(desc(confirmed))
most.letality
#> # A tibble: 11 x 6
#>    country confirmed fatality.rate.m… fatality.rate.m… fatality.rate.m…
#>    <chr>       <int>            <dbl>            <dbl>            <dbl>
#>  1 Brazil     411821           0.0561            0.289           0.0903
#>  2 United…    268619           0.124             0.311           0.231 
#>  3 Spain      236259           0.0941            0.315           0.137 
#>  4 Italy      231139           0.112             0.313           0.175 
#>  5 France     183067           0.109             0.465           0.173 
#>  6 Iran       141591           0.0590            0.195           0.0806
#>  7 Mexico      78023           0.0842            0.294           0.117 
#>  8 Belgium     57592           0.117             0.474           0.190 
#>  9 Nether…     45970           0.102             0.325           0.192 
#> 10 Ecuador     38103           0.0532            0.376           0.0971
#> 11 Sweden      35088           0.0874            0.491           0.157 
#> # … with 1 more variable: fatality.rate.max.cv <dbl>

compared.countries <- unique(c(least.letality$country,
                               "Argentina", "Brazil", "Chile", "US", "Japan", "Korea, South", "Germany", "Japan"))
compared.countries
#>  [1] "US"                   "Russia"               "Germany"             
#>  [4] "Turkey"               "India"                "Peru"                
#>  [7] "China"                "Chile"                "Saudi Arabia"        
#> [10] "Pakistan"             "Qatar"                "Belarus"             
#> [13] "Bangladesh"           "Singapore"            "United Arab Emirates"
#> [16] "Portugal"             "Switzerland"          "Argentina"           
#> [19] "Brazil"               "Japan"                "Korea, South"

rg <- ReportGeneratorEnhanced$new(data.processor = processor)

ggplot <- rg$ggplotCountriesLines(included.countries = compared.countries,
                                  min.confirmed = 100,
                                  field.description  = "Death Rates min",
                                  field = "fatality.rate.min", countries.text = "Compared Countries",
                                  log.scale = FALSE)
ggplot


ggplot <- rg$ggplotCrossSection(included.countries = compared.countries,
                                field.x = "confirmed",
                                field.y = "fatality.rate.min",
                                plot.description  = "Cross section Confirmed vs  Death rate min",
                                log.scale.x = TRUE,
                                log.scale.y = FALSE)
ggplot


ggplot <- rg$ggplotCrossSection(included.countries = most.letality$country,
                                field.x = "confirmed",
                                field.y = "fatality.rate.min",
                                plot.description  = "Cross section Confirmed vs  Death rate min",
                                log.scale.x = TRUE,
                                log.scale.y = FALSE)
ggplot

Created on 2020-05-28 by the reprex package (v0.3.0)