rOpenStats / COVID19analyticsBak2109

31 stars 9 forks source link

Top Countries daily incremental deaths #12

Open kenarab opened 4 years ago

kenarab commented 4 years ago
library(COVID19analytics)
#> Warning: replacing previous import 'ggplot2::Layout' by 'lgr::Layout' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'readr::col_factor' by 'scales::col_factor'
#> when loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::equals' by 'testthat::equals' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::not' by 'testthat::not' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::is_less_than' by
#> 'testthat::is_less_than' when loading 'COVID19analytics'
#> Warning: replacing previous import 'dplyr::matches' by 'testthat::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'testthat::matches' by 'tidyr::matches' when
#> loading 'COVID19analytics'
#> Warning: replacing previous import 'magrittr::extract' by 'tidyr::extract' when
#> loading 'COVID19analytics'
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
# Build the data processor and run its pipeline step by step:
# setupData() -> transform() -> curate(). Each call's result is assigned to
# `dummy` and not used afterwards.
data.processor <- COVID19DataProcessor$new(provider = "JohnsHopkingsUniversity", missing.values = "imputation")

# Per the original author's note: data.processor$preprocess() is equivalent to
# setupData() + transform(), i.e. the preprocessing defined by the data provider.
# The two steps are run separately here.
dummy <- data.processor$setupData()
#> INFO  [18:42:12.269]  {stage: processor-setup}
#> INFO  [18:42:12.307] Checking required downloaded  {downloaded.max.date: 2020-05-30, daily.update.time: 21:00:00, current.datetime: 2020-05-31 1.., download.flag: FALSE}
#> INFO  [18:42:12.392] Checking required downloaded  {downloaded.max.date: 2020-05-30, daily.update.time: 21:00:00, current.datetime: 2020-05-31 1.., download.flag: FALSE}
#> INFO  [18:42:12.412] Checking required downloaded  {downloaded.max.date: 2020-05-30, daily.update.time: 21:00:00, current.datetime: 2020-05-31 1.., download.flag: FALSE}
#> INFO  [18:42:12.530]  {stage: data loaded}
#> INFO  [18:42:12.531]  {stage: data-setup}
dummy <- data.processor$transform()
#> INFO  [18:42:12.533] Executing transform 
#> INFO  [18:42:12.534] Executing consolidate 
#> INFO  [18:42:14.083]  {stage: consolidated}
#> INFO  [18:42:14.084] Executing standarize 
#> INFO  [18:42:14.545] gathering DataModel 
#> INFO  [18:42:14.546]  {stage: datamodel-setup}
# curate() is the step carried out by the missing-values method selected above
# ("imputation"); the log below shows it also calculates rates and the
# top-countries list.
dummy <- data.processor$curate()
#> INFO  [18:42:14.549]  {stage: loading-aggregated-data-model}
#> Warning in countrycode(x, origin = "country.name", destination = "continent"): Some values were not matched unambiguously: MS Zaandam
#> INFO  [18:42:16.315]  {stage: calculating-rates}
#> INFO  [18:42:16.507]  {stage: making-data-comparison}
#> INFO  [18:42:20.967]  {stage: applying-missing-values-method}
#> INFO  [18:42:20.968]  {stage: Starting first imputation}
#> INFO  [18:42:20.975]  {stage: calculating-rates}
#> INFO  [18:42:21.187]  {stage: making-data-comparison-2}
#> INFO  [18:42:25.729]  {stage: calculating-top-countries}
#> INFO  [18:42:25.754]  {stage: processed}

# Most recent date present in the processed data; used below to select the
# latest daily snapshot.
current.date <- max(data.processor$getData()$date)

# Report generator built on top of the already-processed data processor.
rg <- ReportGeneratorEnhanced$new(data.processor)

# Top-countries list stored on the processor (the curate() log above reports a
# "calculating-top-countries" stage).
# NOTE(review): the ranking criterion is not visible here -- confirm in the package.
top.countries <- data.processor$top.countries

# Print the list for the report.
top.countries
#>  [1] "US"             "Brazil"         "Russia"         "United Kingdom"
#>  [5] "Spain"          "Italy"          "France"         "Germany"       
#>  [9] "India"          "Turkey"         "Peru"           "Others"
# Top 10 countries by daily death increment on the latest available date.
# arrange() ignores any existing grouping by default, so the ordering is global.
# head(10) replaces the fixed `[1:10, ]` index so that a filter result with
# fewer than 10 rows is returned as-is instead of being indexed past its end.
data.processor$getData() %>%
  filter(date == current.date) %>%
  select(country, date, rate.inc.daily, confirmed.inc, confirmed, deaths, deaths.inc) %>%
  arrange(desc(deaths.inc)) %>%
  head(10)
#> # A tibble: 10 x 7
#> # Groups:   country [10]
#>    country   date       rate.inc.daily confirmed.inc confirmed deaths deaths.inc
#>    <chr>     <date>              <dbl>         <int>     <int>  <int>      <int>
#>  1 US        2020-05-30          0.014         24146   1770165 103776        967
#>  2 Brazil    2020-05-30          0.072         33274    498440  28834        956
#>  3 Mexico    2020-05-30          0.034          2885     87512   9779        364
#>  4 Peru      2020-05-30          0.098         13892    155671   4371        272
#>  5 United K… 2020-05-30          0.006          1612    274219  38458        215
#>  6 India     2020-05-30          0.048          8336    181827   5185        205
#>  7 Russia    2020-05-30          0.023          8952    396575   4555        181
#>  8 Italy     2020-05-30          0.002           416    232664  33340        111
#>  9 Pakistan  2020-05-30          0.038          2429     66457   1395         78
#> 10 France    2020-05-30          0.01           1829    188752  28774         57

# Line plot of the daily death increment (`deaths.inc`), labelled "new Deaths",
# with the log-scale option switched off.
# NOTE(review): the printed top.countries list omits Mexico and Pakistan, which
# both appear in the top-10 table by deaths.inc above -- confirm which set of
# countries this plot draws.
rg$ggplotCountriesLines(field = "deaths.inc",
                        field.description = "new Deaths", log.scale = FALSE)

Created on 2020-05-31 by the reprex package (v0.3.0)