epiverse-trace / epiparameter

R package with a library of epidemiological parameters for infectious diseases, plus functions and classes for working with those parameters
https://epiverse-trace.github.io/epiparameter
Other
33 stars 11 forks source link

Converting one of the ebola serial intervals from {epireview} to `<epidist>` errors #303

Closed wzmli closed 5 months ago

wzmli commented 6 months ago

Please include a brief description of the issue and any proposed solutions you may have.

Error message: Error in is_epidist_params(prob_dist, prob_dist_params) : Assertion on 'prob_dist_params' failed: Must have unique names, but element 3 is duplicated. Calls: as_epidist ... is_epidist_params -> <Anonymous> -> makeAssertion -> mstop

Code:

ebola_data <- epireview::load_epidata("ebola")

ebola_si <- (ebola_data$params %>% filter(parameter_type == "Human delay - serial interval") %>% filter(distribution_type == "Gamma") )

print(as.data.frame(ebola_si[1,]))

epiobj <- as_epidist(ebola_si[1,])

Here is a zoomed in version.

Some parameter_data_id values work and some don't.

ebola_si <- (ebola_data$params
             %>% filter(parameter_type == "Human delay - serial interval")
             %>% filter(parameter_data_id == "5c8d68c39d1c3b9870ecaaff0280d02e") ## failed
)

epiobj <- as_epidist(ebola_si[1,])

Here is a parameter_data_id that works:

ebola_si <- (ebola_data$params
             %>% filter(parameter_type == "Human delay - serial interval")
             %>% filter(parameter_data_id == "0c3e02f80addfccc1017fa619fba76c5") ## work
)

epiobj <- as_epidist(ebola_si[1,])

joshwlambert commented 5 months ago

Thanks for reporting this issue! Here is a reproducible example of the issue using the most up-to-date versions of the {epiparameter} and {epireview} packages (there have been some updates to the packages since the issue was first opened).

library(epiparameter)
library(epireview)
#> Loading required package: epitrix
#> Loading required package: ggplot2
#> Loading required package: ggforce
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
ebola_data <- epireview::load_epidata("ebola")
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
#> Warning in load_epidata_raw(pathogen, "outbreak"): No data found for ebola
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
#> Warning in epireview::load_epidata("ebola"): No outbreaks information found for
#> ebola
#> Data loaded for ebola

ebola_si <- (ebola_data$params
             %>% filter(parameter_type == "Human delay - serial interval")
             %>% filter(distribution_type == "Gamma")
)

print(as.data.frame(ebola_si[1,]))
#>                                 id                parameter_data_id
#> 1 86e39ecd9f503068d69402cbf1395cca 5c8d68c39d1c3b9870ecaaff0280d02e
#>   covidence_id    pathogen                parameter_type parameter_value
#> 1        15896 Ebola virus Human delay - serial interval            15.3
#>   exponent parameter_unit parameter_lower_bound parameter_upper_bound
#> 1        0           Days                    NA                    NA
#>   parameter_value_type parameter_uncertainty_single_value
#> 1                 Mean                                 NA
#>   parameter_uncertainty_singe_type parameter_uncertainty_lower_value
#> 1                             <NA>                                NA
#>   parameter_uncertainty_upper_value parameter_uncertainty_type
#> 1                                NA                       <NA>
#>   cfr_ifr_numerator cfr_ifr_denominator distribution_type
#> 1                NA                  NA             Gamma
#>   distribution_par1_value distribution_par1_type distribution_par1_uncertainty
#> 1                    15.3                   Mean                          TRUE
#>   distribution_par2_value distribution_par2_type distribution_par2_uncertainty
#> 1                     2.3                Mean sd                          TRUE
#>   method_from_supplement method_moment_value cfr_ifr_method method_r
#> 1                  FALSE       Post outbreak           <NA>     <NA>
#>   method_disaggregated_by method_disaggregated method_disaggregated_only
#> 1                    <NA>                FALSE                     FALSE
#>   riskfactor_outcome riskfactor_name riskfactor_occupation
#> 1               <NA>            <NA>                  <NA>
#>   riskfactor_significant riskfactor_adjusted population_sex
#> 1                   <NA>                <NA>    Unspecified
#>   population_sample_type population_group population_age_min population_age_max
#> 1            Unspecified      Unspecified                 NA                 NA
#>   population_sample_size population_country population_location
#> 1                     20            Nigeria                <NA>
#>   population_study_start_day population_study_start_month
#> 1                         17                          Jul
#>   population_study_start_year population_study_end_day
#> 1                        2014                       20
#>   population_study_end_month population_study_end_year genome_site
#> 1                        Oct                      2014        <NA>
#>   genomic_sequence_available other_delay_start other_delay_end inverse_param
#> 1                      FALSE              <NA>            <NA>         FALSE
#>   parameter_from_figure parameter_class ebola_variant other_delay
#> 1                 FALSE     Human delay   Unspecified        <NA>
#>       delay_short       delay_start other_rf_outcome attack_rate_type
#> 1 Serial interval Infection process             <NA>             <NA>
#>   survey_start_date survey_end_date               survey_date parameter_bounds
#> 1       17 Jul 2014     20 Oct 2014 17 Jul 2014 - 20 Oct 2014             <NA>
#>   comb_uncertainty_type comb_uncertainty article_qa_score              outbreak
#> 1                  <NA>             <NA>         85.71429 West Africa 2013-2016
#>   ebola_species parameter_type_short first_author_surname year_publication
#> 1         Zaire                 <NA>                 Chan             2020
#>   article_label
#> 1 Chan 2020 (1)

epiobj <- as_epidist(ebola_si[1,])
#> Using Chan (2020). "<title not available>." _<journal not available>_. 
#> To retrieve the citation use the 'get_citation' function
#> Warning: Cannot create full citation for epidemiological parameters without bibliographic information 
#>  see ?as_epidist for help.
#> Error in extract_param(type = "range", values = median_range, distribution = prob_dist, : values vector should be c(median, min, max) check values

Created on 2024-06-14 with reprex v2.1.0

The issue persists, but the error message is now different.

joshwlambert commented 5 months ago

The fix to this issue has been implemented in PR #334.

Here is the same reproducible example as above using the new code that will be available in the main branch of the package once PR #334 is merged.

library(epiparameter)
library(epireview)
#> Loading required package: epitrix
#> Loading required package: ggplot2
#> Loading required package: ggforce
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
ebola_data <- epireview::load_epidata("ebola")
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
#> Warning in load_epidata_raw(pathogen, "outbreak"): No data found for ebola
#> Warning: One or more parsing issues, call `problems()` on your data frame for details,
#> e.g.:
#>   dat <- vroom(...)
#>   problems(dat)
#> Warning in epireview::load_epidata("ebola"): No outbreaks information found for
#> ebola
#> Data loaded for ebola
ebola_si <- (ebola_data$params
             %>% filter(parameter_type == "Human delay - serial interval")
             %>% filter(distribution_type == "Gamma")
)

print(as.data.frame(ebola_si[1,]))
#>                                 id                parameter_data_id
#> 1 86e39ecd9f503068d69402cbf1395cca 5c8d68c39d1c3b9870ecaaff0280d02e
#>   covidence_id    pathogen                parameter_type parameter_value
#> 1        15896 Ebola virus Human delay - serial interval            15.3
#>   exponent parameter_unit parameter_lower_bound parameter_upper_bound
#> 1        0           Days                    NA                    NA
#>   parameter_value_type parameter_uncertainty_single_value
#> 1                 Mean                                 NA
#>   parameter_uncertainty_singe_type parameter_uncertainty_lower_value
#> 1                             <NA>                                NA
#>   parameter_uncertainty_upper_value parameter_uncertainty_type
#> 1                                NA                       <NA>
#>   cfr_ifr_numerator cfr_ifr_denominator distribution_type
#> 1                NA                  NA             Gamma
#>   distribution_par1_value distribution_par1_type distribution_par1_uncertainty
#> 1                    15.3                   Mean                          TRUE
#>   distribution_par2_value distribution_par2_type distribution_par2_uncertainty
#> 1                     2.3                Mean sd                          TRUE
#>   method_from_supplement method_moment_value cfr_ifr_method method_r
#> 1                  FALSE       Post outbreak           <NA>     <NA>
#>   method_disaggregated_by method_disaggregated method_disaggregated_only
#> 1                    <NA>                FALSE                     FALSE
#>   riskfactor_outcome riskfactor_name riskfactor_occupation
#> 1               <NA>            <NA>                  <NA>
#>   riskfactor_significant riskfactor_adjusted population_sex
#> 1                   <NA>                <NA>    Unspecified
#>   population_sample_type population_group population_age_min population_age_max
#> 1            Unspecified      Unspecified                 NA                 NA
#>   population_sample_size population_country population_location
#> 1                     20            Nigeria                <NA>
#>   population_study_start_day population_study_start_month
#> 1                         17                          Jul
#>   population_study_start_year population_study_end_day
#> 1                        2014                       20
#>   population_study_end_month population_study_end_year genome_site
#> 1                        Oct                      2014        <NA>
#>   genomic_sequence_available other_delay_start other_delay_end inverse_param
#> 1                      FALSE              <NA>            <NA>         FALSE
#>   parameter_from_figure parameter_class ebola_variant other_delay
#> 1                 FALSE     Human delay   Unspecified        <NA>
#>       delay_short       delay_start other_rf_outcome attack_rate_type
#> 1 Serial interval Infection process             <NA>             <NA>
#>   survey_start_date survey_end_date               survey_date parameter_bounds
#> 1       17 Jul 2014     20 Oct 2014 17 Jul 2014 - 20 Oct 2014             <NA>
#>   comb_uncertainty_type comb_uncertainty article_qa_score              outbreak
#> 1                  <NA>             <NA>         85.71429 West Africa 2013-2016
#>   ebola_species parameter_type_short first_author_surname year_publication
#> 1         Zaire                 <NA>                 Chan             2020
#>   article_label
#> 1 Chan 2020 (1)
epiobj <- as_epidist(ebola_si[1,])
#> Using Chan (2020). "<title not available>." _<journal not available>_. 
#> To retrieve the citation use the 'get_citation' function
#> Warning: Cannot create full citation for epidemiological parameters without bibliographic information 
#>  see ?as_epidist for help.
epiobj
#> Disease: Ebola Virus Disease
#> Pathogen: Ebola virus
#> Epi Distribution: human delay   serial interval
#> Study: Chan (2020). "<title not available>." _<journal not available>_.
#> Distribution: gamma
#> Parameters:
#>   shape: 44.251
#>   scale: 0.346
unclass(epiobj)
#> $disease
#> [1] "Ebola Virus Disease"
#> 
#> $pathogen
#> [1] "Ebola virus"
#> 
#> $epi_dist
#> [1] "Human delay - serial interval"
#> 
#> $prob_dist
#> <distribution[1]>
#> [1] Γ(44, 2.9)
#> 
#> $uncertainty
#> $uncertainty$shape
#> $uncertainty$shape$ci_limits
#> [1] NA
#> 
#> $uncertainty$shape$ci
#> [1] NA NA
#> 
#> $uncertainty$shape$ci_type
#> [1] NA
#> 
#> 
#> $uncertainty$scale
#> $uncertainty$scale$ci_limits
#> [1] NA
#> 
#> $uncertainty$scale$ci
#> [1] NA NA
#> 
#> $uncertainty$scale$ci_type
#> [1] NA
#> 
#> 
#> 
#> $summary_stats
#> $summary_stats$mean
#> [1] 15.3
#> 
#> $summary_stats$mean_ci_limits
#> [1] NA NA
#> 
#> $summary_stats$mean_ci
#> [1] NA
#> 
#> $summary_stats$sd
#> [1] 2.3
#> 
#> $summary_stats$sd_ci_limits
#> [1] NA NA
#> 
#> $summary_stats$sd_ci
#> [1] NA
#> 
#> $summary_stats$median
#> [1] NA
#> 
#> $summary_stats$median_ci_limits
#> [1] NA NA
#> 
#> $summary_stats$median_ci
#> [1] NA
#> 
#> $summary_stats$quantiles
#> [1] NA
#> 
#> $summary_stats$range
#> [1] NA NA
#> 
#> 
#> $citation
#> Chan (2020). "<title not available>." _<journal not available>_.
#> 
#> $metadata
#> $metadata$sample_size
#> [1] 20
#> 
#> $metadata$region
#> [1] "Nigeria"
#> 
#> $metadata$transmission_mode
#> [1] NA
#> 
#> $metadata$vector
#> [1] NA
#> 
#> $metadata$extrinsic
#> [1] FALSE
#> 
#> $metadata$inference_method
#> [1] NA
#> 
#> 
#> $method_assess
#> $method_assess$censored
#> [1] NA
#> 
#> $method_assess$right_truncated
#> [1] NA
#> 
#> $method_assess$phase_bias_adjusted
#> [1] NA
#> 
#> 
#> $notes
#> [1] "No additional notes"
#> 
#> attr(,".epiparameter_namespace")
#> function () 
#> NULL
#> <bytecode: 0x11d8500e0>
#> <environment: namespace:epiparameter>

Created on 2024-06-14 with reprex v2.1.0

joshwlambert commented 5 months ago

PR #334 is now merged, closing this issue. If you have any other issues when using the {epiparameter} R package please open a new issue and I will address them as soon as possible.