nflverse / nflfastR

A Set of Functions to Efficiently Scrape NFL Play by Play Data
https://www.nflfastr.com/
Other
414 stars 50 forks source link

`calculate_series_conversion_rates(weekly = FALSE)` incorrectly summarizes percentages to season level #439

Closed mrcaseb closed 10 months ago

mrcaseb commented 11 months ago

https://github.com/nflverse/nflfastR/blob/c1ca9f234f459fa79330af0ba32bcf43242fb210/R/calculate_series_conversion_rates.R#L170

The line above summarizes the week-level data to the season level. It takes an unweighted mean of the weekly means, so the season-level percentages are incorrect whenever the weekly sample sizes differ.

mrcaseb commented 11 months ago

Replace https://github.com/nflverse/nflfastR/blob/c1ca9f234f459fa79330af0ba32bcf43242fb210/R/calculate_series_conversion_rates.R#L168-L173

with


# Collapse the week-level rows into one summary row per group. Each weekly
# rate is averaged with that week's count (`off_n` / `def_n`) as its weight,
# so the result is a weighted mean rather than an unweighted mean of means.
# `na.rm = TRUE` makes weighted.mean() drop weeks whose rate is NA.
# NOTE(review): the input data and its grouping are set upstream of this
# snippet -- confirm the rows are grouped as intended (presumably by season
# and team) before this call.
dplyr::summarise(
        # OFFENSE: every off_* rate is weighted by the week-level `off_n`.
        # NOTE(review): this assumes all off_* rates share `off_n` as their
        # denominator -- confirm for the per-down columns (off_scr_1st..4th).
        off_scr =     weighted.mean(.data$off_scr, .data$off_n, na.rm = TRUE),
        off_scr_1st = weighted.mean(.data$off_scr_1st, .data$off_n, na.rm = TRUE),
        off_scr_2nd = weighted.mean(.data$off_scr_2nd, .data$off_n, na.rm = TRUE),
        off_scr_3rd = weighted.mean(.data$off_scr_3rd, .data$off_n, na.rm = TRUE),
        off_scr_4th = weighted.mean(.data$off_scr_4th, .data$off_n, na.rm = TRUE),
        off_1st =     weighted.mean(.data$off_1st, .data$off_n, na.rm = TRUE),
        off_td =      weighted.mean(.data$off_td, .data$off_n, na.rm = TRUE),
        off_fg =      weighted.mean(.data$off_fg, .data$off_n, na.rm = TRUE),
        off_punt =    weighted.mean(.data$off_punt, .data$off_n, na.rm = TRUE),
        off_to =      weighted.mean(.data$off_to, .data$off_n, na.rm = TRUE),
        # Sum the counts last: summarise() evaluates its arguments in order and
        # a new summary replaces the column of the same name, so the weighted
        # means above must run while `off_n` still holds the week-level counts.
        off_n =       sum(.data$off_n, na.rm = TRUE),
        # DEFENSE: same scheme, with every def_* rate weighted by the
        # week-level `def_n`.
        def_scr =     weighted.mean(.data$def_scr, .data$def_n, na.rm = TRUE),
        def_scr_1st = weighted.mean(.data$def_scr_1st, .data$def_n, na.rm = TRUE),
        def_scr_2nd = weighted.mean(.data$def_scr_2nd, .data$def_n, na.rm = TRUE),
        def_scr_3rd = weighted.mean(.data$def_scr_3rd, .data$def_n, na.rm = TRUE),
        def_scr_4th = weighted.mean(.data$def_scr_4th, .data$def_n, na.rm = TRUE),
        def_1st =     weighted.mean(.data$def_1st, .data$def_n, na.rm = TRUE),
        def_td =      weighted.mean(.data$def_td, .data$def_n, na.rm = TRUE),
        def_fg =      weighted.mean(.data$def_fg, .data$def_n, na.rm = TRUE),
        def_punt =    weighted.mean(.data$def_punt, .data$def_n, na.rm = TRUE),
        def_to =      weighted.mean(.data$def_to, .data$def_n, na.rm = TRUE),
        # Summed after the def_* means for the same ordering reason as `off_n`.
        def_n =       sum(.data$def_n, na.rm = TRUE),
        # Return an ungrouped result.
        .groups = "drop"
      ) %>%
      # Put each count column in front of the rates it belongs to: `off_n`
      # directly after `team`, `def_n` directly after the last offense column.
      dplyr::relocate("off_n", .after = "team") %>%
      dplyr::relocate("def_n", .after = "off_to")