andrewallenbruce / provider

Public Healthcare Provider APIs :stethoscope:
https://andrewallenbruce.github.io/provider/
Other
18 stars 2 forks source link

Fix `open_payments` pivot output #55

Closed andrewallenbruce closed 7 months ago

andrewallenbruce commented 7 months ago

Problem: Find the smallest group of variables to indicate a change, row-to-row:

library(provider)
library(furrr)
library(dplyr)
plan(multisession, workers = 4)
open <- open_payments_(npi = 1043218118, pivot = FALSE)
plan(sequential)

open |> 
  select(program_year, 
         payer_name, 
         pay_total, 
         pay_date, 
         pay_count,
         pay_form, 
         pay_nature,
         context,
         name_1:category_5)
#> # A tibble: 423 × 29
#>    program_year payer_name    pay_total pay_date   pay_count pay_form pay_nature
#>           <int> <chr>             <dbl> <date>     <chr>     <chr>    <chr>     
#>  1         2016 Shire North …     18.8  2016-06-06 1         In-kind… Food and …
#>  2         2016 Allergan Inc.    126.   2016-06-09 1         In-kind… Food and …
#>  3         2016 Allergan Inc.    127.   2016-10-24 1         In-kind… Food and …
#>  4         2016 Sun Pharmace…     16.1  2016-08-31 1         Cash or… Food and …
#>  5         2016 CLARITY MEDI…     14.7  2016-01-13 1         Cash or… Food and …
#>  6         2016 Ellex, Inc      2000    2016-08-16 1         Cash or… Honoraria 
#>  7         2016 Abbott Labor…      8.57 2016-03-11 1         In-kind… Food and …
#>  8         2016 Abbott Labor…     17.5  2016-09-26 1         In-kind… Food and …
#>  9         2016 Abbott Labor…     95.4  2016-05-07 1         In-kind… Food and …
#> 10         2016 Glaukos Corp…     20.2  2016-08-12 1         In-kind… Food and …
#> # ℹ 413 more rows
#> # ℹ 22 more variables: context <chr>, name_1 <chr>, covered_1 <chr>,
#> #   type_1 <chr>, category_1 <chr>, ndc_1 <chr>, name_2 <chr>, covered_2 <chr>,
#> #   type_2 <chr>, category_2 <chr>, name_3 <chr>, covered_3 <chr>,
#> #   type_3 <chr>, category_3 <chr>, name_4 <chr>, covered_4 <chr>,
#> #   type_4 <chr>, category_4 <chr>, name_5 <chr>, covered_5 <chr>,
#> #   type_5 <chr>, category_5 <chr>

open[1, ] |> glimpse()
#> Rows: 1
#> Columns: 62
#> $ program_year          <int> 2016
#> $ npi                   <chr> "1043218118"
#> $ covered_recipient     <fct> Physician
#> $ first                 <chr> "AHAD"
#> $ last                  <chr> "MAHOOTCHI"
#> $ address               <chr> "6739 GALL BLVD"
#> $ city                  <chr> "ZEPHYRHILLS"
#> $ state                 <ord> FL
#> $ zip                   <chr> "33542-2522"
#> $ country               <chr> "United States"
#> $ primary               <chr> "Medical Doctor"
#> $ specialty             <chr> "Allopathic & Osteopathic Physicians|Ophthalmolo…
#> $ license_state         <ord> FL
#> $ payer_id              <chr> "100000061292"
#> $ payer_sub             <chr> "Shire North American Group Inc"
#> $ payer_name            <chr> "Shire North American Group Inc"
#> $ payer_state           <ord> MA
#> $ payer_country         <chr> "United States"
#> $ pay_total             <dbl> 18.82
#> $ pay_date              <date> 2016-06-06
#> $ pay_count             <chr> "1"
#> $ pay_form              <chr> "In-kind items and services"
#> $ pay_nature            <chr> "Food and Beverage"
#> $ physician_ownership   <lgl> FALSE
#> $ third_party_payment   <chr> "No Third Party Payment"
#> $ charity               <lgl> NA
#> $ context               <chr> NA
#> $ publish_date          <date> 2023-06-30
#> $ publish_delay         <lgl> FALSE
#> $ publish_dispute       <lgl> FALSE
#> $ related_product       <lgl> FALSE
#> $ name_1                <chr> NA
#> $ covered_1             <chr> NA
#> $ type_1                <chr> NA
#> $ category_1            <chr> NA
#> $ ndc_1                 <chr> NA
#> $ name_2                <chr> NA
#> $ covered_2             <chr> NA
#> $ type_2                <chr> NA
#> $ category_2            <chr> NA
#> $ name_3                <chr> NA
#> $ covered_3             <chr> NA
#> $ type_3                <chr> NA
#> $ category_3            <chr> NA
#> $ name_4                <chr> NA
#> $ covered_4             <chr> NA
#> $ type_4                <chr> NA
#> $ category_4            <chr> NA
#> $ name_5                <chr> NA
#> $ covered_5             <chr> NA
#> $ type_5                <chr> NA
#> $ category_5            <chr> NA
#> $ travel_city           <chr> NA
#> $ travel_state          <ord> NA
#> $ travel_country        <chr> NA
#> $ third_party_name      <chr> NA
#> $ third_party_recipient <chr> NA
#> $ ndc_2                 <chr> NA
#> $ pdi_1                 <chr> NA
#> $ pdi_2                 <chr> NA
#> $ ndc_3                 <chr> NA
#> $ pdi_3                 <chr> NA

Created on 2023-11-13 with reprex v2.0.2

Using some implementation of dplyr::row_number and/or dplyr::consecutive_id or similar.

andrewallenbruce commented 7 months ago

old code

## ------------------------------------------------------------------------
    if (pivot) {
      pcol <- c(paste0('name_', 1:5),
                paste0('covered_', 1:5),
                paste0('type_', 1:5),
                paste0('category_', 1:5),
                paste0('ndc_', 1:5),
                paste0('pdi_', 1:5))

      results <- results |>
        dplyr::mutate(top_id = dplyr::row_number(), .before = name_1) |>
        tidyr::pivot_longer(
          cols = dplyr::any_of(pcol),
          names_to = c("attr", "group"),
          names_pattern = "(.*)_(.)",
          values_to = "val") |>
        dplyr::arrange(id) |>
        tidyr::pivot_wider(names_from = attr,
                           values_from = val,
                           values_fn = list) |>
        tidyr::unnest(cols = dplyr::any_of(c('name', 'covered', 'type', 'category', 'ndc', 'pdi'))) |>
        dplyr::mutate(covered = dplyr::case_match(covered, "Covered" ~ TRUE, "Non-Covered" ~ FALSE, .default = NA)) |>
        dplyr::filter(!is.na(name)) |>
        dplyr::mutate(group = as.integer(group),
                      pay_total = dplyr::if_else(group > 1, NA, pay_total))

      if (rlang::has_name(results, "pdi")) results$pdi <- dplyr::na_if(results$pdi, "N/A")
      ## ------------------------------------------------------------------------