Feature: Metadata

andrewallenbruce commented 7 months ago

Metadata Wishlist


``` r
library(provider)
library(httr2)
library(dplyr)

#' Retrieve metadata for the Facility Affiliation dataset
#'
#' Performs two HTTP requests: a one-row datastore query (with schema and
#' row count) and a metastore lookup of the dataset item. The two JSON
#' responses are combined into a single named list.
#'
#' @return A named list with the elements `title`, `description`, `uuid`,
#'   `identifier`, `distribution`, `date_issued`, `date_modified`,
#'   `datetime_modified`, `date_released`, `publisher`, `format`,
#'   `landing_page`, `dictionary`, `dimensions`, `fields` and `example`
#'   (a tibble built from the returned result row).
metadata <- function() {

  # Identifiers that were previously repeated as literals throughout the body.
  dataset_id      <- "27ea-46a8"
  distribution_id <- "78125945-ea51-5ee0-b3f1-5f46292467b1"

  # NOTE(review): the leading host component is an empty string here, so the
  # request URL is relative and cannot be resolved as written. It was
  # presumably stripped when the snippet was posted; confirm the base URL.
  urlx <- paste0("",
                 "provider-data/api/1/datastore/query/",
                 distribution_id,
                 "?limit=1&offset=0&count=true&results=true",
                 "&schema=true&keys=true&format=json&rowIds=true")

  urly <- paste0("",
                 "provider-data/api/1/metastore/schemas/",
                 "dataset/items/", dataset_id, "?show-reference-ids=false")

  x <- httr2::request(urlx) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  y <- httr2::request(urly) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  # Per-field schema entries of the queried distribution.
  schema_fields <- x$schema[[distribution_id]]$fields

  # Fields whose descriptions are reported. Fields without a description
  # yield NULL and are dropped by unlist(), matching what c() did before.
  described <- c("npi",
                 "ind_pac_id",
                 "provider_last_name",
                 "provider_first_name",
                 "provider_middle_name",
                 "suff",
                 "facility_type",
                 "facility_affiliations_certification_number",
                 "facility_type_certification_number")

  field_descriptions <- unlist(
    lapply(described, function(nm) schema_fields[[nm]]$description),
    use.names = FALSE
  )

  list(
    title = y$title,
    description = y$description,
    uuid = dataset_id,
    identifier = y$keyword$identifier,
    distribution = y$distribution$identifier,
    date_issued = y$issued,
    date_modified = y$modified,
    datetime_modified = y$`%modified`,
    date_released = y$released,
    publisher = y$publisher$data$name,
    format = x$query$format,
    landing_page = y$landingPage,
    dictionary = "",
    # Count the schema fields directly. The previous code reported the
    # `length` property of the `record_number` field, which is an attribute
    # of that single field rather than the number of columns.
    dimensions = paste0(length(schema_fields), ' columns x ', x$count, ' rows'),
    fields = field_descriptions,
    example = dplyr::tibble(x$results))
}
```

``` r
metadata()
#> $title
#> [1] "Facility Affiliation Data"
#> 
#> $description
#> [1] "This is the facility affiliations data publicly reported in the Provider Data Catalog."
#> 
#> $uuid
#> [1] "27ea-46a8"
#> 
#> $identifier
#> [1] "f62da856-f3e0-565a-a6e0-f6aefcafde00"
#> 
#> $distribution
#> [1] "78125945-ea51-5ee0-b3f1-5f46292467b1"
#> 
#> $date_issued
#> [1] "2023-08-17"
#> 
#> $date_modified
#> [1] "2023-11-02"
#> 
#> $datetime_modified
#> [1] "2023-11-02T20:05:23-0400"
#> 
#> $date_released
#> [1] "2023-11-16"
#> 
#> $publisher
#> [1] "Centers for Medicare & Medicaid Services (CMS)"
#> 
#> $format
#> [1] "json"
#> 
#> $landing_page
#> [1] ""
#> 
#> $dictionary
#> [1] ""
#> 
#> $dimensions
#> [1] "10 columns x 1563152 rows"
#> 
#> $fields
#> [1] "NPI"                                       
#> [2] "Ind_PAC_ID"                                
#> [3] "Provider Last Name"                        
#> [4] "Provider First Name"                       
#> [5] "Provider Middle Name"                      
#> [6] "Facility Affiliations Certification Number"
#> [7] "Facility Type Certification Number"        
#> 
#> $example
#> # A tibble: 1 × 10
#>   record_number npi        ind_pac_id provider_last_name provider_first_name
#>           <int> <chr>      <chr>      <chr>              <chr>              
#> 1             1 1003000126 7517003643 ENKESHAFI          ARDALAN            
#> # ℹ 5 more variables: provider_middle_name <chr>, suff <chr>,
#> #   facility_type <chr>, facility_affiliations_certification_number <chr>,
#> #   facility_type_certification_number <chr>
```

andrewallenbruce commented 7 months ago

``` r
library(provider)
library(httr2)
library(dplyr)

#' Retrieve metadata for the Facility Affiliation dataset (parsed dates)
#'
#' Performs two HTTP requests: a one-row datastore query (with schema and
#' row count) and a metastore lookup of the dataset item. Date fields are
#' parsed with lubridate, and the release date is additionally passed to
#' `make_interval()` to produce `last_updated`.
#'
#' @return A named list with the elements `title`, `description`, `uuid`,
#'   `identifier`, `distribution`, `date_issued`, `date_modified`,
#'   `datetime_modified`, `date_released`, `last_updated`, `publisher`,
#'   `format`, `landing_page`, `dictionary`, `dimensions`, `fields` and
#'   `example` (the result row passed through `display_long()`).
metadata.affiliations <- function() {

  # Identifiers that were previously repeated as literals throughout the body.
  dataset_id      <- "27ea-46a8"
  distribution_id <- "78125945-ea51-5ee0-b3f1-5f46292467b1"

  # NOTE(review): the leading host component is an empty string here, so the
  # request URL is relative and cannot be resolved as written. It was
  # presumably stripped when the snippet was posted; confirm the base URL.
  urlx <- paste0("",
                 "provider-data/api/1/datastore/query/",
                 distribution_id,
                 "?limit=1&offset=0&count=true&results=true",
                 "&schema=true&keys=true&format=json&rowIds=true")

  urly <- paste0("",
                 "provider-data/api/1/metastore/schemas/",
                 "dataset/items/", dataset_id, "?show-reference-ids=false")

  x <- httr2::request(urlx) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  y <- httr2::request(urly) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  # Per-field schema entries of the queried distribution.
  schema_fields <- x$schema[[distribution_id]]$fields

  # Fields whose descriptions are reported. Fields without a description
  # yield NULL and are dropped by unlist(), matching what c() did before.
  described <- c("npi",
                 "ind_pac_id",
                 "provider_last_name",
                 "provider_first_name",
                 "provider_middle_name",
                 "suff",
                 "facility_type",
                 "facility_affiliations_certification_number",
                 "facility_type_certification_number")

  field_descriptions <- unlist(
    lapply(described, function(nm) schema_fields[[nm]]$description),
    use.names = FALSE
  )

  # Parsed once; used for both `date_released` and `last_updated`.
  released <- lubridate::ymd(y$released)

  list(
    title = y$title,
    description = y$description,
    uuid = dataset_id,
    identifier = y$keyword$identifier,
    distribution = y$distribution$identifier,
    date_issued = lubridate::ymd(y$issued),
    date_modified = lubridate::ymd(y$modified),
    datetime_modified = lubridate::ymd_hms(y$`%modified`),
    date_released = released,
    last_updated = make_interval(dplyr::tibble(date = released), start = date),
    publisher = y$publisher$data$name,
    format = x$query$format,
    landing_page = y$landingPage,
    dictionary = "",
    # Count the schema fields directly. The previous code reported the
    # `length` property of the `record_number` field, which is an attribute
    # of that single field rather than the number of columns.
    dimensions = paste0(length(schema_fields), ' columns x ', x$count, ' rows'),
    fields = field_descriptions,
    example = display_long(x$results))
}
```

#> $title
#> [1] "Facility Affiliation Data"
#> $description
#> [1] "This is the facility affiliations data publicly reported in the Provider Data Catalog."
#> $uuid
#> [1] "27ea-46a8"
#> $identifier
#> [1] "f62da856-f3e0-565a-a6e0-f6aefcafde00"
#> $distribution
#> [1] "78125945-ea51-5ee0-b3f1-5f46292467b1"
#> $date_issued
#> [1] "2023-08-17"
#> $date_modified
#> [1] "2023-11-02"
#> $datetime_modified
#> [1] "2023-11-03 00:05:23 UTC"
#> $date_released
#> [1] "2023-11-16"
#> $last_updated
#> # A tibble: 1 × 4
#>   date       interval                       period      timelength_days
#>   <date>     <Interval>                     <Period>              <dbl>
#> 1 2023-11-16 2023-11-16 UTC--2023-11-21 UTC 5d 0H 0M 0S               5
#> $publisher
#> [1] "Centers for Medicare & Medicaid Services (CMS)"
#> $format
#> [1] "json"
#> $landing_page
#> [1] ""
#> $dictionary
#> [1] ""
#> $dimensions
#> [1] "10 columns x 1563152 rows"
#> $fields
#> [1] "NPI"                                       
#> [2] "Ind_PAC_ID"                                
#> [3] "Provider Last Name"                        
#> [4] "Provider First Name"                       
#> [5] "Provider Middle Name"                      
#> [6] "Facility Affiliations Certification Number"
#> [7] "Facility Type Certification Number"        
#> $example
#> # A tibble: 10 × 2
#>    name                                       value       
#>    <chr>                                      <chr>       
#>  1 record_number                              "1"         
#>  2 npi                                        "1003000126"
#>  3 ind_pac_id                                 "7517003643"
#>  4 provider_last_name                         "ENKESHAFI" 
#>  5 provider_first_name                        "ARDALAN"   
#>  6 provider_middle_name                       ""          
#>  7 suff                                       ""          
#>  8 facility_type                              "Hospital"  
#>  9 facility_affiliations_certification_number "490063"    
#> 10 facility_type_certification_number         ""

andrewallenbruce commented 7 months ago

``` r
library(pointblank)
library(provider)

# Build a pointblank informant (data dictionary) for the table returned by
# `affiliations()` and render it as a report.
affiliations(npi = "1558595660", na.rm = FALSE) |>
  create_informant(
    tbl_name = "affiliations()",
    label = "Provider <-> Facility Affiliations"
  ) |>
  info_columns(
    columns = npi,
    info = "10-digit National Provider Identifier."
  ) |>
  info_columns(
    columns = pac,
    info = "10-digit PECOS Associate Control ID."
  ) |>
  info_columns(
    columns = vars(first, middle, last, suffix),
    info = "Individual Provider's Name."
  ) |>
  # The description here was previously a copy of the name columns' text.
  info_columns(
    columns = facility_type,
    info = "Type of facility (e.g., Hospital)."
  ) |>
  info_section(
    section_name = "Notes",
    usage = "`affiliations(parent_ccn = '670055')`",
    # NOTE(review): the markdown link below has no target; the URL appears
    # to have been lost from the posted snippet. Restore it before use.
    Source = c(
      "- From the **(provider)** package.",
      "- [CMS Affiliations API]("
    )
  ) |>
  get_informant_report(
    title = "**`affiliations()`** Data Dictionary",
    size = "standard"
  )
```

andrewallenbruce commented 7 months ago

``` r
# NOTE(review): the function name is missing from the posted snippet (both
# at the definition and at the call below). `metadata.store` is a
# reconstruction; confirm the intended name.

#' Look up catalog metadata for a dataset by title
#'
#' Requests a JSON catalog, keeps the entry whose `title` equals `api`,
#' unnests its `references` and `distribution` entries, and returns one row
#' per distribution with interval columns added by
#' `provider::make_interval()`.
#'
#' @param api Title of the dataset to look up, compared for equality against
#'   the catalog's `title` column.
#' @param first If `TRUE`, return only the first row of the result.
#' @return A tibble with the columns `title`, `description`, `dictionary`,
#'   `methodology`, `landing_page`, `distribution` and `modified`, plus the
#'   columns added by `provider::make_interval()`.
metadata.store <- function(api, first = FALSE) {

  # NOTE(review): the request URL is an empty string in the posted snippet;
  # it was presumably stripped. Supply the catalog URL before running.
  resp <- httr2::request("") |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  resp <- resp$dataset |>
    dplyr::tibble() |>
    dplyr::select(title,
                  description,
                  describedBy,
                  distribution,
                  landingPage,
                  modified,
                  references) |>
    dplyr::filter(title == {{ api }}) |>
    tidyr::unnest(references)

  # One row per distribution. The access URL is trimmed to the text between
  # the last "dataset/" and the last "/data".
  dst <- resp |>
    dplyr::select(title, distribution) |>
    tidyr::unnest(cols = distribution, names_sep = "_") |>
    # dplyr::filter(distribution_format == "API") |>
    dplyr::select(title,
                  distribution_title,
                  distribution_modified,
                  distribution_accessURL) |>
    dplyr::mutate(
      distribution_accessURL = strex::str_after_last(distribution_accessURL, "dataset/"),
      distribution_accessURL = strex::str_before_last(distribution_accessURL, "/data")
    ) |>
    dplyr::rename(distribution = distribution_accessURL)

  # Drop the nested column so it does not clash with the renamed
  # `distribution` column coming from `dst` in the join.
  resp$distribution <- NULL

  results <- dplyr::left_join(resp, dst, by = dplyr::join_by(title)) |>
    dplyr::select(-title) |>
    dplyr::select(title = distribution_title,
                  description,
                  dictionary = describedBy,
                  methodology = references,
                  landing_page = landingPage,
                  distribution,
                  modified = distribution_modified) |>
    dplyr::mutate(modified = lubridate::ymd(modified)) |>
    provider::make_interval(start = modified) |>
    tidyr::separate_wider_delim(title, delim = " : ", names = c("title", NA))

  if (first) {
    results <- dplyr::slice_head(results)
  }

  results
}
```

x <- metadata.store(
  api = "Medicare Fee-For-Service  Public Provider Enrollment",
  first = TRUE)

x |> dplyr::glimpse()
#> Rows: 1
#> Columns: 10
#> $ title           <chr> "Medicare Fee-For-Service Public Provider Enrollment"
#> $ description     <chr> "The Medicare Fee-For-Service Public Provider Enrollme…
#> $ dictionary      <chr> "…
#> $ methodology     <chr> "…
#> $ landing_page    <chr> "…
#> $ distribution    <chr> "2457ea29-fc82-48b0-86ec-3b0755de7515"
#> $ modified        <date> 2023-10-16
#> $ interval        <Interval> 2023-10-16 UTC--2023-12-03 UTC
#> $ period          <Period> 1m 17d 0H 0M 0S
#> $ timelength_days <dbl> 48

# Repackage the one-row result `x` as a nested list; the three
# date-related values are grouped under `date`.
list(
  title        = x[["title"]],
  description  = x[["description"]],
  dictionary   = x[["dictionary"]],
  methodology  = x[["methodology"]],
  landing      = x[["landing_page"]],
  distribution = x[["distribution"]],
  date         = list(
    updated       = x[["modified"]],
    length.period = x[["period"]],
    length.days   = x[["timelength_days"]]
  )
)
#> $title
#> [1] "Medicare Fee-For-Service Public Provider Enrollment"
#> $description
#> [1] "The Medicare Fee-For-Service Public Provider Enrollment dataset includes information on providers who are actively approved to bill Medicare or have completed the 855O at the time the data was pulled from the Provider Enrollment, Chain, and Ownership System (PECOS). The release of this provider enrollment data is not related to other provider information releases such as Physician Compare or Data Transparency.\n\n \n\nNote: This full dataset contains more records than most spreadsheet programs can handle, which will result in an incomplete load of data. Use of a database or statistical software is required."
#> $dictionary
#> [1] ""
#> $methodology
#> [1] ""
#> $landing
#> [1] ""
#> $distribution
#> [1] "2457ea29-fc82-48b0-86ec-3b0755de7515"
#> $date
#> $date$updated
#> [1] "2023-10-16"
#> $date$length.period
#> [1] "1m 17d 0H 0M 0S"
#> $date$length.days
#> [1] 48

