andrewallenbruce / provider

Public Healthcare Provider APIs :stethoscope:
https://andrewallenbruce.github.io/provider/
Other
18 stars 2 forks source link

Feature: Metadata #60

Closed andrewallenbruce closed 3 months ago

andrewallenbruce commented 7 months ago

Metadata Wishlist



Example

``` r
library(provider)
library(httr2)
library(dplyr)

# Collect metadata for the CMS "Facility Affiliation Data" dataset.
#
# Issues two GET requests against data.cms.gov:
#   * the datastore query endpoint for the distribution (one result row,
#     plus the schema and the total row count), and
#   * the metastore dataset item (title, description, dates, publisher, ...).
#
# Takes no arguments. Returns a named list with the elements title,
# description, uuid, identifier, distribution, date_issued, date_modified,
# datetime_modified, date_released, publisher, format, landing_page,
# dictionary, dimensions, fields and example.
metadata <- function() {
  # Identifiers that were previously repeated inline throughout the body.
  dataset_id <- "27ea-46a8"
  distribution_id <- "78125945-ea51-5ee0-b3f1-5f46292467b1"

  urlx <- paste0(
    "https://data.cms.gov/",
    "provider-data/api/1/datastore/query/",
    distribution_id,
    "?limit=1&offset=0&count=true&results=true",
    "&schema=true&keys=true&format=json&rowIds=true"
  )

  urly <- paste0(
    "https://data.cms.gov/",
    "provider-data/api/1/metastore/schemas/",
    "dataset/items/", dataset_id, "?show-reference-ids=false"
  )

  # x: datastore response (query echo, schema, count, results).
  x <- httr2::request(urlx) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  # y: metastore response (dataset-level descriptive metadata).
  y <- httr2::request(urly) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  # `[[` gives an exact-name lookup (no partial matching, unlike `$`).
  schema_fields <- x$schema[[distribution_id]]$fields

  # Fields whose descriptions are reported, in display order. Fields that
  # carry no description contribute nothing to the result.
  described <- c(
    "npi",
    "ind_pac_id",
    "provider_last_name",
    "provider_first_name",
    "provider_middle_name",
    "suff",
    "facility_type",
    "facility_affiliations_certification_number",
    "facility_type_certification_number"
  )

  list(
    title             = y$title,
    description       = y$description,
    uuid              = dataset_id,
    identifier        = y$keyword$identifier,
    distribution      = y$distribution$identifier,
    date_issued       = y$issued,
    date_modified     = y$modified,
    datetime_modified = y$`%modified`,
    date_released     = y$released,
    publisher         = y$publisher$data$name,
    format            = x$query$format,
    landing_page      = y$landingPage,
    dictionary        = "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf",
    # Column count = number of fields in the schema. The earlier version
    # read `fields$record_number$length`, which is an attribute of that one
    # field rather than a count of columns.
    dimensions        = paste0(
      length(schema_fields), " columns x ", x$count, " rows"
    ),
    fields            = unlist(
      lapply(described, \(nm) schema_fields[[nm]]$description),
      use.names = FALSE
    ),
    example           = dplyr::tibble(x$results)
  )
}
```

``` r
metadata()
#> $title
#> [1] "Facility Affiliation Data"
#>
#> $description
#> [1] "This is the facility affiliations data publicly reported in the Provider Data Catalog."
#>
#> $uuid
#> [1] "27ea-46a8"
#>
#> $identifier
#> [1] "f62da856-f3e0-565a-a6e0-f6aefcafde00"
#>
#> $distribution
#> [1] "78125945-ea51-5ee0-b3f1-5f46292467b1"
#>
#> $date_issued
#> [1] "2023-08-17"
#>
#> $date_modified
#> [1] "2023-11-02"
#>
#> $datetime_modified
#> [1] "2023-11-02T20:05:23-0400"
#>
#> $date_released
#> [1] "2023-11-16"
#>
#> $publisher
#> [1] "Centers for Medicare & Medicaid Services (CMS)"
#>
#> $format
#> [1] "json"
#>
#> $landing_page
#> [1] "https://data.cms.gov/provider-data/dataset/27ea-46a8"
#>
#> $dictionary
#> [1] "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf"
#>
#> $dimensions
#> [1] "10 columns x 1563152 rows"
#>
#> $fields
#> [1] "NPI"
#> [2] "Ind_PAC_ID"
#> [3] "Provider Last Name"
#> [4] "Provider First Name"
#> [5] "Provider Middle Name"
#> [6] "Facility Affiliations Certification Number"
#> [7] "Facility Type Certification Number"
#>
#> $example
#> # A tibble: 1 × 10
#> record_number npi ind_pac_id provider_last_name provider_first_name
#>
#> 1 1 1003000126 7517003643 ENKESHAFI ARDALAN
#> # ℹ 5 more variables: provider_middle_name , suff ,
#> # facility_type , facility_affiliations_certification_number ,
#> # facility_type_certification_number
```

Created on 2023-11-20 with [reprex v2.0.2](https://reprex.tidyverse.org)

andrewallenbruce commented 7 months ago
Function

``` r
library(provider)
library(httr2)
library(dplyr)

# Collect metadata for the CMS "Facility Affiliation Data" dataset, with
# parsed dates and a long-format example row.
#
# Issues two GET requests against data.cms.gov:
#   * the datastore query endpoint for the distribution (one result row,
#     plus the schema and the total row count), and
#   * the metastore dataset item (title, description, dates, publisher, ...).
#
# Takes no arguments. Returns a named list with the elements title,
# description, uuid, identifier, distribution, date_issued, date_modified,
# datetime_modified, date_released, last_updated, publisher, format,
# landing_page, dictionary, dimensions, fields and example.
metadata.affiliations <- function() {
  # Identifiers that were previously repeated inline throughout the body.
  dataset_id <- "27ea-46a8"
  distribution_id <- "78125945-ea51-5ee0-b3f1-5f46292467b1"

  urlx <- paste0(
    "https://data.cms.gov/",
    "provider-data/api/1/datastore/query/",
    distribution_id,
    "?limit=1&offset=0&count=true&results=true",
    "&schema=true&keys=true&format=json&rowIds=true"
  )

  urly <- paste0(
    "https://data.cms.gov/",
    "provider-data/api/1/metastore/schemas/",
    "dataset/items/", dataset_id, "?show-reference-ids=false"
  )

  # x: datastore response (query echo, schema, count, results).
  x <- httr2::request(urlx) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  # y: metastore response (dataset-level descriptive metadata).
  y <- httr2::request(urly) |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  # `[[` gives an exact-name lookup (no partial matching, unlike `$`).
  schema_fields <- x$schema[[distribution_id]]$fields

  # Fields whose descriptions are reported, in display order. Fields that
  # carry no description contribute nothing to the result.
  described <- c(
    "npi",
    "ind_pac_id",
    "provider_last_name",
    "provider_first_name",
    "provider_middle_name",
    "suff",
    "facility_type",
    "facility_affiliations_certification_number",
    "facility_type_certification_number"
  )

  list(
    title             = y$title,
    description       = y$description,
    uuid              = dataset_id,
    identifier        = y$keyword$identifier,
    distribution      = y$distribution$identifier,
    date_issued       = lubridate::ymd(y$issued),
    date_modified     = lubridate::ymd(y$modified),
    datetime_modified = lubridate::ymd_hms(y$`%modified`),
    date_released     = lubridate::ymd(y$released),
    # make_interval() is resolved through library(provider) above; it is
    # handed a one-column tibble holding the release date.
    last_updated      = make_interval(
      dplyr::tibble(date = lubridate::ymd(y$released)),
      start = date
    ),
    publisher         = y$publisher$data$name,
    format            = x$query$format,
    landing_page      = y$landingPage,
    dictionary        = "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf",
    # Column count = number of fields in the schema. The earlier version
    # read `fields$record_number$length`, which is an attribute of that one
    # field rather than a count of columns.
    dimensions        = paste0(
      length(schema_fields), " columns x ", x$count, " rows"
    ),
    fields            = unlist(
      lapply(described, \(nm) schema_fields[[nm]]$description),
      use.names = FALSE
    ),
    # display_long() is resolved through library(provider) above.
    example           = display_long(x$results)
  )
}
```

# Run the function defined above; the `#>` lines that follow are its printed result.
metadata.affiliations()
#> $title
#> [1] "Facility Affiliation Data"
#> 
#> $description
#> [1] "This is the facility affiliations data publicly reported in the Provider Data Catalog."
#> 
#> $uuid
#> [1] "27ea-46a8"
#> 
#> $identifier
#> [1] "f62da856-f3e0-565a-a6e0-f6aefcafde00"
#> 
#> $distribution
#> [1] "78125945-ea51-5ee0-b3f1-5f46292467b1"
#> 
#> $date_issued
#> [1] "2023-08-17"
#> 
#> $date_modified
#> [1] "2023-11-02"
#> 
#> $datetime_modified
#> [1] "2023-11-03 00:05:23 UTC"
#> 
#> $date_released
#> [1] "2023-11-16"
#> 
#> $last_updated
#> # A tibble: 1 × 4
#>   date       interval                       period      timelength_days
#>   <date>     <Interval>                     <Period>              <dbl>
#> 1 2023-11-16 2023-11-16 UTC--2023-11-21 UTC 5d 0H 0M 0S               5
#> 
#> $publisher
#> [1] "Centers for Medicare & Medicaid Services (CMS)"
#> 
#> $format
#> [1] "json"
#> 
#> $landing_page
#> [1] "https://data.cms.gov/provider-data/dataset/27ea-46a8"
#> 
#> $dictionary
#> [1] "https://data.cms.gov/provider-data/sites/default/files/data_dictionaries/physician/DOC_Data_Dictionary.pdf"
#> 
#> $dimensions
#> [1] "10 columns x 1563152 rows"
#> 
#> $fields
#> [1] "NPI"                                       
#> [2] "Ind_PAC_ID"                                
#> [3] "Provider Last Name"                        
#> [4] "Provider First Name"                       
#> [5] "Provider Middle Name"                      
#> [6] "Facility Affiliations Certification Number"
#> [7] "Facility Type Certification Number"        
#> 
#> $example
#> # A tibble: 10 × 2
#>    name                                       value       
#>    <chr>                                      <chr>       
#>  1 record_number                              "1"         
#>  2 npi                                        "1003000126"
#>  3 ind_pac_id                                 "7517003643"
#>  4 provider_last_name                         "ENKESHAFI" 
#>  5 provider_first_name                        "ARDALAN"   
#>  6 provider_middle_name                       ""          
#>  7 suff                                       ""          
#>  8 facility_type                              "Hospital"  
#>  9 facility_affiliations_certification_number "490063"    
#> 10 facility_type_certification_number         ""

Created on 2023-11-21 with reprex v2.0.2

andrewallenbruce commented 7 months ago
Code

```r
library(pointblank)
library(provider)

# Build a pointblank "informant" (data dictionary) for the table returned by
# affiliations(), attach per-column descriptions and a notes section, then
# render it as a report.
affiliations(npi = "1558595660", na.rm = FALSE) |>
  create_informant(
    tbl_name = "affiliations()",
    label = "Provider <-> Facility Affiliations"
  ) |>
  info_columns(
    columns = npi,
    info = "10-digit National Provider Identifier."
  ) |>
  info_columns(
    columns = pac,
    info = "10-digit PECOS Associate Control ID."
  ) |>
  info_columns(
    columns = vars(first, middle, last, suffix),
    info = "Individual Provider's Name."
  ) |>
  # Was a copy-paste of the name description above; describe the column
  # itself instead.
  info_columns(
    columns = facility_type,
    info = "Type of facility the provider is affiliated with."
  ) |>
  info_section(
    section_name = "Notes",
    usage = "`affiliations(parent_ccn = '670055')`",
    Source = c(
      "- From the **(provider)** package.",
      "- [CMS Affiliations API](https://data.cms.gov/provider-data/dataset/27ea-46a8)"
    )
  ) |>
  get_informant_report(
    title = "**`affiliations()`** Data Dictionary",
    size = "standard"
  )
```

Screenshot 2023-11-30 164832

andrewallenbruce commented 7 months ago
Function

``` r
# Look up catalog metadata for a single CMS dataset by its title.
#
# Downloads the catalog at https://data.cms.gov/data.json, keeps the dataset
# whose `title` is equal to `api`, unnests its references and distributions,
# and joins the two back together by dataset title.
#
# Arguments:
#   api    Dataset title to look for (compared with `==`).
#   first  If TRUE, only the first row of the result is kept.
#
# Returns a tibble with the columns title, description, dictionary,
# methodology, landing_page, distribution and modified, plus whatever
# provider::make_interval() appends.
metadata.one <- function(api, first = FALSE) {
  catalog <- httr2::request("https://data.cms.gov/data.json") |>
    httr2::req_perform() |>
    httr2::resp_body_json(check_type = FALSE, simplifyVector = TRUE)

  # The one dataset of interest, one row per reference.
  dataset <- dplyr::tibble(catalog$dataset) |>
    dplyr::select(
      title,
      description,
      describedBy,
      distribution,
      landingPage,
      modified,
      references
    ) |>
    dplyr::filter(title == {{ api }}) |>
    tidyr::unnest(references)

  # Its distributions, reduced to title, modification date and the id that
  # sits between the last "dataset/" and the last "/data" of the access URL.
  distributions <- dataset |>
    dplyr::select(title, distribution) |>
    tidyr::unnest(cols = distribution, names_sep = "_") |>
    # (disabled) dplyr::filter(distribution_format == "API") |>
    dplyr::select(
      title,
      distribution_title,
      distribution_modified,
      distribution_accessURL
    ) |>
    dplyr::mutate(
      distribution_accessURL = strex::str_before_last(
        strex::str_after_last(distribution_accessURL, "dataset/"),
        "/data"
      )
    ) |>
    dplyr::rename(distribution = distribution_accessURL)

  # Drop the nested distribution column before joining the flat one back on.
  results <- dataset |>
    dplyr::select(-distribution) |>
    dplyr::left_join(distributions, by = dplyr::join_by(title)) |>
    dplyr::select(-title) |>
    dplyr::select(
      title        = distribution_title,
      description,
      dictionary   = describedBy,
      methodology  = references,
      landing_page = landingPage,
      distribution,
      modified     = distribution_modified
    ) |>
    dplyr::mutate(modified = lubridate::ymd(modified)) |>
    provider::make_interval(start = modified) |>
    # Keep only the part of the distribution title before " : ".
    tidyr::separate_wider_delim(
      title,
      delim = " : ",
      names = c("title", NA)
    )

  if (first) {
    results <- dplyr::slice_head(results)
  }

  results
}
```

# Look up one dataset by title; `first = TRUE` keeps only the first row of
# the result.
# NOTE(review): the double space inside the title looks deliberate -- the
# lookup is an exact `title ==` comparison and it returned one row (see the
# glimpse output below), so the catalog title presumably contains it.
# Confirm before "fixing" the spacing.
x <- metadata.one(
  api = "Medicare Fee-For-Service  Public Provider Enrollment", 
  first = TRUE)

# Column-by-column overview of the returned tibble.
x |> dplyr::glimpse()
#> Rows: 1
#> Columns: 10
#> $ title           <chr> "Medicare Fee-For-Service Public Provider Enrollment"
#> $ description     <chr> "The Medicare Fee-For-Service Public Provider Enrollme…
#> $ dictionary      <chr> "https://data.cms.gov/resources/medicare-fee-for-servi…
#> $ methodology     <chr> "https://data.cms.gov/resources/fee-for-service-public…
#> $ landing_page    <chr> "https://data.cms.gov/provider-characteristics/medicar…
#> $ distribution    <chr> "2457ea29-fc82-48b0-86ec-3b0755de7515"
#> $ modified        <date> 2023-10-16
#> $ interval        <Interval> 2023-10-16 UTC--2023-12-03 UTC
#> $ period          <Period> 1m 17d 0H 0M 0S
#> $ timelength_days <dbl> 48

# Repackage the one-row tibble as a plain named list, grouping the
# date-related columns under a nested `date` element.
list(title        = x$title,
     description  = x$description,
     dictionary   = x$dictionary,
     methodology  = x$methodology,
     landing      = x$landing_page,
     distribution = x$distribution,
     date         = list(updated       = x$modified,
                         length.period = x$period,
                         length.days   = x$timelength_days))
#> $title
#> [1] "Medicare Fee-For-Service Public Provider Enrollment"
#> 
#> $description
#> [1] "The Medicare Fee-For-Service Public Provider Enrollment dataset includes information on providers who are actively approved to bill Medicare or have completed the 855O at the time the data was pulled from the Provider Enrollment, Chain, and Ownership System (PECOS). The release of this provider enrollment data is not related to other provider information releases such as Physician Compare or Data Transparency.\n\n \n\nNote: This full dataset contains more records than most spreadsheet programs can handle, which will result in an incomplete load of data. Use of a database or statistical software is required."
#> 
#> $dictionary
#> [1] "https://data.cms.gov/resources/medicare-fee-for-service-public-provider-enrollment-data-dictionary"
#> 
#> $methodology
#> [1] "https://data.cms.gov/resources/fee-for-service-public-provider-enrollment-methodology"
#> 
#> $landing
#> [1] "https://data.cms.gov/provider-characteristics/medicare-provider-supplier-enrollment/medicare-fee-for-service-public-provider-enrollment"
#> 
#> $distribution
#> [1] "2457ea29-fc82-48b0-86ec-3b0755de7515"
#> 
#> $date
#> $date$updated
#> [1] "2023-10-16"
#> 
#> $date$length.period
#> [1] "1m 17d 0H 0M 0S"
#> 
#> $date$length.days
#> [1] 48

Created on 2023-12-03 with reprex v2.0.2