2DegreesInvesting / tiltToyData

Toy datasets for TILT
https://2degreesinvesting.github.io/tiltToyData/
GNU General Public License v3.0
0 stars 0 forks source link

Add single quotes around values of `*isic*` #17

Closed maurolepore closed 7 months ago

maurolepore commented 7 months ago

This PR adds single-quotes around the values of all *isic* columns across all datasets.

Note that we need to handle missing values; otherwise `NA` is turned into the literal string `'NA'`:

# Wrap each non-missing value of `x` in single quotes. Missing values pass
# through unchanged because `ifelse()` returns `x` wherever `is.na(x)` is TRUE.
good_single_quote <- function(x) ifelse(is.na(x), x, glue::glue("'{x}'"))
# Counter-example: quotes every element unconditionally, so a missing value
# ends up as the literal string 'NA' (see the output of the call below).
bad_single_quote <- function(x) glue::glue("'{x}'")

# Sample input: one missing value and one code that starts with a zero.
isic <- c(NA, "0123")
good_single_quote(isic)
#> [1] NA       "'0123'"

bad_single_quote(isic)
#> 'NA'
#> '0123'

Created on 2023-12-06 with reprex v2.0.2

reprex

# Reprex: print the four toy datasets with their `isic` columns moved to the
# front, so the single-quoted values are visible in the truncated output.
devtools::load_all()
#> ℹ Loading tiltToyData
library(dplyr, warn.conflicts = FALSE)
library(readr, warn.conflicts = FALSE)
# Silence readr's column-specification message for every `read_csv()` call.
options(readr.show_col_types = FALSE)

# The datasets touched by this PR.
files <- c(
  "emissions_profile_products.csv.gz",
  "emissions_profile_upstream_products.csv.gz",
  "sector_profile_companies.csv.gz",
  "sector_profile_upstream_products.csv.gz"
)

# `toy_path()` is a package helper; presumably it maps each file name to the
# path of the bundled dataset -- confirm against its definition.
# `setNames(files)` labels each list element with its file name, which is what
# produces the `$<file name>` headers in the printed result.
files |>
  toy_path() |>
  setNames(files) |>
  lapply(\(x) read_csv(x)) |>
  lapply(\(x) relocate(x, matches("isic")))
#> $emissions_profile_products.csv.gz
#> # A tibble: 5 × 7
#>   isic_4digit co2_footprint tilt_sector    tilt_subsector unit 
#>   <chr>               <dbl> <chr>          <chr>          <chr>
#> 1 '2560'             176.   Industry       Other          unit 
#> 2 '2560'              58.1  Industry       Other          unit 
#> 3 '2870'               4.95 Steel & Metals Steel          kg   
#> 4 '1780'              12.5  Agriculture    Agriculture    kg   
#> 5 '2679'               2.07 Industry       Other          kg   
#> # ℹ 2 more variables: activity_uuid_product_uuid <chr>, ei_activity_name <chr>
#> 
#> $emissions_profile_upstream_products.csv.gz
#> # A tibble: 33 × 7
#>    input_isic_4digit input_co2_footprint input_tilt_sector input_tilt_subsector
#>    <chr>                           <dbl> <chr>             <chr>               
#>  1 '2560'                        7.07e+0 Inudstry          Other               
#>  2 '2560'                        3.99e+1 Inudstry          Other               
#>  3 '2560'                        5.12e-1 Inudstry          Other               
#>  4 '2560'                        1.24e+0 Inudstry          Other               
#>  5 '2560'                        2.12e+1 Inudstry          Other               
#>  6 '2560'                        1.24e-9 Inudstry          Other               
#>  7 '2560'                        7   e-9 Inudstry          Other               
#>  8 '2560'                        1.04e+0 Inudstry          Other               
#>  9 '2560'                        1.12e+0 Inudstry          Other               
#> 10 '2560'                        3.51e+0 Inudstry          Other               
#> # ℹ 23 more rows
#> # ℹ 3 more variables: input_unit <chr>, input_activity_uuid_product_uuid <chr>,
#> #   activity_uuid_product_uuid <chr>
#> 
#> $sector_profile_companies.csv.gz
#> # A tibble: 28 × 10
#>    isic_4digit companies_id        company_name clustered activity_uuid_produc…¹
#>    <chr>       <chr>               <chr>        <chr>     <chr>                 
#>  1 '2410'      fleischerei-stiefs… fleischerei… steel     0faa7ecb-fef2-5117-89…
#>  2 '2410'      fleischerei-stiefs… fleischerei… steel     0faa7ecb-fef2-5117-89…
#>  3 '2029'      pecheries-basques_… pecheries-b… nitrogen  03fbf989-9a1a-5e3d-a5…
#>  4 '2029'      pecheries-basques_… pecheries-b… nitrogen  03fbf989-9a1a-5e3d-a5…
#>  5 <NA>        hoche-butter-gmbh_… hoche-butte… waste     <NA>                  
#>  6 <NA>        hoche-butter-gmbh_… hoche-butte… waste     <NA>                  
#>  7 <NA>        hoche-butter-gmbh_… hoche-butte… car       <NA>                  
#>  8 <NA>        hoche-butter-gmbh_… hoche-butte… car       <NA>                  
#>  9 <NA>        hoche-butter-gmbh_… hoche-butte… heater    <NA>                  
#> 10 <NA>        hoche-butter-gmbh_… hoche-butte… heater    <NA>                  
#> # ℹ 18 more rows
#> # ℹ abbreviated name: ¹​activity_uuid_product_uuid
#> # ℹ 5 more variables: tilt_sector <chr>, tilt_subsector <chr>, type <chr>,
#> #   sector <chr>, subsector <chr>
#> 
#> $sector_profile_upstream_products.csv.gz
#> # A tibble: 74 × 10
#>    input_isic_4digit activity_uuid_product_uuid           input_activity_uuid_…¹
#>    <chr>             <chr>                                <chr>                 
#>  1 '3821'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 5de8c337-dea9-5c1f-9d…
#>  2 '3821'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 5de8c337-dea9-5c1f-9d…
#>  3 '2011'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 1aeb18b9-8355-560f-82…
#>  4 '2011'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 1aeb18b9-8355-560f-82…
#>  5 '1201'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 22704506-7707-5ae7-99…
#>  6 '1201'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 22704506-7707-5ae7-99…
#>  7 '4141'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 92078219-1ed3-5215-9f…
#>  8 '4141'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 92078219-1ed3-5215-9f…
#>  9 '1050'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 9d483329-b09a-5513-b1…
#> 10 '1050'            0a242b09-772a-5edf-8e82-9cb4ba52a25… 9d483329-b09a-5513-b1…
#> # ℹ 64 more rows
#> # ℹ abbreviated name: ¹​input_activity_uuid_product_uuid
#> # ℹ 7 more variables: input_reference_product_name <chr>, input_unit <chr>,
#> #   input_tilt_sector <chr>, input_tilt_subsector <chr>, type <chr>,
#> #   sector <chr>, subsector <chr>

Created on 2023-12-06 with reprex v2.0.2

For the record, the change was made with this code:

# One-off script: rewrite the toy datasets in place so that every non-missing
# value in a column whose name matches "isic" is wrapped in single quotes.
devtools::load_all()
library(dplyr, warn.conflicts = FALSE)
library(readr, warn.conflicts = FALSE)
# Silence readr's column-specification message for every `read_csv()` call.
options(readr.show_col_types = FALSE)

# `toy_path()` is a package helper; presumably it returns the on-disk path of
# each bundled dataset -- confirm against its definition.
paths <- toy_path(c(
  "emissions_profile_products.csv.gz",
  "emissions_profile_upstream_products.csv.gz",
  "sector_profile_companies.csv.gz",
  "sector_profile_upstream_products.csv.gz"
))

# Quote non-missing values only; `NA` stays `NA` instead of becoming "'NA'".
# NOTE(review): values are wrapped unconditionally, so this is not idempotent;
# running the script a second time would add a second pair of quotes.
single_quote <- function(x) ifelse(is.na(x), x, glue::glue("'{x}'"))

# Read each file, quote its `isic` columns, then write each result back to the
# path it was read from (`walk2()` pairs the i-th data frame with `paths[i]`),
# overwriting the original file.
# NOTE(review): `read_csv()` guesses column types; if an `isic` column were
# guessed as numeric, a leading zero would be dropped before quoting -- confirm
# the columns are read as character.
paths |>
  setNames(paths) |>
  lapply(\(x) read_csv(x)) |>
  lapply(\(x) mutate(x, across(matches("isic"), single_quote))) |>
  purrr::walk2(paths, write_csv)

TODO

EXCEPTIONS

maurolepore commented 7 months ago

Note the interaction with tiltIndicatorAfter; the check now reports the following testthat warning:

... check-r-package > Show testthat output:

══ Warnings ════════════════════════════════════════════════════════════════════
  ── Warning ('test-prepare_ictr_company.R:30:3'): handles numeric `isic*` in `co2` ──
  NAs introduced by coercion
...  
  [ FAIL 0 | WARN 2 | SKIP 5 | PASS 48 ]
maurolepore commented 7 months ago

FYI @ysherstyuk