2DegreesInvesting / tiltWorkflows

Make it easy to run tilt workflows
https://2degreesinvesting.github.io/tiltWorkflows/
GNU General Public License v3.0
0 stars 0 forks source link

`profile_emissions()` yields different output than `tiltIndicatorAfter::profile_emissions()` #143

Closed maurolepore closed 6 months ago

maurolepore commented 6 months ago

I expected both functions to return the same output, but this reprex shows otherwise.

reprex

library(readr)
library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Refresh all packages
# remove.packages(c(
#   "tiltToyData",
#   "tiltIndicator",
#   "tiltIndicatorAfter",
#   "tiltWorkflows"
# ))
# pak::pak("2DegreesInvesting/tiltWorkflows")

packageVersion("tiltToyData")
#> [1] '0.0.0.9203'
packageVersion("tiltIndicator")
#> [1] '0.0.0.9213'
packageVersion("tiltIndicatorAfter")
#> [1] '0.0.0.9024'
packageVersion("tiltWorkflows")
#> [1] '0.0.0.9032'

library(tiltIndicatorAfter)

# Turn off readr's column-type message so the reprex output stays short.
withr::local_options(readr.show_col_types = FALSE)

# Read the six input tables from CSV files; the paths are relative, so they are
# resolved against the current working directory.
toy_emissions_profile_products_ecoinvent <- read_csv("emissions_profile_products_ecoinvent.csv")
# Keep a single company so that both outputs are small enough to compare by eye.
toy_emissions_profile_any_companies <- read_csv("emissions_profile_any_companies_ecoinvent.csv") |>
  filter(companies_id == "-fred-sl_00000005407085-741049001")
toy_europages_companies <- read_csv("ep_companies.csv")
toy_ecoinvent_activities <- read_csv("ei_activities_overview.csv")
toy_ecoinvent_europages <- read_csv("mapper_ep_ei.csv")
toy_isic_name <- read_csv("isic_4digit_name.csv")

# Reference result: call the tiltIndicatorAfter implementation directly and
# pipe its output through `unnest_product()`.
tilt_indicator_after <- tiltIndicatorAfter::profile_emissions(
  companies = toy_emissions_profile_any_companies,
  co2 = toy_emissions_profile_products_ecoinvent,
  europages_companies = toy_europages_companies,
  ecoinvent_activities = toy_ecoinvent_activities,
  ecoinvent_europages = toy_ecoinvent_europages,
  isic = toy_isic_name
) |> 
  unnest_product()
#> ℹ Adding 32% and 63% noise to `co2e_lower` and `co2e_upper`, respectively.

# Result under test: the same inputs passed to the tiltWorkflows function of
# the same name. It warns that it splits `companies` into chunks.
tilt_workflows <- tiltWorkflows::profile_emissions(
  companies = toy_emissions_profile_any_companies,
  co2 = toy_emissions_profile_products_ecoinvent,
  europages_companies = toy_europages_companies,
  ecoinvent_activities = toy_ecoinvent_activities,
  ecoinvent_europages = toy_ecoinvent_europages,
  isic = toy_isic_name
) |> 
  unnest_product()
#> Warning: Splitting `companies` into 12 chunks.

# Columns to move to the front when printing. In this run they are filled in
# the tiltIndicatorAfter output but are all NA in the tiltWorkflows output.
# The names are passed to `matches()`, so they are treated as patterns.
problematic <- c(
  "country",
  "matched_reference_product",
  "main_activity",
  "matched_activity_name",
  "unit"
)

# Good
tilt_indicator_after |> relocate(matches(problematic))
#> # A tibble: 12 × 25
#>    country matched_reference_product main_activity matched_activity_name   unit 
#>    <chr>   <chr>                     <chr>         <chr>                   <chr>
#>  1 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  2 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  3 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  4 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  5 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  6 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  7 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  8 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  9 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 10 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 11 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 12 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> # ℹ 20 more variables: companies_id <chr>, company_name <chr>,
#> #   emission_profile <chr>, benchmark <chr>, ep_product <chr>,
#> #   co2e_lower <dbl>, co2e_upper <dbl>, multi_match <lgl>,
#> #   matching_certainty <chr>, matching_certainty_company_average <chr>,
#> #   tilt_sector <chr>, tilt_subsector <chr>, isic_4digit <chr>,
#> #   isic_4digit_name <chr>, company_city <chr>, postcode <chr>, address <chr>,
#> #   activity_uuid_product_uuid <chr>, profile_ranking <dbl>, …

# Bad
tilt_workflows |> relocate(matches(problematic))
#> # A tibble: 6 × 28
#>   country matched_reference_product main_activity matched_activity_name unit 
#>   <chr>   <chr>                     <chr>         <chr>                 <chr>
#> 1 <NA>    <NA>                      <NA>          <NA>                  <NA> 
#> 2 <NA>    <NA>                      <NA>          <NA>                  <NA> 
#> 3 <NA>    <NA>                      <NA>          <NA>                  <NA> 
#> 4 <NA>    <NA>                      <NA>          <NA>                  <NA> 
#> 5 <NA>    <NA>                      <NA>          <NA>                  <NA> 
#> 6 <NA>    <NA>                      <NA>          <NA>                  <NA> 
#> # ℹ 23 more variables: companies_id <chr>, company_name <chr>,
#> #   emission_profile <chr>, benchmark <chr>, ep_product <chr>,
#> #   co2e_lower <dbl>, co2e_upper <dbl>, multi_match <lgl>,
#> #   matching_certainty <chr>, matching_certainty_company_average <chr>,
#> #   tilt_sector <chr>, tilt_subsector <chr>, isic_4digit <chr>,
#> #   isic_4digit_name <chr>, company_city <chr>, postcode <dbl>, address <chr>,
#> #   activity_uuid_product_uuid <chr>, profile_ranking <dbl>, ep_id <chr>, …

Thanks @kalashsinghal (https://github.com/2DegreesInvesting/tiltIndicatorAfter/issues/153#issuecomment-1999580203)

maurolepore commented 6 months ago

@kalashsinghal

Most likely the problem was the cache. It must have been contaminated with data from previous runs.

I deleted the cache and now both functions output the same.

library(readr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)
library(tiltWorkflows)
#> Loading required package: tiltIndicatorAfter
#> Loading required package: tiltToyData
#> 
#> Attaching package: 'tiltWorkflows'
#> The following objects are masked from 'package:tiltIndicatorAfter':
#> 
#>     profile_emissions, profile_emissions_upstream, profile_sector,
#>     profile_sector_upstream

# IMPORTANT: delete the cache first so that results from previous runs are not reused
cache_delete()
#> Warning: Deleting `cache_dir`: 
#> • ~/.cache/tiltWorkflows

withr::local_options(readr.show_col_types = FALSE)
# Read a CSV file from the project's `input/` directory. The arguments are the
# path components below `input/` and are forwarded to `here::here()`.
read_input <- function(...) {
  path <- here::here("input", ...)
  readr::read_csv(path)
}

# Read the six input tables with `read_input()`.
toy_emissions_profile_products_ecoinvent <- read_input("emissions_profile_products_ecoinvent.csv")
# Keep a single company so that both outputs are small enough to compare by eye.
toy_emissions_profile_any_companies <- read_input("emissions_profile_any_companies_ecoinvent.csv") |>
  filter(companies_id == "-fred-sl_00000005407085-741049001")
toy_europages_companies <- read_input("ep_companies.csv")
toy_ecoinvent_activities <- read_input("ei_activities_overview.csv")
toy_ecoinvent_europages <- read_input("mapper_ep_ei.csv")
toy_isic_name <- read_input("isic_4digit_name.csv")

# Reference result: call the tiltIndicatorAfter implementation explicitly
# (tiltWorkflows masks `profile_emissions()` once attached) and pipe its output
# through `unnest_product()`.
tilt_indicator_after <- tiltIndicatorAfter::profile_emissions(
  companies = toy_emissions_profile_any_companies,
  co2 = toy_emissions_profile_products_ecoinvent,
  europages_companies = toy_europages_companies,
  ecoinvent_activities = toy_ecoinvent_activities,
  ecoinvent_europages = toy_ecoinvent_europages,
  isic = toy_isic_name
) |> 
  unnest_product()
#> ℹ Adding 52% and 56% noise to `co2e_lower` and `co2e_upper`, respectively.

# Result under test: the same inputs passed to the tiltWorkflows function of
# the same name. It warns that it splits `companies` into chunks.
tilt_workflows <- tiltWorkflows::profile_emissions(
  companies = toy_emissions_profile_any_companies,
  co2 = toy_emissions_profile_products_ecoinvent,
  europages_companies = toy_europages_companies,
  ecoinvent_activities = toy_ecoinvent_activities,
  ecoinvent_europages = toy_ecoinvent_europages,
  isic = toy_isic_name
) |> 
  unnest_product()
#> Warning: Splitting `companies` into 12 chunks.
#> ℹ Adding 55% and 102% noise to `co2e_lower` and `co2e_upper`, respectively.

# Columns that were all NA in the tiltWorkflows output of the original report.
# They are moved to the front when printing so they can be inspected first.
# The names are passed to `matches()`, so they are treated as patterns.
problematic <- c(
  "country",
  "matched_reference_product",
  "main_activity",
  "matched_activity_name",
  "unit"
)

tilt_indicator_after |> relocate(matches(problematic))
#> # A tibble: 12 × 25
#>    country matched_reference_product main_activity matched_activity_name   unit 
#>    <chr>   <chr>                     <chr>         <chr>                   <chr>
#>  1 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  2 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  3 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  4 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  5 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  6 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  7 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  8 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  9 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 10 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 11 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 12 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> # ℹ 20 more variables: companies_id <chr>, company_name <chr>,
#> #   emission_profile <chr>, benchmark <chr>, ep_product <chr>,
#> #   co2e_lower <dbl>, co2e_upper <dbl>, multi_match <lgl>,
#> #   matching_certainty <chr>, matching_certainty_company_average <chr>,
#> #   tilt_sector <chr>, tilt_subsector <chr>, isic_4digit <chr>,
#> #   isic_4digit_name <chr>, company_city <chr>, postcode <chr>, address <chr>,
#> #   activity_uuid_product_uuid <chr>, profile_ranking <dbl>, …

tilt_workflows |> relocate(matches(problematic))
#> # A tibble: 12 × 25
#>    country matched_reference_product main_activity matched_activity_name   unit 
#>    <chr>   <chr>                     <chr>         <chr>                   <chr>
#>  1 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  2 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  3 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  4 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  5 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  6 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  7 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  8 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#>  9 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 10 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 11 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> 12 spain   fish freezing, small fish wholesaler    market for fish freezi… kg   
#> # ℹ 20 more variables: companies_id <chr>, company_name <chr>,
#> #   emission_profile <chr>, benchmark <chr>, ep_product <chr>,
#> #   co2e_lower <dbl>, co2e_upper <dbl>, multi_match <lgl>,
#> #   matching_certainty <chr>, matching_certainty_company_average <chr>,
#> #   tilt_sector <chr>, tilt_subsector <chr>, isic_4digit <chr>,
#> #   isic_4digit_name <chr>, company_city <chr>, postcode <chr>, address <chr>,
#> #   activity_uuid_product_uuid <chr>, profile_ranking <dbl>, …

# Both calls report adding noise to `co2e_lower` and `co2e_upper`, so leave out
# the columns matching "co2" and check that everything else is identical.
identical(
  select(tilt_indicator_after, -matches("co2")),
  select(tilt_workflows, -matches("co2"))
)
#> [1] TRUE