Closed maurolepore closed 2 years ago
This is adapted from the original reprex. The result is as expected, suggesting the issue that Jackson noticed is indeed fixed.
@georgeharris2deg,
library(dplyr, warn.conflicts = FALSE)
library(r2dii.data)
packageVersion("r2dii.data")
#> [1] '0.1.6'
library(r2dii.analysis)
# The original bug was fixed in 0.1.2
# https://2degreesinvesting.github.io/r2dii.analysis/news/index.html#r2dii-analysis-0-1-2-2020-12-05
packageVersion("r2dii.analysis")
#> [1] '0.1.3'
# Toy data: a loanbook holding a single, already-matched automotive loan
one_loan <- tibble(
  id_loan = 1,
  name_ald = "shaanxi auto",
  loan_size_outstanding = 1,
  loan_size_outstanding_currency = "EUR",
  loan_size_credit_limit = 2,
  loan_size_credit_limit_currency = "EUR",
  id_2dii = "UP1",
  level = "ultimate_parent",
  score = 1,
  sector = "automotive",
  sector_ald = "automotive"
)
# `two_loan` stacks `one_loan` on itself; the only difference between the two
# loanbooks is the first column `id_loan`
two_loan <- bind_rows(one_loan, one_loan) %>%
  mutate(
    id_loan = 1:2,
    # I suspect the problem might be here
    # (some cases may make no sense, but it seems like a good place to start)
    # What happens if more columns are different? # x: expected; v: not
    # name_ald = "shaanxi auto", # must be the same
    # loan_size_outstanding = 1:2, # x
    # loan_size_outstanding_currency = c("EUR", "USD"), # x
    # loan_size_credit_limit = 2:3, # x
    # loan_size_credit_limit_currency = c("EUR", "USD"), # x
    # id_2dii = c("UP1", "UP2"), # x
    # level = c("ultimate_parent", "direct_loantaker"), # v
    # score = 1:0.9, # v
    # NOTE(review): `1:0.9` evaluates to the single value `1`, so the case
    # above did not actually vary `score`; `c(1, 0.9)` would.
    # sector = c("automotive", "other"), # x
    # sector_ald = c("automotive", "other") # v
  )
# Asset data: one row for the company named in `one_loan$name_ald`
ald <- tibble(
  name_company = "shaanxi auto",
  sector = "automotive",
  technology = "ice",
  year = 2025,
  production = 1,
  emission_factor = 1,
  plant_location = "BF",
  is_ultimate_owner = TRUE
)
# Scenario data: one row for the same sector, technology and year as `ald`
scenario <- tibble(
  scenario = "sds",
  sector = "automotive",
  technology = "ice",
  region = "global",
  year = 2025,
  tmsr = 0.5,
  smsp = -0.08,
  scenario_source = "demo_2020"
)
# Print both loanbooks for comparison; the `#>` lines are the recorded output
one_loan
#> # A tibble: 1 x 11
#> id_loan name_ald loan_size_outst… loan_size_outst… loan_size_credi…
#> <dbl> <chr> <dbl> <chr> <dbl>
#> 1 1 shaanxi… 1 EUR 2
#> # … with 6 more variables: loan_size_credit_limit_currency <chr>,
#> # id_2dii <chr>, level <chr>, score <dbl>, sector <chr>, sector_ald <chr>
two_loan
#> # A tibble: 2 x 11
#> id_loan name_ald loan_size_outst… loan_size_outst… loan_size_credi…
#> <int> <chr> <dbl> <chr> <dbl>
#> 1 1 shaanxi… 1 EUR 2
#> 2 2 shaanxi… 1 EUR 2
#> # … with 6 more variables: loan_size_credit_limit_currency <chr>,
#> # id_2dii <chr>, level <chr>, score <dbl>, sector <chr>, sector_ald <chr>
# Run the same unweighted, company-level analysis on each loanbook
one_result <- target_market_share(
  one_loan,
  ald,
  scenario,
  region_isos_demo,
  by_company = TRUE,
  weight_production = FALSE
)
two_result <- target_market_share(
  two_loan,
  ald,
  scenario,
  region_isos_demo,
  by_company = TRUE,
  weight_production = FALSE
)
# > I would expect the unweighted company-level production output to be
# identical -- @jdhoffa
# https://github.com/2DegreesInvesting/r2dii.analysis/issues/239#issuecomment-737278398
identical(one_result, two_result)
#> [1] TRUE
testthat::expect_equal(one_result, two_result)
Created on 2020-12-23 by the reprex package (v0.3.0)
@maurolepore as discussed earlier - here is an anonymised and stripped-down reprex of the error. Thanks!
library(tidyverse)
#> Warning: package 'tidyverse' was built under R version 4.0.3
library(r2dii.data)
#> Warning: package 'r2dii.data' was built under R version 4.0.3
library(r2dii.match)
#> Warning: package 'r2dii.match' was built under R version 4.0.3
library(r2dii.analysis)
#> Warning: package 'r2dii.analysis' was built under R version 4.0.3
packageVersion("r2dii.data")
#> [1] '0.1.6'
packageVersion("r2dii.match")
#> [1] '0.0.7'
packageVersion("r2dii.analysis")
#> [1] '0.1.3'
# Loanbook with multiple loans (L1 and L2) to one company ("company A")
lbk <- tibble::tribble(
~id_loan, ~id_direct_loantaker, ~name_direct_loantaker, ~id_intermediate_parent_1, ~name_intermediate_parent_1, ~id_ultimate_parent, ~name_ultimate_parent, ~loan_size_outstanding, ~loan_size_outstanding_currency, ~loan_size_credit_limit, ~loan_size_credit_limit_currency, ~sector_classification_system, ~sector_classification_input_type, ~sector_classification_direct_loantaker, ~fi_type, ~flag_project_finance_loan, ~name_project, ~lei_direct_loantaker, ~isin_direct_loantaker,
"L1", "DL1", "company A", NA, NA, "UP16", "company A", 225626, "EUR", 18968806, "EUR", "NACE", "Code", 3511, "Loan", "No", NA, NA, NA,
"L2", "DL1", "company A", NA, NA, "UP16", "company A", 4321, "EUR", 44333, "EUR", "NACE", "Code", 3511, "Loan", "No", NA, NA, NA,
)
# ald of one company: a single row for "company A" (power, coalcap, 2020)
# with a production of 50
ald <- tibble::tribble(
~name_company, ~sector, ~technology, ~year, ~production, ~production_unit, ~emission_factor, ~ald_emission_factor_unit, ~plant_location, ~is_ultimate_owner, ~ald_timestamp,
"company A", "power", "coalcap", 2020, 50, "MW", NA, NA, "IT", TRUE, "2019Q4"
)
# Load the demo scenario file and label its source as "weo_2019"
scenario <- mutate(
  r2dii.data::scenario_demo_2020,
  scenario_source = "weo_2019"
)
# Load the region file
region <- r2dii.data::region_isos
# Match the loanbook to the ald
match_file <- match_name(lbk, ald)
# Prioritize the matches
lbk_ready <- prioritize(match_file)
# Run the company-level results, without weighting production
company_results <- target_market_share(
  lbk_ready,
  ald,
  scenario,
  region,
  by_company = TRUE,
  weight_production = FALSE
)
# The expected company-level value is the total production present in the ald
# for the given company
expected <- ald$production
# The actual value is taken from the company-level output: the rows with
# `metric == "projected"`, narrowed by the remaining filters
actual <- filter(
  company_results,
  region == "global",
  scenario_source == "weo_2019",
  technology == "coalcap",
  metric == "projected"
)
# Logical test to see whether expected equals actual
identical(actual$production, expected)
#> [1] FALSE
Created on 2020-12-23 by the reprex package (v0.3.0)
Awesome reprex @georgeharris2deg! I see the problem is exposed only when the loanbook has the column `loan_size_credit_limit`, or `loan_size_outstanding`, or both.
Does this give any clue about what the solution might be?
library(dplyr, warn.conflicts = FALSE)
library(r2dii.data)
packageVersion("r2dii.data")
#> [1] '0.1.6'
library(r2dii.match)
packageVersion("r2dii.match")
#> [1] '0.0.8'
library(r2dii.analysis)
packageVersion("r2dii.analysis")
#> [1] '0.1.3'
# Helpers to run all code with different loanbooks -- used at the end ------
# Run `target_market_share()` at the company level without weighting
# production, using `scenario_demo_2020` labelled with the scenario source
# "weo_2019" and the `region_isos` dataset.
#
# `lbk` is passed as the first argument of `target_market_share()` (`testit()`
# feeds it the output of `prioritize()`); `ald` is passed as the second.
target_market_share_by_company_unweighted <- function(lbk, ald) {
  weo_2019_scenario <- mutate(scenario_demo_2020, scenario_source = "weo_2019")

  target_market_share(
    lbk,
    ald,
    scenario = weo_2019_scenario,
    region_isos = region_isos,
    by_company = TRUE,
    weight_production = FALSE
  )
}
# Keep only the rows of `data` that are global, come from the "weo_2019"
# scenario source, use the "coalcap" technology, and hold the "projected"
# metric.
pick_global_projections_for_coalcap <- function(data) {
  filter(
    data,
    region == "global",
    scenario_source == "weo_2019",
    technology == "coalcap",
    metric == "projected"
  )
}
# Full workflow for one loanbook: match it against `ald`, prioritize the
# matches, compute the unweighted company-level targets, and return only the
# global "coalcap" projections.
testit <- function(lbk, ald) {
  matched <- match_name(lbk, ald)
  prioritized <- prioritize(matched)
  targets <- target_market_share_by_company_unweighted(prioritized, ald)

  pick_global_projections_for_coalcap(targets)
}
# Minimal data ------------------------------------------------------------
# Loanbook with two loans (L1 and L2) to the same company ("comp a")
lbk <- tribble(
# NOTE(review): the two "bad" markers below presumably sat above the columns
# `loan_size_credit_limit` and `loan_size_outstanding` before the alignment
# was lost -- confirm
# bad # bad
~id_loan, ~loan_size_credit_limit, ~loan_size_outstanding, ~id_ultimate_parent, ~name_ultimate_parent, ~id_direct_loantaker, ~name_direct_loantaker, ~sector_classification_system, ~sector_classification_direct_loantaker,
"L1", 18968806, 225626, "UP16", "comp a", "DL1", "comp a", "NACE", 3511,
"L2", 44333, 4321, "UP16", "comp a", "DL1", "comp a", "NACE", 3511
)
# ald with a single row for "comp a": production of 50 for coalcap in 2020
ald <- tribble(
~name_company, ~sector, ~is_ultimate_owner, ~plant_location, ~technology, ~year, ~production,
"comp a", "power", TRUE, "IT", "coalcap", 2020, 50
)
# Expose the issue --------------------------------------------------------
# The issue is exposed only when loanbook has one or both of these columns:
# * loan_size_credit_limit
# * loan_size_outstanding

# Neither column present: production matches the ald
good <- lbk %>%
  select(-loan_size_credit_limit, -loan_size_outstanding) %>%
  testit(ald)
identical(good$production, ald$production)
#> [1] TRUE

# Both columns present
bad_both <- lbk %>%
  testit(ald)
identical(bad_both$production, ald$production)
#> [1] FALSE

# `loan_size_credit_limit` dropped, so only `loan_size_outstanding` remains
bad_credit_limit <- lbk %>%
  select(-loan_size_credit_limit) %>%
  testit(ald)
identical(bad_credit_limit$production, ald$production)
#> [1] FALSE

# `loan_size_outstanding` dropped, so only `loan_size_credit_limit` remains
bad_outstanding <- lbk %>%
  select(-loan_size_outstanding) %>%
  testit(ald)
identical(bad_outstanding$production, ald$production)
#> [1] FALSE
# Does this give any clue about what the solution might be?
Created on 2020-12-23 by the reprex package (v0.3.0)
Hmmm, not really, I am afraid - not that I can think of anyway. To add context: most loanbooks will have both loan_size_credit_limit and loan_size_outstanding. In the r2dii.analysis::target_market_share function, the default is to calculate the weighting for the portfolio-level results, i.e. when the argument by_company = FALSE (also the default), using loan_size_outstanding. However, if a user would like to use loan_size_credit_limit instead, they can set the argument "credit_limit = TRUE".
Despite this, the company-level results, i.e. by_company = TRUE, should not use either loan_size_credit_limit or loan_size_outstanding, as they should output an unweighted value - i.e. the `expected` value in the reprex above.
I hope that makes sense and please let me know if not.
Thanks
--
What is the issue you need help with?
Make a reprex for https://github.com/2DegreesInvesting/r2dii.analysis/issues/255.
Can you provide a reproducible example? Why not?
We plan to develop the reprex during the meeting.
Problem:
Questions:
Resources
Debugging techniques.
cc @georgeharris2deg