Open cjyetman opened 1 year ago
We recently learned that the masterdata_debt
CSV is actually showing direct ownership, not rolled up financial control nor credit parent control. Comparing "Direct Ownership" in the Advanced Indicators XLS to the masterdata_debt
data gives a much better picture, but we would still rely on the masterdata_debt
file to get the credit parent ID to do the credit parent roll-up we currently rely on.
library(tidyverse)
library(pacta.data.preparation)
ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")
ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)
# -------------------------------------------------------------------------
direct_ownership_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Direct Ownership") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2016 = sum(`2016`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_debt %>%
filter(company_id %in% ar_advanced_company_indicators$company_id) %>%
group_by(sector) %>%
summarise(old_2016 = sum(`_2016`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(direct_ownership_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2016 = if_else(is.na(new_2016), as.numeric(0), as.numeric(new_2016)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2016 - new_2016) %>%
mutate(percent_diff = round(diff / old_2016 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2016 old_n new_2016 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 3.24e12 1302 3.24e12 1302 -1.46e-3 0
#> 2 Cement 2.89e 9 1459 2.73e 9 1448 1.53e+8 5.29
#> 3 Coal 6.31e 9 1349 6.31e 9 1359 0 0
#> 4 HDV 0 283 0 283 0 NaN
#> 5 LDV 9.36e 7 194 9.36e 7 228 0 0
#> 6 Oil&Gas 3.06e11 2864 3.06e11 2864 -2.48e-2 0
#> 7 Power 6.12e 6 29582 6.12e 6 29611 -8.47e-2 0
#> 8 Shipping 1.70e14 9844 1.70e14 9846 -4.38e-1 0
#> 9 Steel 1.66e 9 757 1.66e 9 757 -3.81e-3 0
The bespoke masterdata_*
files seem to include data for Cement - Grinding assets, while the Advanced Company Indicators does not. Filtering those rows out of the masterdata_*
files appears to remove the discrepancies apart from very tiny, probable rounding errors and Shipping discrepancy in Equity Ownership (small relative to the sector (% diff), but still substantial). There are also still discrepancies in the number of rows in some sectors which I still do not have an explanation for.
library(tidyverse)
library(pacta.data.preparation)
ar_data_path <- "~/Documents/Data/Asset Resolution/2022-08-15_AR_2021Q4"
ar_advanced_company_indicators_path <- file.path(ar_data_path, "2022-08-24_AR_2021Q4_RMI-Company-Indicators.xlsx")
masterdata_debt_path <- file.path(ar_data_path, "2022-10-05_rmi_masterdata_debt_2021q4.csv")
masterdata_ownership_path <- file.path(ar_data_path, "2022-08-15_rmi_masterdata_ownership_2021q4.csv")
ar_advanced_company_indicators <- import_ar_advanced_company_indicators(ar_advanced_company_indicators_path, fix_names = TRUE)
masterdata_debt <- readr::read_csv(masterdata_debt_path, na = "", show_col_types = FALSE)
masterdata_ownership <- readr::read_csv(masterdata_ownership_path, na = "", show_col_types = FALSE)
# -------------------------------------------------------------------------
equity_ownership_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Equity Ownership") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2022 = sum(`2022`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_ownership %>%
filter(technology != "Grinding") %>%
group_by(sector) %>%
summarise(old_2022 = sum(`_2022`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(equity_ownership_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2022 = if_else(is.na(new_2022), as.numeric(0), as.numeric(new_2022)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2022 - new_2022) %>%
mutate(percent_diff = round(diff / old_2022 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2022 old_n new_2022 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 1.00e13 1820 1.00e13 1820 1.17e-2 0
#> 2 Cement 4.67e 9 2035 4.67e 9 2035 -6.73e-4 0
#> 3 Coal 1.15e10 2051 1.15e10 2061 -1.07e-2 0
#> 4 HDV 9.22e 6 575 9.22e 6 575 -6.36e-2 0
#> 5 LDV 1.26e 8 394 1.26e 8 435 -3.52e-1 0
#> 6 Oil&Gas 7.82e11 4568 7.82e11 4563 2.14e-2 0
#> 7 Power 1.60e 7 36917 1.60e 7 36875 -4.49e-1 0
#> 8 Shipping 2.69e14 11167 2.69e14 11169 3.06e+9 0
#> 9 Steel 2.75e 9 1105 2.75e 9 1105 -5.96e-3 0
# -------------------------------------------------------------------------
direct_ownership_data <-
ar_advanced_company_indicators %>%
filter(consolidation_method == "Direct Ownership") %>%
filter(value_type == "production") %>%
filter(
asset_sector == "Aviation" & activity_unit %in% c("pkm", "tkm") |
asset_sector == "Cement" & activity_unit == "t cement" |
asset_sector == "Coal" & activity_unit == "t coal" |
asset_sector == "HDV" & activity_unit == "# vehicles" |
asset_sector == "LDV" & activity_unit == "# vehicles" |
asset_sector == "Oil&Gas" & activity_unit == "GJ" |
asset_sector == "Power" & activity_unit == "MW" |
asset_sector == "Shipping" & activity_unit == "dwt km" |
asset_sector == "Steel" & activity_unit == "t steel"
) %>%
pivot_wider(names_from = "year", values_fill = 0) %>%
group_by(asset_sector) %>%
summarise(new_2022 = sum(`2022`, na.rm = TRUE), new_n = length(unique(company_id)))
masterdata_debt %>%
filter(technology != "Grinding") %>%
group_by(sector) %>%
summarise(old_2022 = sum(`_2022`, na.rm = TRUE), old_n = length(unique(company_id))) %>%
full_join(direct_ownership_data, by = c(sector = "asset_sector")) %>%
mutate(
new_2022 = if_else(is.na(new_2022), as.numeric(0), as.numeric(new_2022)),
new_n = if_else(is.na(new_n), as.numeric(0), as.numeric(new_n))
) %>%
mutate(diff = old_2022 - new_2022) %>%
mutate(percent_diff = round(diff / old_2022 * 100, digits = 2))
#> # A tibble: 9 × 7
#> sector old_2022 old_n new_2022 new_n diff percent_diff
#> <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl>
#> 1 Aviation 5.35e12 1302 5.35e12 1302 -0.00684 0
#> 2 Cement 2.74e 9 1448 2.74e 9 1448 0 0
#> 3 Coal 6.62e 9 1349 6.62e 9 1359 0 0
#> 4 HDV 3.63e 6 283 3.63e 6 283 0 0
#> 5 LDV 8.56e 7 194 8.56e 7 228 0 0
#> 6 Oil&Gas 3.51e11 2869 3.51e11 2864 0.0242 0
#> 7 Power 7.59e 6 29641 7.59e 6 29611 -0.122 0
#> 8 Shipping 1.79e14 9844 1.79e14 9846 -0.0625 0
#> 9 Steel 1.69e 9 757 1.69e 9 757 -0.00299 0
Blocked: Until AI adds a new consolidation method to their advanced company indicators, which likely will happen in Q3 this year Consolidation method is likely to be called "Credit Risk Parent" consolidation method, or something similar.
Especially with the recent change with AR, we're probably going to eventually need to switch to their new format. We should investigate the differences and see if we can switch to using the new format.