Open dt-woods opened 1 year ago
The List of Why's
- Why are we using esupy's Path in globals.py?
Our team moved all StEWI path management to our common esupy library, which serves all the tools in our ecosystem. Most likely this is an update to provide the same functionality that StEWI provided before.
Sure, @WesIngwersen, but it's only good if it works. See error message below:
>>> upstream_df = electricitylci.get_upstream_process_df(
... config.model_specs.eia_gen_year)
Traceback (most recent call last):
File "<stdin>", line 1, in <module>
File "~/ElectricityLCI/electricitylci/__init__.py", line 301, in get_upstream_process_df
from electricitylci.combinator import concat_map_upstream_databases
File "~/ElectricityLCI/electricitylci/combinator.py", line 4, in <module>
import electricitylci.generation as gen
File "~/ElectricityLCI/electricitylci/generation.py", line 21, in <module>
from electricitylci.elementaryflows import map_emissions_to_fedelemflows
File "~/ElectricityLCI/electricitylci/elementaryflows.py", line 2, in <module>
import fedelemflowlist
File "~/Envs/ebm/lib/python3.11/site-packages/fedelemflowlist/__init__.py", line 11, in <module>
from fedelemflowlist.flowlist import read_in_flowclass_file
File "~/Envs/ebm/lib/python3.11/site-packages/fedelemflowlist/flowlist.py", line 8, in <module>
from fedelemflowlist.globals import log, inputpath, flow_list_specs,\
File "~/Envs/ebm/lib/python3.11/site-packages/fedelemflowlist/globals.py", line 23, in <module>
fedefl_path.local_path + "/fedelemflowlist/")
~~~~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~
TypeError: unsupported operand type(s) for +: 'PosixPath' and 'str'
Current status of testing ELCI_1.
eia_trans_dist_grid_loss
is having trouble matching dictionary for:
Testing 'Electricity - HYDRO - Alcoa Power Generating, Inc. - Yadkin Division' in olcaschema_genmix
in generation_mix.py.
Why are there two Carbon dioxide emissions to air (internalIds 1 and 2)? I thought aggregation included combining same/similar emissions. Same concern with methane (internalIds 4 and 5).
>>> matching_dict['exchanges']
[{'amount': 5.3867483297713465e-21,
'description': 'ap42, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': 'b6f010fb-a764-3063-af2d-bcb8309a97b7',
'category': 'Elementary Flows/emission/air',
'name': 'Carbon dioxide'},
'flowProperty': {'@type': 'FlowProperty',
'@id': '93a60a56-a3c8-11da-a746-0800200b9a66',
'category': 'Technical flow properties',
'name': 'Mass'},
'internalId': 1,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '20aadc24-a391-41cf-b340-3e4529f44bde',
'name': 'kg'}},
{'amount': 4.310418323231294,
'description': 'netl, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': 'b6f010fb-a764-3063-af2d-bcb8309a97b7',
'category': 'Elementary Flows/emission/air',
'name': 'Carbon dioxide'},
'flowProperty': {'@type': 'FlowProperty',
'@id': '93a60a56-a3c8-11da-a746-0800200b9a66',
'category': 'Technical flow properties',
'name': 'Mass'},
'internalId': 2,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '20aadc24-a391-41cf-b340-3e4529f44bde',
'name': 'kg'}},
{'amount': 9740.278957008288,
'description': 'eGRID, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': '8c8fb17a-8d64-3fd0-9a4c-9acbca4abe07',
'category': 'Elementary Flows/resource',
'name': 'Heat'},
'flowProperty': {'@type': 'FlowProperty',
'@id': 'f6811440-ee37-11de-8a39-0800200c9a66',
'category': 'Technical flow properties',
'name': 'Energy'},
'internalId': 3,
'isAvoidedProduct': False,
'isInput': True,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '52765a6c-3896-43c2-b2f4-c679acf13efe',
'name': 'MJ'}},
{'amount': 5.3867483297713465e-21,
'description': 'ap42, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': 'aab83476-ec6c-3742-af85-15d320b7ce80',
'category': 'Elementary Flows/emission/air',
'name': 'Methane'},
'flowProperty': {'@type': 'FlowProperty',
'@id': '93a60a56-a3c8-11da-a746-0800200b9a66',
'category': 'Technical flow properties',
'name': 'Mass'},
'internalId': 4,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '20aadc24-a391-41cf-b340-3e4529f44bde',
'name': 'kg'}},
{'amount': 0.009277911629821063,
'description': 'netl, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': 'aab83476-ec6c-3742-af85-15d320b7ce80',
'category': 'Elementary Flows/emission/air',
'name': 'Methane'},
'flowProperty': {'@type': 'FlowProperty',
'@id': '93a60a56-a3c8-11da-a746-0800200b9a66',
'category': 'Technical flow properties',
'name': 'Mass'},
'internalId': 5,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '20aadc24-a391-41cf-b340-3e4529f44bde',
'name': 'kg'}},
{'amount': 5.3867483297713465e-21,
'description': 'ap42, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': '4382ba18-dd21-3837-80b2-94283ef5490e',
'category': 'Elementary Flows/emission/air',
'name': 'Nitrogen oxides'},
'flowProperty': {'@type': 'FlowProperty',
'@id': '93a60a56-a3c8-11da-a746-0800200b9a66',
'category': 'Technical flow properties',
'name': 'Mass'},
'internalId': 6,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '20aadc24-a391-41cf-b340-3e4529f44bde',
'name': 'kg'}},
{'amount': 5.3867483297713465e-21,
'description': 'ap42, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': 'cfee0524-7ad6-300b-b050-6249135a2492',
'category': 'Elementary Flows/emission/air',
'name': 'Nitrous oxide'},
'flowProperty': {'@type': 'FlowProperty',
'@id': '93a60a56-a3c8-11da-a746-0800200b9a66',
'category': 'Technical flow properties',
'name': 'Mass'},
'internalId': 7,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '20aadc24-a391-41cf-b340-3e4529f44bde',
'name': 'kg'}},
{'amount': 5.3867483297713465e-21,
'description': 'ap42, 2016',
'dqEntry': '(1;1;1;1;2)',
'flow': {'@type': 'Flow',
'@id': 'f4973035-59f5-3bdc-b257-b274dcc04e0f',
'category': 'Elementary Flows/emission/air',
'name': 'Sulfur dioxide'},
'flowProperty': {'@type': 'FlowProperty',
'@id': '93a60a56-a3c8-11da-a746-0800200b9a66',
'category': 'Technical flow properties',
'name': 'Mass'},
'internalId': 8,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': False,
'unit': {'@type': 'Unit',
'@id': '20aadc24-a391-41cf-b340-3e4529f44bde',
'name': 'kg'}},
{'amount': 1.0,
'flow': {'@type': 'Flow',
'@id': 'fc406690-160c-37d5-bf36-added9542164',
'category': 'Technosphere Flows/22: Utilities/2211: Electric Power Generation, Transmission and Distribution',
'name': 'Electricity, AC, 2300-7650 V'},
'flowProperty': {'@type': 'FlowProperty',
'@id': 'f6811440-ee37-11de-8a39-0800200c9a66',
'category': 'Technical flow properties',
'name': 'Energy'},
'internalId': 9,
'isAvoidedProduct': False,
'isInput': False,
'isQuantitativeReference': True,
'unit': {'@type': 'Unit',
'@id': '92e3bd49-8ed5-4885-9db6-fc88c7afcfcb',
'name': 'MWh'}}]
Regarding the "duplicate" emissions - those emissions are from two different sources. The reported emissions from the hydro plants are 0, converted here to X x 10^-21 to avoid issues with generating log-normal distributions.
The NETL-sourced data is from our own analysis of hydro plants.
I think because I wanted to make clear that the non-zero emissions were coming from an NETL analysis, I left this as-is. I'm open to changing. Can maybe add something to the hydro process metadata that describes all of this.
My guess as to the missing BAAs, etc. is that the BA_Codes_930.xlsx needs an update. Some of the BAAs have been retired since the 2016 results. That's at least a first pass.
I'm still testing ELCI_1, so it's the 2016 data that's not working.
My guess as to the missing BAAs, etc. is that the BA_Codes_930.xlsx needs an update. Some of the BAAs have been retired since the 2016 results. That's at least a first pass.
I see "BA_Codes_930.xlsx" referenced in combinator.py to create the ba_codes data frame, which is used for mapping names to codes, and in eia_io_trading.py to create the US and NA balancing authority data frames. I only see the "US" and "Canada" worksheets being referenced.
The latest EIA-930 data reference tables you linked to have these data in a single worksheet, "BAs", that includes US, CAN, and MEX. There are activation and retirement dates that could be used to filter the list based on the analysis year (or eGRID/EIA year).
I can create a method for retrieving this Excel workbook, rather than use the dated workbook provided in the data folder, and write methods for filtering the BAs so they can be easily parsed as US and non-US. Is that the path you want to take?
I'd say it's probably the best way forward - provided they don't move/change that excel workbook!
Regarding the "duplicate" emissions - those emissions are from two different sources. The reported emissions from the hydro plants are 0, converted here to X x 10^-21 to avoid issues with generating log-normal distributions.
The NETL-sourced data is from our own analysis of hydro plants.
I think because I wanted to make clear that the non-zero emissions were coming from an NETL analysis, I left this as-is. I'm open to changing. Can maybe add something to the hydro process metadata that describes all of this.
Okay. So, it's intentional and it's not the problem. I see it now in openLCA:
I'd say it's probably the best way forward - provided they don't move/change that excel workbook!
I can make it an untracked data file in the repository, so the user only needs to generate it once; it will stay local, which limits the number of downloads (and the need for an internet connection).
I'm surprised we don't have something from stewi that could be used to get a full list of BA names and codes for a given generation year!
Regarding the "duplicate" emissions - those emissions are from two different sources. The reported emissions from the hydro plants are 0, converted here to X x 10^-21 to avoid issues with generating log-normal distributions.
Regarding those log-normal distributions, they feel a little bogus. I'm seeing about a 20% success rate in their calculation, and I'm not sure what uncertainty they are capturing.
I hit a point of constant segmentation faults, which appeared to be linked to combinator.py. I tried importing the required modules and got errors during the stewi phase. I then deleted all facilitymatcher, stewi, and stewicombo folders and hit the following error message (after including the download_if_missing parameter).
The linkage is:
2024-01-09 16:24:58.377:INFO:processed_data_mgmt:load_preprocessed_output:Returning /Users/davi/Library/Application Support/facilitymatcher/FRS Data Files/NATIONAL_ENVIRONMENTAL_INTEREST_FILE.CSV
---------------------------------------------------------------------------
UnicodeDecodeError Traceback (most recent call last)
Cell In[5], line 1
----> 1 gen_df, gen_dict = run_generation()
File ~/Repositories/keylogiclca/ElectricityLCI/electricitylci/main.py:168, in run_generation()
162 if config.model_specs.include_upstream_processes is True:
163 # Create dataframe with all generation process data; includes
164 # upstream and Canadian data.
165 # NOTE: Only nuclear ('NUC') stage codes have electricity data;
166 # all others are nans.
167 logging.info("get upstream process")
--> 168 upstream_df = get_upstream_process_df(config.model_specs.eia_gen_year)
169 logging.info("write upstream process to dict")
170 upstream_dict = write_upstream_process_database_to_dict(upstream_df)
File ~/Repositories/keylogiclca/ElectricityLCI/electricitylci/__init__.py:450, in get_upstream_process_df(eia_gen_year)
448 import electricitylci.nuclear_upstream as nuke
449 import electricitylci.power_plant_construction as ppc
--> 450 import electricitylci.combinator as combine
452 logging.info("Generating upstream inventories...")
453 coal_df = coal.generate_upstream_coal(eia_gen_year)
File ~/Repositories/keylogiclca/ElectricityLCI/electricitylci/combinator.py:18
16 from electricitylci.model_config import model_specs
17 from electricitylci.eia860_facilities import eia860_balancing_authority
---> 18 from electricitylci.generation import add_temporal_correlation_score
19 import fedelemflowlist as fedefl
22
File ~/Repositories/keylogiclca/ElectricityLCI/electricitylci/generation.py:42
40 from electricitylci.utils import make_valid_version_num
41 from electricitylci.utils import set_dir
---> 42 from electricitylci.egrid_filter import (
43 electricity_for_selected_egrid_facilities,
44 emissions_and_waste_for_selected_egrid_facilities,
45 )
46 from electricitylci.egrid_emissions_and_waste_by_facility import (
47 emissions_and_wastes_by_facility,
48 )
49 import facilitymatcher.globals as fmglob # package under development
File ~/Repositories/keylogiclca/ElectricityLCI/electricitylci/egrid_filter.py:24
15 from electricitylci.egrid_facilities import (
16 egrid_facilities,
17 list_facilities_w_percent_generation_from_primary_fuel_category_greater_than_min
18 )
19 from electricitylci.egrid_energy import (
20 list_egrid_facilities_with_positive_generation,
21 list_egrid_facilities_in_efficiency_range,
22 egrid_net_generation
23 )
---> 24 from electricitylci.egrid_emissions_and_waste_by_facility import (
25 emissions_and_wastes_by_facility
26 )
27 from electricitylci.egrid_FRS_matches import list_FRS_ids_filtered_for_NAICS
File ~/Repositories/keylogiclca/ElectricityLCI/electricitylci/egrid_emissions_and_waste_by_facility.py:76
72 base_inventory = "RCRAInfo"
74 # HOTFIX: work-around ParseError [2023-12-19; TWD]
75 # Ref: https://github.com/USEPA/standardizedinventories/issues/151
---> 76 emissions_and_wastes_by_facility = cbi(
77 base_inventory,
78 model_specs.inventories_of_interest,
79 filter_for_LCI=True,
80 download_if_missing=True,
81 )
82 # Drop SRS fields
83 emissions_and_wastes_by_facility = emissions_and_wastes_by_facility.drop(
84 columns=['SRS_ID', 'SRS_CAS'])
File ~/Envs/ebm/lib/python3.11/site-packages/stewicombo/__init__.py:83, in combineInventoriesforFacilitiesinBaseInventory(base_inventory, inventory_dict, filter_for_LCI, remove_overlap, keep_sec_cntx, **kwargs)
69 """Combine stewi inventories for all facilities present in base_inventory.
70
71 The base_inventory must be in the inventory_dict
(...)
80 :return: Flow-By-Facility Combined Format
81 """
82 inventory_acronyms = list(inventory_dict.keys())
---> 83 facilitymatches = facilitymatcher.get_matches_for_inventories(
84 inventory_acronyms)
85 inventories = getInventoriesforFacilityMatches(inventory_dict,
86 facilitymatches,
87 filter_for_LCI,
88 base_inventory,
89 keep_sec_cntx=keep_sec_cntx,
90 **kwargs)
91 inventories = addChemicalMatches(inventories)
File ~/Envs/ebm/lib/python3.11/site-packages/facilitymatcher/__init__.py:20, in get_matches_for_inventories(inventory_list)
13 def get_matches_for_inventories(inventory_list=stewi_inventories):
14 """Return all facility matches for given inventories.
15
16 :param inventory_list: list of inventories for desired matches using
17 StEWI inventory names e.g. ['NEI','TRI']
18 :return: dataframe in FacilityMatches standard output format
19 """
---> 20 facilitymatches = get_fm_file('FacilityMatchList_forStEWI')
21 facilitymatches = filter_by_inventory_list(facilitymatches, inventory_list)
22 return facilitymatches
File ~/Envs/ebm/lib/python3.11/site-packages/facilitymatcher/globals.py:116, in get_fm_file(file_name, download_if_missing)
114 download_from_remote(file_meta, paths)
115 elif file_name == 'FacilityMatchList_forStEWI':
--> 116 write_fm.write_facility_matches()
117 elif file_name == 'FRS_NAICSforStEWI':
118 write_naics.write_NAICS_matches()
File ~/Envs/ebm/lib/python3.11/site-packages/facilitymatcher/WriteFacilityMatchesforStEWI.py:24, in write_facility_matches()
20 # Import FRS bridge which provides ID matches
21 col_dict = {'REGISTRY_ID': "str",
22 'PGM_SYS_ACRNM': "str",
23 'PGM_SYS_ID': "str"}
---> 24 FRS_Bridges = fmg.read_FRS_file(file, col_dict)
26 # Programs of interest
27 stewi_programs = fmg.get_programs_for_inventory_list(fmg.stewi_inventories)
File ~/Envs/ebm/lib/python3.11/site-packages/facilitymatcher/globals.py:74, in read_FRS_file(file_name, col_dict)
72 file_meta.name_data = strip_file_extension(file_meta.name_data)
73 file_meta.ext = 'csv'
---> 74 df = load_preprocessed_output(file_meta, paths)
75 df_FRS = pd.DataFrame()
76 for k, v in col_dict.items():
File ~/Envs/ebm/lib/python3.11/site-packages/esupy/processed_data_mgmt.py:51, in load_preprocessed_output(file_meta, paths)
49 if isinstance(f, Path):
50 log.info(f'Returning {f}')
---> 51 df = read_into_df(f)
52 return df
53 else:
File ~/Envs/ebm/lib/python3.11/site-packages/esupy/processed_data_mgmt.py:233, in read_into_df(fpath)
231 df = pd.read_parquet(fpath)
232 elif ext == '.csv':
--> 233 df = pd.read_csv(fpath)
234 elif ext == '.rds':
235 try:
File ~/Envs/ebm/lib/python3.11/site-packages/pandas/io/parsers/readers.py:948, in read_csv(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)
935 kwds_defaults = _refine_defaults_read(
936 dialect,
937 delimiter,
(...)
944 dtype_backend=dtype_backend,
945 )
946 kwds.update(kwds_defaults)
--> 948 return _read(filepath_or_buffer, kwds)
File ~/Envs/ebm/lib/python3.11/site-packages/pandas/io/parsers/readers.py:617, in _read(filepath_or_buffer, kwds)
614 return parser
616 with parser:
--> 617 return parser.read(nrows)
File ~/Envs/ebm/lib/python3.11/site-packages/pandas/io/parsers/readers.py:1748, in TextFileReader.read(self, nrows)
1741 nrows = validate_integer("nrows", nrows)
1742 try:
1743 # error: "ParserBase" has no attribute "read"
1744 (
1745 index,
1746 columns,
1747 col_dict,
-> 1748 ) = self._engine.read( # type: ignore[attr-defined]
1749 nrows
1750 )
1751 except Exception:
1752 self.close()
File ~/Envs/ebm/lib/python3.11/site-packages/pandas/io/parsers/c_parser_wrapper.py:234, in CParserWrapper.read(self, nrows)
232 try:
233 if self.low_memory:
--> 234 chunks = self._reader.read_low_memory(nrows)
235 # destructive to chunks
236 data = _concatenate_chunks(chunks)
File parsers.pyx:843, in pandas._libs.parsers.TextReader.read_low_memory()
File parsers.pyx:904, in pandas._libs.parsers.TextReader._read_rows()
File parsers.pyx:879, in pandas._libs.parsers.TextReader._tokenize_rows()
File parsers.pyx:890, in pandas._libs.parsers.TextReader._check_tokenize_status()
File parsers.pyx:2050, in pandas._libs.parsers.raise_parser_error()
UnicodeDecodeError: 'utf-8' codec can't decode byte 0xb0 in position 16484: invalid start byte
^^^ The error appears to be in the 1 GB CSV, NATIONAL_ENVIRONMENTAL_INTEREST_FILE.CSV
^^^ The error appears to be in the 1 GB CSV, NATIONAL_ENVIRONMENTAL_INTEREST_FILE.CSV
This is "new", right? You had gotten the 2016 data to run (existing ELCI_1) without hitting this, correct? So this is something we run into with new data? Also interesting because I've run a 2020 result before and not run into this.
If this is a stewi issue we can post it there and I can take a closer look - especially if you can show a reproducible example within stewi/facilitymatcher
The road to Version 2.0:
The 2020 model config
Notes following the testing of ElectricityLCI development branch using the ELCI_2020_config.yml (2/22/24).
primary_fuel["primary fuel percent gen"].fillna(value=0, inplace=True)
in eia923_generation.py
database_reg = database_reg[database_filt]
in eia_io_trading.py:989233