KeyError when creating NDC databases

marcosdbw commented 2 years ago

Hello, I am running the latest version of premise. When I try to create SSP2-NDC databases, an error appears when extracting the IAM data (as shown below). I did not have any issues when creating databases for Base, PkBudg, etc. using ecoinvent 3.6. Could you please help me to solve this? Thanks again!

//////////////////// EXTRACTING SOURCE DATABASE //////////////////// Done!

////////////////// IMPORTING DEFAULT INVENTORIES /////////////////// Done!

/////////////////////// EXTRACTING IAM DATA ////////////////////////

KeyError Traceback (most recent call last) Input In [17], in 3 from premise import * 4 import brightway2 as bw ----> 6 ndb = NewDatabase( 7 scenarios=[ 8 {"model":"remind", "pathway":"SSP2-NDC", "year":2050} 9 ], 10 source_db="ecoinvent_3.6cutoff", # <-- name of the database in the BW2 project. Must be a string. 11 source_version="3.6", # <-- version of ecoinvent. Can be "3.5", "3.6", "3.7" or "3.7.1". Must be a string. 12 key='' # <-- decryption key 13 # to be requested from the library maintainers if you want ot use default scenarios included in premise 14 ) 15 16 ndb.update_all()

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\ecoinvent_modification.py:549, in NewDatabase.__init__(self, scenarios, source_version, source_type, key, source_db, source_file_path, additional_inventories, system_model, time_horizon, use_cached_inventories, use_cached_database) 546 print("\n/////////////////////// EXTRACTING IAM DATA ////////////////////////") 548 for scenario in self.scenarios: --> 549 scenario["external data"] = IAMDataCollection( 550 model=scenario["model"], 551 pathway=scenario["pathway"], 552 year=scenario["year"], 553 filepath_iam_files=scenario["filepath"], 554 key=key, 555 system_model=self.system_model, 556 time_horizon=self.time_horizon, 557 ) 558 scenario["database"] = copy.deepcopy(self.database) 560 print("Done!")

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\data_collection.py:173, in IAMDataCollection.__init__(self, model, pathway, year, filepath_iam_files, key, system_model, time_horizon) 170 self.gnr_data = get_gnr_data() 172 self.electricity_markets = self.__get_iam_electricity_markets(data=data) --> 173 self.fuel_markets = self.__get_iam_fuel_markets(data=data) 175 prod_vars = self.__get_iam_variable_labels(IAM_ELEC_VARS, key="iam_aliases") 176 prod_vars.update( 177 self.__get_iam_variable_labels(IAM_FUELS_VARS, key="iam_aliases") 178 )

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\data_collection.py:948, in IAMDataCollection.__get_iam_fuel_markets(self, data) 939 raise KeyError( 940 f"{self.year} is outside of the boundaries " 941 f"of the IAM file: {data.year.values.min()}-{data.year.values.max()}" 942 ) 944 # Finally, if the specified year falls in between two periods provided by the IAM 945 # sometimes, the World region is either neglected 946 # or wrongly evaluated so we fix that here --> 948 data.loc[dict(region="World", variables=list_technologies)] = data.loc[ 949 dict( 950 region=[r for r in data.coords["region"].values if r != "World"], 951 variables=list_technologies, 952 ) 953 ].sum(dim="region") 955 # Interpolation between two periods 956 data_to_return = data.loc[:, list_technologies, :]

File ~\Miniconda3\envs\bw2\lib\site-packages\xarray\core\dataarray.py:198, in _LocIndexer.__getitem__(self, key) 196 labels = indexing.expanded_indexer(key, self.data_array.ndim) 197 key = dict(zip(self.data_array.dims, labels)) --> 198 return self.data_array.sel(key)

File ~\Miniconda3\envs\bw2\lib\site-packages\xarray\core\dataarray.py:1328, in DataArray.sel(self, indexers, method, tolerance, drop, **indexers_kwargs) 1219 def sel( 1220 self, 1221 indexers: Mapping[Any, Any] = None, (...) 1225 **indexers_kwargs: Any, 1226 ) -> DataArray: 1227 """Return a new DataArray whose data is given by selecting index 1228 labels along the specified dimension(s). 1229 (...) 1326 Dimensions without coordinates: points 1327 """ -> 1328 ds = self._to_temp_dataset().sel( 1329 indexers=indexers, 1330 drop=drop, 1331 method=method, 1332 tolerance=tolerance, 1333 **indexers_kwargs, 1334 ) 1335 return self._from_temp_dataset(ds)

File ~\Miniconda3\envs\bw2\lib\site-packages\xarray\core\dataset.py:2500, in Dataset.sel(self, indexers, method, tolerance, drop, **indexers_kwargs) 2439 """Returns a new dataset with each array indexed by tick labels 2440 along the specified dimension(s). 2441 (...) 2497 DataArray.sel 2498 """ 2499 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "sel") -> 2500 pos_indexers, new_indexes = remap_label_indexers( 2501 self, indexers=indexers, method=method, tolerance=tolerance 2502 ) 2503 # TODO: benbovy - flexible indexes: also use variables returned by Index.query 2504 # (temporary dirty fix). 2505 new_indexes = {k: v[0] for k, v in new_indexes.items()}

File ~\Miniconda3\envs\bw2\lib\site-packages\xarray\core\coordinates.py:421, in remap_label_indexers(obj, indexers, method, tolerance, **indexers_kwargs) 414 indexers = either_dict_or_kwargs(indexers, indexers_kwargs, "remap_label_indexers") 416 v_indexers = { 417 k: v.variable.data if isinstance(v, DataArray) else v 418 for k, v in indexers.items() 419 } --> 421 pos_indexers, new_indexes = indexing.remap_label_indexers( 422 obj, v_indexers, method=method, tolerance=tolerance 423 ) 424 # attach indexer's coordinate to pos_indexers 425 for k, v in indexers.items():

File ~\Miniconda3\envs\bw2\lib\site-packages\xarray\core\indexing.py:121, in remap_label_indexers(data_obj, indexers, method, tolerance) 119 for dim, index in indexes.items(): 120 labels = grouped_indexers[dim] --> 121 idxr, new_idx = index.query(labels, method=method, tolerance=tolerance) 122 pos_indexers[dim] = idxr 123 if new_idx is not None:

File ~\Miniconda3\envs\bw2\lib\site-packages\xarray\core\indexes.py:247, in PandasIndex.query(self, labels, method, tolerance) 245 indexer = get_indexer_nd(self.index, label, method, tolerance) 246 if np.any(indexer < 0): --> 247 raise KeyError(f"not all values found in index {coord_name!r}") 249 return indexer, None

KeyError: "not all values found in index 'variables'"

romainsacchi commented 2 years ago

Hi, thanks for spotting this.

It seems the NDC scenario, being a bit old, does not have some of the necessary variables. I just pushed a commit that adds warning messages when variables are missing.

/////////////////////// EXTRACTING IAM DATA ////////////////////////
The following variables cannot be found in the IAM file: ['SE|Gases|Non-Biomass']
The process continues with the remaining variables, but certain transformation functions may not work.
The following variables cannot be found in the IAM file: ['SE|Gases|Non-Biomass', 'Production|Industry|Cement', 'Production|Industry|Steel|Primary', 'Production|Industry|Steel|Secondary']
The process continues with the remaining variables, but certain transformation functions may not work.
No efficiency variables is given for the cement sector.
No efficiency variables is given for the primary steel sector.
No efficiency variables is given for the secondary steel sector.
Done!

Consequently, certain transformations, like update_steel() or update_cement(), won't work with this scenario. A solution would be to ask REMIND's team to provide us with an updated NDC scenario.

marcosdbw commented 2 years ago

Hi @romainsacchi, thank you very much for your help. I will install premise again and run the NDC scenarios.

romainsacchi commented 2 years ago

Yes, but if you use the NDC scenario, you cannot run update_all(), since update_cement() and update_steel(), which are triggered by update_all(), will fail. As I see it, you can only run update_electricity() and update_fuels().

marcosdbw commented 2 years ago

Got it, thanks, @romainsacchi !

marcosdbw commented 2 years ago

Hi. After installing premise again (using conda install -c romainsacchi premise), there is still an error appearing when running the code for NDC scenario, for update_fuels(), as described below. thanks!

IndexError Traceback (most recent call last) Input In [10], in

---> 21 ndb.update_fuels() 22 #ndb.update_solar_PV() 23 #ndb.update_cars() 24 #ndb.update_trucks() 25 # ndb.update_cement() 26 #ndb.update_steel()

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\ecoinvent_modification.py:756, in NewDatabase.update_fuels(self) 746 if "exclude" not in scenario or "update_fuels" not in scenario["exclude"]: 748 fuels = Fuels( 749 database=scenario["database"], 750 iam_data=scenario["external data"], (...) 754 version=self.version, 755 ) --> 756 fuels.generate_fuel_markets() 757 scenario["database"] = fuels.database

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\fuels.py:1444, in Fuels.generate_fuel_markets(self) 1440 """Create new fuel supply chains 1441 and update existing fuel markets""" 1443 # Create new fuel supply chains -> 1444 self.generate_fuel_supply_chains() 1446 print("Generate new fuel markets.") 1448 # we start by creating region-specific "diesel, burned in" markets

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\fuels.py:1437, in Fuels.generate_fuel_supply_chains(self) 1435 # biofuels 1436 print("Generate region-specific biofuel supply chains.") -> 1437 self.generate_biofuel_activities()

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\fuels.py:1120, in Fuels.generate_biofuel_activities(self) 1109 for name in names: 1111 prod_label = [ 1112 l 1113 for l in self.fuel_labels (...) 1117 ) 1118 ][0] -> 1120 new_ds = self.fetch_proxies( 1121 name=name, 1122 ref_prod=" ", 1123 production_variable=prod_label, 1124 relink=True, 1125 regions=regions, 1126 ) 1128 for region in regions: 1129 1130 # if this is a fuel production activity 1131 # we need to adjust the process efficiency 1132 if any( 1133 i in new_ds[region]["name"] 1134 for i in ["Ethanol production", "Biodiesel production"] 1135 ):

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\transformation.py:375, in BaseTransformation.fetch_proxies(self, name, ref_prod, production_variable, relink, regions) 356 def fetch_proxies( 357 self, name, ref_prod, production_variable=None, relink=True, regions=None 358 ) -> Dict[str, dict]: 359 """ 360 Fetch dataset proxies, given a dataset name and reference product. 361 Store a copy for each IAM region. (...) 372 :return: dictionary with IAM regions as keys, proxy datasets as values. 373 """ --> 375 d_iam_to_eco = self.region_to_proxy_dataset_mapping( 376 name=name, ref_prod=ref_prod, regions=regions 377 ) 379 d_act = {} 381 ds_name, ds_ref_prod = [None, None]

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\transformation.py:352, in BaseTransformation.region_to_proxy_dataset_mapping(self, name, ref_prod, regions) 350 fallback_loc = "GLO" 351 else: --> 352 fallback_loc = list(d_map.values())[0] 354 return {region: d_map.get(region, fallback_loc) for region in regions}

IndexError: list index out of range

Lmachinferrero commented 2 years ago

Hi. After installing premise again (using conda install -c romainsacchi premise),

there is still an error appearing when running the code for NDC scenario, for update_fuels(), as described below. thanks! IndexError Traceback (most recent call last) Input In [10], in

---> 21 ndb.update_fuels() 22 #ndb.update_solar_PV() 23 #ndb.update_cars() 24 #ndb.update_trucks() 25 # ndb.update_cement() 26 #ndb.update_steel()

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\ecoinvent_modification.py:756, in NewDatabase.update_fuels(self) 746 if "exclude" not in scenario or "update_fuels" not in scenario["exclude"]: 748 fuels = Fuels( 749 database=scenario["database"], 750 iam_data=scenario["external data"], (...) 754 version=self.version, 755 ) --> 756 fuels.generate_fuel_markets() 757 scenario["database"] = fuels.database

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\fuels.py:1444, in Fuels.generate_fuel_markets(self) 1440 """Create new fuel supply chains 1441 and update existing fuel markets""" 1443 # Create new fuel supply chains -> 1444 self.generate_fuel_supply_chains() 1446 print("Generate new fuel markets.") 1448 # we start by creating region-specific "diesel, burned in" markets

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\fuels.py:1437, in Fuels.generate_fuel_supply_chains(self) 1435 # biofuels 1436 print("Generate region-specific biofuel supply chains.") -> 1437 self.generate_biofuel_activities()

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\fuels.py:1120, in Fuels.generate_biofuel_activities(self) 1109 for name in names: 1111 prod_label = [ 1112 l 1113 for l in self.fuel_labels (...) 1117 ) 1118 ][0] -> 1120 new_ds = self.fetch_proxies( 1121 name=name, 1122 ref_prod=" ", 1123 production_variable=prod_label, 1124 relink=True, 1125 regions=regions, 1126 ) 1128 for region in regions: 1129 1130 # if this is a fuel production activity 1131 # we need to adjust the process efficiency 1132 if any( 1133 i in new_ds[region]["name"] 1134 for i in ["Ethanol production", "Biodiesel production"] 1135 ):

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\transformation.py:375, in BaseTransformation.fetch_proxies(self, name, ref_prod, production_variable, relink, regions) 356 def fetch_proxies( 357 self, name, ref_prod, production_variable=None, relink=True, regions=None 358 ) -> Dict[str, dict]: 359 """ 360 Fetch dataset proxies, given a dataset name and reference product. 361 Store a copy for each IAM region. (...) 372 :return: dictionary with IAM regions as keys, proxy datasets as values. 373 """ --> 375 d_iam_to_eco = self.region_to_proxy_dataset_mapping( 376 name=name, ref_prod=ref_prod, regions=regions 377 ) 379 d_act = {} 381 ds_name, ds_ref_prod = [None, None]

File ~\Miniconda3\envs\bw2\lib\site-packages\premise\transformation.py:352, in BaseTransformation.region_to_proxy_dataset_mapping(self, name, ref_prod, regions) 350 fallback_loc = "GLO" 351 else: --> 352 fallback_loc = list(d_map.values())[0] 354 return {region: d_map.get(region, fallback_loc) for region in regions}

IndexError: list index out of range

Hi! Did you fix the error? or find a solution?

marcosdbw commented 2 years ago

Hi @Lmachinferrero . Nope, the error still appears.

romainsacchi commented 2 years ago

Hi @Lmachinferrero, there's no real fix or solution to this issue, as it stems from the fact that the NDC scenarios shipped with premise do not contain the data necessary to operate all the transformation functions. Hence, only update_electricity() works with this scenario. We depend on IAM developers to provide us with updated scenario files.

polca / premise

KeyError when creating NDC databases #61

/////////////////////// EXTRACTING IAM DATA ////////////////////////

Hi. After installing premise again (using conda install -c romainsacchi premise), there is still an error appearing when running the code for NDC scenario, for update_fuels(), as described below. thanks!

Hi. After installing premise again (using conda install -c romainsacchi premise),