I want to use a pint `Quantity` to initialize a new row of a DataFrame through the `.loc` indexer. Assigning a plain floating-point number works as expected, but assigning a `Quantity` raises `TypeError: object of type 'float' has no len()`. What is the workaround?
import pandas as pd
import numpy as np
import pint_pandas
from pint import UnitRegistry
# Create the unit registry and register two custom units, each introducing
# its own new base dimension ([produced_ton] and [emissions]).
ureg = UnitRegistry()
for unit_definition in ("Fe_ton = [produced_ton]", "CO2 = [emissions]"):
    ureg.define(unit_definition)

# Shorthand constructor for quantities bound to this registry.
Q_ = ureg.Quantity
# Row keys of the historical table: (identifier, category, item).
_ENTITY_ID = 'US6293775085'
_PRODUCTION_KEY = (_ENTITY_ID, 'Productions', 'Production')
_S1_KEY = (_ENTITY_ID, 'Emissions', 'S1')
_S2_KEY = (_ENTITY_ID, 'Emissions', 'S2')
_EMISSIONS_UNITS = 'CO2 * metric_ton'

# One record per year: (year, production, S1 emissions, S2 emissions).
# A production of None means the cell holds a bare np.nan rather than a
# Quantity; emissions are always wrapped in a Quantity, even when NaN.
_YEARLY_RECORDS = (
    (2009, None, np.nan, np.nan),
    (2010, None, 1.65226001e+08, 19599001.4),
    (2011, None, 1.62028001e+08, 17902001.4),
    (2012, None, 1.58192001e+08, 17256001.4),
    (2013, None, 1.69000001e+08, 21000001.4),
    (2014, 91200001.4, 1.74000001e+08, 17000001.4),
    (2015, 92479001.4, 1.76000001e+08, 16000001.4),
    (2016, 90800001.4, 1.76000001e+08, 14000001.4),
    (2017, 93100001.4, 1.79700001e+08, 15100001.4),
    (2018, 92500001.4, 1.74900001e+08, 13900001.4),
    (2019, 89800001.4, 1.69800001e+08, 12100001.4),
    (2020, 71500001.4, 1.41300001e+08, 9500001.4),
    (2021, 71500001.4, 1.41300001e+08, 9500001.4),
)

# Nested mapping {year: {row key: value}}; the years become the columns of
# the DataFrame built from it.
hist_dict = {
    year: {
        _PRODUCTION_KEY: np.nan if production is None else Q_(production, 'Fe_ton'),
        _S1_KEY: Q_(s1_emissions, _EMISSIONS_UNITS),
        _S2_KEY: Q_(s2_emissions, _EMISSIONS_UNITS),
    }
    for year, production, s1_emissions, s2_emissions in _YEARLY_RECORDS
}
hist_data = pd.DataFrame(hist_dict)
# Row key (identifier, category, scope) of each emission-intensity row,
# looked up by scope label.
ei_keys = {
    label: ('US6293775085', 'Emission Intensities', label)
    for label in ('S1', 'S2', 'S3', 'S1S2', 'S1S2S3')
}

# Scope label and production unit used by the .loc assignments.
production_units = 'Fe_ton'
scope = 'S1'
# Attempt to set a cell of a not-yet-existing 'Emission Intensities' row to a
# Quantity (t CO2 per production unit) through .loc.
# Only TypeError -- the failure being reported -- is caught, so any other
# exception surfaces instead of being mislabelled by the message below.
try:
    hist_data.loc[ei_keys[scope], 2014] = Q_(np.nan, 't CO2') / Q_(np.nan, production_units)
except TypeError:
    # Observed failure: TypeError: object of type 'float' has no len()
    print("*** TypeError: object of type 'float' has no len()")
# For comparison: plain floats can be assigned through .loc with the same key.
row_key = ei_keys[scope]

# Set a single cell (year 2009) of the 'Emission Intensities' row.
hist_data.loc[row_key, 2009] = 1.0
print(hist_data)
# US6293775085 Productions Production NaN NaN NaN ... 89800001.4 Fe_ton 71500001.4 Fe_ton 71500001.4 Fe_ton
# Emissions S1 nan CO2 * metric_ton 165226001.0 CO2 * metric_ton 162028001.0 CO2 * metric_ton ... 169800001.0 CO2 * metric_ton 141300001.0 CO2 * metric_ton 141300001.0 CO2 * metric_ton
# S2 nan CO2 * metric_ton 19599001.4 CO2 * metric_ton 17902001.4 CO2 * metric_ton ... 12100001.4 CO2 * metric_ton 9500001.4 CO2 * metric_ton 9500001.4 CO2 * metric_ton
# Emission Intensities S1 1.0 NaN NaN ... NaN NaN NaN
#
# [4 rows x 13 columns]

# Set the whole row to the same float.
hist_data.loc[row_key] = 1.0
print(hist_data)
# 2009 2010 2011 ... 2019 2020 2021
# US6293775085 Productions Production NaN NaN NaN ... 89800001.4 Fe_ton 71500001.4 Fe_ton 71500001.4 Fe_ton
# Emissions S1 nan CO2 * metric_ton 165226001.0 CO2 * metric_ton 162028001.0 CO2 * metric_ton ... 169800001.0 CO2 * metric_ton 141300001.0 CO2 * metric_ton 141300001.0 CO2 * metric_ton
# S2 nan CO2 * metric_ton 19599001.4 CO2 * metric_ton 17902001.4 CO2 * metric_ton ... 12100001.4 CO2 * metric_ton 9500001.4 CO2 * metric_ton 9500001.4 CO2 * metric_ton
# Emission Intensities S1 1.0 1.0 1.0 ... 1.0 1.0 1.0
#
# [4 rows x 13 columns]
This may be another manifestation of https://github.com/hgrecco/pint-pandas/issues/26
I want to use a pint `Quantity` to initialize a new row of a DataFrame through the `.loc` indexer. Assigning a plain floating-point number works as expected, but assigning a `Quantity` raises `TypeError: object of type 'float' has no len()`. What is the workaround?