Closed sbrugman closed 5 years ago
For reference, here is the bottom-up version:
import pandas as pd
import numpy as np
# Sample input: two NaNs, a zero, three positive whole numbers, and +/- infinity.
series = pd.Series(
    [np.nan, 0, 1.0, 2.0, 3.0, np.nan, np.inf, -np.inf],
)
def int_summarize(series, mask):
    """Count the entries of ``series`` selected by ``mask``.

    Returns a one-item dict mapping ``'ints'`` to ``len(series[mask])``.
    """
    selected = series[mask]
    return {'ints': len(selected)}
def nan_summarize(series, mask):
    """Count the entries of ``series`` selected by ``mask``.

    Returns a one-item dict mapping ``'nans'`` to ``len(series[mask])``.
    """
    n_selected = len(series[mask])
    result = {'nans': n_selected}
    return result
def inf_summarize(series, mask):
    """Count the entries of ``series`` selected by ``mask``.

    Returns a one-item dict mapping ``'infs'`` to ``len(series[mask])``.
    """
    result = {}
    result['infs'] = len(series[mask])
    return result
def base_summarize(series, mask):
    """Count the entries of ``series`` selected by ``mask``.

    Returns a one-item dict mapping ``'n_records'`` to ``len(series[mask])``.
    """
    kept = series[mask]
    record_count = len(kept)
    return {'n_records': record_count}
def unique_summarize(series, mask):
    """Count the distinct values among the entries of ``series`` selected by ``mask``.

    Returns a one-item dict mapping ``'n_unique'`` to ``series[mask].nunique()``.
    """
    subset = series[mask]
    distinct = subset.nunique()
    return {'n_unique': distinct}
def zeros_summarize(series, mask):
    """Count the entries of ``series`` selected by ``mask`` via ``Series.count()``.

    Returns a one-item dict mapping ``'n_zeros'`` to ``series[mask].count()``.
    """
    selected = series[mask]
    tally = selected.count()
    return {'n_zeros': tally}
def infstuff(series, summary):
    """Record the infinite-value count in ``summary`` and drop those values.

    Builds a boolean mask with ``np.isinf``, merges the result of
    ``inf_summarize`` into ``summary`` (mutated in place), and returns the
    entries of ``series`` for which the mask is False.
    """
    is_infinite = np.isinf(series)
    inf_stats = inf_summarize(series, is_infinite)
    summary.update(inf_stats)
    remainder = series[~is_infinite]
    return remainder
def nanstuff(series, summary):
    """Record the missing-value count in ``summary`` and drop those values.

    Builds a boolean mask of missing entries, merges the result of
    ``nan_summarize`` into ``summary`` (mutated in place), and returns the
    entries of ``series`` that are not missing.
    """
    is_missing = series.isna()
    nan_stats = nan_summarize(series, is_missing)
    summary.update(nan_stats)
    present = series[~is_missing]
    return present
def intstuff(series, summary):
    """Add the count of whole-number entries of ``series`` to ``summary``.

    Parameters
    ----------
    series : pandas.Series
        Numeric values to inspect.
    summary : dict
        Updated in place with the result of ``int_summarize``.

    Returns
    -------
    pandas.Series
        ``series`` itself, unchanged.
    """
    # The mask must be boolean. Passing the float series itself as the indexer
    # makes pandas look the values up as index labels instead of filtering,
    # which raises KeyError on pandas >= 1.0 when a value is not a label.
    # NaN and +/-inf yield NaN under ``% 1`` and therefore compare False.
    mask = series % 1 == 0
    summary.update(int_summarize(series, mask))
    return series
def basestuff(series, summary):
    """Add the total record count of ``series`` to ``summary``.

    Parameters
    ----------
    series : pandas.Series
        Values to count; every entry is included.
    summary : dict
        Updated in place with the result of ``base_summarize``.

    Returns
    -------
    pandas.Series
        ``series`` itself, unchanged.
    """
    # Select every record with an explicit all-True boolean mask. Passing the
    # float series itself as the indexer makes pandas look the values up as
    # index labels, which raises KeyError on pandas >= 1.0 when a value
    # (e.g. NaN or inf) is not a label.
    mask = np.ones(len(series), dtype=bool)
    summary.update(base_summarize(series, mask))
    return series
def uniquestuff(series, summary):
    """Add the distinct-value count of ``series`` to ``summary``.

    Parameters
    ----------
    series : pandas.Series
        Values to inspect; every entry is included.
    summary : dict
        Updated in place with the result of ``unique_summarize``.

    Returns
    -------
    pandas.Series
        ``series`` itself, unchanged.
    """
    # Select every record with an explicit all-True boolean mask. Passing the
    # float series itself as the indexer makes pandas look the values up as
    # index labels, which raises KeyError on pandas >= 1.0 when a value is
    # not a label.
    mask = np.ones(len(series), dtype=bool)
    summary.update(unique_summarize(series, mask))
    return series
def zerosstuff(series, summary):
    """Record the zero-value count in ``summary``.

    Builds a boolean mask of entries equal to 0, merges the result of
    ``zeros_summarize`` into ``summary`` (mutated in place), and returns
    ``series`` unchanged.
    """
    is_zero = (series == 0)
    zero_stats = zeros_summarize(series, is_zero)
    summary.update(zero_stats)
    return series
# Bottom-up
# Each step writes its statistics into the shared ``summary`` dict. The NaN
# step runs on the raw series, the inf step on its output, and the remaining
# steps all run on the series returned by the inf step.
summary = dict()
baseseries = basestuff(series, summary)
nanseries = nanstuff(series, summary)
infseries = infstuff(nanseries, summary)
for _step in (uniquestuff, zerosstuff, intstuff):
    _ = _step(infseries, summary)
print(summary)
POC