Open aplotnikov2020 opened 1 year ago
At the moment, the workaround is to pivot the input DataFrame manually:
import pandas as pd
from datetime import datetime
from synthesized import MetaExtractor, HighDimSynthesizer
from synthesized.model import DataFrameModel
# Long-format sample: one record per (day, country, platform) with a value `y`.
# The NL/android series covers six consecutive days, the ES/ios series three.
_nl_android_y = [1.0, 2.0, 3.0, 2.5, 2.1, 2.2]
_es_ios_y = [10.0, 20.0, 30.0]
data = [
    {'day': datetime(2022, 10, day_of_month), 'country': 'NL', 'platform': 'android', 'y': value}
    for day_of_month, value in enumerate(_nl_android_y, start=1)
] + [
    {'day': datetime(2022, 10, day_of_month), 'country': 'ES', 'platform': 'ios', 'y': value}
    for day_of_month, value in enumerate(_es_ios_y, start=1)
]
df = pd.DataFrame.from_records(data)

# Manual pivot: one row per day, one column per (country, platform) pair.
pivoted_df = df.pivot(index='day', columns=['country', 'platform'], values='y')

# Collapse each multi-level column label into a single '<country>_<platform>'
# string, skipping empty levels.
flat_labels = []
for label_levels in pivoted_df.columns:
    pieces = [str(level).strip() for level in label_levels if level]
    flat_labels.append('_'.join(pieces))
pivoted_df.columns = flat_labels

# Move 'day' out of the index and back into an ordinary column.
pivoted_df = pivoted_df.reset_index()

# Extract metadata from the wide frame and fit the data-frame model on it.
df_meta = MetaExtractor.extract(pivoted_df)
frame_model = DataFrameModel(df_meta)
frame_model.fit(pivoted_df)

# Train the synthesizer and generate as many rows as the pivoted frame has.
synth = HighDimSynthesizer(df_meta)
synth.learn(df_train=pivoted_df)
row_count = len(pivoted_df)
df_synth = synth.synthesize(num_rows=row_count)
**Describe the bug**
MetaExtractor throws the error below when using a multi-column index:
**To Reproduce**
Steps to reproduce the behavior:
Environment (please complete the following information):
Additional context: