Closed jingsupo closed 2 years ago
For some projects I have to use Python 3.6.4; the version cannot be updated.
When I use the following code:
def get_es(*args):
    """Build the "customer_data" EntitySet from a transactions dataframe.

    Written against the pre-1.0 Featuretools API (``entity_from_dataframe``,
    ``normalize_entity``, ``Relationship``). Trailing comments name the method
    or keyword that the newer API uses for the same thing.

    Args:
        *args: Only ``args[0]`` is read; it is passed as the ``dataframe`` of
            the "transactions" entity.

    Returns:
        The resulting EntitySet, after "sessions", "customers" and "cid_hour"
        have been split out of it and customers has been linked to cid_hour.
    """
    entityset = ft.EntitySet(id="customer_data")

    # Register the raw transactions table as the base entity.
    transactions_frame = args[0]
    # A '"zip_code": PostalCode' entry was left disabled by the author.
    column_types = {"product_id": Categorical}
    entityset = entityset.entity_from_dataframe(  # newer API: add_dataframe
        dataframe=transactions_frame,
        entity_id="transactions",  # newer API: dataframe_name
        index="transaction_id",
        time_index="transaction_time",
        variable_types=column_types,  # newer API: logical_types
    )

    # Derive "sessions" from "transactions".
    session_columns = ["device", "zip_code", "session_start", "join_date"]
    entityset = entityset.normalize_entity(  # newer API: normalize_dataframe
        base_entity_id="transactions",  # newer API: base_dataframe_name
        new_entity_id="sessions",  # newer API: new_dataframe_name
        index="session_id",
        make_time_index="session_start",
        additional_variables=session_columns,  # newer API: additional_columns
        copy_variables=["customer_id"],  # newer API: copy_columns
    )

    # Derive "customers" from "sessions".
    customer_columns = ["zip_code", "join_date"]
    entityset = entityset.normalize_entity(  # newer API: normalize_dataframe
        base_entity_id="sessions",  # newer API: base_dataframe_name
        new_entity_id="customers",  # newer API: new_dataframe_name
        index="customer_id",
        make_time_index="join_date",
        additional_variables=customer_columns,  # newer API: additional_columns
    )

    # Key step: derive "cid_hour" straight from "transactions", carrying a
    # copy of customer_id so it can be related back to "customers" below.
    entityset = entityset.normalize_entity(  # newer API: normalize_dataframe
        base_entity_id="transactions",  # newer API: base_dataframe_name
        new_entity_id="cid_hour",  # newer API: new_dataframe_name
        index="cid_hour",
        copy_variables=["customer_id"],  # newer API: copy_columns
    )

    # Link customers.customer_id to cid_hour.customer_id.
    # The author's note for the newer API spells this as:
    #   es.add_relationships(
    #       [("customers", "customer_id", "cid_hour", "customer_id")]
    #   )
    customers_key = entityset["customers"]["customer_id"]
    cid_hour_key = entityset["cid_hour"]["customer_id"]
    entityset.add_relationship(ft.Relationship(customers_key, cid_hour_key))

    return entityset
# Build the module-level EntitySet that get_feature() reads; transactions_df
# is defined outside this snippet.
es = get_es(transactions_df)
def get_feature(target_dataframe_name, features_only=False):
    """Run ``ft.dfs`` on the module-level ``es`` for one target entity.

    Args:
        target_dataframe_name: Passed to ``ft.dfs`` as ``target_entity``
            (the newer API calls this keyword ``target_dataframe_name``).
        features_only: Forwarded to ``ft.dfs``; also selects the return shape.

    Returns:
        The ``ft.dfs`` result unchanged when ``features_only`` is truthy,
        otherwise the tuple ``(result[0], result[1])``.
    """
    # Newer API: ignore_columns.
    ignored_columns = {"transactions": ["customer_id"]}

    # Per-primitive column filters. Newer API: ignore_columns/include_columns.
    per_primitive_options = {
        "sum": {"ignore_variables": {"transactions": ["customer_id"]}},
        "divide_numeric": {
            "include_variables": {"transactions": ["amount", "product_id"]}
        },
        "my_divide_numeric": {
            "include_variables": {
                "transactions": ["amount"],
                "customers": ["customer_id"],
            }
        },
    }

    where_aggregations = [
        "sum",
        "max",
        "min",
        "mean",
        "std",
        "count",
        "num_unique",
        "percent_true",
    ]

    # agg_primitives is deliberately not passed (it was commented out).
    dfs_result = ft.dfs(
        entityset=es,
        target_entity=target_dataframe_name,
        trans_primitives=["divide_numeric"],
        max_depth=2,
        ignore_variables=ignored_columns,
        primitive_options=per_primitive_options,
        where_primitives=where_aggregations,
        seed_features=[],
        drop_contains=[],
        features_only=features_only,
    )

    if not features_only:
        return dfs_result[0], dfs_result[1]
    return dfs_result
# Target the "customers" entity; with features_only=False, get_feature returns
# the two-element tuple (feature[0], feature[1]) of the ft.dfs result.
feature = get_feature("customers", features_only=False)
The following error is reported:
ValueError: freq not specified and cannot be inferred
@jingsupo We do not support Python 3.6 with Featuretools. There are previous releases that support Python 3.6 (Featuretools <=0.23.3). However, those predate our significant refactor and the Featuretools 1.0.0 release. I would suggest upgrading the Python version in your environment.
When I use Python 3.6.4, the following code doesn't work:
It reports the following error:
The same code runs normally on Python 3.9.
What should I do? Thank you for help.