alteryx / featuretools

An open source python library for automated feature engineering
https://www.featuretools.com
BSD 3-Clause "New" or "Revised" License
7.2k stars 872 forks source link

Featuretools cannot run normally with Python 3.6.4 #1983

Closed jingsupo closed 2 years ago

jingsupo commented 2 years ago

When I use Python 3.6.4, the following code doesn't work:

from woodwork.column_schema import ColumnSchema
from woodwork.logical_types import PostalCode

And report the following errors:

AttributeError: 'EntitySet' object has no attribute 'add_dataframe'

The same code runs normally on Python 3.9.

What should I do? Thank you for your help.

jingsupo commented 2 years ago

For some projects, I have to use Python 3.6.4; the version cannot be updated.

jingsupo commented 2 years ago

When I use the following code:

def get_es(*args):
    """Build the "customer_data" EntitySet from a transactions dataframe.

    Written against the legacy entity/variable API. The comments below keep
    the newer method and keyword names that the author noted next to each
    call.

    Args:
        *args: Only ``args[0]`` is read; it is passed as the ``dataframe``
            of the "transactions" entity.

    Returns:
        The EntitySet after registering "transactions", deriving "sessions",
        "customers" and "cid_hour" from it, and adding a relationship
        between ``customers.customer_id`` and ``cid_hour.customer_id``.
    """
    entityset = ft.EntitySet(id="customer_data")

    # Register the raw transactions table.
    # Newer names noted by the author: add_dataframe(dataframe_name=...,
    # logical_types=...). A `"zip_code": PostalCode` type entry was left
    # disabled by the author.
    entityset = entityset.entity_from_dataframe(
        dataframe=args[0],
        entity_id="transactions",
        index="transaction_id",
        time_index="transaction_time",
        variable_types={"product_id": Categorical},
    )

    # Derive new entities from existing ones, in this exact order.
    # Newer names noted by the author: normalize_dataframe(
    # base_dataframe_name=..., new_dataframe_name=...,
    # additional_columns=..., copy_columns=...).
    normalize_specs = [
        {
            "base_entity_id": "transactions",
            "new_entity_id": "sessions",
            "index": "session_id",
            "make_time_index": "session_start",
            "additional_variables": [
                "device",
                "zip_code",
                "session_start",
                "join_date",
            ],
            "copy_variables": ["customer_id"],
        },
        {
            "base_entity_id": "sessions",
            "new_entity_id": "customers",
            "index": "customer_id",
            "make_time_index": "join_date",
            "additional_variables": ["zip_code", "join_date"],
        },
        # Key step (flagged as such by the author): a "cid_hour" entity
        # split off from transactions, carrying a copy of customer_id.
        {
            "base_entity_id": "transactions",
            "new_entity_id": "cid_hour",
            "index": "cid_hour",
            "copy_variables": ["customer_id"],
        },
    ]
    for spec in normalize_specs:
        entityset = entityset.normalize_entity(**spec)

    # Link customers and cid_hour on customer_id.
    # Newer form noted by the author:
    # add_relationships([("customers", "customer_id", "cid_hour", "customer_id")])
    customers_key = entityset["customers"]["customer_id"]
    cid_hour_key = entityset["cid_hour"]["customer_id"]
    entityset.add_relationship(ft.Relationship(customers_key, cid_hour_key))
    return entityset

# Build the EntitySet once at module level; get_feature() below reads this
# global `es`. `transactions_df` is not defined in this snippet.
es = get_es(transactions_df)

def get_feature(target_dataframe_name, features_only=False):
    """Run ``ft.dfs`` on the module-level ``es`` for one target entity.

    Written against the legacy entity/variable API. The comments below keep
    the newer keyword names that the author noted next to each argument.

    Args:
        target_dataframe_name: Name passed to ``ft.dfs`` as ``target_entity``.
        features_only: Forwarded to ``ft.dfs`` unchanged; also selects the
            return shape.

    Returns:
        The raw ``ft.dfs`` result when ``features_only`` is truthy; otherwise
        a 2-tuple of the first two items of that result (presumably the
        feature matrix and the feature definitions; confirm against the
        ``ft.dfs`` documentation).
    """
    # Per-primitive column filters. Newer keys noted by the author:
    # ignore_columns / include_columns.
    # "my_divide_numeric" gets options here although it is not listed in
    # trans_primitives below.
    per_primitive = {
        "sum": {"ignore_variables": {"transactions": ["customer_id"]}},
        "divide_numeric": {
            "include_variables": {"transactions": ["amount", "product_id"]}
        },
        "my_divide_numeric": {
            "include_variables": {
                "transactions": ["amount"],
                "customers": ["customer_id"],
            }
        },
    }
    where_names = [
        "sum",
        "max",
        "min",
        "mean",
        "std",
        "count",
        "num_unique",
        "percent_true",
    ]

    # agg_primitives is not passed; the author left that argument commented
    # out. Newer keywords noted by the author: target_dataframe_name
    # (for target_entity) and ignore_columns (for ignore_variables).
    result = ft.dfs(
        entityset=es,
        target_entity=target_dataframe_name,
        trans_primitives=["divide_numeric"],
        max_depth=2,
        ignore_variables={"transactions": ["customer_id"]},
        primitive_options=per_primitive,
        where_primitives=where_names,
        seed_features=[],
        drop_contains=[],
        features_only=features_only,
    )
    return result if features_only else (result[0], result[1])

# Run the synthesis for the "customers" target. With features_only=False,
# get_feature returns a 2-tuple built from the first two items of the
# ft.dfs result.
feature = get_feature("customers", features_only=False)

The following error is reported:

ValueError: freq not specified and cannot be inferred
gsheni commented 2 years ago

@jingsupo We do not support Python 3.6 with Featuretools. There are previous releases of Featuretools (<=0.23.3) that support Python 3.6. However, those were released before our significant refactor and the Featuretools 1.0.0 release. I would suggest upgrading the Python version in your environment.