tusharchou / local-data-platform

python library for iceberg lake house on your local
MIT License
8 stars 5 forks source link

0.1.2 Reoccurring Customer Churn Analysis #47

Open tusharchou opened 1 month ago

tusharchou commented 1 month ago

Reporting

Churn Analyisis

This often involves identifying the percentage of customers who have stopped using a product or service over a certain period.

tusharchou commented 1 month ago
import pandas as pd

def calculate_churn_rate(data, customer_id_col, active_status_col, active_value, period_col):
    """
    Calculate churn rate based on customer data.

    :param data: pandas DataFrame containing customer data.
    :param customer_id_col: Column name for customer ID.
    :param active_status_col: Column name for customer activity status.
    :param active_value: Value indicating the customer is active.
    :param period_col: Column name for the period (e.g., month, quarter).
    :return: DataFrame with churn rate for each period.
    """
    # Mark customers who are active in each period
    data['is_active'] = data[active_status_col] == active_value

    # Get the total number of active customers at the start of each period
    total_customers = data.groupby(period_col)[customer_id_col].nunique().reset_index()
    total_customers.columns = [period_col, 'total_customers']

    # Get the number of customers who churned in each period
    churned_customers = data.groupby([period_col, customer_id_col])['is_active'].any().groupby(level=0).apply(
        lambda x: x[x == False].count()).reset_index()
    churned_customers.columns = [period_col, 'churned_customers']

    # Merge the total and churned customers data
    churn_data = pd.merge(total_customers, churned_customers, on=period_col)

    # Calculate churn rate
    churn_data['churn_rate'] = churn_data['churned_customers'] / churn_data['total_customers'] * 100

    return churn_data

# Example usage:
# Assuming you have a DataFrame named 'customer_data' with columns: 'customer_id', 'status', 'month'
customer_data = pd.DataFrame({
    'customer_id': [1, 2, 3, 4, 5, 1, 2, 6],
    'status': ['active', 'inactive', 'active', 'inactive', 'active', 'inactive', 'inactive', 'inactive'],
    'month': ['2023-01', '2023-01', '2023-01', '2023-01', '2023-01', '2023-02', '2023-02', '2023-02']
})
churn_rate = calculate_churn_rate(customer_data, 'customer_id', 'status', 'active', 'month')
print(churn_rate)