Open DrPepper8888 opened 4 months ago
# Cluster transactions by amount with k-means, plot the clusters over time,
# and print per-cluster summary statistics.
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Load the transaction data.
df = pd.read_csv('dep_transaction_fx_20240226.csv')

# Preprocessing: the transaction date column is assumed to be named 'TRAN_DT'
# and the transaction amount column 'TRAN_AMT'.
df['TRAN_DT'] = pd.to_datetime(df['TRAN_DT'])

# Feature selection: the transaction amount is the only clustering feature.
# Double brackets keep this a one-column DataFrame (2-D input for the scaler).
features = df[['TRAN_AMT']]

# Standardize the feature before clustering.
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Clustering: assume the customers should be split into 5 groups.
kmeans = KMeans(n_clusters=5, random_state=0).fit(scaled_features)

# Attach each row's cluster label to the original data frame.
df['Cluster'] = kmeans.labels_

# Print the cluster centers. The model was fit on the standardized feature,
# so these values are in standardized units, not raw transaction amounts.
print(kmeans.cluster_centers_)

# Visualize the clusters. `features` is a single-column DataFrame, so it can
# neither be sliced NumPy-style (`features[:, 0]`) nor provide a second
# column; plot the amount against the transaction date instead and color
# each point by its cluster label.
plt.scatter(df['TRAN_DT'], df['TRAN_AMT'], c=kmeans.labels_, cmap='rainbow')
plt.title('Customer Clusters')
plt.xlabel('Transaction Date')
plt.ylabel('Transaction Amount')
plt.show()

# Summarize the columns of each cluster.
cluster_analysis = df.groupby('Cluster').describe()
print(cluster_analysis)