DrPepper8888 / IPMN

0 stars 0 forks source link

data aggression.py #3

Open DrPepper8888 opened 4 months ago

DrPepper8888 commented 4 months ago
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt

# Script overview: load the transaction extract, cluster rows by standardised
# transaction amount with KMeans, plot the result and print per-cluster
# summary statistics.

# Load the data
df = pd.read_csv('dep_transaction_fx_20240226.csv')

# Preprocessing
# Assumes the transaction-date column is named 'TRAN_DT' and the
# transaction-amount column is named 'TRAN_AMT'.
df['TRAN_DT'] = pd.to_datetime(df['TRAN_DT'])

# Feature selection
# Only the transaction amount is used as a clustering feature. The double
# brackets keep this a one-column DataFrame, the 2-D input the scaler expects.
features = df[['TRAN_AMT']]

# Standardise the feature (zero mean, unit variance)
scaler = StandardScaler()
scaled_features = scaler.fit_transform(features)

# Clustering
# Split the rows into 5 groups; random_state is fixed for reproducibility.
# NOTE(review): each row looks like a single transaction rather than a
# customer, so these are transaction clusters — confirm against the data.
kmeans = KMeans(n_clusters=5, random_state=0).fit(scaled_features)

# Attach the cluster label of every row to the original data frame
df['Cluster'] = kmeans.labels_

# Result analysis
# Print the centre of each cluster. The model was fitted on scaled_features,
# so these values are in standardised units, not raw amounts.
print(kmeans.cluster_centers_)

# Visualise the clusters
# `features` is a one-column DataFrame, so NumPy-style `features[:, 0]` /
# `features[:, 1]` indexing is invalid and there is no second feature to put
# on the y axis. Plot amount against transaction date instead, coloured by
# cluster label.
plt.scatter(df['TRAN_DT'], df['TRAN_AMT'], c=kmeans.labels_, cmap='rainbow')
plt.title('Customer Clusters')
plt.xlabel('Transaction Date')
plt.ylabel('Transaction Amount')
plt.show()

# Summary statistics of every column, per cluster
cluster_analysis = df.groupby('Cluster').describe()
print(cluster_analysis)