# Issue status: Open -- opened by MiaoRain 4 years ago
import numpy as np
def _nearest_centroid(samples, centers):
    """Return, for each row of ``samples``, the index of the closest row of ``centers``."""
    # Broadcasting (m, 1, n) - (1, k, n) yields an (m, k, n) difference tensor.
    # Squared distances are enough here: the square root would not change the argmin.
    sq_dist = ((samples[:, None, :] - centers[None, :, :]) ** 2).sum(axis=2)
    return sq_dist.argmin(axis=1)


def kmeans_miao(ds, k, init=None, max_iter=300):
    """Cluster samples into ``k`` groups with the k-means (Lloyd) algorithm.

    Args:
        ds: The data set. Either a 1-D sequence of ``m`` scalar samples or a
            2-D array-like of shape ``(m, n)`` (``m`` samples, ``n`` attributes).
        k: Number of clusters, ``1 <= k <= m``.
        init: Optional initial centroids, ``k`` scalars for 1-D data or an
            array-like of shape ``(k, n)`` for 2-D data. When omitted, ``k``
            samples at evenly spaced positions of ``ds`` are used, which keeps
            the result deterministic.
        max_iter: Upper bound on the number of centroid updates; a safety net
            in case the assignment keeps changing.

    Returns:
        A tuple ``(result, cores)``: ``result`` is an integer ndarray of length
        ``m`` holding the cluster index of every sample, and ``cores`` is a
        list with the ``k`` centroids (scalars for 1-D data, length-``n``
        arrays for 2-D data).

    Raises:
        ValueError: If ``ds`` is empty or not 1-D/2-D, if ``k`` is outside
            ``1..m``, or if ``init`` does not provide ``k`` centroids of the
            right dimensionality.
    """
    samples = np.asarray(ds, dtype=float)
    is_1d = samples.ndim == 1
    if is_1d:
        # Treat scalar samples as points with a single attribute.
        samples = samples.reshape(-1, 1)
    if samples.ndim != 2 or samples.shape[0] == 0:
        raise ValueError("ds must be a non-empty 1-D or 2-D collection of samples")

    m, n = samples.shape
    if not 1 <= k <= m:
        raise ValueError("k must satisfy 1 <= k <= number of samples")

    if init is None:
        # Deterministic seeding: pick k samples spread evenly over the input.
        seed_idx = np.linspace(0, m - 1, k).round().astype(int)
        centers = samples[seed_idx].copy()
    else:
        # np.array copies, so the caller's initial centroids are never mutated.
        centers = np.array(init, dtype=float)
        if is_1d and centers.ndim == 1:
            centers = centers.reshape(-1, 1)
        if centers.shape != (k, n):
            raise ValueError("init must provide k centroids matching the sample dimension")

    labels = _nearest_centroid(samples, centers)
    for _ in range(max_iter):
        # Move every centroid to the mean of the samples currently assigned to it.
        for i in range(k):
            members = samples[labels == i]
            if len(members):
                centers[i] = members.mean(axis=0)
            # An empty cluster keeps its previous centroid instead of becoming NaN.

        new_labels = _nearest_centroid(samples, centers)
        if np.array_equal(new_labels, labels):
            # The assignment did not change, so the centroids are stable: converged.
            break
        labels = new_labels

    cores = centers[:, 0] if is_1d else centers
    return labels, list(cores)
# Example run: cluster seven scalar samples into two groups and print the centroids.
ds = [10, 20, 25, 35, 40, 50, 70]
k = 2
result, cores = kmeans_miao(ds, k)
print(cores)