MiaoRain / lund

10 stars 2 forks source link

李航机器学习之k-means算法 #47

Open MiaoRain opened 4 years ago

MiaoRain commented 4 years ago

image

MiaoRain commented 4 years ago

image

import numpy as np

def kmeans_miao(ds, k, init_cores=None, max_iter=300):
    """Cluster samples into ``k`` groups with the k-means algorithm.

    Each pass assigns every sample to its nearest centroid (Euclidean
    distance) and then moves every centroid to the mean of the samples
    assigned to it.  Iteration stops when a pass leaves the assignment
    unchanged, or after ``max_iter`` passes.

    Args:
        ds: Dataset of ``m`` samples.  Either a 1-D sequence of ``m`` scalars
            or a 2-D array-like of shape ``(m, n)`` with ``n`` attributes per
            sample.
        k: Number of clusters; must satisfy ``1 <= k <= m``.
        init_cores: Optional initial centroids (``k`` scalars for 1-D data,
            or shape ``(k, n)``).  When omitted, the first ``k`` samples are
            used, which keeps the result deterministic.
        max_iter: Upper bound on the number of assign/update passes.

    Returns:
        A tuple ``(result, cores)``.  ``result`` is an integer ndarray of
        shape ``(m,)`` holding the cluster index of every sample.  ``cores``
        is a list of ``k`` centroids: NumPy floats for 1-D input, ndarrays of
        shape ``(n,)`` for 2-D input.

    Raises:
        ValueError: If ``ds`` is not 1-D or 2-D, if ``k`` is outside
            ``1..m``, or if ``init_cores`` does not hold ``k`` centroids of
            the sample dimension.
    """
    data = np.asarray(ds, dtype=float)
    one_dim = data.ndim == 1
    if one_dim:
        # Treat scalars as samples with a single attribute.
        data = data.reshape(-1, 1)
    if data.ndim != 2:
        raise ValueError("ds must be a 1-D or 2-D array-like")

    m, n = data.shape
    if not 1 <= k <= m:
        raise ValueError("k must satisfy 1 <= k <= number of samples")

    if init_cores is None:
        cores = data[:k].copy()
    else:
        cores = np.array(init_cores, dtype=float)
        if cores.size != k * n:
            raise ValueError("init_cores must provide k centroids matching ds")
        cores = cores.reshape(k, n)

    # -1 never matches a real cluster index, so the first pass always updates.
    result = np.full(m, -1, dtype=int)
    for _ in range(max_iter):
        # Broadcast (m, 1, n) against (1, k, n) to get all pairwise offsets.
        offsets = data[:, np.newaxis, :] - cores[np.newaxis, :, :]
        distance = np.sqrt(np.sum(np.square(offsets), axis=2))  # (m, k)
        index_min = np.argmin(distance, axis=1)  # nearest centroid per sample

        if np.array_equal(index_min, result):
            break  # assignment is stable: converged

        result[:] = index_min
        for i in range(k):
            items = data[result == i]
            # An empty cluster keeps its previous centroid instead of
            # becoming NaN (mean of an empty set).
            if len(items):
                cores[i] = items.mean(axis=0)

    if one_dim:
        return result, list(cores[:, 0])
    return result, list(cores)

if __name__ == "__main__":
    # Demo run on a small 1-D dataset; guarded so that importing this module
    # does not trigger clustering or printing as a side effect.
    ds = [10, 20, 25, 35, 40, 50, 70]
    k = 2
    result, cores = kmeans_miao(ds, k)
    print(cores)