ML05 My_Kmeans

ML实战:手动实现Kmeans算法

代码实现

Kmeans类
import numpy as np
import random

np.set_printoptions(suppress=True)
class Kmeans:
    '''
    Minimal K-means clustering (Lloyd's algorithm) on a 2-D sample array.

    Attributes:
        x: training set, array of shape (n_samples, n_features)
        cluster: number of clusters
        uc: list mapping each sample index to the index of its cluster
            (-1 until the sample has been assigned)
        u: float array of shape (cluster, n_features) holding the centroids
    '''

    def __init__(self,x,cluster=2):
        '''
        :param x: training set, array-like of shape (n_samples, n_features)
        :param cluster: number of clusters; must not exceed the number of samples
        :raises ValueError: (from random.sample) if cluster > len(x)
        '''
        self.x=np.asarray(x)
        self.cluster=cluster
        self.uc=[-1]*len(self.x)
        # Sample from range(len(x)), not range(1, len(x)): the first sample is a
        # valid initial centroid, and cluster == len(x) must be allowed.
        randomlist=random.sample(range(len(self.x)),cluster)
        # Centroids are means, so store them as floats even for integer data.
        self.u=np.array([self.x[item,:] for item in randomlist],dtype=float)

    def find_uci(self,i):
        '''Assign sample i to its nearest centroid (Euclidean distance) and record it in uc.'''
        dis=np.linalg.norm(self.x[i]-self.u,axis=1)
        self.uc[i]=np.argmin(dis)

    def single_iter(self):
        '''Run one iteration: reassign every sample, then recompute the centroids.'''
        for i in range(len(self.x)):
            self.find_uci(i)
        self.update_u()

    def update_u(self):
        '''Move each centroid to the mean of the samples currently assigned to it.'''
        labels=np.asarray(self.uc)
        # Start from the current centroids so that a cluster which received no
        # samples keeps its position instead of collapsing to the zero vector.
        new_u=np.array(self.u,dtype=float)
        for j in range(self.cluster):
            members=labels==j
            if members.any():
                new_u[j]=self.x[members].mean(axis=0)
        self.u=new_u

    def fit(self,iter_count=500):
        '''
        Fit the centroids to the training set.

        :param iter_count: maximum number of iterations
        :return: array of shape (n_samples,) with the cluster index of each sample
        '''
        for _ in range(iter_count):
            previous=list(self.uc)
            self.single_iter()
            # Unchanged assignments mean unchanged centroids: the algorithm has
            # reached a fixed point and further iterations would be no-ops.
            if self.uc==previous:
                break
        return np.array(self.uc)
主函数
import sys
from sklearn.datasets import make_blobs
import matplotlib.pyplot as plt
from Kmeas_class import Kmeans
import numpy as np

np.set_printoptions(suppress=True)

# Driver script: generate blob data, plot the true labels, cluster the data
# with the hand-written Kmeans class, and plot the predicted labels.
color=['red','pink','orange','gray']
cluster=4

# Generate the training set (the seed is itself drawn at random from [0, 30))
X, y = make_blobs(n_samples=500, n_features=2, centers=cluster, random_state=np.random.randint(0,30))

# Visualize the generated training set with its true labels
plt.figure(1)
for i in range(cluster):
    plt.scatter(X[y==i, 0], X[y==i,1],
               marker='o',
               s=8,
               c=color[i]
               )
plt.title('Real Data')
# Raw string: the Windows path contains backslashes that must not be read as escapes
plt.savefig(r'E:\python\ml\ml by myself\Kmeans\kmeans_real_myslef.png')

# Call fit to run the K-means algorithm
kmeans=Kmeans(X,cluster)
y_predict=kmeans.fit()

# Visualize the prediction on its own figure; drawing on figure 1 would
# overlay the predicted points on top of the real-data scatter.
plt.figure(2)
for i in range(cluster):
    plt.scatter(X[y_predict==i, 0], X[y_predict==i,1],
               marker='o',
               s=8,
               c=color[i]
               )
plt.title('Predict Result')
plt.savefig(r'E:\python\ml\ml by myself\Kmeans\kmeans_predict_myslef.png')
sys.exit(0)

结果

posted @ 2021-10-01 21:10  MrDaddy  阅读(45)  评论(0)    收藏  举报