# K-means clustering implemented with NumPy

import numpy as np 
import matplotlib.pyplot as plt
# Random integer data set: 300 rows, 5 columns, values drawn from [-300, 300).
# k() reads columns 0-3 as coordinates and overwrites column 4 with the index
# of the cluster each row is assigned to.
train = np.random.randint(-300, 300, size=(300, 5))
# Module-level 1x4 integer array drawn from [-100, 100); k() creates its own
# local array of the same name and does not read this one.
w1 = np.random.randint(-100, 100, size=(1, 4))
def cir(data):
    """Return the largest squared Euclidean distance between two rows.

    Only the first four columns of each row are used as coordinates; any
    further columns (such as a cluster label) are ignored.

    Args:
        data: Array-like of rows. An empty or single-row input is accepted.

    Returns:
        The maximum squared distance over all pairs of rows, or ``-1`` when
        ``data`` has fewer than two rows and therefore contains no pair.
    """
    data = np.asarray(data)
    # Checked before slicing so that an empty 1-D array is handled too.
    if len(data) < 2:
        return -1

    points = data[:, :4]
    largest = -1
    for idx in range(len(points) - 1):
        # Compare row ``idx`` against every later row in one vectorised step,
        # which visits each unordered pair exactly once.
        offsets = points[idx + 1:] - points[idx]
        sq_dist = (offsets * offsets).sum(axis=1)
        largest = max(largest, sq_dist.max())
    return largest
def k(i):
    """Cluster the module-level ``train`` rows into ``i + 1`` groups.

    Columns 0-3 of ``train`` are the coordinates. Column 4 of ``train`` is
    overwritten in place with the index of the centroid each row is assigned
    to, so the global array is modified by every call.

    Centroids start as random integers in [-100, 100) and are updated with the
    mean of their assigned rows until a full pass leaves every centroid
    unchanged. A centroid that receives no rows keeps its previous position.

    Args:
        i: Number of clusters minus one; must be >= 0.

    Returns:
        The sum of ``cir(cluster)`` over all clusters divided by the number
        of clusters.

    Raises:
        ValueError: If ``i`` is negative (there would be no clusters).
    """
    if i < 0:
        raise ValueError("i must be >= 0 (the number of clusters is i + 1)")

    n_clusters = i + 1
    centers = np.double(np.random.randint(-100, 100, (n_clusters, 4)))
    features = train[:, :4]

    while True:
        # Assignment step: squared distance from every row to every centroid,
        # shape (n_rows, n_clusters). argmin keeps the lowest index on ties.
        offsets = features[:, None, :] - centers[None, :, :]
        sq_dist = (offsets * offsets).sum(axis=2)
        train[:, 4] = sq_dist.argmin(axis=1)

        # Update step: move each non-empty cluster's centroid to its mean.
        previous = centers.copy()
        for j in range(n_clusters):
            members = features[train[:, 4] == j]
            if len(members) != 0:
                centers[j] = members.mean(axis=0)

        # Compare ALL centroids; stopping while any centroid still moved
        # would leave the stored assignments out of date.
        if np.array_equal(centers, previous):
            break

    loss = 0
    for j in range(n_clusters):
        loss = loss + cir(train[train[:, 4] == j])
    return loss / n_clusters
# Run k() once for every cluster count from 1 to 200 (k(i) uses i + 1
# clusters) and keep the returned loss of each run.
zer = np.zeros(200)
for i in range(zer.size):
    zer[i] = k(i)

# x axis: the cluster count that produced each loss value.
x = np.arange(1, zer.size + 1)
plt.plot(x, zer)
print(zer)
plt.show()
# Posted 2021-08-31 17:19 by 祥瑞哈哈哈 (blog footer: 95 views, 0 comments)