# numpy实现k均值聚类 (k-means clustering implemented with NumPy)
import numpy as np
import matplotlib.pyplot as plt
# Sample matrix: 300 rows of 5 random integers drawn from [-300, 300).
# k() reads columns 0-3 as point coordinates and overwrites column 4 with
# each row's cluster index.
train = np.random.randint(low=-300, high=300, size=(300, 5))
# A single random 4-component integer vector in [-100, 100); k() builds its
# own local array of the same name.
w1 = np.random.randint(low=-100, high=100, size=(1, 4))
def cir(data):
    """Return the largest squared Euclidean distance between any two rows.

    Only columns 0-3 of each row take part in the distance; any further
    columns (for example a cluster label in column 4) are ignored.

    Args:
        data: 2-D array-like of rows with at least four columns. An empty
            array or a single row is accepted as well.

    Returns:
        The maximum squared pairwise distance, or 0 when ``data`` holds
        fewer than two rows (no pair exists, so the spread is zero rather
        than a negative sentinel).
    """
    points = np.asarray(data)
    if len(points) < 2:
        return 0
    coords = points[:, :4]
    # Broadcasting builds every pairwise coordinate difference at once;
    # this needs O(n^2) memory, which is fine for cluster-sized inputs.
    diffs = coords[:, None, :] - coords[None, :, :]
    return (diffs * diffs).sum(axis=-1).max()
def k(i):
    """Run k-means with ``i + 1`` clusters on the module-level ``train`` array.

    Columns 0-3 of ``train`` are used as point coordinates. As a side
    effect, column 4 of ``train`` is overwritten with the index of the
    cluster each row is assigned to.

    Centroids start at random integer positions in [-100, 100) per
    coordinate and are refined until an update step leaves every centroid
    unchanged. A cluster that receives no rows keeps its previous centroid.

    Args:
        i: Number of clusters minus one (``i = 0`` means a single cluster).

    Returns:
        The sum of ``cir(rows_of_cluster)`` over all clusters, divided by
        the number of clusters.
    """
    n_clusters = i + 1
    features = train[:, :4]
    centroids = np.double(np.random.randint(-100, 100, (n_clusters, 4)))

    while True:
        # Assignment step: squared distance from every row to every
        # centroid; argmin keeps the lowest index on ties, as a strict
        # "<" scan over the centroids would.
        offsets = features[:, None, :] - centroids[None, :, :]
        sq_dist = (offsets * offsets).sum(axis=2)
        train[:, 4] = sq_dist.argmin(axis=1)

        # Update step: move each non-empty cluster's centroid to the mean
        # of its members.
        previous = centroids.copy()
        for j in range(n_clusters):
            members = features[train[:, 4] == j]
            if len(members) != 0:
                centroids[j] = members.mean(axis=0)

        # Compare ALL centroids; checking only the first ``i`` of them
        # would ignore movement of the last centroid and could stop early.
        if np.array_equal(centroids, previous):
            break

    loss = 0
    for j in range(n_clusters):
        loss = loss + cir(train[train[:, 4] == j])
    return loss / n_clusters
# Evaluate the clustering loss for 1..200 clusters and plot it against the
# cluster count.
zer = np.zeros(200)
for i in range(zer.size):
    # k(i) clusters the data into i + 1 groups and returns the loss.
    zer[i] = k(i)
# x-axis: the cluster count belonging to each entry of zer.
x = np.arange(1, zer.size + 1)
plt.plot(x, zer)
print(zer)
plt.show()

# 浙公网安备 33010602011771号