# 轮廓系数 (silhouette coefficient)

 

 

import numpy as np
from sklearn.cluster import KMeans
from pylab import *
import codecs
import matplotlib.pyplot as plt
from sklearn.metrics import calinski_harabaz_score
import pandas as pd
from numpy.random import random
from sklearn import preprocessing 
from sklearn import metrics
import operator  

data = []
labels = []  # stays empty here: this block does not parse a label column
number1 = 10  # number of leading comma-separated columns read as features
# Parse the feature file: one sample per line, values separated by commas.
with codecs.open("red_nopca_nolabel.txt", "r") as f:
    for line in f:
        stripped = line.strip()
        # Skip blank lines (e.g. a trailing newline at the end of the file);
        # otherwise float('') below would raise ValueError.
        if not stripped:
            continue
        fields = stripped.split(',')
        # Index explicitly so a line with fewer than number1 columns still
        # raises IndexError instead of silently yielding a short row.
        data.append([float(fields[i]) for i in range(number1)])
datas = np.array(data)
'''
kmeans_model = KMeans(n_clusters=3, random_state=1).fit(datas)
labels = kmeans_model.labels_
a = metrics.silhouette_score(datas, labels, metric='euclidean')
print(a)
'''
# Silhouette score of the KMeans labelling of `datas` for each k in 2..24,
# appended in increasing order of k.
silhouette_all = []

for k in range(2, 25):
    model = KMeans(n_clusters=k, random_state=1)
    model.fit(datas)
    labels = model.labels_
    score = metrics.silhouette_score(datas, labels, metric='euclidean')
    silhouette_all.append(score)
    # Report the score for this k as soon as it is available.
    print('这个是k={}次时的轮廓系数:'.format(k), score)
    

# Map a label for each k to its silhouette score. The score is stored as text,
# as before, so the printed output format is unchanged.
dic = {}
# silhouette_all is ordered by k, and the first entry corresponds to k=2.
for mi_num, score in enumerate(silhouette_all, start=2):
    dic['k={}时轮廓系数'.format(mi_num)] = '{}'.format(score)
# Rank best-first by the numeric score. Comparing the text form would order
# lexicographically and misplace negative scores or values printed in
# exponent notation (e.g. '1e-05' would sort above '0.9').
rankdata = sorted(dic.items(), key=lambda item: float(item[1]), reverse=True)
print(rankdata)

 

# posted @ 2019-01-29 17:26  星涅爱别离  阅读(428)  评论(0)  编辑  收藏  举报