# 轮廓系数 (silhouette coefficient)
# Sweep KMeans over k = 2..24 on the feature matrix read from
# "red_nopca_nolabel.txt", print the silhouette score for every k, and
# finally print the candidates ranked from best to worst score.
import numpy as np
from sklearn.cluster import KMeans
from pylab import *
import codecs
import matplotlib.pyplot as plt
try:
    # Newer scikit-learn releases only ship the correctly spelled function;
    # alias it so the historical identifier stays available in this file.
    from sklearn.metrics import calinski_harabasz_score as calinski_harabaz_score
except ImportError:
    # Older scikit-learn releases only provide the misspelled name.
    from sklearn.metrics import calinski_harabaz_score
import pandas as pd
from numpy.random import random
from sklearn import preprocessing
from sklearn import metrics
import operator

data = []
labels = []
number1 = 10  # number of leading comma-separated columns parsed from each row

# codecs.open() without an encoding argument simply returns the built-in
# file object, so the plain open() call below is equivalent.
with open("red_nopca_nolabel.txt", "r") as f:
    for line in f:
        line1 = line.strip()
        if not line1:
            # Blank lines (e.g. a trailing empty line) would otherwise crash
            # on float('').
            continue
        line2 = line1.split(',')
        # Index explicitly so a row with fewer than `number1` columns still
        # fails loudly with IndexError instead of producing a ragged matrix.
        data.append([float(line2[i]) for i in range(number1)])

datas = np.array(data)

# Fit one KMeans model per candidate k and record its silhouette score
# (Euclidean distance); silhouette_all[j] belongs to k = j + 2.
silhouette_all = []
for k in range(2, 25):
    kmeans_model = KMeans(n_clusters=k, random_state=1).fit(datas)
    labels = kmeans_model.labels_
    a = metrics.silhouette_score(datas, labels, metric='euclidean')
    silhouette_all.append(a)
    print('这个是k={}次时的轮廓系数:'.format(k), a)

# Map a label for each k to its silhouette score, stored as a string.
dic = {
    'k={}时轮廓系数'.format(cluster_count): '{}'.format(score)
    for cluster_count, score in enumerate(silhouette_all, start=2)
}

# The stored values are strings, so rank by their numeric value; a plain
# string sort would order e.g. '-0.3' above '-0.1' and '9e-05' above '0.7'.
rankdata = sorted(dic.items(), key=lambda item: float(item[1]), reverse=True)
print(rankdata)