# -*- coding: utf-8 -*-
"""Compare K-means clusterings of a toy 2-D data set via the silhouette coefficient.

The script draws two figures:

1. A 3x2 grid of subplots: the raw samples, followed by the K-means
   assignment for each cluster count in ``clusters``, each titled with its
   silhouette coefficient.
2. A line plot of the silhouette coefficient against the cluster count.
"""
import matplotlib.pyplot as plt  # plotting
import numpy as np
from sklearn.cluster import KMeans  # K-means clustering
from sklearn.metrics import silhouette_score  # silhouette coefficient

# Subplot 1 of the 3x2 grid: the raw, unlabelled samples.
plt.subplot(3, 2, 1)
x1 = np.array([1, 2, 3, 1, 5, 6, 5, 5, 6, 7, 8, 9, 7, 9])
x2 = np.array([1, 3, 2, 2, 8, 6, 7, 6, 7, 1, 2, 1, 1, 3])
# One (x1, x2) row per sample.  column_stack is used instead of
# np.array(zip(...)): on Python 3 zip() returns an iterator, which np.array
# wraps as a 0-d object array, so the subsequent reshape fails.
x = np.column_stack((x1, x2))

plt.xlim([0, 10])
plt.ylim([0, 10])
plt.title('Instances')
plt.scatter(x1, x2)

# One colour/marker pair per cluster label.  The eighth colour repeats 'b',
# but its '+' marker still distinguishes that cluster.
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k', 'b']
markers = ['o', 's', 'D', 'v', '^', 'p', '*', '+']

clusters = [2, 3, 4, 5, 8]  # cluster counts to evaluate
sc_scores = []  # silhouette coefficient obtained for each entry of clusters

# Subplot 1 holds the raw data, so the clusterings occupy subplots 2..6.
for subplot_index, t in enumerate(clusters, start=2):
    plt.subplot(3, 2, subplot_index)
    # n_init is pinned so the number of restarts does not depend on the
    # default of the installed scikit-learn version.
    kmeans_model = KMeans(n_clusters=t, n_init=10).fit(x)
    labels = kmeans_model.labels_

    # Draw each cluster with its own colour and marker (no connecting line).
    for label in range(t):
        members = labels == label
        plt.plot(x1[members], x2[members], color=colors[label],
                 marker=markers[label], ls='None')

    plt.xlim([0, 10])
    plt.ylim([0, 10])
    sc_score = silhouette_score(x, labels, metric='euclidean')
    sc_scores.append(sc_score)
    plt.title('K=%s,silhouette coefficient=%0.03f' % (t, sc_score))

# Second figure: silhouette coefficient against the number of clusters.
plt.figure()
plt.plot(clusters, sc_scores, '*-')
plt.xlabel('Numbers of clusters')
plt.ylabel('Silhouette Coefficient score')
plt.show()
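# Figure captions:
#   1. Example of using the silhouette coefficient to evaluate K-means
#      clustering with different numbers of clusters.
#   2. Silhouette coefficient as a function of the number of clusters.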