##载入相关模块
import math
import random
import numpy as np
from sklearn import datasets,cluster
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris
## Load the iris dataset and pull out the class labels stored under 'target'
iris = load_iris()
gt = iris['target']
gt  # bare expression: echoed by a notebook/REPL, no effect when run as a script
## Feature matrix: keep only the first two columns of iris['data']
data = iris['data'][:, :2]
data  # bare expression: echoed by a notebook/REPL, no effect when run as a script
## Import the k-means clustering estimator from scikit-learn
from sklearn.cluster import KMeans
## Train the model: 3 clusters, at most 100 iterations
kmeans = KMeans(n_clusters=3, max_iter=100).fit(data)
## Cluster label assigned to each sample by the fitted model
gt_labels__ = kmeans.labels_
gt_labels__  # bare expression: echoed by a notebook/REPL, no effect when run as a script
## Cluster centres found by the fitted model
centers__ = kmeans.cluster_centers_
centers__  # bare expression: echoed by a notebook/REPL, no effect when run as a script
## Plotting and visualisation: samples of each cluster plus the cluster centres
# One colour per cluster label; the samples and the centre of a cluster share
# it so that each centre can be matched to its points at a glance.
cluster_colors = ('green', 'red', 'blue')
# Split the samples by the label k-means assigned to them.
cat1 = data[gt_labels__ == 0]
cat2 = data[gt_labels__ == 1]
cat3 = data[gt_labels__ == 2]
plt.scatter(cat1[:, 0], cat1[:, 1], color=cluster_colors[0])
plt.scatter(cat2[:, 0], cat2[:, 1], color=cluster_colors[1])
plt.scatter(cat3[:, 0], cat3[:, 1], color=cluster_colors[2])
# Centres are drawn after the samples so the large triangles stay on top.
for ix, p in enumerate(centers__):
    plt.scatter(p[0], p[1], color=cluster_colors[ix], marker='^', edgecolor='black', s=256)
plt.title('kmeans using sklearn with k=3')
plt.xlim(4, 8)
plt.ylim(1, 5)
plt.show()
## Search for a suitable K: fit k-means for K = 1..9 and plot the loss against K
from sklearn.cluster import KMeans
loss = []
# NOTE: the loop rebinds `kmeans`, so afterwards it refers to the K=9 model,
# not to any model fitted earlier in the script.
for i in range(1, 10):
    kmeans = KMeans(n_clusters=i, max_iter=100).fit(data)
    # inertia_ divided by the number of samples; the extra / 3 is a constant
    # scale factor, so it rescales the curve without changing its shape.
    loss.append(kmeans.inertia_ / len(data) / 3)
plt.title('K with loss')
plt.plot(range(1, 10), loss)
plt.show()