九、KMeans聚类

179 阅读1分钟

##载入相关模块

import math

import random

import numpy as np

from sklearn import datasets,cluster

import matplotlib.pyplot as plt

from sklearn.datasets import load_iris

##载入数据,获得标签信息

# Load the Iris dataset bundled with scikit-learn.
iris = load_iris()

# Target labels shipped with the dataset (kept for reference; KMeans itself
# is fitted without them).
gt = iris['target']
# Bare expression: presumably for notebook display; no effect in a plain script.
gt

##获取属性信息

# Keep every sample but only the first two feature columns, so the
# clustering result can be drawn directly on a 2-D scatter plot.
data = iris['data'][:, :2]

# Bare expression: presumably for notebook display; no effect in a plain script.
data

##载入sklearn中的kmeans聚类模块

from sklearn.cluster import KMeans

##模型训练

# Fit a 3-cluster KMeans model on the two selected feature columns.
# random_state is pinned so the cluster numbering (and therefore the colours
# used when plotting) is reproducible between runs; n_init is given explicitly
# because its default differs between scikit-learn releases.
kmeans = KMeans(n_clusters=3, max_iter=100, n_init=10, random_state=0).fit(data)

##得到类标签

# Cluster index assigned to each row of `data` by the fitted model.
# NOTE: despite the `gt_` prefix, these come from KMeans, not from the
# dataset's own target labels (`gt`).
gt_labels__ = kmeans.labels_

# Bare expression: presumably for notebook display; no effect in a plain script.
gt_labels__

##得到类中心

# Cluster centres of the fitted model, one row per cluster, expressed in the
# same two feature columns the model was fitted on.
centers__ = kmeans.cluster_centers_

# Bare expression: presumably for notebook display; no effect in a plain script.
centers__

##绘图及可视化

# Split the samples by the cluster index KMeans assigned to them.
cat1 = data[gt_labels__ == 0]
cat2 = data[gt_labels__ == 1]
cat3 = data[gt_labels__ == 2]

# One colour per cluster index; the centroid markers reuse the same colours
# so each centre is visually tied to its own points.
cluster_colors = ('green', 'red', 'blue')

# Draw the member points of every cluster.
for members, color in zip((cat1, cat2, cat3), cluster_colors):
    plt.scatter(members[:, 0], members[:, 1], color=color)

# Draw the centroids last so the large triangles are not hidden underneath
# the sample points.
for center, color in zip(centers__, cluster_colors):
    plt.scatter(center[0], center[1], color=color, marker='^', edgecolor='black', s=256)

plt.title('kmeans using sklearn with k=3')

# Fixed axis limits for the two plotted feature columns.
plt.xlim(4, 8)
plt.ylim(1, 5)

plt.show()

##寻找K值

from sklearn.cluster import KMeans

# Elbow method: fit KMeans for a range of cluster counts and record a scaled
# inertia for each, then plot it against k.
loss = []
k_values = range(1, 10)

for k in k_values:
    # NOTE: this rebinds the module-level `kmeans`; after the loop it refers
    # to the model fitted with the largest k, not the earlier k=3 model.
    # random_state / n_init are pinned so the curve is reproducible.
    kmeans = KMeans(n_clusters=k, max_iter=100, n_init=10, random_state=0).fit(data)
    # Inertia averaged over the samples.
    # NOTE(review): the extra divisor 3 is an unexplained constant; it only
    # rescales the curve and does not move the elbow - confirm the intent.
    loss.append(kmeans.inertia_ / len(data) / 3)

plt.title('K with loss')
plt.plot(k_values, loss)
plt.show()