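# lines: the points to cluster, e.g. [[a, b], [c, d]]; k: the number of clusters; m: the clustering method
# m = 1: KMeans, intended for small data sets
# m = 2: MiniBatchKMeans, intended for large data sets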
import time
from collections import Counter
from enum import Enum

import numpy as np
from sklearn.cluster import KMeans
from sklearn.cluster import MiniBatchKMeans
class Cluter_ways(Enum):
    """Identifiers for the clustering methods offered by this file.

    ``cluster`` compares its ``m`` argument against these members to decide
    which estimator to run.
    """

    # Plain KMeans; the file header describes it as the small-data method.
    kMeans = 1
    # MiniBatchKMeans; the file header describes it as the large-data method.
    MiniBatchKMeans = 2
def kMeans(self, lines, km):
    """Fit ``km`` on ``lines`` and return its centroids, each with a size score.

    Args:
        lines: Numeric rows to cluster, e.g. ``[[a, b], [c, d]]``.
        km: Unfitted estimator exposing ``fit``, ``labels_`` and
            ``cluster_centers_`` (``KMeans`` / ``MiniBatchKMeans`` in this
            file).

    Returns:
        A list with one entry per centroid: the centroid coordinates followed
        by ``round((count + smallest) * 100.0 / (largest + smallest), 2)``.
        ``count`` is the number of samples labelled with that centroid;
        ``smallest`` and ``largest`` are the smallest and largest sizes among
        clusters that received at least one sample, so the most populated
        cluster always scores 100.0.
    """
    # Use a plain 2-D ndarray rather than np.mat: np.mat builds an np.matrix,
    # the alias no longer exists in NumPy 2.0, and recent scikit-learn
    # releases reject np.matrix input.  atleast_2d keeps np.mat's handling of
    # a flat list (one row).
    samples = np.atleast_2d(np.asarray(lines))
    km.fit(samples)

    # Number of samples assigned to each cluster index.
    sizes = Counter(int(label) for label in km.labels_)
    smallest = min(sizes.values())
    largest = max(sizes.values())
    span = largest + smallest

    centroids = km.cluster_centers_.tolist()
    for index, centroid in enumerate(centroids):
        # Counter yields 0 for a centroid no sample was assigned to, instead
        # of the None that previously crashed int().
        score = round((sizes[index] + smallest) * 100.0 / span, 2)
        centroid.append(score)
    return centroids
def k_ms(self, lines, k):
    """Cluster ``lines`` into ``k`` groups with a ``KMeans`` estimator.

    Builds ``KMeans(n_clusters=k)`` and hands it to ``self.kMeans``, whose
    result is returned unchanged.
    """
    estimator = KMeans(n_clusters=k)
    return self.kMeans(lines, estimator)
def Minik_ms(self, lines, k):
    """Cluster ``lines`` into ``k`` groups with a ``MiniBatchKMeans`` estimator.

    The estimator is created with ``batch_size=10000`` and
    ``reassignment_ratio=0.005`` and handed to ``self.kMeans``, whose result
    is returned unchanged.
    """
    estimator = MiniBatchKMeans(
        n_clusters=k,
        batch_size=10000,
        reassignment_ratio=0.005,
    )
    return self.kMeans(lines, estimator)
def cluster(self, lines, k, m):
    """Cluster ``lines`` into ``k`` groups using the method selected by ``m``.

    Prints ``time.time()`` before and after the work so the elapsed time can
    be read from the output.

    Args:
        lines: Numeric rows to cluster, e.g. ``[[a, b], [c, d]]``.
        k: Number of clusters to form.
        m: A ``Cluter_ways`` member or its raw value (``1`` for KMeans,
            ``2`` for MiniBatchKMeans).

    Returns:
        The result of ``k_ms`` or ``Minik_ms`` for the selected method, or
        ``lines`` unchanged when ``m`` does not name a known method.
    """
    print(time.time())

    # Lookup by value accepts both enum members and the raw integers the file
    # header documents; a plain `m == Cluter_ways.kMeans` is False for `1`.
    try:
        method = Cluter_ways(m)
    except ValueError:
        method = None

    if method is Cluter_ways.kMeans:
        print ('少量数据聚合')
        all_data = self.k_ms(lines, k)
    elif method is Cluter_ways.MiniBatchKMeans:
        print ('大量数据聚合')
        all_data = self.Minik_ms(lines, k)
    else:
        # Unknown method: hand the input back untouched.
        all_data = lines

    print(time.time())
    return all_data