sklearn GMM

390 阅读1分钟
import requests
import numpy as np
# Download the raw Iris data set from the UCI repository and cache it on disk
# as 'iris.data' so it can be loaded with pandas afterwards.
r = requests.get(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    timeout=30,  # never hang forever on an unresponsive server
)
# Abort on an HTTP error instead of silently saving an error page as the CSV.
r.raise_for_status()
with open('iris.data', 'w') as f:
    f.write(r.text)
import pandas as pd


# Load the cached file, supplying the column labels explicitly.
# NOTE(review): the short names presumably abbreviate sepal length/width and
# petal length/width followed by the species label -- confirm against the
# data set description.
column_names = ['e_cd', 'e_kd', 'b_cd', 'b_kd', 'cat']
data = pd.read_csv('iris.data', names=column_names)
data.head(5)
e_cd e_kd b_cd b_kd cat
0 5.1 3.5 1.4 0.2 Iris-setosa
1 4.9 3.0 1.4 0.2 Iris-setosa
2 4.7 3.2 1.3 0.2 Iris-setosa
3 4.6 3.1 1.5 0.2 Iris-setosa
4 5.0 3.6 1.4 0.2 Iris-setosa
# Row count of the loaded frame.
len(data)
150
from sklearn.mixture import GaussianMixture

# Show the column labels of the loaded frame.
data.columns
Index(['e_cd', 'e_kd', 'b_cd', 'b_kd', 'cat'], dtype='object')
# Keep only the four numeric measurement columns; the 'cat' label column is
# left out so the mixture model is fitted without seeing the labels.
feature_columns = ['e_cd', 'e_kd', 'b_cd', 'b_kd']
data_train = data[feature_columns]

# Three components with unconstrained ('full') covariance matrices;
# random_state is fixed so repeated runs give the same fit.
gmm = GaussianMixture(
    n_components=3,
    covariance_type='full',
    random_state=0,
)
gmm.fit(data_train)
GaussianMixture(covariance_type='full', init_params='kmeans', max_iter=100,
        means_init=None, n_components=3, n_init=1, precisions_init=None,
        random_state=0, reg_covar=1e-06, tol=0.001, verbose=0,
        verbose_interval=10, warm_start=False, weights_init=None)
# Fitted mean vector of each mixture component (one row per component,
# one column per feature).
gmm.means_
array([[5.006     , 3.418     , 1.464     , 0.244     ],
       [6.54639415, 2.94946365, 5.48364578, 1.98726565],
       [5.9170732 , 2.77804839, 4.20540364, 1.29848217]])
# Fitted covariance matrix of each mixture component (one 4x4 matrix per
# component, since covariance_type='full').
gmm.covariances_
array([[[0.121765  , 0.098292  , 0.015816  , 0.010336  ],
        [0.098292  , 0.142277  , 0.011448  , 0.011208  ],
        [0.015816  , 0.011448  , 0.029505  , 0.005584  ],
        [0.010336  , 0.011208  , 0.005584  , 0.011265  ]],

       [[0.38744093, 0.09223276, 0.30244302, 0.06087397],
        [0.09223276, 0.11040914, 0.08385112, 0.05574334],
        [0.30244302, 0.08385112, 0.32589574, 0.07276776],
        [0.06087397, 0.05574334, 0.07276776, 0.08484505]],

       [[0.2755171 , 0.09662295, 0.18547072, 0.05478901],
        [0.09662295, 0.09255152, 0.09103431, 0.04299899],
        [0.18547072, 0.09103431, 0.20235849, 0.06171383],
        [0.05478901, 0.04299899, 0.06171383, 0.03233775]]])
# Fitted mixing weight of each component.
gmm.weights_
array([0.33333333, 0.36539574, 0.30127092])
# Assign each training sample a component index using the fitted model.
pre_target = gmm.predict(data_train)
pre_target
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# Compare the predicted component indices with the true species labels.
y_target = data['cat']

# Component indices are arbitrary, so each species name is translated to the
# index it received in this particular fit.
# NOTE: this mapping is hard-coded for the fit above; a different
# random_state or initialisation may permute the indices.
key_map = {
    'Iris-setosa': 0,
    'Iris-versicolor': 2,
    'Iris-virginica': 1,
}
y_target_num = y_target.map(key_map.get)

# Fraction of samples whose component index matches the translated label.
np.mean(y_target_num == pre_target)
0.9666666666666667