import requests
import numpy as np
# Download the raw Iris CSV from the UCI repository and cache it locally as
# 'iris.data' so it can be read from disk afterwards.
# The timeout keeps the request from hanging indefinitely, and
# raise_for_status() stops here on an HTTP error instead of silently saving
# an error page as if it were the dataset.
r = requests.get(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    timeout=30,
)
r.raise_for_status()
with open('iris.data', 'w', encoding='utf-8') as f:
    f.write(r.text)
import pandas as pd
# Load the cached CSV, supplying the column labels explicitly via `names`.
# NOTE(review): the four numeric columns are presumably sepal length/width
# and petal length/width, in the UCI file's column order -- confirm.
column_names = ['e_cd', 'e_kd', 'b_cd', 'b_kd', 'cat']
data = pd.read_csv('iris.data', names=column_names)
# Preview the first five rows (head() defaults to n=5).
data.head()
|   | e_cd | e_kd | b_cd | b_kd | cat |
|---|------|------|------|------|-----|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | Iris-setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | Iris-setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | Iris-setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | Iris-setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | Iris-setosa |
# Number of rows (samples) in the loaded frame.
data.shape[0]
150
from sklearn.mixture import GaussianMixture
# Show the column labels of the loaded frame.
data.columns
Index(['e_cd', 'e_kd', 'b_cd', 'b_kd', 'cat'], dtype='object')
# Fit on the four numeric measurement columns only; the 'cat' label column is
# left out so the model never sees the true species.
feature_columns = ['e_cd', 'e_kd', 'b_cd', 'b_kd']
data_train = data[feature_columns]
# Three mixture components, each with its own full covariance matrix.
# random_state is fixed so repeated runs give the same fit.
gmm = GaussianMixture(
    n_components=3,
    covariance_type='full',
    random_state=0,
)
gmm.fit(data_train)
GaussianMixture(covariance_type='full', init_params='kmeans', max_iter=100,
means_init=None, n_components=3, n_init=1, precisions_init=None,
random_state=0, reg_covar=1e-06, tol=0.001, verbose=0,
verbose_interval=10, warm_start=False, weights_init=None)
# Fitted component means (displayed as one row per component, one column per feature).
gmm.means_
array([[5.006 , 3.418 , 1.464 , 0.244 ],
[6.54639415, 2.94946365, 5.48364578, 1.98726565],
[5.9170732 , 2.77804839, 4.20540364, 1.29848217]])
# Fitted covariance matrices (displayed as one 4x4 matrix per component).
gmm.covariances_
array([[[0.121765 , 0.098292 , 0.015816 , 0.010336 ],
[0.098292 , 0.142277 , 0.011448 , 0.011208 ],
[0.015816 , 0.011448 , 0.029505 , 0.005584 ],
[0.010336 , 0.011208 , 0.005584 , 0.011265 ]],
[[0.38744093, 0.09223276, 0.30244302, 0.06087397],
[0.09223276, 0.11040914, 0.08385112, 0.05574334],
[0.30244302, 0.08385112, 0.32589574, 0.07276776],
[0.06087397, 0.05574334, 0.07276776, 0.08484505]],
[[0.2755171 , 0.09662295, 0.18547072, 0.05478901],
[0.09662295, 0.09255152, 0.09103431, 0.04299899],
[0.18547072, 0.09103431, 0.20235849, 0.06171383],
[0.05478901, 0.04299899, 0.06171383, 0.03233775]]])
# Fitted mixing weights, one per component.
gmm.weights_
array([0.33333333, 0.36539574, 0.30127092])
# Assign every training sample to a mixture component and display the
# resulting component index per sample.
pre_target = gmm.predict(data_train)
pre_target
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 1, 2, 1, 2, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])
# Score the clustering against the true species labels in the 'cat' column.
y_target = data['cat']
# Mixture component indices are arbitrary (they depend on initialisation), so
# derive the species -> component mapping from the predictions instead of
# hard-coding it: each species is assigned the component that received most
# of its samples.
# NOTE: this is a per-species majority vote, so two species can end up mapped
# to the same component if the clustering merges them.
key_map = pd.crosstab(y_target, pre_target).idxmax(axis=1).to_dict()
y_target_num = y_target.map(key_map)
# Fraction of samples whose predicted component matches their species' component.
np.mean(y_target_num == pre_target)
0.9666666666666667