常见回归:SVR, GLM, Ridge回归, Lasso回归, 多项式回归 常见聚类:K-means, DBSCAN 常见分类:SVM, 朴素贝叶斯, 决策树, MLP
多项式回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
# Polynomial regression: expand inputs into degree-4 polynomial features,
# then fit ordinary least squares on the expanded features.
# NOTE(review): `x` and `y1` are assumed to be 1-D numpy arrays defined
# earlier in the file — confirm against the preceding cells.
x = x.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y1, random_state=0)
poly_features = PolynomialFeatures(degree=4)
x_train_poly = poly_features.fit_transform(x_train)
# Use transform (not fit_transform) on test data: the feature expansion
# must be the one fitted on the training set.
x_test_poly = poly_features.transform(x_test)
xp = np.linspace(x_test.min(), x_test.max(), 1000).reshape(-1, 1)
reg2 = linear_model.LinearRegression().fit(x_train_poly, y_train)
# Test-set scatter plot
plt.scatter(x_test, y_test, s=1, color='red')
# Prediction curve over the test-input range
plt.plot(xp, reg2.predict(poly_features.transform(xp)), 'b-')
plt.show()
diff = y_test - reg2.predict(x_test_poly)
# A prediction counts as "qualified" when |prediction - truth| < 0.1;
# vectorized count replaces the original element-by-element loop.
num = int(np.sum(np.abs(diff) < 0.1))
print("测试集样本数:", diff.shape[0])
print("测试集样本预测合格数:", num)
print("测试集合格比例:", num/diff.shape[0])
SVR回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm

# Support-vector regression with the default RBF kernel.
# NOTE(review): `x` and `y1` are assumed to be 1-D numpy arrays defined
# earlier in the file — confirm against the preceding cells.
x = x.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y1, random_state=0)
reg1 = svm.SVR().fit(x_train, y_train)
# Test-set scatter plot (the original drew it twice; once is enough).
plt.scatter(x_test, y_test, s=1, color='red')
# Smooth prediction curve over the test-input range.
xp1 = np.linspace(x_test.min(), x_test.max(), 1000).reshape(-1, 1)
plt.plot(xp1, reg1.predict(xp1), 'b-')
plt.show()
diff = y_test - reg1.predict(x_test)
# A prediction counts as "qualified" when |prediction - truth| < 0.1;
# vectorized count replaces the original element-by-element loop.
num = int(np.sum(np.abs(diff) < 0.1))
print("测试集样本数:", diff.shape[0])
print("测试集样本预测合格数:", num)
print("测试集合格比例:", num / diff.shape[0])
线性回归
# Plain linear-regression baseline on the existing train/test split.
reg1 = linear_model.LinearRegression()
reg1.fit(x_train, y_train)
# Red dots: test samples; blue line: model predictions on the test inputs.
plt.scatter(x_test, y_test, s=1, color='red')
y_hat = reg1.predict(x_test)
plt.plot(x_test, y_hat, 'b-')
plt.show()
print('linear score: %.5f' % reg1.score(x_test, y_test))
岭回归
# Ridge regression (L2-regularized linear regression, default alpha=1.0).
# NOTE(review): `x` and `y1` are assumed to be 1-D numpy arrays defined
# earlier in the file — confirm against the preceding cells.
x = x.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y1, random_state=0)
reg = linear_model.Ridge().fit(x_train, y_train)
print("training set score:{:.2f}".format(reg.score(x_train, y_train)))
print("test set score:{:.2f}".format(reg.score(x_test, y_test)))
print('Coefficients:', reg.coef_)
print('intercept:', reg.intercept_)
# Predict once and reuse — the original computed predictions twice and
# left `y_pred` unused.
y_pred = reg.predict(x_test)
plt.scatter(x_test, y_test, s=1, color='red')
plt.plot(x_test, y_pred, 'b-')
plt.show()
print('Ridge score: %.5f' % reg.score(x_test, y_test))
决策树
from sklearn import tree
# Loan-approval toy dataset (the classic statistical-learning example).
# Feature encoding, one column each:
#   age:    youth=2, middle-aged=1, senior=0
#   job:    has job=1, no job=0
#   house:  owns house=1, no house=0
#   credit: excellent=2, good=1, fair=0
x = [
    [2, 0, 0, 0],  # 1
    [2, 0, 0, 1],  # 2
    [2, 1, 0, 1],  # 3
    [2, 1, 1, 0],  # 4
    [2, 0, 0, 0],  # 5
    [1, 0, 0, 0],  # 6
    [1, 0, 0, 1],  # 7
    [1, 1, 1, 1],  # 8
    [1, 0, 1, 2],  # 9
    [1, 0, 1, 2],  # 10
    [0, 0, 1, 2],  # 11
    [0, 0, 1, 1],  # 12
    [0, 1, 0, 1],  # 13
    [0, 1, 0, 2],  # 14
    [0, 0, 0, 0],  # 15
]
# Label: loan approved=1, rejected=0
y = [0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0]
# Plain Python lists are accepted by sklearn directly; the commented-out
# np.array conversions in the original were dead code and are removed.
model = tree.DecisionTreeClassifier(criterion='entropy').fit(x, y)
tree.plot_tree(model)
plt.show()
参数criterion='entropy'以信息熵/信息增益作为划分标准(类似ID3/C4.5的准则),criterion='gini'以基尼系数作为划分标准(CART的默认准则)。注意:sklearn内部实现的始终是优化版的CART树,criterion只改变不纯度度量,并不会切换成ID3算法。
k折交叉验证
from sklearn.model_selection import KFold
# 6-fold cross-validation demo: print the train/test index split for each
# fold over 100 random integers in [-20, 20).
# Vectorized generation replaces the original element-by-element fill loop;
# dtype stays float64, matching the original np.zeros buffer.
x = np.random.randint(-20, 20, size=100).astype(np.float64)
kf = KFold(n_splits=6)
for train_idx, test_idx in kf.split(x):
    print("训练集索引:", train_idx, "\n", "测试集索引:", test_idx, ";")