常见回归:SVR, GLM, Ridge回归, Lasso回归, 多项式回归 常见聚类:K-means, DBSCAN 常见分类:SVM, 朴素贝叶斯, 决策树, MLP
多项式回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
# Polynomial regression: expand inputs into degree-4 polynomial features,
# then fit ordinary least squares on the expanded features.
# NOTE(review): `x` and `y1` are assumed to be 1-D numpy arrays defined
# earlier in the file — confirm against the preceding cells.
x = x.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y1, random_state=0)
poly_features = PolynomialFeatures(degree=4)
x_train_poly = poly_features.fit_transform(x_train)
# Use transform (not fit_transform) on test data: the feature expansion
# must be the one fitted on the training set.
x_test_poly = poly_features.transform(x_test)
xp = np.linspace(x_test.min(), x_test.max(), 1000).reshape(-1, 1)
reg2 = linear_model.LinearRegression().fit(x_train_poly, y_train)
# Test-set scatter plot
plt.scatter(x_test, y_test, s=1, color='red')
# Prediction curve over the test-input range
plt.plot(xp, reg2.predict(poly_features.transform(xp)), 'b-')
plt.show()
diff = y_test - reg2.predict(x_test_poly)
# A prediction counts as "qualified" when |prediction - truth| < 0.1;
# vectorized count replaces the original element-by-element loop.
num = int(np.sum(np.abs(diff) < 0.1))
print("测试集样本数:", diff.shape[0])
print("测试集样本预测合格数:", num)
print("测试集合格比例:", num/diff.shape[0])
SVR回归
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import svm

# Support-vector regression with the default RBF kernel.
# NOTE(review): `x` and `y1` are assumed to be 1-D numpy arrays defined
# earlier in the file — confirm against the preceding cells.
x = x.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y1, random_state=0)
reg1 = svm.SVR().fit(x_train, y_train)
# Test-set scatter plot (the original drew it twice; once is enough).
plt.scatter(x_test, y_test, s=1, color='red')
# Smooth prediction curve over the test-input range.
xp1 = np.linspace(x_test.min(), x_test.max(), 1000).reshape(-1, 1)
plt.plot(xp1, reg1.predict(xp1), 'b-')
plt.show()
diff = y_test - reg1.predict(x_test)
# A prediction counts as "qualified" when |prediction - truth| < 0.1;
# vectorized count replaces the original element-by-element loop.
num = int(np.sum(np.abs(diff) < 0.1))
print("测试集样本数:", diff.shape[0])
print("测试集样本预测合格数:", num)
print("测试集合格比例:", num / diff.shape[0])
线性回归
# Plain linear-regression baseline on the existing train/test split.
reg1 = linear_model.LinearRegression()
reg1.fit(x_train, y_train)
# Red dots: test samples; blue line: model predictions on the test inputs.
plt.scatter(x_test, y_test, s=1, color='red')
y_hat = reg1.predict(x_test)
plt.plot(x_test, y_hat, 'b-')
plt.show()
print('linear score: %.5f' % reg1.score(x_test, y_test))
岭回归
# Ridge regression (L2-regularized linear regression, default alpha=1.0).
# NOTE(review): `x` and `y1` are assumed to be 1-D numpy arrays defined
# earlier in the file — confirm against the preceding cells.
x = x.reshape(-1, 1)
x_train, x_test, y_train, y_test = train_test_split(x, y1, random_state=0)
reg = linear_model.Ridge().fit(x_train, y_train)
print("training set score:{:.2f}".format(reg.score(x_train, y_train)))
print("test set score:{:.2f}".format(reg.score(x_test, y_test)))
print('Coefficients:', reg.coef_)
print('intercept:', reg.intercept_)
# Predict once and reuse — the original computed predictions twice and
# left `y_pred` unused.
y_pred = reg.predict(x_test)
plt.scatter(x_test, y_test, s=1, color='red')
plt.plot(x_test, y_pred, 'b-')
plt.show()
print('Ridge score: %.5f' % reg.score(x_test, y_test))
决策树
from sklearn import tree
# Loan-approval toy dataset (the classic statistical-learning example).
# Feature encoding, one column each:
#   age:    youth=2, middle-aged=1, senior=0
#   job:    has job=1, no job=0
#   house:  owns house=1, no house=0
#   credit: excellent=2, good=1, fair=0
x = [
    [2, 0, 0, 0],  # 1
    [2, 0, 0, 1],  # 2
    [2, 1, 0, 1],  # 3
    [2, 1, 1, 0],  # 4
    [2, 0, 0, 0],  # 5
    [1, 0, 0, 0],  # 6
    [1, 0, 0, 1],  # 7
    [1, 1, 1, 1],  # 8
    [1, 0, 1, 2],  # 9
    [1, 0, 1, 2],  # 10
    [0, 0, 1, 2],  # 11
    [0, 0, 1, 1],  # 12
    [0, 1, 0, 1],  # 13
    [0, 1, 0, 2],  # 14
    [0, 0, 0, 0],  # 15
]
# Label: loan approved=1, rejected=0
y = [0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0]
# Plain Python lists are accepted by sklearn directly; the commented-out
# np.array conversions in the original were dead code and are removed.
model = tree.DecisionTreeClassifier(criterion='entropy').fit(x, y)
tree.plot_tree(model)
plt.show()
参数criterion='entropy'以信息熵/信息增益作为划分标准(类似ID3/C4.5的准则),criterion='gini'以基尼系数作为划分标准(CART的默认准则)。注意:sklearn内部实现的始终是优化版的CART树,criterion只改变不纯度度量,并不会切换成ID3算法。
k折交叉验证
from sklearn.model_selection import KFold
# 6-fold cross-validation demo: print the train/test index split for each
# fold over 100 random integers in [-20, 20).
# Vectorized generation replaces the original element-by-element fill loop;
# dtype stays float64, matching the original np.zeros buffer.
x = np.random.randint(-20, 20, size=100).astype(np.float64)
kf = KFold(n_splits=6)
for train_idx, test_idx in kf.split(x):
    print("训练集索引:", train_idx, "\n", "测试集索引:", test_idx, ";")