生成样本点:
import numpy as np
import matplotlib.pyplot as plt
def true_fun(X):
    """Return the noise-free linear target ``1.5 * X + 0.2``.

    The expression is applied element-wise, so ``X`` may be a scalar or a
    NumPy array; the result has the same shape as ``X``.
    """
    return 1.5 * X + 0.2
# Fix the RNG seed so the sampled data set is reproducible.
np.random.seed(0)
n_samples = 30

# Draw the inputs from np.random.rand and put them in ascending order.
X_train = np.random.rand(n_samples)
X_train.sort()

# Targets: the true line plus Gaussian noise scaled by 0.05, stored as a
# column vector of shape (n_samples, 1).
noise = 0.05 * np.random.randn(n_samples)
y_train = (true_fun(X_train) + noise)[:, np.newaxis]
线性回归实现:
# Design matrix: a leading column of ones (for the intercept) next to the
# training inputs, so each row is [1, x].
data_X = np.column_stack((np.ones_like(X_train), X_train))
m, p = np.shape(data_X)  # m samples, p parameters (intercept + slope)

max_iter = 1000  # number of gradient-descent steps
alpha = 0.1  # learning rate
weights = np.ones((p, 1))  # initial parameters [b, w] as a column vector

# Batch gradient descent on the least-squares objective: every step uses the
# residuals of all m samples.
for _ in range(max_iter):
    error = np.dot(data_X, weights) - y_train
    gradient = data_X.T.dot(error) / m
    weights = weights - alpha * gradient

# weights[0] multiplies the column of ones (intercept b); weights[1] is the
# slope w.
print("输出参数w:",weights[1:][0])
print("输出参数:b",weights[0])
结果:
输出参数w: [1.445439]
输出参数:b [0.22683262]
sklearn实现:
import numpy as np
from sklearn.linear_model import LinearRegression # 导入线性回归模型
import matplotlib.pyplot as plt
def true_fun(X):
    """Return the noise-free linear target ``1.5 * X + 0.2``.

    The expression is applied element-wise, so ``X`` may be a scalar or a
    NumPy array; the result has the same shape as ``X``.
    """
    return 1.5 * X + 0.2
# Seed the generator so the sampled training set is reproducible.
np.random.seed(0)
n_samples = 30

# Generate random data to serve as the training set.
X_train = np.random.rand(n_samples)
X_train.sort()
noise = 0.05 * np.random.randn(n_samples)
y_train = (true_fun(X_train) + noise)[:, np.newaxis]

# Define the model and train it; the 1-D inputs are passed as a single
# feature column.
model = LinearRegression()
model.fit(X_train.reshape(-1, 1), y_train)
print("输出参数w:",model.coef_)  # fitted parameter w
print("输出参数:b",model.intercept_)  # fitted parameter b

# Compare the fitted line with the true function on an evenly spaced grid.
X_test = np.linspace(0, 1, 100)
y_fit = model.predict(X_test.reshape(-1, 1))
plt.plot(X_test, y_fit, label="Model")
plt.plot(X_test, true_fun(X_test), label="True function")
plt.scatter(X_train, y_train)  # draw the training points
plt.legend(loc="best")
plt.show()
结果:
输出参数w: [[1.4474774]]
输出参数:b [0.22557542]
多项式回归:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_val_score
def true_fun(X):
    """Return the noise-free target ``cos(1.5 * pi * X)``.

    The expression is applied element-wise, so ``X`` may be a scalar or a
    NumPy array; the result has the same shape as ``X``.
    """
    return np.cos(1.5 * np.pi * X)
np.random.seed(0)  # reproducible samples

n_samples = 30
degrees = [1, 4, 15]  # highest polynomial degree of each fitted model

# Noisy observations of the true function at sorted random inputs.
X = np.sort(np.random.rand(n_samples))
y = true_fun(X) + np.random.randn(n_samples) * 0.1

# Evaluation grid for plotting; it is the same for every degree.
X_test = np.linspace(0, 1, 100)

plt.figure(figsize=(14, 5))
for i, degree in enumerate(degrees):
    # One subplot per degree, laid out side by side, without axis ticks.
    ax = plt.subplot(1, len(degrees), i + 1)
    plt.setp(ax, xticks=(), yticks=())

    polynomial_features = PolynomialFeatures(degree=degree,
                                             include_bias=False)
    linear_regression = LinearRegression()
    # Chain the feature expansion and the regression with a Pipeline.
    pipeline = Pipeline([("polynomial_features", polynomial_features),
                         ("linear_regression", linear_regression)])
    pipeline.fit(X[:, np.newaxis], y)

    # Score the model with 10-fold cross-validation.
    scores = cross_val_score(pipeline, X[:, np.newaxis], y,
                             scoring="neg_mean_squared_error", cv=10)

    plt.plot(X_test, pipeline.predict(X_test[:, np.newaxis]), label="Model")
    plt.plot(X_test, true_fun(X_test), label="True function")
    plt.scatter(X, y, edgecolor='b', s=20, label="Samples")
    plt.xlabel("x")
    plt.ylabel("y")
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
    plt.legend(loc="best")
    # The scorer returns negated MSE, so flip the sign for display.
    plt.title("Degree {}\nMSE = {:.2e}(+/- {:.2e})".format(
        degree, -scores.mean(), scores.std()))
plt.show()
逻辑回归:
numpy实现:
import sys
from pathlib import Path
working_dir = Path().absolute()
curr_path = str(working_dir)
parent_path = str(working_dir.parent)
# Append the parent of the working directory to sys.path so that packages
# located there can be imported (presumably the local Mnist package).
sys.path.append(parent_path)
import numpy as np
from Mnist.load_data import load_local_mnist
(x_train, y_train), (x_test, y_test) = load_local_mnist(one_hot=False)

# Append a constant column of ones to the features so the bias is learned as
# the last component of theta.
ones_col = np.ones((len(x_train), 1), dtype=int)
x_train_modified = np.append(x_train, ones_col, axis=1)
ones_col = np.ones((len(x_test), 1), dtype=int)
x_test_modified = np.append(x_test, ones_col, axis=1)

# MNIST has ten labels (0-9). For this binary task the labels are collapsed
# to 1 for digit 1 and 0 for every other digit.
# NOTE(review): the original comment described detecting digit 0, but the
# code compares against 1; the code's behaviour is kept -- confirm intent.
y_train_modified = np.array([1 if label == 1 else 0 for label in y_train])
y_test_modified = np.array([1 if label == 1 else 0 for label in y_test])

n_iters = 10  # number of training iterations
# np.asmatrix replaces the np.mat alias, which was removed in NumPy 2.0.
x_train_modified_mat = np.asmatrix(x_train_modified)
# One weight per feature column (bias included), stored as a 1 x d matrix.
theta = np.asmatrix(np.zeros(x_train_modified.shape[1]))
lr = 0.01  # learning rate
def sigmoid(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``, element-wise.

    The value is computed as ``exp(-log(1 + exp(-x)))`` through
    ``np.logaddexp`` so that large negative inputs underflow to 0 instead of
    overflowing inside ``exp``.
    """
    return np.exp(-np.logaddexp(0, -x))
# Per-sample (stochastic) training: theta is updated after every sample, and
# the whole training set is swept n_iters times.
for i_iter in range(n_iters):
    for n, sample in enumerate(x_train_modified):
        # Predicted probability of the positive class for this sample.
        hypothesis = sigmoid(np.dot(sample, theta.T))
        error = y_train_modified[n] - hypothesis
        # error is 1 x 1 and the matrix row is 1 x d, so the product is the
        # 1 x d update direction for theta.
        grad = error * x_train_modified_mat[n]
        theta += lr * grad
    # Report progress once per completed sweep.
    print('LogisticRegression Model(learning_rate={},i_iter={})'.format(
        lr, i_iter+1))
sklearn实现:
import sys
from pathlib import Path
working_dir = Path().absolute()
curr_path = str(working_dir)
parent_path = str(working_dir.parent)
# Append the parent of the working directory to sys.path so that packages
# located there can be imported (presumably the local Mnist package).
sys.path.append(parent_path)
from Mnist.load_data import load_local_mnist
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
# Load MNIST through the project helper, with normalisation and one-hot
# encoding switched off.
train_set, test_set = load_local_mnist(normalize = False,one_hot = False)
X_train, y_train = train_set
X_test, y_test = test_set

# Work on a small subset: the first 2000 training and 200 test samples.
X_train = X_train[:2000]
y_train = y_train[:2000]
X_test = X_test[:200]
y_test = y_test[:200]

# lbfgs is a quasi-Newton solver.
model = LogisticRegression(max_iter=500, solver='lbfgs')
model.fit(X_train, y_train)

# Per-class precision/recall report on the held-out samples.
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))