逻辑回归
1.什么是逻辑回归
令 $z = \theta^T x$,$g(z) = \dfrac{1}{1 + e^{-z}}$(即 sigmoid 函数),
与线性回归一样,它们都具有线性的参数形式 $\theta^T x$。
我们定义逻辑回归的预测函数为 $h_\theta(x) = g(\theta^T x) = \dfrac{1}{1 + e^{-\theta^T x}}$。
逻辑回归模型的损失函数由最大似然估计推导得到:
$$J(\theta) = -\frac{1}{m}\sum_{i=1}^{m}\left[y^{(i)}\log h_\theta(x^{(i)}) + (1-y^{(i)})\log\left(1-h_\theta(x^{(i)})\right)\right]$$
可以利用梯度下降算法求解模型参数
对单个样本,参数更新为 $\theta_j := \theta_j - \alpha\,\bigl(h_\theta(x^{(i)}) - y^{(i)}\bigr)\,x_j^{(i)}$。
按理说可以用 C++ 实现,不过鉴于心智负担放弃了。
为了防止过拟合,有两种方式:可以在损失函数之后加上 $\lambda\lVert\theta\rVert_1$ 或者 $\dfrac{\lambda}{2}\lVert\theta\rVert_2^2$ 项,
分别叫做 L1 正则化和 L2 正则化。
2. Python 实现验证
经典梯度下降算法,没有加正则化(啊啊啊不想写了)
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
def sigmoid(x):
    """Numerically stable element-wise logistic sigmoid.

    Uses the two algebraically equivalent forms
        1 / (1 + exp(-x))       for x >= 0
        exp(x) / (exp(x) + 1)   for x <  0
    so ``np.exp`` never receives a large positive argument (which would
    overflow to inf). The original implementation looped over the
    flattened array in Python; this version applies the same two-branch
    formula with vectorized boolean-mask indexing, which runs at C speed.

    Parameters
    ----------
    x : np.ndarray
        Input array of any shape.

    Returns
    -------
    np.ndarray
        Float array of the same shape, values in (0, 1).
    """
    x = np.asarray(x, dtype=float)
    out = np.empty_like(x)
    nonneg = x >= 0
    out[nonneg] = 1.0 / (1.0 + np.exp(-x[nonneg]))
    # x < 0 on this branch, so exp(x) cannot overflow.
    exp_neg = np.exp(x[~nonneg])
    out[~nonneg] = exp_neg / (exp_neg + 1.0)
    return out
class Logistic:
    """Logistic regression classifier trained with per-sample (stochastic)
    gradient descent on the log-loss. No regularization.

    Parameters
    ----------
    lr : float
        Learning rate for the gradient-descent updates.
    max_iter : int
        Number of full passes (epochs) over the training data.
    """

    def __init__(self, lr, max_iter):
        self.theta = None  # weight column vector, shape (n_features, 1); set by fit()
        self.x = None      # training inputs kept after fit(), shape (n_samples, n_features)
        self.y = None      # training labels (0/1), shape (n_samples,)
        self.lr = lr
        self.max_iter = max_iter

    def fit(self, in_arr, label_arr):
        """Fit ``theta`` with stochastic gradient descent.

        Parameters
        ----------
        in_arr : np.ndarray
            Training inputs, shape (n_samples, n_features).
        label_arr : np.ndarray
            Binary labels (0/1), shape (n_samples,).
        """
        self.x = in_arr
        self.y = label_arr
        # Random initialization in [0, 1); one weight per feature.
        self.theta = np.random.rand(in_arr.shape[1], 1)
        for _ in range(self.max_iter):
            for i in range(self.x.shape[0]):
                xi = self.x[i].reshape(1, self.x[i].size)   # row vector (1, n_features)
                h = sigmoid(np.dot(self.theta.T, xi.T))     # predicted probability
                # Gradient of the log-loss for one sample: (h - y) * x.
                grad = (h - self.y[i]) * xi
                self.theta -= self.lr * grad.T

    def score(self, x, y):
        """Return classification accuracy of the fitted model on (x, y).

        Parameters
        ----------
        x : np.ndarray
            Test inputs, shape (n_samples, n_features).
        y : np.ndarray
            True binary labels, shape (n_samples,).

        Returns
        -------
        float
            Fraction of correctly classified samples in [0, 1].
        """
        hit = 0
        for i in range(x.shape[0]):
            # BUG FIX: reshape using this sample's own size. The original used
            # self.x[i].size (a *training* row), which fails when score() is
            # called before fit() or with more test rows than training rows.
            xi = x[i].reshape(1, x[i].size)
            h = sigmoid(np.dot(self.theta.T, xi.T))
            # BUG FIX: threshold the probability at 0.5. The original cast the
            # probability to int32, which truncates every value below exactly
            # 1.0 down to class 0.
            pred = 1 if h.item() >= 0.5 else 0
            if pred == y[i]:
                hit += 1
        return hit / y.size
# Sanity check on the sklearn breast-cancer dataset: train on 60% of the
# samples and report accuracy on the held-out 40%.
data = load_breast_cancer()
X, Y = data.data, data.target
x_train, x_test, y_train, y_test = train_test_split(X, Y, train_size=0.6)

LR = Logistic(0.2, 100)
LR.fit(x_train, y_train)
score = LR.score(x_test, y_test)
print('score:', score)