LR (Logistic Regression)
LR fits a linear model to the log odds (logit) of the positive class:
log(p / (1 - p)) = w*x + b
p = 1 / (1 + e^(-(w*x + b)))
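As a quick sanity check that the two formulas agree (the weights and inputs below are arbitrary, chosen only for illustration):

import numpy as np

w, b = np.array([0.8, -0.3]), 0.5          # arbitrary weights and bias, illustration only
x = np.array([1.2, 2.0])                   # arbitrary input
z = w.dot(x) + b                           # linear score w*x + b
p = 1 / (1 + np.exp(-z))                   # sigmoid turns the score into a probability
print(np.isclose(np.log(p / (1 - p)), z))  # True: the log odds of p recover the linear score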
import numpy as np
from sklearn import preprocessing
from sklearn import datasets
class LogisticRegression:
    def __init__(self, learning_rate=0.1, lamb=0.001, iters=1000, kernel='sigmoid'):
        self.learning_rate = learning_rate  # learning rate, default=0.1
        self.lamb = lamb                    # regularization strength, default=0.001
        self.iters = iters                  # maximum number of iterations
        self.kernel = kernel                # activation used: 'sigmoid' / 'softmax'
        self.theta = np.zeros((1, 1))       # parameters kept as a 2-D array, 1*1 placeholder for now
        self.cost = []                      # records the loss value at each iteration
    @staticmethod
    def sigmoid(features, theta):
        inner = np.dot(features, theta.T)
        return 1 / (1 + np.exp(-inner))

    @staticmethod
    def softmax(features, theta):
        inner = features.dot(theta.T)
        return np.exp(inner) / np.sum(np.exp(inner), axis=1, keepdims=True)  # normalize along axis=1, i.e. per row
    def cal_cost(self, features, target, theta, lamb):
        m = features.shape[0]
        if self.kernel == 'sigmoid':
            inner = self.sigmoid(features, theta)   # activated x*w
        else:
            inner = self.softmax(features, theta)
        first = np.multiply(-target, np.log(inner))               # first half of the cross-entropy
        second = np.multiply(-(1 - target), np.log(1 - inner))    # second half
        reg = lamb / (2 * m) * np.sum(np.power(theta[:, 1:], 2))  # L2 regularization, bias column excluded
        return np.sum(first + second) / m + reg  # mean cross-entropy loss + regularization
    def training(self, features, target):
        features = np.insert(features, 0, 1, axis=1)  # insert a column of 1s at column 0 so it multiplies the bias theta0 (b and W live in one parameter matrix, so X needs an extra column)
        m, n = features.shape   # m=150, n=5 (4 features + 1)
        k = target.shape[1]     # number of target classes, 3
        self.theta = np.zeros((k, n))  # n = number of features + 1, so 3*5
        for _ in range(self.iters):    # gradient descent
            if self.kernel == 'sigmoid':
                inner = self.sigmoid(features, self.theta)
            else:
                inner = self.softmax(features, self.theta)  # (150*5)(5*3) = 150*3
            error = inner - target  # prediction error
            grad = error.T.dot(features) / m + self.lamb / m * self.theta  # dW: gradient of the weights (derivation sketched after the class)
            grad[:, 0] = np.sum(error, axis=0) / m  # db = mean(p - y): recompute the bias gradient without regularization
            self.theta -= self.learning_rate * grad  # update theta
            self.cost.append(self.cal_cost(features, target, self.theta, self.lamb))  # record the current loss
        return self.cost  # originally an empty return
    def predict(self, features, threshold=0.5):
        features = np.insert(features, 0, 1, axis=1)
        if self.kernel == 'sigmoid':
            inner = self.sigmoid(features, self.theta)
            return [1 if i[0] >= threshold else 0 for i in inner]
        else:
            inner = self.softmax(features, self.theta)
            return np.argmax(inner, axis=1)  # index of the most probable class
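For reference (this derivation is not spelled out in the original post, it only restates what training computes): differentiating the regularized mean cross-entropy gives the gradients applied above. In LaTeX form,

J(\Theta) = -\frac{1}{m}\sum_{i=1}^{m}\big[y_i \log p_i + (1-y_i)\log(1-p_i)\big] + \frac{\lambda}{2m}\sum_{j\ge 1}\theta_j^{2}

\frac{\partial J}{\partial \Theta} = \frac{1}{m}(P-Y)^{\top}X + \frac{\lambda}{m}\Theta,
\qquad
\frac{\partial J}{\partial \theta_0} = \frac{1}{m}\sum_{i=1}^{m}(p_i - y_i)

which corresponds to grad = error.T.dot(features)/m + self.lamb/m * self.theta, with the bias row recomputed without the regularization term.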
def test_sigmoid():
    features, target = datasets.make_classification(n_samples=300)
    target = target.reshape(target.shape[0], 1)
    lr = LogisticRegression()
    lr.training(features, target)  # train
    predict = lr.predict(features)
    correct = [1 if a == b[0] else 0 for a, b in zip(predict, target)]
    accuracy = correct.count(1) / len(correct)
    print('accuracy={}%'.format(accuracy * 100))
def test_softmax():
    dataset = datasets.load_iris()  # 3-class dataset
    features, target = dataset['data'], dataset['target']
    target = target.reshape(-1, 1)  # features: 150 samples, 4 features; target: 150 samples, 1 label
    enc = preprocessing.OneHotEncoder()
    target_train = enc.fit_transform(target).toarray()  # one-hot encoding
    lr = LogisticRegression(learning_rate=0.1, lamb=0, iters=5000, kernel='softmax')
    cost = lr.training(features, target_train)
    print(cost[-5:])  # last 5 loss values
    predict = lr.predict(features)
    correct = [1 if a == b[0] else 0 for a, b in zip(predict, target)]
    accuracy = correct.count(1) / len(correct)
    print('accuracy={}%'.format(accuracy * 100))

test_softmax()
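Both test functions above evaluate on the same data they were trained on. A fairer check, sketched below under my own assumptions (the helper name test_softmax_holdout, the 0.3 split and random_state=0 are arbitrary choices; train_test_split comes from sklearn.model_selection), holds out part of the iris data:

from sklearn.model_selection import train_test_split

def test_softmax_holdout():
    dataset = datasets.load_iris()
    features, target = dataset['data'], dataset['target'].reshape(-1, 1)
    x_train, x_test, y_train, y_test = train_test_split(features, target, test_size=0.3, random_state=0)  # hold out 30% for testing
    enc = preprocessing.OneHotEncoder()
    lr = LogisticRegression(learning_rate=0.1, lamb=0, iters=5000, kernel='softmax')
    lr.training(x_train, enc.fit_transform(y_train).toarray())  # fit on the training split only
    predict = lr.predict(x_test)                                # evaluate on unseen samples
    correct = [1 if a == b[0] else 0 for a, b in zip(predict, y_test)]
    print('holdout accuracy={}%'.format(correct.count(1) / len(correct) * 100))

test_softmax_holdout()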