Saving a trained model
1. Load the dataset
data = load_iris()
2. Split the dataset
train_x,test_x,train_y,test_y = train_test_split(data['data'],data['target'])
3. Feature engineering (standardization)
std = StandardScaler()
train_x = std.fit_transform(train_x)
test_x = std.transform(test_x)
4. Model selection
Any of the following algorithms can be used (a quick comparison sketch follows this outline).
Logistic regression
lg = LogisticRegression()
lg.fit(train_x,train_y)
KNN
lg = KNeighborsClassifier(n_neighbors=3)
lg.fit(train_x,train_y)
Naive Bayes (note: MultinomialNB requires non-negative features, so it will raise an error on the standardized data; GaussianNB works in that case)
lg = MultinomialNB()
lg.fit(train_x,train_y)
Support vector machine
lg = SVC()
lg.fit(train_x,train_y)
Decision tree
lg = DecisionTreeClassifier()
lg.fit(train_x,train_y)
Random forest
lg = RandomForestClassifier()
lg.fit(train_x,train_y)
Save the model (persist both the fitted scaler and the trained estimator)
joblib.dump(std,'lg_std.pkl')
joblib.dump(lg,'lg.pkl')
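Before settling on one estimator, the candidates above can be compared on the same split. This is only a sketch: GaussianNB is substituted for MultinomialNB because the standardized features can be negative, and default hyperparameters are assumed.
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier

data = load_iris()
train_x, test_x, train_y, test_y = train_test_split(data['data'], data['target'])
std = StandardScaler()
train_x = std.fit_transform(train_x)
test_x = std.transform(test_x)

models = {
    "LogisticRegression": LogisticRegression(),
    "KNN(k=3)": KNeighborsClassifier(n_neighbors=3),
    "GaussianNB": GaussianNB(),
    "SVC": SVC(),
    "DecisionTree": DecisionTreeClassifier(),
    "RandomForest": RandomForestClassifier(),
}
for name, model in models.items():
    model.fit(train_x, train_y)
    print(name, "test accuracy:", model.score(test_x, test_y))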
Full code:
from sklearn.linear_model import LogisticRegression
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import precision_score,recall_score,f1_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB,MultinomialNB,BernoulliNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import joblib
# 1. Load the dataset
data = load_iris()
# 2. Split the dataset
train_x,test_x,train_y,test_y = train_test_split(data['data'],data['target'])
# 3. Feature engineering (standardization)
std = StandardScaler()
train_x = std.fit_transform(train_x)
test_x = std.transform(test_x)
# 4. Model selection (uncomment the estimator you want to use)
# lg = LogisticRegression()
# lg.fit(train_x,train_y)
# lg = KNeighborsClassifier(n_neighbors=3)
# lg.fit(train_x,train_y)
# lg = MultinomialNB()   # requires non-negative features; fails on standardized data
# lg.fit(train_x,train_y)
# lg = SVC()
# lg.fit(train_x,train_y)
# lg = DecisionTreeClassifier()
# lg.fit(train_x,train_y)
lg = RandomForestClassifier()
lg.fit(train_x,train_y)
# Persist the fitted scaler and the trained model
joblib.dump(std,'lg_std.pkl')
joblib.dump(lg,'lg.pkl')
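The metrics imported at the top (precision_score, recall_score, f1_score) are not used in the script above. Continuing from it, a minimal evaluation on the held-out test split could look like this (macro averaging is an assumption for the three iris classes):
pred_y = lg.predict(test_x)
print("precision:", precision_score(test_y, pred_y, average='macro'))
print("recall:", recall_score(test_y, pred_y, average='macro'))
print("f1:", f1_score(test_y, pred_y, average='macro'))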
Using the saved model
import joblib
import numpy as np
x1 = input("Enter the sepal length: ")
x2 = input("Enter the sepal width: ")
x3 = input("Enter the petal length: ")
x4 = input("Enter the petal width: ")
# input() returns strings, so convert to float before building the feature vector
x = np.array([x1, x2, x3, x4], dtype=float).reshape(1, 4)
std = joblib.load('lg_std.pkl')   # reload the scaler fitted on the training data
x = std.transform(x)              # apply the same standardization as during training
lg = joblib.load('lg.pkl')        # reload the trained classifier
y = lg.predict(x)
print(y)
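The printed value is only a class index. Assuming the model was trained on load_iris() as above, the index can be mapped back to the species name (a minimal sketch):
from sklearn.datasets import load_iris
names = load_iris().target_names          # ['setosa', 'versicolor', 'virginica']
print("Predicted species:", names[y[0]])  # y comes from lg.predict(x) above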
Case study: handwritten digit recognition
The MNIST dataset is a classic machine-learning dataset consisting of 60,000 training samples and 10,000 test samples; each sample is a 28 x 28 pixel grayscale image of a handwritten digit.
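The script below reads a pre-pickled file named mnist.pkl that contains three (features, labels) tuples. Assuming it is the classic deeplearning.net pickle, the training split holds 50,000 of the 60,000 training images and the remaining 10,000 form a validation split; a quick sanity check of the layout:
import pickle
with open('mnist.pkl', 'rb') as f:
    train, val, test = pickle.load(f, encoding='iso-8859-1')
print(train[0].shape, val[0].shape, test[0].shape)   # e.g. (50000, 784) (10000, 784) (10000, 784)
print(train[0].min(), train[0].max())                # pixel values are floats, typically in [0, 1]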
Choose an algorithm and save the model
import pickle
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import joblib
with open('mnist.pkl','rb') as f:
    # each split is a (features, labels) tuple
    train, val, test = pickle.load(f, encoding='iso-8859-1')
train_x = train[0]
train_y = train[1]
test_x = test[0]
test_y = test[1]
# lr = LogisticRegression()
# lr.fit(train_x,train_y)
rdt = RandomForestClassifier()
rdt.fit(train_x,train_y)
acc = accuracy_score(train_y, rdt.predict(train_x))
print("Accuracy on the training set:", acc)
acc = accuracy_score(test_y, rdt.predict(test_x))
print("Accuracy on the test set:", acc)
joblib.dump(rdt,'rdt.pkl')
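The val split is loaded above but never used; a small addition (a sketch, reusing the variables from the script above) checks accuracy on it before looking at the test set:
val_x, val_y = val
acc = accuracy_score(val_y, rdt.predict(val_x))
print("Accuracy on the validation set:", acc)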
Load the model
Provide the image to recognize
Color conversion (invert the image so the digit is white on a black background, matching the MNIST style)
import cv2
img = cv2.imread('1.png')
b = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)   # convert to grayscale
w = 255 - b                                 # invert: dark strokes become bright, as in MNIST
cv2.imwrite('9.png', w)
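If the photo has noise or an uneven background, a simple binarization step can give a cleaner white-on-black digit. This is an optional sketch using OpenCV's threshold; the threshold value 127 is an assumption to tune per image:
import cv2
img = cv2.imread('1.png')
gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
# THRESH_BINARY_INV turns dark strokes white and the light background black
_, w = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY_INV)
cv2.imwrite('9.png', w)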
import joblib
import cv2
rdt = joblib.load('rdt.pkl')
# Read the image as grayscale and resize it to the 28x28 MNIST size
img = cv2.imread('9.png', 0)
img = cv2.resize(img, (28, 28))
test = img.reshape(1, 28*28)
# The mnist.pkl features are floats in [0, 1] (see the sanity check above), so scale the
# pixels the same way; fitting a StandardScaler on a single sample would zero out every feature.
test = test / 255.0
pre = rdt.predict(test)
print(pre)
cv2.imshow('img', img)
cv2.waitKey(0)
cv2.destroyAllWindows()
The printed index is 7; look it up against the reference image of digits.
[Figure: reference strip of handwritten digit images, labeled 0 through 9]