机器学习(2)贝叶斯判别模型

130 阅读1分钟

正文

本文已参与「新人创作礼」活动,一起开启掘金创作之路。在现实世界中,有许多客观现象的发生,就每一次观察和测量来说,即使在基本条件保持不变的情况下也具有不确定性。只有在大量重复的观察下,其结果才能呈现出某种规律性,即对它们观察到的特征具有统计特性。特征值不再是一个确定的向量,而是一个随机向量。此时,只能利用模式集的统计特性来分类,以使分类器发生错误的概率最小。这就是贝叶斯判别的基本出发点。原文链接:blog.csdn.net/csdn0123zl/…

python实现贝叶斯判别模型

import numpy as np

class Bayes(object):
    """Naive Bayes classifier supporting multinomial, gaussian and bernoulli models.

    Fixes vs. the original:
    - ``self.model == "multinomial" or "bernoulli"`` was always truthy, so the
      gaussian branch was unreachable and "gaussian" silently behaved like
      multinomial; membership is now tested with ``in (...)``.
    - The fallback probability for unseen feature values now uses the same
      smoothed denominator as seen values (the original divided alpha by
      ``n_values * alpha``, which is 1/n_values and crashes when alpha == 0).
    - ``predict`` validated ``len(X.shape)`` (the ndim, always 2) against the
      feature count; it now checks the actual number of columns.
    - The log-probability branch of ``predict`` indexed the raw ``X`` instead
      of the (possibly bernoulli-transformed) ``X_``.
    """

    def __init__(self, model="multinomial", alpha=1, log=False):
        """
        :param model: "multinomial" : Naive Bayes classifier for multinomial models
                      "gaussian"    : Naive Bayes classifier for gaussian model
                      "bernoulli"   : Naive Bayes classifier for bernoulli model
        :param alpha: smoothing strength:
                      alpha = 0      -> no smoothing
                      0 < alpha < 1  -> Lidstone smoothing
                      alpha = 1      -> Laplace smoothing
        :param log: if True, accumulate log-probabilities
                    (x1 * x2 * x3 becomes log(x1) + log(x2) + log(x3))
        """
        self.model = model
        self.alpha = alpha
        self.log = log

    def _probabilities_for_feature(self, feature):
        """Estimate P(xj | y=c) for one feature column restricted to one class.

        For discrete models returns a dict {feature_value: smoothed_prob} plus
        an 'error' entry used as the fallback for values unseen at fit time.
        For the gaussian model returns the (mean, std) of the column.
        """
        if self.model in ("multinomial", "bernoulli"):
            values = np.unique(feature)
            total_num = len(feature)
            denominator = total_num + len(values) * self.alpha
            feature_value_prob = {}
            for value in values:
                feature_value_prob[value] = (np.sum(np.equal(feature, value)) + self.alpha) / denominator
            # Fallback mass for feature values never seen in this class at fit
            # time; shares the smoothed denominator so probabilities stay
            # consistent (and alpha == 0 no longer divides by zero).
            feature_value_prob['error'] = self.alpha / denominator
            return feature_value_prob
        if self.model == "gaussian":
            return (np.mean(feature), np.std(feature))

    def fit(self, X, y, threshold=None):
        """Estimate class priors P(y=Ck) and conditionals P(xj | y=Ck).

        :param X: 2-D array of shape (n_samples, n_features)
        :param y: 1-D array of class labels, length n_samples
        :param threshold: per-feature binarization thresholds, required for
                          the bernoulli model (ignored otherwise)
        :return: self, for chaining
        """
        X_ = X.copy()
        self.feature_len = X_.shape[1]
        self.threshold = threshold
        if self.model == "bernoulli":
            X_ = self._bernoulli_transform(X_)
        self.classes_list = np.unique(y)
        total_num = len(y)
        # P(y = Ck), smoothed the same way as the conditionals.
        self.classes_prior = {}
        for value in self.classes_list:
            self.classes_prior[value] = (np.sum(np.equal(y, value)) + self.alpha) / \
                (total_num + len(self.classes_list) * self.alpha)
        # P(xj | y = Ck), one entry per (class, feature index).
        self.conditional_prob = {}
        for c in self.classes_list:
            self.conditional_prob[c] = {}
            class_rows = X_[np.equal(y, c)]
            for i in range(self.feature_len):
                self.conditional_prob[c][i] = self._probabilities_for_feature(class_rows[:, i])
        return self

    def _get_xj_prob(self, values_probs, target_value):
        """Look up / evaluate P(xj = target_value | y=c) for one feature."""
        if self.model in ("multinomial", "bernoulli"):
            # Unseen values fall back to the smoothed 'error' probability.
            if target_value not in values_probs:
                return values_probs['error']
            return values_probs[target_value]
        elif self.model == "gaussian":
            mean, std = values_probs
            # Gaussian probability density at target_value.
            return (1 / np.sqrt(2 * np.pi * np.square(std))) * \
                np.exp(-np.square(target_value - mean) / (2 * np.square(std)))

    def _bernoulli_transform(self, X):
        """Binarize each column: 1 where X[:, j] >= threshold[j], else 0."""
        # Input validation must not disappear under ``python -O``, so raise
        # instead of assert.
        if len(X[0]) != len(self.threshold):
            raise ValueError("threshold must supply one value per feature")
        for index in range(len(X[0])):
            X[:, index] = np.where(X[:, index] >= self.threshold[index], 1, 0)
        return X

    def predict(self, X):
        """Return the MAP class label for each row of X.

        :param X: 2-D array of shape (n_samples, n_features), same feature
                  count as the training data
        :return: 1-D float array of predicted class labels
        """
        X_ = X.copy()
        if self.model == "bernoulli":
            X_ = self._bernoulli_transform(X_)
        if X_.shape[1] != self.feature_len:
            raise ValueError("X has %d features, expected %d" % (X_.shape[1], self.feature_len))
        result = np.zeros([len(X_)])
        for r in range(len(X_)):
            scores = []
            for c in self.classes_list:
                prior = self.classes_prior[c]
                if not self.log:
                    score = prior
                    for i in range(self.feature_len):
                        score *= self._get_xj_prob(self.conditional_prob[c][i], X_[r, i])
                else:
                    # Sum of logs avoids underflow for many features; note the
                    # transformed X_ is used here (the original read raw X,
                    # breaking bernoulli + log prediction).
                    score = np.log(prior)
                    for i in range(self.feature_len):
                        score += np.log(self._get_xj_prob(self.conditional_prob[c][i], X_[r, i]))
                scores.append(score)
            result[r] = self.classes_list[np.argmax(np.array(scores))]
        return result

具体可以参考原文链接:blog.csdn.net/weixin_3074…

结语

再见!