协同过滤推荐算法

95 阅读2分钟
#-*-coding:utf-8-*-
import os
from math import sqrt
from .models import Record

def getallratings(): #从数据库里读取评分数据
    users = {}
    # os.path.dirname(os.path.abspath(__file__))
    res = Record.objects.all()
    for line in res:
        if str(line.user.id) not in users:
            users[str(line.user.id)] = {}
        users[str(line.user.id)][line.item]=line.rate  # cache文件里的排序 uid --mark--bid     
    print(users)  
    return users

class recommender:
    #data:数据集,这里指users
    #k:表示得出最相近的k的近邻
    #metric:表示使用计算相似度的方法
    #n:表示推荐的个数
    def __init__(self, data, k=3, metric='pearson', n=15):

        self.k = k #默认找到三个相似的用户 
        self.n = n 
        self.username2id = {}
        self.userid2name = {}
        self.productid2name = {} 
        self.metric = metric
        if self.metric == 'pearson':
            self.fn = self.pearson
        if type(data).__name__ == 'dict':
            self.data = data
      
    def convertProductID2name(self, id):
        if id in self.productid2name:
            return self.productid2name[id]
        else:
            return id

    #定义的计算相似度的公式,用的是皮尔逊相关系数计算方法
    def pearson(self, rating1, rating2):
        sum_xy = 0
        sum_x = 0
        sum_y = 0
        sum_x2 = 0
        sum_y2 = 0
        n = 0
        for key in rating1:
            if key in rating2:
                n += 1
                x = rating1[key]
                y = rating2[key]
                sum_xy += x * y
                sum_x += x
                sum_y += y
                sum_x2 += pow(x, 2)
                sum_y2 += pow(y, 2)
        if n == 0:
            return 0
        
        #皮尔逊相关系数计算公式 
        denominator = sqrt(sum_x2 - pow(sum_x, 2) / n)  * sqrt(sum_y2 - pow(sum_y, 2) /n)
        if denominator == 0:
            return 0
        else:
            return (sum_xy - (sum_x * sum_y) / n) / denominator
    
    def computeNearestNeighbor(self, username):
        distances = []
        for instance in self.data:
            if instance != username:
                distance = self.fn(self.data[username],self.data[instance])
                distances.append((instance, distance))

        distances.sort(key=lambda artistTuple: artistTuple[1],reverse=True)
        return distances


    #推荐算法的主体函数
    def recommend(self, user):
        #定义一个字典,用来存储推荐的物品和分数
        recommendations = {}
        #计算出user与所有其他用户的相似度,返回一个list
        nearest = self.computeNearestNeighbor(user)
        print( nearest ) 
        
        userRatings = self.data[user]
        #print userRatings
        totalDistance = 0.0
        #得出最近的k个近邻的总距离
        for i in range(self.k):
            totalDistance += nearest[i][1]

        if totalDistance==0.0:
            totalDistance=1.0
            
        #将与user最相近的k个人中user没有看过的物品推荐给user,并且这里又做了一个分数的计算排名
        for i in range(self.k):
            
            #第i个人的与user的相似度,转换到[0,1]之间
            weight = nearest[i][1] / totalDistance
            
            #第i个人的name
            name = nearest[i][0]

            #第i个用户看过的物品和相应的打分
            neighborRatings = self.data[name]

            for artist in neighborRatings:
                if not artist in userRatings:
                    if artist not in recommendations:
                        recommendations[artist] = (neighborRatings[artist] * weight)
                    else:
                        recommendations[artist] = (recommendations[artist]+ neighborRatings[artist] * weight)

        recommendations = list(recommendations.items())
        recommendations = [(self.convertProductID2name(k), v) for (k, v) in recommendations] #把推荐结果中的ID转为名称
        
        #做了一个排序
        recommendations.sort(key=lambda artistTuple: artistTuple[1], reverse = True)
        return recommendations[:self.n]
 
 # 根据传入ID得到推荐列表
def getrecommend(id):
    id_list = []
    data=getallratings()#读取数据 
    r = recommender(data) #实例化推荐类
    k = r.recommend("%s" % id)
    for i in range(len(k)):
        id_list.append(k[i][0])
    print( '====>', id_list )
    return id_list  #返回推荐的的ID列表