Mahout开发实战(一)

127 阅读3分钟

持续创作,加速成长!这是我参与「掘金日新计划 · 6 月更文挑战」的第13天,点击查看活动详情

1、前言

上一篇为大家介绍了开发前的基本准备,本次就为大家分享下在实际开发中遇到的一些问题以及总结。

2、代码结构

主要内容位于recommender、service包下,是对Mahout的二次封装。

image.png

3、推荐接口

定义好统一的推荐接口,方便管理。后续再引入其他推荐算法时,需要先实现此接口。

/**
 * Unified contract for recommendation services.
 *
 * <p>Declares operations for producing recommendations for a user, refreshing a
 * user's recommendation queue, and evaluating a recommender (evaluation score and
 * information-retrieval statistics).
 */
public interface RecommenderService {

    /**
     * Recommends a fixed number of items for the given user.
     *
     * @param userId numeric ID of the user to recommend for
     * @return the recommendation result for the user
     * @throws TasteException if the recommendation fails
     */
    RecommendResult recommend(final long userId) throws TasteException;

    /**
     * Recommends {@code recommendNum} items for the given user.
     *
     * @param userId       numeric ID of the user to recommend for
     * @param recommendNum number of items to recommend
     * @return the recommendation result for the user
     * @throws TasteException if the recommendation fails
     */
    RecommendResult recommend(final long userId, final int recommendNum) throws TasteException;

    /**
     * Refreshes the recommendation queue of the given user.
     *
     * @param userId numeric ID of the user whose queue is refreshed
     * @throws TasteException if the recommendation fails
     */
    void recommendData(long userId) throws TasteException;

    /**
     * Computes the evaluation score of the recommender.
     *
     * @return the evaluation score
     * @throws TasteException if the evaluation fails
     */
    double evaluateScore() throws TasteException;

    /**
     * Computes the evaluation score for an explicitly supplied recommender and data set.
     *
     * @param recommenderBuilder   builder of the recommender to evaluate
     * @param dataModel            data model to evaluate against
     * @param trainingPercentage   training percentage
     * @param evaluationPercentage evaluation percentage
     * @return the evaluation score
     * @throws TasteException if the evaluation fails
     */
    double evaluateScore(RecommenderBuilder recommenderBuilder, DataModel dataModel,
                         double trainingPercentage, double evaluationPercentage) throws TasteException;

    /**
     * Computes information-retrieval statistics such as precision and recall.
     *
     * @return the computed statistics
     * @throws TasteException if the evaluation fails
     */
    IRStatistics evaluateIRStatistics() throws TasteException;

    /**
     * Computes precision and recall for an explicitly supplied recommender and data set.
     *
     * @param recommenderBuilder   builder of the recommendation algorithm to evaluate
     * @param dataModel            data model to evaluate against
     * @param rescorer             if any, used when computing recommendations
     * @param at                   as in "precision at 5": the number of recommendations
     *                             to consider when evaluating precision
     * @param relevanceThreshold   items whose preference value is at least this value are
     *                             considered "relevant" for the purposes of the computation
     * @param evaluationPercentage evaluation percentage
     * @return the computed statistics
     * @throws TasteException if the evaluation fails
     */
    IRStatistics evaluateIRStatistics(RecommenderBuilder recommenderBuilder,
                                      DataModel dataModel,
                                      IDRescorer rescorer,
                                      int at,
                                      double relevanceThreshold,
                                      double evaluationPercentage) throws TasteException;


}

4、抽象推荐Service

定义的抽象Service主要是将通用的方法在抽象类里实现,避免代码冗余。

/**
 * Base class holding the logic shared by all recommendation services.
 *
 * <p>Every operation delegates to {@link #recommender}. This class never assigns that
 * field itself; it has to be set by a subclass before any method is called.
 *
 * <p>NOTE(review): Spring component scanning normally registers only concrete classes,
 * so the {@code @Service} annotation on this abstract class is presumably a no-op. It is
 * kept to avoid changing the wiring — confirm and consider removing it.
 */
@Service
public abstract class AbstractRecommenderService {

    /**
     * Recommender all operations delegate to; assigned by subclasses.
     *
     * <p>Declared {@code volatile} so that an assignment made by one thread is visible to
     * other threads that read the field without holding a lock.
     */
    protected volatile AbstractRecommender recommender;

    /** Helper used to resolve user and behavior records for recommendation results. */
    @Resource
    private RecommendUtil recommendUtil;

    /**
     * Recommends items for the given user using the default count.
     *
     * @param userId numeric ID of the user to recommend for
     * @return the recommendation result, or {@code null} when {@code userId} is not positive
     * @throws TasteException if the underlying recommender fails
     */
    public RecommendResult recommend(final long userId) throws TasteException {
        // Recommend one item by default.
        return recommend(userId, NumericConstant.ONE);
    }

    /**
     * Recommends up to {@code recommendNum} items for the given user.
     *
     * @param userId       numeric ID of the user to recommend for
     * @param recommendNum number of items to request from the recommender
     * @return the recommendation result, or {@code null} when either argument is not
     *         positive; the result's content list is {@code null} when nothing was recommended
     * @throws TasteException if the underlying recommender fails
     */
    public RecommendResult recommend(long userId, int recommendNum) throws TasteException {
        if (userId <= 0 || recommendNum <= 0) {
            return null;
        }
        RecommendResult result = new RecommendResult();
        result.setUserId(userId);
        // Convert the long userId into its zshy__common_info_id.
        result.setZshy__common_info_id(recommendUtil.getCommonInfoId(userId));
        // Fill in the recommended contents.
        List<RecommendContentPO> data = recommendByCF(userId, recommendNum);
        result.setRecommendContents(data);
        return result;
    }

    /**
     * Fetches recommendations from {@link #recommender} and converts them to content objects.
     *
     * <p>Recommended items whose behavior record cannot be resolved are skipped instead of
     * failing the whole batch.
     *
     * @param userId       numeric ID of the user to recommend for
     * @param recommendNum number of items to request from the recommender
     * @return the converted recommendations, or {@code null} when there are none
     * @throws TasteException if the underlying recommender fails
     */
    protected List<RecommendContentPO> recommendByCF(final long userId, final int recommendNum) throws TasteException {
        List<RecommendedItem> recommendData = recommender.recommend(userId, recommendNum);
        if (CollectionUtils.isEmpty(recommendData)) {
            return null;
        }
        // Convert the recommender output, dropping items that could not be resolved.
        List<RecommendContentPO> resList = recommendData.stream()
                .map(this::toRecommendContent)
                .filter(content -> content != null)
                .collect(Collectors.toList());
        // Keep the "null means no recommendations" contract of the empty-input branch above.
        return resList.isEmpty() ? null : resList;
    }

    /**
     * Converts one recommended item into a content object.
     *
     * @param item item returned by the recommender
     * @return the content object, or {@code null} when no behavior record is found for the item
     */
    private RecommendContentPO toRecommendContent(RecommendedItem item) {
        MongoUserBehavior userBehavior = recommendUtil.getUserBehaviorByBehaviorId(item.getItemID());
        if (userBehavior == null) {
            // Presumably the behavior record no longer exists; dereferencing it would throw
            // a NullPointerException and discard every other recommendation in the batch.
            return null;
        }
        return new RecommendContentPO(item.getItemID(),
                userBehavior.getBehavior_object_id(), userBehavior.getBehavior_object_type(), userBehavior.getScore());
    }

    /**
     * Computes the evaluation score by delegating to {@link #recommender}.
     *
     * @return the evaluation score
     * @throws TasteException if the evaluation fails
     */
    public double evaluateScore() throws TasteException {
        return recommender.evaluateScore();
    }

    /**
     * Computes the evaluation score for an explicitly supplied recommender and data set.
     *
     * @param recommenderBuilder   builder of the recommender to evaluate
     * @param dataModel            data model to evaluate against
     * @param trainingPercentage   training percentage
     * @param evaluationPercentage evaluation percentage
     * @return the evaluation score
     * @throws TasteException if the evaluation fails
     */
    public double evaluateScore(RecommenderBuilder recommenderBuilder,
                                DataModel dataModel,
                                double trainingPercentage,
                                double evaluationPercentage) throws TasteException {
        return recommender.evaluateScore(recommenderBuilder, dataModel, trainingPercentage, evaluationPercentage);
    }

    /**
     * Computes information-retrieval statistics such as precision and recall by
     * delegating to {@link #recommender}.
     *
     * @return the computed statistics
     * @throws TasteException if the evaluation fails
     */
    public IRStatistics evaluateIRStatistics() throws TasteException {
        return recommender.evaluateIRStatistics();
    }

    /**
     * Computes precision and recall for an explicitly supplied recommender and data set.
     *
     * @param recommenderBuilder   builder of the recommendation algorithm to evaluate
     * @param dataModel            data model to evaluate against
     * @param rescorer             if any, used when computing recommendations
     * @param at                   as in "precision at 5": the number of recommendations
     *                             to consider when evaluating precision
     * @param relevanceThreshold   items whose preference value is at least this value are
     *                             considered "relevant" for the purposes of the computation
     * @param evaluationPercentage evaluation percentage
     * @return the computed statistics
     * @throws TasteException if the evaluation fails
     */
    public IRStatistics evaluateIRStatistics(RecommenderBuilder recommenderBuilder,
                                             DataModel dataModel,
                                             IDRescorer rescorer,
                                             int at,
                                             double relevanceThreshold,
                                             double evaluationPercentage) throws TasteException {
        return recommender.evaluateIRStatistics(recommenderBuilder, dataModel, rescorer, at, relevanceThreshold,
                evaluationPercentage);
    }

}

5、基于物品推荐Service实现类

可以看到,封装了抽象推荐类后,具体的推荐实现类(impl)非常简洁,同时后期扩展也很方便。

/**
 * Item-based collaborative-filtering implementation of {@link RecommenderService}.
 *
 * <p>Creates an {@code ItemBasedCfRecommender} over the injected data model and, on
 * request, persists a user's recommendations through {@code WriteRecContentsMapper}.
 */
@Slf4j
@Service(value = "ItemBasedCFRecommenderServiceImpl")
public class ItemBasedCFRecommenderServiceImpl extends AbstractRecommenderService implements RecommenderService {

    /** Data model the recommender is built on. */
    @Resource
    private DataModel dataModel;

    /** Mapper used to persist recommendation results. */
    @Resource
    private WriteRecContentsMapper writeRecContentsMapper;

    /**
     * Builds the item-based recommender once dependencies have been injected.
     *
     * @throws TasteException if the recommender cannot be created
     */
    @PostConstruct
    public void init() throws TasteException {
        recommender = new ItemBasedCfRecommender(dataModel);
    }

    /**
     * Computes recommendations for the user and stores them when there are any.
     *
     * <p>Never throws: every failure is logged, because the method is annotated
     * {@code @Async} and returns nothing to report an error through.
     *
     * @param userId numeric ID of the user whose recommendations are refreshed
     */
    @Override
    @Async(value = AsyncNameConstant.RECOMMEND)
    public void recommendData(long userId) {
        try {
            ensureRecommender();
            // Fetch the recommendations.
            RecommendResult recommendResult = recommend(userId);
            // recommend() returns null for a non-positive user ID; treat that like an empty
            // result instead of dereferencing it and logging a NullPointerException.
            if (recommendResult == null || CollectionUtils.isEmpty(recommendResult.getRecommendContents())) {
                log.info("根据物品推荐-未获取到推荐结果,用户ID:{}", userId);
                return;
            }
            recommendResult.setRecommend_type(RecommendTypeEnum.ITEM_BASED.getCode());
            // Persist the result.
            writeRecContentsMapper.insertRecContents(recommendResult);
        } catch (Exception e) {
            log.error("根据物品推荐出错,错误信息:{},用户ID:{}", e.getMessage(), userId, e);
        }
    }

    /**
     * Creates the recommender if it has not been set yet.
     *
     * <p>{@link #init()} normally assigns it already, so this is only a fallback.
     * NOTE(review): the unsynchronised first check is only safe if the inherited
     * {@code recommender} field is declared {@code volatile} — confirm in the base class.
     *
     * @throws TasteException if the recommender cannot be created
     */
    private void ensureRecommender() throws TasteException {
        if (recommender != null) {
            return;
        }
        synchronized (ItemBasedCFRecommenderServiceImpl.class) {
            if (recommender == null) {
                recommender = new ItemBasedCfRecommender(dataModel);
            }
        }
    }
}

好了,本期就先介绍到这里,有什么需要交流的,大家可以随时私信我。😊