《Redis应用实例》Java实现(7):使用HyperLogLog实现唯一计数器

43 阅读2分钟

原书第7章。

package com.foxbill.redisinaction;

import redis.clients.jedis.Jedis;

/**
 * Demonstrates a unique-visitor counter built on Redis HyperLogLog
 * (PFADD / PFCOUNT / PFMERGE) through the Jedis client.
 *
 * <ol>
 *   <li>HyperLogLog is the probabilistic data structure Redis provides for cardinality
 *       estimation.</li>
 *   <li>Small footprint: about 12KB per key, even when counting a billion distinct
 *       elements.</li>
 *   <li>Low error rate: the standard error is about 0.81%.</li>
 *   <li>Good fit for unique-visitor (UV) statistics, user-behaviour statistics and other
 *       workloads that de-duplicate large volumes of data without needing an exact
 *       result.</li>
 * </ol>
 */
public class Chapter7 {
    // Redis keys used by the demo: one HyperLogLog per day plus the merged result.
    static final String hyperLogLogKey1 = "hyperloglog:example:visitors:day1";
    static final String hyperLogLogKey2 = "hyperloglog:example:visitors:day2";
    static final String hyperLogLogKeyMerged = "hyperloglog:example:visitors:twodays";

    // Inclusive visitor-id ranges for each day. Day 2 deliberately overlaps day 1.
    private static final int DAY1_FIRST = 1;
    private static final int DAY1_LAST = 1000;
    private static final int DAY2_FIRST = 500;
    private static final int DAY2_LAST = 1500;

    // Figures printed by the demo are derived from the ranges above so the messages
    // cannot drift away from the data that is actually written.
    private static final int DAY1_COUNT = DAY1_LAST - DAY1_FIRST + 1;
    private static final int DAY2_COUNT = DAY2_LAST - DAY2_FIRST + 1;
    private static final int OVERLAP_COUNT =
            Math.max(0, Math.min(DAY1_LAST, DAY2_LAST) - Math.max(DAY1_FIRST, DAY2_FIRST) + 1);
    private static final int ACTUAL_UNIQUE = DAY1_COUNT + DAY2_COUNT - OVERLAP_COUNT;

    /**
     * Runs the whole demo: clears old keys, fills both daily HyperLogLogs, prints the
     * individual and union estimates, then merges the two keys and prints the merged estimate.
     *
     * @param jedis connected Jedis client used for every command
     */
    public static void start(Jedis jedis) {
        init(jedis);
        addVisitors(jedis);
        countVisitors(jedis);
        mergeVisitors(jedis);
    }

    /** Removes data left over from a previous run, using a single variadic DEL. */
    private static void init(Jedis jedis) {
        jedis.del(hyperLogLogKey1, hyperLogLogKey2, hyperLogLogKeyMerged);
    }

    /**
     * Example 1: add elements to a HyperLogLog with PFADD.
     *
     * <p>Each day's ids are sent in one variadic PFADD instead of one command per id;
     * the same set of elements is added, but with two round trips instead of 2001.
     */
    private static void addVisitors(Jedis jedis) {
        // Day 1 visitor ids.
        jedis.pfadd(hyperLogLogKey1, userIds(DAY1_FIRST, DAY1_LAST));
        // Day 2 visitor ids (some of them repeat day 1 visitors).
        jedis.pfadd(hyperLogLogKey2, userIds(DAY2_FIRST, DAY2_LAST));

        System.out.println("- 已向" + hyperLogLogKey1 + "添加" + DAY1_COUNT + "个用户ID");
        System.out.println("- 已向" + hyperLogLogKey2 + "添加" + DAY2_COUNT
                + "个用户ID(包含与第1天重叠的" + OVERLAP_COUNT + "个)");
    }

    /** Example 2: read cardinality estimates with PFCOUNT, per key and for the union. */
    private static void countVisitors(Jedis jedis) {
        // Estimate for each HyperLogLog on its own.
        long count1 = jedis.pfcount(hyperLogLogKey1);
        long count2 = jedis.pfcount(hyperLogLogKey2);
        System.out.println("- " + hyperLogLogKey1 + "的基数估算: " + count1);
        System.out.println("- " + hyperLogLogKey2 + "的基数估算: " + count2);

        // PFCOUNT with several keys estimates the cardinality of their union.
        long unionCount = jedis.pfcount(hyperLogLogKey1, hyperLogLogKey2);
        System.out.println("- 两个HyperLogLog的并集基数估算: " + unionCount);
        System.out.println("- 实际唯一用户数: " + ACTUAL_UNIQUE);
        System.out.println("- 误差情况: HyperLogLog估算值与实际值的误差在0.81%左右");
    }

    /** Example 3: merge both daily HyperLogLogs into a third key with PFMERGE and count it. */
    private static void mergeVisitors(Jedis jedis) {
        System.out.println("\n【示例3:使用PFMERGE合并HyperLogLog】");
        jedis.pfmerge(hyperLogLogKeyMerged, hyperLogLogKey1, hyperLogLogKey2);
        long mergedCount = jedis.pfcount(hyperLogLogKeyMerged);
        System.out.println("- 已合并两个HyperLogLog到" + hyperLogLogKeyMerged);
        System.out.println("- 合并后的基数估算: " + mergedCount);
    }

    /**
     * Builds the ids {@code "user:first"} through {@code "user:last"} (both inclusive).
     *
     * @param first first numeric id
     * @param last  last numeric id; a value below {@code first} yields an empty array
     * @return the ids in ascending order
     */
    private static String[] userIds(int first, int last) {
        String[] ids = new String[Math.max(0, last - first + 1)];
        for (int offset = 0; offset < ids.length; offset++) {
            ids[offset] = "user:" + (first + offset);
        }
        return ids;
    }

}


📌 小结

在需要对海量数据做去重计数、又不要求结果完全精确的场景中,可以使用HyperLogLog实现高效统计,其标准误差约为0.81%。