那些年背过的题：Redis字典设计与实现在 Redis 中，字典（或哈希表）是其内部数据结构的基础之一。Redis 使用

在 Redis 中，字典（或哈希表）是其内部数据结构的基础之一。Redis 使用字典来实现键值对存储、集合、哈希、ZSET 等数据类型。理解 Redis 的字典设计和实现有助于更好地掌握 Redis 的性能和工作原理。

字典的定义

Redis 的字典主要由两个结构体定义：dict 和 dictEntry。以下是它们的简化结构：

dictEntry 结构

每个 dictEntry 是一个键值对条目，包括指向下一个条目的指针（用于解决哈希冲突）。

typedef struct dictEntry {
    void *key;                  // 键
    union {
        void *val;              // 值
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next;     // 链接到下一个条目，用于冲突链表
} dictEntry;

dict 结构

dict 是字典的主体，包含哈希表和其他必要的信息。

typedef struct dictht {
    dictEntry **table;          // 哈希表数组
    unsigned long size;         // 哈希表大小
    unsigned long sizemask;     // 哈希表大小掩码，用于计算索引
    unsigned long used;         // 已使用的槽数量
} dictht;

typedef struct dict {
    dictType *type;             // 类型特定函数
    void *privdata;             // 私有数据
    dictht ht[2];               // 两个哈希表，用于渐进式 rehash
    long rehashidx;             // 重新哈希的进度，如果没有进行则为 -1
    unsigned long iterators;    // 当前运行的迭代器数量
} dict;

哈希算法

Redis 使用了 MurmurHash2 算法进行哈希计算，这是一种速度快、散列均匀的哈希算法。

冲突解决

当发生哈希冲突时，Redis 使用链地址法（Separate Chaining），即通过链表将多个键值对存储在同一个槽中。

渐进式 Rehashing

为了避免在一次性扩展或收缩哈希表时可能带来的延迟，Redis 采用了渐进式 rehashing 技术。rehashing 过程如下：

触发条件：当哈希表中的键值对数量达到一定阈值时，Redis决定需要扩展或收缩哈希表。
分配新表：Redis创建一个新的哈希表，这个表的大小是当前哈希表大小的两倍或更小（用于收缩）。
标记迁移状态：在字典结构中标记正在进行渐进式Rehash，并记录当前迁移的位置（通常从0开始）。
渐进式迁移：每次对哈希表进行读写操作时（如查询、插入或删除），Redis不仅执行该操作，还会顺便将旧哈希表中的部分键值对迁移到新哈希表中。具体的迁移量取决于操作的复杂度和系统负载，迁移单位：bucket
更新位置：每次迁移后，更新当前迁移的位置指针，指向下一个待迁移的区域。
完成迁移：当所有键值对都迁移完毕后，Redis用新哈希表替换旧哈希表，并结束渐进式Rehash状态。

优点

避免卡顿：通过将Rehash过程分散到多个操作中，避免了一次性大规模迁移可能带来的系统卡顿。
保持响应性能：即使在进行Rehash期间，系统也可以处理其他请求，保持高响应性能。
渐进迁移：逐步迁移确保每次操作只需少量额外开销，不会显著影响系统整体性能。

字典操作

基本的字典操作包括插入、查找和删除。这些操作在 Redis 源码的 dict.c 文件中实现。以下是一些关键函数的简要说明：

插入操作

插入一个键值对到字典中：

int dictAdd(dict *d, void *key, void *val) {
    dictEntry *entry = dictAddRaw(d, key, NULL);
    if (!entry) return DICT_ERR;
    dictSetVal(d, entry, val);
    return DICT_OK;
}

dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing) {
    /* ...检查是否需要 rehash，并进行相应操作... */
    /* ...计算键的哈希值，并找到对应的槽位... */
    entry = zmalloc(sizeof(*entry));
    entry->next = d->ht[0].table[index];
    d->ht[0].table[index] = entry;
    d->ht[0].used++;
    dictSetKey(d, entry, key);
    return entry;
}

查找操作

根据键查找对应的值：

dictEntry *dictFind(dict *d, const void *key) {
    unsigned int h, idx, table;
    dictEntry *he;

    if (d->ht[0].size == 0) return NULL; /* 空字典 */
    if (dictIsRehashing(d)) _dictRehashStep(d); /* 如果正在进行 rehash，则执行单步 rehash */

    for (table = 0; table <= 1; table++) {
        h = dictHashKey(d, key);
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        while(he) {
            if (dictCompareKeys(d, key, he->key))
                return he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) return NULL; /* 没有找到且没有在 rehash，提前返回 */
    }
    return NULL; /* 没有找到 */
}

删除操作

从字典中删除一个键值对：

int dictDelete(dict *d, const void *key) {
    return dictGenericDelete(d, key, 1);
}

static int dictGenericDelete(dict *d, const void *key, int nofree) {
    unsigned int h, idx, table;
    dictEntry *he, *prevHe;

    if (d->ht[0].size == 0) return DICT_ERR; /* 空字典 */
    if (dictIsRehashing(d)) _dictRehashStep(d);

    for (table = 0; table <= 1; table++) {
        h = dictHashKey(d, key);
        idx = h & d->ht[table].sizemask;
        he = d->ht[table].table[idx];
        prevHe = NULL;
        while (he) {
            if (dictCompareKeys(d, key, he->key)) {
                if (prevHe)
                    prevHe->next = he->next;
                else
                    d->ht[table].table[idx] = he->next;
                if (!nofree) {
                    dictFreeKey(d, he);
                    dictFreeVal(d, he);
                }
                zfree(he);
                d->ht[table].used--;
                return DICT_OK;
            }
            prevHe = he;
            he = he->next;
        }
        if (!dictIsRehashing(d)) break;
    }
    return DICT_ERR; /* 没有找到 */
}

渐进式 Rehash

渐进式Rehash的触发

Redis决定是否需要Rehash主要通过以下函数：

int _dictExpandIfNeeded(dict *d) {
    // 如果正在进行Rehash，直接返回
    if (dictIsRehashing(d)) return DICT_OK;

    // 如果负载因子超过阈值，则触发扩展
    if (d->ht[0].used >= d->ht[0].size && (d->ht[0].used / d->ht[0].size > HASHTABLE_MAX_LOAD_FACTOR)) {
        return dictExpand(d, d->ht[0].size * 2);
    }
    return DICT_OK;
}

初始化Rehash

当决定进行Rehash时，会分配一个新的哈希表，并开始迁移：

int dictExpand(dict *d, unsigned long size) {
    // 分配新哈希表
    dictht n; 
    unsigned long realsize = _dictNextPower(size);
    n.size = realsize;
    n.sizemask = realsize - 1;
    n.table = zcalloc(realsize * sizeof(dictEntry*));
    n.used = 0;

    // 将新表赋值给ht[1]
    d->ht[1] = n;
    d->rehashidx = 0; // 开始进行Rehash
    return DICT_OK;
}

渐进式数据迁移

在每次对哈希表进行操作时，会顺便迁移一些数据：

void _dictRehashStep(dict *d) {
    if (d->iterators == 0) dictRehash(d, 1); // 每次迁移一个bucket
}

int dictRehash(dict *d, int n) {
    if (!dictIsRehashing(d)) return 0;

    while (n--) {
        dictEntry *de, *nextde;

        // 如果所有数据都迁移完毕
        if (d->ht[0].used == 0) {
            zfree(d->ht[0].table);
            d->ht[0] = d->ht[1];
            _dictReset(&d->ht[1]);
            d->rehashidx = -1;
            return 0;
        }

        // 找到第一个非空bucket
        while (d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;

        de = d->ht[0].table[d->rehashidx];
        while(de) {
            nextde = de->next;

            // 重新计算键在新表中的位置，并插入
            unsigned int h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;

            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }
    return 1;
}

结束Rehash

当所有数据都迁移完毕后，需要将新哈希表替换旧哈希表，并重置相关状态：

if (d->ht[0].used == 0) {
    zfree(d->ht[0].table);
    d->ht[0] = d->ht[1];
    _dictReset(&d->ht[1]);
    d->rehashidx = -1;
}