字典又称散列表。Redis 是一种非关系型的键值数据库，整个数据库的数据结构可以说就是一个超级大的字典。

字典常见命令

// 添加 一个字典类型，key：city ，value ：{hangzhou:"xihu"}
127.0.0.1:6379> hset city hangzhou "xihu"
(integer) 1
127.0.0.1:6379> hset city chongqing "huoguo"
(integer) 1
127.0.0.1:6379> hget city hangzhou
"xihu"
127.0.0.1:6379> hget city chongqing
"huoguo"
// 获取 key 对应字典中的所有字段和值
127.0.0.1:6379> hgetall city
1) "hangzhou"
2) "xihu"
3) "chongqing"
4) "huoguo"
//  更新操作：字段已存在时只更新值，返回0（返回值为新增字段的个数）
127.0.0.1:6379> hset city hangzhou "dongporou"
(integer) 0
127.0.0.1:6379> hgetall city
1) "hangzhou"
2) "dongporou"
3) "chongqing"
4) "huoguo"
//  批量添加
127.0.0.1:6379> hmset city chengdu "chuanchuan" shanxi "roujiamo"
OK
// 同样字典也支持自增
127.0.0.1:6379> hset age peter 25
(integer) 1
127.0.0.1:6379> hincrby age peter 1
(integer) 26

数据结构

Redis字典的数据结构和Java的HashMap有很多相似之处。（这里不讨论底层使用压缩列表（ziplist）的情况）

/* A single hash table: an array of buckets, each heading a chain of entries. */
typedef struct dictht {
    // Array of bucket pointers; each slot points to the first dictEntry of its chain, or is NULL
    dictEntry **table;
    // Number of slots in the table array
    unsigned long size;
    // Index mask, equal to size-1; ANDed with a key's hash to select a slot
    unsigned long sizemask;
    // Number of entries currently stored in this table
    unsigned long used;
} dictht;

上面结构体中的used属性表示已经存在的节点数量，既包括数组槽位上的节点，也包括挂在链表上的节点，因此used可能会大于size。sizemask属性等于size-1，用来通过位运算高效地计算索引值（索引值 = Hash值 & 掩码值）。

/* One key/value node in a hash table bucket chain. */
typedef struct dictEntry {

    // Key
    void *key;
    // Value: a union, so only one of the members below is meaningful at a time
    union {
        // Pointer to the actual value object
        void *val;
        // Value stored inline as an unsigned 64-bit integer
        // NOTE(review): the original note called this a "hash value" — confirm against callers
        uint64_t u64;
        // Value stored inline as a signed 64-bit integer (per the original note: an expiration time)
        int64_t s64;
        // Value stored inline as a double
        double d;
    } v;
    // Next entry in the same bucket's chain
    struct dictEntry *next;
} dictEntry;

但是Redis对字典做了一层封装。

/* Dictionary wrapper: pairs two hash tables with the state needed for rehashing. */
typedef struct dict {
    // Type-specific callbacks (see dictType)
    dictType *type;
    // Private data of the dictionary
    // NOTE(review): presumably handed to the dictType callbacks as their privdata argument — confirm
    void *privdata;
    // Two hash tables: dictRehash moves entries from ht[0] into ht[1]
    dictht ht[2];
    // Rehash progress: index of the next ht[0] bucket to migrate.
    // -1 when no rehash is in progress; advanced once per bucket visited (not per entry)
    long rehashidx;
    // Iterator counter; _dictRehashStep only performs a rehash step while this is 0
    // NOTE(review): presumably the number of iterators currently running — confirm
    unsigned long iterators;
} dict;

/* Per-dictionary callback table that customises how keys and values are handled. */
typedef struct dictType {
    // Hash function used by this dictionary
    uint64_t (*hashFunction)(const void *key);
    // Key duplication (copy) function
    void *(*keyDup)(void *privdata, const void *key);
    // Value duplication (copy) function
    void *(*valDup)(void *privdata, const void *obj);
    // Key comparison function
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    // Key destructor
    void (*keyDestructor)(void *privdata, void *key);
    // Value destructor
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

扩容

申请一块新内存：如果是初次申请，默认容量是4；之后每次扩容，容量都是当前容量的两倍。
新申请的内存地址会赋值给ht[1]。
把rehashidx的值由-1改为0。表示要开始进行rehash操作了。

/*
 * Resize the hash table so it can hold at least `size` entries, re-inserting
 * every existing entry into the new bucket array in a single pass.
 *
 * The bucket count actually used is _dictNextPower(size).
 *
 * Returns DICT_ERR if the table already holds more than `size` entries, or if
 * the new bucket array cannot be allocated (the table is left untouched in
 * both cases). Returns DICT_OK otherwise.
 *
 * NOTE(review): this snippet reads table/size/sizemask/used directly from
 * `dict`, unlike the ht[2]-based dict struct shown elsewhere in this file; it
 * looks like a non-incremental variant — confirm which dict layout it targets.
 */
static int dictExpand(dict *ht, unsigned long size) {
    // The replacement table; built locally and copied over *ht at the end
    dict n;
    // Bucket count of the new table
    unsigned long realsize = _dictNextPower(size), i;

    // Reject a requested size smaller than the number of entries already stored
    if (ht->used > size)
        return DICT_ERR;

    _dictInit(&n, ht->type, ht->privdata);
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = calloc(realsize,sizeof(dictEntry*));
    // Allocation failed: bail out before the old table is modified
    if (n.table == NULL)
        return DICT_ERR;

    // The entries are moved, not copied, so the count carries over unchanged
    n.used = ht->used;
    for (i = 0; i < ht->size && ht->used > 0; i++) {
        dictEntry *he, *nextHe;

        if (ht->table[i] == NULL) continue;

        /* For each hash entry on this slot... */
        he = ht->table[i];
        while(he) {
            // Same width as sizemask so the index is never truncated
            unsigned long h;

            // Save the successor first: he->next is overwritten below
            nextHe = he->next;
            // Recompute the entry's slot in the new table
            h = dictHashKey(ht, he->key) & n.sizemask;
            // Head insertion into the new bucket
            he->next = n.table[h];
            n.table[h] = he;
            ht->used--;
            he = nextHe;
        }
    }
    // Every entry must have been moved out of the old table
    assert(ht->used == 0);
    free(ht->table);
    // Install the new table in place of the old one
    *ht = n;
    return DICT_OK;
}

缩容

当使用量不到总空间10%时，则进行缩容

void tryResizeHashTables(int dbid) {
    // 字典内存大小
    if (htNeedsResize(server.db[dbid].dict))
        dictResize(server.db[dbid].dict);
    // key的过期时间
    if (htNeedsResize(server.db[dbid].expires))
        dictResize(server.db[dbid].expires);
}

/*
 * Resize the dictionary to fit its current number of entries, but never
 * below DICT_HT_INITIAL_SIZE.
 *
 * Returns DICT_ERR when resizing is disabled (dict_can_resize is 0) or a
 * rehash is in progress; otherwise returns the result of dictExpand().
 */
int dictResize(dict *d)
{
    // unsigned long, matching ht[0].used, so a large entry count is not truncated
    unsigned long minimal;

    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
    minimal = d->ht[0].used;
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;
    // Goes through the expand routine, although the effect here is to shrink
    return dictExpand(d, minimal);
}

渐进式rehash

如果一个很大的字典里面有上百万个key，扩容时一次性把所有元素都迁移到新的哈希表中，会长时间阻塞服务，Redis肯定伤不起。

因此Redis采用渐进式rehash：服务在对字典进行操作时，每次只顺带执行一步rehash，把ht[0]中rehashidx指向的一个桶（及其链表上的所有节点）迁移到ht[1]。

/* Run a single rehash step, unless the dictionary's iterator counter is non-zero. */
static void _dictRehashStep(dict *d) {
    if (d->iterators != 0) {
        return;
    }
    dictRehash(d, 1);
}

/*
 * Perform up to n steps of incremental rehashing. One step migrates one
 * non-empty bucket, with its whole chain, from ht[0] to ht[1].
 *
 * At most n*10 empty buckets are skipped per call; once that budget is used
 * up the function returns early.
 *
 * Returns 1 if entries still remain in ht[0]. Returns 0 if no rehash is in
 * progress, or if this call completed the rehash.
 */
int dictRehash(dict *d, int n) {
    // Budget of empty buckets that may be skipped in this call
    int empty_visits = n*10;
    if (!dictIsRehashing(d)) return 0;

    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;
        // rehashidx must stay inside ht[0]; guaranteed while ht[0] still holds entries
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        // Skip empty buckets, giving up once the budget is exhausted
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        // Move every entry of this bucket's chain into ht[1]
        while(de) {
            uint64_t h;

            // Save the successor first: de->next is overwritten below
            nextde = de->next;
            // Slot of this key in the new table
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            // Head insertion into the destination bucket
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        d->ht[0].table[d->rehashidx] = NULL;
        // One bucket migrated: advance to the next bucket index
        d->rehashidx++;
    }
    // Old table fully drained: release it and promote ht[1] to ht[0]
    if (d->ht[0].used == 0) {
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        // Rehash finished: mark it as no longer in progress
        d->rehashidx = -1;
        return 0;
    }
    return 1;
}

如果服务处于空闲状态，则会批量进行rehash操作：每次迁移100个桶，直到全部迁移完成或者超过给定的毫秒数。

/*
 * Rehash in batches of 100 steps until dictRehash() reports nothing is left
 * to do, or until more than `ms` milliseconds have elapsed.
 *
 * Returns the number of steps counted, always a multiple of 100; a batch for
 * which dictRehash() returns 0 is not counted.
 */
int dictRehashMilliseconds(dict *d, int ms) {
    const long long began = timeInMilliseconds();
    int moved = 0;

    for (;;) {
        if (!dictRehash(d, 100)) {
            break;
        }
        moved += 100;
        if (timeInMilliseconds() - began > ms) {
            break;
        }
    }
    return moved;
}

引用《Redis设计与实现》中的渐进式rehash过程：

............直到全部完成。

普通迭代器

/* Iteration state for walking the entries of a dict. */
typedef struct dictIterator {
    // Dictionary being iterated
    dict *d;
    // Index of the hash table bucket the iteration has currently reached
    long index;
    // table: which hash table is being iterated, ht[0] or ht[1]
    // safe: whether this iterator was created as a safe iterator
    int table, safe;
    // Current entry, and the entry to visit next
    dictEntry *entry, *nextEntry;
    // Fingerprint identifying the dictionary; per the original note it changes when the dictionary changes
    // NOTE(review): how it is computed and checked is not shown here — confirm
    long long fingerprint;
} dictIterator;