Redis源码阅读_03_dict

结构定义

// Hash table structure. Every dict owns two of these so that rehashing can
// be carried out incrementally.
typedef struct dictht {
	// Bucket array; each element is the head of a linked list of entries
    dictEntry **table;
    unsigned long size;      // number of buckets allocated in `table` (set by dictExpand)
    unsigned long sizemask;  // size-1; ANDed with a key's hash to obtain the bucket index
    unsigned long used;      // number of entries currently stored in this table
} dictht;

/* One key/value node of a hash table. Entries that fall into the same
 * bucket are chained together through `next`. */
typedef struct dictEntry {
    void *key;
    /* The value: exactly one member of the union is meaningful at a time
     * (a pointer, an unsigned/signed 64-bit integer, or a double). */
    union {
        void *val;
        uint64_t u64;
        int64_t s64;
        double d;
    } v;
    struct dictEntry *next; /* next entry in the same bucket, or NULL */
} dictEntry;

/* The dictionary itself: a pair of hash tables plus the state needed to
 * migrate entries from ht[0] to ht[1] incrementally. */
typedef struct dict {
    dictType *type;		// type-specific functions; think of it as a set of interfaces
    void *privdata;		// private data: optional argument handed to the functions above
    dictht ht[2];		// the two hash tables
    long rehashidx; 	/* rehashing not in progress if rehashidx == -1 */
    unsigned long iterators; /* number of iterators currently running */
} dict;

// Table of function pointers that defines the per-type interface of a dict.
// Every callback except hashFunction takes a `privdata` pointer as its
// first argument.
typedef struct dictType {
    uint64_t (*hashFunction)(const void *key);  // computes the hash value of a key
    void *(*keyDup)(void *privdata, const void *key);
    void *(*valDup)(void *privdata, const void *obj);
    int (*keyCompare)(void *privdata, const void *key1, const void *key2);
    void (*keyDestructor)(void *privdata, void *key);
    void (*valDestructor)(void *privdata, void *obj);
} dictType;

/* If safe is set to 1 this is a safe iterator: functions such as dictAdd
 * and dictFind may be called on the dict while the iteration is running.
 * Otherwise the iterator is unsafe and only dictNext() may be called while
 * iterating.
 * When a safe iterator starts traversing, it puts a mark on the dict; while
 * that mark is present no rehash step is performed, so elements are not
 * returned twice during the traversal.
 */
typedef struct dictIterator {
    dict *d;
    long index;
    int table, safe;
    dictEntry *entry, *nextEntry;
    /* unsafe iterator fingerprint for misuse detection. */
    long long fingerprint;
} dictIterator;

Redis通过dictEnableResize() / dictDisableResize()来启用或禁用哈希表的resize：当存在子进程时，Redis依赖copy-on-write机制，不希望resize带来过多的内存拷贝；
即便dict_can_resize（默认1）被设置为0，Redis仍在负载因子>dict_force_resize_ratio（默认5）时，允许哈希表增长；

接口定义

/* Public dict API prototypes. Only the functions whose behaviour is shown
 * elsewhere in this text carry a description. */
dict *dictCreate(dictType *type, void *privDataPtr); /* allocate and initialize an empty dict */
int dictExpand(dict *d, unsigned long size); /* create ht[0], or set up ht[1] and start rehashing; DICT_OK / DICT_ERR */
int dictAdd(dict *d, void *key, void *val);
dictEntry *dictAddRaw(dict *d, void *key, dictEntry **existing);
dictEntry *dictAddOrFind(dict *d, void *key);
int dictReplace(dict *d, void *key, void *val);
int dictDelete(dict *d, const void *key);
dictEntry *dictUnlink(dict *ht, const void *key);
void dictFreeUnlinkedEntry(dict *d, dictEntry *he);
void dictRelease(dict *d);
dictEntry * dictFind(dict *d, const void *key);
void *dictFetchValue(dict *d, const void *key);
int dictResize(dict *d); /* resize the table towards a 1:1 used/size ratio */
dictIterator *dictGetIterator(dict *d);
dictIterator *dictGetSafeIterator(dict *d);
dictEntry *dictNext(dictIterator *iter);
void dictReleaseIterator(dictIterator *iter);
dictEntry *dictGetRandomKey(dict *d);
dictEntry *dictGetFairRandomKey(dict *d);
unsigned int dictGetSomeKeys(dict *d, dictEntry **des, unsigned int count);
void dictGetStats(char *buf, size_t bufsize, dict *d);
uint64_t dictGenHashFunction(const void *key, int len);
uint64_t dictGenCaseHashFunction(const unsigned char *buf, int len);
void dictEmpty(dict *d, void(callback)(void*));
void dictEnableResize(void);  /* allow hash table resizing */
void dictDisableResize(void); /* disallow hash table resizing */
int dictRehash(dict *d, int n); /* run n incremental rehash steps; 1 = more to do, 0 = done */
int dictRehashMilliseconds(dict *d, int ms); /* rehash in 100-step batches for about ms milliseconds */
void dictSetHashFunctionSeed(uint8_t *seed);
uint8_t *dictGetHashFunctionSeed(void);
unsigned long dictScan(dict *d, unsigned long v, dictScanFunction *fn, dictScanBucketFunction *bucketfn, void *privdata);
uint64_t dictGetHash(dict *d, const void *key);
dictEntry **dictFindEntryRefByPtrAndHash(dict *d, const void *oldptr, uint64_t hash);

/* Hash table types (dictType instances defined in another translation unit) */
extern dictType dictTypeHeapStringCopyKey;
extern dictType dictTypeHeapStrings;
extern dictType dictTypeHeapStringCopyKeyValue;

哈希算法

1）使用字典设置的哈希函数，计算出key的哈希值

2）使用哈希表的sizemask属性和哈希值，计算出索引值

假设一个key的哈希值=8，sizemask属性=3，那么该key的索引值为8 & 3=0

Redis早期版本使用MurmurHash2算法来计算键的hash值（从4.0版本起默认改用SipHash；上文接口中带uint8_t种子的dictSetHashFunctionSeed对应的就是SipHash实现）

该算法的优点在于即使输入的键是有规律的，算法仍然能给出一个良好的随机分布性，并且算法的计算速度也非常快

一般来说，对任意一类数据存在一个完美的哈希函数，这个完美的哈希函数的定义是没有发生任何碰撞，现实中这种函数很难找到，所以人们对完美哈希函数的要求放宽了：在一个特定的数据集上产生的碰撞最少的哈希函数

所以针对特定的业务，产生的特定的数据集，我们是有可能找到一个完美的哈希函数的！

Rehash

一个字典中存在两张哈希表的原因就是为rehash操作做准备的，另外一张哈希表，虽然存在，但是没有申请结点内存空间，只有表结构，所以不会占用很大的内存空间

ht[0]为字典正在使用的哈希表，ht[1]为字典只有表结构的那个哈希表

rehash的步骤如下：

1）为字典的ht[1]分配空间

分配空间的大小：

如果执行的是扩展操作，ht[1]的大小为第一个大于等于 ht[0].used × 2 的 $2^n$（2的n次方）
如果执行的是收缩操作，ht[1]的大小为第一个大于等于 ht[0].used 的 $2^n$（2的n次方）

2）将保存在ht[0]上的所有键值对rehash到ht[1]上面：rehash指的是重新计算key的哈希值和索引值，然后将键值对重新放到ht[1]的指定位置

3）当完成键值对的迁移之后，释放ht[0]，将ht[1]设置为ht[0],并在ht[1]重新设置一次空白的哈希表，为下一次rehash操作做准备。

rehash的时间

1.扩展操作：以下任意一个条件被满足时，程序会自动对哈希表执行扩展操作（并由此开始rehash）

1）服务器目前没有执行BGSAVE命令或BGREWRITEAOF命令并且哈希表的负载因子大于等于1

2）服务器目前正在执行BGSAVE命令或BGREWRITEAOF命令并且哈希表的负载因子大于等于5

引申出的问题：

1）那为什么满足上面两个条件就会自动rehash呢？
2）两个条件的负载因子差距为什么这么大呢？

BGSAVE命令：数据库会开一个子进程将数据库的所有数据以RDB文件的方式保存到硬盘
BGREWRITEAOF命令：开一个子进程执行AOF文件的重写操作

我们知道无论是执行BGSAVE还是执行BGREWRITEAOF，都会开一个子进程，而大多数的操作系统都采用写时复制技术来优化子进程的使用效率，所以在子进程存在期间，服务器会提高执行扩展操作需要的负载因子，从而尽可能的避免在子进程存在期间进行哈希表的扩展操作，这样可以避免不必要的内存写入操作，最大限度节约内存

2.收缩操作：
哈希表的负载因子小于0.1时，自动执行哈希表收缩操作

那么现在有个问题，如果哈希表中存在很多很多的数据，rehash动作一次性完成的时间需要很久而且要占用大量的计算资源，如果rehash动作一次性完成的话，数据量大的情况下服务器可能会在一段时间内停止对外服务！这是不可以忍受的！

所以引出了渐进式rehash：rehash动作不必一次性完成

渐进式rehash

在字典中维护一个索引计数器变量rehashidx，将其设置为0，表示rehash操作正式开始，在rehash期间，每次对字典执行添加，删除，查找或者更新操作时，程序除了完成指定操作外，还会顺带将ht[0]哈希表在rehashidx索引上的键值对rehash到ht[1]，完成顺带操作后，程序将rehashidx属性的值加一，随着程序的不断进行，最终在某个时间点上，rehash操作会全部完成，这个时候将rehashidx设置为-1

渐进式rehash的优势在于它采用分治的方法，将rehash键值对所需的计算工作平均摊到对字典的每个添加，删除，修改，查找，更新的操作上，从而避免了集中式rehash带来的庞大计算量

核心：在rehash过程中，每个对字典的操作（增删改查）除了完成特定的任务，还需要顺带完成一部分rehash迁移操作

渐进式rehash执行期间哈希表的操作：

因为在进行渐进式rehash的过程中，字典会同时使用两张哈希表，所以在渐进式rehash过程中，字典的删除，查找，修改，更新等操作会在两张表上进行（不是同时进行），比如查找的话是先去ht[0]查找，没有找到的话再去ht[1]查找；特别是插入操作，只会在ht[1]进行，这样保证了ht[0]上的元素只会减少，不会增加，随着操作的不断进行，ht[0]将会变成一张空表

代码实现

初始化哈希表：重置、初始化、创建

/* Put a hash table descriptor back into the empty state: no bucket array,
 * and zero size, mask and entry count. Only the fields are overwritten;
 * nothing is freed here, so the caller must already have released (or
 * handed over) the old bucket array. */
static void _dictReset(dictht *ht)
{
    ht->size = ht->sizemask = ht->used = 0;
    ht->table = NULL;
}

/* Bring an already allocated dict to its initial state: both hash tables
 * empty, no rehash in progress (rehashidx == -1), no running iterators,
 * and the given type / private data recorded.
 * Always returns DICT_OK. */
int _dictInit(dict *d, dictType *type,
        void *privDataPtr)
{
    int which;

    d->type = type;
    d->privdata = privDataPtr;

    /* Both tables start out empty. */
    for (which = 0; which < 2; which++)
        _dictReset(&d->ht[which]);

    d->iterators = 0;
    d->rehashidx = -1;
    return DICT_OK;
}

/* Allocate a dict with zmalloc() and initialize it through _dictInit().
 * Returns the new dict; the result of zmalloc() is used without a NULL
 * check in this function. */
dict *dictCreate(dictType *type,
        void *privDataPtr)
{
    dict *fresh = zmalloc(sizeof(dict));

    _dictInit(fresh, type, privDataPtr);
    return fresh;
}

计算rehash操作所需要的ht[1]的大小

/* Return the table size to use for a requested `size`: starting from
 * DICT_HT_INITIAL_SIZE, keep doubling until the value is >= size.
 * Requests of LONG_MAX or more are answered with LONG_MAX + 1 directly,
 * so the doubling below never has to go past that value. */
static unsigned long _dictNextPower(unsigned long size)
{
    unsigned long candidate;

    if (size >= LONG_MAX) return LONG_MAX + 1LU;

    for (candidate = DICT_HT_INITIAL_SIZE; candidate < size; candidate *= 2)
        ; /* doubling happens in the loop header */
    return candidate;
}

哈希表的扩展

/*
 * Expand or create the hash table of a dict.
 *
 * If ht[0] has no bucket array yet, the new table becomes ht[0].
 * Otherwise the new table becomes ht[1] and rehashidx is set to 0, which
 * turns on incremental rehashing for the dict.
 *
 * The requested size is rounded by _dictNextPower().
 *
 * Returns DICT_ERR when:
 *   - a rehash is already in progress,
 *   - size is smaller than the number of entries in ht[0],
 *   - the rounded size is smaller than the requested size, or its bucket
 *     array size in bytes cannot be represented (multiplication overflow),
 *   - the rounded size equals the current size of ht[0].
 * Returns DICT_OK otherwise.
 *
 * T = O(N)
 */
int dictExpand(dict *d, unsigned long size)
{
    /* the size is invalid if it is smaller than the number of
     * elements already inside the hash table */
    if (dictIsRehashing(d) || d->ht[0].used > size)
        return DICT_ERR;

    dictht n; /* the new hash table */
    unsigned long realsize = _dictNextPower(size);

    /* Reject sizes that cannot be honoured: either the rounding clamped the
     * value below what was asked for, or realsize*sizeof(dictEntry*) would
     * wrap around and make the allocation far smaller than n.size claims. */
    if (realsize < size || realsize > (size_t)-1 / sizeof(dictEntry*))
        return DICT_ERR;

    /* Rehashing to the same table size is not useful. */
    if (realsize == d->ht[0].size) return DICT_ERR;

    /* Allocate the new hash table and initialize all pointers to NULL */
    n.size = realsize;
    n.sizemask = realsize-1;
    n.table = zcalloc(realsize*sizeof(dictEntry*));
    n.used = 0;

    /* Is this the first initialization? If so it's not really a rehashing
     * we just set the first hash table so that it can accept keys. */
    if (d->ht[0].table == NULL) {
        d->ht[0] = n;
        return DICT_OK;
    }

    /* Prepare a second hash table for incremental rehashing */
    d->ht[1] = n;
    d->rehashidx = 0;
    return DICT_OK;
}

哈希表的缩小

/*
 * Resize the given dict so that the ratio between its number of stored
 * entries and its table size gets close to 1:1.
 *
 * Returns DICT_ERR if the dict is already rehashing or dict_can_resize is
 * false; otherwise returns whatever dictExpand() returns (DICT_OK once the
 * new table has been set up).
 *
 * T = O(N)
 */
int dictResize(dict *d) {

    /* Minimum number of buckets the new table needs. Kept as unsigned long,
     * the type of ht[0].used, so a large entry count is not truncated. */
    unsigned long minimal;

    /* Not allowed while resizing is disabled or a rehash is in progress. */
    if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;

    /* One bucket per stored entry, but never below the initial size. */
    minimal = d->ht[0].used;
    if (minimal < DICT_HT_INITIAL_SIZE)
        minimal = DICT_HT_INITIAL_SIZE;

    /* Set up the new table. T = O(N) */
    return dictExpand(d, minimal);
}

rehash

/*
 * Perform N steps of incremental rehashing.
 *
 * Returns 1 if there are still keys to move from hash table 0 to hash
 * table 1 (this includes the case where the empty-bucket budget ran out
 * before N steps were done), and 0 if every key has been migrated or the
 * dict is not rehashing at all.
 *
 * Note that one rehash step works on one hash table index (bucket). A
 * bucket may hold several entries, and all entries of the bucket being
 * rehashed are moved to the new hash table.
 *
 * T = O(N)
 */
int dictRehash(dict *d, int n) {
    int empty_visits = n*10; /* Max number of empty buckets to visit. */
    // Nothing to do if the dict is not rehashing
    if (!dictIsRehashing(d)) return 0;

    // Perform up to N migration steps
    while(n-- && d->ht[0].used != 0) {
        dictEntry *de, *nextde;

        /* Note that rehashidx can't overflow as we are sure there are more
         * elements because ht[0].used != 0 */
        assert(d->ht[0].size > (unsigned long)d->rehashidx);
        // Advance to the first non-empty bucket; give up for this call
        // (reporting "more to do") once the empty-bucket budget is spent
        while(d->ht[0].table[d->rehashidx] == NULL) {
            d->rehashidx++;
            if (--empty_visits == 0) return 1;
        }
        de = d->ht[0].table[d->rehashidx];
        /* Move all the keys in this bucket from the old to the new hash HT */
        while(de) {
            uint64_t h;

            /* Remember the successor first: de->next is overwritten below. */
            nextde = de->next;
            /* Get the index in the new hash table */
            h = dictHashKey(d, de->key) & d->ht[1].sizemask;
            /* Insert at the head of the destination bucket. */
            de->next = d->ht[1].table[h];
            d->ht[1].table[h] = de;
            d->ht[0].used--;
            d->ht[1].used++;
            de = nextde;
        }
        /* The source bucket is now empty; move on to the next index. */
        d->ht[0].table[d->rehashidx] = NULL;
        d->rehashidx++;
    }

    /* Check if we already rehashed the whole table... */
    if (d->ht[0].used == 0) {
        /* Free the old bucket array, promote ht[1] to ht[0], and leave
         * ht[1] empty for the next rehash. */
        zfree(d->ht[0].table);
        d->ht[0] = d->ht[1];
        _dictReset(&d->ht[1]);
        d->rehashidx = -1;
        return 0;
    }

    /* More to rehash... */
    return 1;
}

给定时间进行rehash

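/*
 * Rehash the dict for about the given number of milliseconds, in batches
 * of 100 steps. One step migrates one bucket (which may hold several
 * dictEntry nodes), so a batch covers up to 100 buckets, not 100 entries.
 *
 * T = O(N)
 */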
int dictRehashMilliseconds(dict *d, int ms)
{
    /* Wall-clock time at which this call started. */
    long long began = timeInMilliseconds();

    /* Rehash steps accounted for so far; grows by 100 for every batch
     * after which dictRehash() reported that more work remains. */
    int steps_done = 0;

    while (dictRehash(d, 100)) {
        steps_done += 100;

        /* The clock is only checked between batches, so the call may run
         * somewhat longer than ms. */
        if (timeInMilliseconds() - began > ms) break;
    }

    /* A final batch that completes the rehash is not added to the count. */
    return steps_done;
}

posted @ 2022-03-13 15:04 Coputing 阅读(55) 评论(0) 收藏举报

刷新页面返回顶部

Coputing