【redis源码】(七)Dict.c

无疑,作为key-value的nosql存储工具,redis中最核心的数据结构便是dict本身了。哈希表作为查找效率为 O(1) 的数据结构,本身也存在着一些局限性,例如hash算法的选择,怎样做到元素在桶内的均匀分布,以及当哈希表内元素数量增多时,如何处理随之增加的碰撞;碰撞链如果较深,会严重影响哈希表的效率。

 

redis中的dict便是hash实现的一个很好的范例,dict的实现中最巧妙的细节便是采用了类似双buffer的hash扩容方式,以及缓慢(渐进式)的哈希表转移算法。

1. 哈希表扩容方式【双buffer的hash表结构】

1 typedef struct dict {
2     dictType *type;
3     void *privdata;
4     dictht ht[2];
5     int rehashidx; /* rehashing not in progress if rehashidx == -1 */
6     int iterators; /* number of iterators currently running */
7 } dict;

如代码所示,在哈希表resizing的过程中,ht[0]和ht[1]两个哈希表同时工作,直到ht[0]中的元素完全转移到ht[1]中来

2. 哈希表转移过程是平滑缓慢的

哈希表的转移并不是一步到位的,这里作者应该是考虑到,在哈希表很大的情况下,如果一次性的对哈希表进行转移操作,会引起性能抖动,所以以两种转移触发条件来对哈希表进行转移

a. 在每次哈希表进行查询或者更新操作时(且当前没有safe iterator在使用该dict),转移一个桶(bucket);由于采用链地址法,一个桶内可能有多个元素,它们会被一并转移

1 static void _dictRehashStep(dict *d) {
2     if (d->iterators == 0) dictRehash(d,1);
3 }

b. 会有定时操作,每次执行指定时长(以毫秒计)的转移操作,粒度是每次100个桶【具体由谁来触发,还需要进一步看代码】

 1 int dictRehashMilliseconds(dict *d, int ms) {
 2     long long start = timeInMilliseconds();
 3     int rehashes = 0;
 4 
 5     while(dictRehash(d,100)) {
 6         rehashes += 100;
 7         if (timeInMilliseconds()-start > ms) break;
 8     }
 9     return rehashes;
10 }

 

 

 

好了,开始贴代码

dict.h

  1 #ifndef __DICT_H
  2 #define __DICT_H
  3 
  4 #define DICT_OK 0
  5 #define DICT_ERR 1
  6 
  7 /* Unused arguments generate annoying warnings... */
  8 #define DICT_NOTUSED(V) ((void) V)
  9 
 10 typedef struct dictEntry { /* one key/value node stored in a bucket chain */
 11     void *key;
 12     void *val;
 13     struct dictEntry *next; /* next entry chained in the same bucket, NULL at the end */
 14 } dictEntry;
 15 
 16 typedef struct dictType { /* per-dictionary callbacks; privdata is the dict's privdata field */
 17     unsigned int (*hashFunction)(const void *key); /* called unconditionally by dictHashKey */
 18     void *(*keyDup)(void *privdata, const void *key); /* optional: when NULL the key pointer is stored as is */
 19     void *(*valDup)(void *privdata, const void *obj); /* optional: when NULL the value pointer is stored as is */
 20     int (*keyCompare)(void *privdata, const void *key1, const void *key2); /* optional: non-zero means equal; when NULL keys are compared by pointer */
 21     void (*keyDestructor)(void *privdata, void *key); /* optional: skipped when NULL */
 22     void (*valDestructor)(void *privdata, void *obj); /* optional: skipped when NULL */
 23 } dictType;
 24 
 25 /* This is our hash table structure. Every dictionary has two of these because
 26  * we implement incremental rehashing from the old table to the new one. */
 27 typedef struct dictht {
 28     dictEntry **table; /* array of bucket heads, NULL until the table is allocated */
 29     unsigned long size; /* number of buckets */
 30     unsigned long sizemask; /* size-1, ANDed with the hash to get a bucket index */
 31     unsigned long used; /* number of entries currently stored */
 32 } dictht;
 33 
 34 typedef struct dict {
 35     dictType *type; /* callbacks used to hash, copy, compare and free keys/values */
 36     void *privdata; /* passed as first argument to the dictType callbacks */
 37     dictht ht[2]; /* ht[0] is the main table, ht[1] is the rehashing target */
 38     int rehashidx; /* rehashing not in progress if rehashidx == -1 */
 39     int iterators; /* number of iterators currently running */
 40 } dict;
 41 
 42 /* If safe is set to 1 this is a safe iterator, that means, you can call
 43  * dictAdd, dictFind, and other functions against the dictionary even while
 44  * iterating. Otherwise it is a non safe iterator, and only dictNext()
 45  * should be called while iterating. */
 46 typedef struct dictIterator {
 47     dict *d; /* dictionary being iterated */
 48     int table, index, safe; /* current ht[] slot, current bucket index (-1 before the first dictNext), safe flag */
 49     dictEntry *entry, *nextEntry; /* entry last returned, and its successor saved in case the caller deletes entry */
 50 } dictIterator;
 51 
 52 /* This is the initial size of every hash table */
 53 #define DICT_HT_INITIAL_SIZE     4
 54 
 55 /* ------------------------------- Macros ------------------------------------*/
 56 #define dictFreeEntryVal(d, entry) \
 57     if ((d)->type->valDestructor) \
 58         (d)->type->valDestructor((d)->privdata, (entry)->val)
 59 
 60 #define dictSetHashVal(d, entry, _val_) do { \
 61     if ((d)->type->valDup) \
 62         entry->val = (d)->type->valDup((d)->privdata, _val_); \
 63     else \
 64         entry->val = (_val_); \
 65 } while(0)
 66 
 67 #define dictFreeEntryKey(d, entry) \
 68     if ((d)->type->keyDestructor) \
 69         (d)->type->keyDestructor((d)->privdata, (entry)->key)
 70 
 71 #define dictSetHashKey(d, entry, _key_) do { \
 72     if ((d)->type->keyDup) \
 73         entry->key = (d)->type->keyDup((d)->privdata, _key_); \
 74     else \
 75         entry->key = (_key_); \
 76 } while(0)
 77 
 78 #define dictCompareHashKeys(d, key1, key2) \
 79     (((d)->type->keyCompare) ? \
 80         (d)->type->keyCompare((d)->privdata, key1, key2) : \
 81         (key1) == (key2))
 82 
 83 #define dictHashKey(d, key) (d)->type->hashFunction(key)
 84 
 85 #define dictGetEntryKey(he) ((he)->key)
 86 #define dictGetEntryVal(he) ((he)->val)
 87 #define dictSlots(d) ((d)->ht[0].size+(d)->ht[1].size)
 88 #define dictSize(d) ((d)->ht[0].used+(d)->ht[1].used)
 89 #define dictIsRehashing(ht) ((ht)->rehashidx != -1)
 90 
 91 /* API */
 92 dict *dictCreate(dictType *type, void *privDataPtr);
 93 int dictExpand(dict *d, unsigned long size);
 94 int dictAdd(dict *d, void *key, void *val);
 95 int dictReplace(dict *d, void *key, void *val);
 96 int dictDelete(dict *d, const void *key);
 97 int dictDeleteNoFree(dict *d, const void *key);
 98 void dictRelease(dict *d);
 99 dictEntry * dictFind(dict *d, const void *key);
100 void *dictFetchValue(dict *d, const void *key);
101 int dictResize(dict *d);
102 dictIterator *dictGetIterator(dict *d);
103 dictIterator *dictGetSafeIterator(dict *d);
104 dictEntry *dictNext(dictIterator *iter);
105 void dictReleaseIterator(dictIterator *iter);
106 dictEntry *dictGetRandomKey(dict *d);
107 void dictPrintStats(dict *d);
108 unsigned int dictGenHashFunction(const unsigned char *buf, int len);
109 unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len);
110 void dictEmpty(dict *d);
111 void dictEnableResize(void);
112 void dictDisableResize(void);
113 int dictRehash(dict *d, int n);
114 int dictRehashMilliseconds(dict *d, int ms);
115 
116 /* Hash table types */
117 extern dictType dictTypeHeapStringCopyKey;
118 extern dictType dictTypeHeapStrings;
119 extern dictType dictTypeHeapStringCopyKeyValue;
120 
121 #endif /* __DICT_H */

dict.c

  1 #include "fmacros.h"
  2 
  3 #include <stdio.h>
  4 #include <stdlib.h>
  5 #include <string.h>
  6 #include <stdarg.h>
  7 #include <assert.h>
  8 #include <limits.h>
  9 #include <sys/time.h>
 10 #include <ctype.h>
 11 
 12 #include "dict.h"
 13 #include "zmalloc.h"
 14 
 15 /* Using dictEnableResize() / dictDisableResize() we make possible to
 16  * enable/disable resizing of the hash table as needed. This is very important
 17  * for Redis, as we use copy-on-write and don't want to move too much memory
 18  * around when there is a child performing saving operations.
 19  *
 20  * Note that even when dict_can_resize is set to 0, not all resizes are
 21  * prevented: an hash table is still allowed to grow if the ratio between
 22  * the number of elements and the buckets > dict_force_resize_ratio. */
 23 static int dict_can_resize = 1;
 24 static unsigned int dict_force_resize_ratio = 5;
 25 
 26 /* -------------------------- private prototypes ---------------------------- */
 27 
 28 //Grow the dict's bucket array when it is needed
 29 static int _dictExpandIfNeeded(dict *ht);
 30 //Round a requested size up to the bucket count to use, which is a power of two
 31 static unsigned long _dictNextPower(unsigned long size);
 32 //Return the bucket index where key should be inserted, or -1 if the key already exists;
 33 //while the dict is rehashing the returned index refers to ht[1]
 34 static int _dictKeyIndex(dict *ht, const void *key);
 35 //Initialize a dict: both hash tables start out empty
 36 static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
 37 
 38 /* -------------------------- hash functions -------------------------------- */
 39 //一系列哈希函数
 40 /* Thomas Wang's 32 bit Mix Function */
 41 unsigned int dictIntHashFunction(unsigned int key)
 42 {
 43     key += ~(key << 15);
 44     key ^=  (key >> 10);
 45     key +=  (key << 3);
 46     key ^=  (key >> 6);
 47     key += ~(key << 11);
 48     key ^=  (key >> 16);
 49     return key;
 50 }
 51 
 52 /* Identity hash function for integer keys */
 53 unsigned int dictIdentityHashFunction(unsigned int key)
 54 {
 55     return key;
 56 }
 57 
 58 /* Generic hash function (a popular one from Bernstein).
 59  * I tested a few and this was the best. */
 60 unsigned int dictGenHashFunction(const unsigned char *buf, int len) {
 61     unsigned int hash = 5381;
 62 
 63     while (len--)
 64         hash = ((hash << 5) + hash) + (*buf++); /* hash * 33 + c */
 65     return hash;
 66 }
 67 
 68 /* And a case insensitive version */
 69 unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
 70     unsigned int hash = 5381;
 71 
 72     while (len--)
 73         hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
 74     return hash;
 75 }
 76 
 77 /* ----------------------------- API implementation ------------------------- */
 78 //重置一个dictht结构
 79 /* Reset an hashtable already initialized with ht_init().
 80  * NOTE: This function should only called by ht_destroy(). */
 81 static void _dictReset(dictht *ht)
 82 {
 83     ht->table = NULL;
 84     ht->size = 0;
 85     ht->sizemask = 0;
 86     ht->used = 0;
 87 }
 88 //初始化一个新的哈希表结构,并且调用_dictInit对其进行初始化
 89 /* Create a new hash table */
 90 dict *dictCreate(dictType *type,
 91         void *privDataPtr)
 92 {
 93     dict *d = zmalloc(sizeof(*d));
 94 
 95     _dictInit(d,type,privDataPtr);
 96     return d;
 97 }
 98 
 99 //初始化哈希表
100 /* Initialize the hash table */
101 int _dictInit(dict *d, dictType *type,
102         void *privDataPtr)
103 {
104     _dictReset(&d->ht[0]);
105     _dictReset(&d->ht[1]);
106     d->type = type;
107     d->privdata = privDataPtr;
108     d->rehashidx = -1;
109     d->iterators = 0;
110     return DICT_OK;
111 }
112 
113 //resize哈希表d,如果entry数量小于默认初始值,将其置为初始值
114 //否则将其置为与保存的元素数量相同
115 /* Resize the table to the minimal size that contains all the elements,
116  * but with the invariant of a USER/BUCKETS ratio near to <= 1 */
117 int dictResize(dict *d)
118 {
119     int minimal;
120 
121     if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
122     minimal = d->ht[0].used;
123     if (minimal < DICT_HT_INITIAL_SIZE)
124         minimal = DICT_HT_INITIAL_SIZE;
125     return dictExpand(d, minimal);
126 }
127 
128 
129 //根据size,得到下一个hash的size,应该是2的幂次
130 //如果size的大小小于目前元素的数量,或者dict正在resize,则终止expanding
131 //如果确定可以resize,申请一个newsize大小的dicthashtable,并为其初始化
132 /* Expand or create the hashtable */
133 int dictExpand(dict *d, unsigned long size)
134 {
135     dictht n; /* the new hashtable */
136     unsigned long realsize = _dictNextPower(size);
137 
138     /* the size is invalid if it is smaller than the number of
139      * elements already inside the hashtable */
140     if (dictIsRehashing(d) || d->ht[0].used > size)
141         return DICT_ERR;
142 
143     /* Allocate the new hashtable and initialize all pointers to NULL */
144     n.size = realsize;
145     n.sizemask = realsize-1;
146     n.table = zcalloc(realsize*sizeof(dictEntry*));
147     n.used = 0;
148 
149     /* Is this the first initialization? If so it's not really a rehashing
150      * we just set the first hash table so that it can accept keys. */
151     if (d->ht[0].table == NULL) {
152         d->ht[0] = n;
153         return DICT_OK;
154     }
155 
156     /* Prepare a second hash table for incremental rehashing */
157     d->ht[1] = n;
158     d->rehashidx = 0;
159     return DICT_OK;
160 }
161 
162 
163 
164 //Rehashing is incremental: each call migrates at most n buckets, which
165 //spreads the cost over many calls and avoids latency spikes
166 /* Performs N steps of incremental rehashing. Returns 1 if there are still
167  * keys to move from the old to the new hash table, otherwise 0 is returned.
168  * Note that a rehashing step consists in moving a bucket (that may have more
169  * than one key as we use chaining) from the old to the new hash table. */
170 int dictRehash(dict *d, int n) {
171     if (!dictIsRehashing(d)) return 0;
172 
173     while(n--) {
174         dictEntry *de, *nextde;
175 
176         /* Check if we already rehashed the whole table... */
177         if (d->ht[0].used == 0) {
178             zfree(d->ht[0].table);
179             d->ht[0] = d->ht[1]; /* the new table becomes the main one */
180             _dictReset(&d->ht[1]);
181             d->rehashidx = -1; /* rehashing finished */
182             return 0;
183         }
184 
185         /* Note that rehashidx can't overflow as we are sure there are more
186          * elements because ht[0].used != 0 */
187         while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
188         de = d->ht[0].table[d->rehashidx];
189         /* Move all the keys in this bucket from the old to the new hash HT */
190         while(de) {
191             unsigned int h;
192 
193             nextde = de->next;
194             /* Get the index in the new hash table */
195             h = dictHashKey(d, de->key) & d->ht[1].sizemask;
196             de->next = d->ht[1].table[h]; /* push at the head of the target chain */
197             d->ht[1].table[h] = de;
198             d->ht[0].used--;
199             d->ht[1].used++;
200             de = nextde;
201         }
202         d->ht[0].table[d->rehashidx] = NULL;
203         d->rehashidx++;
204     }
205     return 1; /* also returned when the last step emptied ht[0]: the swap happens on the next call */
206 }
207 
/* Return the current time reported by gettimeofday(), in milliseconds. */
long long timeInMilliseconds(void) {
    struct timeval now;
    long long millis;

    gettimeofday(&now,NULL);
    millis = ((long long)now.tv_sec)*1000;
    millis += now.tv_usec/1000;
    return millis;
}
215 //每次执行一定时间的rehashing操作,这次rehasing的时间不超过ms毫秒
216 /* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
217 int dictRehashMilliseconds(dict *d, int ms) {
218     long long start = timeInMilliseconds();
219     int rehashes = 0;
220 
221     while(dictRehash(d,100)) {
222         rehashes += 100;
223         if (timeInMilliseconds()-start > ms) break;
224     }
225     return rehashes;
226 }
227 
228 
229 //这个函数执行一次rehashing,即移动一个元素。
230 //这个函数在任何一次查询或者更新操作时会被调用
231 //将rehashing的性能消耗分布在每一步
232 /* This function performs just a step of rehashing, and only if there are
233  * no safe iterators bound to our hash table. When we have iterators in the
234  * middle of a rehashing we can't mess with the two hash tables otherwise
235  * some element can be missed or duplicated.
236  *
237  * This function is called by common lookup or update operations in the
238  * dictionary so that the hash table automatically migrates from H1 to H2
239  * while it is actively used. */
240 static void _dictRehashStep(dict *d) {
241     if (d->iterators == 0) dictRehash(d,1);
242 }
243 
244 //在d中增加一个键值对
245 /* Add an element to the target hash table */
246 int dictAdd(dict *d, void *key, void *val)
247 {
248     int index;
249     dictEntry *entry;
250     dictht *ht;
251 
252     if (dictIsRehashing(d)) _dictRehashStep(d);
253 
254     /* Get the index of the new element, or -1 if
255      * the element already exists. */
256     if ((index = _dictKeyIndex(d, key)) == -1)
257         return DICT_ERR;
258 
259     /* Allocates the memory and stores key */
260     ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
261     entry = zmalloc(sizeof(*entry));
262     entry->next = ht->table[index];
263     ht->table[index] = entry;
264     ht->used++;
265 
266     /* Set the hash entry fields. */
267     dictSetHashKey(d, entry, key);
268     dictSetHashVal(d, entry, val);
269     return DICT_OK;
270 }
271 
272 //增加一个元素,如果存在,替换
273 /* Add an element, discarding the old if the key already exists.
274  * Return 1 if the key was added from scratch, 0 if there was already an
275  * element with such key and dictReplace() just performed a value update
276  * operation. */
277 int dictReplace(dict *d, void *key, void *val)
278 {
279     dictEntry *entry, auxentry;
280 
281     /* Try to add the element. If the key
282      * does not exists dictAdd will suceed. */
283     if (dictAdd(d, key, val) == DICT_OK)
284         return 1;
285     /* It already exists, get the entry */
286     entry = dictFind(d, key);
287     /* Free the old value and set the new one */
288     /* Set the new value and free the old one. Note that it is important
289      * to do that in this order, as the value may just be exactly the same
290      * as the previous one. In this context, think to reference counting,
291      * you want to increment (set), and then decrement (free), and not the
292      * reverse. */
293     auxentry = *entry;
294     dictSetHashVal(d, entry, val);
295     dictFreeEntryVal(d, &auxentry);
296     return 0;
297 }
298 
299 //删除一个元素
300 /* Search and remove an element */
301 static int dictGenericDelete(dict *d, const void *key, int nofree)
302 {
303     unsigned int h, idx;
304     dictEntry *he, *prevHe;
305     int table;
306 
307     if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
308     if (dictIsRehashing(d)) _dictRehashStep(d);
309     h = dictHashKey(d, key);
310 
311     for (table = 0; table <= 1; table++) {
312         idx = h & d->ht[table].sizemask;
313         he = d->ht[table].table[idx];
314         prevHe = NULL;
315         while(he) {
316             if (dictCompareHashKeys(d, key, he->key)) {
317                 /* Unlink the element from the list */
318                 if (prevHe)
319                     prevHe->next = he->next;
320                 else
321                     d->ht[table].table[idx] = he->next;
322                 if (!nofree) {
323                     dictFreeEntryKey(d, he);
324                     dictFreeEntryVal(d, he);
325                 }
326                 zfree(he);
327                 d->ht[table].used--;
328                 return DICT_OK;
329             }
330             prevHe = he;
331             he = he->next;
332         }
333         if (!dictIsRehashing(d)) break;
334     }
335     return DICT_ERR; /* not found */
336 }
337 
338 //删除ht中的一个元素
339 int dictDelete(dict *ht, const void *key) {
340     return dictGenericDelete(ht,key,0);
341 }
342 
343 //删除一个袁术,不释放old键值对的空间
344 int dictDeleteNoFree(dict *ht, const void *key) {
345     return dictGenericDelete(ht,key,1);
346 }
347 
348 //释放d中的dictht ht及其中所有的keyvalue对
349 /* Destroy an entire dictionary */
350 int _dictClear(dict *d, dictht *ht)
351 {
352     unsigned long i;
353 
354     /* Free all the elements */
355     for (i = 0; i < ht->size && ht->used > 0; i++) {
356         dictEntry *he, *nextHe;
357 
358         if ((he = ht->table[i]) == NULL) continue;
359         while(he) {
360             nextHe = he->next;
361             dictFreeEntryKey(d, he);
362             dictFreeEntryVal(d, he);
363             zfree(he);
364             ht->used--;
365             he = nextHe;
366         }
367     }
368     /* Free the table and the allocated cache structure */
369     zfree(ht->table);
370     /* Re-initialize the table */
371     _dictReset(ht);
372     return DICT_OK; /* never fails */
373 }
374 
375 //释放整个哈希表
376 /* Clear & Release the hash table */
377 void dictRelease(dict *d)
378 {
379     _dictClear(d,&d->ht[0]);
380     _dictClear(d,&d->ht[1]);
381     zfree(d);
382 }
383 
384 //找到key所在的entry
385 dictEntry *dictFind(dict *d, const void *key)
386 {
387     dictEntry *he;
388     unsigned int h, idx, table;
389 
390     if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
391     if (dictIsRehashing(d)) _dictRehashStep(d);
392     h = dictHashKey(d, key);
393     for (table = 0; table <= 1; table++) {
394         idx = h & d->ht[table].sizemask;
395         he = d->ht[table].table[idx];
396         while(he) {
397             if (dictCompareHashKeys(d, key, he->key))
398                 return he;
399             he = he->next;
400         }
401         if (!dictIsRehashing(d)) return NULL;
402     }
403     return NULL;
404 }
405 
406 //拿到key的value,如果不存在,返回NULL
407 void *dictFetchValue(dict *d, const void *key) {
408     dictEntry *he;
409 
410     he = dictFind(d,key);
411     return he ? dictGetEntryVal(he) : NULL;
412 }
413 
414 //拿到dict的iterator
415 dictIterator *dictGetIterator(dict *d)
416 {
417     dictIterator *iter = zmalloc(sizeof(*iter));
418 
419     iter->d = d;
420     iter->table = 0;
421     iter->index = -1;
422     iter->safe = 0;
423     iter->entry = NULL;
424     iter->nextEntry = NULL;
425     return iter;
426 }
427 
428 //得到safe的iterator
429 //如果iterator是safe的,则可以进行修改操作,否则,只能执行dictNext
430 dictIterator *dictGetSafeIterator(dict *d) {
431     dictIterator *i = dictGetIterator(d);
432 
433     i->safe = 1;
434     return i;
435 }
436 
437 //Advance the iterator and return the next entry, or NULL when the iteration is over
438 dictEntry *dictNext(dictIterator *iter)
439 {
440     while (1) {
441         if (iter->entry == NULL) {
442             dictht *ht = &iter->d->ht[iter->table];
443             if (iter->safe && iter->index == -1 && iter->table == 0)
444                 iter->d->iterators++; /* first call on a safe iterator: count it in the dict */
445             iter->index++;
446             if (iter->index >= (signed) ht->size) {
447                 if (dictIsRehashing(iter->d) && iter->table == 0) {
448                     iter->table++; /* ht[0] exhausted while rehashing: continue with ht[1] */
449                     iter->index = 0;
450                     ht = &iter->d->ht[1];
451                 } else {
452                     break; /* no more buckets to visit */
453                 }
454             }
455             iter->entry = ht->table[iter->index];
456         } else {
457             iter->entry = iter->nextEntry; /* keep walking the current bucket chain */
458         }
459         if (iter->entry) {
460             /* We need to save the 'next' here, the iterator user
461              * may delete the entry we are returning. */
462             iter->nextEntry = iter->entry->next;
463             return iter->entry;
464         }
465     }
466     return NULL;
467 }
468 
469 //释放哈希表的iterator
470 void dictReleaseIterator(dictIterator *iter)
471 {
472     if (iter->safe && !(iter->index == -1 && iter->table == 0))
473         iter->d->iterators--;
474     zfree(iter);
475 }
476 
477 /* Return a random entry from the hash table. Useful to
478  * implement randomized algorithms */
479  //得到一个随机key
480 dictEntry *dictGetRandomKey(dict *d)
481 {
482     dictEntry *he, *orighe;
483     unsigned int h;
484     int listlen, listele;
485 
486     if (dictSize(d) == 0) return NULL;
487     if (dictIsRehashing(d)) _dictRehashStep(d);
488     if (dictIsRehashing(d)) {
489         do {
490             h = random() % (d->ht[0].size+d->ht[1].size);
491             he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
492                                       d->ht[0].table[h];
493         } while(he == NULL);
494     } else {
495         do {
496             h = random() & d->ht[0].sizemask;
497             he = d->ht[0].table[h];
498         } while(he == NULL);
499     }
500 
501     /* Now we found a non empty bucket, but it is a linked
502      * list and we need to get a random element from the list.
503      * The only sane way to do so is counting the elements and
504      * select a random index. */
505     listlen = 0;
506     orighe = he;
507     while(he) {
508         he = he->next;
509         listlen++;
510     }
511     listele = random() % listlen;
512     he = orighe;
513     while(listele--) he = he->next;
514     return he;
515 }
516 
517 /* ------------------------- private functions ------------------------------ */
518 
519 /* Expand the hash table if needed */
520 //如果哈希表需要resize,则执行dictexpand
521 static int _dictExpandIfNeeded(dict *d)
522 {
523     /* Incremental rehashing already in progress. Return. */
524     if (dictIsRehashing(d)) return DICT_OK;
525 
526     /* If the hash table is empty expand it to the intial size. */
527     if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
528 
529     /* If we reached the 1:1 ratio, and we are allowed to resize the hash
530      * table (global setting) or we should avoid it but the ratio between
531      * elements/buckets is over the "safe" threshold, we resize doubling
532      * the number of buckets. */
533     if (d->ht[0].used >= d->ht[0].size &&
534         (dict_can_resize ||
535          d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
536     {
537         return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?
538                                     d->ht[0].size : d->ht[0].used)*2);
539     }
540     return DICT_OK;
541 }
542 
543 //根据size,得到比size大的最小的一个2的幂次数作为新哈希表的size值
544 /* Our hash table capability is a power of two */
545 static unsigned long _dictNextPower(unsigned long size)
546 {
547     unsigned long i = DICT_HT_INITIAL_SIZE;
548 
549     if (size >= LONG_MAX) return LONG_MAX;
550     while(1) {
551         if (i >= size)
552             return i;
553         i *= 2;
554     }
555 }
556 
557 
558 //返回key在d中所在的index值,如果已经存在,则返回-1,否则返回所在entry的index值
559 /* Returns the index of a free slot that can be populated with
560  * an hash entry for the given 'key'.
561  * If the key already exists, -1 is returned.
562  *
563  * Note that if we are in the process of rehashing the hash table, the
564  * index is always returned in the context of the second (new) hash table. */
565 static int _dictKeyIndex(dict *d, const void *key)
566 {
567     unsigned int h, idx, table;
568     dictEntry *he;
569 
570     /* Expand the hashtable if needed */
571     if (_dictExpandIfNeeded(d) == DICT_ERR)
572         return -1;
573     /* Compute the key hash value */
574     h = dictHashKey(d, key);
575     for (table = 0; table <= 1; table++) {
576         idx = h & d->ht[table].sizemask;
577         /* Search if this slot does not already contain the given key */
578         he = d->ht[table].table[idx];
579         while(he) {
580             if (dictCompareHashKeys(d, key, he->key))
581                 return -1;
582             he = he->next;
583         }
584         if (!dictIsRehashing(d)) break;
585     }
586     return idx;
587 }
588 
589 //清空哈希表d
590 void dictEmpty(dict *d) {
591     _dictClear(d,&d->ht[0]);
592     _dictClear(d,&d->ht[1]);
593     d->rehashidx = -1;
594     d->iterators = 0;
595 }
596 
597 #define DICT_STATS_VECTLEN 50
598 static void _dictPrintStatsHt(dictht *ht) {
599     unsigned long i, slots = 0, chainlen, maxchainlen = 0;
600     unsigned long totchainlen = 0;
601     unsigned long clvector[DICT_STATS_VECTLEN];
602 
603     if (ht->used == 0) {
604         printf("No stats available for empty dictionaries\n");
605         return;
606     }
607 
608     for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
609     for (i = 0; i < ht->size; i++) {
610         dictEntry *he;
611 
612         if (ht->table[i] == NULL) {
613             clvector[0]++;
614             continue;
615         }
616         slots++;
617         /* For each hash entry on this slot... */
618         chainlen = 0;
619         he = ht->table[i];
620         while(he) {
621             chainlen++;
622             he = he->next;
623         }
624         clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
625         if (chainlen > maxchainlen) maxchainlen = chainlen;
626         totchainlen += chainlen;
627     }
628     printf("Hash table stats:\n");
629     printf(" table size: %ld\n", ht->size);
630     printf(" number of elements: %ld\n", ht->used);
631     printf(" different slots: %ld\n", slots);
632     printf(" max chain length: %ld\n", maxchainlen);
633     printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
634     printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
635     printf(" Chain length distribution:\n");
636     for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
637         if (clvector[i] == 0) continue;
638         printf("   %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
639     }
640 }
641 
642 void dictPrintStats(dict *d) {
643     _dictPrintStatsHt(&d->ht[0]);
644     if (dictIsRehashing(d)) {
645         printf("-- Rehashing into ht[1]:\n");
646         _dictPrintStatsHt(&d->ht[1]);
647     }
648 }
649 
650 //Set the global dict_can_resize flag: dictResize() and the normal growth path of _dictExpandIfNeeded() are allowed again
651 void dictEnableResize(void) {
652     dict_can_resize = 1;
653 }
654 
655 void dictDisableResize(void) { /* clear the global flag: dictResize() fails and tables only grow past dict_force_resize_ratio */
656     dict_can_resize = 0;
657 }
658 
659 #if 0
660 
661 /* The following are just example hash table types implementations.
662  * Not useful for Redis so they are commented out.
663  */
664 
665 /* ----------------------- StringCopy Hash Table Type ------------------------*/
666 
667 static unsigned int _dictStringCopyHTHashFunction(const void *key)
668 {
669     return dictGenHashFunction(key, strlen(key));
670 }
671 
672 static void *_dictStringDup(void *privdata, const void *key)
673 {
674     int len = strlen(key);
675     char *copy = zmalloc(len+1);
676     DICT_NOTUSED(privdata);
677 
678     memcpy(copy, key, len);
679     copy[len] = '\0';
680     return copy;
681 }
682 
683 static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
684         const void *key2)
685 {
686     DICT_NOTUSED(privdata);
687 
688     return strcmp(key1, key2) == 0;
689 }
690 
691 static void _dictStringDestructor(void *privdata, void *key)
692 {
693     DICT_NOTUSED(privdata);
694 
695     zfree(key);
696 }
697 
698 dictType dictTypeHeapStringCopyKey = {
699     _dictStringCopyHTHashFunction, /* hash function */
700     _dictStringDup,                /* key dup */
701     NULL,                          /* val dup */
702     _dictStringCopyHTKeyCompare,   /* key compare */
703     _dictStringDestructor,         /* key destructor */
704     NULL                           /* val destructor */
705 };
706 
707 /* This is like StringCopy but does not auto-duplicate the key.
708  * It's used for intepreter's shared strings. */
709 dictType dictTypeHeapStrings = {
710     _dictStringCopyHTHashFunction, /* hash function */
711     NULL,                          /* key dup */
712     NULL,                          /* val dup */
713     _dictStringCopyHTKeyCompare,   /* key compare */
714     _dictStringDestructor,         /* key destructor */
715     NULL                           /* val destructor */
716 };
717 
718 /* This is like StringCopy but also automatically handle dynamic
719  * allocated C strings as values. */
720 dictType dictTypeHeapStringCopyKeyValue = {
721     _dictStringCopyHTHashFunction, /* hash function */
722     _dictStringDup,                /* key dup */
723     _dictStringDup,                /* val dup */
724     _dictStringCopyHTKeyCompare,   /* key compare */
725     _dictStringDestructor,         /* key destructor */
726     _dictStringDestructor,         /* val destructor */
727 };
728 #endif

 

 

 

 

 

 

posted @ 2012-09-03 17:58  ~嘉言懿行~~我是煲仔饭~~  阅读(700)  评论(0编辑  收藏  举报