1 /* Hash Tables Implementation.
2 *
 * This file implements in-memory hash tables with insert/del/replace/find/
 * get-random-element operations. Hash tables auto-resize when needed;
 * table sizes are always a power of two, and collisions are handled by
 * chaining. See the source code for more information... :)
7 *
8 * Copyright (c) 2006-2012, Salvatore Sanfilippo <antirez at gmail dot com>
9 * All rights reserved.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions are met:
13 *
14 * * Redistributions of source code must retain the above copyright notice,
15 * this list of conditions and the following disclaimer.
16 * * Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * * Neither the name of Redis nor the names of its contributors may be used
20 * to endorse or promote products derived from this software without
21 * specific prior written permission.
22 *
23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
24 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
25 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
26 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
27 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
28 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
29 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
30 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
31 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
32 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
33 * POSSIBILITY OF SUCH DAMAGE.
34 */
35
36 #include "fmacros.h"
37
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <stdarg.h>
42 #include <assert.h>
43 #include <limits.h>
44 #include <sys/time.h>
45 #include <ctype.h>
46
47 #include "dict.h"
48 #include "zmalloc.h"
49
/* Global resize policy.
 *
 * dictEnableResize() / dictDisableResize() toggle dict_can_resize. This
 * matters for Redis, which relies on copy-on-write and does not want to move
 * much memory around while a child process is performing a save.
 *
 * Clearing dict_can_resize does not prevent every resize: a hash table is
 * still allowed to grow once the ratio between the number of elements and
 * the number of buckets exceeds dict_force_resize_ratio. */
static unsigned int dict_force_resize_ratio = 5;
static int dict_can_resize = 1;
60
61 /* -------------------------- private prototypes ---------------------------- */
62
63 static int _dictExpandIfNeeded(dict *ht);
64 static unsigned long _dictNextPower(unsigned long size);
65 static int _dictKeyIndex(dict *ht, const void *key);
66 static int _dictInit(dict *ht, dictType *type, void *privDataPtr);
67
68 /* -------------------------- hash functions -------------------------------- */
69
/* Thomas Wang's 32 bit mix function.
 *
 * Scrambles the bits of 'key' with a fixed sequence of shift/add/xor steps
 * and returns the mixed value. Since (~x) + y == y - x - 1, every
 * "h + ~(h << s)" step below is the same as "h - (h << s) - 1". */
unsigned int dictIntHashFunction(unsigned int key)
{
    unsigned int h = key;

    h = h + ~(h << 15);
    h = h ^ (h >> 10);
    h = h + (h << 3);
    h = h ^ (h >> 6);
    h = h + ~(h << 11);
    h = h ^ (h >> 16);

    return h;
}
82
/* Identity hash function for integer keys: the key is returned unchanged
 * and used directly as its own hash value. */
unsigned int dictIdentityHashFunction(unsigned int key)
{
    const unsigned int hash = key;

    return hash;
}
89
/* Seed read by dictGenHashFunction() and dictGenCaseHashFunction(). */
static uint32_t dict_hash_function_seed = 5381;

/* Replace the seed used by the generic hash functions with 'seed'. */
void dictSetHashFunctionSeed(uint32_t seed)
{
    dict_hash_function_seed = seed;
}

/* Return the seed currently used by the generic hash functions. */
uint32_t dictGetHashFunctionSeed(void)
{
    return dict_hash_function_seed;
}
101
102 /* MurmurHash2, by Austin Appleby
103 * Note - This code makes a few assumptions about how your machine behaves -
104 * 1. We can read a 4-byte value from any address without crashing
105 * 2. sizeof(int) == 4
106 *
107 * And it has a few limitations -
108 *
109 * 1. It will not work incrementally.
110 * 2. It will not produce the same results on little-endian and big-endian
111 * machines.
112 */
113 unsigned int dictGenHashFunction(const void *key, int len) {
114 /* 'm' and 'r' are mixing constants generated offline.
115 They're not really 'magic', they just happen to work well. */
116 uint32_t seed = dict_hash_function_seed;
117 const uint32_t m = 0x5bd1e995;
118 const int r = 24;
119
120 /* Initialize the hash to a 'random' value */
121 /* 初始hash的random值(a^b等于它们的二进制做^运算,相同的为0,不同则为1) */
122 uint32_t h = seed ^ len;
123
124 /* Mix 4 bytes at a time into the hash */
125 const unsigned char *data = (const unsigned char *)key;
126
127 while(len >= 4) {
128 uint32_t k = *(uint32_t*)data;
129
130 k *= m;
131 k ^= k >> r;
132 k *= m;
133
134 h *= m;
135 h ^= k;
136
137 /* 更新属性 */
138 data += 4;
139 len -= 4;
140 }
141
142 /* Handle the last few bytes of the input array */
143 switch(len) {
144 case 3: h ^= data[2] << 16;
145 case 2: h ^= data[1] << 8;
146 case 1: h ^= data[0]; h *= m;
147 };
148
149 /* Do a few final mixes of the hash to ensure the last few
150 * bytes are well-incorporated. */
151 h ^= h >> 13;
152 h *= m;
153 h ^= h >> 15;
154
155 return (unsigned int)h;
156 }
157
158 /* And a case insensitive hash function (based on djb hash) */
159 unsigned int dictGenCaseHashFunction(const unsigned char *buf, int len) {
160 unsigned int hash = (unsigned int)dict_hash_function_seed;
161
162 while (len--)
163 hash = ((hash << 5) + hash) + (tolower(*buf++)); /* hash * 33 + c */
164 return hash;
165 }
166
167 /* ----------------------------- API implementation ------------------------- */
168
169 /* Reset a hash table already initialized with ht_init().
170 * NOTE: This function should only be called by ht_destroy(). */
171 /* 重置哈希表的各项属性 */
172 static void _dictReset(dictht *ht)
173 {
174 /* 更新属性 */
175 ht->table = NULL;
176 ht->size = 0;
177 ht->sizemask = 0;
178 ht->used = 0;
179 }
180
181 /* Create a new hash table */
182 /* 创新一个新字典 */
183 dict *dictCreate(dictType *type,
184 void *privDataPtr)
185 {
186 /* 分配空间 */
187 dict *d = zmalloc(sizeof(*d));
188
189 /* 初始化字典 */
190 _dictInit(d,type,privDataPtr);
191
192 return d;
193 }
194
195 /* Initialize the hash table */
196 /* 初始化字典 */
197 int _dictInit(dict *d, dictType *type,
198 void *privDataPtr)
199 {
200 /* 初始化ht[0] */
201 _dictReset(&d->ht[0]);
202
203 /* 初始化ht[1] */
204 _dictReset(&d->ht[1]);
205
206 /*初始化字典属性 */
207 d->type = type;
208 d->privdata = privDataPtr;
209 d->rehashidx = -1; /* 表示rehash未进行 */
210 d->iterators = 0;
211
212 return DICT_OK;
213 }
214
215 /* Resize the table to the minimal size that contains all the elements,
216 * but with the invariant of a USED/BUCKETS ratio near to <= 1 */
217 /* 对字典进行紧缩,让节点数/桶数的比率接近<=1 */
218 int dictResize(dict *d)
219 {
220 int minimal;
221
222 /* dictAdd函数在每次向字典添加新键值对之前,都会对哈希表ht[0]进行rehash操作:在不修改任何键值对的情况下,对哈希表进行扩展,尽量将比率维持在1:1左右;如果它们之前的比率radio = used/size满足以下任何一个条件的话,rehash过程就会被激活: 1. 自然rehash: ratio >= 1, 且变量dict_can_resize为真; 2. 强制rehash: ratio大于变量dict_force_resize_ratio; */
223 /* 不能在dict_can_resize为假或者字典正在rehash时调用 */
224 if (!dict_can_resize || dictIsRehashing(d)) return DICT_ERR;
225
226 minimal = d->ht[0].used;
227
228 if (minimal < DICT_HT_INITIAL_SIZE)
229 minimal = DICT_HT_INITIAL_SIZE;
230
231 return dictExpand(d, minimal);
232 }
233
234 /* Expand or create the hash table */
235 /* 创建一个新哈希表,并视情况,进行以下动作之一:
236 * 1) 如果字典里的ht[0]为空,将新哈希表赋值给它;
237 * 2) 如果字典里的ht[0]不为空,那么将新哈希表赋值给ht[1],并打开rehash标识;
238 */
239 int dictExpand(dict *d, unsigned long size)
240 {
241 dictht n; /* the new hash table */
242
243 /* 计算哈希表的真实大小 */
244 unsigned long realsize = _dictNextPower(size);
245
246 /* the size is invalid if it is smaller than the number of
247 * elements already inside the hash table */
248 if (dictIsRehashing(d) || d->ht[0].used > size)
249 return DICT_ERR;
250
251 /* Allocate the new hash table and initialize all pointers to NULL */
252 /* 创建并初始化新哈希表 */
253 n.size = realsize;
254 n.sizemask = realsize-1;
255 n.table = zcalloc(realsize*sizeof(dictEntry*));
256 n.used = 0;
257
258 /* Is this the first initialization? If so it's not really a rehashing
259 * we just set the first hash table so that it can accept keys. */
260 /* 如果ht[0]为空,那么这就是一次创建新哈希表行为 */
261 if (d->ht[0].table == NULL) {
262 d->ht[0] = n;
263 return DICT_OK;
264 }
265
266 /* Prepare a second hash table for incremental rehashing */
267 /* 如果ht[0]不为空,那么这就是一次扩展字典的行为 */
268 /* 将新哈希表设置为ht[1],并打开rehash标识 */
269 d->ht[1] = n;
270 d->rehashidx = 0; /* 标识着rehash的开始 */
271
272 return DICT_OK;
273 }
274
275 /* Performs N steps of incremental rehashing. Returns 1 if there are still
276 * keys to move from the old to the new hash table, otherwise 0 is returned.
277 * Note that a rehashing step consists in moving a bucket (that may have more
278 * thank one key as we use chaining) from the old to the new hash table. */
279 /* 执行N步渐进式rehash:
280 *
281 * 如果执行之后哈希表还有元素需要rehash,那么返回1;
282 * 如果哈希表里面所有元素已经迁移完毕,那么返回0;
283 *
284 * 每步rehash都会移动哈希表数组内某个索引上的整个链表节点;
285 * 所以从ht[0]迁移到ht[1]的key可能不止一个;
286 */
287 int dictRehash(dict *d, int n) {
288 if (!dictIsRehashing(d)) return 0;
289
290 while(n--) {
291 dictEntry *de, *nextde;
292
293 /* Check if we already rehashed the whole table... */
294 /* 如果ht[0]为空,那么迁移完毕,用ht[1]代替原来的ht[0] */
295 if (d->ht[0].used == 0) {
296 /* 释放ht[0]的哈希表数组 */
297 zfree(d->ht[0].table);
298
299 /* 用ht[1]替代ht[0] */
300 d->ht[0] = d->ht[1];
301
302 /* 清空ht[1]的指针 */
303 _dictReset(&d->ht[1]);
304
305 /* 关闭rehash标识 */
306 d->rehashidx = -1;
307
308 /* 通知调用者,rehash完毕 */
309 return 0;
310 }
311
312 /* Note that rehashidx can't overflow as we are sure there are more
313 * elements because ht[0].used != 0 */
314 assert(d->ht[0].size > (unsigned)d->rehashidx);
315
316 /* 移动到数组中首个不为NULL链表的索引上 */
317 while(d->ht[0].table[d->rehashidx] == NULL) d->rehashidx++;
318
319 /* 指向链表头 */
320 de = d->ht[0].table[d->rehashidx];
321 /* Move all the keys in this bucket from the old to the new hash HT */
322 /* 将链表内的所有元素从ht[0]迁移到ht[1]
323 * 因为桶内的元素通常只有一个,或者不多于某个特定的比率
324 * 所以可将整个操作看作O(1)
325 */
326 while(de) {
327 unsigned int h;
328
329 nextde = de->next;
330 /* Get the index in the new hash table */
331 /* 计算元素在ht[1]的哈希值 */
332 h = dictHashKey(d, de->key) & d->ht[1].sizemask;
333
334 /* 添加节点到ht[1],调整指针 */
335 de->next = d->ht[1].table[h];
336 d->ht[1].table[h] = de;
337
338 /* 更新计数器 */
339 d->ht[0].used--;
340 d->ht[1].used++;
341 de = nextde;
342 }
343 /* 设置指针为NULL,方便下次rehash时跳过 */
344 d->ht[0].table[d->rehashidx] = NULL;
345
346 /* 前进至下一索引 */
347 d->rehashidx++;
348 }
349 /* 通知调用者,还有元素等待rehash */
350 return 1;
351 }
352
/* Return the current time, as reported by gettimeofday(), in milliseconds. */
long long timeInMilliseconds(void) {
    struct timeval now;
    long long ms;

    gettimeofday(&now, NULL);

    ms = ((long long)now.tv_sec) * 1000;
    ms += now.tv_usec / 1000;

    return ms;
}
360
361 /* Rehash for an amount of time between ms milliseconds and ms+1 milliseconds */
362 /* 在给定毫秒数内,以100步为单位,对字典进行rehash */
363 int dictRehashMilliseconds(dict *d, int ms) {
364 long long start = timeInMilliseconds();
365 int rehashes = 0;
366
367 while(dictRehash(d,100)) {
368 rehashes += 100;
369 if (timeInMilliseconds()-start > ms) break;
370 }
371 return rehashes;
372 }
373
374 /* This function performs just a step of rehashing, and only if there are
375 * no safe iterators bound to our hash table. When we have iterators in the
376 * middle of a rehashing we can't mess with the two hash tables otherwise
377 * some element can be missed or duplicated.
378 *
379 * This function is called by common lookup or update operations in the
380 * dictionary so that the hash table automatically migrates from H1 to H2
381 * while it is actively used. */
382 /* 如果条件允许的话,将一个元素从ht[0]迁移到ht[1]
383 * 函数被其它查找和更新函数所调用,从而实现渐进式rehash
384 */
385 static void _dictRehashStep(dict *d) {
386 /* 只在没有安全迭代器的时候,才能进行迁移;否则可能会产生重复元素或者丢失元素 */
387 if (d->iterators == 0) dictRehash(d,1);
388 }
389
390 /* Add an element to the target hash table */
391 /* 添加给定key-value对到字典 */
392 int dictAdd(dict *d, void *key, void *val)
393 {
394 /* 添加key到哈希表,返回包含该key的节点 */
395 dictEntry *entry = dictAddRaw(d,key);
396
397 /* 添加失败? */
398 if (!entry) return DICT_ERR;
399
400 /* 设置节点的值 */
401 dictSetVal(d, entry, val);
402
403 return DICT_OK;
404 }
405
406 /* Low level add. This function adds the entry but instead of setting
407 * a value returns the dictEntry structure to the user, that will make
408 * sure to fill the value field as he wishes.
409 *
410 * This function is also directly exposed to user API to be called
411 * mainly in order to store non-pointers inside the hash value, example:
412 *
413 * entry = dictAddRaw(dict,mykey);
414 * if (entry != NULL) dictSetSignedIntegerVal(entry,1000);
415 *
416 * Return values:
417 *
418 * If key already exists NULL is returned.
419 * If key was added, the hash entry is returned to be manipulated by the caller.
420 */
421 /* 添加key到字典的底层实现,完成之后返回新节点
422 * 如果key已经存在,则返回NULL
423 */
424 dictEntry *dictAddRaw(dict *d, void *key)
425 {
426 int index;
427 dictEntry *entry;
428 dictht *ht;
429
430 /* 尝试渐进式地rehash一个元素 */
431 if (dictIsRehashing(d)) _dictRehashStep(d);
432
433 /* Get the index of the new element, or -1 if
434 * the element already exists. */
435 /* 查找可容纳新元素的索引位置,如果元素已经存在,则index为-1 */
436 if ((index = _dictKeyIndex(d, key)) == -1)
437 return NULL;
438
439 /* Allocate the memory and store the new entry */
440 /* 决定该把新元素放在那个哈希表 */
441 ht = dictIsRehashing(d) ? &d->ht[1] : &d->ht[0];
442
443 /* 为新元素分配节点空间 */
444 entry = zmalloc(sizeof(*entry));
445
446 /* 新节点的后继指针指向旧的表头节点 */
447 entry->next = ht->table[index];
448
449 /* 设置新节点为表头 */
450 ht->table[index] = entry;
451
452 /* 更新已有节点数量 */
453 ht->used++;
454
455 /* Set the hash entry fields. */
456 /* 关联节点和key */
457 dictSetKey(d, entry, key);
458
459 /* 返回新节点 */
460 return entry;
461 }
462
463 /* Add an element, discarding the old if the key already exists.
464 * Return 1 if the key was added from scratch, 0 if there was already an
465 * element with such key and dictReplace() just performed a value update
466 * operation. */
467 /* 用新的值代替key原有的值
468 * 如果key不存在,将关联添加到哈希表中;
469 * 如果关联时新创建的,则返回1;如果关联是被更新的,则返回0;
470 */
471 int dictReplace(dict *d, void *key, void *val)
472 {
473 dictEntry *entry, auxentry;
474
475 /* Try to add the element. If the key
476 * does not exists dictAdd will suceed. */
477 /* 尝试添加新元素到哈希表,只要key不存在,添加就会成功 */
478 if (dictAdd(d, key, val) == DICT_OK)
479 return 1;
480
481 /* It already exists, get the entry */
482 /* 如果添加失败,则说明元素已经存在 */
483 /* 获取这个元素所对应的节点 */
484 entry = dictFind(d, key);
485
486 /* Set the new value and free the old one. Note that it is important
487 * to do that in this order, as the value may just be exactly the same
488 * as the previous one. In this context, think to reference counting,
489 * you want to increment (set), and then decrement (free), and not the
490 * reverse. */
491 /* 指向旧值 */
492 auxentry = *entry;
493
494 /* 设置新值 */
495 dictSetVal(d, entry, val);
496
497 /* 释放旧值 */
498 dictFreeVal(d, &auxentry);
499
500 return 0;
501 }
502
503 /* dictReplaceRaw() is simply a version of dictAddRaw() that always
504 * returns the hash entry of the specified key, even if the key already
505 * exists and can't be added (in that case the entry of the already
506 * existing key is returned.)
507 *
508 * See dictAddRaw() for more information. */
509 /* 类似于dictAddRaw() */
510 /* dictReplaceRaw无论在新添加节点还是更新节点的情况下,都返回key所对应的节点 */
511 dictEntry *dictReplaceRaw(dict *d, void *key) {
512 /* 查找 */
513 dictEntry *entry = dictFind(d,key);
514
515 /* 没找到就添加,找到直接返回 */
516 return entry ? entry : dictAddRaw(d,key);
517 }
518
519 /* Search and remove an element */
520 /* 按key查找并删除节点 */
521 static int dictGenericDelete(dict *d, const void *key, int nofree)
522 {
523 unsigned int h, idx;
524 dictEntry *he, *prevHe;
525 int table;
526
527 /* 空表? */
528 if (d->ht[0].size == 0) return DICT_ERR; /* d->ht[0].table is NULL */
529
530 /* 渐进式rehash */
531 if (dictIsRehashing(d)) _dictRehashStep(d);
532
533 /* 计算哈希值 */
534 h = dictHashKey(d, key);
535
536 /* 在两个哈希表中查找 */
537 for (table = 0; table <= 1; table++) {
538 /* 索引值 */
539 idx = h & d->ht[table].sizemask;
540
541 /* 索引在数组中对应的表头 */
542 he = d->ht[table].table[idx];
543 prevHe = NULL;
544
545 /* 遍历链表 */
546 /* 因为链表的元素数量通常为1,或者维持在一个很小的比率,因此可将这操作看作O()1 */
547 while(he) {
548 /* 对比键 */
549 if (dictCompareKeys(d, key, he->key)) {
550 /* Unlink the element from the list */
551 if (prevHe)
552 prevHe->next = he->next;
553 else
554 d->ht[table].table[idx] = he->next;
555
556 /* 释放节点的键和值 */
557 if (!nofree) {
558 dictFreeKey(d, he);
559 dictFreeVal(d, he);
560 }
561 /* 释放节点 */
562 zfree(he);
563
564 d->ht[table].used--;
565
566 return DICT_OK;
567 }
568 prevHe = he;
569 he = he->next;
570 }
571 /* 如果不是正在进行rehash,那么无需遍历ht[1] */
572 if (!dictIsRehashing(d)) break;
573 }
574
575 return DICT_ERR; /* not found */
576 }
577
578 int dictDelete(dict *ht, const void *key) {
579 return dictGenericDelete(ht,key,0);
580 }
581
582 /* 删除哈希表中的key,并且释放保存这个key的节点 */
583 int dictDeleteNoFree(dict *ht, const void *key) {
584 return dictGenericDelete(ht,key,1);
585 }
586
587 /* 销毁指定的哈希表 */
588 /* Destroy an entire dictionary */
589 int _dictClear(dict *d, dictht *ht)
590 {
591 unsigned long i;
592
593 /* Free all the elements */
594 /* 遍历哈希表数组 */
595 for (i = 0; i < ht->size && ht->used > 0; i++) {
596 dictEntry *he, *nextHe;
597
598 if ((he = ht->table[i]) == NULL) continue;
599
600 /* 释放整个链表上的元素 */
601 /* 因为链表的元素数量通常为1,或者维持在一个很小的比率,因此可将这操作看作O(1) */
602 while(he) {
603 nextHe = he->next;
604 dictFreeKey(d, he);
605 dictFreeVal(d, he);
606
607 zfree(he);
608
609 ht->used--;
610
611 he = nextHe;
612 }
613 }
614 /* Free the table and the allocated cache structure */
615 zfree(ht->table);
616
617 /* Re-initialize the table */
618 _dictReset(ht);
619
620 return DICT_OK; /* never fails */
621 }
622
623 /* Clear & Release the hash table */
624 /* 清空并释放字典 */
625 void dictRelease(dict *d)
626 {
627 _dictClear(d,&d->ht[0]);
628 _dictClear(d,&d->ht[1]);
629
630 zfree(d);
631 }
632
633 /* 在字典中查找给定key所定义的节点,如果key不存在,则返回NULL */
634 dictEntry *dictFind(dict *d, const void *key)
635 {
636 dictEntry *he;
637 unsigned int h, idx, table;
638
639 if (d->ht[0].size == 0) return NULL; /* We don't have a table at all */
640
641 if (dictIsRehashing(d)) _dictRehashStep(d);
642
643 /* 计算哈希值 */
644 h = dictHashKey(d, key);
645
646 /* 在两个哈希表中查找 */
647 for (table = 0; table <= 1; table++) {
648 /* 索引值 */
649 idx = h & d->ht[table].sizemask;
650
651 /* 节点链表 */
652 he = d->ht[table].table[idx];
653
654 /* 在链表中查找,因为链表的元素数量通常为1,或者维持在一个很小的比率,因此可将这操作看作O(1) */
655 while(he) {
656 /* 找到并返回 */
657 if (dictCompareKeys(d, key, he->key))
658 return he;
659
660 he = he->next;
661 }
662 /* 如果rehash并不在进行中,那么无需查找ht[1] */
663 if (!dictIsRehashing(d)) return NULL;
664 }
665
666 return NULL;
667 }
668
669 /* 返回在字典中,key所对应的值;如果key不存在,则返回NULL */
670 void *dictFetchValue(dict *d, const void *key) {
671 dictEntry *he;
672
673 he = dictFind(d,key);
674
675 return he ? dictGetVal(he) : NULL;
676 }
677
678 /* 根据给定字典,创建一个不安全迭代器 */
679 dictIterator *dictGetIterator(dict *d)
680 {
681 dictIterator *iter = zmalloc(sizeof(*iter));
682
683 /* 更新属性 */
684 iter->d = d;
685 iter->table = 0;
686 iter->index = -1;
687 iter->safe = 0;
688 iter->entry = NULL;
689 iter->nextEntry = NULL;
690
691 return iter;
692 }
693
694 /* 根据给定字典,创建一个安全迭代器 */
695 dictIterator *dictGetSafeIterator(dict *d) {
696 dictIterator *i = dictGetIterator(d);
697
698 i->safe = 1;
699 return i;
700 }
701
702 /* 返回迭代器指向的当前节点;如果字典已经迭代完毕,返回NULL */
703 dictEntry *dictNext(dictIterator *iter)
704 {
705 while (1) {
706 if (iter->entry == NULL) {
707 dictht *ht = &iter->d->ht[iter->table];
708
709 /* 在开始迭代之前,添加字典iterators计数器的值 */
710 /* 只有安全迭代器才会增加计数 */
711 if (iter->safe && iter->index == -1 && iter->table == 0)
712 iter->d->iterators++;
713
714 /* 增加索引 */
715 iter->index++;
716
717 /* 当迭代器的元素数量超过ht->size的值时,说明这个表已经迭代完毕 */
718 if (iter->index >= (signed) ht->size) {
719 /* 是否继续迭代ht[1]? */
720 if (dictIsRehashing(iter->d) && iter->table == 0) {
721 iter->table++;
722 iter->index = 0;
723 ht = &iter->d->ht[1];
724 } else {
725 /* 如果没有ht[1],或者已经迭代完了 ht[1]到达这里 */
726 /* 跳出 */
727 break;
728 }
729 }
730 /* 指向下一索引的节点链表 */
731 iter->entry = ht->table[iter->index];
732 } else {
733 /* 指向链表的下一节点 */
734 iter->entry = iter->nextEntry;
735 }
736
737 /* 保存后继指针nextEntry,以对应当前节点entry可能被修改的情况 */
738 if (iter->entry) {
739 /* We need to save the 'next' here, the iterator user
740 * may delete the entry we are returning. */
741 iter->nextEntry = iter->entry->next;
742 return iter->entry;
743 }
744 }
745 return NULL;
746 }
747
748 /* 释放迭代器 */
749 void dictReleaseIterator(dictIterator *iter)
750 {
751 if (iter->safe && !(iter->index == -1 && iter->table == 0))
752 iter->d->iterators--;
753
754 zfree(iter);
755 }
756
757 /* Return a random entry from the hash table. Useful to
758 * implement randomized algorithms */
759 /* 从字典中返回一个随机节点,可用于实现随机化算法 */
760 /* 如果字典为空,则返回NULL */
761 dictEntry *dictGetRandomKey(dict *d)
762 {
763 dictEntry *he, *orighe;
764 unsigned int h;
765 int listlen, listele;
766
767 /* 空表,返回NULL */
768 if (dictSize(d) == 0) return NULL;
769
770 /* 渐进式rehash */
771 if (dictIsRehashing(d)) _dictRehashStep(d);
772
773 /* 根据哈希表的使用情况,随机从哈希表中挑选一个非空表头 */
774 if (dictIsRehashing(d)) {
775 do {
776 h = random() % (d->ht[0].size+d->ht[1].size);
777 he = (h >= d->ht[0].size) ? d->ht[1].table[h - d->ht[0].size] :
778 d->ht[0].table[h];
779 } while(he == NULL);
780 } else {
781 do {
782 h = random() & d->ht[0].sizemask;
783 he = d->ht[0].table[h];
784 } while(he == NULL);
785 }
786
787 /* Now we found a non empty bucket, but it is a linked
788 * list and we need to get a random element from the list.
789 * The only sane way to do so is counting the elements and
790 * select a random index. */
791 /* 随机获取链表中的其中一个元素,计算链表长度 */
792 /* 因为链表的元素数量通常为1,或者一个很小的比率,所以这个操作可看作O(1) */
793 listlen = 0;
794 orighe = he;
795 while(he) {
796 he = he->next;
797 listlen++;
798 }
799
800 /* 计算随机值 */
801 listele = random() % listlen;
802
803 /* 取出对应节点 */
804 he = orighe;
805 while(listele--) he = he->next;
806
807 return he;
808 }
809
810 /* ------------------------- private functions ------------------------------ */
811
812 /* Expand the hash table if needed */
813 /* 根据需要,扩展字典的大小(即时对ht[0]进行rehash) */
814 static int _dictExpandIfNeeded(dict *d)
815 {
816 /* Incremental rehashing already in progress. Return. */
817 /* 已经在渐进式rehash当中,则直接返回 */
818 if (dictIsRehashing(d)) return DICT_OK;
819
820 /* If the hash table is empty expand it to the initial size. */
821 /* 如果哈希表为空,则将它扩展为初始大小 */
822 if (d->ht[0].size == 0) return dictExpand(d, DICT_HT_INITIAL_SIZE);
823
824 /* If we reached the 1:1 ratio, and we are allowed to resize the hash
825 * table (global setting) or we should avoid it but the ratio between
826 * elements/buckets is over the "safe" threshold, we resize doubling
827 * the number of buckets. */
828 /* 如果哈希表的已用节点数 >= 哈希表的大小,并且以下条件任一个为真:
829 * 1) dict_can_resize为真;
830 * 2) 已用节点数除以哈希表大小之比大于dict_force_resize_ratio;
831 * 那么调用dictExpand对哈希表进行扩展,扩展的体积至少为已用节点数的两倍
832 */
833 if (d->ht[0].used >= d->ht[0].size &&
834 (dict_can_resize ||
835 d->ht[0].used/d->ht[0].size > dict_force_resize_ratio))
836 {
837 return dictExpand(d, ((d->ht[0].size > d->ht[0].used) ?
838 d->ht[0].size : d->ht[0].used)*2);
839 }
840
841 return DICT_OK;
842 }
843
844 /* Our hash table capability is a power of two */
845 /* 计算哈希表的真实体积 */
846 /* 如果size <= DICT_HT_INITIAL_SIZE的话,则返回DICT_HT_INITIAL_SIZE;
847 * 否则这个值为第一个 >= size的二次幂。
848 */
849 static unsigned long _dictNextPower(unsigned long size)
850 {
851 unsigned long i = DICT_HT_INITIAL_SIZE;
852
853 if (size >= LONG_MAX) return LONG_MAX;
854 while(1) {
855 if (i >= size)
856 return i;
857 i *= 2;
858 }
859 }
860
861 /* Returns the index of a free slot that can be populated with
862 * an hash entry for the given 'key'.
863 * If the key already exists, -1 is returned.
864 *
865 * Note that if we are in the process of rehashing the hash table, the
866 * index is always returned in the context of the second (new) hash table. */
867 /* 返回给定key,可以哈希表数组存放的索引 */
868 /* 如果key已经存在于哈希表,则返回-1 */
869 /* 当正在执行rehash的时候,返回的index总是应用于第二个(新的哈希表) */
870 static int _dictKeyIndex(dict *d, const void *key)
871 {
872 unsigned int h, idx, table;
873 dictEntry *he;
874
875 /* Expand the hash table if needed */
876 /* 如果有需要,对字典进行扩展 */
877 if (_dictExpandIfNeeded(d) == DICT_ERR)
878 return -1;
879
880 /* Compute the key hash value */
881 /* 计算key的哈希值 */
882 h = dictHashKey(d, key);
883
884 /* 在两个哈希表中进行查找给定的key */
885 for (table = 0; table <= 1; table++) {
886 /* 根据哈希值和哈希表的sizemask,计算出key可能出现在table数组中的哪个索引 */
887 idx = h & d->ht[table].sizemask;
888
889 /* Search if this slot does not already contain the given key */
890 /* 在节点链表里查找给定的key */
891 he = d->ht[table].table[idx];
892 while(he) {
893 /* key已经存在 */
894 if (dictCompareKeys(d, key, he->key))
895 return -1;
896
897 he = he->next;
898 }
899
900 /* 第一次进行运行到这里时,说明已经查找完d->ht[0]了 */
901 /* 这时如果哈希表不在rehash当中,就没必要查找d->ht[1] */
902 if (!dictIsRehashing(d)) break;
903 }
904
905 return idx;
906 }
907
908 /* 清空整个字典 */
909 void dictEmpty(dict *d) {
910 _dictClear(d,&d->ht[0]);
911 _dictClear(d,&d->ht[1]);
912 d->rehashidx = -1;
913 d->iterators = 0;
914 }
915
916 /* 打开rehash标识 */
917 void dictEnableResize(void) {
918 dict_can_resize = 1;
919 }
920
921 /* 关闭rehash标识 */
922 void dictDisableResize(void) {
923 dict_can_resize = 0;
924 }
925
926 #if 0
927
928 /* The following is code that we don't use for Redis currently, but that is part
929 of the library. */
930
931 /* ----------------------- Debugging ------------------------*/
932
933 #define DICT_STATS_VECTLEN 50
934 static void _dictPrintStatsHt(dictht *ht) {
935 unsigned long i, slots = 0, chainlen, maxchainlen = 0;
936 unsigned long totchainlen = 0;
937 unsigned long clvector[DICT_STATS_VECTLEN];
938
939 if (ht->used == 0) {
940 printf("No stats available for empty dictionaries\n");
941 return;
942 }
943
944 for (i = 0; i < DICT_STATS_VECTLEN; i++) clvector[i] = 0;
945 for (i = 0; i < ht->size; i++) {
946 dictEntry *he;
947
948 if (ht->table[i] == NULL) {
949 clvector[0]++;
950 continue;
951 }
952 slots++;
953 /* For each hash entry on this slot... */
954 chainlen = 0;
955 he = ht->table[i];
956 while(he) {
957 chainlen++;
958 he = he->next;
959 }
960 clvector[(chainlen < DICT_STATS_VECTLEN) ? chainlen : (DICT_STATS_VECTLEN-1)]++;
961 if (chainlen > maxchainlen) maxchainlen = chainlen;
962 totchainlen += chainlen;
963 }
964 printf("Hash table stats:\n");
965 printf(" table size: %ld\n", ht->size);
966 printf(" number of elements: %ld\n", ht->used);
967 printf(" different slots: %ld\n", slots);
968 printf(" max chain length: %ld\n", maxchainlen);
969 printf(" avg chain length (counted): %.02f\n", (float)totchainlen/slots);
970 printf(" avg chain length (computed): %.02f\n", (float)ht->used/slots);
971 printf(" Chain length distribution:\n");
972 for (i = 0; i < DICT_STATS_VECTLEN-1; i++) {
973 if (clvector[i] == 0) continue;
974 printf(" %s%ld: %ld (%.02f%%)\n",(i == DICT_STATS_VECTLEN-1)?">= ":"", i, clvector[i], ((float)clvector[i]/ht->size)*100);
975 }
976 }
977
978 void dictPrintStats(dict *d) {
979 _dictPrintStatsHt(&d->ht[0]);
980 if (dictIsRehashing(d)) {
981 printf("-- Rehashing into ht[1]:\n");
982 _dictPrintStatsHt(&d->ht[1]);
983 }
984 }
985
986 /* ----------------------- StringCopy Hash Table Type ------------------------*/
987
/* Hash function for NUL-terminated C string keys: hashes the strlen(key)
 * bytes of the string with dictGenHashFunction(). */
static unsigned int _dictStringCopyHTHashFunction(const void *key)
{
    const int len = strlen(key);

    return dictGenHashFunction(key, len);
}
992
/* Duplicate a NUL-terminated C string into memory obtained from zmalloc().
 *
 * 'privdata' is unused. Returns the newly allocated, NUL-terminated copy.
 * The length is kept as size_t so it is not narrowed before being used for
 * the allocation and the copy. */
static void *_dictStringDup(void *privdata, const void *key)
{
    size_t len = strlen(key);
    char *copy = zmalloc(len+1);
    DICT_NOTUSED(privdata);

    memcpy(copy, key, len);
    copy[len] = '\0';
    return copy;
}
1003
/* Key comparison for C string keys. 'privdata' is unused.
 * Returns 1 when strcmp() reports the two strings equal, 0 otherwise. */
static int _dictStringCopyHTKeyCompare(void *privdata, const void *key1,
                                       const void *key2)
{
    int differ;

    DICT_NOTUSED(privdata);

    differ = strcmp(key1, key2);
    return differ == 0;
}
1011
/* Destructor for strings owned by the dictionary: releases 'key' with
 * zfree(). 'privdata' is unused. */
static void _dictStringDestructor(void *privdata, void *key)
{
    DICT_NOTUSED(privdata);
    zfree(key);
}
1018
1019 dictType dictTypeHeapStringCopyKey = {
1020 _dictStringCopyHTHashFunction, /* hash function */
1021 _dictStringDup, /* key dup */
1022 NULL, /* val dup */
1023 _dictStringCopyHTKeyCompare, /* key compare */
1024 _dictStringDestructor, /* key destructor */
1025 NULL /* val destructor */
1026 };
1027
1028 /* This is like StringCopy but does not auto-duplicate the key.
1029 * It's used for intepreter's shared strings. */
1030 dictType dictTypeHeapStrings = {
1031 _dictStringCopyHTHashFunction, /* hash function */
1032 NULL, /* key dup */
1033 NULL, /* val dup */
1034 _dictStringCopyHTKeyCompare, /* key compare */
1035 _dictStringDestructor, /* key destructor */
1036 NULL /* val destructor */
1037 };
1038
1039 /* This is like StringCopy but also automatically handle dynamic
1040 * allocated C strings as values. */
1041 dictType dictTypeHeapStringCopyKeyValue = {
1042 _dictStringCopyHTHashFunction, /* hash function */
1043 _dictStringDup, /* key dup */
1044 _dictStringDup, /* val dup */
1045 _dictStringCopyHTKeyCompare, /* key compare */
1046 _dictStringDestructor, /* key destructor */
1047 _dictStringDestructor, /* val destructor */
1048 };
1049 #endif