【redis源码】(八) Intset.c

intset 实现了一个数字元素的集合。

使用数组和元素的有序存放实现存取,查找过程使用二分查找法,所有插入删除的的效率为O(log2N)。 与其他数据结构类似,作者使用变编码方式实现对内存的高效利用。 初始化的intset中的数字定义为int16_t,即每个元素占用2个字节,而随着数据的插入,逐渐调整编码方式到int32_t或int64_t

 

上代码

intset.h

 1 #ifndef __INTSET_H
 2 #define __INTSET_H
 3 #include <stdint.h>
 4 
 5 typedef struct intset {
 6     uint32_t encoding;
 7     uint32_t length;
 8     int8_t contents[];
 9 } intset;
10 
11 intset *intsetNew(void);
12 intset *intsetAdd(intset *is, int64_t value, uint8_t *success);
13 intset *intsetRemove(intset *is, int64_t value, int *success);
14 uint8_t intsetFind(intset *is, int64_t value);
15 int64_t intsetRandom(intset *is);
16 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value);
17 uint32_t intsetLen(intset *is);
18 
19 #endif // __INTSET_H

intset.c

  1 #include <stdio.h>
  2 #include <stdlib.h>
  3 #include <string.h>
  4 #include "intset.h"
  5 #include "zmalloc.h"
  6 
  7 /* Note that these encodings are ordered, so:
  8  * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
  9 #define INTSET_ENC_INT16 (sizeof(int16_t)) 
 10 #define INTSET_ENC_INT32 (sizeof(int32_t))
 11 #define INTSET_ENC_INT64 (sizeof(int64_t))
 12 
 13 /* Return the required encoding for the provided value. */
 14 //根据v的值判断使用哪儿种int以节省内存
 15 static uint8_t _intsetValueEncoding(int64_t v) {
 16     if (v < INT32_MIN || v > INT32_MAX)
 17         return INTSET_ENC_INT64;
 18     else if (v < INT16_MIN || v > INT16_MAX)
 19         return INTSET_ENC_INT32;
 20     return INTSET_ENC_INT16;
 21 }
 22 
 23 //得到enc编码方式下的第pos个位置的值
 24 /* Return the value at pos, given an encoding. */
 25 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
 26     if (enc == INTSET_ENC_INT64)
 27         return ((int64_t*)is->contents)[pos];
 28     else if (enc == INTSET_ENC_INT32)
 29         return ((int32_t*)is->contents)[pos];
 30     return ((int16_t*)is->contents)[pos];
 31 }
 32 
 33 //得到is中第pos个位置的value
 34 /* Return the value at pos, using the configured encoding. */
 35 static int64_t _intsetGet(intset *is, int pos) {
 36     return _intsetGetEncoded(is,pos,is->encoding);
 37 }
 38 
 39 //在第n个pos个位置存放value
 40 /* Set the value at pos, using the configured encoding. */
 41 static void _intsetSet(intset *is, int pos, int64_t value) {
 42     if (is->encoding == INTSET_ENC_INT64)
 43         ((int64_t*)is->contents)[pos] = value;
 44     else if (is->encoding == INTSET_ENC_INT32)
 45         ((int32_t*)is->contents)[pos] = value;
 46     else
 47         ((int16_t*)is->contents)[pos] = value;
 48 }
 49 
 50 //得到一个新的intset,编码采用INTSET_ENC_INT16
 51 /* Create an empty intset. */
 52 intset *intsetNew(void) {
 53     intset *is = zmalloc(sizeof(intset));
 54     is->encoding = INTSET_ENC_INT16;
 55     is->length = 0;
 56     return is;
 57 }
 58 
 59 /* Resize the intset */
 60 //重新为intset分配内存
 61 static intset *intsetResize(intset *is, uint32_t len) {
 62     uint32_t size = len*is->encoding;
 63     is = zrealloc(is,sizeof(intset)+size);
 64     return is;
 65 }
 66 
 67 
 68 //在排好序的iniset中查找value
 69 //如果找到,返回1,*pos为其所在位置
 70 //如果找不到,返回0,*pos为其能插入的位置
 71 //二分查找法
 72 /* Search for the position of "value". Return 1 when the value was found and
 73  * sets "pos" to the position of the value within the intset. Return 0 when
 74  * the value is not present in the intset and sets "pos" to the position
 75  * where "value" can be inserted. */
 76 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
 77     int min = 0, max = is->length-1, mid = -1;
 78     int64_t cur = -1;
 79 
 80     /* The value can never be found when the set is empty */
 81     if (is->length == 0) {
 82         if (pos) *pos = 0;
 83         return 0;
 84     } else {
 85         /* Check for the case where we know we cannot find the value,
 86          * but do know the insert position. */
 87         //如果插入值大于最后一个元素,或者小于第一个元素,则可以认定无法找到该元素
 88         if (value > _intsetGet(is,is->length-1)) {
 89             if (pos) *pos = is->length;
 90             return 0;
 91         } else if (value < _intsetGet(is,0)) {
 92             if (pos) *pos = 0;
 93             return 0;
 94         }
 95     }
 96 
 97     while(max >= min) {
 98         mid = (min+max)/2;
 99         cur = _intsetGet(is,mid);
100         if (value > cur) {
101             min = mid+1;
102         } else if (value < cur) {
103             max = mid-1;
104         } else {
105             break;
106         }
107     }
108 
109     if (value == cur) {
110         if (pos) *pos = mid;
111         return 1;
112     } else {
113         if (pos) *pos = min;
114         return 0;
115     }
116 }
117 
118 //升级intset的编码,并且出入一个新数字,因为新数字的绝对值一定大于
119 //当前intset中所有的数字的绝对值,如果是负数,则最小,放在最前边,否则放在最后边
120 /* Upgrades the intset to a larger encoding and inserts the given integer. */
121 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
122     uint8_t curenc = is->encoding;
123     uint8_t newenc = _intsetValueEncoding(value);
124     int length = is->length;
125     int prepend = value < 0 ? 1 : 0;
126 
127     /* First set new encoding and resize */
128     is->encoding = newenc;
129     is = intsetResize(is,is->length+1);
130 
131     /* Upgrade back-to-front so we don't overwrite values.
132      * Note that the "prepend" variable is used to make sure we have an empty
133      * space at either the beginning or the end of the intset. */
134     while(length--)
135         _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
136 
137     /* Set the value at the beginning or the end. */
138     if (prepend)
139         _intsetSet(is,0,value);
140     else
141         _intsetSet(is,is->length,value);
142     is->length++;
143     return is;
144 }
145 
146 //把from开始直到最后intset最后的内容move到to开始的地方,在这之前,应该要resize一下
147 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
148     void *src, *dst;
149     uint32_t bytes = is->length-from;
150     if (is->encoding == INTSET_ENC_INT64) {
151         src = (int64_t*)is->contents+from;
152         dst = (int64_t*)is->contents+to;
153         bytes *= sizeof(int64_t);
154     } else if (is->encoding == INTSET_ENC_INT32) {
155         src = (int32_t*)is->contents+from;
156         dst = (int32_t*)is->contents+to;
157         bytes *= sizeof(int32_t);
158     } else {
159         src = (int16_t*)is->contents+from;
160         dst = (int16_t*)is->contents+to;
161         bytes *= sizeof(int16_t);
162     }
163     memmove(dst,src,bytes);
164 }
165 
166 //在intset中插入一个元素,如果返回0,表示已经存在,否则返回1
167 /* Insert an integer in the intset */
168 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
169     uint8_t valenc = _intsetValueEncoding(value);
170     uint32_t pos;
171     if (success) *success = 1;
172 
173     /* Upgrade encoding if necessary. If we need to upgrade, we know that
174      * this value should be either appended (if > 0) or prepended (if < 0),
175      * because it lies outside the range of existing values. */
176     if (valenc > is->encoding) {
177         /* This always succeeds, so we don't need to curry *success. */
178         return intsetUpgradeAndAdd(is,value);
179     } else {
180         /* Abort if the value is already present in the set.
181          * This call will populate "pos" with the right position to insert
182          * the value when it cannot be found. */
183         if (intsetSearch(is,value,&pos)) {
184             if (success) *success = 0;
185             return is;
186         }
187 
188         is = intsetResize(is,is->length+1);
189         if (pos < is->length) intsetMoveTail(is,pos,pos+1);
190     }
191 
192     _intsetSet(is,pos,value);
193     is->length++;
194     return is;
195 }
196 
197 //在intset中删除一个元素value
198 /* Delete integer from intset */
199 intset *intsetRemove(intset *is, int64_t value, int *success) {
200     uint8_t valenc = _intsetValueEncoding(value);
201     uint32_t pos;
202     if (success) *success = 0;
203 
204     if (valenc <= is->encoding && intsetSearch(is,value,&pos)) {
205         /* We know we can delete */
206         if (success) *success = 1;
207 
208         /* Overwrite value with tail and update length */
209         if (pos < (is->length-1)) intsetMoveTail(is,pos+1,pos);
210         is = intsetResize(is,is->length-1);
211         is->length--;
212     }
213     return is;
214 }
215 
216 //判断value是否在is中
217 /* Determine whether a value belongs to this set */
218 uint8_t intsetFind(intset *is, int64_t value) {
219     uint8_t valenc = _intsetValueEncoding(value);
220     return valenc <= is->encoding && intsetSearch(is,value,NULL);
221 }
222 
223 //随机取一个intset中的元素
224 /* Return random member */
225 int64_t intsetRandom(intset *is) {
226     return _intsetGet(is,rand()%is->length);
227 }
228 
229 //把pos位置上的元素取出,如果超出位置,返回0,找到了则返回1,*value为其值
230 /* Sets the value to the value at the given position. When this position is
231  * out of range the function returns 0, when in range it returns 1. */
232 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
233     if (pos < is->length) {
234         *value = _intsetGet(is,pos);
235         return 1;
236     }
237     return 0;
238 }
239 
240 //得到intset的元素数量
241 /* Return intset length */
242 uint32_t intsetLen(intset *is) {
243     return is->length;
244 }
245 
246 #ifdef INTSET_TEST_MAIN
247 #include <sys/time.h>
248 
249 void intsetRepr(intset *is) {
250     int i;
251     for (i = 0; i < is->length; i++) {
252         printf("%lld\n", (uint64_t)_intsetGet(is,i));
253     }
254     printf("\n");
255 }
256 
257 void error(char *err) {
258     printf("%s\n", err);
259     exit(1);
260 }
261 
262 void ok(void) {
263     printf("OK\n");
264 }
265 
266 long long usec(void) {
267     struct timeval tv;
268     gettimeofday(&tv,NULL);
269     return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
270 }
271 
272 #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
273 void _assert(char *estr, char *file, int line) {
274     printf("\n\n=== ASSERTION FAILED ===\n");
275     printf("==> %s:%d '%s' is not true\n",file,line,estr);
276 }
277 
278 intset *createSet(int bits, int size) {
279     uint64_t mask = (1<<bits)-1;
280     uint64_t i, value;
281     intset *is = intsetNew();
282 
283     for (i = 0; i < size; i++) {
284         if (bits > 32) {
285             value = (rand()*rand()) & mask;
286         } else {
287             value = rand() & mask;
288         }
289         is = intsetAdd(is,value,NULL);
290     }
291     return is;
292 }
293 
294 void checkConsistency(intset *is) {
295     int i;
296 
297     for (i = 0; i < (is->length-1); i++) {
298         if (is->encoding == INTSET_ENC_INT16) {
299             int16_t *i16 = (int16_t*)is->contents;
300             assert(i16[i] < i16[i+1]);
301         } else if (is->encoding == INTSET_ENC_INT32) {
302             int32_t *i32 = (int32_t*)is->contents;
303             assert(i32[i] < i32[i+1]);
304         } else {
305             int64_t *i64 = (int64_t*)is->contents;
306             assert(i64[i] < i64[i+1]);
307         }
308     }
309 }
310 
311 int main(int argc, char **argv) {
312     uint8_t success;
313     int i;
314     intset *is;
315     sranddev();
316 
317     printf("Value encodings: "); {
318         assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
319         assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
320         assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
321         assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
322         assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
323         assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
324         assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
325         assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
326         assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
327         assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
328         ok();
329     }
330 
331     printf("Basic adding: "); {
332         is = intsetNew();
333         is = intsetAdd(is,5,&success); assert(success);
334         is = intsetAdd(is,6,&success); assert(success);
335         is = intsetAdd(is,4,&success); assert(success);
336         is = intsetAdd(is,4,&success); assert(!success);
337         ok();
338     }
339 
340     printf("Large number of random adds: "); {
341         int inserts = 0;
342         is = intsetNew();
343         for (i = 0; i < 1024; i++) {
344             is = intsetAdd(is,rand()%0x800,&success);
345             if (success) inserts++;
346         }
347         assert(is->length == inserts);
348         checkConsistency(is);
349         ok();
350     }
351 
352     printf("Upgrade from int16 to int32: "); {
353         is = intsetNew();
354         is = intsetAdd(is,32,NULL);
355         assert(is->encoding == INTSET_ENC_INT16);
356         is = intsetAdd(is,65535,NULL);
357         assert(is->encoding == INTSET_ENC_INT32);
358         assert(intsetFind(is,32));
359         assert(intsetFind(is,65535));
360         checkConsistency(is);
361 
362         is = intsetNew();
363         is = intsetAdd(is,32,NULL);
364         assert(is->encoding == INTSET_ENC_INT16);
365         is = intsetAdd(is,-65535,NULL);
366         assert(is->encoding == INTSET_ENC_INT32);
367         assert(intsetFind(is,32));
368         assert(intsetFind(is,-65535));
369         checkConsistency(is);
370         ok();
371     }
372 
373     printf("Upgrade from int16 to int64: "); {
374         is = intsetNew();
375         is = intsetAdd(is,32,NULL);
376         assert(is->encoding == INTSET_ENC_INT16);
377         is = intsetAdd(is,4294967295,NULL);
378         assert(is->encoding == INTSET_ENC_INT64);
379         assert(intsetFind(is,32));
380         assert(intsetFind(is,4294967295));
381         checkConsistency(is);
382 
383         is = intsetNew();
384         is = intsetAdd(is,32,NULL);
385         assert(is->encoding == INTSET_ENC_INT16);
386         is = intsetAdd(is,-4294967295,NULL);
387         assert(is->encoding == INTSET_ENC_INT64);
388         assert(intsetFind(is,32));
389         assert(intsetFind(is,-4294967295));
390         checkConsistency(is);
391         ok();
392     }
393 
394     printf("Upgrade from int32 to int64: "); {
395         is = intsetNew();
396         is = intsetAdd(is,65535,NULL);
397         assert(is->encoding == INTSET_ENC_INT32);
398         is = intsetAdd(is,4294967295,NULL);
399         assert(is->encoding == INTSET_ENC_INT64);
400         assert(intsetFind(is,65535));
401         assert(intsetFind(is,4294967295));
402         checkConsistency(is);
403 
404         is = intsetNew();
405         is = intsetAdd(is,65535,NULL);
406         assert(is->encoding == INTSET_ENC_INT32);
407         is = intsetAdd(is,-4294967295,NULL);
408         assert(is->encoding == INTSET_ENC_INT64);
409         assert(intsetFind(is,65535));
410         assert(intsetFind(is,-4294967295));
411         checkConsistency(is);
412         ok();
413     }
414 
415     printf("Stress lookups: "); {
416         long num = 100000, size = 10000;
417         int i, bits = 20;
418         long long start;
419         is = createSet(bits,size);
420         checkConsistency(is);
421 
422         start = usec();
423         for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
424         printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
425     }
426 
427     printf("Stress add+delete: "); {
428         int i, v1, v2;
429         is = intsetNew();
430         for (i = 0; i < 0xffff; i++) {
431             v1 = rand() % 0xfff;
432             is = intsetAdd(is,v1,NULL);
433             assert(intsetFind(is,v1));
434 
435             v2 = rand() % 0xfff;
436             is = intsetRemove(is,v2,NULL);
437             assert(!intsetFind(is,v2));
438         }
439         checkConsistency(is);
440         ok();
441     }
442 }
443 #endif

 

 

posted @ 2012-09-04 11:43  ~嘉言懿行~~我是煲仔饭~~  阅读(580)  评论(0编辑  收藏  举报