Redis源码笔记六: ziplist(进度四分之三)

 1 /*
 2  * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
 3  * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 4  * All rights reserved.
 5  *
 6  * Redistribution and use in source and binary forms, with or without
 7  * modification, are permitted provided that the following conditions are met:
 8  *
 9  *   * Redistributions of source code must retain the above copyright notice,
10  *     this list of conditions and the following disclaimer.
11  *   * Redistributions in binary form must reproduce the above copyright
12  *     notice, this list of conditions and the following disclaimer in the
13  *     documentation and/or other materials provided with the distribution.
14  *   * Neither the name of Redis nor the names of its contributors may be used
15  *     to endorse or promote products derived from this software without
16  *     specific prior written permission.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  */
30 
 31 #define ZIPLIST_HEAD 0
 32 #define ZIPLIST_TAIL 1
 33 
 34 /* Create a new ziplist. */
 35 unsigned char *ziplistNew(void);
 36 
 37 /* Push a new entry holding the given value onto the head or the tail of the ziplist. */
 38 unsigned char *ziplistPush(unsigned char *zl, unsigned char *s, unsigned int slen, int where);
 39 
 40 unsigned char *ziplistIndex(unsigned char *zl, int index);
 41 unsigned char *ziplistNext(unsigned char *zl, unsigned char *p);
 42 unsigned char *ziplistPrev(unsigned char *zl, unsigned char *p);
 43 unsigned int ziplistGet(unsigned char *p, unsigned char **sval, unsigned int *slen, long long *lval);
 44 
 45 /* Insert a new entry holding the given value at the given position. */
 46 unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen);
 47 
 48 /* Delete the entry at the given position. */
 49 unsigned char *ziplistDelete(unsigned char *zl, unsigned char **p);
 50 
 51 /* Delete a run of consecutive entries starting at the given index. */
 52 unsigned char *ziplistDeleteRange(unsigned char *zl, unsigned int index, unsigned int num);
 53 
 54 unsigned int ziplistCompare(unsigned char *p, unsigned char *s, unsigned int slen);
 55 
 56 /* Find and return the entry holding the given value. */
 57 unsigned char *ziplistFind(unsigned char *p, unsigned char *vstr, unsigned int vlen, unsigned int skip);
 58 
 59 /* Return the number of entries stored in the ziplist. */
 60 unsigned int ziplistLen(unsigned char *zl);
 61 
 62 /* Return the memory used by the ziplist, in bytes. */
 63 size_t ziplistBlobLen(unsigned char *zl);

ziplist.h

  1 /* The ziplist is a specially encoded dually linked list that is designed
  2  * to be very memory efficient. It stores both strings and integer values,
  3  * where integers are encoded as actual integers instead of a series of
  4  * characters. It allows push and pop operations on either side of the list
  5  * in O(1) time. However, because every operation requires a reallocation of
  6  * the memory used by the ziplist, the actual complexity is related to the
  7  * amount of memory used by the ziplist.
  8  *
  9  * ----------------------------------------------------------------------------
 10  *
 11  * ZIPLIST OVERALL LAYOUT:
 12  * The general layout of the ziplist is as follows:
 13  * <zlbytes><zltail><zllen><entry><entry><zlend>
 14  *
 15  * <zlbytes> is an unsigned integer to hold the number of bytes that the
 16  * ziplist occupies. This value needs to be stored to be able to resize the
 17  * entire structure without the need to traverse it first.
 18  *
 19  * <zltail> is the offset to the last entry in the list. This allows a pop
 20  * operation on the far side of the list without the need for full traversal.
 21  *
 22  * <zllen> is the number of entries. When this value is larger than 2**16-2,
 23  * we need to traverse the entire list to know how many items it holds.
 24  *
 25  * <zlend> is a single byte special value, equal to 255, which indicates the
 26  * end of the list.
 27  *
 28  * ZIPLIST ENTRIES:
 29  * Every entry in the ziplist is prefixed by a header that contains two pieces
 30  * of information. First, the length of the previous entry is stored to be
 31  * able to traverse the list from back to front. Second, the encoding with an
 32  * optional string length of the entry itself is stored.
 33  *
 34  * The length of the previous entry is encoded in the following way:
 35  * If this length is smaller than 254 bytes, it will only consume a single
 36  * byte that takes the length as value. When the length is greater than or
 37  * equal to 254, it will consume 5 bytes. The first byte is set to 254 to
 38  * indicate a larger value is following. The remaining 4 bytes take the
 39  * length of the previous entry as value.
 40  *
 41  * The other header field of the entry itself depends on the contents of the
 42  * entry. When the entry is a string, the first 2 bits of this header will hold
 43  * the type of encoding used to store the length of the string, followed by the
 44  * actual length of the string. When the entry is an integer the first 2 bits
 45  * are both set to 1. The following 2 bits are used to specify what kind of
 46  * integer will be stored after this header. An overview of the different
 47  * types and encodings is as follows:
 48  *
 49  * |00pppppp| - 1 byte
 50  *      String value with length less than or equal to 63 bytes (6 bits).
 51  * |01pppppp|qqqqqqqq| - 2 bytes
 52  *      String value with length less than or equal to 16383 bytes (14 bits).
 53  * |10______|qqqqqqqq|rrrrrrrr|ssssssss|tttttttt| - 5 bytes
 54  *      String value with length greater than or equal to 16384 bytes.
 55  * |11000000| - 1 byte
 56  *      Integer encoded as int16_t (2 bytes).
 57  * |11010000| - 1 byte
 58  *      Integer encoded as int32_t (4 bytes).
 59  * |11100000| - 1 byte
 60  *      Integer encoded as int64_t (8 bytes).
 61  * |11110000| - 1 byte
 62  *      Integer encoded as 24 bit signed (3 bytes).
 63  * |11111110| - 1 byte
 64  *      Integer encoded as 8 bit signed (1 byte).
 65  * |1111xxxx| - (with xxxx between 0000 and 1101) immediate 4 bit integer.
 66  *      Unsigned integer from 0 to 12. The encoded value is actually from
 67  *      1 to 13 because 0000 and 1111 can not be used, so 1 should be
 68  *      subtracted from the encoded 4 bit value to obtain the right value.
 69  * |11111111| - End of ziplist.
 70  *
 71  * All the integers are represented in little endian byte order.
 72  *
 73  * ----------------------------------------------------------------------------
 74  *
 75  * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
 76  * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
 77  * All rights reserved.
 78  *
 79  * Redistribution and use in source and binary forms, with or without
 80  * modification, are permitted provided that the following conditions are met:
 81  *
 82  *   * Redistributions of source code must retain the above copyright notice,
 83  *     this list of conditions and the following disclaimer.
 84  *   * Redistributions in binary form must reproduce the above copyright
 85  *     notice, this list of conditions and the following disclaimer in the
 86  *     documentation and/or other materials provided with the distribution.
 87  *   * Neither the name of Redis nor the names of its contributors may be used
 88  *     to endorse or promote products derived from this software without
 89  *     specific prior written permission.
 90  *
 91  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 92  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 94  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 95  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 96  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 97  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 98  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 99  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
100  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
101  * POSSIBILITY OF SUCH DAMAGE.
102  */
103  
104  /*
105   * ziplist是为内存占用而特别优化的双链表。
106   * 它可以保存字符串和整数,其中整数以整数类型而不是字符串来进行编码和保存。
107   * 
108   * 对ziplist的两端进行push和pop的复杂度都为O(1),
109   * 不过,因为对ziplist的每次修改操作都需要进行内存重分配,
110   * 因此,实际的时间复杂度与ziplist使用的内存大小有关。
111  */
112 
113  /*  
114   * 以下是ziplist的内存结构:
115   * <zlbytes><zltail><zllen><entry><zlend>
116   *
117   * <zlbytes>是一个无符号整数(uint32_t),用于记录整个ziplist所占用的字节数量。通过保存这个值,可以在不遍历整个ziplist的前提下,对整个ziplist进行内存重分配。
118   * <zltail>是到列表中最后一个节点的偏移量(uint32_t)。有了这个偏移量,就可以在常数复杂度内对表尾进行操作,而不必遍历整个列表。
119   * <zllen>是节点的数量,为(uint16_t),当这个值大于2**16-2时,需要遍历整个列表,才能计算出列表的长度。
120   * <zlend>是一个单字节的特殊值,等于255,它标识了列表的末端。
121  */
122 
123 /*
124  * ziplist中的每个节点,都带有一个header作为前缀。
125  * header包括两部分:
126  * 1) 前一个节点的长度,在从后往前遍历时使用;
127  * 2) 当前节点所保存的值的类型和长度
128 */
129 
130 /* 
131  * 前一个节点的长度的存储方式如下:
132  * 1) 如果节点的长度 <  254字节,那么直接用一个字节保存这个值;
133  * 2) 如果节点的长度 >= 254字节,那么将第一个字节设置为254,再在之后用4个字节来表示节点的实际长度(共使用5个字节);
134 */
135 
136 /* 另一个header域保存的信息取决于这个节点所保存的内容本身。
137  * 1) 当节点保存的是字符串时,header的前2位用于指示保存内容长度所使用的编码方式,之后跟着的是内容长度的值;
138  * 2) 当节点保存的是整数时,header的前 2 位都设置为 1, 之后的 2 位用于指示保存的整数值的类型(这个类型决定了内容所占用的空间);
139  *
140 */
141 #include <stdio.h>
142 #include <stdlib.h>
143 #include <string.h>
144 #include <stdint.h>
145 #include <assert.h>
146 #include <limits.h>
147 #include "zmalloc.h"
148 #include "util.h"
149 #include "ziplist.h"
150 #include "endianconv.h"
151 
/* Special first-byte values: end-of-list marker, and the marker announcing
 * a 5-byte "previous entry length" field. */
#define ZIP_END 255
#define ZIP_BIGLEN 254

/* Different encoding/length possibilities */
#define ZIP_STR_MASK 0xc0    /* 1100 0000: the two type bits of the header */
#define ZIP_INT_MASK 0x30    /* 0011 0000: the integer sub-type bits */
#define ZIP_STR_06B (0 << 6)
#define ZIP_STR_14B (1 << 6)
#define ZIP_STR_32B (2 << 6)
#define ZIP_INT_16B (0xc0 | 0<<4)
#define ZIP_INT_32B (0xc0 | 1<<4)
#define ZIP_INT_64B (0xc0 | 2<<4)
#define ZIP_INT_24B (0xc0 | 3<<4)
#define ZIP_INT_8B 0xfe
/* 4 bit integer immediate encoding */
#define ZIP_INT_IMM_MASK 0x0f
#define ZIP_INT_IMM_MIN 0xf1    /* 11110001 */
#define ZIP_INT_IMM_MAX 0xfd    /* 11111101 */
/* The argument is parenthesized so that an expression such as (a | b) is
 * masked as a whole instead of being split by operator precedence. */
#define ZIP_INT_IMM_VAL(v) ((v) & ZIP_INT_IMM_MASK)

#define INT24_MAX 0x7fffff
#define INT24_MIN (-INT24_MAX - 1)

/* Macro to determine type: an encoding is a string encoding when its two
 * type bits are not both set. */
#define ZIP_IS_STR(enc) (((enc) & ZIP_STR_MASK) < ZIP_STR_MASK)
177 
/* Utility macros */
/* Accessors for the fields of the ziplist header. They yield the raw stored
 * field; the code in this file converts with intrev32ifbe()/intrev16ifbe()
 * when reading or writing them. */

/* Total size of the ziplist in bytes (uint32_t at byte offset 0). */
#define ZIPLIST_BYTES(zl)       (*((uint32_t*)(zl)))

/* Offset of the last entry (uint32_t right after the size field). */
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))

/* Number of entries (uint16_t right after the two uint32_t fields). */
#define ZIPLIST_LENGTH(zl)      (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))

/* Size of the ziplist header: two uint32_t fields plus one uint16_t field. */
#define ZIPLIST_HEADER_SIZE     (sizeof(uint32_t)*2+sizeof(uint16_t))

/* Pointer to the first byte after the header. */
#define ZIPLIST_ENTRY_HEAD(zl)  ((zl)+ZIPLIST_HEADER_SIZE)

/* Pointer to the start of the last entry (zl plus the stored tail offset). */
#define ZIPLIST_ENTRY_TAIL(zl)  ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))

/* Pointer to the last byte of the ziplist, which holds the end marker. */
#define ZIPLIST_ENTRY_END(zl)   ((zl)+intrev32ifbe(ZIPLIST_BYTES(zl))-1)

/* We know a positive increment can only be 1 because entries can only be
 * pushed one at a time. */
/* Add 'incr' to the stored entry count, unless the count is already
 * UINT16_MAX, in which case it is left untouched. 'incr' is parenthesized so
 * that any expression can be passed safely. */
#define ZIPLIST_INCR_LENGTH(zl,incr) { \
    if (ZIPLIST_LENGTH(zl) < UINT16_MAX) \
        ZIPLIST_LENGTH(zl) = intrev16ifbe(intrev16ifbe(ZIPLIST_LENGTH(zl))+(incr)); \
}
211 
/* Decoded view of one ziplist entry, as filled in by zipEntry(). */
typedef struct zlentry {
    unsigned int prevrawlensize; /* bytes used to encode the previous entry's length */
    unsigned int prevrawlen;     /* length of the previous entry */
    unsigned int lensize;        /* bytes used to encode this entry's encoding/length */
    unsigned int len;            /* length of this entry's content */
    unsigned int headersize;     /* prevrawlensize + lensize */
    unsigned char encoding;      /* encoding of this entry */
    unsigned char *p;            /* address the entry was decoded from */
} zlentry;
228 
/* Extract the encoding from the byte pointed by 'ptr' and set it into
 * 'encoding'. When the byte is below ZIP_STR_MASK (a string encoding) only
 * its two type bits are kept; otherwise the whole byte is the encoding.
 * 'ptr' is parenthesized so that pointer expressions such as (p + n) index
 * the intended byte. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do {  \
    (encoding) = ((ptr)[0]); \
    if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)
236 
237 /* Return bytes needed to store integer encoded by 'encoding' */
238 /* 返回encoding指定的整数编码方式所需的长度 */
239 static unsigned int zipIntSize(unsigned char encoding) {
240     switch(encoding) {
241     case ZIP_INT_8B:  return 1;
242     case ZIP_INT_16B: return 2;
243     case ZIP_INT_24B: return 3;
244     case ZIP_INT_32B: return 4;
245     case ZIP_INT_64B: return 8;
246     default: return 0; /* 4 bit immediate */
247     }
248     assert(NULL);
249     return 0;
250 }
251 
252 /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns
253  * the amount of bytes required to encode such a length. */
254 /* 编码长度 l , 并将它写入到 p。
255  *
256  * 如果 p 为 NULL, 那么返回编码 rawlen 所需的字节数;
257 */
258 static unsigned int zipEncodeLength(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
259     unsigned char len = 1, buf[5];
260 
261     if (ZIP_IS_STR(encoding)) {
262         /* Although encoding is given it may not be set for strings,
263          * so we determine it here using the raw length. */
264          /* 字符串编码 */
265         if (rawlen <= 0x3f) {
266         /* 11 1111, 长度 6 bit, 长度 + 编码 8 bit(1 byte) */
267             if (!p) return len;
268             buf[0] = ZIP_STR_06B | rawlen;
269         } else if (rawlen <= 0x3fff) {
270         /* 11 1111 1111 1111, 长度 14 bit, 长度 + 编码 16 bit(2 bytes) */
271             len += 1;
272             if (!p) return len;
273             buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
274             buf[1] = rawlen & 0xff;
275         } else {
276         /* 长度 32 bit(4 bytes), 长度 + 编码 40bit(5 bytes) */
277             len += 4;
278             if (!p) return len;
279             buf[0] = ZIP_STR_32B;
280             buf[1] = (rawlen >> 24) & 0xff;
281             buf[2] = (rawlen >> 16) & 0xff;
282             buf[3] = (rawlen >> 8) & 0xff;
283             buf[4] = rawlen & 0xff;
284         }
285     } else {
286         /* Implies integer encoding, so length is always 1. */
287         /* 编码为整数, 长度总为1 bytes */
288         if (!p) return len;
289         buf[0] = encoding;
290     }
291 
292     /* Store this length at p */
293     memcpy(p,buf,len);
294     return len;
295 }
296 
/* Decode the length encoded in 'ptr'. The 'encoding' variable will hold the
 * entries encoding, the 'lensize' variable will hold the number of bytes
 * required to encode the entries length, and the 'len' variable will hold the
 * entries length.
 *
 * String encodings carry the length in the header itself; for integer
 * encodings the header is one byte and the length comes from zipIntSize().
 * Comments are kept outside the macro body because every body line must end
 * with a line continuation. The 32 bit length bytes are widened to unsigned
 * before shifting so that a top byte >= 0x80 is not shifted into the sign
 * bit of an int. */
#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do {                    \
    ZIP_ENTRY_ENCODING((ptr), (encoding));                                     \
    if ((encoding) < ZIP_STR_MASK) {                                           \
        if ((encoding) == ZIP_STR_06B) {                                       \
            (lensize) = 1;                                                     \
            (len) = (ptr)[0] & 0x3f;                                           \
        } else if ((encoding) == ZIP_STR_14B) {                                \
            (lensize) = 2;                                                     \
            (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1];                       \
        } else if ((encoding) == ZIP_STR_32B) {                                \
            (lensize) = 5;                                                     \
            (len) = ((unsigned int)(ptr)[1] << 24) |                           \
                    ((unsigned int)(ptr)[2] << 16) |                           \
                    ((unsigned int)(ptr)[3] <<  8) |                           \
                    ((unsigned int)(ptr)[4]);                                  \
        } else {                                                               \
            assert(NULL);                                                      \
        }                                                                      \
    } else {                                                                   \
        (lensize) = 1;                                                         \
        (len) = zipIntSize(encoding);                                          \
    }                                                                          \
} while(0);
330 
331 /* Encode the length of the previous entry and write it to "p". Return the
332  * number of bytes needed to encode this length if "p" is NULL. */
333 /*
334  * 编码前置节点的长度, 并将它写入 p 。
335  *
336  * 如果 p 为 NULL, 那么返回编码 len 所需的字节数;
337 */
338 static unsigned int zipPrevEncodeLength(unsigned char *p, unsigned int len) {
339     if (p == NULL) {
340         return (len < ZIP_BIGLEN) ? 1 : sizeof(len)+1;
341     } else {
342         if (len < ZIP_BIGLEN) {
343         /* 如果前一个节点的长度小于ZIP_BIGLEN(254)时,那么只使用一个字节保存它的值 */
344             p[0] = len;
345             return 1;
346         } else {
347         /* 如果前一个字节的长度大于或等于ZIP_BIGLEN(254)时,那么将第一个字节的值设为254,然后用接下来的4个字节保存实际长度 */
348             p[0] = ZIP_BIGLEN;
349             memcpy(p+1,&len,sizeof(len));
350             memrev32ifbe(p+1);
351             return 1+sizeof(len);
352         }
353     }
354 }
355 
356 /* Encode the length of the previous entry and write it to "p". This only
357  * uses the larger encoding (required in __ziplistCascadeUpdate). */
358 /*
359  * 将前驱节点的长度 len 写入到 p中
360  * 其中 p 的空间比保存 len 所需的实际空间要更大,这适用于将一个比原节点更短的新节点插入到某个节点时使用
361  * 具体参考 __ziplistCascadeUpdate 函数的头注释
362 */
363 static void zipPrevEncodeLengthForceLarge(unsigned char *p, unsigned int len) {
364     if (p == NULL) return;
365     p[0] = ZIP_BIGLEN;
366     memcpy(p+1,&len,sizeof(len));
367     memrev32ifbe(p+1);
368 }
369 
/* Decode the number of bytes required to store the length of the previous
 * element, from the perspective of the entry pointed to by 'ptr'.
 * Sets 'prevlensize' to 1 when the first byte is below ZIP_BIGLEN and to 5
 * otherwise. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    (prevlensize) = ((ptr)[0] < ZIP_BIGLEN) ? 1 : 5;                           \
} while(0);
382 
/* Decode the length of the previous element, from the perspective of the entry
 * pointed to by 'ptr'.
 *
 * 'prevlensize' receives the size of the length field (1 or 5) and 'prevlen'
 * the decoded length: the first byte itself for the 1-byte form, or the four
 * bytes after the marker (passed through memrev32ifbe()) for the 5-byte
 * form. Comments are kept outside the macro body because every body line
 * must end with a line continuation.
 * NOTE(review): the assert inspects sizeof 'prevlensize' although the 4-byte
 * copy targets 'prevlen' -- confirm which one was intended. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    } else if ((prevlensize) == 5) {                                           \
        assert(sizeof((prevlensize)) == 4);                                    \
        memcpy(&(prevlen), ((char*)(ptr)) + 1, 4);                             \
        memrev32ifbe(&(prevlen));                                              \
    }                                                                          \
} while(0);
400 
/* Return the difference in number of bytes needed to store the length of the
 * previous element 'len', in the entry pointed to by 'p'.
 * The result is (bytes needed to encode 'len') minus (bytes the entry at 'p'
 * currently uses for that field). */
static int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
    unsigned int current, required;

    /* Size of the field as currently stored in the entry. */
    ZIP_DECODE_PREVLENSIZE(p, current);

    /* Size the field would need to hold 'len'. */
    required = zipPrevEncodeLength(NULL, len);

    return required - current;
}
412 
/* Return the total number of bytes used by the entry pointed to by 'p':
 * the previous-length field, the encoding/length field and the content. */
static unsigned int zipRawEntryLength(unsigned char *p) {
    unsigned int prevfield, enc, lenfield, content;

    /* Size of the previous-entry length field. */
    ZIP_DECODE_PREVLENSIZE(p, prevfield);

    /* Size of this entry's own header field and of its content. */
    ZIP_DECODE_LENGTH(p + prevfield, enc, lenfield, content);

    return prevfield + lenfield + content;
}
426 
427 /* Check if string pointed to by 'entry' can be encoded as an integer.
428  * Stores the integer value in 'v' and its encoding in 'encoding'. */
429 /* 检查 entry 所保存的值, 看它能否编码为整数 */
430 /* 复杂度: O(N), N为 entry 所保存字符串值的长度 */
431 /* 返回值: 如果可以的话, 返回 1 , 并将值保存在 v,将编码保存在encoding;否则返回0; */
432 static int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
433     long long value;
434 
435     if (entrylen >= 32 || entrylen == 0) return 0;
436     /* 尝试转换为整数 */
437     if (string2ll((char*)entry,entrylen,&value)) {
438         /* Great, the string can be encoded. Check what's the smallest
439          * of our encoding types that can hold this value. */
440          /* 选择整数编码 */
441         if (value >= 0 && value <= 12) {
442             *encoding = ZIP_INT_IMM_MIN+value;
443         } else if (value >= INT8_MIN && value <= INT8_MAX) {
444             *encoding = ZIP_INT_8B;
445         } else if (value >= INT16_MIN && value <= INT16_MAX) {
446             *encoding = ZIP_INT_16B;
447         } else if (value >= INT24_MIN && value <= INT24_MAX) {
448             *encoding = ZIP_INT_24B;
449         } else if (value >= INT32_MIN && value <= INT32_MAX) {
450             *encoding = ZIP_INT_32B;
451         } else {
452             *encoding = ZIP_INT_64B;
453         }
454         *v = value;
455         return 1;
456     }
457     return 0;
458 }
459 
460 /* Store integer 'value' at 'p', encoded as 'encoding' */
461 /* 将 value 保存到 p , 并设置编码为 encoding */
462 static void zipSaveInteger(unsigned char *p, int64_t value, unsigned char encoding) {
463     int16_t i16;
464     int32_t i32;
465     int64_t i64;
466     /* 8 bit 整数 */
467     if (encoding == ZIP_INT_8B) {
468         ((int8_t*)p)[0] = (int8_t)value;
469     /* 16 bit 整数 */
470     } else if (encoding == ZIP_INT_16B) {
471         i16 = value;
472         memcpy(p,&i16,sizeof(i16));
473         memrev16ifbe(p);
474     /* 24 bit 整数 */
475     } else if (encoding == ZIP_INT_24B) {
476         i32 = value<<8;
477         memrev32ifbe(&i32);
478         memcpy(p,((uint8_t*)&i32)+1,sizeof(i32)-sizeof(uint8_t));
479     /* 32 bit 整数 */
480     } else if (encoding == ZIP_INT_32B) {
481         i32 = value;
482         memcpy(p,&i32,sizeof(i32));
483         memrev32ifbe(p);
484     /* 64 bit 整数 */
485     } else if (encoding == ZIP_INT_64B) {
486         i64 = value;
487         memcpy(p,&i64,sizeof(i64));
488         memrev64ifbe(p);
489     /* 值和编码保存在同一个 byte */
490     } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {
491         /* Nothing to do, the value is stored in the encoding itself. */
492     } else {
493         assert(NULL);
494     }
495 }
496 
497 /* Read integer encoded as 'encoding' from 'p' */
498 /* 根据 encoding, 从指针 p 中取出整数值 */
499 static int64_t zipLoadInteger(unsigned char *p, unsigned char encoding) {
500     int16_t i16;
501     int32_t i32;
502     int64_t i64, ret = 0;
503 
504     /* 8 bit */
505     if (encoding == ZIP_INT_8B) {
506         ret = ((int8_t*)p)[0];
507 
508     /* 16 bit */
509     } else if (encoding == ZIP_INT_16B) {
510         memcpy(&i16,p,sizeof(i16));
511         memrev16ifbe(&i16);
512         ret = i16;
513 
514     /* 32 bit */
515     } else if (encoding == ZIP_INT_32B) {
516         memcpy(&i32,p,sizeof(i32));
517         memrev32ifbe(&i32);
518         ret = i32;
519 
520     /* 24 bit */
521     } else if (encoding == ZIP_INT_24B) {
522         i32 = 0;
523         memcpy(((uint8_t*)&i32)+1,p,sizeof(i32)-sizeof(uint8_t));
524         memrev32ifbe(&i32);
525         ret = i32>>8;
526 
527     /* 64 bit */
528     } else if (encoding == ZIP_INT_64B) {
529         memcpy(&i64,p,sizeof(i64));
530         memrev64ifbe(&i64);
531         ret = i64;
532 
533     /* 值和编码保存在同一个 byte */
534     } else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX) {
535         ret = (encoding & ZIP_INT_IMM_MASK)-1;
536     } else {
537         assert(NULL);
538     }
539     return ret;
540 }
541 
542 /* Return a struct with all information about an entry. */
543 /* 从指针 p 中提取出节点的各个属性, 并将属性保存到 zlentry结构, 然后返回 */
544 static zlentry zipEntry(unsigned char *p) {
545     zlentry e;
546     
547     /* 取出前一个节点的长度 */
548     ZIP_DECODE_PREVLEN(p, e.prevrawlensize, e.prevrawlen);
549 
550     /* 取出当前节点的编码, 保存节点的长度所需的长度, 以及节点的长度 */
551     ZIP_DECODE_LENGTH(p + e.prevrawlensize, e.encoding, e.lensize, e.len);
552 
553     /* 记录 header 的长度 */
554     e.headersize = e.prevrawlensize + e.lensize;
555 
556     /* 记录指针 p */
557     e.p = p;
558 
559     return e;
560 }
561 
562 /* Create a new empty ziplist. */
563 /* 新创建一个新的 ziplist
564  * 返回值: 新创建的 ziplist
565 */
566 unsigned char *ziplistNew(void) {
567     /* 分配 2 个 32 bit, 一个 16 bit, 以及一个 8 bit,分别用于 <zlbytes><zltail><zllen><zlend> */
568     unsigned int bytes = ZIPLIST_HEADER_SIZE+1;
569     unsigned char *zl = zmalloc(bytes);
570 
571     /* 设置长度 */
572     ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
573 
574     /* 设置表尾偏移量 */
575     ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
576 
577     /* 设置列表项数量 */
578     ZIPLIST_LENGTH(zl) = 0;
579 
580     /* 设置表尾标识 */
581     zl[bytes-1] = ZIP_END;
582 
583     return zl;
584 }
585 
586 /* Resize the ziplist. */
587 /* 对 zl 进行空间重分配, 并更新相关属性 */
588 /* 返回值: 更新后的ziplist */
589 static unsigned char *ziplistResize(unsigned char *zl, unsigned int len) {
590     /* 重新分配空间  */
591     zl = zrealloc(zl,len);
592 
593     /* 更新长度 */
594     ZIPLIST_BYTES(zl) = intrev32ifbe(len);
595 
596     /* 设置表尾 */
597     zl[len-1] = ZIP_END;
598 
599     return zl;
600 }
601 
602 /* When an entry is inserted, we need to set the prevlen field of the next
603  * entry to equal the length of the inserted entry. It can occur that this
604  * length cannot be encoded in 1 byte and the next entry needs to be grow
605  * a bit larger to hold the 5-byte encoded prevlen. This can be done for free,
606  * because this only happens when an entry is already being inserted (which
607  * causes a realloc and memmove). However, encoding the prevlen may require
608  * that this entry is grown as well. This effect may cascade throughout
609  * the ziplist when there are consecutive entries with a size close to
610  * ZIP_BIGLEN, so we need to check that the prevlen can be encoded in every
611  * consecutive entry.
612  *
613  * Note that this effect can also happen in reverse, where the bytes required
614  * to encode the prevlen field can shrink. This effect is deliberately ignored,
615  * because it can cause a "flapping" effect where a chain prevlen fields is
616  * first grown and then shrunk again after consecutive inserts. Rather, the
617  * field is allowed to stay larger than necessary, because a large prevlen
618  * field implies the ziplist is holding large entries anyway.
619  *
620  * The pointer "p" points to the first entry that does NOT need to be
621  * updated, i.e. consecutive fields MAY need an update. */
622 /* 当将一个新节点添加到某个节点之前的时候,
623  * 如果原节点的 prevlen 不足以保存新节点的长度,
624  * 那么就需要对原节点的空间进行扩展(从 1 字节扩展到 5 字节)
625  *
626  * 但是, 当对原节点进行扩展之后, 原节点的下一个节点的prevlen可能出现空间不足,
627  * 这种情况在多个连续节点的长度都接近ZIP_BIGLEN时可能发生。
628  *
629  * 这个函数就用于处理这种连续扩展动作。
630  *
631  * 因为节点的长度变小而引起的连续缩小也是可能出现的,
632  * 不过, 为了避免扩展-缩小-扩展-缩小这样的情况反复出现(flapping,抖动)
633  * 我们不处理这种情况, 而是任由 prevlen 比所需的长度更长;
634  *
635  * 返回值:更新后的ziplist
636 */
637 static unsigned char *__ziplistCascadeUpdate(unsigned char *zl, unsigned char *p) {
638     size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), rawlen, rawlensize;
639     size_t offset, noffset, extra;
640     unsigned char *np;
641     zlentry cur, next;
642     
643     /* 一直更新直到表尾 */
644     while (p[0] != ZIP_END) {
645         /* 当前节点 */
646         cur = zipEntry(p);
647 
648         /* 当前节点的长度 */
649         rawlen = cur.headersize + cur.len;
650 
651         /* 编码当前节点的长度所需的空间大小 */
652         rawlensize = zipPrevEncodeLength(NULL,rawlen);
653 
654         /* Abort if there is no next entry. */
655         /* 已经到达表尾, 推出 */
656         if (p[rawlen] == ZIP_END) break;
657 
658         /* 取出下一节点 */
659         next = zipEntry(p+rawlen);
660 
661         /* Abort when "prevlen" has not changed. */
662         /* 如果下一个的prevlen 等于当前节点的rawlen, 那么说明编码大小无需改变,退出 */
663         if (next.prevrawlen == rawlen) break;
664     
665         /* 下一节点的长度编码空间不足, 进行扩展 */
666         if (next.prevrawlensize < rawlensize) {
667             /* The "prevlen" field of "next" needs more bytes to hold
668              * the raw length of "cur". */
669             
670             offset = p-zl;
671 
672             /* 需要多添加的长度 */
673             extra = rawlensize-next.prevrawlensize;
674 
675             /* 重分配 */
676             zl = ziplistResize(zl,curlen+extra);
677             p = zl+offset;
678 
679             /* Current pointer and offset for next element. */
680             np = p+rawlen;
681             noffset = np-zl;
682 
683             /* Update tail offset when next element is not the tail element. */
684             if ((zl+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))) != np) {
685                 ZIPLIST_TAIL_OFFSET(zl) =
686                     intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+extra);
687             }
688 
689             /* Move the tail to the back. */
690             /* 为获得空间而进行数据移动 */
691             memmove(np+rawlensize,
692                 np+next.prevrawlensize,
693                 curlen-noffset-next.prevrawlensize-1);
694             zipPrevEncodeLength(np,rawlen);
695 
696             /* Advance the cursor */
697             p += rawlen;
698             curlen += extra;
699         } else {
700             /* 下一节点的长度编码空间有多余, 不进行收缩,只是将被编码的长度写入空间 */
701             if (next.prevrawlensize > rawlensize) {
702                 /* This would result in shrinking, which we want to avoid.
703                  * So, set "rawlen" in the available bytes. */
704                 zipPrevEncodeLengthForceLarge(p+rawlen,rawlen);
705             } else {
706                 zipPrevEncodeLength(p+rawlen,rawlen);
707             }
708             
709             // next.prevrawlensize == rawlensize
710             /* Stop here, as the raw length of "next" has not changed. */
711             break;
712         }
713     }
714     return zl;
715 }
716 
717 /* Delete up to "num" consecutive entries, starting at "p". Returns pointer to the ziplist. */
718 /* Deletion stops early if ZIP_END is reached before "num" entries were removed. */
719 /* Return value: the ziplist after the deletion (zl itself, untouched, when nothing was deleted). */
720 static unsigned char *__ziplistDelete(unsigned char *zl, unsigned char *p, unsigned int num) {
721     unsigned int i, totlen, deleted = 0;
722     size_t offset;
723     int nextdiff = 0;
724     zlentry first, tail;
725 
726     /* Decode the first entry to delete */
727     first = zipEntry(p);
728 
729     /* Step p over every entry to delete, counting them in "deleted" */
730     for (i = 0; p[0] != ZIP_END && i < num; i++) {
731         p += zipRawEntryLength(p);
732         deleted++;
733     }
734     
735     /* Total number of bytes taken by the deleted entries */
736     totlen = p-first.p;
737     if (totlen > 0) {
738         if (p[0] != ZIP_END) {
739             /* Storing `prevrawlen` in this entry may increase or decrease the
740              * number of bytes required compare to the current `prevrawlen`.
741              * There always is room to store this, because it was previously
742              * stored by an entry that is now being deleted. */
743             /* Update the entry that follows the last deleted one:
744              * its prevlen becomes first.prevrawlen, that is, the
745              * length of the entry preceding the first deleted one
746             */
747             nextdiff = zipPrevLenByteDiff(p,first.prevrawlen);
748             p -= nextdiff;
749             zipPrevEncodeLength(p,first.prevrawlen);
750 
751             /* Update offset for tail */
752             /* The deleted bytes no longer precede the tail entry */
753             ZIPLIST_TAIL_OFFSET(zl) =
754                 intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))-totlen);
755 
756             /* When the tail contains more than one entry, we need to take
757              * "nextdiff" in account as well. Otherwise, a change in the
758              * size of prevlen doesn't have an effect on the *tail* offset. */
759             /* Add nextdiff to the tail offset only when the entry at p is not the last one */
760             tail = zipEntry(p);
761             if (p[tail.headersize+tail.len] != ZIP_END) {
762                 ZIPLIST_TAIL_OFFSET(zl) =
763                    intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
764             }
765 
766             /* Move tail to the front of the ziplist */
767             /* Shift the remaining data forward, over the deleted entries */
768             memmove(first.p,p,
769                 intrev32ifbe(ZIPLIST_BYTES(zl))-(p-zl)-1);
770         } else {
771             /* The entire tail was deleted. No need to move memory. */
772             /* Only the tail offset changes: it is moved back by first.prevrawlen from first.p */
773             ZIPLIST_TAIL_OFFSET(zl) =
774                 intrev32ifbe((first.p-zl)-first.prevrawlen);
775         }
776 
777         /* Resize and update length */
778         /* The offset of first.p is saved so p can be recomputed after the resize */
779         offset = first.p-zl;
780         zl = ziplistResize(zl, intrev32ifbe(ZIPLIST_BYTES(zl))-totlen+nextdiff);
781         ZIPLIST_INCR_LENGTH(zl,-deleted);
782         p = zl+offset;
783 
784         /* When nextdiff != 0, the raw length of the next entry has changed, so
785          * we need to cascade the update throughout the ziplist */
786         /* Cascade update */
787         if (nextdiff != 0)
788             zl = __ziplistCascadeUpdate(zl,p);
789     }
790     return zl;
791 }
792 
793 /* Insert item at "p". */
794 /* Adds a new entry holding the "slen" bytes at "s" in front of the entry at "p", */
795 /* or at the end of the list when "p" points at ZIP_END. Returns the ziplist after the insertion. */
796 static unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
797     size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, prevlen = 0;
798     size_t offset;
799     int nextdiff = 0;
800     int forcelarge = 0; /* 1 when the next entry keeps its oversized prevlen field */
801     unsigned char encoding = 0;
802     long long value = 123456789; /* initialized to avoid warning. Using a value
803                                     that is easy to see if for some reason
804                                     we use it uninitialized. */
805     zlentry entry, tail;
806 
807     /* Find out prevlen for the entry that is inserted. */
808     /* Not inserting at the end: the new entry takes over the prevlen of the entry at p. */
809     if (p[0] != ZIP_END) {
810         entry = zipEntry(p);
811         prevlen = entry.prevrawlen;
812     } else {
813         /* Appending: look at the current tail entry. */
814         unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
815 
816         /* The list has at least one entry: prevlen is the raw length of the tail. */
817         if (ptail[0] != ZIP_END) {
818             prevlen = zipRawEntryLength(ptail);
819         }
820     }
821 
822     /* See if the entry can be encoded */
823     /* On success the integer is stored in "value" and its encoding in
824      * "encoding"; reqlen then starts as the size of that integer. */
825     if (zipTryEncoding(s,slen,&value,&encoding)) {
826         /* 'encoding' is set to the appropriate integer encoding */
827         reqlen = zipIntSize(encoding);
828     } else {
829         /* 'encoding' is untouched, however zipEncodeLength will use the
830          * string length to figure out how to encode it. */
831         reqlen = slen;
832     }
833     /* We need space for both the length of the previous entry and
834      * the length of the payload. */
835     reqlen += zipPrevEncodeLength(NULL,prevlen);
836     reqlen += zipEncodeLength(NULL,encoding,slen);
837 
838     /* When the insert position is not equal to the tail, we need to
839      * make sure that the next entry can hold this entry's length in
840      * its prevlen field. zipPrevLenByteDiff yields the change in size of
841      * that field: 0 (unchanged), 4 (grows from 1 to 5 bytes) or
842      * -4 (could shrink from 5 bytes to 1). */
843     nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
844 
845     /* If the field could shrink by 4 bytes while the new entry takes fewer
846      * than 4 bytes, curlen+reqlen+nextdiff would be smaller than curlen and
847      * the resize below would cut off the end of the list before it is moved.
848      * Leave the next entry's prevlen field at its current size instead and
849      * store the new length in it with the large encoding. */
850     if (nextdiff == -4 && reqlen < 4) {
851         nextdiff = 0;
852         forcelarge = 1;
853     }
854 
855     /* Store offset because a realloc may change the address of zl. */
856     offset = p-zl;
857     zl = ziplistResize(zl,curlen+reqlen+nextdiff);
858     p = zl+offset;
859 
860     /* Apply memory move when necessary and update tail offset. */
861     if (p[0] != ZIP_END) {
862         /* Subtract one because of the ZIP_END bytes */
863         memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);
864 
865         /* Encode this entry's raw length in the next entry. */
866         if (forcelarge) {
867             zipPrevEncodeLengthForceLarge(p+reqlen,reqlen);
868         } else {
869             zipPrevEncodeLength(p+reqlen,reqlen);
870         }
871 
872         /* Update offset for tail */
873         ZIPLIST_TAIL_OFFSET(zl) =
874             intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);
875 
876         /* When the tail contains more than one entry, we need to take
877          * "nextdiff" in account as well. Otherwise, a change in the
878          * size of prevlen doesn't have an effect on the *tail* offset. */
879         tail = zipEntry(p+reqlen);
880         if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
881             ZIPLIST_TAIL_OFFSET(zl) =
882                 intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
883         }
884     } else {
885         /* This element will be the new tail. */
886         ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
887     }
888 
889     /* When nextdiff != 0, the raw length of the next entry has changed, so
890      * we need to cascade the update throughout the ziplist */
891     if (nextdiff != 0) {
892         /* The cascade may reallocate zl, so p is recomputed from its offset. */
893         offset = p-zl;
894         zl = __ziplistCascadeUpdate(zl,p+reqlen);
895         p = zl+offset;
896     }
897 
898     /* Write the entry */
899     /* First the previous entry's length, then this entry's encoding/length header. */
900     p += zipPrevEncodeLength(p,prevlen);
901     p += zipEncodeLength(p,encoding,slen);
902 
903     /* Then the payload: raw bytes for strings, the encoded integer otherwise. */
904     if (ZIP_IS_STR(encoding)) {
905         memcpy(p,s,slen);
906     } else {
907         zipSaveInteger(p,value,encoding);
908     }
909 
910     /* One more entry in the list. */
911     ZIPLIST_INCR_LENGTH(zl,1);
912 
913     return zl;
914 }

ziplist.c

posted on 2014-04-14 22:23 __夏沫 阅读(449) 评论(0) 收藏 举报

刷新页面 返回顶部

Redis源码笔记六: ziplist(进度四分之三)

导航

公告