【读书笔记】第四章、压缩列表

一、结构体

1. ziplist结构

2. entry 结构

entry 结构

这里面 previous_entry_length 记录了上个元素的长度。
以及 encoding 编码，外加内容。

假设当前元素首地址为 p
那么 p - p->previous_entry_length 就是上一个元素。通过这样的方法可以达到从尾到头遍历的目的。
encoding，即 content 字段存储的数据类型。为节约内存 encoding 的长度可变。

encoding 编码	encoding 长度	content类型
00bbbbbb [6 bit 表长度]	1 byte	最大长度为 63的字节数组
01bbbbbb aaaaaaaa [14 bit 表长度]	2 byte	最大长度为 2^14 - 1 的字节数组
10xxxxxx aaaaaaaa cccccccc dddddddd eeeeeeee [32 bit 表长度]	5 byte	最大长度为 2^32 - 1的字节数组
11000000	1 byte	int16 整数
11010000	1 byte	int32 整数
11100000	1 byte	int64 整数
11110000	1 byte	24 位整数
11111110	1 byte	8 位整数
1111xxxx	1 byte	没有 content 字段； xxxx 表示 0 ~ 12 的整数

由此可见，encoding 编码的第一个字节前两位【红色】，是解释content 是字节数组，还是整数，

如果是字节数组，也同时来表达字节数组的最大长度，剩余部分为实际使用长度【10 除外：首字节的后六位未使用，长度存放在其后的 4 个字节中】。
如果是整数时候，可以根据第 3 位，第 4 位【绿色】，表达整数的具体类型。
最后当 1111xxxx 表示 0 ~ 12 的时候，可以没有 content ，相当于立即数。[这个说法是书中说的根据源码我感觉不太正确]

以下源码说的是 0001 ~ 1101 是立即数。也就是 1~ 13. 除非 redis 自动做了减一操作？大模型给出了肯定，这里需要再看源码。

也就是说 11110001 ~ 11111101 是立即数（xxxx 为 0001 ~ 1101，编码时写入的是 ZIP_INT_IMM_MIN + 值，所以表示的值是 0 ~ 12），其中 11110000（24 位整数）和 11111110（8 位整数）不在这个范围内，所以这两个编码可用。

// 摘自 /src/ziplist.c
/* Different encoding/length possibilities */
#define ZIP_STR_MASK 0xc0           // 11000000: top two bits of the encoding byte
#define ZIP_INT_MASK 0x30           // 00110000: bits 5-4 of the encoding byte

#define ZIP_STR_06B (0 << 6)        // 00000000
#define ZIP_STR_14B (1 << 6)        // 01000000
#define ZIP_STR_32B (2 << 6)        // 10000000
#define ZIP_INT_16B (0xc0 | 0<<4)   // 11000000
#define ZIP_INT_32B (0xc0 | 1<<4)   // 00010000 | 11000000 = 11010000
#define ZIP_INT_64B (0xc0 | 2<<4)   // 00100000 | 11000000 = 11100000
#define ZIP_INT_24B (0xc0 | 3<<4)   // 00110000 | 11000000 = 11110000
#define ZIP_INT_8B 0xfe             // 11111110

// Note: per the comment below, only 11110001 .. 11111101 are immediates, so
// 11110000 (ZIP_INT_24B) and 11111110 (ZIP_INT_8B) do not collide with them.
/* 4 bit integer immediate encoding |1111xxxx| with xxxx between
 * 0001 and 1101. */
#define ZIP_INT_IMM_MASK 0x0f   /* Mask to extract the 4 bits value. To add
                                   one is needed to reconstruct the value. */
#define ZIP_INT_IMM_MIN 0xf1    /* 11110001 */
#define ZIP_INT_IMM_MAX 0xfd    /* 11111101 */

// Range of a signed 24-bit integer.
#define INT24_MAX 0x7fffff
#define INT24_MIN (-INT24_MAX - 1)

3. zlentry结构

/* We use this function to receive information about a ziplist entry.
 * Note that this is not how the data is actually encoded, is just what we
 * get filled by a function in order to operate more easily. */
typedef struct zlentry {
    // Size in bytes of the prev-entry-len field itself.
    unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
    // Length of the previous entry, as stored in that field.
    unsigned int prevrawlen;     /* Previous entry len. */
    // Size in bytes of this entry's encoding/length header.
    unsigned int lensize;        /* Bytes used to encode this entry type/len.
                                    For example strings have a 1, 2 or 5 bytes
                                    header. Integers always use a single byte.*/
    // Size in bytes of this entry's payload.
    unsigned int len;            /* Bytes used to represent the actual entry.
                                    For strings this is just the string length
                                    while for integers it is 1, 2, 3, 4, 8 or
                                    0 (for 4 bit immediate) depending on the
                                    number range. */
    // Total header size.
    unsigned int headersize;     /* prevrawlensize + lensize. */
    // Encoding type of the entry (this is a type tag, not a length).
    unsigned char encoding;      /* Set to ZIP_STR_* or ZIP_INT_* depending on
                                    the entry encoding. However for 4 bits
                                    immediate integers this can assume a range
                                    of values and must be range-checked. */
    // Address of the first byte of the encoded entry.
    unsigned char *p;            /* Pointer to the very start of the entry, that
                                    is, this points to prev-entry-len field. */
} zlentry;

笔记：
注意：ziplist 里实际存储的 entry 并不是 zlentry，zlentry 只是解码之后方便操作的结构体（见上面源码注释）。如果直接存储 zlentry，当然就不需要这么复杂的编码，但这样也不叫压缩列表了。
所以，压缩的过程是 zlentry 变为 entry 的过程。
所以，因为要压缩，就要记录 lensize 类型，len 元素长度。

解码压缩列表

// 摘自 /src/ziplist.c
/* ZIP_BIG_PREVLEN - 1 is the max number of bytes of
                               the previous entry, for the "prevlen" field prefixing
                               each entry, to be represented with just a single byte.
                               Otherwise it is represented as FE AA BB CC DD, where
                               AA BB CC DD are a 4 bytes unsigned integer
                               representing the previous entry len. */
#define ZIP_BIG_PREVLEN 254

/* Decode the entry that starts at 'p' into the helper struct '*e'.
 *
 * This is the "unsafe" variant: it performs no bounds checking on 'p'.
 * Callers are expected to pass a pointer that was already validated
 * (typically the return value of another ziplist function that asserted
 * the element is valid). The only check done here is that the encoding
 * byte was recognised, i.e. the decoded lensize is not 0.
 *
 * p: start of the encoded entry (its prev-entry-len field).
 * e: output; every field is overwritten. */
static inline void zipEntry(unsigned char *p, zlentry *e) {
    unsigned char *enc;

    /* prev-entry-len field: its own size and the length it stores. */
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);

    /* The encoding header starts right after the prev-entry-len field. */
    enc = p + e->prevrawlensize;
    ZIP_ENTRY_ENCODING(enc, e->encoding);
    ZIP_DECODE_LENGTH(enc, e->encoding, e->lensize, e->len);
    assert(e->lensize != 0); /* check that encoding was valid. */

    /* Remember where the entry starts (not where its payload starts). */
    e->p = p;
    e->headersize = e->prevrawlensize + e->lensize;
}

/* Return the length of the previous element, and the number of bytes that
 * are used in order to encode the previous element length.
 * 'ptr' must point to the prevlen prefix of an entry (that encodes the
 * length of the previous entry in order to navigate the elements backward).
 * The length of the previous entry is stored in 'prevlen', the number of
 * bytes needed to encode the previous entry length are stored in
 * 'prevlensize'.
 *
 * Layout handled here:
 *   prevlensize == 1: the single byte ptr[0] is the length itself.
 *   prevlensize == 5: ptr[0] is a marker byte and ptr[1..4] hold the length,
 *                     least significant byte first.
 *
 * Each byte is widened to uint32_t before shifting so that a value >= 0x80
 * in ptr[4] is never shifted into the sign bit of a promoted int (this
 * matches what ZIP_DECODE_LENGTH does).
 *
 * Only block comments are used inside the body: a line comment would swallow
 * the line continuation and end the macro definition early. */
#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    } else { /* prevlensize == 5 */                                            \
        (prevlen) = ((uint32_t)(ptr)[4] << 24) |                               \
                    ((uint32_t)(ptr)[3] << 16) |                               \
                    ((uint32_t)(ptr)[2] <<  8) |                               \
                    ((uint32_t)(ptr)[1]);                                      \
    }                                                                          \
} while(0)

/* Return the number of bytes used to encode the length of the previous
 * entry. The length is returned by setting the var 'prevlensize'.
 *
 * The prevlen field comes in exactly two sizes:
 *   - 1 byte, when the first byte is below ZIP_BIG_PREVLEN; that byte is the
 *     length itself.
 *   - 5 bytes otherwise: a marker byte followed by a 4-byte length.
 *
 * Only block comments are used inside the body: a line comment would swallow
 * the line continuation and end the macro definition early. */
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    if ((ptr)[0] < ZIP_BIG_PREVLEN) {                                          \
        (prevlensize) = 1;                                                     \
    } else {                                                                   \
        (prevlensize) = 5;                                                     \
    }                                                                          \
} while(0)

/* Extract the encoding from the byte pointed by 'ptr' and set it into
 * 'encoding' field of the zlentry structure.
 *
 * 'ptr' must point at the encoding byte, i.e. the byte right after the
 * prevlen field. For string encodings (value below ZIP_STR_MASK) only the
 * top two bits are kept, so the result is one of the ZIP_STR_* constants;
 * for everything else the full byte is returned unchanged.
 *
 * Only block comments are used inside the body: a line comment would swallow
 * the line continuation and end the macro definition early. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do {  \
    (encoding) = ((ptr)[0]); \
    if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)

/* Decode the entry encoding type and data length (string length for strings,
 * number of bytes used for the integer for integer entries) encoded in 'ptr'.
 * The 'encoding' variable is input, extracted by the caller, the 'lensize'
 * variable will hold the number of bytes required to encode the entry
 * length, and the 'len' variable will hold the entry length.
 * On invalid encoding error, lensize is set to 0.
 *
 * String encodings (encoding < ZIP_STR_MASK, i.e. below 11000000):
 *   ZIP_STR_06B: 1 header byte,  length = low 6 bits of ptr[0].
 *   ZIP_STR_14B: 2 header bytes, length = low 6 bits of ptr[0], then ptr[1].
 *   ZIP_STR_32B: 5 header bytes, length = ptr[1..4], most significant first.
 * Integer encodings always use a 1-byte header; immediates have no payload.
 *
 * Every macro argument is parenthesized, and only block comments are used
 * inside the body: a line comment would swallow the line continuation and
 * end the macro definition early. */
#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do {                    \
    if ((encoding) < ZIP_STR_MASK) {                                           \
        if ((encoding) == ZIP_STR_06B) {                                       \
            (lensize) = 1;                                                     \
            /* 0x3f = 00111111: keep only the 6 length bits. */                \
            (len) = (ptr)[0] & 0x3f;                                           \
        } else if ((encoding) == ZIP_STR_14B) {                                \
            (lensize) = 2;                                                     \
            (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1];                       \
        } else if ((encoding) == ZIP_STR_32B) {                                \
            (lensize) = 5;                                                     \
            (len) = ((uint32_t)(ptr)[1] << 24) |                               \
                    ((uint32_t)(ptr)[2] << 16) |                               \
                    ((uint32_t)(ptr)[3] <<  8) |                               \
                    ((uint32_t)(ptr)[4]);                                      \
        } else {                                                               \
            (lensize) = 0; /* bad encoding, should be covered by a previous */ \
            (len) = 0;     /* ZIP_ASSERT_ENCODING / zipEncodingLenSize, or  */ \
                           /* match the lensize after this macro with 0.    */ \
        }                                                                      \
    } else {                                                                   \
        (lensize) = 1;                                                         \
        if ((encoding) == ZIP_INT_8B)  (len) = 1;                              \
        else if ((encoding) == ZIP_INT_16B) (len) = 2;                         \
        else if ((encoding) == ZIP_INT_24B) (len) = 3;                         \
        else if ((encoding) == ZIP_INT_32B) (len) = 4;                         \
        else if ((encoding) == ZIP_INT_64B) (len) = 8;                         \
        else if ((encoding) >= ZIP_INT_IMM_MIN &&                              \
                 (encoding) <= ZIP_INT_IMM_MAX)                                \
            (len) = 0; /* 4 bit immediate: no content bytes */                 \
        else                                                                   \
            (lensize) = (len) = 0; /* bad encoding */                          \
    }                                                                          \
} while(0)

笔记：

宏定义为什么用 do{}while(0)?

这里宏定义用了 do {} while(0) ，明显只运行一次，为什么用 do while ？

确保宏展开为单一语句块

#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) \
    if ((ptr)[0] < ZIP_BIG_PREVLEN) { \
        (prevlensize) = 1; \
    } else { \
        (prevlensize) = 5; \
    }

if (condition)
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);
else
    do_something();

//展开后
if (condition)
    if (ptr[0] < ZIP_BIG_PREVLEN) {
        prevlensize = 1;
    } else {
        prevlensize = 5;
    };
else
    do_something();

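注意：调用处宏后面的分号在展开后变成了 `};`，这个多出来的空语句提前结束了外层的 if 语句，后面的 else 找不到可以匹配的 if，导致编译错误（语法错误）。如果为了通过编译而去掉调用处的分号，写法又和普通语句不一致；用 do { ... } while(0) 包裹之后，宏加上分号正好构成一条完整语句，就没有这个问题。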

避免空宏问题
如果宏定义为空（比如某些条件编译中宏可能不包含任何代码），直接写 {} 可能导致空语句块引发编译器警告或错误。而 do { } while(0) 是一个合法的空语句块，不会被编译器优化掉，也不会引发警告。
一致性与可维护性
使用 do { ... } while(0) 是C语言宏定义的惯例，开发者看到这种形式会立刻明白这是一个多语句宏，方便阅读和维护。
即使宏当前只有一条语句，未来可能扩展为多条语句，使用 do { ... } while(0) 能保持向前兼容，减少修改时的风险。

避免意外的分号问题

#define BAD_MACRO(x) x = 1; x = 2
if (condition)
    BAD_MACRO(x);

if (condition)
x = 1; x = 2; //此时 x = 2 在外面

二、基本操作

1.创建压缩列表

// 摘自 /src/ziplist.c
/* Create a new empty ziplist. */
/* Header layout implied by the accessors below (offsets from 'zl'):
 *   offset 0: uint32_t total bytes
 *   offset 4: uint32_t offset of the last entry
 *   offset 8: uint16_t number of entries
 * followed by the entries and a single ZIP_END byte. */
/* Return total bytes a ziplist is composed of. */
#define ZIPLIST_BYTES(zl)       (*((uint32_t*)(zl)))
/* Return the offset of the last item inside the ziplist. */
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
/* Return the length of a ziplist, or UINT16_MAX if the length cannot be
 * determined without scanning the whole ziplist. */
#define ZIPLIST_LENGTH(zl)      (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))

#define ZIPLIST_HEADER_SIZE     (sizeof(uint32_t)*2+sizeof(uint16_t))  // 4 + 4 + 2 = 10 bytes
#define ZIPLIST_END_SIZE        (sizeof(uint8_t))                      // 1 byte

#define ZIP_END 255         /* Special "end of ziplist" entry. */

/* Allocate and return an empty ziplist: the fixed-size header immediately
 * followed by the end marker, with no entries in between. */
unsigned char *ziplistNew(void) {
    /* Header (10 bytes) plus the end marker (1 byte). */
    unsigned int total = ZIPLIST_HEADER_SIZE + ZIPLIST_END_SIZE;
    unsigned char *zl = zmalloc(total);

    /* Total size field at offset 0. */
    ZIPLIST_BYTES(zl) = intrev32ifbe(total);

    /* Tail offset field at offset 4: with no entries it is the offset of the
     * first byte after the header. */
    ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);

    /* Entry count field at offset 8. */
    ZIPLIST_LENGTH(zl) = 0;

    /* The last byte is the end-of-list marker. */
    zl[total - 1] = ZIP_END;

    return zl;
}

什么是小端序

以下是 intrev32ifbe 相关。

// 摘自 /src/endianconv.h

/* variants of the function doing the actual conversion only if the target
 * host is big endian */
// Little endian means the low-order byte is stored at the lowest address and
// the high-order byte at the highest address (common on x86). On such hosts
// the *ifbe helpers below are no-ops / identity; otherwise they forward to
// the byte-reversing memrev* / intrev* routines.

#if (BYTE_ORDER == LITTLE_ENDIAN)
#define memrev16ifbe(p) ((void)(0))
#define memrev32ifbe(p) ((void)(0))
#define memrev64ifbe(p) ((void)(0))
#define intrev16ifbe(v) (v)
#define intrev32ifbe(v) (v)
#define intrev64ifbe(v) (v)
#else
#define memrev16ifbe(p) memrev16(p)
#define memrev32ifbe(p) memrev32(p)
#define memrev64ifbe(p) memrev64(p)
#define intrev16ifbe(v) intrev16(v)
#define intrev32ifbe(v) intrev32(v)
#define intrev64ifbe(v) intrev64(v)
#endif

笔记：
LITTLE_ENDIAN 是小端序，小端序是你存储的值高位在高位地址，低位在低位地址。
我也是今天才听说这个词，很疑惑之前不都是正常的吗？什么高位低位？
仔细想想，其实地址的高低抽象出来，确实应该是从左到右、从上到下越来越高。
正常情况下我们学编程其实不怎么考虑这个问题。当我们尝试看一个二进制编码就明白了。

这里很明显，先储存低位，再储存高位。这就是 x86 cpu。
然后大端序当然是相反了。但是注意，手机 arm 处理器是支持双端序的，就是说看配置。
我暂时没有找到纯大端序的编译器。不能给出例子。

2.插入元素

// 摘自 /src/ziplist.c
/* Return the pointer to the last entry of a ziplist, using the
 * last entry offset inside the ziplist header. */

// The tail offset is read with ZIPLIST_TAIL_OFFSET(zl), converted with
// intrev32ifbe and added to the base address 'zl'.
#define ZIPLIST_ENTRY_TAIL(zl)  ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))


/* Insert an entry at "p". */
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    return __ziplistInsert(zl,p,s,slen);
}

/* Insert item at "p".
 *
 * zl:   the ziplist.
 * p:    position inside 'zl' where the new entry is written; either an
 *       existing entry or the ZIP_END byte (append).
 * s:    bytes of the value to insert.
 * slen: number of bytes in 's'.
 * Returns the (possibly relocated) ziplist pointer; callers must use the
 * returned pointer because the list is resized through ziplistResize. */
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    // curlen: current total size in bytes, read from the first header field.
    // reqlen: bytes needed by the new entry; newlen: total size after insert.
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;
    unsigned int prevlensize, prevlen = 0;
    size_t offset;
    int nextdiff = 0;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;

    /* Find out prevlen for the entry that is inserted. */
    // Inserting in front of an existing entry: the new entry takes over the
    // prevlen currently stored in that entry's prevlen field.
    if (p[0] != ZIP_END) {
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        // Appending: look at the current tail entry.
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
        // If the tail pointer is not the end marker the list has at least
        // one entry, and the new entry's prevlen is that entry's total size.
        // Otherwise prevlen stays 0.
        if (ptail[0] != ZIP_END) {
            prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);
        }
    }

    /* See if the entry can be encoded */
    // zipTryEncoding returns 1 when the bytes can be stored as an integer
    // (filling 'value' and 'encoding') and 0 when they must be stored as a
    // string.
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* 'encoding' is set to the appropriate integer encoding */
        // Payload size of the chosen integer encoding.
        reqlen = zipIntSize(encoding);
    } else {
        /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
         * string length to figure out how to encode it. */
        // String payload: the raw bytes are stored as they are.
        reqlen = slen;
    }
    /* We need space for both the length of the previous entry and
     * the length of the payload. */
    // Called with a NULL destination these two helpers only return the
    // number of bytes the prevlen field and the encoding header will take.
    reqlen += zipStorePrevEntryLength(NULL,prevlen);
    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);

    
    /* When the insert position is not equal to the tail, we need to
     * make sure that the next entry can hold this entry's length in
     * its prevlen field. */
    int forcelarge = 0;
    // nextdiff is how many bytes the prevlen field of the entry currently at
    // 'p' must grow (+4), shrink (-4) or stay (0) so that it can store
    // reqlen, the size of the entry being inserted in front of it.
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
    // Special case: the next entry's prevlen field could shrink by 4 bytes
    // while the new entry is shorter than 4 bytes, so curlen+reqlen+nextdiff
    // would be smaller than curlen. Keep the 5-byte field instead: nextdiff
    // is zeroed and forcelarge makes the code below write the large form.
    // NOTE(review): presumably this avoids shrinking the buffer before the
    // memmove below — confirm against ziplistResize.
    if (nextdiff == -4 && reqlen < 4) {
        nextdiff = 0;
        forcelarge = 1;
    }

    /* Store offset because a realloc may change the address of zl. */
    offset = p-zl;
    newlen = curlen+reqlen+nextdiff;
    // ziplistResize is not shown in this excerpt; the code below relies on
    // it keeping the existing content and possibly returning a new address.
    zl = ziplistResize(zl,newlen);
    // Re-derive p inside the (possibly moved) buffer.
    p = zl+offset;

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        // Shift the entries that follow the insertion point to p+reqlen.
        // The source starts at p-nextdiff so that the next entry's prevlen
        // field ends up with its new size once it is rewritten below.
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        // Write reqlen into the moved entry's prevlen field, using the
        // 5-byte form when forcelarge was set above.
        if (forcelarge)
            zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
        else
            zipStorePrevEntryLength(p+reqlen,reqlen);

        /* Update offset for tail */
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */
        // NOTE(review): 'tail' is filled as a side effect of the call inside
        // assert(); this only works if assert is never compiled out.
        assert(zipEntrySafe(zl, newlen, p+reqlen, &tail, 1));
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    if (nextdiff != 0) {
        offset = p-zl;
        // __ziplistCascadeUpdate is not shown in this excerpt; it may return
        // a new address, so p is re-derived afterwards.
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry */
    // Header first (prevlen field, then encoding/length), then the payload.
    p += zipStorePrevEntryLength(p,prevlen);
    p += zipStoreEntryEncoding(p,encoding,slen);
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }
    ZIPLIST_INCR_LENGTH(zl,1);
    return zl;
}

/* Total encoded size, in bytes, of the entry starting at 'p': its header
 * (prevlen field plus encoding/length field) plus its payload.
 *
 * zl / zlbytes describe the enclosing ziplist and are forwarded to
 * zipEntrySafe, which is asserted to succeed; the prevlen range check is
 * not requested (last argument is 0). */
static inline unsigned int zipRawEntryLengthSafe(unsigned char* zl, size_t zlbytes, unsigned char *p) {
    zlentry entry;
    assert(zipEntrySafe(zl, zlbytes, p, &entry, 0));
    unsigned int total = entry.headersize + entry.len;
    return total;
}

/* Fills a struct with all information about an entry.
 * This function is safe to use on untrusted pointers, it'll make sure not to
 * try to access memory outside the ziplist payload.
 * Returns 1 if the entry is valid, and 0 otherwise. */
// zl / zlbytes: start and total size of the ziplist.
// p: start of the entry to decode; e: output struct.
// validate_prevlen: when non-zero, additionally require that
// p - e->prevrawlen lies inside the ziplist. In this excerpt
// zipRawEntryLengthSafe passes 0 and __ziplistInsert passes 1.
static inline int zipEntrySafe(unsigned char* zl, size_t zlbytes, unsigned char *p, zlentry *e, int validate_prevlen) {
    // zlfirst: first byte after the header.
    // zllast: last byte of the ziplist (zl + zlbytes - 1), where the end
    // marker lives.
    unsigned char *zlfirst = zl + ZIPLIST_HEADER_SIZE;
    unsigned char *zllast = zl + zlbytes - ZIPLIST_END_SIZE;
 
    // True when a pointer falls outside [zlfirst, zllast].
    #define OUT_OF_RANGE(p) (unlikely((p) < zlfirst || (p) > zllast))

    /* If there's no possibility for the header to reach outside the ziplist,
     * take the fast path. (max lensize and prevrawlensize are both 5 bytes) */
    // 10 = 5 + 5: with at least that much room before zllast the whole
    // header can be decoded without checking each field separately.
    if (p >= zlfirst && p + 10 < zllast) {
        ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
        ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
        ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
        e->headersize = e->prevrawlensize + e->lensize;
        // Start of the entry (not of its payload).
        e->p = p;
        /* We didn't call ZIP_ASSERT_ENCODING, so we check lensize was set to 0. */
        // ZIP_DECODE_LENGTH reports an unknown encoding as lensize == 0.
        if (unlikely(e->lensize == 0))
            return 0;
        /* Make sure the entry doesn't reach outside the edge of the ziplist */
        // p + header + payload must still be inside the ziplist.
        if (OUT_OF_RANGE(p + e->headersize + e->len))
            return 0;
        /* Make sure prevlen doesn't reach outside the edge of the ziplist */
        if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
            return 0;
        return 1;
    }
    
    // Slow path: validate every step before reading further.
    /* Make sure the pointer doesn't reach outside the edge of the ziplist */
    if (OUT_OF_RANGE(p))
        return 0;

    // Only the size of the prevlen field is decoded here, from p[0].
    /* Make sure the encoded prevlen header doesn't reach outside the allocation */
    ZIP_DECODE_PREVLENSIZE(p, e->prevrawlensize);
    if (OUT_OF_RANGE(p + e->prevrawlensize))
        return 0;

    /* Make sure encoded entry header is valid. */
    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
    // Header size implied by the encoding alone, without reading any
    // further bytes.
    e->lensize = zipEncodingLenSize(e->encoding);
    // zipEncodingLenSize returns ZIP_ENCODING_SIZE_INVALID (0xff) for an
    // unknown encoding.
    if (unlikely(e->lensize == ZIP_ENCODING_SIZE_INVALID))
        return 0;

    /* Make sure the encoded entry header doesn't reach outside the allocation */
    // The entire header (prevlen field + encoding/length) must be in range.
    if (OUT_OF_RANGE(p + e->prevrawlensize + e->lensize))
        return 0;

    /* Decode the prevlen and entry len headers. */
    // Both header fields are known to be in range, so decode their values.
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
    e->headersize = e->prevrawlensize + e->lensize;

    /* Make sure the entry doesn't reach outside the edge of the ziplist */
    if (OUT_OF_RANGE(p + e->headersize + e->len))
        return 0;
    /* Make sure prevlen doesn't reach outside the edge of the ziplist */
    if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
        return 0;
    // Start of the entry (not of its payload).
    e->p = p;
    return 1;
#undef OUT_OF_RANGE
}

#define ZIP_ENCODING_SIZE_INVALID 0xff
/* Size in bytes of the "entry type + length" header for 'encoding'.
 *
 * Every integer encoding (including 4-bit immediates) and the 6-bit string
 * encoding use a 1-byte header; the 14-bit string encoding uses 2 bytes and
 * the 32-bit string encoding uses 5. Any other value yields
 * ZIP_ENCODING_SIZE_INVALID. */
static inline unsigned int zipEncodingLenSize(unsigned char encoding) {
    /* 4-bit immediates occupy a contiguous range of encoding bytes. */
    if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)
        return 1;

    switch (encoding) {
    case ZIP_INT_8B:
    case ZIP_INT_16B:
    case ZIP_INT_24B:
    case ZIP_INT_32B:
    case ZIP_INT_64B:
    case ZIP_STR_06B:
        return 1;
    case ZIP_STR_14B:
        return 2;
    case ZIP_STR_32B:
        return 5;
    default:
        return ZIP_ENCODING_SIZE_INVALID;
    }
}

/* Number of payload bytes used by the integer encoding 'encoding':
 * 1, 2, 3, 4 or 8 for the 8/16/24/32/64-bit encodings, and 0 for a 4-bit
 * immediate, whose value is carried by the encoding byte itself.
 * Any other encoding is treated as unreachable. */
static inline unsigned int zipIntSize(unsigned char encoding) {
    if (encoding == ZIP_INT_8B)  return 1;
    if (encoding == ZIP_INT_16B) return 2;
    if (encoding == ZIP_INT_24B) return 3;
    if (encoding == ZIP_INT_32B) return 4;
    if (encoding == ZIP_INT_64B) return 8;

    int is_immediate = (encoding >= ZIP_INT_IMM_MIN &&
                        encoding <= ZIP_INT_IMM_MAX);
    if (is_immediate)
        return 0; /* 4 bit immediate */

    /* bad encoding, covered by a previous call to ZIP_ASSERT_ENCODING */
    redis_unreachable();
    return 0;
}

/* 'p' points to the prevlen field that prefixes an entry. Compute how the
 * size of that field would change if the previous entry became 'len' bytes
 * long.
 *
 * Let A be the number of bytes the field uses right now and B the number of
 * bytes needed to encode 'len'. The result is B - A:
 *   > 0  more space is needed,
 *   < 0  less space is needed,
 *   = 0  the field keeps its size. */
int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
    unsigned int current;   /* A: size of the field as stored today */
    unsigned int required;  /* B: size needed to store 'len' */

    ZIP_DECODE_PREVLENSIZE(p, current);
    required = zipStorePrevEntryLength(NULL, len);
    return required - current;
}

/* Write the prevlen field for a previous entry of 'len' bytes at 'p' and
 * return how many bytes the field occupies. When 'p' is NULL nothing is
 * written and only the required size is returned.
 *
 * Lengths below ZIP_BIG_PREVLEN fit in a single byte; anything else uses
 * the 5-byte form produced by zipStorePrevEntryLengthLarge. */
unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
    int fits_one_byte = len < ZIP_BIG_PREVLEN;

    /* Size query only. */
    if (p == NULL)
        return fits_one_byte ? 1 : sizeof(uint32_t) + 1;

    if (!fits_one_byte)
        return zipStorePrevEntryLengthLarge(p,len);

    p[0] = len;
    return 1;
}

/* Write the prevlen field for a previous entry of 'len' bytes at 'p',
 * always using the 5-byte form: the ZIP_BIG_PREVLEN marker followed by the
 * length as a 4-byte integer (passed through memrev32ifbe after the copy).
 * Returns the field size, 5. When 'p' is NULL nothing is written.
 *
 * The small form is never used here, whatever the value of 'len'
 * (required in __ziplistCascadeUpdate). */
int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
    const int needed = 1 + sizeof(uint32_t);

    if (p == NULL)
        return needed;

    uint32_t raw = len;
    p[0] = ZIP_BIG_PREVLEN;
    memcpy(p + 1, &raw, sizeof(raw));
    memrev32ifbe(p + 1);
    return needed;
}

/* Write the encoding/length header of an entry at 'p' and return the number
 * of bytes the header occupies. When 'p' is NULL nothing is written and only
 * the size is returned.
 *
 * encoding: ZIP_INT_*, ZIP_STR_* or a value between ZIP_INT_IMM_MIN and
 *           ZIP_INT_IMM_MAX (single-byte immediate integers).
 * rawlen:   string length; only used for string encodings, where it (and
 *           not the 'encoding' argument) selects the header form:
 *             rawlen <= 0x3f   -> 1 byte  (00 + 6-bit length)
 *             rawlen <= 0x3fff -> 2 bytes (01 + 14-bit length)
 *             otherwise        -> 5 bytes (10 + 4-byte length, most
 *                                          significant byte first)
 * Integer encodings always take a single header byte. */
unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
    unsigned char hdr[5];
    unsigned char hdrlen;

    if (!ZIP_IS_STR(encoding)) {
        /* Integer: the header is the encoding byte itself. */
        hdrlen = 1;
        hdr[0] = encoding;
    } else if (rawlen <= 0x3f) {
        hdrlen = 1;
        hdr[0] = ZIP_STR_06B | rawlen;
    } else if (rawlen <= 0x3fff) {
        hdrlen = 2;
        hdr[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
        hdr[1] = rawlen & 0xff;
    } else {
        hdrlen = 5;
        hdr[0] = ZIP_STR_32B;
        hdr[1] = (rawlen >> 24) & 0xff;
        hdr[2] = (rawlen >> 16) & 0xff;
        hdr[3] = (rawlen >> 8) & 0xff;
        hdr[4] = rawlen & 0xff;
    }

    /* Size query only. */
    if (!p) return hdrlen;

    /* Store this length at p. */
    memcpy(p,hdr,hdrlen);
    return hdrlen;
}

压缩元素

/* Try to represent the string 'entry' (of 'entrylen' bytes) as an integer.
 *
 * Returns 1 on success, storing the parsed value in '*v' and the smallest
 * integer encoding able to hold it in '*encoding'. Returns 0 when the bytes
 * must be stored as a string; the outputs are left untouched in that case.
 *
 * Empty strings and strings of 32 bytes or more are never converted. */
int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
    long long parsed;
    unsigned char enc;

    if (entrylen == 0 || entrylen >= 32) return 0;

    /* string2ll reports whether the bytes form a valid long long. */
    if (!string2ll((char*)entry, entrylen, &parsed)) return 0;

    /* Pick the narrowest encoding, smallest first. */
    if (parsed >= 0 && parsed <= 12) {
        /* Immediates: 0..12 map onto ZIP_INT_IMM_MIN..ZIP_INT_IMM_MAX. */
        enc = ZIP_INT_IMM_MIN+parsed;
    } else if (parsed >= INT8_MIN && parsed <= INT8_MAX) {
        enc = ZIP_INT_8B;
    } else if (parsed >= INT16_MIN && parsed <= INT16_MAX) {
        enc = ZIP_INT_16B;
    } else if (parsed >= INT24_MIN && parsed <= INT24_MAX) {
        enc = ZIP_INT_24B;
    } else if (parsed >= INT32_MIN && parsed <= INT32_MAX) {
        enc = ZIP_INT_32B;
    } else {
        enc = ZIP_INT_64B;
    }

    *encoding = enc;
    *v = parsed;
    return 1;
}

笔记：

// Excerpt from /src/config.h
// __builtin_expect is only used when __GNUC__ >= 3; on other compilers the
// macros fall back to evaluating the expression unchanged.
#if __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif

posted @ 2025-06-03 23:06 大俗XD 阅读(14) 评论(0) 收藏举报

刷新页面返回顶部

大俗

望尽天涯路