【读书笔记】第四章、压缩列表

一、结构体

1. ziplist结构

2. entry 结构

entry 结构

这里面 previous_entry_length 记录了上个元素的长度。
以及 encoding 编码,外加内容。

假设当前元素首地址为 p
那么 p - p->previous_entry_length 就是上一个元素。通过这样的方法可以达到从尾到头遍历的目的。
encoding,即 content 字段存储的数据类型。 为节约内存 encoding 的长度可变。

encoding 编码 encoding 长度 content类型
00bbbbbb [6 bit 表长度] 1 byte 最大长度为 63的字节数组
01bbbbbb aaaaaaaa [14 bit 表长度] 2 byte 最大长度为 2^14 - 1 的字节数组
10xxxxxx aaaaaaaa cccccccc dddddddd eeeeeeee [32 bit 表长度] 5 byte 最大长度为 2^32 - 1的字节数组
11000000 1 byte int16 整数
11010000 1 byte int32 整数
11100000 1 byte int64 整数
11110000 1 byte 24 位整数
11111110 1 byte 8 位整数
1111xxxx 1 byte 没有 content 字段; xxxx 表示 0 ~ 12 的整数

由此可见,encoding 编码的 第一个字节前两位【红色】,是解释content 是 字节数组,还是整数,

  • 如果是字节数组,也同时来表达字节数组的最大长度,剩余部分位实际使用长度【10除外,貌似弃用了首字节后六位】。

  • 如果是整数时候,可以根据 第 3 位,第 4 位【绿色】,表达整数的具体类型。

  • 最后 当 1111xxxx 表示 0 ~ 12 的时候,可以没有 content ,相当于立即数。[这个说法是 书中说的 根据源码 我感觉不太正确]

以下源码说的是 0001 ~ 1101 是立即数。也就是 1~ 13. 除非 redis 自动做了 减一操作?大模型给出了肯定,这里需要再看源码。

也就是说 11110000 ~ 11111100 是立即数, 其中 11111110 不在其中,所以编码可用。

// 摘自 /src/ziplist
/* Different encoding/length possibilities */
#define ZIP_STR_MASK 0xc0           //11000000
#define ZIP_INT_MASK 0x30           //00110000

#define ZIP_STR_06B (0 << 6)        // 00000000
#define ZIP_STR_14B (1 << 6)        // 01000000
#define ZIP_STR_32B (2 << 6)        // 10000000
#define ZIP_INT_16B (0xc0 | 0<<4)   // 11000000
#define ZIP_INT_32B (0xc0 | 1<<4)   // 00010000 | 11000000 = 11010000
#define ZIP_INT_64B (0xc0 | 2<<4)   // 00100000 | 11000000 = 11100000
#define ZIP_INT_24B (0xc0 | 3<<4)   // 00110000 | 11000000 = 11110000
#define ZIP_INT_8B 0xfe             // 11111110

//这里说 11110001 ~ 11111101 才是立即数
/* 4 bit integer immediate encoding |1111xxxx| with xxxx between
 * 0001 and 1101. */
#define ZIP_INT_IMM_MASK 0x0f   /* Mask to extract the 4 bits value. To add
                                   one is needed to reconstruct the value. */
#define ZIP_INT_IMM_MIN 0xf1    /* 11110001 */
#define ZIP_INT_IMM_MAX 0xfd    /* 11111101 */

#define INT24_MAX 0x7fffff
#define INT24_MIN (-INT24_MAX - 1)

3. zlentry结构

/* We use this function to receive information about a ziplist entry.
 * Note that this is not how the data is actually encoded, is just what we
 * get filled by a function in order to operate more easily. */
typedef struct zlentry {
    // 上个元素长度类型的长
    unsigned int prevrawlensize; /* Bytes used to encode the previous entry len*/
    // 上个元素长度
    unsigned int prevrawlen;     /* Previous entry len. */
    // 本元素长度类型
    unsigned int lensize;        /* Bytes used to encode this entry type/len.
                                    For example strings have a 1, 2 or 5 bytes
                                    header. Integers always use a single byte.*/
    // 本元素长度
    unsigned int len;            /* Bytes used to represent the actual entry.
                                    For strings this is just the string length
                                    while for integers it is 1, 2, 3, 4, 8 or
                                    0 (for 4 bit immediate) depending on the
                                    number range. */
    // 头大小
    unsigned int headersize;     /* prevrawlensize + lensize. */
    // encoding字段长度
    unsigned char encoding;      /* Set to ZIP_STR_* or ZIP_INT_* depending on
                                    the entry encoding. However for 4 bits
                                    immediate integers this can assume a range
                                    of values and must be range-checked. */
    // 当前元素首地址
    unsigned char *p;            /* Pointer to the very start of the entry, that
                                    is, this points to prev-entry-len field. */
} zlentry;

笔记:
注意我的 ziplist 并画里面的 entry 是 zlentry。如果直接用 zlentry ,那么当然不必要这么复杂的 zlentry。这样也不叫压缩列表了。
所以,压缩的过程是 zlentry 变为 entry 的过程。
所以,因为要压缩,就要记录 lensize 类型,len 元素长度。

解码压缩列表

// 摘自 /src/ziplist.c
/* ZIP_BIG_PREVLEN - 1 is the max number of bytes of
                               the previous entry, for the "prevlen" field prefixing
                               each entry, to be represented with just a single byte.
                               Otherwise it is represented as FE AA BB CC DD, where
                               AA BB CC DD are a 4 bytes unsigned integer
                               representing the previous entry len. */
#define ZIP_BIG_PREVLEN 254

/* Fills a struct with all information about an entry.
 * This function is the "unsafe" alternative to the one below.
 * Generally, all function that return a pointer to an element in the ziplist
 * will assert that this element is valid, so it can be freely used.
 * Generally functions such ziplistGet assume the input pointer is already
 * validated (since it's the return value of another function). */

// p指向压缩元素, e是解压后的元素
static inline void zipEntry(unsigned char *p, zlentry *e) {
    //解码上个元素长度
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
    //解码编码
    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
    //解析编码长度
    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
    assert(e->lensize != 0); /* check that encoding was valid. */
    e->headersize = e->prevrawlensize + e->lensize;
    // 取出内容
    e->p = p;
}

/* Return the length of the previous element, and the number of bytes that
 * are used in order to encode the previous element length.
 * 'ptr' must point to the prevlen prefix of an entry (that encodes the
 * length of the previous entry in order to navigate the elements backward).
 * The length of the previous entry is stored in 'prevlen', the number of
 * bytes needed to encode the previous entry length are stored in
 * 'prevlensize'. */

#define ZIP_DECODE_PREVLEN(ptr, prevlensize, prevlen) do {                     \
    ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);                                  \
    // 如果是 1 ,大小就是本身
    if ((prevlensize) == 1) {                                                  \
        (prevlen) = (ptr)[0];                                                  \
    //如果是5 ,第一个字节是 0xFE ,后四个字节才是真正的大小
    } else { /* prevlensize == 5 */                                            \
        // 取出头部然后获取前一个元素的大小
        // 按数组取出,然后乘以对应位大小,相加
        (prevlen) = ((ptr)[4] << 24) |                                         \
                    ((ptr)[3] << 16) |                                         \
                    ((ptr)[2] <<  8) |                                         \
                    ((ptr)[1]);                                                \
    }                                                                          \
} while(0)

/* Return the number of bytes used to encode the length of the previous
 * entry. The length is returned by setting the var 'prevlensize'. */
//获取到上个元素长度类型
#define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) do {                          \
    // 注意:对于 prvious_entry_length 只有 1个字节和 5 个字节。
    // 如果是5个字节,则使用 0xFE 打头,所以只有四个字节表示大小 
    //  ZIP_BIG_PREVLEN 就是 0xFE 所以,这个len 小于 254 它就是 1个字节 len 就是自身。
    // 否则还要 zipStorePrevEntryLengthLarge 解析
    if ((ptr)[0] < ZIP_BIG_PREVLEN) {                                          \
        (prevlensize) = 1;                                                     \
    } else {                                                                   \
        (prevlensize) = 5;                                                     \
    }                                                                          \
} while(0)

/* Extract the encoding from the byte pointed by 'ptr' and set it into
 * 'encoding' field of the zlentry structure. */
#define ZIP_ENTRY_ENCODING(ptr, encoding) do {  \
    //取出编码类型,看起来压缩的时候 将 编码类型编在 上一元素大小类型后了
    (encoding) = ((ptr)[0]); \
    if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; \
} while(0)

/* Decode the entry encoding type and data length (string length for strings,
 * number of bytes used for the integer for integer entries) encoded in 'ptr'.
 * The 'encoding' variable is input, extracted by the caller, the 'lensize'
 * variable will hold the number of bytes required to encode the entry
 * length, and the 'len' variable will hold the entry length.
 * On invalid encoding error, lensize is set to 0. */
#define ZIP_DECODE_LENGTH(ptr, encoding, lensize, len) do {                    \
    //ZIP_STR_MASK 0xc0           //11000000
    if ((encoding) < ZIP_STR_MASK) {                                           \
        //小于该值,则是字符串
        if ((encoding) == ZIP_STR_06B) {                                       \
            (lensize) = 1;                                                     \
            // 0x3f = 00111111
            // 掩码取出真实使用长度
            (len) = (ptr)[0] & 0x3f;                                           \
        } else if ((encoding) == ZIP_STR_14B) {                                \
            (lensize) = 2;                                                     \
            (len) = (((ptr)[0] & 0x3f) << 8) | (ptr)[1];                       \
        } else if ((encoding) == ZIP_STR_32B) {                                \
            (lensize) = 5;                                                     \
            (len) = ((uint32_t)(ptr)[1] << 24) |                               \
                    ((uint32_t)(ptr)[2] << 16) |                               \
                    ((uint32_t)(ptr)[3] <<  8) |                               \
                    ((uint32_t)(ptr)[4]);                                      \
        } else {                                                               \
            (lensize) = 0; /* bad encoding, should be covered by a previous */ \
            (len) = 0;     /* ZIP_ASSERT_ENCODING / zipEncodingLenSize, or  */ \
                           /* match the lensize after this macro with 0.    */ \
        }                                                                      \
    } else {                                                                   \
        (lensize) = 1;                                                         \
        if ((encoding) == ZIP_INT_8B)  (len) = 1;                              \
        else if ((encoding) == ZIP_INT_16B) (len) = 2;                         \
        else if ((encoding) == ZIP_INT_24B) (len) = 3;                         \
        else if ((encoding) == ZIP_INT_32B) (len) = 4;                         \
        else if ((encoding) == ZIP_INT_64B) (len) = 8;                         \
        // 立即数,它的content 大小为 0
        else if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)   \
            (len) = 0; /* 4 bit immediate */                                   \
        else                                                                   \
            (lensize) = (len) = 0; /* bad encoding */                          \
    }                                                                          \
} while(0)

笔记:

宏定义为什么用 do{}while(0)?

这里宏定义用了 do {} while(0) ,明显只运行一次,为什么用 do while ?

  1. 确保宏展开为单一语句块
    #define ZIP_DECODE_PREVLENSIZE(ptr, prevlensize) \
        if ((ptr)[0] < ZIP_BIG_PREVLEN) { \
            (prevlensize) = 1; \
        } else { \
            (prevlensize) = 5; \
        }
    
    if (condition)
        ZIP_DECODE_PREVLENSIZE(ptr, prevlensize);
    else
        do_something();
    
    //展开后
    if (condition)
        if (ptr[0] < ZIP_BIG_PREVLEN) {
            prevlensize = 1;
        } else {
            prevlensize = 5;
        };
    else
        do_something();
    
    注意这里的 else 会错误地绑定到内层的 if,导致语法错误或逻辑错误。
  2. 避免空宏问题
    如果宏定义为空(比如某些条件编译中宏可能不包含任何代码),直接写 {} 可能导致空语句块引发编译器警告或错误。而 do { } while(0) 是一个合法的空语句块,不会被编译器优化掉,也不会引发警告。
  3. 一致性与可维护性
    使用 do { ... } while(0) 是C语言宏定义的惯例,开发者看到这种形式会立刻明白这是一个多语句宏,方便阅读和维护。
    即使宏当前只有一条语句,未来可能扩展为多条语句,使用 do { ... } while(0) 能保持向前兼容,减少修改时的风险。
  4. 避免意外的分号问题
    #define BAD_MACRO(x) x = 1; x = 2
    if (condition)
        BAD_MACRO(x);
    
    if (condition)
    x = 1; x = 2; //此时 x = 2 在外面
    

二、基本操作

1.创建压缩列表

// 摘自 /src/ziplist.c
/* Create a new empty ziplist. */
/* Return total bytes a ziplist is composed of. */
#define ZIPLIST_BYTES(zl)       (*((uint32_t*)(zl)))
/* Return the offset of the last item inside the ziplist. */
#define ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
/* Return the length of a ziplist, or UINT16_MAX if the length cannot be
 * determined without scanning the whole ziplist. */
#define ZIPLIST_LENGTH(zl)      (*((uint16_t*)((zl)+sizeof(uint32_t)*2)))

#define ZIPLIST_HEADER_SIZE     (sizeof(uint32_t)*2+sizeof(uint16_t))  //10
#define ZIPLIST_END_SIZE        (sizeof(uint8_t))                      //1

#define ZIP_END 255         /* Special "end of ziplist" entry. */

unsigned char *ziplistNew(void) {
    unsigned int bytes = ZIPLIST_HEADER_SIZE + ZIPLIST_END_SIZE;  // 11
    // 直接申请内存
    unsigned char *zl = zmalloc(bytes);
    // (*((uint32_t*)(zl))) 强转为 uint32_t* 指针,并指向地址对应值
    ZIPLIST_BYTES(zl) = intrev32ifbe(bytes);
    // (*((uint32_t*)((zl)+sizeof(uint32_t)))) 强转为 uint32_t* 指针,并指向 4 字节后对应值 然后赋值  10
    ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(ZIPLIST_HEADER_SIZE);
    // (*((uint16_t*)((zl)+sizeof(uint32_t)*2))) 强转为 uint16_t* 指针,并指向 8 字节后对应值 然后赋值  0
    ZIPLIST_LENGTH(zl) = 0;
    //结尾标注 0xFF
    zl[bytes-1] = ZIP_END;
    return zl;
}

什么是小端序

以下是 intrev32ifbe 相关。

// 摘自 /src/endianconv.h

/* variants of the function doing the actual conversion only if the target
 * host is big endian */
// 小端序(Little Endian)是指低位字节存储在低地址,高位字节存储在高地址(常见于 x86 架构)。

#if (BYTE_ORDER == LITTLE_ENDIAN)
#define memrev16ifbe(p) ((void)(0))
#define memrev32ifbe(p) ((void)(0))
#define memrev64ifbe(p) ((void)(0))
#define intrev16ifbe(v) (v)
#define intrev32ifbe(v) (v)
#define intrev64ifbe(v) (v)
#else
#define memrev16ifbe(p) memrev16(p)
#define memrev32ifbe(p) memrev32(p)
#define memrev64ifbe(p) memrev64(p)
#define intrev16ifbe(v) intrev16(v)
#define intrev32ifbe(v) intrev32(v)
#define intrev64ifbe(v) intrev64(v)
#endif

笔记:
LITTLE_ENDIAN 是小端序,小端序是 你存储的值 高位在高位地址,低位在低位地址。
我也是今天才听说这个词,很疑惑之前不都是正常的吗?什么高位地位?
仔细想想其实地址的高低位抽象出来确实应该是 从左到右越从上到下来越高。
正常情况加我们学编程其实不怎么考虑这个问题。当我们尝试看一个二进制编码就明白了。

这里很明显,先储存地位,再储存高位。这就是 x86 cpu。
然后大端序当然是相反了。但是注意,手机 arm 处理器是支持双端序的,就是说看配置。
我暂时没有找到纯 大端序 的编译器。不能给出例子。

2.插入元素

// 摘自 /src/ziplist.c
/* Return the pointer to the last entry of a ziplist, using the
 * last entry offset inside the ziplist header. */

//指针移动到结尾  //defined ZIPLIST_TAIL_OFFSET(zl) (*((uint32_t*)((zl)+sizeof(uint32_t))))
#define ZIPLIST_ENTRY_TAIL(zl)  ((zl)+intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl)))


/* Insert an entry at "p". */
unsigned char *ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    return __ziplistInsert(zl,p,s,slen);
}

/* Insert item at "p". */
unsigned char *__ziplistInsert(unsigned char *zl, unsigned char *p, unsigned char *s, unsigned int slen) {
    // 取出 当前长度,定义 要求长度,和新长度
    // ziplist 第一个元素为压缩表长度
    size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)), reqlen, newlen;
    unsigned int prevlensize, prevlen = 0;
    size_t offset;
    int nextdiff = 0;
    unsigned char encoding = 0;
    long long value = 123456789; /* initialized to avoid warning. Using a value
                                    that is easy to see if for some reason
                                    we use it uninitialized. */
    zlentry tail;

    /* Find out prevlen for the entry that is inserted. */
    // 如果p 不指向终点
    if (p[0] != ZIP_END) {
        //解析出上个元素的信息
        ZIP_DECODE_PREVLEN(p, prevlensize, prevlen);
    } else {
        // ptail 指回开头,开始判断是否有元素
        unsigned char *ptail = ZIPLIST_ENTRY_TAIL(zl);
        //ZIPLIST_ENTRY_TAIL 之后 *ptail 如果并没有指向尾部,说明有元素
        if (ptail[0] != ZIP_END) {
            //获取上个元素大小
            prevlen = zipRawEntryLengthSafe(zl, curlen, ptail);
        }
    }

    /* See if the entry can be encoded */
    //开始对插入的字节进行编码
    //返回 1 则是整数类型,0则是字符串类型
    if (zipTryEncoding(s,slen,&value,&encoding)) {
        /* 'encoding' is set to the appropriate integer encoding */
        // 当前 encoding 其实只有一个头元素,所以可以通过这个方法判断 size 大小
        // 放入 reqlen 中
        reqlen = zipIntSize(encoding);
    } else {
        /* 'encoding' is untouched, however zipStoreEntryEncoding will use the
         * string length to figure out how to encode it. */
        // 长度大于 32 或者等于 0 或者 或者无法转换为 允许的数字类型 则表示为 string
        reqlen = slen;
    }
    /* We need space for both the length of the previous entry and
     * the length of the payload. */
    //计算头 entry 第一个元素大小,首先是 prevlen
    reqlen += zipStorePrevEntryLength(NULL,prevlen);
    //然后是 encoding 大小
    reqlen += zipStoreEntryEncoding(NULL,encoding,slen);

    
    /* When the insert position is not equal to the tail, we need to
     * make sure that the next entry can hold this entry's length in
     * its prevlen field. */
    int forcelarge = 0;
    // 如果不是 ziplist 末尾,计算一下 当前元素大小,和 p 记录的前一个元素大小差距
    // 这里主要是为了算 ziplist 大小变化,对于 插入位置后面的节点, previous_entry_length 完全可以拿过来继续用,
    // 但是后面元素的 previous_entry_length  却要修改。简单说就是 让后一个元素,把 previous_entry_length  留下,让它滚后面去
    // 但是滚后面要滚多少?要取决于新元素的头大小 + 新元素大小
    nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : 0;
    // 因为每个元素都有上一个元素的头,然而这种插入可能导致每个元素的头大小都会变化,进而导致连锁更新,导致性能下降。
    // 一般来说,如果元素头缩小。且元素长度小于4 就是插入元素长度不是太长的话,我们就强制这个要去后面的元素继续使用大头,这样对它后面的元素影响不大,
    // 这样它的大小对于后一个元素连说不变,避免了连锁更新。
    // 但是,reqlen >= 4 的时候依然可能触发连锁更新 ,这时候 放入 __ziplistCascadeUpdate 解决
    if (nextdiff == -4 && reqlen < 4) {
        nextdiff = 0;
        forcelarge = 1;
    }

    /* Store offset because a realloc may change the address of zl. */
    offset = p-zl;
    newlen = curlen+reqlen+nextdiff;
    // 重新申请内存  注意 ziplistResize 即使申请到新位置的内存,也会把内存中的数据移动过去
    zl = ziplistResize(zl,newlen);
    // 将 p 指向 新的内存位置
    p = zl+offset;

    /* Apply memory move when necessary and update tail offset. */
    if (p[0] != ZIP_END) {
        /* Subtract one because of the ZIP_END bytes */
        //内存移动
        //拷贝下个数据到新位置
        //目标位置 p+reqlen  ,起始位置 p-nextdiff, 赋值内存长度 当前长度
        memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff);

        /* Encode this entry's raw length in the next entry. */
        //为让出位置的元素 生成新的头
        if (forcelarge)
            zipStorePrevEntryLengthLarge(p+reqlen,reqlen);
        else
            zipStorePrevEntryLength(p+reqlen,reqlen);

        /* Update offset for tail */
        ZIPLIST_TAIL_OFFSET(zl) =
            intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen);

        /* When the tail contains more than one entry, we need to take
         * "nextdiff" in account as well. Otherwise, a change in the
         * size of prevlen doesn't have an effect on the *tail* offset. */
        assert(zipEntrySafe(zl, newlen, p+reqlen, &tail, 1));
        if (p[reqlen+tail.headersize+tail.len] != ZIP_END) {
            ZIPLIST_TAIL_OFFSET(zl) =
                intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff);
        }
    } else {
        /* This element will be the new tail. */
        ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl);
    }

    /* When nextdiff != 0, the raw length of the next entry has changed, so
     * we need to cascade the update throughout the ziplist */
    if (nextdiff != 0) {
        offset = p-zl;
        //有空如何解决连锁更新吧
        zl = __ziplistCascadeUpdate(zl,p+reqlen);
        p = zl+offset;
    }

    /* Write the entry */
    p += zipStorePrevEntryLength(p,prevlen);
    p += zipStoreEntryEncoding(p,encoding,slen);
    if (ZIP_IS_STR(encoding)) {
        memcpy(p,s,slen);
    } else {
        zipSaveInteger(p,value,encoding);
    }
    ZIPLIST_INCR_LENGTH(zl,1);
    return zl;
}

/* Return the total number of bytes used by the entry pointed to by 'p'. */
static inline unsigned int zipRawEntryLengthSafe(unsigned char* zl, size_t zlbytes, unsigned char *p) {
    zlentry e;
    assert(zipEntrySafe(zl, zlbytes, p, &e, 0));
    return e.headersize + e.len;
}

/* Fills a struct with all information about an entry.
 * This function is safe to use on untrusted pointers, it'll make sure not to
 * try to access memory outside the ziplist payload.
 * Returns 1 if the entry is valid, and 0 otherwise. */
//validate_prevlen 是检测上个元素是否越界。这里调用方法给了 0 ,也就是说无需检查直接. 也许给 1 的时候有其他用途?
static inline int zipEntrySafe(unsigned char* zl, size_t zlbytes, unsigned char *p, zlentry *e, int validate_prevlen) {
    //取出第一第二个元素
    unsigned char *zlfirst = zl + ZIPLIST_HEADER_SIZE;
    unsigned char *zllast = zl + zlbytes - ZIPLIST_END_SIZE;
 
    #define OUT_OF_RANGE(p) (unlikely((p) < zlfirst || (p) > zllast))

    /* If there's no possibility for the header to reach outside the ziplist,
     * take the fast path. (max lensize and prevrawlensize are both 5 bytes) */
    // 10 是头部,p + 10 后依然小于最后元素地址,说明可以插
    if (p >= zlfirst && p + 10 < zllast) {
        //解析出该有的数据
        ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
        ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
        ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
        e->headersize = e->prevrawlensize + e->lensize;
        //数据指针
        e->p = p;
        /* We didn't call ZIP_ASSERT_ENCODING, so we check lensize was set to 0. */
        //e lensize == 0
        if (unlikely(e->lensize == 0))
            return 0;
        /* Make sure the entry doesn't reach outside the edge of the ziplist */
        // 插入位置 + e的头部 + e的长度 不超过 ziplist 最大长度
        if (OUT_OF_RANGE(p + e->headersize + e->len))
            return 0;
        //确保上个元素不超界
        /* Make sure prevlen doesn't reach outside the edge of the ziplist */
        if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
            return 0;
        return 1;
    }
    
    //判断是否越界
    /* Make sure the pointer doesn't reach outside the edge of the ziplist */
    if (OUT_OF_RANGE(p))
        return 0;

    //确保 描述上个头 没有越界
    /* Make sure the encoded prevlen header doesn't reach outside the allocation */
    ZIP_DECODE_PREVLENSIZE(p, e->prevrawlensize);
    if (OUT_OF_RANGE(p + e->prevrawlensize))
        return 0;

    /* Make sure encoded entry header is valid. */
    ZIP_ENTRY_ENCODING(p + e->prevrawlensize, e->encoding);
    // 获取头大小
    e->lensize = zipEncodingLenSize(e->encoding);
    //#define ZIP_ENCODING_SIZE_INVALID 0xff
    //头无效值      
    if (unlikely(e->lensize == ZIP_ENCODING_SIZE_INVALID))
        return 0;

    /* Make sure the encoded entry header doesn't reach outside the allocation */
    // 确保自己没有头不越界
    if (OUT_OF_RANGE(p + e->prevrawlensize + e->lensize))
        return 0;

    /* Decode the prevlen and entry len headers. */
    // 获取实际上个元素大小
    ZIP_DECODE_PREVLEN(p, e->prevrawlensize, e->prevrawlen);
    // 获取本元素实际大小
    ZIP_DECODE_LENGTH(p + e->prevrawlensize, e->encoding, e->lensize, e->len);
    //赋值 headersize
    e->headersize = e->prevrawlensize + e->lensize;

    /* Make sure the entry doesn't reach outside the edge of the ziplist */
    //越界测试
    if (OUT_OF_RANGE(p + e->headersize + e->len))
        return 0;
    // 上个元素越界测试
    /* Make sure prevlen doesn't reach outside the edge of the ziplist */
    if (validate_prevlen && OUT_OF_RANGE(p - e->prevrawlen))
        return 0;
    //指向内容
    e->p = p;
    return 1;
#undef OUT_OF_RANGE
}

#define ZIP_ENCODING_SIZE_INVALID 0xff
/* Return the number of bytes required to encode the entry type + length.
 * On error, return ZIP_ENCODING_SIZE_INVALID */
static inline unsigned int zipEncodingLenSize(unsigned char encoding) {
    if (encoding == ZIP_INT_16B || encoding == ZIP_INT_32B ||
        encoding == ZIP_INT_24B || encoding == ZIP_INT_64B ||
        encoding == ZIP_INT_8B)
        return 1;
    if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)
        return 1;
    if (encoding == ZIP_STR_06B)
        return 1;
    if (encoding == ZIP_STR_14B)
        return 2;
    if (encoding == ZIP_STR_32B)
        return 5;
    return ZIP_ENCODING_SIZE_INVALID;
}

/* Return bytes needed to store integer encoded by 'encoding' */
static inline unsigned int zipIntSize(unsigned char encoding) {
    switch(encoding) {
    case ZIP_INT_8B:  return 1;
    case ZIP_INT_16B: return 2;
    case ZIP_INT_24B: return 3;
    case ZIP_INT_32B: return 4;
    case ZIP_INT_64B: return 8;
    }
    if (encoding >= ZIP_INT_IMM_MIN && encoding <= ZIP_INT_IMM_MAX)
        return 0; /* 4 bit immediate */
    /* bad encoding, covered by a previous call to ZIP_ASSERT_ENCODING */
    redis_unreachable();
    return 0;
}

/* Given a pointer 'p' to the prevlen info that prefixes an entry, this
 * function returns the difference in number of bytes needed to encode
 * the prevlen if the previous entry changes of size.
 *
 * So if A is the number of bytes used right now to encode the 'prevlen'
 * field.
 *
 * And B is the number of bytes that are needed in order to encode the
 * 'prevlen' if the previous element will be updated to one of size 'len'.
 *
 * Then the function returns B - A
 *
 * So the function returns a positive number if more space is needed,
 * a negative number if less space is needed, or zero if the same space
 * is needed. */
int zipPrevLenByteDiff(unsigned char *p, unsigned int len) {
    unsigned int prevlensize;
    ZIP_DECODE_PREVLENSIZE(p, prevlensize);
    return zipStorePrevEntryLength(NULL, len) - prevlensize;
}

/* Encode the length of the previous entry and write it to "p". Return the
 * number of bytes needed to encode this length if "p" is NULL. */
unsigned int zipStorePrevEntryLength(unsigned char *p, unsigned int len) {
    if (p == NULL) {
        return (len < ZIP_BIG_PREVLEN) ? 1 : sizeof(uint32_t) + 1;
    } else {
        if (len < ZIP_BIG_PREVLEN) {
            p[0] = len;
            return 1;
        } else {
            return zipStorePrevEntryLengthLarge(p,len);
        }
    }
}

/* Encode the length of the previous entry and write it to "p". This only
 * uses the larger encoding (required in __ziplistCascadeUpdate). */
int zipStorePrevEntryLengthLarge(unsigned char *p, unsigned int len) {
    uint32_t u32;
    if (p != NULL) {
        p[0] = ZIP_BIG_PREVLEN;
        u32 = len;
        memcpy(p+1,&u32,sizeof(u32));
        memrev32ifbe(p+1);
    }
    return 1 + sizeof(uint32_t);
}

/* Write the encoding header of the entry in 'p'. If p is NULL it just returns
 * the amount of bytes required to encode such a length. Arguments:
 *
 * 'encoding' is the encoding we are using for the entry. It could be
 * ZIP_INT_* or ZIP_STR_* or between ZIP_INT_IMM_MIN and ZIP_INT_IMM_MAX
 * for single-byte small immediate integers.
 *
 * 'rawlen' is only used for ZIP_STR_* encodings and is the length of the
 * string that this entry represents.
 *
 * The function returns the number of bytes used by the encoding/length
 * header stored in 'p'. */
unsigned int zipStoreEntryEncoding(unsigned char *p, unsigned char encoding, unsigned int rawlen) {
    unsigned char len = 1, buf[5];

    if (ZIP_IS_STR(encoding)) {
        /* Although encoding is given it may not be set for strings,
         * so we determine it here using the raw length. */
        //表达字节数组只有三种, 1个字节,2个字节,5个字节。
        //其中 00 开头表示 encoding 长度只有 1个字节 后面 6位表示长度
        // 00111111
        if (rawlen <= 0x3f) {
            if (!p) return len;
            buf[0] = ZIP_STR_06B | rawlen;
        // 00111111 11111111  这里既然它不是 1个字节的 那么 小于该值就是 2个字节的。
        } else if (rawlen <= 0x3fff) {
            len += 1;
            if (!p) return len;
            buf[0] = ZIP_STR_14B | ((rawlen >> 8) & 0x3f);
            buf[1] = rawlen & 0xff;
        // 前两种都不是,剩下的是五个字节的
        } else {
            len += 4;
            if (!p) return len;
            buf[0] = ZIP_STR_32B;
            buf[1] = (rawlen >> 24) & 0xff;
            buf[2] = (rawlen >> 16) & 0xff;
            buf[3] = (rawlen >> 8) & 0xff;
            buf[4] = rawlen & 0xff;
        }
    } else {
        /* Implies integer encoding, so length is always 1. */
        // 数字只有一个字节的 encoding
        if (!p) return len;
        buf[0] = encoding;
    }

    /* Store this length at p. */
    memcpy(p,buf,len);
    return len;
}

压缩元素

/* Check if string pointed to by 'entry' can be encoded as an integer.
 * Stores the integer value in 'v' and its encoding in 'encoding'. */
// 返回 0 代表 解析为字符串, 1 代表数字
int zipTryEncoding(unsigned char *entry, unsigned int entrylen, long long *v, unsigned char *encoding) {
    long long value;
    // 过长的数字 redis 即转换数字意义不大,长度为0 也没必要转换。
    if (entrylen >= 32 || entrylen == 0) return 0;
    
    // string2ll 尝试将 string数字 转换为 longlong
    // int string2ll(const char *s, size_t slen, long long *value)
    if (string2ll((char*)entry, entrylen,&value)) {
        /* Great, the string can be encoded. Check what's the smallest
         * of our encoding types that can hold this value. */
        if (value >= 0 && value <= 12) {
            // 这里解答了之前的困惑,使用 ZIP_INT_IMM_MIN 加上当前值,解码的时候 减去 ZIP_INT_IMM_MIN 即可。所以和 1 ~ 13 无关
            *encoding = ZIP_INT_IMM_MIN+value;
        } else if (value >= INT8_MIN && value <= INT8_MAX) {
            *encoding = ZIP_INT_8B;
        } else if (value >= INT16_MIN && value <= INT16_MAX) {
            *encoding = ZIP_INT_16B;
        } else if (value >= INT24_MIN && value <= INT24_MAX) {
            *encoding = ZIP_INT_24B;
        } else if (value >= INT32_MIN && value <= INT32_MAX) {
            *encoding = ZIP_INT_32B;
        } else {
            *encoding = ZIP_INT_64B;
        }
        *v = value;
        return 1;
    }
    return 0;
}

笔记:

// 摘自 /src/config.h
// __builtin_expect 是 linux 内含有的方法。所以这里需要宏定义
#if __GNUC__ >= 3
#define likely(x) __builtin_expect(!!(x), 1)
#define unlikely(x) __builtin_expect(!!(x), 0)
#else
#define likely(x) (x)
#define unlikely(x) (x)
#endif
posted @ 2025-06-03 23:06  大俗XD  阅读(14)  评论(0)    收藏  举报