Redis ZSet

Redis ZSet

  • 有序集合类型 (Sorted Set) 相比于集合类型多了一个排序属性 score(分值)。对于有序集合 ZSet 来说,每个存储元素相当于由两个值组成:一个是有序集合的元素值,一个是排序值。有序集合的存储元素值不能重复,但分值可以重复。
  • 有序集合底层由 listpack(紧凑列表,Redis 7.0 之前为 ziplist 压缩列表)或 skiplist(跳跃表)实现

Demo

1. 添加成员(带分数)

ZADD leaderboard 100 Alice
ZADD leaderboard 95 Bob
ZADD leaderboard 98 Charlie

执行结果:

127.0.0.1:6379> ZADD leaderboard 100 Alice
(integer) 1
127.0.0.1:6379> ZADD leaderboard 95 Bob
(integer) 1
127.0.0.1:6379> ZADD leaderboard 98 Charlie
(integer) 1

2. 查看成员分数(score)

ZSCORE leaderboard Bob

执行结果:

127.0.0.1:6379> ZSCORE leaderboard Bob
"95"

3. 获取成员排名

  • 默认从小到大排名(分数越小排名越前)
ZRANK leaderboard Bob
  • 从大到小(高分靠前)
ZREVRANK leaderboard Bob

执行结果:

127.0.0.1:6379> ZRANK leaderboard Bob
(integer) 0
127.0.0.1:6379> ZREVRANK leaderboard Bob
(integer) 2

4. 获取某个排名区间内的成员

  • 升序(低分在前)
ZRANGE leaderboard 0 2 WITHSCORES
  • 降序(高分在前)
ZREVRANGE leaderboard 0 2 WITHSCORES

执行结果:

127.0.0.1:6379> ZRANGE leaderboard 0 2 WITHSCORES
1) "Bob"
2) "95"
3) "Charlie"
4) "98"
5) "Alice"
6) "100"
127.0.0.1:6379> ZREVRANGE leaderboard 0 2 WITHSCORES
1) "Alice"
2) "100"
3) "Charlie"
4) "98"
5) "Bob"
6) "95"

5. 获取指定分数范围内成员

ZRANGEBYSCORE leaderboard 90 100
ZREVRANGEBYSCORE leaderboard 100 90

执行结果:

127.0.0.1:6379> ZRANGEBYSCORE leaderboard 90 100
1) "Bob"
2) "Charlie"
3) "Alice"
127.0.0.1:6379> ZREVRANGEBYSCORE leaderboard 100 90
1) "Alice"
2) "Charlie"
3) "Bob"

6. 获取成员总数

ZCARD leaderboard

执行结果:

127.0.0.1:6379> ZCARD leaderboard
(integer) 3

7. 增加成员分数(适合动态更新)

ZINCRBY leaderboard 10 Bob

执行结果:

127.0.0.1:6379> ZINCRBY leaderboard 10 Bob
"105"

8. 删除成员

ZREM leaderboard Alice

执行结果:

127.0.0.1:6379> ZREM leaderboard Alice
(integer) 1

9. 删除分数范围成员

ZREMRANGEBYSCORE leaderboard 0 50

执行结果:

127.0.0.1:6379> ZREMRANGEBYSCORE leaderboard 0 50
(integer) 0

10. 删除排名范围内成员

ZREMRANGEBYRANK leaderboard 0 1

执行结果:

127.0.0.1:6379> ZREMRANGEBYRANK leaderboard 0 1
(integer) 2

数据编码

  1. 跳表(SkipList)
    用于大型有序集合

    127.0.0.1:6379> zadd zmaxleng 2.0 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
    (integer) 1
    127.0.0.1:6379> object encoding zmaxleng
    "skiplist"
    
  2. 紧凑列表(listpack,取代了旧版本中的压缩列表 ziplist)
    用于小型有序集合

    127.0.0.1:6379> zadd myzset 1 db 2 redis 3 mysql
    (integer) 3
    127.0.0.1:6379> object encoding myzset
    "listpack"
    

编码选择:zsetTypeCreate

zsetTypeCreate

/* Factory method to return a zset.
 *
 * The size hint indicates approximately how many items will be added,
 * and the value len hint indicates the approximate individual size of the added elements,
 * they are used to determine the initial representation.
 *
 * If the hints are not known, an underestimation or 0 is suitable.
 * We should never pass a negative value because it will convert to a very large unsigned number.
 *
 * Returns the object produced by createZsetListpackObject() when both hints
 * are within server.zset_max_listpack_entries / server.zset_max_listpack_value;
 * otherwise returns the object produced by createZsetObject(), with its dict
 * expanded for 'size_hint' entries. */
robj *zsetTypeCreate(size_t size_hint, size_t val_len_hint) {
    /* Both limits must hold; exceeding either one rules out the listpack. */
    if (size_hint <= server.zset_max_listpack_entries &&
        val_len_hint <= server.zset_max_listpack_value)
    {
        return createZsetListpackObject();
    }


    robj *zobj = createZsetObject();
    zset *zs = zobj->ptr;
    /* Presize the dict for the expected number of elements. */
    dictExpand(zs->dict, size_hint);
    return zobj;
}
if (size_hint <= server.zset_max_listpack_entries &&
    val_len_hint <= server.zset_max_listpack_value)
{
    return createZsetListpackObject();
}
  • 判断是否可以使用 listpack 编码
    • listpack 是一种紧凑的二进制序列格式,用于存储小型有序集合。
    • zset_max_listpack_entries:最大元素数量(如 128)
    • zset_max_listpack_value:最大 value 长度(如 64 字节)
  • 只有当数量和元素长度都足够小,才使用 listpack。
    • 优势:节省内存
    • 适用:少量小数据(如排行榜 TOP10)
robj *zobj = createZsetObject();
zset *zs = zobj->ptr;
dictExpand(zs->dict, size_hint);
return zobj;

使用常规 zset 编码结构

  • createZsetObject() 创建包含:
    • 跳表(zskiplist):按 score 排序,支持范围查询、排序等操作。
    • 字典(dict):快速查找元素是否存在,查找 score。

编码转换:zsetTypeMaybeConvert

/* Check if the existing zset should be converted to another encoding based
 * on the size hint.
 *
 * Only the listpack -> skiplist direction is handled: when 'zobj' is
 * listpack encoded and 'size_hint' exceeds server.zset_max_listpack_entries,
 * it is converted through zsetConvertAndExpand() with 'size_hint' as the
 * capacity. In every other case the object is left untouched. */
void zsetTypeMaybeConvert(robj *zobj, size_t size_hint) {
    if (zobj->encoding == OBJ_ENCODING_LISTPACK &&
        size_hint > server.zset_max_listpack_entries)
    {
        zsetConvertAndExpand(zobj, OBJ_ENCODING_SKIPLIST, size_hint);
    }
}

zsetConvertAndExpand

/* Converts a zset to the specified encoding, pre-sizing it for 'cap' elements.
 *
 * 'zobj'     the sorted set object; its ptr and encoding fields are rewritten
 *            in place.
 * 'encoding' the target encoding (OBJ_ENCODING_SKIPLIST or
 *            OBJ_ENCODING_LISTPACK).
 * 'cap'      passed to dictExpand() in the listpack -> skiplist direction;
 *            it is not used in the skiplist -> listpack direction.
 *
 * Nothing happens when the object already has the target encoding. The
 * function panics when the current encoding is unknown, or when the target
 * is not the opposite of the current encoding. */
void zsetConvertAndExpand(robj *zobj, int encoding, unsigned long cap) {
    zset *zs;
    zskiplistNode *node, *next;
    sds ele;
    double score;

    if (zobj->encoding == encoding) return;
    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *zl = zobj->ptr;
        unsigned char *eptr, *sptr;
        unsigned char *vstr;
        unsigned int vlen;
        long long vlong;

        if (encoding != OBJ_ENCODING_SKIPLIST)
            serverPanic("Unknown target encoding");

        zs = zmalloc(sizeof(*zs));
        zs->dict = dictCreate(&zsetDictType);
        zs->zsl = zslCreate();

        /* Presize the dict to avoid rehashing */
        dictExpand(zs->dict, cap);

        /* In the listpack each element entry is followed by its score entry:
         * eptr points to the element, sptr to the score right after it. */
        eptr = lpSeek(zl,0);
        if (eptr != NULL) {
            sptr = lpNext(zl,eptr);
            serverAssertWithInfo(NULL,zobj,sptr != NULL);
        }

        /* sptr is only read inside the loop, i.e. only when eptr != NULL,
         * in which case it was set above or by zzlNext(). */
        while (eptr != NULL) {
            score = zzlGetScore(sptr);
            /* A NULL vstr means the entry holds an integer (returned in
             * vlong); either way a new sds is built for the element. */
            vstr = lpGetValue(eptr,&vlen,&vlong);
            if (vstr == NULL)
                ele = sdsfromlonglong(vlong);
            else
                ele = sdsnewlen((char*)vstr,vlen);

            /* The same sds is handed to the skiplist node and used as the
             * dict key; the dict value points at the score stored inside
             * the skiplist node. */
            node = zslInsert(zs->zsl,score,ele);
            serverAssert(dictAdd(zs->dict,ele,&node->score) == DICT_OK);
            zzlNext(zl,&eptr,&sptr);
        }

        /* zobj->ptr is still the old listpack (zl) at this point. */
        zfree(zobj->ptr);
        zobj->ptr = zs;
        zobj->encoding = OBJ_ENCODING_SKIPLIST;
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        unsigned char *zl = lpNew(0);

        if (encoding != OBJ_ENCODING_LISTPACK)
            serverPanic("Unknown target encoding");

        /* Approach similar to zslFree(), since we want to free the skiplist at
         * the same time as creating the listpack. */
        zs = zobj->ptr;
        dictRelease(zs->dict);
        /* Remember the first real node before the header and the skiplist
         * struct themselves are freed. */
        node = zs->zsl->header->level[0].forward;
        zfree(zs->zsl->header);
        zfree(zs->zsl);

        /* Walk level 0 in order, copying each node into the listpack and
         * freeing it; the successor is read before the node is released. */
        while (node) {
            zl = zzlInsertAt(zl,NULL,node->ele,node->score);
            next = node->level[0].forward;
            zslFreeNode(node);
            node = next;
        }

        zfree(zs);
        zobj->ptr = zl;
        zobj->encoding = OBJ_ENCODING_LISTPACK;
    } else {
        serverPanic("Unknown sorted set encoding");
    }
}
void zsetConvertAndExpand(robj *zobj, int encoding, unsigned long cap)
  • 将一个 zset 从当前编码转换为指定编码(OBJ_ENCODING_LISTPACK 或 OBJ_ENCODING_SKIPLIST)。
  • 可选地预分配容量(用于字典 rehash 优化)。

1. listpack → skiplist(复杂结构)

前置校验
if (encoding != OBJ_ENCODING_SKIPLIST)
    serverPanic("Unknown target encoding");

确保只允许转换到 skiplist

初始化目标结构
zs = zmalloc(sizeof(*zs));
zs->dict = dictCreate(&zsetDictType);
zs->zsl = zslCreate();
dictExpand(zs->dict, cap);
  • 创建一个包含:
    • dict: 元素 -> 分数的映射
    • zskiplist: 支持排序、范围查找
遍历 listpack 并迁移数据
eptr = lpSeek(zl,0);  // 第一个元素(ele)
sptr = lpNext(zl,eptr); // 下一个元素是分数(score)

while (eptr != NULL) {
    score = zzlGetScore(sptr);    // 获取分数
    vstr = lpGetValue(eptr,&vlen,&vlong);  // 获取字符串 or long long 值
    ele = (vstr == NULL) ? sdsfromlonglong(vlong) : sdsnewlen((char*)vstr,vlen);

    node = zslInsert(zs->zsl, score, ele);  // 插入跳表
    dictAdd(zs->dict, ele, &node->score);  // 插入字典

    zzlNext(zl, &eptr, &sptr); // 移动到下一个 pair
}

数据从 listpack 格式解析出来,转为:

  • 跳表节点(排序用)
  • 字典键值对(查找用)
替换原数据结构
zfree(zobj->ptr);
zobj->ptr = zs;
zobj->encoding = OBJ_ENCODING_SKIPLIST;

2. skiplist → listpack(压缩结构)

前置校验

if (encoding != OBJ_ENCODING_LISTPACK)
    serverPanic("Unknown target encoding");

初始化 listpack

unsigned char *zl = lpNew(0);

遍历跳表节点,并插入 listpack

zs = zobj->ptr;
dictRelease(zs->dict);  // 释放 dict
node = zs->zsl->header->level[0].forward;

while (node) {
    zl = zzlInsertAt(zl,NULL,node->ele,node->score);
    next = node->level[0].forward;
    zslFreeNode(node);
    node = next;
}
  • 跳表头释放,遍历节点
  • 每个元素转为 listpack 的 ele/score pair

最终更新对象

zfree(zs);
zobj->ptr = zl;
zobj->encoding = OBJ_ENCODING_LISTPACK;

ZSet数据结构

/* Skiplist-encoded sorted set: the same elements are indexed twice. */
typedef struct zset {
    dict *dict;     /* keyed by element; each value points at the score held in the element's skiplist node */
    zskiplist *zsl; /* skiplist holding the (score, element) nodes in sorted order */
} zset;
  • 哈希表(dict):用于映射元素到分数,提供 O(1) 时间复杂度的元素查找
  • 跳表(zskiplist):用于根据分数排序元素,提供 O(log N) 时间复杂度的有序访问

这种双重数据结构的结合使得有序集合能够同时高效地支持按元素查找和按分数范围查询操作。

跳表实现

Redis 的跳表实现是 William Pugh 原始算法的 C 语言翻译版本:t_zset.c:34-41

 * This skiplist implementation is almost a C translation of the original
 * algorithm described by William Pugh in "Skip Lists: A Probabilistic
 * Alternative to Balanced Trees", modified in three ways:
 * a) this implementation allows for repeated scores.
 * b) the comparison is not just by key (our 'score') but by satellite data.
 * c) there is a back pointer, so it's a doubly linked list with the back
 * pointers being only at "level 1". This allows to traverse the list
 * from tail to head, useful for ZREVRANGE. */

节点结构

zskiplist

/* Skiplist container used by sorted sets. */
typedef struct zskiplist {
    struct zskiplistNode *header, *tail; /* header: node that starts every level and holds no element; tail: last node, NULL when empty */
    unsigned long length;                /* number of nodes, not counting the header */
    int level;                           /* current highest level in use; 1 for a new list */
} zskiplist;
字段名 含义
header 表头节点(注意:header 本身不存值,仅作为起点)
tail 表尾节点,方便从尾部遍历
length 节点总数(不含 header)
level 当前跳表的最大层数(动态变化)

zskiplistNode

/* ZSETs use a specialized version of Skiplists */
typedef struct zskiplistNode {
    sds ele; // member value (the actual element name)
    double score; // sort key; scores may repeat, so ordering also takes the element into account
    struct zskiplistNode *backward; // back pointer to the previous node, for tail-to-head traversal
    struct zskiplistLevel {
        struct zskiplistNode *forward;  // next node on this level
        unsigned long span; // number of nodes crossed to reach 'forward' (used for rank computation)
    } level[];
} zskiplistNode;
字段名 含义
ele 实际的成员值(sds 类型)
score 排序依据,zset 中用于比较
backward 指向前一个节点,用于向后遍历(双向)
level[] 多层前向指针数组,类似索引层
level[i].forward i 层的下一个节点指针
level[i].span 当前节点到第 i 层下一个节点间的跨度(用于计算排名)
表结构图示意(3 层)
level[2]:  H ───────────> A ──────────────> E
level[1]:  H ──> A ──> B ─────> D ──> E
level[0]:  H -> A -> B -> C -> D -> E
            ↑                       ↑
         header                   tail

  • 节点层高是随机生成的(Redis 使用几率为 1/4 的算法生成 1~32 层)
  • 越上层的节点越稀疏,加快搜索(平均 O(log N))
  • 每层节点的 span 是当前节点到下一个节点跳过了几个底层节点
    • 这允许 Redis 在 O(log N) 时间内算出某节点的“排名”

为何 Redis 使用跳表?

特性 优点
插入 / 删除 / 查找:O(log N) 比红黑树更简单易实现,性能相近
多层索引 + span 可支持 快速范围查询/排名计算
双向遍历(backward) 支持反向迭代、ZRANGE ... REV 命令

创建跳表

t_zset.c:68-83

/* Create a new skiplist.
 *
 * Returns a zmalloc'ed zskiplist with level 1, length 0, no tail, and a
 * header node created with ZSKIPLIST_MAXLEVEL levels, score 0 and a NULL
 * element. */
zskiplist *zslCreate(void) {
    int j;
    zskiplist *zsl;

    zsl = zmalloc(sizeof(*zsl));
    zsl->level = 1;
    zsl->length = 0;
    /* The header gets the maximum number of levels up front. */
    zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL,0,NULL);
    /* Every level of the header starts with no successor and a zero span. */
    for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
        zsl->header->level[j].forward = NULL;
        zsl->header->level[j].span = 0;
    }
    zsl->header->backward = NULL;
    zsl->tail = NULL;
    return zsl;
}
1. 创建跳表结构体
zskiplist *zslCreate(void)
zsl = zmalloc(sizeof(*zsl));
zsl->level = 1;
zsl->length = 0;
  • 分配跳表结构体内存
  • 初始层数设为 1(最低层)
  • 初始元素个数设为 0
2. 创建头节点
zsl->header = zslCreateNode(ZSKIPLIST_MAXLEVEL, 0, NULL);
  • 跳表头节点不存实际元素(是哨兵节点)
  • 创建最大层级数(通常是 32 层)的头节点
  • 这个节点的 score=0,ele=NULL
3. 初始化头节点的所有层
for (j = 0; j < ZSKIPLIST_MAXLEVEL; j++) {
    zsl->header->level[j].forward = NULL;
    zsl->header->level[j].span = 0;
}
  • 每一层都初始化为:
    • forward = NULL(还没有下一个节点)
    • span = 0(跨度设为0)
4. 初始化头节点的后退指针(backward)与跳表的尾指针(tail)
zsl->header->backward = NULL;
zsl->tail = NULL;
  • 跳表头节点没有前驱
  • 整个跳表还没有尾节点

跳表结点层数

t_zset.c:106-116

/* Returns a random level for the new skiplist node we are going to create.
 * The return value of this function is between 1 and ZSKIPLIST_MAXLEVEL
 * (both inclusive), with a powerlaw-alike distribution where higher
 * levels are less likely to be returned. */
int zslRandomLevel(void) {
    /* Cut-off below which a random() draw promotes the node by one level. */
    static const int threshold = ZSKIPLIST_P*RAND_MAX;
    int level = 1;
    /* Keep promoting while consecutive draws fall under the threshold.
     * NOTE(review): presumably random() is uniform over [0, RAND_MAX], so
     * each promotion succeeds with probability ZSKIPLIST_P -- confirm for
     * the target libc. */
    while (random() < threshold)
        level += 1;
    /* Clamp the result to the maximum level. */
    return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
}

用于生成新节点的层数的核心函数,它控制跳表节点的“高度”分布(即每个节点有几层 forward 指针)。

static const int threshold = ZSKIPLIST_P * RAND_MAX;
  • ZSKIPLIST_P 是一个概率因子,默认值是 0.25
  • 乘以 RAND_MAX 得到随机数阈值
int level = 1;
while (random() < threshold)
    level += 1;
  • 从 level = 1 开始(基础层)

  • 每次 random() 返回一个小于阈值的随机数,就提升一层

  • 节点层数至少达到第 k 层的概率是 P 的 (k-1) 次方,即:

    • level 1 概率:1
    • level 2 概率:P(0.25)
    • level 3 概率:P^2(0.0625)
    • ...
  • 所以 高层节点越来越稀疏

return (level<ZSKIPLIST_MAXLEVEL) ? level : ZSKIPLIST_MAXLEVEL;
  • 最大不超过 ZSKIPLIST_MAXLEVEL(默认是 32)

核心操作实现

添加元素

支持所有 ZADD 相关语义(如 NX, XX, GT, LT, INCR),并处理两种内部编码方式(listpack 与 skiplist)。
zsetAdd

/* Add the element 'ele' with 'score' to the sorted set 'zobj', or update the
 * score of an element that is already there, according to the ZADD_IN_* bits
 * set in 'in_flags':
 *
 * ZADD_IN_INCR: add 'score' to the current score of an existing element.
 * ZADD_IN_NX:   leave an already existing element untouched.
 * ZADD_IN_XX:   do not add the element when it does not exist yet.
 * ZADD_IN_GT:   update an existing element only if the new score is greater.
 * ZADD_IN_LT:   update an existing element only if the new score is smaller.
 *
 * '*out_flags' is first reset to 0 and then receives:
 *
 * ZADD_OUT_NAN:     the given score, or the result of the increment, is NaN.
 * ZADD_OUT_NOP:     nothing was done because of NX, XX, GT or LT.
 * ZADD_OUT_ADDED:   the element was new and has been added.
 * ZADD_OUT_UPDATED: the element existed and its score was changed.
 *
 * When an existing element passes all checks but the score is unchanged,
 * '*out_flags' stays 0.
 *
 * 'newscore' may be NULL. When not NULL it is written only when the element
 * is added, or when an existing element passes the NX/GT/LT checks; it is
 * not written on the NOP and NaN paths.
 *
 * The function returns 0 when the score (or the incremented score) is NaN,
 * and 1 in every other case.
 *
 * In the skiplist path the function stores an sdsdup() copy of 'ele', not
 * the caller's string. */
int zsetAdd(robj *zobj, double score, sds ele, int in_flags, int *out_flags, double *newscore) {
    /* Turn options into simple to check vars. */
    int incr = (in_flags & ZADD_IN_INCR) != 0;
    int nx = (in_flags & ZADD_IN_NX) != 0;
    int xx = (in_flags & ZADD_IN_XX) != 0;
    int gt = (in_flags & ZADD_IN_GT) != 0;
    int lt = (in_flags & ZADD_IN_LT) != 0;
    *out_flags = 0; /* We'll return our response flags. */
    double curscore;

    /* NaN as input is an error regardless of all the other parameters. */
    if (isnan(score)) {
        *out_flags = ZADD_OUT_NAN;
        return 0;
    }

    /* Update the sorted set according to its encoding. */
    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *eptr;

        /* On a hit, zzlFind() also stores the current score in curscore. */
        if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
            /* NX? Return, same element already exists. */
            if (nx) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            /* Prepare the score for the increment if needed. */
            if (incr) {
                score += curscore;
                if (isnan(score)) {
                    *out_flags |= ZADD_OUT_NAN;
                    return 0;
                }
            }

            /* GT/LT? Only update if score is greater/less than current. */
            if ((lt && score >= curscore) || (gt && score <= curscore)) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            if (newscore) *newscore = score;

            /* Remove and re-insert when score changed. */
            if (score != curscore) {
                zobj->ptr = zzlDelete(zobj->ptr,eptr);
                zobj->ptr = zzlInsert(zobj->ptr,ele,score);
                *out_flags |= ZADD_OUT_UPDATED;
            }
            return 1;
        } else if (!xx) {
            /* check if the element is too large or the list
             * becomes too long *before* executing zzlInsert. */
            if (zzlLength(zobj->ptr)+1 > server.zset_max_listpack_entries ||
                sdslen(ele) > server.zset_max_listpack_value ||
                !lpSafeToAdd(zobj->ptr, sdslen(ele)))
            {
                /* No return here: after the conversion the element is added
                 * by the skiplist block below. */
                zsetConvertAndExpand(zobj, OBJ_ENCODING_SKIPLIST, zsetLength(zobj) + 1);
            } else {
                zobj->ptr = zzlInsert(zobj->ptr,ele,score);
                if (newscore) *newscore = score;
                *out_flags |= ZADD_OUT_ADDED;
                return 1;
            }
        } else {
            /* XX given and the element does not exist: nothing to do. */
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }
    }

    /* Note that the above block handling listpack would have either returned or
     * converted the key to skiplist. */
    if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        zskiplistNode *znode;
        dictEntry *de;

        de = dictFind(zs->dict,ele);
        if (de != NULL) {
            /* NX? Return, same element already exists. */
            if (nx) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            /* The dict value is a pointer to the score. */
            curscore = *(double*)dictGetVal(de);

            /* Prepare the score for the increment if needed. */
            if (incr) {
                score += curscore;
                if (isnan(score)) {
                    *out_flags |= ZADD_OUT_NAN;
                    return 0;
                }
            }

            /* GT/LT? Only update if score is greater/less than current. */
            if ((lt && score >= curscore) || (gt && score <= curscore)) {
                *out_flags |= ZADD_OUT_NOP;
                return 1;
            }

            if (newscore) *newscore = score;

            /* Remove and re-insert when score changes. */
            if (score != curscore) {
                znode = zslUpdateScore(zs->zsl,curscore,ele,score);
                /* Note that we did not removed the original element from
                 * the hash table representing the sorted set, so we just
                 * update the score. */
                dictSetVal(zs->dict, de, &znode->score); /* Update score ptr. */
                *out_flags |= ZADD_OUT_UPDATED;
            }
            return 1;
        } else if (!xx) {
            /* Store a private copy; the same copy is used by the skiplist
             * node and as the dict key. */
            ele = sdsdup(ele);
            znode = zslInsert(zs->zsl,score,ele);
            serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
            *out_flags |= ZADD_OUT_ADDED;
            if (newscore) *newscore = score;
            return 1;
        } else {
            /* XX given and the element does not exist: nothing to do. */
            *out_flags |= ZADD_OUT_NOP;
            return 1;
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }
    return 0; /* Never reached. */
}

1. 参数解析与预处理

int incr = (in_flags & ZADD_IN_INCR) != 0;
int nx = (in_flags & ZADD_IN_NX) != 0;
int xx = (in_flags & ZADD_IN_XX) != 0;
int gt = (in_flags & ZADD_IN_GT) != 0;
int lt = (in_flags & ZADD_IN_LT) != 0;
*out_flags = 0;

提取用户设置的操作语义:

  • NX: 如果元素存在则不更新
  • XX: 如果元素不存在则不添加
  • GT: 只在新分数大于旧分数时才更新
  • LT: 只在新分数小于旧分数时才更新
  • INCR: 分数为累加操作

校验分数是否合法(不能是 NaN)

if (isnan(score)) {
    *out_flags = ZADD_OUT_NAN;
    return 0;
}

2. 编码为 listpack 的处理

if (zobj->encoding == OBJ_ENCODING_LISTPACK) {

元素已存在

eptr = zzlFind(zobj->ptr, ele, &curscore);
if ((eptr = zzlFind(zobj->ptr,ele,&curscore)) != NULL) {
    /* NX? Return, same element already exists. */
    if (nx) {
        *out_flags |= ZADD_OUT_NOP;
        return 1;
    }

    /* Prepare the score for the increment if needed. */
    if (incr) {
        score += curscore;
        if (isnan(score)) {
            *out_flags |= ZADD_OUT_NAN;
            return 0;
        }
    }

    /* GT/LT? Only update if score is greater/less than current. */
    if ((lt && score >= curscore) || (gt && score <= curscore)) {
        *out_flags |= ZADD_OUT_NOP;
        return 1;
    }

    if (newscore) *newscore = score;

    /* Remove and re-insert when score changed. */
    if (score != curscore) {
        zobj->ptr = zzlDelete(zobj->ptr,eptr);
        zobj->ptr = zzlInsert(zobj->ptr,ele,score);
        *out_flags |= ZADD_OUT_UPDATED;
    }
    return 1;
} 
  • 如果设置了 NX → 跳过操作
  • 如果设置了 INCR → 加上旧分数
  • 如果设置了 GT/LT → 判断是否跳过
  • 如果分数变了 → 删除原项,插入新项(保持有序)

元素不存在

    } else if (!xx) {
        /* check if the element is too large or the list
            * becomes too long *before* executing zzlInsert. */
        if (zzlLength(zobj->ptr)+1 > server.zset_max_listpack_entries ||
            sdslen(ele) > server.zset_max_listpack_value ||
            !lpSafeToAdd(zobj->ptr, sdslen(ele)))
        {
            zsetConvertAndExpand(zobj, OBJ_ENCODING_SKIPLIST, zsetLength(zobj) + 1);
        } else {
            zobj->ptr = zzlInsert(zobj->ptr,ele,score);
            if (newscore) *newscore = score;
            *out_flags |= ZADD_OUT_ADDED;
            return 1;
        }
    } else {
        *out_flags |= ZADD_OUT_NOP;
        return 1;
    }
}
  • 如果开启 XX → 不插入
  • 如果 listpack 过长或元素过大 → 转为 skiplist
  • 否则,直接 zzlInsert 插入元素

3. 编码为 skiplist 的处理

if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
    zset *zs = zobj->ptr;

元素已存在

de = dictFind(zs->dict, ele);
if (de != NULL) {
    /* NX? Return, same element already exists. */
    if (nx) {
        *out_flags |= ZADD_OUT_NOP;
        return 1;
    }

    curscore = *(double*)dictGetVal(de);

    /* Prepare the score for the increment if needed. */
    if (incr) {
        score += curscore;
        if (isnan(score)) {
            *out_flags |= ZADD_OUT_NAN;
            return 0;
        }
    }

    /* GT/LT? Only update if score is greater/less than current. */
    if ((lt && score >= curscore) || (gt && score <= curscore)) {
        *out_flags |= ZADD_OUT_NOP;
        return 1;
    }

    if (newscore) *newscore = score;

    /* Remove and re-insert when score changes. */
    if (score != curscore) {
        znode = zslUpdateScore(zs->zsl,curscore,ele,score);
        /* Note that we did not removed the original element from
            * the hash table representing the sorted set, so we just
            * update the score. */
        dictSetVal(zs->dict, de, &znode->score); /* Update score ptr. */
        *out_flags |= ZADD_OUT_UPDATED;
    }
    return 1;
}
  • 类似 listpack 的流程,但分为:
    • 更新 dict 中的分数
    • 调用 zslUpdateScore 重建跳表结构(位置可能变)

元素不存在

 else if (!xx) {
    ele = sdsdup(ele);
    znode = zslInsert(zs->zsl,score,ele);
    serverAssert(dictAdd(zs->dict,ele,&znode->score) == DICT_OK);
    *out_flags |= ZADD_OUT_ADDED;
    if (newscore) *newscore = score;
    return 1;
}
  • 使用 zslInsert 插入跳表
  • 使用 dictAdd 插入字典

删除元素

genericZpopCommand

/* This command implements the generic zpop operation, used by:
 * ZPOPMIN, ZPOPMAX, BZPOPMIN, BZPOPMAX and ZMPOP. This function is also used
 * inside blocked.c in the unblocking stage of BZPOPMIN, BZPOPMAX and BZMPOP.
 *
 * 'keyv' / 'keyc' are the candidate keys; elements are popped from the first
 * key that exists. 'where' selects the side to pop from (ZSET_MAX for the
 * highest score, otherwise the lowest).
 *
 * If 'emitkey' is true also the key name is emitted, useful for the blocking
 * behavior of BZPOP[MIN|MAX], since we can block into multiple keys.
 * Or in ZMPOP/BZMPOP, because we also can take multiple keys.
 *
 * 'count' is the number of elements requested to pop, or -1 for plain single pop.
 *
 * 'use_nested_array' when false it generates a flat array (with or without key name).
 * When true, it generates a nested 2 level array of field + score pairs, or 3 level when emitkey is set.
 *
 * 'reply_nil_when_empty' when true we reply a NIL if we are not able to pop up any elements.
 * Like in ZMPOP/BZMPOP we reply with a structured nested array containing key name
 * and member + score pairs. In these commands, we reply with null when we have no result.
 * Otherwise in ZPOPMIN/ZPOPMAX we reply an empty array by default.
 *
 * 'deleted' is an optional output argument to get an indication
 * if the key got deleted by this function.
 * */
void genericZpopCommand(client *c, robj **keyv, int keyc, int where, int emitkey,
                        long count, int use_nested_array, int reply_nil_when_empty, int *deleted) {
    int idx;
    robj *key = NULL;
    robj *zobj = NULL;
    sds ele;
    double score;

    if (deleted) *deleted = 0;

    /* Check type and break on the first error, otherwise identify candidate. */
    idx = 0;
    while (idx < keyc) {
        key = keyv[idx++];
        zobj = lookupKeyWrite(c->db,key);
        if (!zobj) continue;
        if (checkType(c,zobj,OBJ_ZSET)) return;
        break;
    }

    /* No candidate for zpopping, return empty. */
    if (!zobj) {
        if (reply_nil_when_empty) {
            addReplyNullArray(c);
        } else {
            addReply(c,shared.emptyarray);
        }
        return;
    }

    if (count == 0) {
        /* ZPOPMIN/ZPOPMAX with count 0. */
        addReply(c, shared.emptyarray);
        return;
    }

    long result_count = 0;

    /* When count is -1, we need to correct it to 1 for plain single pop. */
    if (count == -1) count = 1;

    /* Never pop more elements than the set currently holds. */
    long llen = zsetLength(zobj);
    long rangelen = (count > llen) ? llen : count;

    if (!use_nested_array && !emitkey) {
        /* ZPOPMIN/ZPOPMAX with or without COUNT option in RESP2. */
        addReplyArrayLen(c, rangelen * 2);
    } else if (use_nested_array && !emitkey) {
        /* ZPOPMIN/ZPOPMAX with COUNT option in RESP3. */
        addReplyArrayLen(c, rangelen);
    } else if (!use_nested_array && emitkey) {
        /* BZPOPMIN/BZPOPMAX in RESP2 and RESP3. */
        addReplyArrayLen(c, rangelen * 2 + 1);
        addReplyBulk(c, key);
    } else if (use_nested_array && emitkey) {
        /* ZMPOP/BZMPOP in RESP2 and RESP3. */
        addReplyArrayLen(c, 2);
        addReplyBulk(c, key);
        addReplyArrayLen(c, rangelen);
    }

    /* Remove the element. */
    do {
        if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
            unsigned char *zl = zobj->ptr;
            unsigned char *eptr, *sptr;
            unsigned char *vstr;
            unsigned int vlen;
            long long vlong;

            /* Get the first or last element in the sorted set. Index -2 is
             * used for the last one because the score entry follows the
             * element entry. */
            eptr = lpSeek(zl,where == ZSET_MAX ? -2 : 0);
            serverAssertWithInfo(c,zobj,eptr != NULL);
            vstr = lpGetValue(eptr,&vlen,&vlong);
            if (vstr == NULL)
                ele = sdsfromlonglong(vlong);
            else
                ele = sdsnewlen(vstr,vlen);

            /* Get the score. */
            sptr = lpNext(zl,eptr);
            serverAssertWithInfo(c,zobj,sptr != NULL);
            score = zzlGetScore(sptr);
        } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
            zset *zs = zobj->ptr;
            zskiplist *zsl = zs->zsl;
            zskiplistNode *zln;

            /* Get the first or last element in the sorted set. */
            zln = (where == ZSET_MAX ? zsl->tail :
                                       zsl->header->level[0].forward);

            /* There must be an element in the sorted set. */
            serverAssertWithInfo(c,zobj,zln != NULL);
            /* Copy the element: it is still needed for the reply after the
             * node has been deleted below. */
            ele = sdsdup(zln->ele);
            score = zln->score;
        } else {
            serverPanic("Unknown sorted set encoding");
        }

        /* The element was just read from the set, so the delete must succeed. */
        serverAssertWithInfo(c,zobj,zsetDel(zobj,ele));
        server.dirty++;

        if (result_count == 0) { /* Do this only for the first iteration. */
            /* NOTE(review): indexing by 'where' assumes ZSET_MIN is 0 and
             * ZSET_MAX is 1 -- confirm against their definitions. */
            char *events[2] = {"zpopmin","zpopmax"};
            notifyKeyspaceEvent(NOTIFY_ZSET,events[where],key,c->db->id);
        }

        if (use_nested_array) {
            addReplyArrayLen(c,2);
        }
        addReplyBulkCBuffer(c,ele,sdslen(ele));
        addReplyDouble(c,score);
        sdsfree(ele);
        ++result_count;
    /* Pre-decrement: the body runs exactly 'rangelen' times provided that
     * rangelen >= 1 on entry.
     * NOTE(review): presumably callers reject negative counts other than -1
     * and an existing zset is never empty -- confirm, since a rangelen <= 0
     * would not stop the loop as intended. */
    } while(--rangelen);

    /* Remove the key, if indeed needed. */
    if (zsetLength(zobj) == 0) {
        if (deleted) *deleted = 1;

        dbDelete(c->db,key);
        notifyKeyspaceEvent(NOTIFY_GENERIC,"del",key,c->db->id);
        /* No need updateKeysizesHist(). dbDelete() done it already. */
    } else {
        /* The set shrank from llen to llen - result_count elements. */
        updateKeysizesHist(c->db, getKeySlot(key->ptr), OBJ_ZSET, llen, llen - result_count);
    }
    signalModifiedKey(c,c->db,key);

    if (c->cmd->proc == zmpopCommand) {
        /* Always replicate it as ZPOP[MIN|MAX] with COUNT option instead of ZMPOP. */
        robj *count_obj = createStringObjectFromLongLong((count > llen) ? llen : count);
        rewriteClientCommandVector(c, 3,
                                   (where == ZSET_MAX) ? shared.zpopmax : shared.zpopmin,
                                   key, count_obj);
        decrRefCount(count_obj);
    }
}

zsetDel

/* Delete the element 'ele' from the sorted set, returning 1 if the element
 * existed and was deleted, 0 otherwise (the element was not there).
 *
 * The function panics when 'zobj' has neither the listpack nor the skiplist
 * encoding. */
int zsetDel(robj *zobj, sds ele) {
    if (zobj->encoding == OBJ_ENCODING_LISTPACK) {
        unsigned char *eptr;


        /* The score is not needed here, hence the NULL third argument. */
        if ((eptr = zzlFind(zobj->ptr,ele,NULL)) != NULL) {
            /* zzlDelete() returns the listpack pointer to store back. */
            zobj->ptr = zzlDelete(zobj->ptr,eptr);
            return 1;
        }
    } else if (zobj->encoding == OBJ_ENCODING_SKIPLIST) {
        zset *zs = zobj->ptr;
        if (zsetRemoveFromSkiplist(zs, ele)) {
            return 1;
        }
    } else {
        serverPanic("Unknown sorted set encoding");
    }
    return 0; /* No such element found. */
}

范围查询

zrangeGenericCommand

/**
 * This function handles ZRANGE and ZRANGESTORE, and also the deprecated
 * Z[REV]RANGE[BYSCORE|BYLEX] commands.
 *
 * The simple ZRANGE and ZRANGESTORE can take _AUTO in rangetype and direction,
 * other command pass explicit value.
 *
 * The argc_start points to the src key argument, so following syntax is like:
 * <src> <min> <max> [BYSCORE | BYLEX] [REV] [WITHSCORES] [LIMIT offset count]
 *
 * 'handler' supplies the client and receives the results; 'store' is non-zero
 * when the result is to be stored, in which case WITHSCORES is not accepted
 * as an option and score emission is always enabled.
 */
void zrangeGenericCommand(zrange_result_handler *handler, int argc_start, int store,
                          zrange_type rangetype, zrange_direction direction)
{
    client *c = handler->client;
    robj *key = c->argv[argc_start];
    robj *zobj;
    zrangespec range;
    zlexrangespec lexrange;
    int minidx = argc_start + 1;
    int maxidx = argc_start + 2;

    /* Options common to all */
    long opt_start = 0;
    long opt_end = 0;
    int opt_withscores = 0;
    long opt_offset = 0;
    long opt_limit = -1;

    /* Step 1: Skip the <src> <min> <max> args and parse remaining optional arguments. */
    for (int j=argc_start + 3; j < c->argc; j++) {
        /* Number of arguments that follow argv[j]. */
        int leftargs = c->argc-j-1;
        if (!store && !strcasecmp(c->argv[j]->ptr,"withscores")) {
            opt_withscores = 1;
        } else if (!strcasecmp(c->argv[j]->ptr,"limit") && leftargs >= 2) {
            if ((getLongFromObjectOrReply(c, c->argv[j+1], &opt_offset, NULL) != C_OK) ||
                (getLongFromObjectOrReply(c, c->argv[j+2], &opt_limit, NULL) != C_OK))
            {
                return;
            }
            /* Skip the offset and count just consumed. */
            j += 2;
        } else if (direction == ZRANGE_DIRECTION_AUTO &&
                   !strcasecmp(c->argv[j]->ptr,"rev"))
        {
            direction = ZRANGE_DIRECTION_REVERSE;
        } else if (rangetype == ZRANGE_AUTO &&
                   !strcasecmp(c->argv[j]->ptr,"bylex"))
        {
            rangetype = ZRANGE_LEX;
        } else if (rangetype == ZRANGE_AUTO &&
                   !strcasecmp(c->argv[j]->ptr,"byscore"))
        {
            rangetype = ZRANGE_SCORE;
        } else {
            /* Anything else is a syntax error, including REV/BYLEX/BYSCORE
             * when the direction or range type is already fixed, WITHSCORES
             * when storing, and LIMIT without two following arguments. */
            addReplyErrorObject(c,shared.syntaxerr);
            return;
        }
    }

    /* Use defaults if not overridden by arguments. */
    if (direction == ZRANGE_DIRECTION_AUTO)
        direction = ZRANGE_DIRECTION_FORWARD;
    if (rangetype == ZRANGE_AUTO)
        rangetype = ZRANGE_RANK;

    /* Check for conflicting arguments. */
    if (opt_limit != -1 && rangetype == ZRANGE_RANK) {
        addReplyError(c,"syntax error, LIMIT is only supported in combination with either BYSCORE or BYLEX");
        return;
    }
    if (opt_withscores && rangetype == ZRANGE_LEX) {
        addReplyError(c,"syntax error, WITHSCORES not supported in combination with BYLEX");
        return;
    }

    if (direction == ZRANGE_DIRECTION_REVERSE &&
        ((ZRANGE_SCORE == rangetype) || (ZRANGE_LEX == rangetype)))
    {
        /* Range is given as [max,min] */
        int tmp = maxidx;
        maxidx = minidx;
        minidx = tmp;
    }

    /* Step 2: Parse the range. */
    switch (rangetype) {
    /* ZRANGE_AUTO was already replaced by ZRANGE_RANK above; the label only
     * shares the RANK handling. */
    case ZRANGE_AUTO:
    case ZRANGE_RANK:
        /* Z[REV]RANGE, ZRANGESTORE [REV]RANGE */
        if ((getLongFromObjectOrReply(c, c->argv[minidx], &opt_start,NULL) != C_OK) ||
            (getLongFromObjectOrReply(c, c->argv[maxidx], &opt_end,NULL) != C_OK))
        {
            return;
        }
        break;

    case ZRANGE_SCORE:
        /* Z[REV]RANGEBYSCORE, ZRANGESTORE [REV]RANGEBYSCORE */
        if (zslParseRange(c->argv[minidx], c->argv[maxidx], &range) != C_OK) {
            addReplyError(c, "min or max is not a float");
            return;
        }
        break;

    case ZRANGE_LEX:
        /* Z[REV]RANGEBYLEX, ZRANGESTORE [REV]RANGEBYLEX */
        /* NOTE(review): this failure path returns without calling
         * zslFreeLexRange() -- presumably zslParseLexRange() leaves nothing
         * to free when it fails; confirm. */
        if (zslParseLexRange(c->argv[minidx], c->argv[maxidx], &lexrange) != C_OK) {
            addReplyError(c, "min or max not valid string range item");
            return;
        }
        break;
    }

    if (opt_withscores || store) {
        zrangeResultHandlerScoreEmissionEnable(handler);
    }

    /* Step 3: Lookup the key and get the range. */
    zobj = lookupKeyRead(c->db, key);
    if (zobj == NULL) {
        if (store) {
            handler->beginResultEmission(handler, -1);
            handler->finalizeResultEmission(handler, 0);
        } else {
            addReply(c, shared.emptyarray);
        }
        goto cleanup;
    }

    if (checkType(c,zobj,OBJ_ZSET)) goto cleanup;

    /* Step 4: Pass this to the command-specific handler. */
    switch (rangetype) {
    case ZRANGE_AUTO:
    case ZRANGE_RANK:
        genericZrangebyrankCommand(handler, zobj, opt_start, opt_end,
            opt_withscores || store, direction == ZRANGE_DIRECTION_REVERSE);
        break;

    case ZRANGE_SCORE:
        genericZrangebyscoreCommand(handler, &range, zobj, opt_offset,
            opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
        break;

    case ZRANGE_LEX:
        genericZrangebylexCommand(handler, &lexrange, zobj, opt_withscores || store,
            opt_offset, opt_limit, direction == ZRANGE_DIRECTION_REVERSE);
        break;
    }

    /* Instead of returning here, we'll just fall-through the clean-up. */

cleanup:

    /* lexrange is only filled in by a successful Step 2 parse in the
     * ZRANGE_LEX case, so this is the only thing to release; the plain
     * returns above all happen before that point. */
    if (rangetype == ZRANGE_LEX) {
        zslFreeLexRange(&lexrange);
    }
}

附录

  1. 官方文档
  2. 菜鸟教程-Sorted Set
posted @ 2025-05-01 03:05  Eiffelzero  阅读(183)  评论(0)    收藏  举报