redis源码之集群(六)

集群

集群初始化

// cluster.c
/* Initialize the cluster subsystem: allocate and reset server.cluster,
 * lock and load (or create) the nodes configuration file, start listening
 * on the cluster bus port and install the accept handler.
 *
 * Exits the process if the config file cannot be locked, if the client port
 * is too high to derive a cluster bus port, or if listening fails. */
void clusterInit(void) {
    int saveconf = 0;
    server.cluster = zmalloc(sizeof(clusterState));
    /* (Initialization of further clusterState fields is elided in this
     * excerpt.) */
    ...
    /* Reset the per-message-type bus statistics. */
    for (int i = 0; i < CLUSTERMSG_TYPE_COUNT; i++) {
        server.cluster->stats_bus_messages_sent[i] = 0;
        server.cluster->stats_bus_messages_received[i] = 0;
    }
    server.cluster->stats_pfail_nodes = 0;
    memset(server.cluster->slots,0, sizeof(server.cluster->slots));
    clusterCloseAllSlots();

    /* Lock the cluster config file to make sure every node uses
     * its own nodes.conf. */
    server.cluster_config_file_lock_fd = -1;
    if (clusterLockConfig(server.cluster_configfile) == C_ERR)
        exit(1);

    /* Load or create a new nodes configuration (nodes.conf). */
    if (clusterLoadConfig(server.cluster_configfile) == C_ERR) {
        /* No configuration found. We will just use the random name provided
         * by the createClusterNode() function. */
        myself = server.cluster->myself =
            createClusterNode(NULL,CLUSTER_NODE_MYSELF|CLUSTER_NODE_MASTER);
        serverLog(LL_NOTICE,"No cluster configuration found, I'm %.40s",
            myself->name);
        clusterAddNode(myself);
        saveconf = 1;
    }
    if (saveconf) clusterSaveConfigOrDie(1);

    /* We need a listening TCP port for our cluster messaging needs. */
    server.cfd.count = 0;
    /* Port sanity check II
     * The other handshake port check is triggered too late to stop
     * us from trying to use a too-high cluster port number. */
    int port = server.tls_cluster ? server.tls_port : server.port;
    if (port > (65535-CLUSTER_PORT_INCR)) {
        serverLog(LL_WARNING, "Redis port number too high. "
                   "Cluster communication port is 10,000 port "
                   "numbers higher than your Redis port. "
                   "Your Redis port number must be 55535 or less.");
        exit(1);
    }
    /* Start listening on the cluster bus port. The port is computed once so
     * that the error message below reports the port that actually failed
     * (the bus port), not the client port it was derived from. */
    int cport = port+CLUSTER_PORT_INCR;
    if (listenToPort(cport, &server.cfd) == C_ERR) {
        /* Note: the following log text is matched by the test suite. */
        serverLog(LL_WARNING, "Failed listening on port %u (cluster), aborting.", (unsigned)cport);
        exit(1);
    }
    /* Install clusterAcceptHandler on the cluster bus listening sockets. */
    if (createSocketAcceptHandler(&server.cfd, clusterAcceptHandler) != C_OK) {
        serverPanic("Unrecoverable error creating Redis Cluster socket accept handler.");
    }

    /* The slots -> keys map is a radix tree. Initialize it here. */
    server.cluster->slots_to_keys = raxNew();
    memset(server.cluster->slots_keys_count,0,
           sizeof(server.cluster->slots_keys_count));

    /* Set myself->port/cport/pport to my listening ports, we'll just need to
     * discover the IP address via MEET messages. */
    deriveAnnouncedPorts(&myself->port, &myself->pport, &myself->cport);

    /* No manual failover in progress at startup. */
    server.cluster->mf_end = 0;
    server.cluster->mf_slave = NULL;
    resetManualFailover();
    clusterUpdateMyselfFlags();
}

定时任务

/* Periodic cluster housekeeping for this node (myself).
 *
 * Each call: syncs myself->ip with cluster-announce-ip, (re)creates links to
 * other nodes and drops nodes stuck in handshake, periodically pings a
 * random node, flags unresponsive nodes as PFAIL, and, when we are a slave,
 * drives manual failover, automatic failover and replica migration.
 * The in-code comments imply it is called about 10 times per second. */
void clusterCron(void) {
    dictIterator *di;
    dictEntry *de;
    int update_state = 0;
    int orphaned_masters; /* How many masters there are without ok slaves. */
    int max_slaves; /* Max number of ok slaves for a single master. */
    int this_slaves; /* Number of ok slaves for our master (if we are slave). */
    mstime_t min_pong = 0, now = mstime();
    clusterNode *min_pong_node = NULL;
    static unsigned long long iteration = 0;
    mstime_t handshake_timeout;

    iteration++; /* Number of times this function was called so far. */

    /* We want to take myself->ip in sync with the cluster-announce-ip option.
     * The option can be set at runtime via CONFIG SET, so we periodically check
     * if the option changed to reflect this into myself->ip. */
    {
        /* Static: remembers the value of server.cluster_announce_ip seen by
         * the previous call. */
        static char *prev_ip = NULL;
        /* Current value of the option (may have been changed at runtime). */
        char *curr_ip = server.cluster_announce_ip;
        int changed = 0;

        if (prev_ip == NULL && curr_ip != NULL) changed = 1;
        else if (prev_ip != NULL && curr_ip == NULL) changed = 1;
        else if (prev_ip && curr_ip && strcmp(prev_ip,curr_ip)) changed = 1;
        /* The announced IP changed since the last call. */
        if (changed) {
            if (prev_ip) zfree(prev_ip);
            prev_ip = curr_ip;
            /* If an announce IP is configured, copy it into myself->ip.
             * myself->ip therefore lags behind server.cluster_announce_ip
             * until the next run of this function. */
            if (curr_ip) {
                /* We always take a copy of the previous IP address, by
                 * duplicating the string. This way later we can check if
                 * the address really changed. */
                prev_ip = zstrdup(prev_ip);
                strncpy(myself->ip,server.cluster_announce_ip,NET_IP_STR_LEN);
                myself->ip[NET_IP_STR_LEN-1] = '\0';
            } else {
                myself->ip[0] = '\0'; /* Force autodetection. */
            }
        }
    }

    /* The handshake timeout is the time after which a handshake node that was
     * not turned into a normal node is removed from the nodes. Usually it is
     * just the NODE_TIMEOUT value, but when NODE_TIMEOUT is too small we use
     * the value of 1 second. */
    handshake_timeout = server.cluster_node_timeout;
    if (handshake_timeout < 1000) handshake_timeout = 1000;

    /* Update myself flags.
     * NOTE(review): helper not visible here; presumably it re-syncs flags
     * such as nofailover with the runtime configuration -- confirm. */
    clusterUpdateMyselfFlags();

    /* Check if we have disconnected nodes and re-establish the connection.
     * Also update a few stats while we are here, that can be used to make
     * better decisions in other part of the code. */
    di = dictGetSafeIterator(server.cluster->nodes);
    server.cluster->stats_pfail_nodes = 0;
    /* Walk all known nodes; for nodes without a link, create the outgoing
     * connection and store it in node->link. */
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);

        /* Not interested in reconnecting the link with myself or nodes
         * for which we have no address. */
        if (node->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR)) continue;
        /* Count nodes currently flagged as possibly failing (PFAIL). */
        if (node->flags & CLUSTER_NODE_PFAIL)
            server.cluster->stats_pfail_nodes++;

        /* A Node in HANDSHAKE state has a limited lifespan equal to the
         * configured node timeout. */
        /* Handshake took longer than handshake_timeout since the node was
         * created: remove the node from the cluster. */
        if (nodeInHandshake(node) && now - node->ctime > handshake_timeout) {
            clusterDelNode(node);
            continue;
        }

        if (node->link == NULL) {
            clusterLink *link = createClusterLink(node);
            /* Create an unconnected connection object (TLS or plain TCP). */
            link->conn = server.tls_cluster ? connCreateTLS() : connCreateSocket();
            connSetPrivateData(link->conn, link);
            /* Start connecting to the node's cluster bus port. */
            if (connConnect(link->conn, node->ip, node->cport, NET_FIRST_BIND_ADDR,
                        clusterLinkConnectHandler) == -1) {
                /* We got a synchronous error from connect before
                 * clusterSendPing() had a chance to be called.
                 * If node->ping_sent is zero, failure detection can't work,
                 * so we claim we actually sent a ping now (that will
                 * be really sent as soon as the link is obtained). */
                if (node->ping_sent == 0) node->ping_sent = mstime();
                serverLog(LL_DEBUG, "Unable to connect to "
                    "Cluster Node [%s]:%d -> %s", node->ip,
                    node->cport, server.neterr);

                /* Connection failed: drop the link; node->link stays NULL so
                 * a later call will retry. */
                freeClusterLink(link);
                continue;
            }
            /* Remember the link from this node to `node`. */
            node->link = link;
        }
    }
    dictReleaseIterator(di);
    /* Ping some random node 1 time every 10 iterations, so that we usually ping
     * one random node every second. */
    /* Runs once every 10 calls: sample 5 random nodes and ping the eligible
     * one with the oldest pong_received time. */
    if (!(iteration % 10)) {
        int j;

        /* Check a few random nodes and ping the one with the oldest
         * pong_received time. */
        for (j = 0; j < 5; j++) {
            de = dictGetRandomKey(server.cluster->nodes);
            clusterNode *this = dictGetVal(de);

            /* Don't ping nodes disconnected or with a ping currently active. */
            if (this->link == NULL || this->ping_sent != 0) continue;
            /* Never ping myself or nodes still in handshake. */
            if (this->flags & (CLUSTER_NODE_MYSELF|CLUSTER_NODE_HANDSHAKE))
                continue;
            if (min_pong_node == NULL || min_pong > this->pong_received) {
                min_pong_node = this;
                min_pong = this->pong_received;
            }
        }
        if (min_pong_node) {
            serverLog(LL_DEBUG,"Pinging node %.40s", min_pong_node->name);
            /* Send the PING to the selected node. */
            clusterSendPing(min_pong_node->link, CLUSTERMSG_TYPE_PING);
        }
    }

    /* Iterate nodes to check if we need to flag something as failing.
     * This loop is also responsible to:
     * 1) Check if there are orphaned masters (masters without non failing
     *    slaves).
     * 2) Count the max number of non failing slaves for a single master.
     * 3) Count the number of slaves for our master, if we are a slave. */
    orphaned_masters = 0;
    max_slaves = 0;
    this_slaves = 0;
    di = dictGetSafeIterator(server.cluster->nodes);
    /* Walk all nodes again, checking the health of our link to each one. */
    while((de = dictNext(di)) != NULL) {
        clusterNode *node = dictGetVal(de);
        now = mstime(); /* Use an updated time at every iteration. */
        /* Skip myself, nodes without an address and nodes in handshake. */
        if (node->flags &
            (CLUSTER_NODE_MYSELF|CLUSTER_NODE_NOADDR|CLUSTER_NODE_HANDSHAKE))
                continue;

        /* Orphaned master check, useful only if the current instance
         * is a slave that may migrate to another master. */
        /* Applies when myself is a slave and `node` is a master that is not
         * flagged FAIL. */
        if (nodeIsSlave(myself) && nodeIsMaster(node) && !nodeFailed(node)) {
            /* Number of non-failing slaves of `node`. */
            int okslaves = clusterCountNonFailingSlaves(node);

            /* A master is orphaned if it is serving a non-zero number of
             * slots, have no working slaves, but used to have at least one
             * slave, or failed over a master that used to have slaves. */
            /* In code: no ok slaves, at least one slot, and the
             * CLUSTER_NODE_MIGRATE_TO flag set. */
            if (okslaves == 0 && node->numslots > 0 &&
                node->flags & CLUSTER_NODE_MIGRATE_TO)
            {
                orphaned_masters++;
            }
            if (okslaves > max_slaves) max_slaves = okslaves;
            /* If `node` is our master, remember how many ok slaves it has
             * (this count is later compared with max_slaves). */
            if (nodeIsSlave(myself) && myself->slaveof == node)
                this_slaves = okslaves;
        }

        /* If we are not receiving any data for more than half the cluster
         * timeout, reconnect the link: maybe there is a connection
         * issue even if the node is alive. */
        /* Time elapsed since the pending ping was sent (only meaningful when
         * node->ping_sent is non-zero). */
        mstime_t ping_delay = now - node->ping_sent;
        /* Time elapsed since we last received any data from the node. */
        mstime_t data_delay = now - node->data_received;
        /* Drop the link when it is older than the node timeout, a ping is
         * pending, and neither a pong nor any data arrived for timeout/2. */
        if (node->link && /* is connected */
            now - node->link->ctime >
            server.cluster_node_timeout && /* was not already reconnected */
            node->ping_sent && /* we already sent a ping */
            /* and we are waiting for the pong more than timeout/2 */
            ping_delay > server.cluster_node_timeout/2 &&
            /* and in such interval we are not seeing any traffic at all. */
            data_delay > server.cluster_node_timeout/2)
        {
            /* Disconnect the link, it will be reconnected automatically. */
            /* NOTE(review): the checks below re-read node->link, so
             * freeClusterLink() presumably resets it to NULL -- confirm. */
            freeClusterLink(node->link);
        }

        /* If we have currently no active ping in this instance, and the
         * received PONG is older than half the cluster timeout, send
         * a new ping now, to ensure all the nodes are pinged without
         * a too big delay. */
        if (node->link &&node->ping_sent == 0 &&(now - node->pong_received) > server.cluster_node_timeout/2)
        {
            clusterSendPing(node->link, CLUSTERMSG_TYPE_PING);
            continue;
        }

        /* If we are a master and one of the slaves requested a manual
         * failover, ping it continuously. */
        /* I.e. a manual failover is in progress (mf_end != 0), myself is a
         * master and `node` is the slave performing it. */
        if (server.cluster->mf_end &&nodeIsMaster(myself) &&server.cluster->mf_slave == node &&node->link)
        {
            clusterSendPing(node->link, CLUSTERMSG_TYPE_PING);
            continue;
        }

        /* Check only if we have an active ping for this instance. */
        if (node->ping_sent == 0) continue;
        /* A ping is pending for this node: decide whether it looks down. */
        /* Check if this node looks unreachable.
         * Note that if we already received the PONG, then node->ping_sent
         * is zero, so can't reach this code at all, so we don't risk of
         * checking for a PONG delay if we didn't sent the PING.
         *
         * We also consider every incoming data as proof of liveness, since
         * our cluster bus link is also used for data: under heavy data
         * load pong delays are possible. */
        /* Use the smaller of ping_delay and data_delay. */
        mstime_t node_delay = (ping_delay < data_delay) ? ping_delay :
                                                          data_delay;
        /* No sign of life for more than the node timeout: flag as PFAIL. */
        if (node_delay > server.cluster_node_timeout) {
            /* Timeout reached. Set the node as possibly failing if it is
             * not already in this state. */
            /* Only when neither PFAIL nor FAIL is already set. */
            if (!(node->flags & (CLUSTER_NODE_PFAIL|CLUSTER_NODE_FAIL))) {
                serverLog(LL_DEBUG,"*** NODE %.40s possibly failing",
                    node->name);
                node->flags |= CLUSTER_NODE_PFAIL;
                update_state = 1;
            }
        }
    }
    dictReleaseIterator(di);

    /* If we are a slave node but the replication is still turned off,
     * enable it if we know the address of our master and it appears to
     * be up. */
    /* I.e. myself has a master with a known address but server.masterhost
     * is not set yet: configure replication from that master. */
    if (nodeIsSlave(myself) &&
        server.masterhost == NULL &&
        myself->slaveof &&
        nodeHasAddr(myself->slaveof))
    {
        replicationSetMaster(myself->slaveof->ip, myself->slaveof->port);
    }

    /* Abort a manual failover if the timeout is reached. */
    manualFailoverCheckTimeout();

    if (nodeIsSlave(myself)) {
        /* NOTE(review): helper not visible here; presumably it advances an
         * in-progress manual failover (e.g. checks the replication offset
         * and schedules work via todo_before_sleep) -- confirm. */
        clusterHandleManualFailover();
        if (!(server.cluster_module_flags & CLUSTER_MODULE_FLAG_NO_FAILOVER))
            clusterHandleSlaveFailover();
        /* If there are orphaned slaves, and we are a slave among the masters
         * with the max number of non-failing slaves, consider migrating to
         * the orphaned masters. Note that it does not make sense to try
         * a migration if there is no master with at least *two* working
         * slaves. */
        /* In code: there is an orphaned master, some master has at least two
         * ok slaves, our master is one with that maximum, and replica
         * migration is allowed by configuration. */
        if (orphaned_masters && max_slaves >= 2 && this_slaves == max_slaves &&
                        server.cluster_allow_replica_migration)
            clusterHandleSlaveMigration(max_slaves);
    }

    /* Recompute the cluster state if a node was just flagged PFAIL or the
     * cluster is currently in the FAIL state. */
    if (update_state || server.cluster->state == CLUSTER_FAIL)
        clusterUpdateState();
}

clusterCron的主要功能就是检查myself与其他node的连接,节点是否需要进行主备切换,备节点迁移。

  • 因为可以通过config set动态调整服务器参数,有些参数需要随时检查:ip, nofailover
  • 检查myself到其他node的连接:处于handshake状态超过handshake_timeout的节点会从集群中删除;没有连接的节点就创建连接,连接失败只释放link(不会删除节点),等待下次定时任务重试
  • 每秒(clusterCron每执行10次)从集群节点中随机抽取5个节点,只向其中最长时间没有收到pong的那一个节点发送ping
  • 遍历节点:检查是否存在孤立主节点;超过node_timeout/2没有任何数据往来的连接会被断开以便重连;当前没有未完成的ping、且超过node_timeout/2没有收到pong的节点,就发送ping;检查是否有ping超时的节点,超时就设置节点状态为pfail;
  • 如果存在孤立的主节点,而且myself是从节点,满足切换条件,就切换当前节点为孤立主节点的从节点
  • 如果有节点进入pfail状态,或者集群当前处于CLUSTER_FAIL状态,就更新集群状态

集群通信

节点间通信消息类型

/* Message types.
 *
 * Note that the PING, PONG and MEET messages are actually the same exact
 * kind of packet. PONG is the reply to ping, in the exact format as a PING,
 * while MEET is a special PING that forces the receiver to add the sender
 * as a node (if it is not already in the list).
 *
 * CLUSTERMSG_TYPE_COUNT is not a message type: it is the number of types
 * and sizes the per-type statistics arrays in clusterState
 * (stats_bus_messages_sent / stats_bus_messages_received). */
#define CLUSTERMSG_TYPE_PING 0          /* Ping */
#define CLUSTERMSG_TYPE_PONG 1          /* Pong (reply to Ping) */
#define CLUSTERMSG_TYPE_MEET 2          /* Meet "let's join" message */
#define CLUSTERMSG_TYPE_FAIL 3          /* Mark node xxx as failing */
#define CLUSTERMSG_TYPE_PUBLISH 4       /* Pub/Sub Publish propagation */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_REQUEST 5 /* May I failover? */
#define CLUSTERMSG_TYPE_FAILOVER_AUTH_ACK 6     /* Yes, you have my vote */
#define CLUSTERMSG_TYPE_UPDATE 7        /* Another node slots configuration */
#define CLUSTERMSG_TYPE_MFSTART 8       /* Pause clients for manual failover */
#define CLUSTERMSG_TYPE_MODULE 9        /* Module cluster API message. */
#define CLUSTERMSG_TYPE_COUNT 10        /* Total number of message types. */

查看不同类型消息的计数

ip:port> cluster info
cluster_stats_messages_ping_sent:615811
cluster_stats_messages_pong_sent:580837
cluster_stats_messages_auth-req_sent:5
cluster_stats_messages_mfstart_sent:1
cluster_stats_messages_sent:1196654
cluster_stats_messages_ping_received:580832
cluster_stats_messages_pong_received:615811
cluster_stats_messages_auth-ack_received:3
cluster_stats_messages_received:1196646

集群状态

/* Cluster-wide state as seen by this node; stored in server.cluster. */
typedef struct clusterState {
    clusterNode *myself;  /* This node */
    uint64_t currentEpoch;
    int state;            /* CLUSTER_OK, CLUSTER_FAIL, ... */
    int size;             /* Num of master nodes with at least one slot */
    dict *nodes;          /* Hash table of name -> clusterNode structures */
    dict *nodes_black_list; /* Nodes we don't re-add for a few seconds. */
    /* Per-slot migration state, indexed by slot number.
     * NOTE(review): judging by the field names, these hold the target node
     * of a slot being migrated away and the source node of a slot being
     * imported -- confirm against the code that sets them. */
    clusterNode *migrating_slots_to[CLUSTER_SLOTS];
    clusterNode *importing_slots_from[CLUSTER_SLOTS];
    /* slots[n] is the node slot n is assigned to. */
    clusterNode *slots[CLUSTER_SLOTS];
    /* Per-slot counters, zeroed in clusterInit(); presumably the number of
     * keys held in each slot -- confirm. */
    uint64_t slots_keys_count[CLUSTER_SLOTS];
    /* Radix tree holding the slots -> keys map (created in clusterInit()). */
    rax *slots_to_keys;
    /* The following fields are used to take the slave state on elections. */
    mstime_t failover_auth_time; /* Time of previous or next election. */
    int failover_auth_count;    /* Number of votes received so far. */
    int failover_auth_sent;     /* True if we already asked for votes. */
    int failover_auth_rank;     /* This slave rank for current auth request. */
    uint64_t failover_auth_epoch; /* Epoch of the current election. */
    int cant_failover_reason;   /* Why a slave is currently not able to
                                   failover. See the CANT_FAILOVER_* macros. */
    /* Manual failover state in common. */
    mstime_t mf_end;            /* Manual failover time limit (ms unixtime).
                                   It is zero if there is no MF in progress. */
    /* Manual failover state of master. */
    clusterNode *mf_slave;      /* Slave performing the manual failover. */
    /* Manual failover state of slave. */
    long long mf_master_offset; /* Master offset the slave needs to start MF
                                   or zero if still not received. */
    int mf_can_start;           /* If non-zero signal that the manual failover
                                   can start requesting masters vote. */
    /* The following fields are used by masters to take state on elections. */
    uint64_t lastVoteEpoch;     /* Epoch of the last vote granted. */
    int todo_before_sleep; /* Things to do in clusterBeforeSleep(). */
    /* Messages received and sent by type. */
    long long stats_bus_messages_sent[CLUSTERMSG_TYPE_COUNT];
    long long stats_bus_messages_received[CLUSTERMSG_TYPE_COUNT];
    long long stats_pfail_nodes;    /* Number of nodes in PFAIL status,
                                       excluding nodes without address. */
} clusterState;

节点

/* One cluster node as known by this instance (myself included). */
typedef struct clusterNode {
    mstime_t ctime; /* Node object creation time. */
    /* Node name; per the clusterInit() comment it is randomly generated by
     * createClusterNode() when no name is supplied (logged with %.40s). */
    char name[CLUSTER_NAMELEN]; /* Node name, hex string, sha1-size */
    /* Bitmask of CLUSTER_NODE_* flags describing role and state. */
    int flags;      /* CLUSTER_NODE_... */
    uint64_t configEpoch; /* Last configEpoch observed for this node */
    unsigned char slots[CLUSTER_SLOTS/8]; /* slots handled by this node */
    sds slots_info; /* Slots info represented by string. */
    int numslots;   /* Number of slots handled by this node */
    int numslaves;  /* Number of slave nodes, if this is a master */
    struct clusterNode **slaves; /* pointers to slave nodes */
    struct clusterNode *slaveof; /* pointer to the master node. Note that it
                                    may be NULL even if the node is a slave
                                    if we don't have the master node in our
                                    tables. */
    mstime_t ping_sent;      /* Unix time we sent latest ping */
    mstime_t pong_received;  /* Unix time we received the pong */
    mstime_t data_received;  /* Unix time we received any data */
    mstime_t fail_time;      /* Unix time when FAIL flag was set */
    mstime_t voted_time;     /* Last time we voted for a slave of this master */
    mstime_t repl_offset_time;  /* Unix time we received offset for this node */
    mstime_t orphaned_time;     /* Starting time of orphaned master condition */
    long long repl_offset;      /* Last known repl offset for this node. */
    char ip[NET_IP_STR_LEN];  /* Latest known IP address of this node */
    /* Port clients connect to. */
    int port;                   /* Latest known clients port (TLS or plain). */
    int pport;                  /* Latest known clients plaintext port. Only used
                                   if the main clients port is for TLS. */
    /* Cluster bus port; clusterInit() listens on port + CLUSTER_PORT_INCR
     * and refuses client ports for which that would exceed 65535. */
    int cport;                  /* Latest known cluster port of this node. */
    clusterLink *link;          /* TCP/IP link with this node */
    list *fail_reports;         /* List of nodes signaling this as failing */
} clusterNode;

cluster.h定义了节点flags的值和表示的意思:


/* Cluster node flags. Each value is a distinct bit; they are OR-ed together
 * in clusterNode.flags and tested with bitwise AND. */
#define CLUSTER_NODE_MASTER 1     /* The node is a master */
#define CLUSTER_NODE_SLAVE 2      /* The node is a slave */
#define CLUSTER_NODE_PFAIL 4      /* Failure? Need acknowledge */
#define CLUSTER_NODE_FAIL 8       /* The node is believed to be malfunctioning */
#define CLUSTER_NODE_MYSELF 16    /* This node is myself */
#define CLUSTER_NODE_HANDSHAKE 32 /* We have still to exchange the first ping */
#define CLUSTER_NODE_NOADDR   64  /* We don't know the address of this node */
#define CLUSTER_NODE_MEET 128     /* Send a MEET message to this node */
#define CLUSTER_NODE_MIGRATE_TO 256 /* Master eligible for replica migration. */
#define CLUSTER_NODE_NOFAILOVER 512 /* Slave will not try to failover. */

集群配置文件

nodes.conf定义了集群节点的信息, 该文件的信息会实时更新

name ip:port@cport master,nofailover masternodename ping_sent pong_recv epoch connected 5461-10922
  • 0 name: 表示当前行是哪个节点的信息
  • 1 ip:port@cport:ip表示节点的ip, port表示对外服务端口,cport表示集群间通信端口;对外公布的集群通信端口配置项是cluster-announce-bus-port xxx(cluster-announce-port配置的是对外公布的服务端口),不配置就会默认设置为port+10000。
  • 2 节点角色和配置相关,master表示是主节点,slave表示是从节点,myself表示是当前节点。fail表示客观下线,fail?表示主观下线。
  • 3 masternodename:对于从节点,该字段记录了主节点的名称;对于主节点,该值为-
  • 4,5 ping_sent: 如果不为0,加载配置时就设置节点的clusterNode.ping_sent=mstime(); pong_recv如果不为0,就设置节点的clusterNode.pong_received=mstime(); 文件中记录的是上次发送ping和接收到pong的时间,加载时只要非0就重置为当前时间
  • 6 epoch: clusterNode.configEpoch
  • 7 节点是否正常连接
  • 8-... 记录了slots的信息,有几种格式
a  表示slot a属于节点
a-b  表示从a到b的slots都属于节点
[a-<-nodename] 表示要将slots a 从nodename迁移到本节点
[a->-nodename] 表示要将slots a 从本节点迁移到nodename

查看集群信息

cluster help
cluster info
cluster nodes
cluster slots
posted @ 2024-09-27 17:04  董少奇  阅读(17)  评论(0)    收藏  举报