Redis 底层机制解析(八):持久化、内存管理与高可用架构
1. 引言:Redis 系统级设计的深度剖析
在前七篇文章中,我们深入探讨了 Redis 的各种数据结构和其底层实现。然而,Redis 的强大不仅仅在于其丰富的数据类型,更在于其完整的系统架构设计。本篇将聚焦 Redis 的三个核心系统级机制:持久化、内存管理和高可用架构,揭示 Redis 如何在这些方面实现高性能与高可靠性的平衡。
Redis 的系统设计体现了多个精妙的权衡:内存与磁盘的权衡、性能与持久性的权衡、数据一致性与系统可用性的权衡。理解这些底层机制,对于在生产环境中正确配置和优化 Redis 至关重要。
2. 持久化机制:RDB 与 AOF 的深度解析
简单内容整合可参考之前的文章:Redis RDB和AOF 流程、优缺点详细介绍
本模块更偏向源码实战
2.1 RDB(Redis Database)持久化
RDB 是 Redis 的快照式持久化机制,通过创建数据集的二进制快照来实现持久化。
RDB 文件结构:
| REDIS | RDB版本 | 数据库号 | 键值对1 | … | 键值对N | 结束符 | CRC64校验 |
|---|---|---|---|---|---|---|---|
| (5字节) | (4字节) | (1字节) | xxx字节 | … | xxx字节 | (1字节) | (8字节) |
RDB 创建过程(rdb.c):
/*
 * Save every non-empty database to `filename` as an RDB snapshot.
 *
 * The data is written to a per-process temporary file which is renamed over
 * `filename` only after all writes succeeded and the stream was flushed and
 * closed, so a failed save never replaces the previous snapshot.
 *
 * filename: final path of the snapshot.
 * rsi:      not used by this excerpt.
 *
 * Returns REDIS_OK on success, REDIS_ERR on any failure (the temporary file
 * is removed in that case).
 *
 * NOTE(review): tmpfile, fp, magic, j, di, de, expiretime, cksum and buf are
 * used here without a visible declaration; they are assumed to be declared
 * in the part of the function this excerpt leaves out.
 */
int rdbSave(char *filename, rdbSaveInfo *rsi) {
    di = NULL; /* no iterator open yet; the error path checks this */

    snprintf(tmpfile, 256, "temp-%d.rdb", (int)getpid());
    fp = fopen(tmpfile, "w");
    if (fp == NULL) {
        redisLog(REDIS_WARNING, "Failed opening the temp RDB file for saving");
        return REDIS_ERR;
    }

    /* File header: the "REDIS" magic followed by a 4-character version. */
    redisLog(REDIS_NOTICE, "RDBSAVE: saving DB on disk");
    if (rdbWriteRaw(fp, "REDIS", 5) == -1) goto werr;
    snprintf(magic, sizeof(magic), "%04d", REDIS_RDB_VERSION);
    if (rdbWriteRaw(fp, magic, 4) == -1) goto werr;

    /* One section per non-empty database. */
    for (j = 0; j < server.dbnum; j++) {
        redisDb *db = server.db+j;
        dict *d = db->dict;
        if (dictSize(d) == 0) continue;

        /* Database selector: opcode followed by the database index. */
        if (rdbSaveType(fp, RDB_OPCODE_SELECTDB) == -1) goto werr;
        if (rdbSaveLen(fp, j) == -1) goto werr;

        di = dictGetSafeIterator(d);
        while((de = dictNext(di)) != NULL) {
            sds keystr = dictGetKey(de);
            robj key, *o = dictGetVal(de);
            initStaticStringObject(key, keystr);
            /* Look up this key's expire so the value written below belongs
             * to the key being saved. */
            expiretime = getExpire(db, &key);
            if (rdbSaveKeyValuePair(fp, &key, o, expiretime) == -1) goto werr;
        }
        dictReleaseIterator(di);
        di = NULL;
    }

    /* Trailer: EOF opcode followed by the 8-byte checksum. */
    if (rdbSaveType(fp, RDB_OPCODE_EOF) == -1) goto werr;
    cksum = rdbChecksum(fp, &buf);
    if (rioWrite(fp, &cksum, 8) == 0) goto werr;

    /* Buffered data must reach the file before it becomes the live
     * snapshot; a failed flush/close means the file may be truncated. */
    if (fflush(fp) == EOF) goto werr;
    if (fclose(fp) == EOF) {
        fp = NULL; /* already closed: do not close again in werr */
        goto werr;
    }
    fp = NULL;

    /* Rename so readers see either the old or the new snapshot. */
    if (rename(tmpfile, filename) == -1) {
        redisLog(REDIS_WARNING, "Error moving temp DB file on final destination");
        unlink(tmpfile);
        return REDIS_ERR;
    }
    return REDIS_OK;

werr:
    if (di != NULL) dictReleaseIterator(di);
    if (fp != NULL) fclose(fp);
    unlink(tmpfile);
    return REDIS_ERR;
}
RDB 的 COW(Copy-On-Write)机制:
Redis 使用 fork() 创建子进程来生成 RDB 快照,利用操作系统的写时复制机制:
/*
 * Start a background RDB save by forking a child that calls rdbSave().
 *
 * filename, rsi: forwarded unchanged to rdbSave() in the child.
 *
 * Returns REDIS_OK in the parent once the child has been started, and
 * REDIS_ERR if fork() fails (no child exists in that case).
 * The child leaves through exitFromChild() with status 0 when rdbSave()
 * returned REDIS_OK and 1 otherwise.
 */
int rdbSaveBackground(char *filename, rdbSaveInfo *rsi) {
    pid_t childpid = fork();

    if (childpid == -1) {
        /* fork() failed: there is no child to track, so report the failure
         * instead of recording -1 as the child pid. */
        server.stat_bgsave_status = REDIS_ERR;
        redisLog(REDIS_WARNING, "Can't save in background: fork failed");
        return REDIS_ERR;
    }

    if (childpid == 0) {
        /* Child process */
        int retval;

        closeListeningSockets(0);
        redisSetProcTitle("redis-rdb-bgsave");
        retval = rdbSave(filename, rsi);
        exitFromChild((retval == REDIS_OK) ? 0 : 1);
    } else {
        /* Parent process: record the running child. */
        server.stat_bgsave_status = REDIS_OK;
        server.rdb_save_time_start = time(NULL);
        server.rdb_child_pid = childpid;
        server.rdb_child_type = REDIS_RDB_CHILD_TYPE_DISK;
        updateDictResizePolicy();
    }
    return REDIS_OK;
}
2.2 AOF(Append Only File)持久化
AOF 通过记录所有写操作命令来实现持久化,提供更好的持久性保证。
AOF 文件格式:
*3\r\n$3\r\nSET\r\n$5\r\nmykey\r\n$7\r\nmyvalue\r\n
*3\r\n$3\r\nSET\r\n$5\r\nmykey\r\n$7\r\nnewvalue\r\n
AOF 持久化流程(aof.c):
/*
 * Serialize one command and append it to the in-memory AOF buffer.
 *
 * cmd:    command descriptor; only cmd->proc is inspected here.
 * dictid: index of the database the command ran against.
 * argv, argc: the command's arguments.
 *
 * When dictid differs from server.aof_selected_db a SELECT for dictid is
 * emitted first and server.aof_selected_db is updated. The serialized text
 * is appended to server.aof_buf only when server.aof_state is AOF_ON.
 */
void feedAppendOnlyFile(struct redisCommand *cmd, int dictid, robj **argv, int argc) {
    sds out = sdsempty();

    /* Emit a SELECT whenever the target database changes. */
    if (server.aof_selected_db != dictid) {
        char dbnum[64];

        snprintf(dbnum, sizeof(dbnum), "%d", dictid);
        out = sdscatprintf(out, "*2\r\n$6\r\nSELECT\r\n$%lu\r\n%s\r\n",
                           (unsigned long)strlen(dbnum), dbnum);
        server.aof_selected_db = dictid;
    }

    /* EXPIRE and PEXPIRE go through a dedicated serializer; everything else
     * is written as-is. */
    if (cmd->proc != expireCommand && cmd->proc != pexpireCommand) {
        out = catAppendOnlyGenericCommand(out, argc, argv);
    } else {
        out = catAppendOnlyExpireAtCommand(out, cmd, argv[1], argv[2]);
    }

    /* Append to the AOF buffer only while AOF is enabled. */
    if (server.aof_state == AOF_ON) {
        server.aof_buf = sdscatlen(server.aof_buf, out, sdslen(out));
    }

    sdsfree(out);
}
AOF 重写机制:
AOF 重写通过创建当前数据集的最小命令集来压缩 AOF 文件:
int rewriteAppendOnlyFile(char *filename) {
snprintf(tmpfile, 256, "temp-rewriteaof-%d.aof", (int)getpid());
fp = fopen(tmpfile, "w");
// 创建重写子进程
if (fork() == 0) {
closeListeningSockets(0);
redisSetProcTitle("redis-aof-rewrite");
// 遍历所有数据库和键
for (j = 0; j < server.dbnum; j++) {
redisDb *db = server.db+j;
dict *d = db->dict;
if (dictSize(d) == 0) continue;
di = dictGetSafeIterator(d);
while((de = dictNext(di)) != NULL) {
robj *key = dictGetKey(de);
robj *val = dictGetVal(de);
long long expiretime = getExpire(db, key);
// 跳过过期键
if (expiretime != -1 && expiretime < now) continue;
// 将键值对转换为SET命令
if (rewriteListObject(fp, key, val)) continue;
if (rewriteSetObject(fp, key, val)) continue;
if (rewriteZsetObject(fp, key, val)) continue;
if (rewriteHashObject(fp, key, val)) continue;
rewriteStringObject(fp, key, val);
// 添加过期时间
if (expiretime != -1) {
char cmd[256], arg[64];
snprintf(cmd, sizeof(cmd), "PEXPIREAT");
snprintf(arg, sizeof(arg), "%lld", expiretime);
rewriteCommand(fp, cmd, key, arg);
}
}
dictReleaseIterator(di);
}
fclose(fp);
exit(0);
}
// 父进程处理
server.aof_child_pid = childpid;
server.aof_rewrite_time_start = time(NULL);
return REDIS_OK;
}
3. 内存管理机制
3.1 内存分配器
Redis 并没有自己实现内存分配器,而是通过 zmalloc 封装层包装底层分配器(jemalloc、tcmalloc 或 libc malloc),在每次分配和释放时统计内存使用量;选用 jemalloc 等分配器有助于减少内存碎片:
zmalloc 实现(zmalloc.c):
/*
 * Allocate `size` usable bytes plus PREFIX_SIZE bytes of header space and
 * account for the allocation.
 *
 * Calls zmalloc_oom_handler(size) when malloc() fails. Returns a pointer
 * PREFIX_SIZE bytes past the start of the malloc()ed region.
 *
 * NOTE(review): nothing is stored in the PREFIX_SIZE header here, and
 * zmalloc_size() is given the raw malloc() pointer; confirm that this
 * matches what zmalloc_size() expects when PREFIX_SIZE is non-zero.
 */
void *zmalloc(size_t size) {
    char *raw = malloc(size + PREFIX_SIZE);

    if (raw == NULL) zmalloc_oom_handler(size);

    /* Record the allocation in the memory statistics. */
    update_zmalloc_stat_alloc(zmalloc_size(raw));

    return raw + PREFIX_SIZE;
}
/*
 * Account for an allocation of `size` bytes.
 *
 * Adds `size` to used_memory and to used_memory_peak, and adds the excess of
 * `size` over request_size to mem_fragmentation.
 *
 * NOTE(review): request_size is not declared in this excerpt; it is assumed
 * to be visible at this point. used_memory_peak is incremented by every
 * allocation rather than tracking a maximum; confirm that is intended.
 */
void update_zmalloc_stat_alloc(size_t size) {
    atomicIncr(used_memory, size);
    atomicIncr(used_memory_peak, size);

    /* Compare before subtracting: both values are unsigned, so
     * `size - request_size` would wrap to a huge number when size is the
     * smaller one. */
    if (size > request_size) {
        atomicIncr(mem_fragmentation, size - request_size);
    }
}
3.2 内存淘汰策略
Redis 支持多种内存淘汰策略:当已用内存超过 maxmemory 配置的上限时,按照 maxmemory-policy 指定的策略自动删除键:
LRU 算法实现(evict.c):
/*
 * Free memory when usage is above server.maxmemory.
 *
 * Does nothing when zmalloc_used_memory() is within the limit. Otherwise the
 * excess is handed to the LRU or the random eviction routine, depending on
 * server.maxmemory_policy; any other policy frees nothing. When something
 * was freed, the modification is signalled and server.dirty is incremented.
 *
 * NOTE(review): `c` and `key` are not declared in this excerpt, and `slaves`
 * is computed but never read here.
 */
void freeMemoryIfNeeded(void) {
    int slaves = listLength(server.slaves);
    size_t mem_used = zmalloc_used_memory();
    size_t mem_tofree;
    size_t mem_freed = 0;
    int evict_lru, evict_random;

    /* Nothing to do while usage is within the limit. */
    if (mem_used <= server.maxmemory) return;
    mem_tofree = mem_used - server.maxmemory;

    /* Pick the eviction routine from the configured policy. */
    evict_lru = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_LRU ||
                 server.maxmemory_policy == MAXMEMORY_VOLATILE_LRU);
    evict_random = (server.maxmemory_policy == MAXMEMORY_ALLKEYS_RANDOM ||
                    server.maxmemory_policy == MAXMEMORY_VOLATILE_RANDOM);

    if (evict_lru) {
        mem_freed = freeMemoryByLRU(mem_tofree);
    } else if (evict_random) {
        mem_freed = freeMemoryByRandom(mem_tofree);
    }

    if (mem_freed == 0) return;

    /* Something was evicted: signal the change and mark the dataset dirty. */
    signalModifiedKey(c, c->db, key);
    server.dirty++;
}
近似 LRU 算法:
Redis 使用近似 LRU 算法来平衡精度和性能:
/*
 * Estimate how long object `o` has been idle.
 *
 * The result is the number of LRU clock ticks between o->lru and the current
 * LRU_CLOCK(), multiplied by LRU_CLOCK_RESOLUTION. When the current clock is
 * smaller than o->lru the clock is taken to have wrapped at LRU_CLOCK_MAX.
 */
unsigned long long estimateObjectIdleTime(robj *o) {
    unsigned long long now = LRU_CLOCK();
    unsigned long long ticks;

    if (now < o->lru) {
        /* The clock wrapped since the object was last touched. */
        ticks = now + (LRU_CLOCK_MAX - o->lru);
    } else {
        ticks = now - o->lru;
    }
    return ticks * LRU_CLOCK_RESOLUTION;
}
4. 高可用架构:主从复制与哨兵机制
4.1 主从复制
Redis 主从复制提供数据冗余和读写分离能力:
复制流程(replication.c):
/*
 * Drive the replica side of the handshake with the master on socket `fd`.
 *
 * Steps visible in this excerpt: PING while in REPL_STATE_CONNECTING, AUTH
 * when server.masterauth is set, REPLCONF listening-port, then PSYNC (with
 * the cached master's replid/offset when available, otherwise "?" "-1").
 * Any command that reports an error jumps to `error`.
 *
 * el, privdata, mask: not used by this excerpt.
 *
 * NOTE(review): err, psync_replid, psync_offset and the `error` label are
 * not shown in this excerpt and are assumed to exist in the omitted part of
 * the function. The AUTH step is not gated on server.repl_state, so as
 * written it returns on every call while masterauth is set; confirm against
 * the full state machine.
 */
void syncWithMaster(aeEventLoop *el, int fd, void *privdata, int mask) {
    char port_arg[32];

    /* Send PING to confirm the connection. */
    if (server.repl_state == REPL_STATE_CONNECTING) {
        err = sendSynchronousCommand(fd, "PING", NULL);
        if (err) goto error;
        server.repl_state = REPL_STATE_RECEIVE_PONG;
        return;
    }

    /* Authenticate. */
    if (server.masterauth) {
        err = sendSynchronousCommand(fd, "AUTH", server.masterauth, NULL);
        if (err) goto error;
        server.repl_state = REPL_STATE_RECEIVE_AUTH;
        return;
    }

    /* Announce the listening port. The argument list is a NULL-terminated
     * sequence of strings, so the port has to be formatted as text first
     * (assumes server.port is an integer). */
    snprintf(port_arg, sizeof(port_arg), "%d", (int)server.port);
    err = sendSynchronousCommand(fd, "REPLCONF", "listening-port",
                                 port_arg, NULL);
    if (err) goto error;
    server.repl_state = REPL_STATE_RECEIVE_PORT;

    /* Send PSYNC: try to continue from the cached master, otherwise ask for
     * a full resynchronization. */
    if (server.cached_master) {
        psync_replid = server.cached_master->replid;
        snprintf(psync_offset, sizeof(psync_offset), "%lld",
                 server.cached_master->reploff+1);
        err = sendSynchronousCommand(fd, "PSYNC", psync_replid, psync_offset, NULL);
    } else {
        err = sendSynchronousCommand(fd, "PSYNC", "?", "-1", NULL);
    }
    /* Do not advance the state if PSYNC could not be sent. */
    if (err) goto error;

    /* Wait for the master's reply. */
    server.repl_state = REPL_STATE_RECEIVE_PSYNC;
}
命令传播(主节点将写命令转发给各个副本):
/*
 * Forward one command to every replica in `slaves`.
 *
 * slaves: list whose node values are client structures for replicas.
 * dictid: not used by this excerpt.
 * argv, argc: the command, sent as a multi-bulk reply of argc bulk items.
 *
 * A replica is skipped when it is online, has repl_put_online_on_ack set and
 * its last ack is more than 5 seconds old, or when it is flagged
 * CLIENT_CLOSE_AFTER_REPLY.
 */
void replicationFeedSlaves(list *slaves, int dictid, robj **argv, int argc) {
    listNode *node;
    listIter li;
    int j;

    listRewind(slaves, &li);
    while((node = listNext(&li))) {
        client *slave = node->value;

        /* Skip replicas that are lagging: their last ack is older than
         * 5 seconds. (The comparison used to be `>`, which skipped the
         * replicas that had acked recently.) */
        if (slave->replstate == SLAVE_STATE_ONLINE &&
            slave->repl_put_online_on_ack &&
            slave->repl_ack_time < server.unixtime - 5) {
            continue;
        }

        /* Do not feed replicas that are about to be closed. */
        if (slave->flags & CLIENT_CLOSE_AFTER_REPLY) continue;

        /* Send the command to the replica. */
        addReplyMultiBulkSize(slave, argc);
        for (j = 0; j < argc; j++) {
            addReplyBulk(slave, argv[j]);
        }
    }
}
4.2 哨兵系统
Redis Sentinel 提供自动故障转移和监控:
哨兵状态检查(sentinel.c):
/*
 * Update the SRI_S_DOWN flag of instance `ri`.
 *
 * The instance counts as down when the time since ri->last_avail_time
 * exceeds ri->down_after_period. A "+sdown" event is emitted when the flag
 * is set and a "-sdown" event when it is cleared; nothing happens when the
 * flag already matches.
 */
void sentinelCheckSubjectivelyDown(sentinelRedisInstance *ri) {
    mstime_t idle = mstime() - ri->last_avail_time;
    int timed_out = idle > ri->down_after_period;
    int flagged = (ri->flags & SRI_S_DOWN) != 0;

    if (timed_out && !flagged) {
        /* Newly down. */
        sentinelEvent(LL_WARNING, "+sdown", ri, "%@");
        ri->flags |= SRI_S_DOWN;
    } else if (!timed_out && flagged) {
        /* Reachable again. */
        sentinelEvent(LL_WARNING, "-sdown", ri, "%@");
        ri->flags &= ~SRI_S_DOWN;
    }
}
/*
 * Update the SRI_O_DOWN flag of instance `ri`.
 *
 * Counts the entries of ri->sentinels that have SRI_MASTER_DOWN set. When
 * the count reaches sentinel_quorum(ri) the flag is set, a "+odown" event is
 * emitted and ri->o_down_since_time is recorded; when it does not and the
 * flag is set, the flag is cleared and "-odown" is emitted.
 */
void sentinelCheckObjectivelyDown(sentinelRedisInstance *ri) {
    int votes = 0;
    dictIterator *di;
    dictEntry *de;

    /* Count the other sentinels that report the master as down. */
    di = dictGetIterator(ri->sentinels);
    while((de = dictNext(di))) {
        sentinelRedisInstance *other = dictGetVal(de);
        if (other->flags & SRI_MASTER_DOWN) votes++;
    }
    dictReleaseIterator(di);

    /* Compare against the quorum. */
    if (votes >= sentinel_quorum(ri)) {
        if ((ri->flags & SRI_O_DOWN) == 0) {
            sentinelEvent(LL_WARNING, "+odown", ri, "%@ quorum=%d/%d",
                          votes, sentinel_quorum(ri));
            ri->flags |= SRI_O_DOWN;
            ri->o_down_since_time = mstime();
        }
    } else {
        if (ri->flags & SRI_O_DOWN) {
            sentinelEvent(LL_WARNING, "-odown", ri, "%@");
            ri->flags &= ~SRI_O_DOWN;
        }
    }
}
故障转移流程:
/*
 * Advance the failover of master `ri` by one step, based on
 * ri->failover_state.
 *
 * Each call handles the current state only. States not listed here are left
 * untouched.
 *
 * NOTE(review): no branch in this excerpt moves the machine out of
 * WAIT_PROMOTION except the timeout; the transition on success is assumed to
 * happen elsewhere.
 */
void sentinelFailoverStateMachine(sentinelRedisInstance *ri) {
    switch(ri->failover_state) {
    case SENTINEL_FAILOVER_STATE_WAIT_START:
        /* Abort if the failover did not start within failover_timeout. */
        if (mstime() - ri->failover_state_change_time > ri->failover_timeout) {
            sentinelEvent(LL_WARNING, "-failover-aborted", ri, "%@");
            ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
            return;
        }
        /* Otherwise keep waiting for the failover to start. */
        break;

    case SENTINEL_FAILOVER_STATE_SELECT_SLAVE:
        /* Pick the replica to promote; stay in this state if none is
         * selected. */
        ri->promoted_slave = sentinelSelectSlave(ri);
        if (ri->promoted_slave) {
            sentinelEvent(LL_WARNING, "+selected-slave", ri->promoted_slave, "%@");
            ri->failover_state = SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE;
            ri->failover_state_change_time = mstime();
        }
        break;

    case SENTINEL_FAILOVER_STATE_SEND_SLAVEOF_NOONE: {
        /* Ask the selected replica to stop replicating. The step is
         * retried on the next call if sending fails. */
        int retval = sentinelSendSlaveOf(ri->promoted_slave, NULL, 0);
        if (retval == C_OK) {
            ri->failover_state = SENTINEL_FAILOVER_STATE_WAIT_PROMOTION;
            ri->failover_state_change_time = mstime();
        }
        break;
    }

    case SENTINEL_FAILOVER_STATE_WAIT_PROMOTION:
        /* Abort if the promotion takes longer than the promotion timeout. */
        if (mstime() - ri->failover_state_change_time > SENTINEL_FAILOVER_PROMOTION_TIMEOUT) {
            sentinelEvent(LL_WARNING, "-failover-aborted", ri, "%@");
            ri->failover_state = SENTINEL_FAILOVER_STATE_NONE;
            return;
        }
        break;

    case SENTINEL_FAILOVER_STATE_RECONF_SLAVES:
        /* Reconfigure the remaining replicas. */
        sentinelReconfigSlaves(ri);
        ri->failover_state = SENTINEL_FAILOVER_STATE_UPDATE_CONFIG;
        ri->failover_state_change_time = mstime();
        break;

    default:
        /* No action for states not handled above. */
        break;
    }
}
5. 集群模式:分布式数据存储
5.1 数据分片与哈希槽
Redis Cluster 使用哈希槽(hash slot)进行数据分片:
哈希槽分配(cluster.c):
/*
 * Map a key to its hash slot.
 *
 * key, keylen: the key bytes and their length.
 *
 * When the key contains a '{' followed later by a '}' with at least one byte
 * between them, only the bytes between the first '{' and the first '}' after
 * it are hashed. Otherwise the whole key is hashed. The result is the low
 * 14 bits of crc16() over the hashed bytes.
 */
unsigned int keyHashSlot(char *key, int keylen) {
    int lbrace = 0;
    int rbrace;

    /* Find the first '{'. */
    while (lbrace < keylen && key[lbrace] != '{') lbrace++;

    /* No '{': hash the whole key. */
    if (lbrace == keylen) return crc16(key, keylen) & 0x3FFF;

    /* Find the first '}' to the right of it. */
    rbrace = lbrace + 1;
    while (rbrace < keylen && key[rbrace] != '}') rbrace++;

    /* No '}' or nothing between the braces: hash the whole key. */
    if (rbrace == keylen || rbrace == lbrace + 1) return crc16(key, keylen) & 0x3FFF;

    /* Hash only what is between '{' and '}'. */
    return crc16(key + lbrace + 1, rbrace - lbrace - 1) & 0x3FFF;
}
5.2 节点间通信
集群节点使用 Gossip 协议进行状态通信:
Gossip 消息传播:
/*
 * Build a PING message carrying gossip entries about known nodes and send
 * it over `link`.
 *
 * link:   destination link, passed to clusterSendMessage().
 * gossip: when zero, maxgossip is 0 and no gossip entry is added; otherwise
 *         at most CLUSTER_SLOTS/5 entries are added.
 *
 * NOTE(review): de, gossipcount and totlen are used without a visible
 * declaration or initialization in this excerpt; they are assumed to be set
 * up in the omitted part of the function. The capacity of
 * msg.data.ping.gossip is not visible here either; confirm it can hold
 * maxgossip entries.
 */
void clusterSendGossipMsg(clusterLink *link, int gossip) {
clusterMsg msg;
clusterMsgDataGossip *gossip_msg;
int maxgossip = gossip ? CLUSTER_SLOTS/5 : 0;
// Prepare the message header
clusterBuildMessageHdr(&msg, CLUSTERMSG_TYPE_PING);
// Add information about known nodes, up to maxgossip entries
dictIterator *di = dictGetSafeIterator(server.cluster->nodes);
while((de = dictNext(di)) && gossipcount < maxgossip) {
clusterNode *node = dictGetVal(de);
// Skip this node itself
if (node == myself) continue;
// Skip nodes flagged NODE_HANDSHAKE or NODE_NOADDR
if (node->flags & (NODE_HANDSHAKE|NODE_NOADDR)) continue;
// Skip nodes with a ping sent after their last received pong
if (node->ping_sent != 0 && node->pong_received < node->ping_sent) continue;
// Append the node to the gossip section, converting integers with htonl/htons
gossip_msg = &msg.data.ping.gossip[gossipcount++];
memcpy(gossip_msg->nodename, node->name, CLUSTER_NAMELEN);
gossip_msg->ping_sent = htonl(node->ping_sent);
gossip_msg->pong_received = htonl(node->pong_received);
memcpy(gossip_msg->ip, node->ip, NET_IP_STR_LEN);
gossip_msg->port = htons(node->port);
gossip_msg->flags = htons(node->flags);
}
dictReleaseIterator(di);
// Send the message
clusterSendMessage(link, (unsigned char*)&msg, totlen);
}
6. 性能优化与最佳实践
6.1 持久化配置优化
RDB 与 AOF 的混合持久化:
# redis.conf example
# Comments go on their own lines: text after a directive on the same line
# is parsed as extra arguments, not as a comment.

# Snapshot if at least 1 change happened within 900 seconds
save 900 1
# Snapshot if at least 10 changes happened within 300 seconds
save 300 10
# Snapshot if at least 10000 changes happened within 60 seconds
save 60 10000

# Enable AOF
appendonly yes
# fsync the AOF once per second
appendfsync everysec
# Hybrid persistence: AOF rewrites start with an RDB preamble
aof-use-rdb-preamble yes
6.2 内存优化策略
使用适当的数据结构:
# Inspect memory usage (shell command)
redis-cli info memory
# Tuning directives (redis.conf)
hash-max-ziplist-entries 512
hash-max-ziplist-value 64
list-max-ziplist-size -2
set-max-intset-entries 512
zset-max-ziplist-entries 128
zset-max-ziplist-value 64
6.3 高可用部署建议
哨兵部署方案:
# Deploy at least 3 Sentinel instances
sentinel monitor mymaster 127.0.0.1 6379 2
sentinel down-after-milliseconds mymaster 30000
sentinel failover-timeout mymaster 180000
sentinel parallel-syncs mymaster 1
7. 监控与故障排查
7.1 关键指标监控
性能监控命令:
# View server metrics
redis-cli info
redis-cli info stats
redis-cli info replication
redis-cli info persistence
# Inspect the slow log
redis-cli slowlog get
7.2 常见问题诊断
内存问题诊断:
def analyze_memory_usage(r):
    """Print a memory summary and return the keys larger than 1 MB.

    Args:
        r: a client object providing ``info(section)``,
            ``scan(cursor, count=...)`` and ``memory_usage(key)``.

    Returns:
        A list of ``(key, size_in_bytes)`` tuples, in scan order, for every
        key whose reported size is greater than 1 MB.
    """
    # Summarize memory usage
    info = r.info('memory')
    print(f"Used memory: {info['used_memory_human']}")
    print(f"Peak memory: {info['used_memory_peak_human']}")
    print(f"Fragmentation: {info['mem_fragmentation_ratio']}")

    # Find large keys
    cursor = 0
    large_keys = []
    while True:
        cursor, keys = r.scan(cursor, count=100)
        for key in keys:
            size = r.memory_usage(key)
            # memory_usage() gives None when the key vanished between the
            # scan and the lookup; skip it instead of comparing None.
            if size is not None and size > 1024 * 1024:  # larger than 1 MB
                large_keys.append((key, size))
        # The iteration is complete only when the cursor comes back as 0.
        # An empty batch does not mean the scan is finished, and a non-empty
        # final batch must not restart the scan from cursor 0.
        if cursor == 0:
            break
    return large_keys
8. 总结
Redis 的系统级设计体现了多个精妙的权衡和优化:
核心优势:
- 灵活的持久化策略:RDB 提供高性能快照,AOF 保证数据安全,混合模式兼顾两者优势
- 高效的内存管理:自定义分配器减少碎片,多种淘汰策略应对内存压力
- 可靠的高可用架构:主从复制提供数据冗余,哨兵系统实现自动故障转移
- 分布式扩展能力:集群模式支持水平扩展和数据分片
设计哲学:
- 简单性:基于简单组件构建复杂功能
- 效率:极致优化内存和CPU使用
- 可靠性:通过持久化和复制保证数据安全
- 扩展性:支持从单机到分布式集群的平滑扩展
理解 Redis 的系统级机制,有助于开发者和运维人员更好地配置、监控和优化 Redis,充分发挥其在高性能数据存储方面的优势。无论是单机部署还是大规模集群,Redis 都提供了相应的解决方案来满足不同的业务需求。
本文来自博客园,作者:NeoLshu,转载请注明原文链接:https://www.cnblogs.com/neolshu/p/19120363

浙公网安备 33010602011771号