MIT6.5840 2024 Spring Lab4
Preface
Lab 4 builds a fault-tolerant Key/Value service on top of Lab 3. It is best to move on to Lab 4 only after Lab 3 passes reliably: even though my Lab 3 passed consistently, I still found quite a few bugs while doing Lab 4, so with a Lab 3 that cannot pass reliably there is no chance of passing Lab 4. Lab 4 is essentially debugging by staring at logs; it tormented me for several days, and the most common problems were broken linearizability and insufficient speed. Because Lab 4 exposed many bugs in my Lab 3 code that I had not noticed, I will present the Raft code changes first.
Lab 3 Code Changes
Structs in the Raft Layer
- First, the ApplyMsg struct. To keep the service layer from returning a wrong operation result to a client, the service needs the term of the log entry that Raft is committing. Consider the situation shown in the figure:

To avoid this, add a term field to ApplyMsg: when Raft applies an entry to the service layer, it attaches the term of the log entry that contains the operation.
- Second, a new field InstallSnapshotMutex sync.Mutex is added to Raft; its purpose is explained at the corresponding function below.
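For reference, a minimal sketch of the two struct changes; the field names match those used in the code later in this post, and the remaining ApplyMsg fields are the ones from the lab skeleton:

type ApplyMsg struct {
    CommandValid bool
    Command      interface{}
    CommandIndex int
    CommandTerm  int // new: term of the log entry being applied, so the service layer can reject stale results

    SnapshotValid bool
    Snapshot      []byte
    SnapshotTerm  int
    SnapshotIndex int
}

type Raft struct {
    // ...existing fields from Lab 3...
    InstallSnapshotMutex sync.Mutex // new: held while a leader-sent snapshot is being pushed to applyChan
}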
sendHeartsbeats
func (rf *Raft) sendHeartsbeats(curterm int, leaderCommit int, me int, peerslen int) {
    // operate on a copy of logs
    rf.statusAccessMutex.Lock()
    curlogs := make([]logEntry, len(rf.logs))
    copy(curlogs, rf.logs)
    rf.statusAccessMutex.Unlock()
    for i := 0; i < peerslen; i++ {
        if i == me {
            continue
        }
        go func(serverid int) {
            //code...
            if nextIndex <= rf.lastSnapshotIndex {
                // send a snapshot instead
                //code...
                sendEnd := rf.peers[serverid] // minor change
                rf.statusAccessMutex.Unlock()
                ok := sendEnd.Call("Raft.InstallSnapshot", &request, &response)
                //code....
            } else {
                // The two checks below verify that the copy of logs still matches the original after
                // the lock was released, to avoid sending entries that have since been compacted into
                // a snapshot. This is more complicated than necessary: working directly on rf.logs
                // instead of a copy is also fine. The copy was meant to reduce contention on the logs
                // slice when one heartbeat round sends entries to several followers, but after fixing
                // the bugs it caused, using the original slice turned out to be the simpler choice.
                // Without the copy these two checks are not needed, because nextIndex and
                // lastSnapshotIndex always match the original log slice.
                if len(curlogs) != len(rf.logs) {
                    rf.statusAccessMutex.Unlock()
                    return
                }
                for i := 0; i < len(curlogs); i++ {
                    if curlogs[i] != rf.logs[i] {
                        rf.statusAccessMutex.Unlock()
                        return
                    }
                }
                //code...
                // build a fresh slice for the entries to send
                request.Entries = make([]logEntry, len(curlogs)-nextIndex+rf.lastSnapshotIndex+1)
                copy(request.Entries, curlogs[nextIndex-rf.lastSnapshotIndex-1:])
                request.PreLogIndex = nextIndex - 1
                if (nextIndex - rf.lastSnapshotIndex - 1 - 1) >= 0 {
                    request.PreLogTerm = curlogs[nextIndex-rf.lastSnapshotIndex-1-1].Term
                } else {
                    request.PreLogTerm = rf.lastSnapshotTerm
                }
                sendEnd := rf.peers[serverid]
                rf.statusAccessMutex.Unlock()
                ok := sendEnd.Call("Raft.AppendEntries", &request, &response)
                //code ...
            }
        }(i)
    }
}
matchNewEntries
This function received an important change; many bugs traced back to it. The previous code deleted everything after preLogIndex, regardless of whether the later entries were the newest entries received in the current term. Because Lab 4A has throughput requirements, Start now sends a heartbeat immediately for every new log entry, which creates the following situation. Suppose the leader receives three log appends in a row; it may then send each follower three requests whose entry slices have lengths 1, 2, and 3. Due to network instability, the request of length 3 can arrive at a follower first, and if the leader's commitIndex equals the index of the newest entry, the follower advances its commitIndex. The request of length 1 then arrives last. With the old implementation, the length-1 request would simply truncate the last two entries appended by the length-3 request, leaving the follower's commitIndex larger than the index of its last log entry, which can cause an out-of-range access when applying to the service layer. The fix: only truncate when an existing entry's term conflicts with the term of the corresponding new entry, dropping the conflicting entry and everything after it; if there is no conflict, do not truncate.
func (rf *Raft) matchNewEntries(Entries []logEntry, preLogIndex int, preLogTerm int, response *AppendEntriesResponse) bool {
    if preLogIndex == -1 || preLogIndex <= rf.lastSnapshotIndex {
        if preLogIndex < rf.lastSnapshotIndex {
            response.FastBack = rf.commitIndex + 1
            response.Success = false
            return false
        }
    } else if len(rf.logs) <= preLogIndex-rf.lastSnapshotIndex-1 { // this peer's log has no entry at index preLogIndex
        DPrintf("server[%v]>>don't contain index = %v log entry", rf.me, preLogIndex)
        response.Success = false
        response.FastBack = len(rf.logs) + rf.lastSnapshotIndex + 1
        return false
    } else if rf.logs[preLogIndex-rf.lastSnapshotIndex-1].Term != preLogTerm {
        // the entry at the same index has a term that conflicts with the new entries
        DPrintf("server[%v]>>log entry[term:[%v]] can't match new log entry[term:[%v]] whose index = %v ", rf.me, rf.logs[preLogIndex-rf.lastSnapshotIndex-1].Term, preLogTerm, preLogIndex)
        response.FastBack = rf.commitIndex + 1 // let the leader jump its nextIndex straight to this peer's commitIndex+1
        response.Success = false
        return false
    }
    index := 0
    for ; index < len(Entries); index++ {
        // if the entries to append run past the end of this peer's log, break out and append the remainder of the request's entries below
        if index+preLogIndex-rf.lastSnapshotIndex >= len(rf.logs) {
            break
        }
        // find the first conflicting index; the conflicting entry and everything after it are deleted
        if rf.logs[index+preLogIndex-rf.lastSnapshotIndex].Term != Entries[index].Term {
            rf.logs = rf.logs[0 : index+preLogIndex-rf.lastSnapshotIndex]
            break
        }
    }
    rf.logs = append(rf.logs, Entries[index:]...)
    DPrintf("server:%v>>receive new logs from leader, begin index:%v, end:%v, append new logs %v, now logs %v", rf.me, preLogIndex+1, preLogIndex+len(Entries), Entries, rf.logs)
    return true
}
applyEntries
This change is mainly for one of the 3D tests, which checks that when the service layer receives a snapshot with lastSnapshotIndex = x, the next applied log entry must have index x+1, i.e., immediately after the snapshot's last entry. This means entries can no longer be applied from a copy of logs: the copy may become stale once a snapshot is taken, because a snapshot received from the leader is pushed to the apply channel in a separate goroutine and can jump ahead of an entry taken from the copy. For example, suppose the copy covers indices 100-110, and a snapshot request from the leader arrives whose last entry has index 115 and is sent to the channel. Meanwhile the copy has been applied up to index 105; before index 106 is sent, the snapshot (index 115) reaches the channel first, so the service layer sets lastApplied to 115 and then treats the later arrival of index 106 as a failed apply. Hence the copy cannot be used here.
func (rf *Raft) applyEntries(sleep int) {
    for !rf.killed() {
        time.Sleep(time.Duration(sleep) * time.Millisecond)
        rf.statusAccessMutex.Lock()
        for rf.lastApplied < rf.commitIndex {
            nextApplied := rf.lastApplied + 1 // remember the index to be applied next
            var sendApply ApplyMsg
            sendApply.Command = rf.logs[rf.lastApplied-rf.lastSnapshotIndex-1+1].Command
            sendApply.CommandIndex = rf.lastApplied + 2
            sendApply.CommandValid = true
            sendApply.CommandTerm = rf.logs[rf.lastApplied-rf.lastSnapshotIndex-1+1].Term
            rf.statusAccessMutex.Unlock()
            rf.applyChan <- sendApply
            rf.statusAccessMutex.Lock()
            if rf.lastApplied+1 == nextApplied { // if lastApplied did not change while the lock was released (an InstallSnapshot from the leader may change it)
                rf.lastApplied = nextApplied // advance lastApplied
            }
        }
        rf.statusAccessMutex.Unlock()
    }
}
AppendEntries
Only small changes here; the main one is the extra synchronization lock, which is explained under InstallSnapshot. Everything else is minor.
func (rf *Raft) AppendEntries(request *AppendEntriesRequest, response *AppendEntriesResponse) {
    rf.InstallSnapshotMutex.Lock() // first check whether a snapshot is currently being sent to the apply channel; if so, do not append entries yet
    rf.InstallSnapshotMutex.Unlock()
    rf.statusAccessMutex.Lock()
    defer rf.statusAccessMutex.Unlock()
    if request.Term < rf.currentTerm {
        response.Success = false
        response.Term = rf.currentTerm
        return
    }
    rf.currentTerm = request.Term
    rf.state = 0
    rf.leaderId = request.LeaderId
    response.Term = request.Term
    response.Success = true
    if rf.matchNewEntries(request.Entries, request.PreLogIndex, request.PreLogTerm, response) {
        // commitIndex may only be advanced after successfully matching the leader's log;
        // this guards against an earlier request arriving later (due to the network) than a newer request whose commitIndex is larger
        if request.LeaderCommit >= rf.commitIndex {
            if len(rf.logs)-1 < request.LeaderCommit-rf.lastSnapshotIndex-1 {
                rf.commitIndex = len(rf.logs) + rf.lastSnapshotIndex + 1 - 1
            } else {
                rf.commitIndex = request.LeaderCommit
            }
        }
    }
    rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
    rf.persist(rf.persister.ReadSnapshot())
}
InstallSnapshot
This change is critical; without it some of the 4B tests fail. The main change: when a snapshot received from the leader is applied to the upper layer, a lock is taken first, to avoid the problem shown in the figure:

Entries that come after the snapshot can be placed into the channel before the snapshot itself; the service layer would then apply new operations on the old values instead of the new values contained in the snapshot. (The snapshot contains the service layer's key/value state; this is explained in the Lab 4 part.)
So after receiving the leader's install-snapshot request, we take the lock before the handler returns, to mark that a snapshot is being delivered, and release it only after the snapshot has been placed into the channel; newly arriving entries then land after the new snapshot. The snapshot is sent to the channel from a separate goroutine to avoid a deadlock: sending it inline could deadlock as shown below:

This is similar to the applyEntries deadlock discussed in Lab 3.
func (rf *Raft) InstallSnapshot(request *InstallSnapshotRequest, response *InstallSnapshotResponse) {
    rf.statusAccessMutex.Lock()
    defer rf.statusAccessMutex.Unlock()
    response.Term = request.Term
    if request.Term < rf.currentTerm {
        response.Term = rf.currentTerm
        return
    }
    if request.LastIncludeIndex <= rf.commitIndex { // if the snapshot ends at or before this peer's commitIndex, there is no need to install it
        return
    }
    curindex := request.LastIncludeIndex - rf.lastSnapshotIndex - 1
    if curindex < len(rf.logs) {
        if rf.logs[curindex].Term != request.LastIncludeTerm { // compare against the snapshot's last included term
            rf.logs = make([]logEntry, 0)
        } else {
            logs := rf.logs[curindex+1:]
            rf.logs = make([]logEntry, len(rf.logs)-curindex-1)
            copy(rf.logs, logs)
        }
    } else {
        rf.logs = make([]logEntry, 0)
    }
    rf.lastSnapshotIndex = request.LastIncludeIndex
    rf.lastSnapshotTerm = request.LastIncludeTerm
    rf.lastApplied = request.LastIncludeIndex
    rf.commitIndex = request.LastIncludeIndex
    rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
    rf.currentTerm = request.Term
    rf.state = 0
    rf.leaderId = request.LeaderId
    rf.persist(request.Data)
    // This block was pulled out of the goroutine below so that the node's state cannot become stale
    // before the goroutine actually runs; in particular lastSnapshotIndex might otherwise change.
    var sendApply ApplyMsg
    sendApply.CommandValid = false
    sendApply.Snapshot = request.Data
    sendApply.SnapshotIndex = rf.lastSnapshotIndex + 1
    sendApply.SnapshotTerm = rf.lastSnapshotTerm
    sendApply.SnapshotValid = true
    // Prevent entries that logically follow this snapshot from being put into the channel before the
    // snapshot itself, which would break linearizability. If the snapshot were sent from a separate
    // goroutine without this lock, then once statusAccessMutex is released the server could append new
    // entries; strictly speaking those entries come after the snapshot, yet they could reach the
    // channel first. The service layer would then apply them on top of the old key/value state instead
    // of the state contained in the snapshot, so history would no longer be linearizable; when the
    // snapshot finally arrived, the service layer would treat it as stale and ignore it.
    rf.InstallSnapshotMutex.Lock() // take the lock
    go func() {
        rf.applyChan <- sendApply
        rf.InstallSnapshotMutex.Unlock() // release once the snapshot has been delivered
    }()
}
Start
Lab 4A has a speed test that measures client request latency. The old code waited for the next heartbeat to ship new entries to the followers for consensus, but that is too slow: the test requires an average of less than 33 ms per request. So now, every time a log entry is appended, a heartbeat is sent to the followers immediately to replicate it.
func (rf *Raft) Start(command interface{}) (int, int, bool) {
    index := -1
    term := -1
    isLeader := true
    rf.statusAccessMutex.Lock()
    defer rf.statusAccessMutex.Unlock()
    if rf.state != 1 {
        isLeader = false
    } else {
        //code...
        go rf.sendHeartsbeats(rf.currentTerm, rf.commitIndex, rf.me, len(rf.peers))
        rf.heartsbeatsTimer.Reset(time.Duration(rf.heartsbeatsTime) * time.Millisecond)
    }
    return index, term, isLeader
}
Lab 4 Code Implementation
RPC Message Types
Unlike Lab 2, Put/Append no longer returns the old value, but it does return an error type, which the client uses to decide what to do next.
type PutAppendArgs struct {
    Key       string
    Value     string
    ClientID  int64 // unique client id
    RequestID int64 // monotonically increasing request id
}
type PutAppendReply struct {
    Err Err
}
type GetArgs struct {
    Key       string
    ClientID  int64
    RequestID int64
}
type GetReply struct {
    Err   Err
    Value string
}
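The error values (OK, ErrWrongLeader, ErrTimeOut) and the operation-name constants (GET, PUT, APPEND) used throughout the client and server code are not shown in this post. A minimal sketch of declarations that would match those uses (the exact form is assumed):

type Err string

const (
    OK             = "OK"
    ErrWrongLeader = "ErrWrongLeader"
    ErrTimeOut     = "ErrTimeOut"
)

// operation names stored in Op.CommandType and passed as the client's op argument
const (
    GET    = "Get"
    PUT    = "Put"
    APPEND = "Append"
)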
Client
The client is straightforward, so I won't go into much detail.
type Clerk struct {
    servers   []*labrpc.ClientEnd
    clientid  int64 // unique id of this client
    requestid int64 // unique request id (monotonically increasing)
    leaderid  int64 // remember the leader so we don't have to search for it on every request
}
func MakeClerk(servers []*labrpc.ClientEnd) *Clerk {
    ck := new(Clerk)
    ck.servers = servers
    ck.clientid = nrand() // randomly generated id
    ck.requestid = 0      // request ids start from 0
    ck.leaderid = 0       // assume server 0 is the leader by default
    return ck
}
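MakeClerk generates the client id with nrand(); this helper comes with the lab skeleton and looks roughly like this (shown for completeness, using crypto/rand and math/big):

func nrand() int64 {
    max := big.NewInt(int64(1) << 62)       // ids are drawn from [0, 2^62)
    bigx, _ := rand.Int(rand.Reader, max)   // cryptographically random value
    return bigx.Int64()
}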
func (ck *Clerk) Get(key string) string {
    request := GetArgs{key, ck.clientid, ck.requestid}
    response := GetReply{}
    ck.requestid++
    ok := false
    for !ok { // retry until the request succeeds
        response = GetReply{}
        ok = ck.servers[ck.leaderid].Call("KVServer.Get", &request, &response)
        if !ok || response.Err != OK { // request failed
            ok = false
            ck.leaderid = (ck.leaderid + 1) % int64(len(ck.servers)) // try the next server as leader
        }
    }
    return response.Value
}
func (ck *Clerk) PutAppend(key string, value string, op string) {
    request := PutAppendArgs{key, value, ck.clientid, ck.requestid}
    response := PutAppendReply{}
    ck.requestid++
    ok := false
    for !ok {
        response = PutAppendReply{}
        if op == PUT {
            ok = ck.servers[ck.leaderid].Call("KVServer.Put", &request, &response)
        } else {
            ok = ck.servers[ck.leaderid].Call("KVServer.Append", &request, &response)
        }
        if !ok || response.Err != OK { // request failed
            ok = false
            ck.leaderid = (ck.leaderid + 1) % int64(len(ck.servers)) // try the next server as leader
        }
    }
}
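The exported Put and Append methods are not shown above; in the lab skeleton they are thin wrappers around PutAppend, roughly as follows (using the PUT/APPEND constants assumed earlier):

func (ck *Clerk) Put(key string, value string) {
    ck.PutAppend(key, value, PUT)
}
func (ck *Clerk) Append(key string, value string) {
    ck.PutAppend(key, value, APPEND)
}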
Server
Some struct types
type Op struct { // describes one client command
    CommandType string
    Key         string
    Value       string
    // Used to filter out duplicate operations: the same request may be committed more than once
    // but must be executed only once.
    ClientID  int64 // id of the requesting client
    RequestID int64 // request id
}
// Each client request gets a waiting channel; once Raft reaches consensus and the server executes
// the operation, the result is delivered through a value of this type.
type notifyChanMsg struct {
    err    Err
    Result string
}
// Information about a client's most recent request: 1. fast replies 2. duplicate suppression
type clientLastReply struct {
    Requestid int64
    ReplyMsg  string
}
// server state
type KVServer struct {
    mu           sync.Mutex
    me           int
    rf           *raft.Raft
    applyCh      chan raft.ApplyMsg
    dead         int32 // set by Kill()
    maxraftstate int   // snapshot if log grows this big
    timeout           time.Duration                // how long to wait for a request's result before timing out
    Keyvalue          map[string]string            // the key/value store
    ClientLastRequest map[int64]clientLastReply    // last request info per client
    notifyChan        map[int64]chan notifyChanMsg // channels used to notify waiters when an operation completes
    persister         *raft.Persister
    snapshoting       bool  // whether a snapshot is in progress (optional; left over from when snapshots ran in their own goroutine)
    MaxCommitIndex    int64 // highest log index processed so far; recorded even for duplicate entries that are not executed
}
applyLogs
Waits for the Raft layer to deliver a snapshot or a command.
func (kv *KVServer) applyLogs() {
    for !kv.killed() {
        applymsg := <-kv.applyCh
        if applymsg.CommandValid { // Raft delivered a command
            kv.commandApplyHandler(applymsg)
        } else if applymsg.SnapshotValid { // Raft delivered a snapshot
            kv.snapshotHandler(applymsg)
        }
    }
}
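applyLogs checks kv.killed(); Kill and killed come from the lab skeleton and are implemented with an atomic flag on kv.dead, roughly like this:

func (kv *KVServer) Kill() {
    atomic.StoreInt32(&kv.dead, 1) // mark the server as killed
    kv.rf.Kill()                   // also stop the underlying Raft peer
}

func (kv *KVServer) killed() bool {
    return atomic.LoadInt32(&kv.dead) == 1
}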
snapshot
Takes a snapshot to keep the persisted Raft state from exceeding maxraftstate.
func (kv *KVServer) snapshot(index int) {
    datalen := kv.persister.RaftStateSize() // current size of the persisted Raft state
    if float64(datalen)/float64(kv.maxraftstate) >= 0.9 { // float division, so the 90% threshold actually takes effect
        buffer := new(bytes.Buffer)
        datacoder := labgob.NewEncoder(buffer)
        datacoder.Encode(kv.Keyvalue)
        datacoder.Encode(kv.ClientLastRequest)
        datacoder.Encode(kv.MaxCommitIndex)
        dataBuffer := buffer.Bytes()
        kv.rf.Snapshot(index, dataBuffer)
    }
    kv.snapshoting = false
}
snapshotHandler
Handles an install-snapshot request delivered from the leader. Such a request usually arrives because this server's log has fallen far behind and the leader no longer has the missing entries, so it sends a snapshot instead.
func (kv *KVServer) snapshotHandler(applymsg raft.ApplyMsg) {
    kv.mu.Lock()
    defer kv.mu.Unlock()
    if applymsg.SnapshotIndex >= int(kv.MaxCommitIndex) { // the snapshot's last operation index is at least the current max index
        snapdata := bytes.NewBuffer(applymsg.Snapshot)
        datadecoder := labgob.NewDecoder(snapdata)
        keyvalue := map[string]string{}                  // key/value state contained in the snapshot, i.e. the result of the operations executed by the leader's service layer
        clientLastRequest := map[int64]clientLastReply{} // the leader server's per-client last-request info, so this server can suppress duplicate operations
        var maxCommitIndex int64
        if datadecoder.Decode(&keyvalue) != nil || datadecoder.Decode(&clientLastRequest) != nil || datadecoder.Decode(&maxCommitIndex) != nil {
            DPrintf("failed to decode snapshot")
            return
        }
        kv.Keyvalue = keyvalue
        kv.ClientLastRequest = clientLastRequest
        kv.MaxCommitIndex = maxCommitIndex
    }
}
readPersist
Reads persisted state after a crash and restart; the server's keyvalue, clientLastRequest, and maxCommitIndex are stored inside the snapshot.
func (kv *KVServer) readPersist(data []byte) {
    if data == nil || len(data) < 1 {
        return
    }
    DPrintf("server:%v>>restarting", kv.me)
    snapdata := bytes.NewBuffer(data)
    datadecoder := labgob.NewDecoder(snapdata)
    keyvalue := map[string]string{}
    clientLastRequest := map[int64]clientLastReply{}
    var maxCommitIndex int64
    if datadecoder.Decode(&keyvalue) != nil || datadecoder.Decode(&clientLastRequest) != nil || datadecoder.Decode(&maxCommitIndex) != nil {
        DPrintf("failed to decode snapshot")
        return
    }
    kv.Keyvalue = keyvalue
    kv.ClientLastRequest = clientLastRequest
    kv.MaxCommitIndex = maxCommitIndex
}
commandApplyHandler
Executes a command delivered by the Raft layer, saves the result, and notifies the waiting channel, which in turn answers the client. The waiter may already have timed out and gone away, which is fine: after a timeout the client retries and can then obtain the result directly.
func (kv *KVServer) commandApplyHandler(applymsg raft.ApplyMsg) {
    command := applymsg.Command.(Op) // type-assert the command to Op
    logterm := applymsg.CommandTerm
    logindex := applymsg.CommandIndex
    clientid := command.ClientID
    requestid := command.RequestID
    optype := command.CommandType
    key := command.Key
    value := command.Value
    kv.mu.Lock()
    if logindex < int(kv.MaxCommitIndex) {
        kv.mu.Unlock()
        return
    }
    lastOp, ok := kv.ClientLastRequest[clientid]
    if optype != GET && ok && lastOp.Requestid >= requestid {
        // This command has already been executed: different requests from the same client always
        // have different ids, so a repeated id means a duplicate request.
        kv.MaxCommitIndex = int64(logindex)
        if kv.maxraftstate != -1 {
            if !kv.snapshoting {
                kv.snapshoting = true
                kv.snapshot(logindex)
            }
        }
        kv.mu.Unlock()
        return
    } else { // execute and record the result
        if optype == APPEND {
            kv.Keyvalue[key] = kv.Keyvalue[key] + value
        }
        if optype == PUT {
            kv.Keyvalue[key] = value
        }
        // update this server's record of client clientid's most recent executed request
        kv.ClientLastRequest[clientid] = clientLastReply{requestid, kv.Keyvalue[key]}
    }
    kv.MaxCommitIndex = int64(logindex)
    if kv.maxraftstate != -1 {
        if !kv.snapshoting {
            kv.snapshot(logindex)
        }
    }
    notifychan, ok1 := kv.notifyChan[int64(logindex)]
    notifychanmsg := notifyChanMsg{}
    notifychanmsg.Result = kv.Keyvalue[key]
    kv.mu.Unlock()
    curterm, isleader := kv.rf.GetState()
    if !isleader {
        notifychanmsg.err = ErrWrongLeader
    }
    // Only notify if the committed entry's term equals the current term; otherwise the entry at this
    // index may belong to a different client's request (see the ApplyMsg change in the Raft struct section above).
    if ok1 && curterm == logterm {
        go func() {
            select {
            case notifychan <- notifychanmsg:
                DPrintf("server %v>> notify client %v request %v", kv.me, clientid, requestid)
            case <-time.After(time.Millisecond * 50):
            }
        }()
    }
}
waitRequestCommit
Waits for the result of an operation request.
func (kv *KVServer) waitRequestCommit(err *Err, value *string, notifychan chan notifyChanMsg, clientid int64, requestid int64) {
    select {
    case notifymsg := <-notifychan:
        if notifymsg.err == ErrWrongLeader {
            DPrintf("server:%v>> request: clientid:%v,requestid:%v leader false", kv.me, clientid, requestid)
            *err = ErrWrongLeader
            return
        }
        *err = OK
        *value = notifymsg.Result
        DPrintf("server:%v>> request: clientid:%v,requestid:%v success", kv.me, clientid, requestid)
    case <-time.After(kv.timeout):
        DPrintf("server:%v>> request: clientid:%v,requestid:%v timeout", kv.me, clientid, requestid)
        *err = ErrTimeOut
    }
}
Get, Put, and Append
The only thing to note is that the lock must be released while waiting for the result.
func (kv *KVServer) Get(args *GetArgs, reply *GetReply) {
    // Your code here.
    kv.mu.Lock()
    value, ok := kv.ClientLastRequest[args.ClientID]
    if ok && value.Requestid >= args.RequestID {
        reply.Err = OK
        reply.Value = value.ReplyMsg
        kv.mu.Unlock()
        return
    }
    kv.mu.Unlock()
    index, _, isleader := kv.rf.Start(Op{GET, args.Key, "", args.ClientID, args.RequestID})
    if !isleader {
        reply.Err = ErrWrongLeader
        return
    }
    kv.mu.Lock()
    kv.notifyChan[int64(index)] = make(chan notifyChanMsg)
    notifychan := kv.notifyChan[int64(index)]
    kv.mu.Unlock()
    // wait for Raft to commit the command and for the handler to reply
    kv.waitRequestCommit(&reply.Err, &reply.Value, notifychan, args.ClientID, args.RequestID)
    go func() {
        kv.mu.Lock()
        delete(kv.notifyChan, int64(index)) // the channel is keyed by log index, so delete it by index
        kv.mu.Unlock()
    }()
}
func (kv *KVServer) Put(args *PutAppendArgs, reply *PutAppendReply) {
    // Your code here.
    kv.mu.Lock()
    value, ok := kv.ClientLastRequest[args.ClientID]
    if ok && value.Requestid == args.RequestID {
        reply.Err = OK
        kv.mu.Unlock()
        return
    }
    kv.mu.Unlock()
    index, _, isleader := kv.rf.Start(Op{PUT, args.Key, args.Value, args.ClientID, args.RequestID})
    if !isleader {
        reply.Err = ErrWrongLeader
        return
    }
    kv.mu.Lock()
    kv.notifyChan[int64(index)] = make(chan notifyChanMsg)
    notifychan := kv.notifyChan[int64(index)]
    kv.mu.Unlock()
    // wait for Raft to commit the command and for the handler to reply
    valuetmp := ""
    kv.waitRequestCommit(&reply.Err, &valuetmp, notifychan, args.ClientID, args.RequestID)
    go func() {
        kv.mu.Lock()
        delete(kv.notifyChan, int64(index)) // the channel is keyed by log index, so delete it by index
        kv.mu.Unlock()
    }()
}
func (kv *KVServer) Append(args *PutAppendArgs, reply *PutAppendReply) {
    // Your code here.
    kv.mu.Lock()
    value, ok := kv.ClientLastRequest[args.ClientID]
    if ok && value.Requestid == args.RequestID {
        reply.Err = OK
        kv.mu.Unlock()
        return
    }
    kv.mu.Unlock()
    index, _, isleader := kv.rf.Start(Op{APPEND, args.Key, args.Value, args.ClientID, args.RequestID})
    if !isleader {
        reply.Err = ErrWrongLeader
        return
    }
    kv.mu.Lock()
    kv.notifyChan[int64(index)] = make(chan notifyChanMsg)
    notifychan := kv.notifyChan[int64(index)]
    kv.mu.Unlock()
    // wait for Raft to commit the command and for the handler to reply
    valuetmp := ""
    kv.waitRequestCommit(&reply.Err, &valuetmp, notifychan, args.ClientID, args.RequestID)
    go func() {
        kv.mu.Lock()
        delete(kv.notifyChan, int64(index)) // the channel is keyed by log index, so delete it by index
        kv.mu.Unlock()
    }()
}
StartKVServer
Starts the KVServer service.
func StartKVServer(servers []*labrpc.ClientEnd, me int, persister *raft.Persister, maxraftstate int) *KVServer {
    labgob.Register(Op{})
    kv := new(KVServer)
    kv.me = me
    kv.maxraftstate = maxraftstate
    kv.applyCh = make(chan raft.ApplyMsg)                 // channel through which Raft delivers agreed-upon operations
    kv.rf = raft.Make(servers, me, persister, kv.applyCh) // create the underlying Raft peer
    kv.persister = persister                              // keep the persistence handle
    kv.Keyvalue = make(map[string]string)
    kv.notifyChan = make(map[int64]chan notifyChanMsg)
    kv.ClientLastRequest = make(map[int64]clientLastReply)
    kv.timeout = time.Duration(500) * time.Millisecond
    kv.snapshoting = false
    kv.MaxCommitIndex = -1
    kv.readPersist(kv.persister.ReadSnapshot()) // restore state from the snapshot, if any
    go kv.applyLogs()                           // keep receiving data from the Raft layer
    return kv
}
Summary
How hard Lab 4 is depends entirely on how well Lab 3 was implemented: if your Lab 3 is flawless, Lab 4 only requires writing the server and client code.
