MIT6.5840 2024 Spring Lab3

Preface

  This is the third lab of the course. It has you implement the Raft algorithm, on top of which a fault-tolerant KV storage system is eventually built. The lab consists of four parts: 3A leader election, 3B log replication, 3C persistence, and 3D log compaction. I did the four parts one at a time, i.e. finished 3A before starting 3B and so on. Many people say 3A and 3B can be done together, but to avoid confusing myself when reading this back later I kept them separate, and the code is shown separately as well. Because different parts may repeat the same pieces of code, the rest of this post may look like it is mostly code.

Prerequisites

Go

1. select:
  select is Go's control statement for channels. It looks a lot like a switch: each case either receives from or sends on a channel. select picks one case that is ready to run; if none is ready it blocks, and if there is a default branch the default runs instead.

select {
    case <-channel1:
        // successfully received a value from channel1
        code...
    case channel2 <- i:
        // successfully sent i on channel2; if channel2 is unbuffered, this case is only ready when another goroutine is receiving with <-channel2
        code... 
    default:
        // runs when no other case is ready
        code...
}
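
To make this concrete, here is a small runnable toy example (not lab code): two goroutines send on two channels and select handles whichever is ready first.

package main

import (
	"fmt"
	"time"
)

func main() {
	ch1 := make(chan int)
	ch2 := make(chan string)

	go func() { ch1 <- 42 }()
	go func() { time.Sleep(50 * time.Millisecond); ch2 <- "late" }()

	for i := 0; i < 2; i++ {
		select {
		case v := <-ch1:
			fmt.Println("from ch1:", v) // usually printed first
		case s := <-ch2:
			fmt.Println("from ch2:", s)
		}
	}
}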

2. Timer:
  A one-shot timer: it fires once when the duration elapses, and must be Reset before it will fire again.

timer := time.NewTimer(1 * time.Millisecond) // the return type is *time.Timer
t := <-timer.C // timer.C is a channel of time.Time; this receive blocks until the timer fires
code...
timer.Reset(1 * time.Millisecond) // re-arm the timer
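
As a self-contained sketch of how this is used later for election timeouts (the 300 ms base here is arbitrary, not the lab's value), create the timer with a randomized duration, wait for it to fire, then Reset it with a fresh random duration:

package main

import (
	"fmt"
	"math/rand"
	"time"
)

func main() {
	base := 300 // milliseconds, arbitrary for this demo
	timer := time.NewTimer(time.Duration(base+rand.Intn(base)) * time.Millisecond)

	<-timer.C // the timer fires exactly once
	fmt.Println("timeout fired")

	timer.Reset(time.Duration(base+rand.Intn(base)) * time.Millisecond) // must Reset before it can fire again
	<-timer.C
	fmt.Println("timeout fired again")
}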

The leader-election part of the Raft algorithm

  Honestly, the paper already explains this part in great detail, so just read the paper. The links below apply to this part as well as to 3B, 3C and 3D.
Raft paper (Chinese translation)
Raft paper (original English)
Raft visualization

3A: Leader Election

Raft struct

type Raft struct {
	mu        sync.Mutex          // Lock to protect shared access to this peer's state
	peers     []*labrpc.ClientEnd // RPC end points of all peers
	persister *Persister          // Object to hold this peer's persisted state
	me        int                 // this peer's index into peers[]
	dead      int32               // set by Kill()
	/*****moyoj-3A******/
	statusAccessMutex sync.Mutex  // could simply have reused the mu above; I didn't notice at the time and defined my own
	heartsbeatsTimer *time.Timer  // leader heartbeat timer
	electionTimer *time.Timer     // election timeout timer
	heartsbeatsTime  int          // interval between two heartbeats
	electionTimeout  int          // election timeout
    state  int                    // 0: follower 1: leader 2: candidate
    currentTerm  int              // current term
	votedFor  int                 // who this peer voted for; -1 means it has not voted

	leaderId  int                 // id of the current leader, kept for redirection
	logs  []logEntry              // log entries
}

RPC message types

Just keep these consistent with the paper.

type RequestVoteArgs struct {
	// Your data here (3A, 3B).
	/*****moyoj-3A******/
	Term        int
	CandidateId int
}

// example RequestVote RPC reply structure.
// field names must start with capital letters!
type RequestVoteReply struct {
	// Your data here (3A).
	/*****moyoj-3A******/
	Term        int
	VoteGranted bool
}

type AppendEntriesRequest struct {
	/*****moyoj-3A******/
	Term         int
	LeaderId     int
	Entries      []logEntry
	PreLogIndex  int
	PreLogTerm   int
	LeaderCommit int
}

type AppendEntriesResponse struct {
	/*****moyoj-3A******/
	Term    int
	Success bool
}

electStart

  The function takes three parameters: the term at which this election was started, the index of the node starting the election, and the number of peers (passed in as arguments to avoid data races). It spawns one goroutine per peer to send RequestVote in parallel and uses a WaitGroup to wait until all replies have been collected. Since this function itself runs in its own goroutine, the rest of the program never blocks on it. While it is running, the election timer keeps ticking; if no leader heartbeat arrives in time, or this election fails (the votes are split), a new election will be started.

func (rf *Raft) electStart(curterm int, me int, peerslen int) {

	var sendWaitGroup sync.WaitGroup
	voteCount := 1 // vote count, starts at 1 because we vote for ourselves
	DPrintf("[%v],begin leader election,term:%v", me, curterm)
	for i := 0; i < peerslen; i++ {
		if i == me {   // no need to send a vote request to ourselves
			continue
		}
		sendWaitGroup.Add(1)
		go func(serverid int) {
			var request RequestVoteArgs
			var response RequestVoteReply
			request.CandidateId = me
			request.Term = curterm
			ok := rf.sendRequestVote(serverid, &request, &response)
			if ok { // got a reply
				rf.statusAccessMutex.Lock()                // lock so our state cannot change while the reply is processed
				if rf.state==2 && rf.currentTerm==curterm{ // before processing, check the term has not changed and we are still a candidate
					if response.VoteGranted {              // the peer granted its vote
						voteCount += 1                     // one more vote
						if voteCount >= peerslen/2 +1{     // if we have a majority,
							rf.state = 1                   // become leader
							rf.votedFor = -1               // clear votedFor so we can vote normally once we later step down to follower

							go rf.sendHeartsbeats(curterm,me,peerslen)  // immediately send heartbeats to everyone else
							rf.heartsbeatsTimer.Reset(time.Duration(rf.heartsbeatsTime)*time.Millisecond) // restart the heartbeat timer
						}
					} else {                                // the peer refused to vote
						if response.Term > rf.currentTerm { // it refuses either because our term is stale or because it already voted for someone else; a stale term lands us in this if
							rf.currentTerm = response.Term  // adopt the newer term
							rf.state = 0                    // become follower
							rf.votedFor = -1                // !!!!! election failed, reset to "has not voted"
                            // restart the election timeout
							rf.electionTimer.Reset(time.Duration(rf.electionTimeout + rand.Intn(rf.electionTimeout))*time.Millisecond)
						}
					}
				}
				rf.statusAccessMutex.Unlock()
			}
			sendWaitGroup.Done()
		}(i)
	}
	sendWaitGroup.Wait() // wait for all replies
    /** Before the code below runs, this node can be in one of several states:
        1. term unchanged, and it became leader
        2. term unchanged, but it received a heartbeat from another leader of the same term and became follower
        3. term stale, so it became follower; possible causes:
            1. a reply carried a larger term
            2. another candidate with a larger term asked this node to vote
            3. a leader with a larger term sent this node a heartbeat
        4. term unchanged and it is still a candidate, i.e. it did not get enough votes because the votes were split
    */
	rf.statusAccessMutex.Lock()
	if rf.state==2 && rf.currentTerm == curterm{ // only state 4 enters this if; states 1-3 were already handled by the other goroutines
		rf.state=0                               // become follower
		rf.votedFor = -1                         // reset votedFor so we can vote again
		rf.electionTimer.Reset(time.Duration(rf.electionTimeout + rand.Intn(rf.electionTimeout))*time.Millisecond)
		
	}
	rf.statusAccessMutex.Unlock()
}

sendHeartsbeats

  Like vote requests, the leader's heartbeats are sent by spawning one goroutine per peer; the parameters have the same meaning as in electStart.

func (rf *Raft) sendHeartsbeats(curterm int, me int, peerslen int) {
	for i := 0; i < peerslen; i++ {
		if i == me { // skip ourselves
			continue
		}
		go func(serverid int) {
			var request AppendEntriesRequest
			var response AppendEntriesResponse
			request.Entries = []logEntry{}
			request.LeaderId = me
			request.Term = curterm
			ok := rf.peers[serverid].Call("Raft.AppendEntries", &request, &response)
			if ok {
				rf.statusAccessMutex.Lock()
				if !response.Success && rf.currentTerm == curterm { // the heartbeat was rejected and our term has not changed since we sent it
					rf.currentTerm = response.Term       // adopt the newer term
					rf.state = 0                         // become follower
					rf.votedFor = -1                     // reset votedFor
                    // restart the election timeout
					rf.electionTimer.Reset(time.Duration(rf.electionTimeout + rand.Intn(rf.electionTimeout))*time.Millisecond)
				}
				rf.statusAccessMutex.Unlock()
			}

		}(i)
	}
}

AppendEntries

func (rf *Raft) AppendEntries(request *AppendEntriesRequest, response *AppendEntriesResponse) {

	/*****moyoj-3A******/
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if request.Term < rf.currentTerm {  // the request's term is smaller than ours: reply false
		response.Success = false        // failure flag
		response.Term = rf.currentTerm  // so the stale leader can update its term
		return
	}
    /** Do NOT reset votedFor to -1 here.
    Consider this scenario: node 0 (term=1) starts an election and this node votes for it; node 0 becomes leader (term=1).
    This node then receives node 0's heartbeat and resets votedFor. Meanwhile some node i, whose network is bad, never heard
    anything from node 0. At some point its network recovers and its election timeout happens to fire, so it starts an election
    (term=1). Because this node reset votedFor, it can still vote for i, and every node except leader 0 can vote for i, so i
    collects a majority and becomes leader (term=1). Now there are two leaders in the same term, which must never happen!
    The rule in my code is: "votedFor is reset to -1 only when I am a candidate and lose the election, or when I am a leader
    and step down to follower because my term is stale."
    */
    rf.state = 0                   // become follower
	response.Success = true        // reply success
	rf.currentTerm = request.Term  // refresh term
	rf.leaderId = request.LeaderId // switch leader
    // reset the election timeout
    rf.electionTimer.Reset(time.Duration(rf.electionTimeout + rand.Intn(rf.electionTimeout))*time.Millisecond)  
}

RequestVote

func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
	// Your code here (3A, 3B).
	/*****moyoj-3A******/
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if args.Term < rf.currentTerm { // the request's term is smaller than ours: refuse to vote and reply with our newer term
		reply.VoteGranted = false
		reply.Term = rf.currentTerm

		return
	}
	if args.Term == rf.currentTerm { // the request's term equals ours
        // if we are already the leader, or have already voted for someone, refuse as well, to avoid multiple leaders
		if rf.state == 1 || rf.votedFor != -1{
			reply.VoteGranted = false
			reply.Term = rf.currentTerm
			return
		}
	}

	/** The vote is granted in the following cases:
    1. Our term is smaller: we definitely vote and become a follower.
    2. Our term equals the request's term, we have not voted yet, and we are not the leader. This most likely means this node
       was a candidate that failed to win a majority and fell back to follower (that is exactly why votedFor was reset above),
       and another candidate's vote request arrives right then, so we can still vote for it. You might think this situation is
       too rare to bother with, but with an unreliable network even rare cases must be handled for the sake of stability!
     */
	rf.votedFor = args.CandidateId // vote for the candidate
	rf.currentTerm = args.Term     // update our term
	rf.state = 0                   // become follower
	reply.VoteGranted = true       // grant the vote
	reply.Term = args.Term         // setting this or not makes no difference here
    // reset the election timeout
    rf.electionTimer.Reset(time.Duration(rf.electionTimeout + rand.Intn(rf.electionTimeout))*time.Millisecond)
}

ticker

func (rf *Raft) ticker() {
	for rf.killed() == false {
		select {
		case <-rf.electionTimer.C:            // the election timeout fired
			rf.statusAccessMutex.Lock()
			if rf.state==0 || rf.state==2{    // we are currently a follower or a candidate
				rf.state=2                    // declare ourselves a candidate
				rf.votedFor = rf.me           // vote for ourselves, so we cannot also vote for another candidate and create multiple leaders
				rf.currentTerm+=1             // bump the term (the logical clock must move forward)
				go rf.electStart(rf.currentTerm,rf.me,len(rf.peers)) // start the election
                // restart the election timeout
				rf.electionTimer.Reset(time.Duration(rf.electionTimeout + rand.Intn(rf.electionTimeout))*time.Millisecond)
			}
			rf.statusAccessMutex.Unlock()
		case <-rf.heartsbeatsTimer.C:        // time to send the periodic heartbeats
			rf.statusAccessMutex.Lock()      
			if rf.state==1{                  // if we are still the leader
				go rf.sendHeartsbeats(rf.currentTerm,rf.me,len(rf.peers)) // start the heartbeat goroutines
                // restart the heartbeat timer
				rf.heartsbeatsTimer.Reset(time.Duration(rf.heartsbeatsTime)*time.Millisecond)
			}
			rf.statusAccessMutex.Unlock()
		}
	}
}

Make

func Make(peers []*labrpc.ClientEnd, me int,
	persister *Persister, applyCh chan ApplyMsg) *Raft {
	rf := &Raft{}
	rf.peers = peers
	rf.persister = persister
	rf.me = me // can be used as a unique identifier

	// Your initialization code here (3A, 3B, 3C).
	/*****moyoj-3A******/
	rf.currentTerm = 0   // initial term is 0
	rf.votedFor = -1     // has not voted yet
	rf.state = 0         // every node starts as a follower
	rf.logs = []logEntry{}
	rf.heartsbeatsTime = 110 // heartbeat interval (ms)
	rf.electionTimeout = 1000 // election timeout, randomized within [1000, 2000) ms
    // randomize the election timeout so split votes do not keep the cluster leaderless forever
	rf.electionTimer = time.NewTimer(time.Duration(rf.electionTimeout + rand.Intn(rf.electionTimeout))*time.Millisecond)
	rf.heartsbeatsTimer = time.NewTimer(time.Duration(rf.heartsbeatsTime)*time.Millisecond)

	// initialize from state persisted before a crash
	rf.readPersist(persister.ReadRaftState())
	// start ticker goroutine to start elections
	go rf.ticker()

	return rf
}

Stress test results

img

3B: Log Replication

Modified parts

This section only shows what was added to or changed in the existing functions and structs; some pre-existing code is included just to show where the changes go.

Raft struct

	/*****moyoj-3B******/
	//to test
	applyChan chan ApplyMsg // committed log entries are sent on this channel so the tester can check them

	commitIndex int        // index of the last log entry this node knows to be committed
	lastApplied int        // index of the last log entry this node has applied to its state machine
	//leader
	nextIndex  []int       // for each peer, the index of the next log entry to send to that peer
	matchIndex []int       // for each peer, the index of the last log entry known to match the leader's log

RPC message types

1.RequestVoteArgs

	/*****moyoj-3B******/
	LastLogIndex int       // index of the candidate's last log entry
	LastLogTerm  int       // term of the candidate's last log entry

2.AppendEntriesResponse

	/*****moyoj-3B******/
	FastBack int           // lets the leader move this follower's nextIndex back quickly

Make

	/*****moyoj-3B******/
	rf.commitIndex = -1    
	rf.lastApplied = -1
	rf.nextIndex = make([]int, len(rf.peers))
	rf.matchIndex = make([]int, len(rf.peers))
	rf.applyChan = applyCh
	go rf.applyEntries(20) // apply committed entries to the state machine every 20 ms
    
    // initialize from state persisted before a crash
	rf.readPersist(persister.ReadRaftState())
	// start ticker goroutine to start elections
	go rf.ticker()

	return rf

ticker

case <-rf.electionTimer.C:
	rf.statusAccessMutex.Lock()
	if rf.state == 0 || rf.state == 2 {
		rf.state = 2
		rf.votedFor = rf.me
		rf.currentTerm += 1

		/*****moyoj-3B******/
		lastLogIndex := -1    // for election safety the candidate must send the index and term of its last log entry
		lastLogTerm := -1
		if len(rf.logs) != 0 {                         // default to -1 when there is no log
			lastLogIndex = len(rf.logs) - 1            // index of the last log entry
			lastLogTerm = rf.logs[len(rf.logs)-1].Term // term of the last log entry
		}

		go rf.electStart(rf.currentTerm, lastLogIndex, lastLogTerm, rf.me, len(rf.peers))
		rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	}
	rf.statusAccessMutex.Unlock()

RequestVote (end of the function)

  The code added here is mainly to keep log replication safe by guaranteeing that the new leader's log is up to date: a candidate's log must be at least as up to date as the log of a majority of nodes. What does "more up to date" mean? The log whose last entry has the larger term is more up to date; if the last entries have the same term, the log with more entries is more up to date.

Together these checks ensure the safety property.

	reply.VoteGranted = true  // default to true: vote for the requesting candidate
	if len(rf.logs) != 0 && rf.logs[len(rf.logs)-1].Term > args.LastLogTerm { // the candidate's last log term is behind ours
		reply.VoteGranted = false

	}
	// the candidate's last log term equals ours, but its log has fewer entries
	if len(rf.logs) != 0 && rf.logs[len(rf.logs)-1].Term == args.LastLogTerm && args.LastLogIndex < len(rf.logs)-1 {
		reply.VoteGranted = false
	}
    // if the vote is granted, record votedFor and reset the election timeout
	if reply.VoteGranted{
		// record the vote
		rf.votedFor = args.CandidateId // vote for the candidate
		rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	}
	rf.currentTerm = args.Term
	rf.state = 0
	reply.Term = args.Term
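
As an aside, the same comparison can be written as a small standalone helper; this is only a hedged sketch (the helper name and the idea of extracting it are not part of my code above):

// logUpToDate reports whether a candidate's log is at least as up to date as the voter's log,
// in the sense of section 5.4.1 of the paper; the arguments are the terms/indices of the last entries.
func logUpToDate(candLastTerm, candLastIndex, myLastTerm, myLastIndex int) bool {
	if candLastTerm != myLastTerm {
		return candLastTerm > myLastTerm // the higher last term wins
	}
	return candLastIndex >= myLastIndex // same last term: the longer (or equal) log wins
}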

AppendEntries

This adds a check that the entry just before the new entries the leader sends matches the follower's log; if it does not match, the append fails and the leader's nextIndex for this follower has to move back to where the two logs match (matching means the entry at that position has the same index and the same term). If there is no entry before the new entries, that also counts as a match, and the follower's log is simply replaced by the entries the leader sent.

	/*****moyoj-3B******/
	response.Success = true // default: the append (or heartbeat) succeeds
    
	if rf.matchNewEntries(request.Entries, request.PreLogIndex, request.PreLogTerm,response) {
		// commitIndex may only be advanced if our log matched the leader's; if it did not, our log is out of sync and possibly behind, and advancing commitIndex could apply stale entries to the state machine
		if request.LeaderCommit > rf.commitIndex {
			if len(rf.logs)-1 < request.LeaderCommit { // take the smaller of the index of our last new entry and the leader's commitIndex
				rf.commitIndex = len(rf.logs) - 1
			} else {
				rf.commitIndex = request.LeaderCommit
			}
		}
	}
    
	rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	rf.currentTerm = request.Term
	rf.state = 0
	rf.leaderId = request.LeaderId
	response.Term = request.Term
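
A quick worked example of that "take the smaller of the two" rule (numbers made up for illustration): if after the append the follower only holds entries at indices 0 to 4 while the leader reports LeaderCommit = 7, the follower sets commitIndex = 4, because it cannot commit entries it does not yet have; once later appends bring its log up to index 7, the next heartbeat lets it advance commitIndex to 7.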

sendHeartsbeats

A parameter (leaderCommit int) was added, passing in the leader's current commitIndex (the highest log index the leader knows to be committed). The main change is that heartbeats now also carry log entries to the followers, and commitIndex, nextIndex and matchIndex are updated from the replies.

go func(serverid int) {
	var request AppendEntriesRequest
	var response AppendEntriesResponse
	request.Entries = []logEntry{}
	request.LeaderId = me
	request.Term = curterm
	request.LeaderCommit = leaderCommit

	/*****moyoj-3B******/
	request.PreLogIndex = -1 // index of the entry just before the entries being appended; defaults to -1
	request.PreLogTerm = -1
	rf.statusAccessMutex.Lock()
	if rf.currentTerm != curterm {
		rf.statusAccessMutex.Unlock()
		return
	}
	nextIndex := rf.nextIndex[serverid]
	// send every not-yet-sent entry in one shot
	request.Entries = rf.logs[nextIndex:len(rf.logs)] // the entries to append
	request.PreLogIndex = nextIndex - 1               
	if (nextIndex - 1) >= 0 {                         // there is an entry before them
		request.PreLogTerm = rf.logs[nextIndex-1].Term
	}
	rf.statusAccessMutex.Unlock()

	ok := rf.peers[serverid].Call("Raft.AppendEntries", &request, &response)

    if ok {
    	rf.statusAccessMutex.Lock()
    	defer rf.statusAccessMutex.Unlock()
    	if rf.currentTerm == curterm {
    		if !response.Success {
    			if rf.currentTerm < response.Term {
    				rf.currentTerm = response.Term
    				rf.state = 0
    				rf.votedFor = -1
    				rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
    			} else { // our term is not behind, so the failure was caused by a log mismatch
    				/*****moyoj-3B******/
    				// move nextIndex back to where this follower's log matches
    				rf.nextIndex[serverid] = response.FastBack
    			}
    		} 
    
    		if len(request.Entries)==0 || !response.Success{ // if Entries is empty this was just a heartbeat, and if the append failed there is nothing to update either
    			return
    		}
    		// the append succeeded
    		/*****moyoj-3B******/
    		rf.nextIndex[serverid] = rf.nextIndex[serverid] + len(request.Entries) // advance the index of the next entry to send to this follower
    		rf.matchIndex[serverid] = rf.nextIndex[serverid] - 1                
    
    		// now try to advance commitIndex
    		for maxMatchIndex := rf.commitIndex; maxMatchIndex < len(rf.logs); maxMatchIndex++ {
            // the leader only commits entries of its own term; it never directly commits entries of older terms (doing so runs into the situation in section 5.4.2 of the paper). Committing a new entry of the current term commits the older ones along with it. "Committing" here simply means the leader advancing commitIndex. commitIndex only moves once the current term has produced a new entry: this is very important!!
    			if maxMatchIndex == -1 || rf.logs[maxMatchIndex].Term != rf.currentTerm { 
    				continue
    			}
    			// count how many peers have replicated the entry at this index
    			count := 1 // count ourselves
    			for peersIndex := 0; peersIndex < len(rf.peers); peersIndex++ {
    				if peersIndex == me {
    					continue
    				}
                    // this follower's last matching entry is at or beyond the index being checked
    				if maxMatchIndex <= rf.matchIndex[peersIndex] {
    					count++
    				}
    			}
    			// if this index is replicated on a majority (counting ourselves), advance commitIndex
    			if count >= len(rf.peers)/2+1 {
    				rf.commitIndex = maxMatchIndex
    			}
    		}
    	}
    }
}
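
A different way to compute the same thing, shown only as a hedged alternative sketch (this is not what the code above does): keep matchIndex[rf.me] equal to the leader's own last log index, sort a copy of matchIndex, and take the median, which by construction is replicated on a majority. The own-term check from the comment above still has to be applied before actually advancing commitIndex, and this needs the "sort" import.

func (rf *Raft) majorityMatchIndex() int {
	match := make([]int, len(rf.matchIndex))
	copy(match, rf.matchIndex) // assumes matchIndex[rf.me] tracks the leader's own last log index
	sort.Ints(match)
	// with n peers, the value at position (n-1)/2 of the ascending sort is held by a majority
	return match[(len(match)-1)/2]
}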

electStart

Two parameters were added, (lastLogIndex int, lastLogTerm int): the index and term of the candidate's last log entry are passed in before the election starts, to guarantee election safety, i.e. that a node with an out-of-date log cannot be elected; when the node becomes leader it also reinitializes nextIndex and matchIndex.

go func(serverid int) {
	var request RequestVoteArgs
	var response RequestVoteReply
	request.CandidateId = me
	request.Term = curterm

	/*****moyoj-3B******/
	request.LastLogIndex = lastLogIndex
	request.LastLogTerm = lastLogTerm

	ok := rf.sendRequestVote(serverid, &request, &response)

	if ok {
		rf.statusAccessMutex.Lock()

		if rf.state == 2 && rf.currentTerm == curterm {
			if response.VoteGranted {
				voteCount += 1
				if voteCount >= peerslen/2+1 {
					rf.state = 1
					rf.votedFor = -1

					/*****moyoj-3B******/
					// reinitialize the nextIndex and matchIndex arrays
					for index := 0; index < len(rf.peers); index++ {
						rf.nextIndex[index] = len(rf.logs)         // next entry to send to each follower: one past our own last entry
						rf.matchIndex[index] = -1                  // last entry known to match on each follower
					}

				
					go rf.sendHeartsbeats(curterm, rf.commitIndex, me, peerslen)
					rf.heartsbeatsTimer.Reset(time.Duration(rf.heartsbeatsTime) * time.Millisecond)
				}
			} else if response.Term > rf.currentTerm{ // the vote was refused because our term is behind, so become a follower; being refused because our log is behind does not require stepping down, since our log may only be behind this particular peer, not necessarily behind the others
				rf.currentTerm = response.Term
				rf.state = 0
				rf.votedFor = -1 // !!!!! election failed, reset to "has not voted"
				rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
			}
		}
		rf.statusAccessMutex.Unlock()
	}
	sendWaitGroup.Done()
}(i)

New functions

Start

Append a new command to the leader's log. Return values:

  1. the index the command's log entry will have if it is eventually committed
  2. the leader's term when the command was appended
  3. whether this node is the leader
func (rf *Raft) Start(command interface{}) (int, int, bool) {
	index := -1
	term := -1
	isLeader := true

	// Your code here (3B).
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if rf.state != 1 {
		isLeader = false
	} else {
		rf.logs = append(rf.logs, logEntry{command, rf.currentTerm})

		index = len(rf.logs)     // index of the log entry for this command; the tester starts indices at 1, so the new log length is exactly that index
		term = rf.currentTerm    // current term
	}
	return index, term, isLeader
}
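
For context, a hedged sketch of how a service layer might call Start (the exampleService type and its fields are hypothetical, not the lab's kvraft skeleton): submit the command, remember the returned index and term, and treat the operation as done only when the entry with that index shows up on applyCh with the same term.

type exampleService struct {
	rf      *Raft
	applyCh chan ApplyMsg
}

func (s *exampleService) submit(cmd interface{}) bool {
	index, term, isLeader := s.rf.Start(cmd)
	if !isLeader {
		return false // the client should retry against another server
	}
	// a real service would record (index, term) here and wake this request
	// when the ApplyMsg carrying that index (and the same term) arrives on applyCh
	_, _ = index, term
	return true
}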

applyEntries

Deliver operations that have reached consensus to the service layer.

func (rf *Raft) applyEntries(sleep int) {
	for !rf.killed(){ // while this peer has not been killed
		time.Sleep(time.Duration(sleep)*time.Millisecond)
		rf.statusAccessMutex.Lock()
		for ; rf.lastApplied < rf.commitIndex; rf.lastApplied++ {
			var sendApply ApplyMsg
			sendApply.Command = rf.logs[rf.lastApplied+1].Command // the command to apply
			sendApply.CommandIndex = rf.lastApplied + 2           // its index as seen by the tester (the paper starts indices at 1, hence the +2)
			sendApply.CommandValid = true                         

			rf.applyChan <- sendApply
		}
		rf.statusAccessMutex.Unlock()
	}
}

matchNewEntries

Checking whether the logs "match" means checking whether, at the position just before the new entries the leader sends, the current log already has an entry with the same index and the same term.

func (rf *Raft) matchNewEntries(Entries []logEntry, preLogIndex int, preLogTerm int,response *AppendEntriesResponse) bool {

	if preLogIndex != -1 && len(rf.logs) <= preLogIndex { // this node has no entry at index preLogIndex, i.e. its log is too short
		response.Success = false
		response.FastBack = len(rf.logs)                  // let the leader jump straight to the next entry it should send
		return false
	}
	if preLogIndex != -1 && rf.logs[preLogIndex].Term != preLogTerm { // the entry at that index has a different term than the leader's
		response.FastBack = rf.commitIndex+1 // let the leader's nextIndex jump to this node's commitIndex+1: the entry at commitIndex is guaranteed to match the leader, because everything up to commitIndex is replicated on a majority, and any node that became leader must contain those entries
		response.Success = false
		return false
	}
    // matched; preLogIndex = -1 means this node has no log yet, or none of its entries match the leader (just drop them all, the leader's log always wins)
	response.Success = true
	rf.logs = rf.logs[0 : preLogIndex+1] // delete everything after the last matching entry
	rf.logs = append(rf.logs, Entries...) // append the entries the leader sent

	return true
}
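
A concrete example of the fast-back behaviour (made-up numbers): suppose the follower only has entries at indices 0 to 3 and the leader sends PreLogIndex = 6. The follower replies FastBack = 4, so the leader retries from nextIndex = 4 instead of backing up one entry at a time. If instead the entry at index 6 exists but its term differs, the follower replies FastBack = commitIndex + 1, skipping over everything that is not yet known to be committed.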

Stress test results

1000 runs, all stable.
img

3C: Persistence

3C is about persisting Raft's state. The course already provides the corresponding functions; you only need to tweak them slightly and call them at the right places in the existing code. How hard 3C is depends entirely on how solid your 3A and 3B are: if they run 1000+ iterations without failing, 3C will rarely hit bugs. At least when I did 3C, I ran into only one bug that required changing earlier code.

Modified parts

Whenever any data that needs to be persisted changes, call persist to save it.

electStart

if response.VoteGranted {
	voteCount += 1
	if voteCount >= peerslen/2+1 {
		rf.state = 1
		rf.votedFor = -1

		/*****moyoj-3C******/
		rf.persist()

		/*****moyoj-3B******/
		code....
        ........
	}
} else if response.Term > rf.currentTerm {
	code....
    ......
	/*****moyoj-3C******/
	rf.persist()
}

code...
......

if rf.state == 2 && rf.currentTerm == curterm {
	code....
	.......
	/*****moyoj-3C******/
	rf.persist()

}

sendHeartsbeats

The only bug I ran into lives in this function, at the place where nextIndex is updated; it made a later AppendEntries to that follower slice out of bounds. One possible scenario is shown below:
img
  This is only one possibility. Since I do not know the details of how this lab's RPC layer works, I cannot give the exact cause, only a guess. Everything in this lab is simulated, including crashes. If I were writing an RPC framework myself I would use TCP, so a client's requests and the corresponding replies would arrive in order (the reply to the earlier request comes back first), and the "earlier request, later reply" situation could not happen. I first thought that was the cause, but even with ordered delivery the slice can still go out of bounds. After thinking it through, the real cause is probably that multiple goroutines are allowed to append entries to the same follower concurrently; if only one append goroutine per follower were allowed at a time, two append requests could never carry the same stretch of log to the same follower.
  Because the append goroutines run concurrently, a second request can be sent before the reply to the first one has updated nextIndex. The peer may answer both quickly, so the two replies come back almost simultaneously and race for rf.statusAccessMutex; whichever gets in first updates nextIndex, and the second one updates it again on top of that. That is wrong, because both requests may have appended the same stretch of log, so the result should be applied to nextIndex once, not twice. The ideal request pattern would look like this:
img
This would not be hard to implement: give each follower its own lock, release it only once the reply for that follower has been applied to its nextIndex, and only then allow the next request to that follower to be sent.
  All of the above is my guess; I am not sure of the real cause. If someone reading this can explain it, I would be very grateful!
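
As a hedged sketch of that per-follower serialization idea (the replicateMu field and replicateTo helper are hypothetical; my actual code below instead guards against stale replies), hold one mutex per peer across the RPC and the reply handling, so at most one append round trip per follower is in flight:

// e.g. rf.replicateMu = make([]sync.Mutex, len(peers)) would be set up in Make
func (rf *Raft) replicateTo(serverid int, req *AppendEntriesRequest, resp *AppendEntriesResponse, applyReply func()) {
	rf.replicateMu[serverid].Lock()
	defer rf.replicateMu[serverid].Unlock()
	if rf.peers[serverid].Call("Raft.AppendEntries", req, resp) {
		applyReply() // update nextIndex/matchIndex while still holding this peer's lock
	}
}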

if ok {
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if rf.currentTerm == curterm {
		if !response.Success {
			if rf.currentTerm < response.Term {
				code...
                ......
				/*****moyoj-3C******/
				rf.persist()

			} else {
				/*****moyoj-3B******/
				// move nextIndex back
				rf.nextIndex[serverid] = response.FastBack
			}
		}
		if len(request.Entries) == 0 || !response.Success {
			return
		}
		// the append succeeded
		/*****moyoj-3B 3C******/
        // 3C Figure8(unreliable) bug 1: before updating rf.nextIndex, check that rf.nextIndex[serverid] is still what it was when the entries were sent; if not, it changed while we were waiting for Call to return (another request already appended these entries to this follower). Call is made without holding the lock and the same follower can be called several times, so this reply is stale.
		if nextIndex == rf.nextIndex[serverid]{ // rf.nextIndex[serverid] has not changed, it is still what it was when the request was sent
			rf.nextIndex[serverid] = nextIndex + len(request.Entries)
		}
		rf.matchIndex[serverid] = rf.nextIndex[serverid] - 1

		// now try to advance commitIndex
		code...
        .......
	}
}

AppendEntries

func (rf *Raft) AppendEntries(request *AppendEntriesRequest, response *AppendEntriesResponse) {

	/*****moyoj-3A******/
	code...
    .....
    /*****moyoj-3B******/
    code....
    ......
	/*****moyoj-3C******/
	rf.persist()
}

RequestVote

func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {

	/*****moyoj-3A******/
	code...
    .......

	/*****moyoj-3B******/
	code...
    ......

	/*****moyoj-3C******/
	rf.persist()
}

Start

func (rf *Raft) Start(command interface{}) (int, int, bool) {
	code....
    ........
	if rf.state != 1 {
		isLeader = false
	} else {
		code...
        ......
		/*****moyoj-3C******/
		rf.persist()
	}
	return index, term, isLeader
}

ticker

case <-rf.electionTimer.C:
	rf.statusAccessMutex.Lock()
	if rf.state == 0 || rf.state == 2 {
		code...
        ......

		/*****moyoj-3C******/
		rf.persist()

		/*****moyoj-3B******/
		code...
        ......
	}
	rf.statusAccessMutex.Unlock()

New functions

persist

Persist the state.

func (rf *Raft) persist() {
	/*****moyoj-3C******/
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(rf.currentTerm)
	e.Encode(rf.votedFor)
	e.Encode(rf.logs)
	raftstate := w.Bytes()
	rf.persister.Save(raftstate, nil)
	
}

readPersist

Read the persisted state back.

func (rf *Raft) readPersist(data []byte) {
	if data == nil || len(data) < 1 { // bootstrap without any state?
		return
	}
	/*****moyoj-3C******/
	r := bytes.NewBuffer(data)
	d := labgob.NewDecoder(r)
	var currentTerm int
	var votedFor int
	var logs []logEntry
	if d.Decode(&currentTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil {
		DPrintf("failed to decode persisted state")
	} else {
		rf.currentTerm = currentTerm
		rf.votedFor = votedFor
		rf.logs = logs
	}
	
}

Stress test results

Since each full run takes quite a while, I only ran the tests 500 times.
img

3D: Log Compaction

This part changes a lot of code; almost everything that touches a log index was modified. Once snapshots exist, the index of an entry in the logs slice no longer has a fixed relationship to the entry's actual log index. In my code the slice index used to be the log index directly (indices starting from 0), and I only added 1 when sending entries on the channel, to present indices starting from 1. With snapshots that no longer works: if the snapshot already covers up to log index 10, the entry whose log index is 15 now lives at slice index 15-10-1=4. That is one reason for the large amount of changes; the other is the snapshot feature itself, which touches both the periodic delivery of entries on the channel and the log-replication RPCs. So after presenting the new functions individually, I paste the entire raft.go at the end.
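
To make the arithmetic explicit, here is a hedged sketch of the conversion used throughout (the helper names are made up; the code below inlines these expressions instead): lastSnapshotIndex is the 0-based log index of the last entry covered by the snapshot, or -1 when there is no snapshot.

// log index -> position in rf.logs; e.g. log index 15 with a snapshot covering up to 10 maps to 15-10-1 = 4
func (rf *Raft) toSliceIndex(logIndex int) int {
	return logIndex - rf.lastSnapshotIndex - 1
}

// position in rf.logs -> log index
func (rf *Raft) toLogIndex(sliceIndex int) int {
	return sliceIndex + rf.lastSnapshotIndex + 1
}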

A deadlock you may run into

During the lab you need to periodically send entries on applyCh chan ApplyMsg, which is what applyEntries does. You can see my applyEntries differs between 3B and 3D: the biggest difference is that in 3B I hold the lock the whole time I am sending on the channel, while in 3D I release the lock before sending, work from copies of the relevant variables, and only re-acquire the lock afterwards to update state. This does improve performance, but the real reason is that holding the lock makes the test harness and raft.go wait on each other, i.e. a deadlock.
In config.go's applierSnap function there is this piece of code:

	for m := range applyCh {
		err_msg := ""
		if m.SnapshotValid {
			cfg.mu.Lock()
			err_msg = cfg.ingestSnap(i, m.Snapshot, m.SnapshotIndex)
			cfg.mu.Unlock()
		} else if m.CommandValid {
			if m.CommandIndex != cfg.lastApplied[i]+1 {
				err_msg = fmt.Sprintf("server %v apply out of order, expected index %v, got %v", i, cfg.lastApplied[i]+1, m.CommandIndex)
			}

			if err_msg == "" {
				cfg.mu.Lock()
				var prevok bool
				err_msg, prevok = cfg.checkLogs(i, m)
				cfg.mu.Unlock()
				if m.CommandIndex > 1 && prevok == false {
					err_msg = fmt.Sprintf("server %v apply out of order %v", i, m.CommandIndex)
				}
			}

			cfg.mu.Lock()
			cfg.lastApplied[i] = m.CommandIndex
			cfg.mu.Unlock()

			if (m.CommandIndex+1)%SnapShotInterval == 0 {
				w := new(bytes.Buffer)
				e := labgob.NewEncoder(w)
				e.Encode(m.CommandIndex)
				var xlog []interface{}
				for j := 0; j <= m.CommandIndex; j++ {
					xlog = append(xlog, cfg.logs[i][j])
				}
				e.Encode(xlog)
				rf.Snapshot(m.CommandIndex, w.Bytes()) 
			}
		} else {
			// Ignore other types of ApplyMsg.
		}
		if err_msg != "" {
			log.Fatalf("apply error: %v", err_msg)
			cfg.applyErr[i] = err_msg
			// keep reading after error so that Raft doesn't block
			// holding locks...
		}
	}

You can see this loop keeps waiting for messages on the channel, i.e. the messages our applyEntries sends; if nothing is sent, the range over the channel blocks. Inside the loop it calls rf.Snapshot, which is part of the code we implement, and Snapshot takes the same lock that protects our critical sections. Now imagine applyEntries is in the middle of delivering entries to the channel while holding the lock, and the condition if (m.CommandIndex+1)%SnapShotInterval == 0 above becomes true, so the tester calls Snapshot. Snapshot blocks because applyEntries holds the lock, and with it the tester's for loop stops at that point, so nobody is reading from the channel any more. The channel is unbuffered, so with no reader the sender blocks too: applyEntries blocks at rf.applyChan <- sendApply while still holding the lock, and can never release it. That is a deadlock (the dependency cycle is shown below). Hence the rule: release the lock before sending on the channel, and re-acquire it afterwards for the follow-up work.
img
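
Condensed into a hedged sketch (the loop around it and the way messages are built are simplified here; the real version is the 3D applyEntries in the full code below): copy what you need while holding the lock, release it, then send.

rf.statusAccessMutex.Lock()
appliedIndex := rf.lastApplied
commitIndex := rf.commitIndex
lastSnapshotIndex := rf.lastSnapshotIndex
logs := make([]logEntry, len(rf.logs))
copy(logs, rf.logs)
rf.statusAccessMutex.Unlock()

for ; appliedIndex < commitIndex; appliedIndex++ {
	msg := ApplyMsg{
		CommandValid: true,
		Command:      logs[appliedIndex-lastSnapshotIndex].Command,
		CommandIndex: appliedIndex + 2,
	}
	rf.applyChan <- msg // safe: no Raft lock is held while sending
}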

New RPC message types

type InstallSnapshotRequest struct {
	/*****moyoj-3D******/
	Term             int
	LeaderId         int 
	LastIncludeIndex int // index of the last log entry covered by the snapshot
	LastIncludeTerm  int // term of the last log entry covered by the snapshot
	// Offset           int 
	// Data             []byte
	// Done             bool
}
type InstallSnapshotResponse struct {
	/*****moyoj-3D******/
	Term int
}

New functions

InstallSnapshot

When a follower has fallen behind, the leader sends its snapshot to that follower.

func (rf *Raft) InstallSnapshot(request *InstallSnapshotRequest, response *InstallSnapshotResponse) {
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if request.Term < rf.currentTerm { // the request's term is stale
		response.Term = rf.currentTerm
		return
	}
	curindex := request.LastIncludeIndex - rf.lastSnapshotIndex - 1 // where the snapshot's last entry falls in this node's logs slice
    // this can happen because the simulated network may deliver the same snapshot several times at almost the same moment: after the first request finishes, lastSnapshotIndex moves forward, so the second identical request computes a negative curindex; normally curindex >= 0
	if curindex < 0 { 
		response.Term = rf.currentTerm
		return
	}
	if curindex < len(rf.logs) { // smaller than our log length, i.e. the log index of our last entry is greater than (or equal to) the request's
        // if the entry at that position has the same term as the request, keep the entries after it; otherwise drop everything: an entry with the same index but a different term than the leader's means that entry and everything after it are stale
		if rf.logs[curindex].Term != request.Term { 
			rf.logs = make([]logEntry, 0)
		} else {
			logs := rf.logs[curindex+1:]
			rf.logs = make([]logEntry, len(rf.logs)-curindex-1)
			copy(rf.logs, logs)
		}
	} else { // curindex is at or beyond the end of our log: all of our entries are already covered by the leader's snapshot, so clear logs and just use the leader's snapshot
		rf.logs = make([]logEntry, 0)
	}
    
    // update this node's state
	rf.lastSnapshotIndex = request.LastIncludeIndex
	rf.lastSnapshotTerm = request.LastIncludeTerm
	rf.lastApplied = request.LastIncludeIndex
	rf.commitIndex = request.LastIncludeIndex

    // treat this as a heartbeat, so reset the election timeout
	rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	rf.currentTerm = request.Term
	rf.state = 0
	rf.leaderId = request.LeaderId
	response.Term = request.Term
    
    // persist
	rf.persist(request.Data)
    
    //apply snapshot
	go func() {
		var sendApply ApplyMsg
		sendApply.CommandValid = false
		sendApply.Snapshot = request.Data
		sendApply.SnapshotIndex = rf.lastSnapshotIndex + 1
		sendApply.SnapshotTerm = rf.lastSnapshotTerm
		sendApply.SnapshotValid = true
		rf.applyChan <- sendApply
	}()

}

updateNextMatch

This pulls the code that advances commitIndex from the matchIndex array out of the heartbeat function, so that sendHeartsbeats does not get too bloated; the parameter is the leader's own id.

/*****moyoj-3D******/
func (rf *Raft) updateNextMatch(me int) {
	for maxMatchIndex := rf.commitIndex + 1; maxMatchIndex-rf.lastSnapshotIndex-1 < len(rf.logs); maxMatchIndex++ {
		if rf.logs[maxMatchIndex-rf.lastSnapshotIndex-1].Term != rf.currentTerm { // never commit entries of an older term directly; they are committed along with the first committed entry of the current term
			continue
		}
		// count how many peers have replicated the entry at this index
		count := 1
		for peersIndex := 0; peersIndex < len(rf.peers); peersIndex++ {
			if peersIndex == me {
				continue
			}
			if maxMatchIndex <= rf.matchIndex[peersIndex] {
				count++
			}
		}
		// if this index (counting ourselves) is replicated on a majority, advance commitIndex
		if count >= len(rf.peers)/2+1 {
			
			rf.commitIndex = maxMatchIndex
		}
	}
}

Stress test results

I expected it to be a bit slow, but the official MIT timings are roughly the same, so I did not optimize further.

img

The figure below shows the total time of one run of all four parts.

img

Full code

/*****moyoj-3A******/
type logEntry struct {
	Command interface{}
	Term    int
}

// A Go object implementing a single Raft peer.
type Raft struct {
	mu        sync.Mutex          // Lock to protect shared access to this peer's state
	peers     []*labrpc.ClientEnd // RPC end points of all peers
	persister *Persister          // Object to hold this peer's persisted state
	me        int                 // this peer's index into peers[]
	dead      int32               // set by Kill()

	// Your data here (3A, 3B, 3C).
	// Look at the paper's Figure 2 for a description of what
	// state a Raft server must maintain.
	/*****moyoj-3A******/
	statusAccessMutex sync.Mutex
	//electionTimeout >> heartsbeatsTime
	heartsbeatsTimer *time.Timer
	electionTimer    *time.Timer
	heartsbeatsTime  int // interval between two heartbeats
	electionTimeout  int // election timeout (the lab requires it to be well above 100 ms; once it expires this server starts an election as a candidate)

	leaderId int // id of the current leader
	state    int // 0: follower 1: leader 2: candidate

	/*****moyoj-3B******/
	//to test
	applyChan   chan ApplyMsg
	commitIndex int
	lastApplied int
	//leader
	nextIndex  []int
	matchIndex []int

	/*****moyoj-3D******/
	lastSnapshotIndex int // index of the last log entry covered by the snapshot
	lastSnapshotTerm  int // term of the last log entry covered by the snapshot

	currentTerm int
	votedFor    int
	logs        []logEntry
}

func (rf *Raft) GetState() (int, bool) {

	var term int
	var isleader bool
	// Your code here (3A).
	/*****moyoj-3A******/
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	term = rf.currentTerm
	isleader = rf.state == 1
	return term, isleader
}

func (rf *Raft) persist(snapshot []byte) {
	/*****moyoj-3C******/
	w := new(bytes.Buffer)
	e := labgob.NewEncoder(w)
	e.Encode(rf.currentTerm)
	e.Encode(rf.votedFor)
	e.Encode(rf.logs)

	/*****moyoj-3D******/
	e.Encode(rf.lastSnapshotIndex)
	e.Encode(rf.lastSnapshotTerm)

	raftstate := w.Bytes()
	rf.persister.Save(raftstate, snapshot)
}

// restore previously persisted state.
func (rf *Raft) readPersist(data []byte) {
	if data == nil || len(data) < 1 { // bootstrap without any state?
		return
	}
	/*****moyoj-3C******/
	r := bytes.NewBuffer(data)
	d := labgob.NewDecoder(r)
	var currentTerm int
	var votedFor int
	var logs []logEntry

	/*****moyoj-3D******/
	var lastSnapshotIndex int
	var lastSnapshotTerm int
	if d.Decode(&currentTerm) != nil || d.Decode(&votedFor) != nil || d.Decode(&logs) != nil || d.Decode(&lastSnapshotIndex) != nil || d.Decode(&lastSnapshotTerm) != nil {
		DPrintf("failed to decode persisted state")
	} else {
		rf.currentTerm = currentTerm
		rf.votedFor = votedFor
		rf.logs = logs
		rf.lastSnapshotIndex = lastSnapshotIndex
		rf.lastSnapshotTerm = lastSnapshotTerm

		// initialize these to the snapshot's last log index, otherwise applyEntries will go out of bounds after a crash restart
		rf.lastApplied = lastSnapshotIndex
		rf.commitIndex = lastSnapshotIndex
	}
}


func (rf *Raft) Snapshot(index int, snapshot []byte) {
	// Your code here (3D).
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if index-1 <= rf.lastSnapshotIndex {
		return
	}
	if index-1 > rf.commitIndex {
		return
	}

	rf.lastSnapshotTerm = rf.logs[index-1-rf.lastSnapshotIndex-1].Term
	temp := rf.logs[index-rf.lastSnapshotIndex-1:]
	rf.lastSnapshotIndex = index - 1
	rf.logs = make([]logEntry, len(temp))
	copy(rf.logs, temp)
	rf.persist(snapshot)
	
}

type RequestVoteArgs struct {
	// Your data here (3A, 3B).
	/*****moyoj-3A******/
	Term        int
	CandidateId int

	/*****moyoj-3B******/
	LastLogIndex int
	LastLogTerm  int
}

type RequestVoteReply struct {
	// Your data here (3A).
	/*****moyoj-3A******/
	Term        int
	VoteGranted bool
}

type AppendEntriesRequest struct {
	/*****moyoj-3A******/
	Term         int
	LeaderId     int
	Entries      []logEntry
	PreLogIndex  int
	PreLogTerm   int
	LeaderCommit int
}

type AppendEntriesResponse struct {
	/*****moyoj-3A******/
	Term    int
	Success bool
	/*****moyoj-3B******/
	FastBack int
}

type InstallSnapshotRequest struct {
	/*****moyoj-3D******/
	Term             int
	LeaderId         int
	LastIncludeIndex int
	LastIncludeTerm  int
	Offset           int
	Data             []byte
	Done             bool
}
type InstallSnapshotResponse struct {
	/*****moyoj-3D******/
	Term int
}

func (rf *Raft) electStart(curterm int, lastLogIndex int, lastLogTerm int, me int, peerslen int) {

	var sendWaitGroup sync.WaitGroup
	voteCount := 1 // vote count

	for i := 0; i < peerslen; i++ {
		if i == me {
			continue
		}
		sendWaitGroup.Add(1)
		go func(serverid int) {
			var request RequestVoteArgs
			var response RequestVoteReply
			request.CandidateId = me
			request.Term = curterm

			/*****moyoj-3B******/
			request.LastLogIndex = lastLogIndex
			request.LastLogTerm = lastLogTerm

			ok := rf.sendRequestVote(serverid, &request, &response)

			if ok {
				rf.statusAccessMutex.Lock()
				if rf.state == 2 && rf.currentTerm == curterm {
					if response.VoteGranted {
						voteCount += 1
						if voteCount >= peerslen/2+1 {
							rf.state = 1
							rf.votedFor = -1

							/*****moyoj-3C******/
							rf.persist(rf.persister.ReadSnapshot())

							/*****moyoj-3B******/
							// reinitialize the nextIndex and matchIndex arrays
							for index := 0; index < len(rf.peers); index++ {
								rf.nextIndex[index] = len(rf.logs) + rf.lastSnapshotIndex + 1
								rf.matchIndex[index] = -1
							}

							go rf.sendHeartsbeats(curterm, rf.commitIndex, me, peerslen)
							rf.heartsbeatsTimer.Reset(time.Duration(rf.heartsbeatsTime) * time.Millisecond)
						}
					} else if response.Term > rf.currentTerm {
						rf.currentTerm = response.Term
						rf.state = 0
						rf.votedFor = -1 // !!!!! election failed, reset to "has not voted"
						rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)

						/*****moyoj-3C******/
						rf.persist(rf.persister.ReadSnapshot())
					}
				}
				rf.statusAccessMutex.Unlock()
			}
			sendWaitGroup.Done()
		}(i)
	}
	sendWaitGroup.Wait()
	rf.statusAccessMutex.Lock()
	if rf.state == 2 && rf.currentTerm == curterm {
		rf.state = 0
		rf.votedFor = -1 // !!! election failed, reset to -1
		rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)

		/*****moyoj-3C******/
		rf.persist(rf.persister.ReadSnapshot())

	}
	rf.statusAccessMutex.Unlock()
}

func (rf *Raft) sendHeartsbeats(curterm int, leaderCommit int, me int, peerslen int) {
	for i := 0; i < peerslen; i++ {
		if i == me {
			continue
		}
		go func(serverid int) {
			rf.statusAccessMutex.Lock()
			if rf.currentTerm != curterm {
				rf.statusAccessMutex.Unlock()
				return
			}
			nextIndex := rf.nextIndex[serverid]

			// the entries to send are already compacted into the snapshot, i.e. this follower is far behind, so send an InstallSnapshot RPC instead; this is also why the paper treats InstallSnapshot as a heartbeat: it is only sent when, while trying to append entries (heartbeat), the leader finds it cannot
			if nextIndex <= rf.lastSnapshotIndex {
				// send the snapshot
				var request InstallSnapshotRequest
				var response InstallSnapshotResponse

				curLastSnapshotIndex := rf.lastSnapshotIndex

				request.Data = rf.persister.ReadSnapshot()
				request.LastIncludeIndex = curLastSnapshotIndex
				request.LeaderId = me
				request.Term = curterm
				rf.statusAccessMutex.Unlock()

				ok := rf.peers[serverid].Call("Raft.InstallSnapshot", &request, &response)

				if ok {
					rf.statusAccessMutex.Lock()
					defer rf.statusAccessMutex.Unlock()
					if rf.currentTerm == curterm {
						if response.Term > rf.currentTerm {
							rf.currentTerm = response.Term
							rf.state = 0
							rf.votedFor = -1
							rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)

							/*****moyoj-3C******/
							rf.persist(rf.persister.ReadSnapshot())
							return
						}
						if rf.nextIndex[serverid] == nextIndex {
							rf.nextIndex[serverid] = curLastSnapshotIndex + 1
						}
						rf.matchIndex[serverid] = rf.nextIndex[serverid] - 1
						// advance commitIndex
						rf.updateNextMatch(me)
					}
				}

			} else {
				var request AppendEntriesRequest
				var response AppendEntriesResponse
				request.Entries = []logEntry{}
				request.LeaderId = me
				request.Term = curterm
				request.LeaderCommit = leaderCommit

				/*****moyoj-3B******/
				request.PreLogIndex = -1
				request.PreLogTerm = -1

				// send every not-yet-sent entry in one shot
				request.Entries = rf.logs[nextIndex-rf.lastSnapshotIndex-1 : len(rf.logs)]
				request.PreLogIndex = nextIndex - 1
				if (nextIndex - rf.lastSnapshotIndex - 1 - 1) >= 0 {
					request.PreLogTerm = rf.logs[nextIndex-rf.lastSnapshotIndex-1-1].Term
				} else {
					request.PreLogTerm = rf.lastSnapshotTerm
				}
				rf.statusAccessMutex.Unlock()

				ok := rf.peers[serverid].Call("Raft.AppendEntries", &request, &response)

				if ok {
					rf.statusAccessMutex.Lock()
					defer rf.statusAccessMutex.Unlock()
					if rf.currentTerm == curterm {
						if !response.Success {
							if rf.currentTerm < response.Term {
								rf.currentTerm = response.Term
								rf.state = 0
								rf.votedFor = -1
								rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)

								/*****moyoj-3C******/
								rf.persist(rf.persister.ReadSnapshot())

							} else {
								/*****moyoj-3B******/
								// move nextIndex back
								rf.nextIndex[serverid] = response.FastBack
							}
						}

						if len(request.Entries) == 0 || !response.Success {
							return
						}

						// the append succeeded
						/*****moyoj-3B******/
						if nextIndex == rf.nextIndex[serverid] { // nextIndex has not changed since the request was sent
							rf.nextIndex[serverid] = nextIndex + len(request.Entries)
						}
						rf.matchIndex[serverid] = rf.nextIndex[serverid] - 1

						// now try to advance commitIndex
						rf.updateNextMatch(me)
					}
				}
			}
		}(i)
	}
}

/*****moyoj-3B******/
// does the new batch of entries match this node's existing log?
func (rf *Raft) matchNewEntries(Entries []logEntry, preLogIndex int, preLogTerm int, response *AppendEntriesResponse) bool {
	if preLogIndex != -1 && len(rf.logs) <= preLogIndex-rf.lastSnapshotIndex-1 { // this node has no entry at index preLogIndex
		response.Success = false
		response.FastBack = len(rf.logs) + rf.lastSnapshotIndex + 1
		return false
	}
    // preLogIndex >= rf.lastSnapshotIndex; if they are equal the term is guaranteed to match, so the leader does not need to back up nextIndex
	if preLogIndex != -1 && preLogIndex != rf.lastSnapshotIndex && rf.logs[preLogIndex-rf.lastSnapshotIndex-1].Term != preLogTerm {
		// the entry at that index has a different term than the leader's
		response.FastBack = rf.commitIndex + 1 // let the leader's nextIndex jump straight to this node's commitIndex+1
		response.Success = false
		return false
	}
	response.Success = true
	rf.logs = rf.logs[0 : preLogIndex-rf.lastSnapshotIndex-1+1]
	rf.logs = append(rf.logs, Entries...)
	return true
}

/*****moyoj-3B******/
func (rf *Raft) applyEntries(sleep int) {
	for !rf.killed() {
		time.Sleep(time.Duration(sleep) * time.Millisecond)

		rf.statusAccessMutex.Lock()
		appliedIndex := rf.lastApplied
		commitIndex := rf.commitIndex
		logs := make([]logEntry, len(rf.logs))
		lastSnapshotIndex := rf.lastSnapshotIndex
		copy(logs, rf.logs)
		rf.statusAccessMutex.Unlock()

		// delivering to the upper layer can be done with the lock released
		for ; appliedIndex < commitIndex; appliedIndex++ {
			var sendApply ApplyMsg
			sendApply.Command = logs[appliedIndex-lastSnapshotIndex-1+1].Command
			sendApply.CommandIndex = appliedIndex + 2
			sendApply.CommandValid = true
			rf.applyChan <- sendApply
		}

		rf.statusAccessMutex.Lock()
		if rf.lastApplied < commitIndex { // InstallSnapshot may have moved rf.lastApplied further ahead while we were applying; never move lastApplied backwards
			rf.lastApplied = commitIndex
		}
		rf.statusAccessMutex.Unlock()
	}
}

/*****moyoj-3D******/
func (rf *Raft) updateNextMatch(me int) {
	for maxMatchIndex := rf.commitIndex + 1; maxMatchIndex-rf.lastSnapshotIndex-1 < len(rf.logs); maxMatchIndex++ {
		if rf.logs[maxMatchIndex-rf.lastSnapshotIndex-1].Term != rf.currentTerm { // never commit entries of an older term directly; they are committed along with the first committed entry of the current term
			continue
		}
		// count how many peers have replicated the entry at this index
		count := 1
		for peersIndex := 0; peersIndex < len(rf.peers); peersIndex++ {
			if peersIndex == me {
				continue
			}
			if maxMatchIndex <= rf.matchIndex[peersIndex] {
				count++
			}
		}
		// if this index (counting ourselves) is replicated on a majority, advance commitIndex
		if count >= len(rf.peers)/2+1 {
			rf.commitIndex = maxMatchIndex
		}
	}
}

// handler for heartbeats / log append requests, invoked via RPC by other peers (several instances of it may run at the same time)
func (rf *Raft) AppendEntries(request *AppendEntriesRequest, response *AppendEntriesResponse) {

	/*****moyoj-3A******/
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if request.Term < rf.currentTerm {
		response.Success = false
		response.Term = rf.currentTerm
		return
	}

	/*****moyoj-3B******/
	response.Success = true
	if rf.matchNewEntries(request.Entries, request.PreLogIndex, request.PreLogTerm, response) {
		// commitIndex may only be advanced if our log matched the leader's
		if request.LeaderCommit > rf.commitIndex {
			if len(rf.logs)-1 < request.LeaderCommit-rf.lastSnapshotIndex-1 {
				rf.commitIndex = len(rf.logs) + rf.lastSnapshotIndex + 1 - 1
			} else {
				rf.commitIndex = request.LeaderCommit
			}
		}
	}

	rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	rf.currentTerm = request.Term
	rf.state = 0
	rf.leaderId = request.LeaderId
	response.Term = request.Term

	/*****moyoj-3C******/
	rf.persist(rf.persister.ReadSnapshot())
}

// example RequestVote RPC handler.
func (rf *Raft) RequestVote(args *RequestVoteArgs, reply *RequestVoteReply) {
	// Your code here (3A, 3B).
	/*****moyoj-3A******/
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if args.Term < rf.currentTerm {
		reply.VoteGranted = false
		reply.Term = rf.currentTerm
		return
	}
	if args.Term == rf.currentTerm {
		if rf.state == 1 || rf.votedFor != -1 {
			reply.VoteGranted = false
			reply.Term = rf.currentTerm
			return
		}
	}

	/*****moyoj-3B******/
	/** This is mainly to keep log replication safe: a new leader must never overwrite already committed entries with its own older log.
	1. The leader only commits entries of its own term; it never directly commits entries of old terms (doing so runs into the
		situation in section 5.4.2 of the paper). Committing a new entry of the current term commits the older ones along with it. "Committing" here simply means the leader advancing commitIndex. Very important!!
	2. A candidate's log must be at least as up to date as the logs of a majority of nodes. What counts as more up to date?
		The log whose last entry has a larger term, or, if the last terms are equal, the log with more entries.
	Together these two points guarantee safety.
	*/

	reply.VoteGranted = true
	if len(rf.logs) != 0 && rf.logs[len(rf.logs)-1].Term > args.LastLogTerm {
		reply.VoteGranted = false
	}
	// the candidate's log has fewer entries (same last term, smaller last index)
	if len(rf.logs) != 0 && rf.logs[len(rf.logs)-1].Term == args.LastLogTerm && args.LastLogIndex < len(rf.logs)+rf.lastSnapshotIndex+1-1 {
		reply.VoteGranted = false
	}

	/*****moyoj-3D******/
	if rf.lastSnapshotIndex != -1 && rf.lastSnapshotTerm > args.LastLogTerm {
		reply.VoteGranted = false
	}
	if rf.lastSnapshotIndex != -1 && rf.lastSnapshotTerm == args.LastLogTerm && args.LastLogIndex < rf.lastSnapshotIndex {
		reply.VoteGranted = false
	}

	if reply.VoteGranted {
		// record the vote and reset the election timeout
		rf.votedFor = args.CandidateId // vote for the candidate
		rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	}
	rf.currentTerm = args.Term
	rf.state = 0
	reply.Term = args.Term

	/*****moyoj-3C******/
	rf.persist(rf.persister.ReadSnapshot())
}

func (rf *Raft) InstallSnapshot(request *InstallSnapshotRequest, response *InstallSnapshotResponse) {
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if request.Term < rf.currentTerm {
		response.Term = rf.currentTerm
		return
	}
	curindex := request.LastIncludeIndex - rf.lastSnapshotIndex - 1
	if curindex < 0 {
		response.Term = rf.currentTerm
		return
	}
	if curindex < len(rf.logs) {
		if rf.logs[curindex].Term != request.Term {
			rf.logs = make([]logEntry, 0)
		} else {
			logs := rf.logs[curindex+1:]
			rf.logs = make([]logEntry, len(rf.logs)-curindex-1)
			copy(rf.logs, logs)
		}
	} else {
		rf.logs = make([]logEntry, 0)
	}

	rf.lastSnapshotIndex = request.LastIncludeIndex
	rf.lastSnapshotTerm = request.LastIncludeTerm
	rf.lastApplied = request.LastIncludeIndex
	rf.commitIndex = request.LastIncludeIndex

	rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	rf.currentTerm = request.Term
	rf.state = 0
	rf.leaderId = request.LeaderId
	response.Term = request.Term

	rf.persist(request.Data)

	go func() {
		var sendApply ApplyMsg
		sendApply.CommandValid = false
		sendApply.Snapshot = request.Data
		sendApply.SnapshotIndex = rf.lastSnapshotIndex + 1
		sendApply.SnapshotTerm = rf.lastSnapshotTerm
		sendApply.SnapshotValid = true
		rf.applyChan <- sendApply
	}()

}

func (rf *Raft) sendRequestVote(server int, args *RequestVoteArgs, reply *RequestVoteReply) bool {
	ok := rf.peers[server].Call("Raft.RequestVote", args, reply)
	return ok
}

func (rf *Raft) Start(command interface{}) (int, int, bool) {
	index := -1
	term := -1
	isLeader := true

	// Your code here (3B).
	rf.statusAccessMutex.Lock()
	defer rf.statusAccessMutex.Unlock()
	if rf.state != 1 {
		isLeader = false
	} else {
		rf.logs = append(rf.logs, logEntry{command, rf.currentTerm})
		index = len(rf.logs) + rf.lastSnapshotIndex + 1
		term = rf.currentTerm

		/*****moyoj-3C******/
		rf.persist(rf.persister.ReadSnapshot())
	}
	return index, term, isLeader
}

func (rf *Raft) Kill() {
	atomic.StoreInt32(&rf.dead, 1)
	// Your code here, if desired.
}

func (rf *Raft) killed() bool {
	z := atomic.LoadInt32(&rf.dead)
	return z == 1
}

func (rf *Raft) ticker() {
	for rf.killed() == false {
		select {
		case <-rf.electionTimer.C:
			rf.statusAccessMutex.Lock()
			if rf.state == 0 || rf.state == 2 {
				rf.state = 2
				rf.votedFor = rf.me
				rf.currentTerm += 1

				/*****moyoj-3C******/
				rf.persist(rf.persister.ReadSnapshot())

				/*****moyoj-3B******/
				lastLogIndex := -1
				lastLogTerm := -1
				if len(rf.logs) != 0 || rf.lastSnapshotIndex != -1 {
					if len(rf.logs) != 0 {
						lastLogIndex = len(rf.logs) + rf.lastSnapshotIndex + 1 - 1
						lastLogTerm = rf.logs[len(rf.logs)-1].Term
					} else {
						lastLogIndex = rf.lastSnapshotIndex
						lastLogTerm = rf.lastSnapshotTerm
					}
				}

				go rf.electStart(rf.currentTerm, lastLogIndex, lastLogTerm, rf.me, len(rf.peers))
				rf.electionTimer.Reset(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
			}
			rf.statusAccessMutex.Unlock()
		case <-rf.heartsbeatsTimer.C:
			rf.statusAccessMutex.Lock()
			if rf.state == 1 {
				go rf.sendHeartsbeats(rf.currentTerm, rf.commitIndex, rf.me, len(rf.peers))
				rf.heartsbeatsTimer.Reset(time.Duration(rf.heartsbeatsTime) * time.Millisecond)
			}
			rf.statusAccessMutex.Unlock()
		}
	}
}

func Make(peers []*labrpc.ClientEnd, me int,
	persister *Persister, applyCh chan ApplyMsg) *Raft {
	rf := &Raft{}
	rf.peers = peers
	rf.persister = persister
	rf.me = me // can be used as a unique identifier

	// Your initialization code here (3A, 3B, 3C).
	/*****moyoj-3A******/
	rf.currentTerm = 0
	rf.votedFor = -1
	rf.state = 0
	rf.logs = []logEntry{}
	rf.heartsbeatsTime = 110
	rf.electionTimeout = 1000
	rf.electionTimer = time.NewTimer(time.Duration(rf.electionTimeout+rand.Intn(rf.electionTimeout)) * time.Millisecond)
	rf.heartsbeatsTimer = time.NewTimer(time.Duration(rf.heartsbeatsTime) * time.Millisecond)

	/*****moyoj-3B******/
	rf.commitIndex = -1
	rf.lastApplied = -1
	rf.nextIndex = make([]int, len(rf.peers))
	rf.matchIndex = make([]int, len(rf.peers))
	rf.applyChan = applyCh
	go rf.applyEntries(10) // apply committed entries every 10 ms

	/*****moyoj-3D******/
	rf.lastSnapshotIndex = -1
	rf.lastSnapshotTerm = -1

	// initialize from state persisted before a crash
	rf.readPersist(persister.ReadRaftState())
	// start ticker goroutine to start elections
	go rf.ticker()

	return rf
}


Other notes

2025-04-16: while doing Lab 4 I found a few bugs in this Lab 3 code that the tests had not caught.
Two main ones: 1. log entries are synchronized and committed across nodes too slowly; 2. the service layer can lose operations, i.e. log entries get lost, which breaks linearizability.
To avoid confusion, the fixed code is posted with Lab 4.

References

Go learning materials (Chinese)

MIT 6.5840 Lab 3 homepage

Raft paper (Chinese translation)

Raft paper (original English)

Raft visualization
