/// <summary>
/// Handles the reply to a RequestVote RPC previously sent to the peer behind <paramref name="c"/>.
/// </summary>
/// <param name="rpc">The RequestVote call whose Result is inspected.</param>
/// <param name="c">Connector of the answering peer; its Name is the key used in VoteSuccess.</param>
/// <returns>Always Procedure.Success.</returns>
private int ProcessRequestVoteResult(RequestVote rpc, Connector c)
{
    bool sawNewTerm;
    lock (this)
    {
        sawNewTerm = LogSequence.TrySetTerm(rpc.Result.Term);
        if (sawNewTerm)
        {
            // The reply carried a new term: give up the campaign.
            ConvertStateTo(RaftState.Follower);
        }
    }
    if (sawNewTerm)
    {
        return Procedure.Success;
    }

    // A vote counts only when it was granted and this peer has not been counted before.
    var countedNow = rpc.Result.VoteGranted && VoteSuccess.TryAdd(c.Name, c);
    if (!countedNow)
    {
        return Procedure.Success;
    }

    lock (this)
    {
        // Make sure an election is still in progress. Even without this check,
        // ConvertStateTo would ignore an incorrect state transition.
        var stillCampaigning = State == RaftState.Candidate;

        // Together with this node's own vote, HalfCount granted votes form a majority.
        if (stillCampaigning && VoteSuccess.Count >= RaftConfig.HalfCount)
        {
            ConvertStateTo(RaftState.Leader);
        }
    }
    return Procedure.Success;
}
/// <summary>
/// Shuts this Raft node down: stops task execution, closes the network,
/// falls back to the Follower state and closes the log storage.
/// </summary>
public void Close()
{
    // The constructor registers ProcessExit on AppDomain.CurrentDomain.ProcessExit.
    // Remove that registration here so a closed instance is no longer kept
    // reachable by the static event and is not touched again at process exit.
    AppDomain.CurrentDomain.ProcessExit -= ProcessExit;

    // 0. Cancel tasks. Only the leader has tasks.
    Server.TaskOneByOne.Shutdown();
    if (!IsLeader)
    {
        // For a leader it is enough to shut down the user request queue
        // Server.TaskOneByOne; user request handling depends on ImportantThreadPool.
        // For a follower, ImportantThreadPool is shut down safely. Follower
        // requests come from the leader, so the way they are refused matters:
        // the current approach is that once ImportantThreadPool is shut down,
        // requests from the leader are simply dropped and the follower is
        // considered unable to respond any more.
        ImportantThreadPool.Shutdown();
    }

    // 1. Close the network first.
    Server.Stop();

    // 2. Clear pending tasks if this node is the leader.
    lock (this)
    {
        // See WaitLeaderReady.
        // Only a state change is used here instead of waking waiting tasks
        // directly, which avoids questions about whether the state was set
        // correctly. Converting to Follower on close is also correct.
        ConvertStateTo(RaftState.Follower);

        // Cancel the follower timer task.
        LeaderLostTimerTask?.Cancel();
        LeaderLostTimerTask = null;
    }

    // 3. Close LogSequence (rocksdb).
    LogSequence.Close();
}
/// <summary>
/// Answers a RequestVote RPC received from a candidate.
/// </summary>
/// <param name="p">The incoming protocol; it is treated as a RequestVote.</param>
/// <returns>Always Procedure.Success.</returns>
private int ProcessRequestVote(Protocol p)
{
    lock (this)
    {
        var r = p as RequestVote;
        var candidate = r.Argument;

        var sawNewTerm = LogSequence.TrySetTerm(candidate.Term);
        if (sawNewTerm)
        {
            // New term found: fall back to Follower, then keep processing.
            ConvertStateTo(RaftState.Follower);
        }
        r.Result.Term = LogSequence.Term;

        // RequestVote RPC, receiver implementation:
        // 1. Reply false if term < currentTerm (§5.1).
        // 2. If votedFor is null or candidateId, and the candidate's log is at
        //    least as up-to-date as the receiver's log, grant the vote (§5.2, §5.4).
        // The checks are nested so that each one runs only when the previous one passed.
        var granted = false;
        if (candidate.Term >= LogSequence.Term)
        {
            if (LogSequence.CanVoteFor(candidate.CandidateId))
            {
                granted = IsLastLogUpToDate(candidate.LastLogTerm, candidate.LastLogIndex);
            }
        }
        r.Result.VoteGranted = granted;

        if (r.Result.VoteGranted)
        {
            LogSequence.SetVoteFor(candidate.CandidateId);
        }

        logger.Debug("{0}: VoteFor={1} Rpc={2}", Name, LogSequence.VoteFor, r);
        r.SendResultCode(0);
        return Procedure.Success;
    }
}
/// <summary>
/// Creates a Raft node: resolves configuration, builds the network server,
/// opens the log sequence, registers the internal RPCs and starts the timers.
/// </summary>
/// <param name="sm">State machine driven by this node; its Raft member is pointed back at this instance.</param>
/// <param name="RaftName">Optional node name; when non-empty it replaces the name in the Raft configuration.</param>
/// <param name="raftconf">Raft configuration; loaded through RaftConfig.Load() when null.</param>
/// <param name="config">Zeze configuration; loaded through Zeze.Config.Load() when null.</param>
/// <param name="name">Name handed to the Server that is created.</param>
/// <exception cref="Exception">
/// The server configuration already contains acceptors or connectors,
/// or fewer than three nodes are configured.
/// </exception>
public Raft(StateMachine sm, string RaftName = null, RaftConfig raftconf = null, Zeze.Config config = null, string name = "Zeze.Raft.Server")
{
    raftconf = raftconf ?? RaftConfig.Load();
    raftconf.Verify();
    RaftConfig = raftconf;

    sm.Raft = this;
    StateMachine = sm;

    if (!string.IsNullOrEmpty(RaftName))
    {
        raftconf.Name = RaftName;
    }

    config = config ?? Zeze.Config.Load();
    Server = new Server(this, name, config);

    // Acceptors and connectors are created below from the Raft configuration,
    // so the server configuration must not bring any of its own.
    var serverConfig = Server.Config;
    if (serverConfig.AcceptorCount() != 0)
    {
        throw new Exception("Acceptor Found!");
    }
    if (Server.Config.ConnectorCount() != 0)
    {
        throw new Exception("Connector Found!");
    }
    if (RaftConfig.Nodes.Count < 3)
    {
        throw new Exception("Startup Nodes.Count Must >= 3.");
    }

    ImportantThreadPool = new SimpleThreadPool(5, $"Raft.{Name}");

    Server.CreateAcceptor(Server, raftconf);
    Server.CreateConnector(Server, raftconf);

    LogSequence = new LogSequence(this);

    RegisterInternalRpc();
    StartLeaderLostTimerTask();
    LogSequence.StartSnapshotPerDayTimer();

    AppDomain.CurrentDomain.ProcessExit += ProcessExit;
}
/// <summary>
/// Tells whether a log ending at (<paramref name="lastTerm"/>, <paramref name="lastIndex"/>)
/// is at least as up-to-date as the last entry of the local log.
/// </summary>
/// <param name="lastTerm">Term of the other side's last log entry.</param>
/// <param name="lastIndex">Index of the other side's last log entry.</param>
/// <returns>
/// True when <paramref name="lastTerm"/> is greater than the local last term, or the
/// terms are equal and <paramref name="lastIndex"/> is not smaller than the local last index.
/// </returns>
private bool IsLastLogUpToDate(long lastTerm, long lastIndex)
{
    var localTail = LogSequence.LastRaftLog();
    var localTerm = localTail.Term;

    // Different terms decide the comparison on their own.
    if (lastTerm != localTerm)
    {
        return lastTerm > localTerm;
    }

    // Same term: the log that reaches at least as far wins.
    return lastIndex >= localTail.Index;
}
/// <summary>
/// Starts one election round: votes for itself, raises the term, sends RequestVote
/// to every connector whose handshake is done, and schedules a retry in case the
/// round does not finish in time.
/// </summary>
/// <param name="ThisTask">The scheduler task that triggered this call; not used by the body.</param>
private void SendRequestVote(SchedulerTask ThisTask)
{
    lock (this)
    {
        // Cleared at the start of every election.
        VoteSuccess.Clear();
        LeaderId = string.Empty;

        // Vote for self first.
        LogSequence.SetVoteFor(Name);
        LogSequence.TrySetTerm(LogSequence.Term + 1);

        WaitMajorityVoteTimoutTask?.Cancel();
        WaitMajorityVoteTimoutTask = null;

        var ballot = new RequestVoteArgument
        {
            Term = LogSequence.Term,
            CandidateId = Name
        };
        var localTail = LogSequence.LastRaftLog();
        ballot.LastLogIndex = localTail.Index;
        ballot.LastLogTerm = localTail.Term;

        Server.Config.ForEachConnector(peer =>
        {
            // Peers whose handshake is not done are skipped.
            if (peer.IsHandshakeDone)
            {
                var call = new RequestVote() { Argument = ballot };
                call.Send(peer.Socket, reply => ProcessRequestVoteResult(call, peer));
                logger.Debug("{0}: SendRequestVote {1}", Name, call);
            }
        });

        // Timer: if the election has not completed when it fires, start another one.
        var retryDelay = RaftConfig.AppendEntriesTimeout + 1000;
        WaitMajorityVoteTimoutTask = Scheduler.Instance.Schedule(
            retryTask =>
            {
                lock (this)
                {
                    StartRequestVoteDelayTask = null;
                    ConvertStateTo(RaftState.Candidate);
                }
            },
            retryDelay);
    }
}
/// <summary>
/// Handles an AppendEntries RPC received from a leader.
/// </summary>
/// <param name="p">The incoming protocol; it is treated as an AppendEntries.</param>
/// <returns>The value returned by LogSequence.FollowerOnAppendEntries.</returns>
private int ProcessAppendEntries(Protocol p)
{
    var r = p as AppendEntries;
    lock (this)
    {
        LogSequence.TrySetTerm(r.Argument.Term);

        // If the sender's term is still lower than the local term after TrySetTerm,
        // the request comes from a stale leader. Such a sender must not be able to
        // force this node (possibly the current leader) to step down, nor to
        // overwrite LeaderId (Raft §5.1: reject requests carrying a stale term).
        if (r.Argument.Term >= LogSequence.Term)
        {
            // [Note] Only a leader sends AppendEntries, so always convert to
            // Follower regardless of the current state.
            // raft.pdf describes this conversion for Candidate only.
            if (State != RaftState.Follower)
            {
                ConvertStateTo(RaftState.Follower);
            }
            LeaderId = r.Argument.LeaderId; // always replace
        }

        // Still delegated in every case so that the sender receives a reply.
        // NOTE(review): this relies on FollowerOnAppendEntries rejecting requests
        // whose term is lower than the current term - confirm against LogSequence.
        return LogSequence.FollowerOnAppendEntries(r);
    }
}
/// <summary>
/// Performs a state transition whose starting state is Follower.
/// </summary>
/// <param name="newState">The requested target state.</param>
private void ConvertStateFromFollwerTo(RaftState newState)
{
    if (newState == RaftState.Follower)
    {
        // Nothing changes; the request is only logged.
        logger.Info($"RaftState {Name}: Follower->Follower");
        return;
    }

    if (newState == RaftState.Candidate)
    {
        logger.Info($"RaftState {Name}: Follower->Candidate");
        State = RaftState.Candidate;

        LeaderLostTimerTask?.Cancel();
        LeaderLostTimerTask = null;

        // Clear the vote first, so this node can still vote for somebody else
        // before it actually nominates itself.
        LogSequence.SetVoteFor(string.Empty);
        StartRequestVote();
        return;
    }

    if (newState == RaftState.Leader)
    {
        // A concurrent RequestVote result that did not check the current state
        // could arrive here. Not a big problem; see ProcessRequestVoteResult.
        logger.Info($"RaftState {Name} Impossible! Follower->Leader");
    }
}
/// <summary>
/// Performs a state transition whose starting state is Candidate.
/// </summary>
/// <param name="newState">The requested target state.</param>
private void ConvertStateFromCandidateTo(RaftState newState)
{
    switch (newState)
    {
        case RaftState.Follower:
            logger.Info($"RaftState {Name}: Candidate->Follower");
            State = RaftState.Follower;
            VoteSuccess.Clear(); // Cleared when the election ends.
            LogSequence.SetVoteFor(string.Empty);
            StartRequestVoteDelayTask?.Cancel();
            StartRequestVoteDelayTask = null;
            WaitMajorityVoteTimoutTask?.Cancel();
            WaitMajorityVoteTimoutTask = null;
            StartLeaderLostTimerTask();
            return;

        case RaftState.Candidate:
            logger.Info($"RaftState {Name}: Candidate->Candidate");
            // Clear the vote first, so this node can still vote for somebody else
            // before it actually nominates itself.
            LogSequence.SetVoteFor(string.Empty);
            StartRequestVote();
            return;

        case RaftState.Leader:
            StartRequestVoteDelayTask?.Cancel();
            StartRequestVoteDelayTask = null;
            WaitMajorityVoteTimoutTask?.Cancel();
            WaitMajorityVoteTimoutTask = null;
            VoteSuccess.Clear(); // Cleared when the election ends.

            logger.Info($"RaftState {Name}: Candidate->Leader");
            State = RaftState.Leader;
            LogSequence.SetVoteFor(string.Empty);
            LeaderId = Name; // set to self

            // (Reinitialized after election)
            var nextIndex = LogSequence.LastIndex + 1;
            Server.Config.ForEachConnector(
                (c) =>
                {
                    var cex = c as Server.ConnectorEx;
                    cex.NextIndex = nextIndex;
                    cex.MatchIndex = 0;
                });

            // Upon election:
            // send initial empty AppendEntries RPCs
            // (heartbeat) to each server; repeat during
            // idle periods to prevent election timeouts (§5.2)
            LogSequence.AppendLog(new HeartbeatLog(HeartbeatLog.SetLeaderReadyEvent), false);

            // Do not leave an earlier heartbeat timer running next to the new one.
            HearbeatTimerTask?.Cancel();
            HearbeatTimerTask = Scheduler.Instance.Schedule(
                (heartbeatTask) =>
                {
                    // Send a heartbeat only after the log has been idle for at least
                    // LeaderHeartbeatTimer. The previous "<" comparison did the
                    // opposite: it appended heartbeats while the log was recently
                    // active and stopped once it had been idle for that long.
                    var elapse = Util.Time.NowUnixMillis - LogSequence.AppendLogActiveTime;
                    if (elapse >= RaftConfig.LeaderHeartbeatTimer)
                    {
                        LogSequence.AppendLog(new HeartbeatLog(), false);
                    }
                },
                1000,
                1000);
            return;
    }
}
/// <summary>
/// Appends <paramref name="log"/> by forwarding the call to LogSequence.AppendLog.
/// </summary>
/// <param name="log">The log entry to append.</param>
/// <param name="ApplySync">Passed through unchanged to LogSequence.AppendLog; defaults to true.</param>
public void AppendLog(Log log, bool ApplySync = true)
    => LogSequence.AppendLog(log, ApplySync);
/// <summary>
/// Handles one chunk of an InstallSnapshot RPC: writes the chunk into the snapshot
/// file for its LastIncludedIndex and, on the final chunk, discards older partial
/// snapshots and hands the result to LogSequence.
/// </summary>
/// <param name="p">The incoming protocol; it is treated as an InstallSnapshot.</param>
/// <returns>
/// Procedure.LogicError when the sender's term is lower than the current term,
/// otherwise Procedure.Success.
/// </returns>
private int ProcessInstallSnapshot(Protocol p)
{
    var r = p as InstallSnapshot;
    lock (this)
    {
        if (LogSequence.TrySetTerm(r.Argument.Term))
        {
            LeaderId = r.Argument.LeaderId;
            // New term found.
            ConvertStateTo(RaftState.Follower);
        }
    }

    r.Result.Term = LogSequence.Term;
    if (r.Argument.Term < LogSequence.Term)
    {
        // 1. Reply immediately if term < currentTerm
        r.SendResultCode(InstallSnapshot.ResultCodeTermError);
        return Procedure.LogicError;
    }

    // 2. Create new snapshot file if first chunk (offset is 0).
    // LastIncludedIndex is part of the file name, so a new InstallSnapshot does
    // not overwrite one that is still in progress or was interrupted.
    var path = Path.Combine(RaftConfig.DbHome,
        $"{LogSequence.SnapshotFileName}.{r.Argument.LastIncludedIndex}");

    FileStream outputFileStream = null;
    if (r.Argument.Offset == 0)
    {
        // GetOrAdd allows a transfer to be restarted.
        outputFileStream = ReceiveSnapshotting.GetOrAdd(
            r.Argument.LastIncludedIndex,
            (_) => new FileStream(path, FileMode.OpenOrCreate));
        outputFileStream.Seek(0, SeekOrigin.End);
    }
    else
    {
        // The return value of TryGetValue is deliberately ignored;
        // a miss leaves outputFileStream null and is handled below.
        ReceiveSnapshotting.TryGetValue(r.Argument.LastIncludedIndex, out outputFileStream);
    }

    if (null == outputFileStream)
    {
        // This must be an old install that was discarded: discard and ignore.
        r.SendResultCode(InstallSnapshot.ResultCodeOldInstall);
        return Procedure.Success;
    }

    // By default the leader simply continues; no repositioning is needed.
    r.Result.Offset = -1;
    if (r.Argument.Offset > outputFileStream.Length)
    {
        // The chunk starts beyond the data received so far. Report the current
        // length so the leader restarts the transfer from that position.
        r.Result.Offset = outputFileStream.Length;
        r.SendResultCode(InstallSnapshot.ResultCodeNewOffset);
        return Procedure.Success;
    }

    if (r.Argument.Offset == outputFileStream.Length)
    {
        // Normal append: write directly.
        // 3. Write data into snapshot file at given offset
        outputFileStream.Write(r.Argument.Data.Bytes, r.Argument.Data.Offset, r.Argument.Data.Count);
    }
    else
    {
        // The chunk starts before the current length.
        var newEndPosition = r.Argument.Offset + r.Argument.Data.Count;
        if (newEndPosition > outputFileStream.Length)
        {
            // Part of the chunk is new data that has to be written to the file.
            outputFileStream.Seek(r.Argument.Offset, SeekOrigin.Begin);
            outputFileStream.Write(r.Argument.Data.Bytes, r.Argument.Data.Offset, r.Argument.Data.Count);
        }
        r.Result.Offset = outputFileStream.Length;
    }

    // 4. Reply and wait for more data chunks if done is false
    if (r.Argument.Done)
    {
        // 5. Save snapshot file, discard any existing or partial snapshot with a smaller index
        outputFileStream.Close();
        foreach (var e in ReceiveSnapshotting)
        {
            if (e.Key < r.Argument.LastIncludedIndex)
            {
                e.Value.Close();
                var pathDelete = Path.Combine(RaftConfig.DbHome,
                    $"{LogSequence.SnapshotFileName}.{e.Key}");
                // Delete the superseded snapshot file. The previous code passed
                // "path" here, which removed the snapshot that had just been received.
                File.Delete(pathDelete);
                ReceiveSnapshotting.TryRemove(e.Key, out var _);
            }
        }
        // The rest of the processing happens inside the function below.
        LogSequence.EndReceiveInstallSnapshot(outputFileStream, r);
    }

    r.SendResultCode(0);
    return Procedure.Success;
}