/// <summary>
/// Starts a background task that sends the snapshot file to <paramref name="connector"/>,
/// unless an install to that connector is already running. When the snapshot file is
/// missing, or a snapshot is currently being taken, a snapshot is requested instead.
/// </summary>
/// <param name="connector">Connector of the peer that should receive the snapshot.</param>
private void StartInstallSnapshot(Server.ConnectorEx connector)
{
    // An install to this connector is already in flight.
    if (connector.InstallSnapshotting)
    {
        return;
    }

    var snapshotPath = Path.Combine(Raft.RaftConfig.DbHome, SnapshotFileName);

    // While Snapshotting is set, the install is not started now;
    // a later AppendEntries retry will attempt the install again.
    if (!File.Exists(snapshotPath) || Snapshotting)
    {
        // This usually means the snapshot file has been deleted.
        // [NOTE] Perhaps reporting an error would be better in this case?
        // StartSnapshot checks internally and will not start multiple snapshots.
        StartSnapshot(true);
        return;
    }

    // Mark the connector and register it before handing the work to a background task.
    connector.InstallSnapshotting = true;
    InstallSnapshotting[connector.Name] = connector;
    Zeze.Util.Task.Run(
        () => InstallSnapshot(snapshotPath, connector),
        $"InstallSnapshot To '{connector.Name}'");
}
/// <summary>
/// Callback for an AppendEntries request sent through <paramref name="connector"/>:
/// resends on timeout, steps down when a newer term is reported, commits and continues
/// replicating on success, and backs <c>NextIndex</c> off by one on rejection.
/// </summary>
/// <param name="connector">Connector the request was sent on.</param>
/// <param name="p">The completed protocol; it is used as an <c>AppendEntries</c>.</param>
/// <returns>Always <c>Procedure.Success</c>.</returns>
private int ProcessAppendEntriesResult(Server.ConnectorEx connector, Protocol p)
{
    if (!Raft.IsLeader)
    {
        return Procedure.Success; // maybe close
    }

    var rpc = p as AppendEntries;
    if (rpc.IsTimeout)
    {
        TrySendAppendEntries(connector, rpc); // resend
        return Procedure.Success;
    }

    lock (Raft)
    {
        var newTermFound = Raft.LogSequence.TrySetTerm(rpc.Result.Term);
        if (newTermFound)
        {
            // A new term was found; at this point the leader is unknown.
            Raft.LeaderId = string.Empty;
            Raft.ConvertStateTo(Raft.RaftState.Follower);
        }

        // Either a new term was found (no longer leader) or the state is not Leader:
        // processing cannot continue, so drop the pending request and return directly.
        if (newTermFound || Raft.State != Raft.RaftState.Leader)
        {
            connector.Pending = null;
            return Procedure.Success;
        }
    }

    if (rpc.Result.Success)
    {
        lock (Raft)
        {
            TryCommit(rpc, connector);
        }

        // TryCommit advanced NextIndex. The log may not be fully replicated yet,
        // or new entries may have been appended, so try to keep replicating.
        // See "limit the number of entries sent at once" inside TrySendAppendEntries.
        TrySendAppendEntries(connector, rpc);
    }
    else
    {
        lock (Raft)
        {
            // TODO raft.pdf mentions an optimization for this back-off.
            connector.NextIndex--;
            TrySendAppendEntries(connector, rpc); // resend, using the new NextIndex.
        }
    }

    return Procedure.Success;
}
/// <summary>
/// Builds and sends the next AppendEntries request to <paramref name="connector"/>, starting
/// at its <c>NextIndex</c>. Nothing is sent unless <paramref name="pending"/> is the request
/// currently recorded on the connector.
/// </summary>
/// <param name="connector">Connector of the peer to replicate to.</param>
/// <param name="pending">
/// The request the caller believes is outstanding; compared by reference with
/// <c>connector.Pending</c>.
/// </param>
private void TrySendAppendEntries(Server.ConnectorEx connector, AppendEntries pending)
{
    lock (Raft)
    {
        // In principle setting this once for all followers would be enough;
        // that refinement is not done here.
        AppendLogActiveTime = Util.Time.NowUnixMillis;

        if (connector.Pending != pending)
        {
            return;
        }

        // Clear first, so the early returns below do not each have to clear it.
        connector.Pending = null;

        if (!connector.IsHandshakeDone)
        {
            // Heartbeat will retry.
            return;
        }

        // [NOTE]
        // A snapshot is being installed; replicating log entries now would certainly fail.
        // It would still work without this check; it is an optimization.
        if (connector.InstallSnapshotting)
        {
            return;
        }

        if (connector.NextIndex > LastIndex)
        {
            return;
        }

        var firstToSend = ReadLogReverse(connector.NextIndex);
        if (firstToSend.Index == FirstIndex)
        {
            // The beginning of the log has been reached: there is no prev-log,
            // so entries cannot be replicated any more.
            // This usually happens when the leader took a snapshot while the follower's
            // log is even older. A newly started follower is the same.
            StartInstallSnapshot(connector);
            return;
        }

        // Index always increases, but the step is not confirmed to always be 1;
        // this handles steps other than 1.
        connector.NextIndex = firstToSend.Index;

        var request = new AppendEntries();
        connector.Pending = request;
        request.Argument.Term = Term;
        request.Argument.LeaderId = Raft.Name;
        request.Argument.LeaderCommit = CommitIndex;

        // This one is expected to always be found.
        var previous = ReadLogReverse(firstToSend.Index - 1);
        request.Argument.PrevLogIndex = previous.Index;
        request.Argument.PrevLogTerm = previous.Term;

        // Limit the number of entries sent at once. [NOTE] This is not required by raft.
        int remaining = Raft.RaftConfig.MaxAppendEntiresCount;
        RaftLog lastSent = firstToSend;
        var cursor = firstToSend;
        while (remaining > 0 && null != cursor && cursor.Index <= LastIndex)
        {
            lastSent = cursor;
            request.Argument.Entries.Add(new Binary(cursor.Encode()));
            cursor = ReadLogStart(cursor.Index + 1);
            --remaining;
        }
        request.Argument.LastEntryIndex = lastSent.Index;

        var sent = request.Send(
            connector.Socket,
            (p) => ProcessAppendEntriesResult(connector, p),
            Raft.RaftConfig.AppendEntriesTimeout);
        if (!sent)
        {
            connector.Pending = null;
            // Heartbeat will retry.
        }
    }
}
/// <summary>
/// Sends the snapshot file at <paramref name="path"/> to the peer behind
/// <paramref name="connector"/> in chunks of up to 32 KiB, one InstallSnapshot request per
/// chunk, for as long as this node remains leader.
/// </summary>
/// <remarks>
/// On exit (normal, early return, or exception) the connector's <c>InstallSnapshotting</c>
/// flag is cleared and the connector is removed from the <c>InstallSnapshotting</c> map.
/// Only after a successful install is <c>connector.NextIndex</c> re-initialized.
/// </remarks>
/// <param name="path">Path of the snapshot file to send.</param>
/// <param name="connector">Connector of the peer that receives the snapshot.</param>
private void InstallSnapshot(string path, Server.ConnectorEx connector)
{
    // Set only when the whole install finishes; early exits (return) leave it false.
    // The finally block below uses this flag.
    bool installSuccess = false;
    logger.Debug("{0} InstallSnapshot Start... Path={1} ToConnector={2}", Raft.Name, path, connector.Name);
    try
    {
        // Open read-only: FileStream(path, FileMode.Open) would request read/write access
        // while sharing only Read, so a second concurrent install of the same file could
        // not open it. The using statement closes the handle on every exit path.
        using (var snapshotFile = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            long offset = 0;
            var buffer = new byte[32 * 1024];
            var firstLog = ReadLog(FirstIndex);
            var trunkArg = new InstallSnapshotArgument();
            trunkArg.Term = Term;
            trunkArg.LeaderId = Raft.LeaderId;
            trunkArg.LastIncludedIndex = firstLog.Index;
            trunkArg.LastIncludedTerm = firstLog.Term;

            while (!trunkArg.Done && Raft.IsLeader)
            {
                int rc = snapshotFile.Read(buffer);
                trunkArg.Offset = offset;
                trunkArg.Data = new Binary(buffer, 0, rc);
                // NOTE(review): a short read is treated as end of file. Stream.Read is
                // allowed to return fewer bytes before the end — confirm this is acceptable.
                trunkArg.Done = rc < buffer.Length;
                offset += rc;

                if (trunkArg.Done)
                {
                    // The final chunk also carries the encoded first log entry.
                    trunkArg.LastIncludedLog = new Binary(firstLog.Encode());
                }

                // Send the current chunk, retrying until a reply arrives or leadership is lost.
                while (Raft.IsLeader)
                {
                    connector.HandshakeDoneEvent.WaitOne();
                    var future = new TaskCompletionSource<int>();
                    var r = new InstallSnapshot() { Argument = trunkArg };
                    if (!r.Send(connector.Socket,
                        (_) =>
                        {
                            future.SetResult(0);
                            return Procedure.Success;
                        }))
                    {
                        continue; // send failed: retry the same chunk
                    }

                    future.Task.Wait();
                    if (r.IsTimeout)
                    {
                        continue; // no reply in time: retry the same chunk
                    }

                    lock (Raft)
                    {
                        if (this.TrySetTerm(r.Result.Term))
                        {
                            // new term found.
                            Raft.ConvertStateTo(Raft.RaftState.Follower);
                            return;
                        }
                    }

                    switch (r.ResultCode)
                    {
                        case global::Zeze.Raft.InstallSnapshot.ResultCodeNewOffset:
                            break;

                        default:
                            logger.Warn($"InstallSnapshot Break ResultCode={r.ResultCode}");
                            return;
                    }

                    if (r.Result.Offset >= 0)
                    {
                        if (r.Result.Offset > snapshotFile.Length)
                        {
                            logger.Error($"InstallSnapshot.Result.Offset Too Big.{r.Result.Offset}/{snapshotFile.Length}");
                            return; // abort the install.
                        }

                        // Continue reading from the offset reported in the reply.
                        offset = r.Result.Offset;
                        snapshotFile.Seek(offset, SeekOrigin.Begin);
                    }

                    break;
                }
            }
        }

        installSuccess = Raft.IsLeader;
        if (installSuccess)
        {
            // Only report success when the loop did not end because leadership was lost.
            logger.Debug("{0} InstallSnapshot [SUCCESS] Path={1} ToConnector={2}", Raft.Name, path, connector.Name);
        }
    }
    finally
    {
        lock (Raft)
        {
            connector.InstallSnapshotting = false;
            InstallSnapshotting.Remove(connector.Name);
            if (installSuccess)
            {
                // Install finished: re-initialize so later AppendEntries can keep working.
                // Start from FirstIndex + 1; ReadLogStart is used in case indexes are
                // not allocated contiguously.
                var next = ReadLogStart(FirstIndex + 1);
                connector.NextIndex = next == null ? FirstIndex + 1 : next.Index;
            }
        }
    }
}