/// <summary>
/// Reads a <c>LogLengthNegotiation</c> message from <paramref name="connection"/> and compares its term
/// with the engine's current term to decide whether the sender is accepted as leader.
/// </summary>
/// <param name="engine">Engine that supplies the context pool, the current term, the log and the election timeout.</param>
/// <param name="connection">Connection the message is read from. When the incoming term is stale, a rejection is sent on it and it is disposed.</param>
/// <param name="negotiation">Set to the message that was read when the method returns <c>true</c>; <c>null</c> otherwise.</param>
/// <returns><c>false</c> when the incoming term is smaller than the engine's current term; <c>true</c> otherwise.</returns>
public static bool CheckIfValidLeader(RachisConsensus engine, RemoteConnection connection, out LogLengthNegotiation negotiation)
{
    negotiation = null;

    using (engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context))
    {
        var logLength = connection.Read<LogLengthNegotiation>(context);

        if (logLength.Term >= engine.CurrentTerm)
        {
            // The sender's term is at least as recent as ours: accept it.
            if (engine.Log.IsInfoEnabled)
            {
                engine.Log.Info($"The incoming term { logLength.Term} is from a valid leader (From thread: {logLength.SendingThread})");
            }

            engine.FoundAboutHigherTerm(logLength.Term, "Setting the term of the new leader");
            engine.Timeout.Defer(connection.Source);
            negotiation = logLength;
            return true;
        }

        // Stale term: tell the sender why it is rejected, then drop the connection.
        var rejectionMessage = $"The incoming term {logLength.Term} is smaller than current term {engine.CurrentTerm} and is therefor rejected (From thread: {logLength.SendingThread})";
        if (engine.Log.IsInfoEnabled)
        {
            engine.Log.Info(rejectionMessage);
        }

        var rejection = new LogLengthNegotiationResponse
        {
            Status = LogLengthNegotiationResponse.ResponseStatus.Rejected,
            Message = rejectionMessage,
            CurrentTerm = engine.CurrentTerm
        };
        connection.Send(context, rejection);
        connection.Dispose();
        return false;
    }
}
/// <summary>
/// This method may run for a long while, as we are trying to get agreement
/// from a majority of the cluster
/// </summary>
/// <remarks>
/// While the candidate is running and this ambassador is not disposed, each iteration:
/// connects to the peer, sends a <c>RachisHello</c> with <c>InitialMessageType.RequestVote</c>,
/// and then repeatedly sends <c>RequestVote</c> messages. A trial vote is requested first unless
/// the election is forced and <c>RunRealElectionAtTerm</c> equals the current election term;
/// the real vote follows. Results are recorded in <c>NotInTopology</c>,
/// <c>TrialElectionWonAtTerm</c> and <c>RealElectionWonAtTerm</c>.
/// Cancellation and disposal exceptions close the ambassador; any other failure disposes the
/// connection and retries after the candidate signals a state change.
/// </remarks>
private void Run()
{
    try
    {
        while (_candidate.Running && _disposed == false)
        {
            try
            {
                Stream stream;
                try
                {
                    using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                    {
                        // Run() is synchronous, so the connect task is blocked on here.
                        stream = _engine.ConnectToPeer(_url, _certificate, context).Result;
                    }

                    if (_candidate.Running == false)
                    {
                        // The stream has not been wrapped in a RemoteConnection yet, so nothing
                        // else will ever dispose it; release it before leaving the loop.
                        stream?.Dispose();
                        break;
                    }
                }
                catch (Exception e)
                {
                    Status = AmbassadorStatus.FailedToConnect;
                    StatusMessage = $"Failed to connect with {_tag}.{Environment.NewLine} " + e.Message;
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to connect to remote peer: " + _url, e);
                    }
                    // wait a bit
                    _candidate.WaitForChangeInState();
                    continue; // we'll retry connecting
                }

                Status = AmbassadorStatus.Connected;
                StatusMessage = $"Connected to {_tag}";

                Connection = new RemoteConnection(_tag, _engine.Tag, stream);
                using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                {
                    ClusterTopology topology;
                    long lastLogIndex;
                    long lastLogTerm;
                    // Read the topology and our last log position once; they are sent with every vote request below.
                    using (context.OpenReadTransaction())
                    {
                        topology = _engine.GetTopology(context);
                        lastLogIndex = _engine.GetLastEntryIndex(context);
                        lastLogTerm = _engine.GetTermForKnownExisting(context, lastLogIndex);
                    }
                    Debug.Assert(topology.TopologyId != null);
                    Connection.Send(context, new RachisHello
                    {
                        TopologyId = topology.TopologyId,
                        DebugSourceIdentifier = _engine.Tag,
                        DebugDestinationIdentifier = _tag,
                        InitialMessageType = InitialMessageType.RequestVote
                    });

                    while (_candidate.Running)
                    {
                        RequestVoteResponse rvr;
                        // Capture both terms once so the whole round uses a consistent view.
                        var currentElectionTerm = _candidate.ElectionTerm;
                        var engineCurrentTerm = _engine.CurrentTerm;
                        if (_candidate.IsForcedElection == false ||
                            _candidate.RunRealElectionAtTerm != currentElectionTerm)
                        {
                            // Trial election round.
                            Connection.Send(context, new RequestVote
                            {
                                Source = _engine.Tag,
                                Term = currentElectionTerm,
                                IsForcedElection = false,
                                IsTrialElection = true,
                                LastLogIndex = lastLogIndex,
                                LastLogTerm = lastLogTerm
                            });

                            rvr = Connection.Read<RequestVoteResponse>(context);
                            if (rvr.Term > currentElectionTerm)
                            {
                                var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}";
                                // we need to abort the current elections
                                _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message);
                                if (_engine.Log.IsInfoEnabled)
                                {
                                    _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}");
                                }
                                _engine.FoundAboutHigherTerm(rvr.Term);
                                throw new InvalidOperationException(message);
                            }
                            NotInTopology = rvr.NotInTopology;
                            if (rvr.VoteGranted == false)
                            {
                                if (_engine.Log.IsInfoEnabled)
                                {
                                    _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}");
                                }
                                // we got a negative response here, so we can't proceed
                                // we'll need to wait until the candidate has done something, like
                                // change term or given up
                                _candidate.WaitForChangeInState();
                                continue;
                            }
                            TrialElectionWonAtTerm = rvr.Term;

                            _candidate.WaitForChangeInState();
                        }

                        // Real election round.
                        Connection.Send(context, new RequestVote
                        {
                            Source = _engine.Tag,
                            Term = currentElectionTerm,
                            IsForcedElection = _candidate.IsForcedElection,
                            IsTrialElection = false,
                            LastLogIndex = lastLogIndex,
                            LastLogTerm = lastLogTerm
                        });

                        rvr = Connection.Read<RequestVoteResponse>(context);
                        if (rvr.Term > currentElectionTerm)
                        {
                            var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}";
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}");
                            }
                            // we need to abort the current elections
                            _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message);
                            _engine.FoundAboutHigherTerm(rvr.Term);
                            throw new InvalidOperationException(message);
                        }
                        NotInTopology = rvr.NotInTopology;
                        if (rvr.VoteGranted == false)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}");
                            }
                            // we got a negative response here, so we can't proceed
                            // we'll need to wait until the candidate has done something, like
                            // change term or given up
                            _candidate.WaitForChangeInState();
                            continue;
                        }
                        RealElectionWonAtTerm = rvr.Term;
                        _candidate.WaitForChangeInState();
                    }
                    SendElectionResult();
                }
            }
            catch (OperationCanceledException)
            {
                Status = AmbassadorStatus.Closed;
                StatusMessage = "Closed";
                SendElectionResult();
                break;
            }
            catch (ObjectDisposedException)
            {
                Status = AmbassadorStatus.Closed;
                StatusMessage = "Closed";
                SendElectionResult();
                break;
            }
            // Blocking on a task wraps its failure in an AggregateException; unwrap the same two "closed" cases.
            catch (AggregateException ae) when (ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException)
            {
                Status = AmbassadorStatus.Closed;
                StatusMessage = "Closed";
                SendElectionResult();
                break;
            }
            catch (Exception e)
            {
                Status = AmbassadorStatus.FailedToConnect;
                StatusMessage = $"Failed to get vote from {_tag}.{Environment.NewLine}" + e.Message;
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to get vote from remote peer url={_url} tag={_tag}", e);
                }
                // Drop the broken connection; the outer loop reconnects after the candidate changes state.
                Connection?.Dispose();
                _candidate.WaitForChangeInState();
            }
        }
    }
    catch (Exception e)
    {
        Status = AmbassadorStatus.FailedToConnect;
        StatusMessage = $"Failed to talk to {_url}.{Environment.NewLine}" + e;
        if (_engine.Log.IsInfoEnabled)
        {
            _engine.Log.Info("Failed to talk to remote peer: " + _url, e);
        }
    }
    finally
    {
        // The connection is kept open only when the election was won.
        // NOTE(review): presumably a winning candidate hands the connection over for reuse - confirm against the caller.
        if (_candidate.ElectionResult != ElectionResult.Won)
        {
            Connection?.Dispose();
        }
    }
}
/// <summary>
/// Finishes the log-length negotiation with the leader, then reads and processes the
/// install-snapshot message that follows it and acknowledges it.
/// </summary>
/// <param name="context">Operation context used for the read/write transactions and for sending and reading messages.</param>
/// <param name="negotiation">The leader's negotiation request (term, previous log index and previous log term).</param>
/// <exception cref="InvalidOperationException">
/// Thrown when the snapshot was not installed and our last entry index is below the snapshot's
/// last included index, or when the snapshot carries no topology.
/// </exception>
private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation)
{
    // only the leader can send append entries, so if we accepted it, it's the leader
    if (_engine.Log.IsInfoEnabled)
    {
        _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request for term {negotiation.Term} where our term is {_engine.CurrentTerm}");
    }

    if (negotiation.Term > _engine.CurrentTerm)
    {
        _engine.FoundAboutHigherTerm(negotiation.Term);
    }

    long prevTerm;
    using (context.OpenReadTransaction())
    {
        prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0;
    }

    if (prevTerm == negotiation.PrevLogTerm)
    {
        if (_engine.Log.IsInfoEnabled)
        {
            _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with identical PrevLogTerm will continue to steady state");
        }

        // this (or the negotiation in the other branch) completes the negotiation process
        var accepted = new LogLengthNegotiationResponse
        {
            Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
            Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}",
            CurrentTerm = _engine.CurrentTerm,
            LastLogIndex = negotiation.PrevLogIndex
        };
        _connection.Send(context, accepted);
    }
    else
    {
        if (_engine.Log.IsInfoEnabled)
        {
            _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index");
        }

        // we now have a mismatch with the log position, and need to negotiate it with
        // the leader
        NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation);
    }

    _engine.Timeout.Defer(_connection.Source);

    // at this point, the leader will send us a snapshot message
    // in most cases, it is an empty snapshot, then start regular append entries
    // the reason we send this is to simplify the # of states in the protocol
    var snapshot = _connection.ReadInstallSnapshot(context);

    using (context.OpenWriteTransaction())
    {
        var lastCommitIndex = _engine.GetLastCommitIndex(context);
        var snapshotIsBehindUs = snapshot.LastIncludedIndex < lastCommitIndex;

        if (snapshotIsBehindUs)
        {
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info(
                    $"Follower {_engine.Tag}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it");
            }

            // This is okay to ignore because we will just get the committed entries again and skip them
            ReadInstallSnapshotAndIgnoreContent(context);
        }
        else
        {
            var installed = InstallSnapshot(context);
            if (installed)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info(
                        $"Follower {_engine.Tag}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} ");
                }

                _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
            }
            else
            {
                // Nothing was installed; that is only acceptable if our log already reaches the snapshot's index.
                var lastEntryIndex = _engine.GetLastEntryIndex(context);
                if (lastEntryIndex < snapshot.LastIncludedIndex)
                {
                    var message =
                        $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}";
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Follower {_engine.Tag}: {message}");
                    }

                    throw new InvalidOperationException(message);
                }
            }
        }

        // snapshot always has the latest topology
        if (snapshot.Topology == null)
        {
            const string message = "Expected to get topology on snapshot";
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Follower {_engine.Tag}: {message}");
            }

            throw new InvalidOperationException(message);
        }

        using (var topologyJson = context.ReadObject(snapshot.Topology, "topology"))
        {
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Follower {_engine.Tag}: topology on install snapshot: {topologyJson}");
            }

            var clusterTopology = JsonDeserializationRachis<ClusterTopology>.Deserialize(topologyJson);
            RachisConsensus.SetTopology(_engine, context, clusterTopology);
        }

        context.Transaction.Commit();
    }

    // Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater
    // So we could validate that the entries sent by the leader are indeed the same as the ones we have.
    var snapshotAck = new InstallSnapshotResponse
    {
        Done = true,
        CurrentTerm = _engine.CurrentTerm,
        LastLogIndex = snapshot.LastIncludedIndex
    };
    _connection.Send(context, snapshotAck);

    _engine.Timeout.Defer(_connection.Source);

    // notify the state machine
    _engine.SnapshotInstalled(context, snapshot.LastIncludedIndex);

    _engine.Timeout.Defer(_connection.Source);
}