public void Send(JsonOperationContext context, LogLengthNegotiation lln) { if (_log.IsInfoEnabled) { _log.Info($"Log length negotiation request with ({lln.PrevLogIndex} / {lln.PrevLogTerm}), term: {lln.Term}, Truncated: {lln.Truncated}"); } Send(context, new DynamicJsonValue { ["Type"] = nameof(LogLengthNegotiation), [nameof(LogLengthNegotiation.Term)] = lln.Term, [nameof(LogLengthNegotiation.PrevLogIndex)] = lln.PrevLogIndex, [nameof(LogLengthNegotiation.PrevLogTerm)] = lln.PrevLogTerm, [nameof(LogLengthNegotiation.Truncated)] = lln.Truncated, [nameof(LogLengthNegotiation.SendingThread)] = Thread.CurrentThread.ManagedThreadId }); }
public void AcceptConnection(LogLengthNegotiation negotiation) { _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout); // if leader / candidate, this remove them from play and revert to follower mode _engine.SetNewState(RachisState.Follower, this, _term, $"Accepted a new connection from {_connection.Source} in term {negotiation.Term}"); _engine.LeaderTag = _connection.Source; _debugRecorder.Record("Follower connection accepted"); _folloowerLongRunningWork = PoolOfThreads.GlobalRavenThreadPool.LongRunning( action: x => Run(x), state: negotiation, name: $"Follower thread from {_connection} in term {negotiation.Term}"); }
private bool CanHandleLogDivergence(ClusterOperationContext context, LogLengthNegotiation negotiation, ref long midpointIndex, ref long midpointTerm, ref long minIndex, ref long maxIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Our appended entries has been diverged, same index with different terms. " + $"My index/term {midpointIndex}/{midpointTerm}, while yours is {negotiation.PrevLogIndex}/{negotiation.PrevLogTerm}."); } using (context.OpenReadTransaction()) { do { // try to find any log in the previous term midpointIndex--; midpointTerm = _engine.GetTermFor(context, midpointIndex) ?? 0; } while (midpointTerm >= negotiation.PrevLogTerm && midpointIndex > 0); if (midpointTerm == 0 || midpointIndex == 0) { return(false); } // start the binary search again with those boundaries minIndex = Math.Min(midpointIndex, _engine.GetFirstEntryIndex(context)); maxIndex = midpointIndex; midpointIndex = (minIndex + maxIndex) / 2; midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); if (maxIndex < minIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got minIndex: {minIndex} bigger than maxIndex: {maxIndex} will request the entire snapshot. " + $"midpointIndex: {midpointIndex}, midpointTerm: {midpointTerm}."); } Debug.Assert(false, "This is a safeguard against any potential bug here, so in worst case we request the entire snapshot"); return(false); } } return(true); }
private bool NegotiateMatchEntryWithLeaderAndApplyEntries(ClusterOperationContext context, RemoteConnection connection, LogLengthNegotiation negotiation) { long minIndex; long maxIndex; long midpointTerm; long midpointIndex; using (context.OpenReadTransaction()) { minIndex = _engine.GetFirstEntryIndex(context); if (minIndex == 0) // no entries at all { RequestAllEntries(context, connection, "No entries at all here, give me everything from the start"); return(true); // leader will know where to start from here } maxIndex = Math.Min( _engine.GetLastEntryIndex(context), // max negotiation.PrevLogIndex ); midpointIndex = (maxIndex + minIndex) / 2; midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } while ((midpointTerm == negotiation.PrevLogTerm && midpointIndex == negotiation.PrevLogIndex) == false) { _engine.Timeout.Defer(_connection.Source); _engine.ValidateTerm(_term); if (midpointIndex == negotiation.PrevLogIndex && midpointTerm != negotiation.PrevLogTerm) { // our appended entries has been diverged, same index with different terms. if (CanHandleLogDivergence(context, negotiation, ref midpointIndex, ref midpointTerm, ref minIndex, ref maxIndex) == false) { RequestAllEntries(context, connection, "all my entries are invalid, will require snapshot."); return(true); } } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Negotiation, Message = $"Term/Index mismatch from leader, need to figure out at what point the logs match, range: {maxIndex} - {minIndex} | {midpointIndex} in term {midpointTerm}", CurrentTerm = _term, MaxIndex = maxIndex, MinIndex = minIndex, MidpointIndex = midpointIndex, MidpointTerm = midpointTerm }); negotiation = connection.Read <LogLengthNegotiation>(context); _engine.Timeout.Defer(_connection.Source); if (negotiation.Truncated) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a truncated response from the leader will request all entries"); } RequestAllEntries(context, connection, "We have entries that are already truncated at the leader, will ask for full snapshot"); return(true); } using (context.OpenReadTransaction()) { if (_engine.GetTermFor(context, negotiation.PrevLogIndex) == negotiation.PrevLogTerm) { minIndex = Math.Min(midpointIndex + 1, maxIndex); } else { maxIndex = Math.Max(midpointIndex - 1, minIndex); } } midpointIndex = (maxIndex + minIndex) / 2; using (context.OpenReadTransaction()) midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: agreed upon last matched index = {midpointIndex} on term = {midpointTerm}"); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {midpointIndex} with term {midpointTerm}", CurrentTerm = _term, LastLogIndex = midpointIndex, }); return(false); }
private void NegotiateWithLeader(ClusterOperationContext context, LogLengthNegotiation negotiation) { _debugRecorder.Start(); // only the leader can send append entries, so if we accepted it, it's the leader if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}."); } if (negotiation.Term != _term) { // Our leader is no longer a valid one var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader"; _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Rejected, Message = msg, CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); throw new InvalidOperationException($"We close this follower because: {msg}"); } long prevTerm; bool requestSnapshot; bool requestFullSnapshot = false; using (context.OpenReadTransaction()) { prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0; requestSnapshot = _engine.GetSnapshotRequest(context); } if (requestSnapshot) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Request snapshot by the admin"); } _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = "Request snapshot by the admin", CurrentTerm = _term, LastLogIndex = 0 }); requestFullSnapshot = true; } else if (prevTerm != negotiation.PrevLogTerm) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index"); } // we now have a mismatch with the log position, and need to negotiate it with // the leader requestFullSnapshot = NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation); } else { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state"); } // this (or the negotiation above) completes the negotiation process _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}", CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); } _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot"); _engine.Timeout.Defer(_connection.Source); // at this point, the leader will send us a snapshot message // in most cases, it is an empty snapshot, then start regular append entries // the reason we send this is to simplify the # of states in the protocol var snapshot = _connection.ReadInstallSnapshot(context); _debugRecorder.Record("Start receiving snapshot"); // reading the snapshot from network and committing it to the disk might take a long time. using (var cts = new CancellationTokenSource()) { KeepAliveAndExecuteAction(() => ReadAndCommitSnapshot(context, snapshot, cts.Token), cts, "ReadAndCommitSnapshot"); } _debugRecorder.Record("Snapshot was received and committed"); // notify the state machine, we do this in an async manner, and start // the operator in a separate thread to avoid timeouts while this is // going on using (var cts = new CancellationTokenSource()) { KeepAliveAndExecuteAction(() => { _engine.SnapshotInstalled(snapshot.LastIncludedIndex, requestFullSnapshot, cts.Token); }, cts, "SnapshotInstalledAsync"); } _debugRecorder.Record("Done with StateMachine.SnapshotInstalled"); _debugRecorder.Record("Snapshot installed"); //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater //So we could validate that the entries sent by the leader are indeed the same as the ones we have. _connection.Send(context, new InstallSnapshotResponse { Done = true, CurrentTerm = _term, LastLogIndex = snapshot.LastIncludedIndex }); _engine.Timeout.Defer(_connection.Source); }
public static bool CheckIfValidLeader(RachisConsensus engine, RemoteConnection connection, out LogLengthNegotiation negotiation) { negotiation = null; using (engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context)) { var logLength = connection.Read <LogLengthNegotiation>(context); if (logLength.Term < engine.CurrentTerm) { var msg = $"The incoming term {logLength.Term} is smaller than current term {engine.CurrentTerm} and is therefor rejected (From thread: {logLength.SendingThread})"; if (engine.Log.IsInfoEnabled) { engine.Log.Info(msg); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Rejected, Message = msg, CurrentTerm = engine.CurrentTerm }); connection.Dispose(); return(false); } if (engine.Log.IsInfoEnabled) { engine.Log.Info($"The incoming term { logLength.Term} is from a valid leader (From thread: {logLength.SendingThread})"); } engine.FoundAboutHigherTerm(logLength.Term, "Setting the term of the new leader"); engine.Timeout.Defer(connection.Source); negotiation = logLength; } return(true); }
private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation) { _debugRecorder.Start(); // only the leader can send append entries, so if we accepted it, it's the leader if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}"); } if (negotiation.Term != _term) { // Our leader is no longer a valid one var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader"; _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Rejected, Message = msg, CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); throw new InvalidOperationException($"We close this follower because: {msg}"); } long prevTerm; using (context.OpenReadTransaction()) { prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0; } if (prevTerm != negotiation.PrevLogTerm) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index"); } // we now have a mismatch with the log position, and need to negotiate it with // the leader NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation); } else { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state"); } // this (or the negotiation above) completes the negotiation process _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}", CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); } _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot"); _engine.Timeout.Defer(_connection.Source); // at this point, the leader will send us a snapshot message // in most cases, it is an empty snapshot, then start regular append entries // the reason we send this is to simplify the # of states in the protocol var snapshot = _connection.ReadInstallSnapshot(context); _debugRecorder.Record("Snapshot received"); using (context.OpenWriteTransaction()) { var lastTerm = _engine.GetTermFor(context, snapshot.LastIncludedIndex); var lastCommitIndex = _engine.GetLastEntryIndex(context); if (snapshot.LastIncludedTerm == lastTerm && snapshot.LastIncludedIndex < lastCommitIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it"); } //This is okay to ignore because we will just get the committed entries again and skip them ReadInstallSnapshotAndIgnoreContent(context); } else if (InstallSnapshot(context)) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} "); } _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); } else { var lastEntryIndex = _engine.GetLastEntryIndex(context); if (lastEntryIndex < snapshot.LastIncludedIndex) { var message = $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } } // snapshot always has the latest topology if (snapshot.Topology == null) { const string message = "Expected to get topology on snapshot"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } using (var topologyJson = context.ReadObject(snapshot.Topology, "topology")) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: topology on install snapshot: {topologyJson}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson); RachisConsensus.SetTopology(_engine, context, topology); } context.Transaction.Commit(); } _engine.Timeout.Defer(_connection.Source); _debugRecorder.Record("Invoking StateMachine.SnapshotInstalled"); // notify the state machine, we do this in an async manner, and start // the operator in a separate thread to avoid timeouts while this is // going on var task = Task.Run(() => _engine.SnapshotInstalledAsync(snapshot.LastIncludedIndex)); var sp = Stopwatch.StartNew(); var timeToWait = (int)(_engine.ElectionTimeout.TotalMilliseconds / 4); while (task.Wait(timeToWait) == false) { // this may take a while, so we let the other side know that // we are still processing, and we reset our own timer while // this is happening MaybeNotifyLeaderThatWeAreStillAlive(context, sp); } _debugRecorder.Record("Done with StateMachine.SnapshotInstalled"); // we might have moved from passive node, so we need to start the timeout clock _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout); _debugRecorder.Record("Snapshot installed"); //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater //So we could validate that the entries sent by the leader are indeed the same as the ones we have. _connection.Send(context, new InstallSnapshotResponse { Done = true, CurrentTerm = _term, LastLogIndex = snapshot.LastIncludedIndex }); _engine.Timeout.Defer(_connection.Source); }
private void NegotiateMatchEntryWithLeaderAndApplyEntries(TransactionOperationContext context, RemoteConnection connection, LogLengthNegotiation negotiation) { long minIndex; long maxIndex; long midpointTerm; long midpointIndex; using (context.OpenReadTransaction()) { minIndex = _engine.GetFirstEntryIndex(context); if (minIndex == 0) // no entries at all { connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = "No entries at all here, give me everything from the start", CurrentTerm = _term, LastLogIndex = 0 }); return; // leader will know where to start from here } maxIndex = Math.Min( _engine.GetLastEntryIndex(context), // max negotiation.PrevLogIndex ); midpointIndex = (maxIndex + minIndex) / 2; midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } while (minIndex < maxIndex) { _engine.Timeout.Defer(_connection.Source); _engine.ValidateTerm(_term); connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Negotiation, Message = $"Term/Index mismatch from leader, need to figure out at what point the logs match, range: {maxIndex} - {minIndex} | {midpointIndex} in term {midpointTerm}", CurrentTerm = _term, MaxIndex = maxIndex, MinIndex = minIndex, MidpointIndex = midpointIndex, MidpointTerm = midpointTerm }); var response = connection.Read <LogLengthNegotiation>(context); _engine.Timeout.Defer(_connection.Source); if (response.Truncated) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a truncated response from the leader will request all entries"); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = "We have entries that are already truncated at the leader, will ask for full snapshot", CurrentTerm = _term, LastLogIndex = 0 }); return; } using (context.OpenReadTransaction()) { if (_engine.GetTermFor(context, response.PrevLogIndex) == response.PrevLogTerm) { minIndex = Math.Min(midpointIndex + 1, maxIndex); } else { maxIndex = Math.Max(midpointIndex - 1, minIndex); } } midpointIndex = (maxIndex + minIndex) / 2; using (context.OpenReadTransaction()) midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex); } if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: agreed upon last matched index = {midpointIndex} on term = {_term}"); } connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {midpointIndex} with term {midpointTerm}", CurrentTerm = _term, LastLogIndex = midpointIndex }); }
private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation) { // only the leader can send append entries, so if we accepted it, it's the leader if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request for term {negotiation.Term} where our term is {_engine.CurrentTerm}"); } if (negotiation.Term > _engine.CurrentTerm) { _engine.FoundAboutHigherTerm(negotiation.Term); } long prevTerm; using (context.OpenReadTransaction()) { prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0; } if (prevTerm != negotiation.PrevLogTerm) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index"); } // we now have a mismatch with the log position, and need to negotiate it with // the leader NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation); } else { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with identical PrevLogTerm will continue to steady state"); } // this (or the negotiation above) completes the negotiation process _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}", CurrentTerm = _engine.CurrentTerm, LastLogIndex = negotiation.PrevLogIndex }); } _engine.Timeout.Defer(_connection.Source); // at this point, the leader will send us a snapshot message // in most cases, it is an empty snapshot, then start regular append entries // the reason we send this is to simplify the # of states in the protocol var snapshot = _connection.ReadInstallSnapshot(context); using (context.OpenWriteTransaction()) { var lastCommitIndex = _engine.GetLastCommitIndex(context); if (snapshot.LastIncludedIndex < lastCommitIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"Follower {_engine.Tag}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it"); } //This is okay to ignore because we will just get the commited entries again and skip them ReadInstallSnapshotAndIgnoreContent(context); } else if (InstallSnapshot(context)) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"Follower {_engine.Tag}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} "); } _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); } else { var lastEntryIndex = _engine.GetLastEntryIndex(context); if (lastEntryIndex < snapshot.LastIncludedIndex) { var message = $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: {message}"); } throw new InvalidOperationException(message); } } // snapshot always has the latest topology if (snapshot.Topology == null) { const string message = "Expected to get topology on snapshot"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: {message}"); } throw new InvalidOperationException(message); } using (var topologyJson = context.ReadObject(snapshot.Topology, "topology")) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"Follower {_engine.Tag}: topology on install snapshot: {topologyJson}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson); RachisConsensus.SetTopology(_engine, context, topology); } context.Transaction.Commit(); } //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater //So we could validate that the entries sent by the leader are indeed the same as the ones we have. _connection.Send(context, new InstallSnapshotResponse { Done = true, CurrentTerm = _engine.CurrentTerm, LastLogIndex = snapshot.LastIncludedIndex }); _engine.Timeout.Defer(_connection.Source); // notify the state machine _engine.SnapshotInstalled(context, snapshot.LastIncludedIndex); _engine.Timeout.Defer(_connection.Source); }