private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation) { _debugRecorder.Start(); // only the leader can send append entries, so if we accepted it, it's the leader if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}"); } if (negotiation.Term != _term) { // Our leader is no longer a valid one var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader"; _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Rejected, Message = msg, CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); throw new InvalidOperationException($"We close this follower because: {msg}"); } long prevTerm; using (context.OpenReadTransaction()) { prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0; } if (prevTerm != negotiation.PrevLogTerm) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index"); } // we now have a mismatch with the log position, and need to negotiate it with // the leader NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation); } else { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state"); } // this (or the negotiation above) completes the negotiation process _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}", CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); } _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot"); _engine.Timeout.Defer(_connection.Source); // at this point, the leader will send us a snapshot message // in most cases, it is an empty snapshot, then start regular append entries // the reason we send this is to simplify the # of states in the protocol var snapshot = _connection.ReadInstallSnapshot(context); _debugRecorder.Record("Snapshot received"); using (context.OpenWriteTransaction()) { var lastTerm = _engine.GetTermFor(context, snapshot.LastIncludedIndex); var lastCommitIndex = _engine.GetLastEntryIndex(context); if (snapshot.LastIncludedTerm == lastTerm && snapshot.LastIncludedIndex < lastCommitIndex) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it"); } //This is okay to ignore because we will just get the committed entries again and skip them ReadInstallSnapshotAndIgnoreContent(context); } else if (InstallSnapshot(context)) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info( $"{ToString()}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} "); } _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm); } else { var lastEntryIndex = _engine.GetLastEntryIndex(context); if (lastEntryIndex < snapshot.LastIncludedIndex) { var message = $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } } // snapshot always has the latest topology if (snapshot.Topology == null) { const string message = "Expected to get topology on snapshot"; if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: {message}"); } throw new InvalidOperationException(message); } using (var topologyJson = context.ReadObject(snapshot.Topology, "topology")) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: topology on install snapshot: {topologyJson}"); } var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson); RachisConsensus.SetTopology(_engine, context, topology); } context.Transaction.Commit(); } _engine.Timeout.Defer(_connection.Source); _debugRecorder.Record("Invoking StateMachine.SnapshotInstalled"); // notify the state machine, we do this in an async manner, and start // the operator in a separate thread to avoid timeouts while this is // going on var task = Task.Run(() => _engine.SnapshotInstalledAsync(snapshot.LastIncludedIndex)); var sp = Stopwatch.StartNew(); var timeToWait = (int)(_engine.ElectionTimeout.TotalMilliseconds / 4); while (task.Wait(timeToWait) == false) { // this may take a while, so we let the other side know that // we are still processing, and we reset our own timer while // this is happening MaybeNotifyLeaderThatWeAreStillAlive(context, sp); } _debugRecorder.Record("Done with StateMachine.SnapshotInstalled"); // we might have moved from passive node, so we need to start the timeout clock _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout); _debugRecorder.Record("Snapshot installed"); //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater //So we could validate that the entries sent by the leader are indeed the same as the ones we have. _connection.Send(context, new InstallSnapshotResponse { Done = true, CurrentTerm = _term, LastLogIndex = snapshot.LastIncludedIndex }); _engine.Timeout.Defer(_connection.Source); }
private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation) { _debugRecorder.Start(); // only the leader can send append entries, so if we accepted it, it's the leader if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}."); } if (negotiation.Term != _term) { // Our leader is no longer a valid one var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader"; _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Rejected, Message = msg, CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); throw new InvalidOperationException($"We close this follower because: {msg}"); } long prevTerm; bool requestSnapshot; using (context.OpenReadTransaction()) { prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0; requestSnapshot = _engine.GetSnapshotRequest(context); } if (requestSnapshot) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Request snapshot by the admin"); } _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = "Request snapshot by the admin", CurrentTerm = _term, LastLogIndex = 0 }); } else if (prevTerm != negotiation.PrevLogTerm) { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" + " will negotiate to find next matched index"); } // we now have a mismatch with the log position, and need to negotiate it with // the leader NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation); } else { if (_engine.Log.IsInfoEnabled) { _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state"); } // this (or the negotiation above) completes the negotiation process _connection.Send(context, new LogLengthNegotiationResponse { Status = LogLengthNegotiationResponse.ResponseStatus.Acceptable, Message = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}", CurrentTerm = _term, LastLogIndex = negotiation.PrevLogIndex }); } _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot"); _engine.Timeout.Defer(_connection.Source); // at this point, the leader will send us a snapshot message // in most cases, it is an empty snapshot, then start regular append entries // the reason we send this is to simplify the # of states in the protocol var snapshot = _connection.ReadInstallSnapshot(context); _debugRecorder.Record("Start receiving snapshot"); // reading the snapshot from network and committing it to the disk might take a long time. using (var cts = new CancellationTokenSource()) { KeepAliveAndExecuteAction(() => ReadAndCommitSnapshot(context, snapshot, cts.Token), cts, "ReadAndCommitSnapshot"); } _debugRecorder.Record("Snapshot was received and committed"); // notify the state machine, we do this in an async manner, and start // the operator in a separate thread to avoid timeouts while this is // going on using (var cts = new CancellationTokenSource()) { KeepAliveAndExecuteAction(() => _engine.SnapshotInstalledAsync(snapshot.LastIncludedIndex, cts.Token), cts, "SnapshotInstalledAsync"); } _debugRecorder.Record("Done with StateMachine.SnapshotInstalled"); _debugRecorder.Record("Snapshot installed"); //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater //So we could validate that the entries sent by the leader are indeed the same as the ones we have. _connection.Send(context, new InstallSnapshotResponse { Done = true, CurrentTerm = _term, LastLogIndex = snapshot.LastIncludedIndex }); _engine.Timeout.Defer(_connection.Source); }