예제 #1
0
        private HandleVoteResult ShouldGrantVote(TransactionOperationContext context, long lastIndex, RequestVote rv)
        {
            var result = new HandleVoteResult();
            var lastLogIndexUnderWriteLock = _engine.GetLastEntryIndex(context);
            var lastLogTermUnderWriteLock  = _engine.GetTermFor(context, lastLogIndexUnderWriteLock);

            if (lastLogIndexUnderWriteLock != lastIndex)
            {
                result.DeclineVote   = true;
                result.DeclineReason = "Log was changed";
                return(result);
            }

            if (lastLogTermUnderWriteLock > rv.LastLogTerm)
            {
                result.DeclineVote   = true;
                result.DeclineReason = $"My last log term {lastLogTermUnderWriteLock}, is higher than yours {rv.LastLogTerm}.";
                return(result);
            }

            if (lastLogIndexUnderWriteLock > rv.LastLogIndex)
            {
                result.DeclineVote   = true;
                result.DeclineReason = $"Vote declined because my last log index {lastLogIndexUnderWriteLock} is more up to date than yours {rv.LastLogIndex}";
                return(result);
            }

            var(whoGotMyVoteIn, votedTerm) = _engine.GetWhoGotMyVoteIn(context, rv.Term);
            result.VotedTerm = votedTerm;

            if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source)
            {
                result.DeclineVote   = true;
                result.DeclineReason = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}";
                return(result);
            }

            if (votedTerm >= rv.Term)
            {
                result.DeclineVote   = true;
                result.DeclineReason = $"Already voted in {rv.LastLogTerm}, for another node in higher term: {votedTerm}";
                return(result);
            }

            return(result);
        }
예제 #2
0
        private (bool HasRemovedFromTopology, long LastAcknowledgedIndex, long LastTruncate, long LastCommit)  ApplyLeaderStateToLocalState(Stopwatch sp, ClusterOperationContext context, List <RachisEntry> entries, AppendEntries appendEntries)
        {
            long lastTruncate;
            long lastCommit;

            bool removedFromTopology = false;

            // we start the tx after we finished reading from the network
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Ready to start tx in {sp.Elapsed}");
            }

            using (var tx = context.OpenWriteTransaction())
            {
                _engine.ValidateTerm(_term);

                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Tx running in {sp.Elapsed}");
                }

                if (entries.Count > 0)
                {
                    var(lastTopology, lastTopologyIndex) = _engine.AppendToLog(context, entries);
                    using (lastTopology)
                    {
                        if (lastTopology != null)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Topology changed to {lastTopology}");
                            }

                            var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology);

                            if (topology.Members.ContainsKey(_engine.Tag) ||
                                topology.Promotables.ContainsKey(_engine.Tag) ||
                                topology.Watchers.ContainsKey(_engine.Tag))
                            {
                                RachisConsensus.SetTopology(_engine, context, topology);
                            }
                            else
                            {
                                removedFromTopology = true;
                                _engine.ClearAppendedEntriesAfter(context, lastTopologyIndex);
                            }
                        }
                    }
                }

                var lastEntryIndexToCommit = Math.Min(
                    _engine.GetLastEntryIndex(context),
                    appendEntries.LeaderCommit);

                var lastAppliedIndex = _engine.GetLastCommitIndex(context);
                var lastAppliedTerm  = _engine.GetTermFor(context, lastEntryIndexToCommit);

                // we start to commit only after we have any log with a term of the current leader
                if (lastEntryIndexToCommit > lastAppliedIndex && lastAppliedTerm == appendEntries.Term)
                {
                    lastAppliedIndex = _engine.Apply(context, lastEntryIndexToCommit, null, sp);
                }

                lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastAppliedIndex);
                _engine.TruncateLogBefore(context, lastTruncate);

                lastCommit = lastAppliedIndex;
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Ready to commit in {sp.Elapsed}");
                }

                tx.Commit();
            }

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Processing entries request with {entries.Count} entries took {sp.Elapsed}");
            }

            var lastAcknowledgedIndex = entries.Count == 0 ? appendEntries.PrevLogIndex : entries[entries.Count - 1].Index;

            return(HasRemovedFromTopology : removedFromTopology, LastAcknowledgedIndex : lastAcknowledgedIndex, LastTruncate : lastTruncate, LastCommit : lastCommit);
        }
예제 #3
0
        private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation)
        {
            _debugRecorder.Start();
            // only the leader can send append entries, so if we accepted it, it's the leader
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}");
            }
            if (negotiation.Term != _term)
            {
                //  Our leader is no longer a valid one
                var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader";
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Rejected,
                    Message      = msg,
                    CurrentTerm  = _term,
                    LastLogIndex = negotiation.PrevLogIndex
                });
                throw new InvalidOperationException($"We close this follower because: {msg}");
            }
            long prevTerm;

            using (context.OpenReadTransaction())
            {
                prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0;
            }
            if (prevTerm != negotiation.PrevLogTerm)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" +
                                     " will negotiate to find next matched index");
                }
                // we now have a mismatch with the log position, and need to negotiate it with
                // the leader
                NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation);
            }
            else
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state");
                }
                // this (or the negotiation above) completes the negotiation process
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                    Message      = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}",
                    CurrentTerm  = _term,
                    LastLogIndex = negotiation.PrevLogIndex
                });
            }
            _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot");
            _engine.Timeout.Defer(_connection.Source);

            // at this point, the leader will send us a snapshot message
            // in most cases, it is an empty snapshot, then start regular append entries
            // the reason we send this is to simplify the # of states in the protocol

            var snapshot = _connection.ReadInstallSnapshot(context);

            _debugRecorder.Record("Snapshot received");
            using (context.OpenWriteTransaction())
            {
                var lastTerm        = _engine.GetTermFor(context, snapshot.LastIncludedIndex);
                var lastCommitIndex = _engine.GetLastEntryIndex(context);
                if (snapshot.LastIncludedTerm == lastTerm && snapshot.LastIncludedIndex < lastCommitIndex)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"{ToString()}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it");
                    }

                    //This is okay to ignore because we will just get the committed entries again and skip them
                    ReadInstallSnapshotAndIgnoreContent(context);
                }
                else if (InstallSnapshot(context))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"{ToString()}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} ");
                    }

                    _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                    _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                }
                else
                {
                    var lastEntryIndex = _engine.GetLastEntryIndex(context);
                    if (lastEntryIndex < snapshot.LastIncludedIndex)
                    {
                        var message =
                            $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}";
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"{ToString()}: {message}");
                        }
                        throw new InvalidOperationException(message);
                    }
                }

                // snapshot always has the latest topology
                if (snapshot.Topology == null)
                {
                    const string message = "Expected to get topology on snapshot";
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: {message}");
                    }
                    throw new InvalidOperationException(message);
                }
                using (var topologyJson = context.ReadObject(snapshot.Topology, "topology"))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: topology on install snapshot: {topologyJson}");
                    }

                    var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson);

                    RachisConsensus.SetTopology(_engine, context, topology);
                }

                context.Transaction.Commit();
            }

            _engine.Timeout.Defer(_connection.Source);
            _debugRecorder.Record("Invoking StateMachine.SnapshotInstalled");

            // notify the state machine, we do this in an async manner, and start
            // the operator in a separate thread to avoid timeouts while this is
            // going on

            var task = Task.Run(() => _engine.SnapshotInstalledAsync(snapshot.LastIncludedIndex));

            var sp = Stopwatch.StartNew();

            var timeToWait = (int)(_engine.ElectionTimeout.TotalMilliseconds / 4);

            while (task.Wait(timeToWait) == false)
            {
                // this may take a while, so we let the other side know that
                // we are still processing, and we reset our own timer while
                // this is happening
                MaybeNotifyLeaderThatWeAreStillAlive(context, sp);
            }

            _debugRecorder.Record("Done with StateMachine.SnapshotInstalled");

            // we might have moved from passive node, so we need to start the timeout clock
            _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout);

            _debugRecorder.Record("Snapshot installed");
            //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater
            //So we could validate that the entries sent by the leader are indeed the same as the ones we have.
            _connection.Send(context, new InstallSnapshotResponse
            {
                Done         = true,
                CurrentTerm  = _term,
                LastLogIndex = snapshot.LastIncludedIndex
            });

            _engine.Timeout.Defer(_connection.Source);
        }
예제 #4
0
        private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation)
        {
            _debugRecorder.Start();
            // only the leader can send append entries, so if we accepted it, it's the leader
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Got a negotiation request for term {negotiation.Term} where our term is {_term}.");
            }

            if (negotiation.Term != _term)
            {
                //  Our leader is no longer a valid one
                var msg = $"The term was changed after leader validation from {_term} to {negotiation.Term}, so you are no longer a valid leader";
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Rejected,
                    Message      = msg,
                    CurrentTerm  = _term,
                    LastLogIndex = negotiation.PrevLogIndex
                });
                throw new InvalidOperationException($"We close this follower because: {msg}");
            }

            long prevTerm;
            bool requestSnapshot;

            using (context.OpenReadTransaction())
            {
                prevTerm        = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0;
                requestSnapshot = _engine.GetSnapshotRequest(context);
            }

            if (requestSnapshot)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Request snapshot by the admin");
                }
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                    Message      = "Request snapshot by the admin",
                    CurrentTerm  = _term,
                    LastLogIndex = 0
                });
            }
            else if (prevTerm != negotiation.PrevLogTerm)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" +
                                     " will negotiate to find next matched index");
                }
                // we now have a mismatch with the log position, and need to negotiate it with
                // the leader
                NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation);
            }
            else
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Got a negotiation request with identical PrevLogTerm will continue to steady state");
                }
                // this (or the negotiation above) completes the negotiation process
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                    Message      = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}",
                    CurrentTerm  = _term,
                    LastLogIndex = negotiation.PrevLogIndex
                });
            }
            _debugRecorder.Record("Matching Negotiation is over, waiting for snapshot");
            _engine.Timeout.Defer(_connection.Source);

            // at this point, the leader will send us a snapshot message
            // in most cases, it is an empty snapshot, then start regular append entries
            // the reason we send this is to simplify the # of states in the protocol

            var snapshot = _connection.ReadInstallSnapshot(context);

            _debugRecorder.Record("Start receiving snapshot");

            // reading the snapshot from network and committing it to the disk might take a long time.
            using (var cts = new CancellationTokenSource())
            {
                KeepAliveAndExecuteAction(() => ReadAndCommitSnapshot(context, snapshot, cts.Token), cts, "ReadAndCommitSnapshot");
            }

            _debugRecorder.Record("Snapshot was received and committed");

            // notify the state machine, we do this in an async manner, and start
            // the operator in a separate thread to avoid timeouts while this is
            // going on
            using (var cts = new CancellationTokenSource())
            {
                KeepAliveAndExecuteAction(() => _engine.SnapshotInstalledAsync(snapshot.LastIncludedIndex, cts.Token), cts, "SnapshotInstalledAsync");
            }

            _debugRecorder.Record("Done with StateMachine.SnapshotInstalled");
            _debugRecorder.Record("Snapshot installed");

            //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater
            //So we could validate that the entries sent by the leader are indeed the same as the ones we have.
            _connection.Send(context, new InstallSnapshotResponse
            {
                Done         = true,
                CurrentTerm  = _term,
                LastLogIndex = snapshot.LastIncludedIndex
            });

            _engine.Timeout.Defer(_connection.Source);
        }
예제 #5
0
        private void NegotiateWithLeader(TransactionOperationContext context, LogLengthNegotiation negotiation)
        {
            // only the leader can send append entries, so if we accepted it, it's the leader
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request for term {negotiation.Term} where our term is {_engine.CurrentTerm}");
            }
            if (negotiation.Term > _engine.CurrentTerm)
            {
                _engine.FoundAboutHigherTerm(negotiation.Term);
            }

            long prevTerm;

            using (context.OpenReadTransaction())
            {
                prevTerm = _engine.GetTermFor(context, negotiation.PrevLogIndex) ?? 0;
            }
            if (prevTerm != negotiation.PrevLogTerm)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with PrevLogTerm={negotiation.PrevLogTerm} while our PrevLogTerm={prevTerm}" +
                                     " will negotiate to find next matched index");
                }
                // we now have a mismatch with the log position, and need to negotiate it with
                // the leader
                NegotiateMatchEntryWithLeaderAndApplyEntries(context, _connection, negotiation);
            }
            else
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"Follower {_engine.Tag}: Got a negotiation request with identical PrevLogTerm will continue to steady state");
                }
                // this (or the negotiation above) completes the negotiation process
                _connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                    Message      = $"Found a log index / term match at {negotiation.PrevLogIndex} with term {prevTerm}",
                    CurrentTerm  = _engine.CurrentTerm,
                    LastLogIndex = negotiation.PrevLogIndex
                });
            }

            _engine.Timeout.Defer(_connection.Source);

            // at this point, the leader will send us a snapshot message
            // in most cases, it is an empty snapshot, then start regular append entries
            // the reason we send this is to simplify the # of states in the protocol

            var snapshot = _connection.ReadInstallSnapshot(context);

            using (context.OpenWriteTransaction())
            {
                var lastCommitIndex = _engine.GetLastCommitIndex(context);
                if (snapshot.LastIncludedIndex < lastCommitIndex)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"Follower {_engine.Tag}: Got installed snapshot with last index={snapshot.LastIncludedIndex} while our lastCommitIndex={lastCommitIndex}, will just ignore it");
                    }
                    //This is okay to ignore because we will just get the commited entries again and skip them
                    ReadInstallSnapshotAndIgnoreContent(context);
                }
                else if (InstallSnapshot(context))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info(
                            $"Follower {_engine.Tag}: Installed snapshot with last index={snapshot.LastIncludedIndex} with LastIncludedTerm={snapshot.LastIncludedTerm} ");
                    }
                    _engine.SetLastCommitIndex(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                    _engine.ClearLogEntriesAndSetLastTruncate(context, snapshot.LastIncludedIndex, snapshot.LastIncludedTerm);
                }
                else
                {
                    var lastEntryIndex = _engine.GetLastEntryIndex(context);
                    if (lastEntryIndex < snapshot.LastIncludedIndex)
                    {
                        var message =
                            $"The snapshot installation had failed because the last included index {snapshot.LastIncludedIndex} in term {snapshot.LastIncludedTerm} doesn't match the last entry {lastEntryIndex}";
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: {message}");
                        }
                        throw new InvalidOperationException(message);
                    }
                }

                // snapshot always has the latest topology
                if (snapshot.Topology == null)
                {
                    const string message = "Expected to get topology on snapshot";
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Follower {_engine.Tag}: {message}");
                    }
                    throw new InvalidOperationException(message);
                }
                using (var topologyJson = context.ReadObject(snapshot.Topology, "topology"))
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Follower {_engine.Tag}: topology on install snapshot: {topologyJson}");
                    }

                    var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(topologyJson);

                    RachisConsensus.SetTopology(_engine, context, topology);
                }

                context.Transaction.Commit();
            }
            //Here we send the LastIncludedIndex as our matched index even for the case where our lastCommitIndex is greater
            //So we could validate that the entries sent by the leader are indeed the same as the ones we have.
            _connection.Send(context, new InstallSnapshotResponse
            {
                Done         = true,
                CurrentTerm  = _engine.CurrentTerm,
                LastLogIndex = snapshot.LastIncludedIndex
            });

            _engine.Timeout.Defer(_connection.Source);

            // notify the state machine
            _engine.SnapshotInstalled(context, snapshot.LastIncludedIndex);

            _engine.Timeout.Defer(_connection.Source);
        }