Пример #1
0
        public static bool CheckIfValidLeader(RachisConsensus engine, RemoteConnection connection, out LogLengthNegotiation negotiation)
        {
            negotiation = null;
            using (engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context))
            {
                var logLength = connection.Read <LogLengthNegotiation>(context);

                if (logLength.Term < engine.CurrentTerm)
                {
                    var msg = $"The incoming term {logLength.Term} is smaller than current term {engine.CurrentTerm} and is therefor rejected (From thread: {logLength.SendingThread})";
                    if (engine.Log.IsInfoEnabled)
                    {
                        engine.Log.Info(msg);
                    }
                    connection.Send(context, new LogLengthNegotiationResponse
                    {
                        Status      = LogLengthNegotiationResponse.ResponseStatus.Rejected,
                        Message     = msg,
                        CurrentTerm = engine.CurrentTerm
                    });
                    connection.Dispose();
                    return(false);
                }
                if (engine.Log.IsInfoEnabled)
                {
                    engine.Log.Info($"The incoming term { logLength.Term} is from a valid leader (From thread: {logLength.SendingThread})");
                }
                engine.FoundAboutHigherTerm(logLength.Term, "Setting the term of the new leader");
                engine.Timeout.Defer(connection.Source);
                negotiation = logLength;
            }
            return(true);
        }
Пример #2
0
            public void ReadExactly(int size)
            {
                if (_buffer.Length < size)
                {
                    _buffer = new byte[Bits.NextPowerOf2(size)];
                }
                var remaining = 0;

                while (remaining < size)
                {
                    var read = _parent.Read(_buffer, remaining, size - remaining);
                    remaining += read;
                }
            }
Пример #3
0
            public void ReadExactly(int size)
            {
                if (_buffer.Length < size)
                {
                    _buffer = new byte[Bits.NextPowerOf2(size)];
                }
                var remaining = 0;

                while (remaining < size)
                {
                    var read = _parent.Read(_buffer, remaining, size - remaining);
                    if (read == 0)
                    {
                        throw new EndOfStreamException();
                    }
                    remaining += read;
                }
            }
Пример #4
0
        /// <summary>
        /// This method may run for a long while, as we are trying to get agreement
        /// from a majority of the cluster
        /// </summary>
        private void Run()
        {
            try
            {
                while (_candidate.Running && _disposed == false)
                {
                    try
                    {
                        Stream stream;
                        try
                        {
                            using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                            {
                                stream = _engine.ConnectToPeer(_url, _certificate, context).Result;
                            }

                            if (_candidate.Running == false)
                            {
                                break;
                            }
                        }
                        catch (Exception e)
                        {
                            Status        = AmbassadorStatus.FailedToConnect;
                            StatusMessage = $"Failed to connect with {_tag}.{Environment.NewLine} " + e.Message;
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to connect to remote peer: " + _url, e);
                            }
                            // wait a bit
                            _candidate.WaitForChangeInState();
                            continue; // we'll retry connecting
                        }
                        Status        = AmbassadorStatus.Connected;
                        StatusMessage = $"Connected to {_tag}";

                        Connection = new RemoteConnection(_tag, _engine.Tag, stream);
                        using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                        {
                            ClusterTopology topology;
                            long            lastLogIndex;
                            long            lastLogTerm;
                            using (context.OpenReadTransaction())
                            {
                                topology     = _engine.GetTopology(context);
                                lastLogIndex = _engine.GetLastEntryIndex(context);
                                lastLogTerm  = _engine.GetTermForKnownExisting(context, lastLogIndex);
                            }
                            Debug.Assert(topology.TopologyId != null);
                            Connection.Send(context, new RachisHello
                            {
                                TopologyId                 = topology.TopologyId,
                                DebugSourceIdentifier      = _engine.Tag,
                                DebugDestinationIdentifier = _tag,
                                InitialMessageType         = InitialMessageType.RequestVote
                            });

                            while (_candidate.Running)
                            {
                                RequestVoteResponse rvr;
                                var currentElectionTerm = _candidate.ElectionTerm;
                                var engineCurrentTerm   = _engine.CurrentTerm;
                                if (_candidate.IsForcedElection == false ||
                                    _candidate.RunRealElectionAtTerm != currentElectionTerm)
                                {
                                    Connection.Send(context, new RequestVote
                                    {
                                        Source           = _engine.Tag,
                                        Term             = currentElectionTerm,
                                        IsForcedElection = false,
                                        IsTrialElection  = true,
                                        LastLogIndex     = lastLogIndex,
                                        LastLogTerm      = lastLogTerm
                                    });

                                    rvr = Connection.Read <RequestVoteResponse>(context);

                                    if (rvr.Term > currentElectionTerm)
                                    {
                                        var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}";
                                        // we need to abort the current elections
                                        _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message);
                                        if (_engine.Log.IsInfoEnabled)
                                        {
                                            _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}");
                                        }
                                        _engine.FoundAboutHigherTerm(rvr.Term);
                                        throw new InvalidOperationException(message);
                                    }
                                    NotInTopology = rvr.NotInTopology;
                                    if (rvr.VoteGranted == false)
                                    {
                                        if (_engine.Log.IsInfoEnabled)
                                        {
                                            _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}");
                                        }
                                        // we go a negative response here, so we can't proceed
                                        // we'll need to wait until the candidate has done something, like
                                        // change term or given up
                                        _candidate.WaitForChangeInState();
                                        continue;
                                    }
                                    TrialElectionWonAtTerm = rvr.Term;

                                    _candidate.WaitForChangeInState();
                                }

                                Connection.Send(context, new RequestVote
                                {
                                    Source           = _engine.Tag,
                                    Term             = currentElectionTerm,
                                    IsForcedElection = _candidate.IsForcedElection,
                                    IsTrialElection  = false,
                                    LastLogIndex     = lastLogIndex,
                                    LastLogTerm      = lastLogTerm
                                });

                                rvr = Connection.Read <RequestVoteResponse>(context);

                                if (rvr.Term > currentElectionTerm)
                                {
                                    var message = $"Candidate ambassador {_engine.Tag}: found election term {rvr.Term} that is higher than ours {currentElectionTerm}";
                                    if (_engine.Log.IsInfoEnabled)
                                    {
                                        _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}");
                                    }
                                    // we need to abort the current elections
                                    _engine.SetNewState(RachisState.Follower, null, engineCurrentTerm, message);
                                    _engine.FoundAboutHigherTerm(rvr.Term);
                                    throw new InvalidOperationException(message);
                                }
                                NotInTopology = rvr.NotInTopology;
                                if (rvr.VoteGranted == false)
                                {
                                    if (_engine.Log.IsInfoEnabled)
                                    {
                                        _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reason: {rvr.Message}");
                                    }
                                    // we go a negative response here, so we can't proceed
                                    // we'll need to wait until the candidate has done something, like
                                    // change term or given up
                                    _candidate.WaitForChangeInState();
                                    continue;
                                }
                                RealElectionWonAtTerm = rvr.Term;
                                _candidate.WaitForChangeInState();
                            }
                            SendElectionResult();
                        }
                    }
                    catch (OperationCanceledException)
                    {
                        Status        = AmbassadorStatus.Closed;
                        StatusMessage = "Closed";
                        SendElectionResult();
                        break;
                    }
                    catch (ObjectDisposedException)
                    {
                        Status        = AmbassadorStatus.Closed;
                        StatusMessage = "Closed";
                        SendElectionResult();
                        break;
                    }
                    catch (AggregateException ae)
                        when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException)
                        {
                            Status        = AmbassadorStatus.Closed;
                            StatusMessage = "Closed";
                            SendElectionResult();
                            break;
                        }
                    catch (Exception e)
                    {
                        Status        = AmbassadorStatus.FailedToConnect;
                        StatusMessage = $"Failed to get vote from {_tag}.{Environment.NewLine}" + e.Message;
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to get vote from remote peer url={_url} tag={_tag}", e);
                        }
                        Connection?.Dispose();
                        _candidate.WaitForChangeInState();
                    }
                }
            }
            catch (Exception e)
            {
                Status        = AmbassadorStatus.FailedToConnect;
                StatusMessage = $"Failed to talk to {_url}.{Environment.NewLine}" + e;
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info("Failed to talk to remote peer: " + _url, e);
                }
            }
            finally
            {
                if (_candidate.ElectionResult != ElectionResult.Won)
                {
                    Connection?.Dispose();
                }
            }
        }
Пример #5
0
        public void HandleVoteRequest()
        {
            try
            {
                while (true)
                {
                    using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                    {
                        var rv = _connection.Read <RequestVote>(context);

                        if (_engine.Log.IsInfoEnabled)
                        {
                            var election = rv.IsTrialElection ? "Trial" : "Real";
                            _engine.Log.Info($"Received ({election}) 'RequestVote' from {rv.Source}: Election is {rv.ElectionResult} in term {rv.Term} while our current term is {_engine.CurrentTerm}, " +
                                             $"Forced election is {rv.IsForcedElection}. (Sent from:{rv.SendingThread})");
                        }

                        //We are getting a request to vote for our known leader
                        if (_engine.LeaderTag == rv.Source)
                        {
                            _engine.LeaderTag = null;
                            //If we are followers we want to drop the connection with the leader right away.
                            //We shouldn't be in any other state since if we are candidate our leaderTag should be null but its safer to verify.
                            if (_engine.CurrentState == RachisState.Follower)
                            {
                                _engine.SetNewState(RachisState.Follower, null, _engine.CurrentTerm, $"We got a vote request from our leader {rv.Source} so we switch to leaderless state.");
                            }
                        }

                        ClusterTopology clusterTopology;
                        long            lastIndex;
                        long            lastTerm;
                        string          whoGotMyVoteIn;
                        long            lastVotedTerm;

                        using (context.OpenReadTransaction())
                        {
                            lastIndex = _engine.GetLastEntryIndex(context);
                            lastTerm  = _engine.GetTermForKnownExisting(context, lastIndex);
                            (whoGotMyVoteIn, lastVotedTerm) = _engine.GetWhoGotMyVoteIn(context, rv.Term);

                            clusterTopology = _engine.GetTopology(context);
                        }

                        // this should be only the case when we where once in a cluster, then we were brought down and our data was wiped.
                        if (clusterTopology.TopologyId == null)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = rv.Term,
                                VoteGranted = true,
                                Message     = "I might vote for you, because I'm not part of any cluster."
                            });
                            continue;
                        }

                        if (clusterTopology.Members.ContainsKey(rv.Source) == false &&
                            clusterTopology.Promotables.ContainsKey(rv.Source) == false &&
                            clusterTopology.Watchers.ContainsKey(rv.Source) == false)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                // we only report to the node asking for our vote if we are the leader, this gives
                                // the oust node a authorotative confirmation that they were removed from the cluster
                                NotInTopology = _engine.CurrentState == RachisState.Leader,
                                Message       = $"Node {rv.Source} is not in my topology, cannot vote for it"
                            });
                            _connection.Dispose();
                            return;
                        }

                        var currentTerm = _engine.CurrentTerm;
                        if (rv.Term == currentTerm && rv.ElectionResult == ElectionResult.Won)
                        {
                            _electionWon = true;
                            if (Follower.CheckIfValidLeader(_engine, _connection, out var negotiation))
                            {
                                var follower = new Follower(_engine, negotiation.Term, _connection);
                                follower.AcceptConnection(negotiation);
                            }
                            return;
                        }

                        if (rv.ElectionResult != ElectionResult.InProgress)
                        {
                            _connection.Dispose();
                            return;
                        }

                        if (rv.Term <= _engine.CurrentTerm)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = "My term is higher or equals to yours"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (rv.IsForcedElection == false &&
                            (
                                _engine.CurrentState == RachisState.Leader ||
                                _engine.CurrentState == RachisState.LeaderElect
                            )
                            )
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentLeader.Term,
                                VoteGranted = false,
                                Message     = "I'm a leader in good standing, coup will be resisted"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}"
                            });
                            continue;
                        }
                        if (lastVotedTerm > rv.Term)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = $"Already voted for another node in {lastVotedTerm}"
                            });
                            continue;
                        }

                        if (lastTerm > rv.LastLogTerm)
                        {
                            // we aren't going to vote for this guy, but we need to check if it is more up to date
                            // in the state of the cluster than we are
                            if (rv.Term > _engine.CurrentTerm + 1)
                            {
                                // trail election is often done on the current term + 1, but if there is any
                                // election on a term that is greater than the current term plus one, we should
                                // consider this an indication that the cluster was able to move past our term
                                // and update the term accordingly
                                using (context.OpenWriteTransaction())
                                {
                                    // double checking things under the transaction lock
                                    if (rv.Term > _engine.CurrentTerm + 1)
                                    {
                                        _engine.CastVoteInTerm(context, rv.Term - 1, null, "Noticed that the term in the cluster grew beyond what I was familiar with, increasing it");
                                    }
                                    context.Transaction.Commit();
                                }
                            }

                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = $"My last log entry is of term {lastTerm} / {lastIndex} while yours is {rv.LastLogTerm}, so I'm more up to date"
                            });
                            continue;
                        }

                        if (rv.IsTrialElection)
                        {
                            if (_engine.Timeout.ExpiredLastDeferral(_engine.ElectionTimeout.TotalMilliseconds / 2, out string currentLeader) == false &&
                                string.IsNullOrEmpty(currentLeader) == false)    // if we are leaderless we can't refuse to cast our vote.
                            {
                                _connection.Send(context, new RequestVoteResponse
                                {
                                    Term        = _engine.CurrentTerm,
                                    VoteGranted = false,
                                    Message     = $"My leader {currentLeader} is keeping me up to date, so I don't want to vote for you"
                                });
                                continue;
                            }

                            if (lastTerm == rv.LastLogTerm && lastIndex > rv.LastLogIndex)
                            {
                                _connection.Send(context, new RequestVoteResponse
                                {
                                    Term        = _engine.CurrentTerm,
                                    VoteGranted = false,
                                    Message     = $"My log {lastIndex} is more up to date than yours {rv.LastLogIndex}"
                                });
                                continue;
                            }

                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = rv.Term,
                                VoteGranted = true,
                                Message     = "I might vote for you"
                            });
                            continue;
                        }

                        HandleVoteResult result;
                        using (context.OpenWriteTransaction())
                        {
                            result = ShouldGrantVote(context, lastIndex, rv, lastTerm);
                            if (result.DeclineVote == false)
                            {
                                _engine.CastVoteInTerm(context, rv.Term, rv.Source, "Casting vote as elector");
                                context.Transaction.Commit();
                            }
                        }

                        if (result.DeclineVote)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = result.VotedTerm,
                                VoteGranted = false,
                                Message     = result.DeclineReason
                            });
                        }
                        else
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = rv.Term,
                                VoteGranted = true,
                                Message     = "I've voted for you"
                            });
                        }
                    }
                }
            }
            catch (OperationCanceledException)
            {
            }
            catch (ObjectDisposedException)
            {
            }
            catch (AggregateException ae)
                when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException)
                {
                }
            catch (Exception e)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"Failed to talk to candidate: {_engine.Tag}", e);
                }
            }
            finally
            {
                if (_electionWon == false)
                {
                    _connection.Dispose();
                }
            }
        }
Пример #6
0
        private bool NegotiateMatchEntryWithLeaderAndApplyEntries(ClusterOperationContext context, RemoteConnection connection, LogLengthNegotiation negotiation)
        {
            long minIndex;
            long maxIndex;
            long midpointTerm;
            long midpointIndex;

            using (context.OpenReadTransaction())
            {
                minIndex = _engine.GetFirstEntryIndex(context);

                if (minIndex == 0) // no entries at all
                {
                    RequestAllEntries(context, connection, "No entries at all here, give me everything from the start");
                    return(true); // leader will know where to start from here
                }

                maxIndex = Math.Min(
                    _engine.GetLastEntryIndex(context), // max
                    negotiation.PrevLogIndex
                    );

                midpointIndex = (maxIndex + minIndex) / 2;

                midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex);
            }

            while ((midpointTerm == negotiation.PrevLogTerm && midpointIndex == negotiation.PrevLogIndex) == false)
            {
                _engine.Timeout.Defer(_connection.Source);

                _engine.ValidateTerm(_term);

                if (midpointIndex == negotiation.PrevLogIndex && midpointTerm != negotiation.PrevLogTerm)
                {
                    // our appended entries has been diverged, same index with different terms.
                    if (CanHandleLogDivergence(context, negotiation, ref midpointIndex, ref midpointTerm, ref minIndex, ref maxIndex) == false)
                    {
                        RequestAllEntries(context, connection, "all my entries are invalid, will require snapshot.");
                        return(true);
                    }
                }

                connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status  = LogLengthNegotiationResponse.ResponseStatus.Negotiation,
                    Message =
                        $"Term/Index mismatch from leader, need to figure out at what point the logs match, range: {maxIndex} - {minIndex} | {midpointIndex} in term {midpointTerm}",
                    CurrentTerm   = _term,
                    MaxIndex      = maxIndex,
                    MinIndex      = minIndex,
                    MidpointIndex = midpointIndex,
                    MidpointTerm  = midpointTerm
                });

                negotiation = connection.Read <LogLengthNegotiation>(context);

                _engine.Timeout.Defer(_connection.Source);
                if (negotiation.Truncated)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: Got a truncated response from the leader will request all entries");
                    }

                    RequestAllEntries(context, connection, "We have entries that are already truncated at the leader, will ask for full snapshot");
                    return(true);
                }

                using (context.OpenReadTransaction())
                {
                    if (_engine.GetTermFor(context, negotiation.PrevLogIndex) == negotiation.PrevLogTerm)
                    {
                        minIndex = Math.Min(midpointIndex + 1, maxIndex);
                    }
                    else
                    {
                        maxIndex = Math.Max(midpointIndex - 1, minIndex);
                    }
                }

                midpointIndex = (maxIndex + minIndex) / 2;
                using (context.OpenReadTransaction())
                    midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex);
            }

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: agreed upon last matched index = {midpointIndex} on term = {midpointTerm}");
            }

            connection.Send(context, new LogLengthNegotiationResponse
            {
                Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                Message      = $"Found a log index / term match at {midpointIndex} with term {midpointTerm}",
                CurrentTerm  = _term,
                LastLogIndex = midpointIndex,
            });

            return(false);
        }
Пример #7
0
        private void FollowerSteadyState()
        {
            var  entries = new List <RachisEntry>();
            long lastCommit = 0, lastTruncate = 0, lastAcknowledgedIndex = 0;

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Entering steady state");
            }

            while (true)
            {
                entries.Clear();

                using (_engine.ContextPool.AllocateOperationContext(out ClusterOperationContext context))
                {
                    using (context.OpenReadTransaction())
                    {
                        if (_engine.RequestSnapshot)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"{ToString()}: Snapshot was requested, so we close this follower.");
                            }
                            return;
                        }
                    }

                    _debugRecorder.Record("Wait for entries");
                    var appendEntries = _connection.Read <AppendEntries>(context);
                    var sp            = Stopwatch.StartNew();

                    if (appendEntries.Term != _engine.CurrentTerm)
                    {
                        _connection.Send(context, new AppendEntriesResponse
                        {
                            CurrentTerm = _engine.CurrentTerm,
                            Message     = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term,
                            Success     = false
                        });
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"{ToString()}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection...");
                        }

                        return;
                    }

                    ClusterCommandsVersionManager.SetClusterVersion(appendEntries.MinCommandVersion);

                    _debugRecorder.Record("Got entries");
                    _engine.Timeout.Defer(_connection.Source);
                    if (appendEntries.EntriesCount != 0)
                    {
                        using (var cts = new CancellationTokenSource())
                        {
                            var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex);
                            try
                            {
                                for (int i = 0; i < appendEntries.EntriesCount; i++)
                                {
                                    entries.Add(_connection.ReadRachisEntry(context));
                                }
                            }
                            finally
                            {
                                cts.Cancel();
                                task.Wait(CancellationToken.None);
                            }
                            _engine.Timeout.Defer(_connection.Source);
                        }
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"{ToString()}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})"
#if DEBUG
                                             + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]"
#endif
                                             );
                        }
                    }

                    // don't start write transaction for noop
                    if (lastCommit != appendEntries.LeaderCommit ||
                        lastTruncate != appendEntries.TruncateLogBefore ||
                        entries.Count != 0)
                    {
                        using (var cts = new CancellationTokenSource())
                        {
                            // applying the leader state may take a while, we need to ping
                            // the server and let us know that we are still there
                            var task = Concurrent_SendAppendEntriesPendingToLeaderAsync(cts, _term, appendEntries.PrevLogIndex);
                            try
                            {
                                bool hasRemovedFromTopology;

                                (hasRemovedFromTopology, lastAcknowledgedIndex, lastTruncate, lastCommit) = ApplyLeaderStateToLocalState(sp,
                                                                                                                                         context,
                                                                                                                                         entries,
                                                                                                                                         appendEntries);

                                if (hasRemovedFromTopology)
                                {
                                    if (_engine.Log.IsInfoEnabled)
                                    {
                                        _engine.Log.Info("Was notified that I was removed from the node topology, will be moving to passive mode now.");
                                    }

                                    _engine.SetNewState(RachisState.Passive, null, appendEntries.Term,
                                                        "I was kicked out of the cluster and moved to passive mode");
                                    return;
                                }
                            }
                            catch (RachisInvalidOperationException)
                            {
                                // on raft protocol violation propagate the error and close this follower.
                                throw;
                            }
                            catch (ConcurrencyException)
                            {
                                // the term was changed
                                throw;
                            }
                            catch (Exception e)
                            {
                                if (_engine.Log.IsInfoEnabled)
                                {
                                    _engine.Log.Info($"Failed to apply leader state to local state with {entries.Count:#,#;;0} entries with leader commit: {appendEntries.LeaderCommit}, term: {appendEntries.Term}. Prev log index: {appendEntries.PrevLogIndex}", e);
                                }
                            }
                            finally
                            {
                                // here we need to wait until the concurrent send pending to leader
                                // is completed to avoid concurrent writes to the leader
                                cts.Cancel();
                                task.Wait(CancellationToken.None);
                            }
                        }
                    }

                    if (appendEntries.ForceElections)
                    {
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"{ToString()}: Got a request to become candidate from the leader.");
                        }
                        _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true);
                        return;
                    }
                    _debugRecorder.Record("Processing entries is completed");
                    _connection.Send(context, new AppendEntriesResponse
                    {
                        CurrentTerm  = _term,
                        LastLogIndex = lastAcknowledgedIndex,
                        Success      = true
                    });

                    if (sp.Elapsed > _engine.ElectionTimeout / 2)
                    {
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"{ToString()}: Took a long time to complete the cycle with {entries.Count} entries: {sp.Elapsed}");
                        }
                    }

                    _engine.Timeout.Defer(_connection.Source);
                    _engine.ReportLeaderTime(appendEntries.TimeAsLeader);

                    _debugRecorder.Record("Cycle done");
                    _debugRecorder.Start();
                }
            }
        }
Пример #8
0
        public void HandleVoteRequest()
        {
            try
            {
                while (true)
                {
                    using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                    {
                        var rv = _connection.Read <RequestVote>(context);

                        ClusterTopology clusterTopology;
                        long            lastIndex;
                        long            lastTerm;
                        string          whoGotMyVoteIn;

                        using (context.OpenReadTransaction())
                        {
                            lastIndex      = _engine.GetLastEntryIndex(context);
                            lastTerm       = _engine.GetTermForKnownExisting(context, lastIndex);
                            whoGotMyVoteIn = _engine.GetWhoGotMyVoteIn(context, rv.Term);

                            clusterTopology = _engine.GetTopology(context);
                        }

                        if (clusterTopology.Members.ContainsKey(rv.Source) == false &&
                            clusterTopology.Promotables.ContainsKey(rv.Source) == false &&
                            clusterTopology.Watchers.ContainsKey(rv.Source) == false)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                // we only report to the node asking for our vote if we are the leader, this gives
                                // the oust node a authorotative confirmation that they were removed from the cluster
                                NotInTopology = _engine.CurrentState == RachisConsensus.State.Leader,
                                Message       = $"Node {rv.Source} is not in my topology, cannot vote for it"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (rv.Term <= _engine.CurrentTerm)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = "My term is higher or equals to yours"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (rv.IsForcedElection == false &&
                            (
                                _engine.CurrentState == RachisConsensus.State.Leader ||
                                _engine.CurrentState == RachisConsensus.State.LeaderElect
                            )
                            )
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = "I'm a leader in good standing, coup will be resisted"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (lastTerm > rv.LastLogTerm)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = $"My last log entry is of term {lastTerm} / {lastIndex} while yours is {rv.LastLogTerm}, so I'm more up to date"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (lastIndex > rv.LastLogIndex)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = $"My last log entry is of term {lastTerm} / {lastIndex} while yours is {rv.LastLogTerm} / {rv.LastLogIndex}, so I'm more up to date"
                            });
                            _connection.Dispose();
                            return;
                        }

                        if (rv.IsTrialElection)
                        {
                            if (_engine.Timeout.ExpiredLastDeferral(_engine.ElectionTimeout.TotalMilliseconds / 2, out string currentLeader) == false)
                            {
                                _connection.Send(context, new RequestVoteResponse
                                {
                                    Term        = _engine.CurrentTerm,
                                    VoteGranted = false,
                                    Message     = $"My leader {currentLeader} is keeping me up to date, so I don't want to vote for you"
                                });
                                _connection.Dispose();
                                return;
                            }

                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = rv.Term,
                                VoteGranted = true,
                                Message     = "I might vote for you"
                            });
                            if (_thread == null) // let's wait for this in another thread
                            {
                                _thread = new Thread(HandleVoteRequest)
                                {
                                    Name =
                                        $"Elector thread for {rv.Source} > {_engine.Tag}",
                                    IsBackground = true
                                };
                                _thread.Start();
                                return;
                            }
                            continue;
                        }

                        bool alreadyVoted = false;
                        using (context.OpenWriteTransaction())
                        {
                            whoGotMyVoteIn = _engine.GetWhoGotMyVoteIn(context, rv.Term);
                            if (whoGotMyVoteIn != null && whoGotMyVoteIn != rv.Source)
                            {
                                alreadyVoted = true;
                            }
                            else
                            {
                                _engine.CastVoteInTerm(context, rv.Term, rv.Source);
                            }
                            context.Transaction.Commit();
                        }
                        if (alreadyVoted)
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = false,
                                Message     = $"Already voted in {rv.LastLogTerm}, for {whoGotMyVoteIn}"
                            });
                        }
                        else
                        {
                            _connection.Send(context, new RequestVoteResponse
                            {
                                Term        = _engine.CurrentTerm,
                                VoteGranted = true,
                                Message     = "I've voted for you"
                            });
                        }
                        _connection.Dispose();
                    }
                }
            }
            catch (OperationCanceledException)
            {
            }
            catch (ObjectDisposedException)
            {
            }
            catch (AggregateException ae)
                when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException)
                {
                }
            catch (Exception e)
            {
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info("Failed to talk to candidate: " + _engine.Tag, e);
                }
            }
        }
Пример #9
0
        private void NegotiateMatchEntryWithLeaderAndApplyEntries(TransactionOperationContext context,
                                                                  RemoteConnection connection, LogLengthNegotiation negotiation)
        {
            long minIndex;
            long maxIndex;
            long midpointTerm;
            long midpointIndex;

            using (context.OpenReadTransaction())
            {
                minIndex = _engine.GetFirstEntryIndex(context);

                if (minIndex == 0) // no entries at all
                {
                    connection.Send(context, new LogLengthNegotiationResponse
                    {
                        Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                        Message      = "No entries at all here, give me everything from the start",
                        CurrentTerm  = _term,
                        LastLogIndex = 0
                    });

                    return; // leader will know where to start from here
                }

                maxIndex = Math.Min(
                    _engine.GetLastEntryIndex(context), // max
                    negotiation.PrevLogIndex
                    );

                midpointIndex = (maxIndex + minIndex) / 2;

                midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex);
            }


            while (minIndex < maxIndex)
            {
                _engine.Timeout.Defer(_connection.Source);

                _engine.ValidateTerm(_term);

                connection.Send(context, new LogLengthNegotiationResponse
                {
                    Status  = LogLengthNegotiationResponse.ResponseStatus.Negotiation,
                    Message =
                        $"Term/Index mismatch from leader, need to figure out at what point the logs match, range: {maxIndex} - {minIndex} | {midpointIndex} in term {midpointTerm}",
                    CurrentTerm   = _term,
                    MaxIndex      = maxIndex,
                    MinIndex      = minIndex,
                    MidpointIndex = midpointIndex,
                    MidpointTerm  = midpointTerm
                });

                var response = connection.Read <LogLengthNegotiation>(context);

                _engine.Timeout.Defer(_connection.Source);
                if (response.Truncated)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: Got a truncated response from the leader will request all entries");
                    }
                    connection.Send(context, new LogLengthNegotiationResponse
                    {
                        Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                        Message      = "We have entries that are already truncated at the leader, will ask for full snapshot",
                        CurrentTerm  = _term,
                        LastLogIndex = 0
                    });
                    return;
                }
                using (context.OpenReadTransaction())
                {
                    if (_engine.GetTermFor(context, response.PrevLogIndex) == response.PrevLogTerm)
                    {
                        minIndex = Math.Min(midpointIndex + 1, maxIndex);
                    }
                    else
                    {
                        maxIndex = Math.Max(midpointIndex - 1, minIndex);
                    }
                }
                midpointIndex = (maxIndex + minIndex) / 2;
                using (context.OpenReadTransaction())
                    midpointTerm = _engine.GetTermForKnownExisting(context, midpointIndex);
            }
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: agreed upon last matched index = {midpointIndex} on term = {_term}");
            }
            connection.Send(context, new LogLengthNegotiationResponse
            {
                Status       = LogLengthNegotiationResponse.ResponseStatus.Acceptable,
                Message      = $"Found a log index / term match at {midpointIndex} with term {midpointTerm}",
                CurrentTerm  = _term,
                LastLogIndex = midpointIndex
            });
        }
Пример #10
0
        private void FollowerSteadyState()
        {
            var  entries = new List <RachisEntry>();
            long lastCommit = 0, lastTruncate = 0;

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Follower {_engine.Tag}: Entering steady state");
            }
            while (true)
            {
                entries.Clear();

                using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                {
                    var appendEntries = _connection.Read <AppendEntries>(context);

                    if (appendEntries.Term != _engine.CurrentTerm)
                    {
                        _connection.Send(context, new AppendEntriesResponse
                        {
                            CurrentTerm = _engine.CurrentTerm,
                            Message     = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term,
                            Success     = false
                        });
                        if (_engine.Log.IsInfoEnabled && entries.Count > 0)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection...");
                        }

                        return;
                    }


                    _engine.Timeout.Defer(_connection.Source);
                    var sp = Stopwatch.StartNew();
                    if (appendEntries.EntriesCount != 0)
                    {
                        for (int i = 0; i < appendEntries.EntriesCount; i++)
                        {
                            entries.Add(_connection.ReadRachisEntry(context));
                            _engine.Timeout.Defer(_connection.Source);
                        }
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})"
#if DEBUG
                                             + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]"
#endif
                                             );
                        }
                    }

                    var lastLogIndex = appendEntries.PrevLogIndex;

                    // don't start write transaction fro noop
                    if (lastCommit != appendEntries.LeaderCommit ||
                        lastTruncate != appendEntries.TruncateLogBefore ||
                        entries.Count != 0)
                    {
                        bool removedFromTopology = false;
                        // we start the tx after we finished reading from the network
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Ready to start tx in {sp.Elapsed}");
                        }
                        using (var tx = context.OpenWriteTransaction())
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Follower {_engine.Tag}: Tx running in {sp.Elapsed}");
                            }
                            if (entries.Count > 0)
                            {
                                using (var lastTopology = _engine.AppendToLog(context, entries))
                                {
                                    if (lastTopology != null)
                                    {
                                        if (_engine.Log.IsInfoEnabled)
                                        {
                                            _engine.Log.Info($"Topology changed to {lastTopology}");
                                        }

                                        var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology);

                                        if (topology.Members.ContainsKey(_engine.Tag) ||
                                            topology.Promotables.ContainsKey(_engine.Tag) ||
                                            topology.Watchers.ContainsKey(_engine.Tag))
                                        {
                                            RachisConsensus.SetTopology(_engine, context, topology);
                                        }
                                        else
                                        {
                                            removedFromTopology = true;
                                        }
                                    }
                                }
                            }

                            lastLogIndex = _engine.GetLastEntryIndex(context);

                            var lastEntryIndexToCommit = Math.Min(
                                lastLogIndex,
                                appendEntries.LeaderCommit);


                            var lastAppliedIndex = _engine.GetLastCommitIndex(context);

                            if (lastEntryIndexToCommit > lastAppliedIndex)
                            {
                                _engine.Apply(context, lastEntryIndexToCommit, null);
                            }

                            lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastEntryIndexToCommit);
                            _engine.TruncateLogBefore(context, lastTruncate);
                            lastCommit = lastEntryIndexToCommit;
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Follower {_engine.Tag}: Ready to commit in {sp.Elapsed}");
                            }
                            tx.Commit();
                        }

                        if (_engine.Log.IsInfoEnabled && entries.Count > 0)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Processing entries request with {entries.Count} entries took {sp.Elapsed}");
                        }

                        if (removedFromTopology)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info("Was notified that I was removed from the node topoloyg, will be moving to passive mode now.");
                            }
                            _engine.SetNewState(RachisState.Passive, null, appendEntries.Term,
                                                "I was kicked out of the cluster and moved to passive mode");
                            return;
                        }
                    }

                    if (appendEntries.ForceElections)
                    {
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got a request to become candidate from the leader.");
                        }
                        _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true);
                        return;
                    }

                    _connection.Send(context, new AppendEntriesResponse
                    {
                        CurrentTerm  = _engine.CurrentTerm,
                        LastLogIndex = lastLogIndex,
                        Success      = true
                    });

                    _engine.Timeout.Defer(_connection.Source);

                    _engine.ReportLeaderTime(appendEntries.TimeAsLeader);
                }
            }
        }