예제 #1
0
파일: Leader.cs 프로젝트: rstonkus/ravendb
        /// <summary>
        /// Rebuilds the leader's ambassador sets (<c>_voters</c>, <c>_promotables</c>, <c>_nonVoters</c>)
        /// so that they match <paramref name="clusterTopology"/>.
        /// Ambassadors that already exist for a peer are re-used (and re-pointed at the wake-up signal
        /// of their new role), missing ones are created and started, and ambassadors whose peer is no
        /// longer in the topology are disposed on a thread-pool thread.
        /// </summary>
        /// <param name="clusterTopology">The topology whose Members / Promotables / Watchers should be mirrored.</param>
        /// <param name="connections">
        /// Optional map from node tag to an existing connection. When a newly created ambassador's tag is
        /// present in the map, that connection is handed to the ambassador; otherwise <c>null</c> is passed.
        /// </param>
        /// <remarks>
        /// The method does nothing (besides logging) when the monitor on <c>this</c> cannot be taken
        /// immediately, or when <c>Term</c> no longer equals the engine's current term.
        /// </remarks>
        private void RefreshAmbassadors(ClusterTopology clusterTopology, Dictionary <string, RemoteConnection> connections = null)
        {
            bool lockTaken = false;

            // Non-blocking attempt: lockTaken stays false if someone else holds the monitor
            Monitor.TryEnter(this, ref lockTaken);
            try
            {
                // Failing to take the lock is treated as "we are being disposed"
                // (per the original author's note), so we can quit now
                if (lockTaken == false)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: Skipping refreshing ambassadors because we are been disposed of");
                    }
                    return;
                }

                if (Term != _engine.CurrentTerm)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: We are no longer the actual leader, since the current term is {_engine.CurrentTerm}");
                    }
                    return;
                }

                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Refreshing ambassadors");
                }

                // Move every current ambassador into `old`; whatever is still in there after
                // walking the new topology belongs to a peer that was removed.
                var old = new Dictionary <string, FollowerAmbassador>(StringComparer.OrdinalIgnoreCase);
                foreach (var peers in new[] { _voters, _promotables, _nonVoters })
                {
                    foreach (var peer in peers)
                    {
                        old[peer.Key] = peer.Value;
                    }
                    peers.Clear();
                }

                foreach (var voter in clusterTopology.Members)
                {
                    if (voter.Key == _engine.Tag)
                    {
                        continue; // we obviously won't be applying to ourselves
                    }
                    if (old.TryGetValue(voter.Key, out FollowerAmbassador existingInstance))
                    {
                        // the peer may have changed role, so point it at the voters' wake-up signal
                        existingInstance.UpdateLeaderWake(_voterResponded);
                        _voters.Add(voter.Key, existingInstance);
                        old.Remove(voter.Key);
                        continue; // already here
                    }
                    RemoteConnection connection = null;
                    connections?.TryGetValue(voter.Key, out connection);
                    var ambassador = new FollowerAmbassador(_engine, this, _voterResponded, voter.Key, voter.Value,
                                                            _engine.ClusterCertificate, connection);
                    _voters.Add(voter.Key, ambassador);
                    _engine.AppendStateDisposable(this, ambassador);
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: starting ambassador for voter {voter.Key} {voter.Value}");
                    }
                    ambassador.Start();
                }

                foreach (var promotable in clusterTopology.Promotables)
                {
                    if (old.TryGetValue(promotable.Key, out FollowerAmbassador existingInstance))
                    {
                        existingInstance.UpdateLeaderWake(_promotableUpdated);
                        _promotables.Add(promotable.Key, existingInstance);
                        old.Remove(promotable.Key);
                        continue; // already here
                    }
                    RemoteConnection connection = null;
                    connections?.TryGetValue(promotable.Key, out connection);
                    var ambassador = new FollowerAmbassador(_engine, this, _promotableUpdated, promotable.Key, promotable.Value,
                                                            _engine.ClusterCertificate, connection);
                    _promotables.Add(promotable.Key, ambassador);
                    _engine.AppendStateDisposable(this, ambassador);
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: starting ambassador for promotable {promotable.Key} {promotable.Value}");
                    }
                    ambassador.Start();
                }

                foreach (var nonVoter in clusterTopology.Watchers)
                {
                    if (old.TryGetValue(nonVoter.Key, out FollowerAmbassador existingInstance))
                    {
                        existingInstance.UpdateLeaderWake(_noop);

                        _nonVoters.Add(nonVoter.Key, existingInstance);
                        old.Remove(nonVoter.Key);
                        continue; // already here
                    }
                    RemoteConnection connection = null;
                    connections?.TryGetValue(nonVoter.Key, out connection);
                    var ambassador = new FollowerAmbassador(_engine, this, _noop, nonVoter.Key, nonVoter.Value,
                                                            _engine.ClusterCertificate, connection);
                    _nonVoters.Add(nonVoter.Key, ambassador);
                    _engine.AppendStateDisposable(this, ambassador);
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"{ToString()}: starting ambassador for watcher {nonVoter.Key} {nonVoter.Value}");
                    }
                    ambassador.Start();
                }

                if (old.Count > 0)
                {
                    // The counter is raised for as long as the background disposal is pending
                    Interlocked.Increment(ref _previousPeersWereDisposed);
                    System.Threading.ThreadPool.QueueUserWorkItem(_ =>
                    {
                        try
                        {
                            foreach (var ambassador in old)
                            {
                                try
                                {
                                    // it is not used by anything else, so we can close it
                                    ambassador.Value.Dispose();
                                }
                                catch (Exception e)
                                {
                                    // One failing Dispose must not prevent the remaining ambassadors
                                    // from being disposed, nor escape as an unhandled exception
                                    // on a thread-pool thread.
                                    if (_engine.Log.IsInfoEnabled)
                                    {
                                        _engine.Log.Info($"{ToString()}: Failed to dispose the ambassador of removed peer {ambassador.Key}", e);
                                    }
                                }
                            }
                        }
                        finally
                        {
                            // Always balance the increment above, even if something went wrong
                            Interlocked.Decrement(ref _previousPeersWereDisposed);
                        }
                    }, null);
                }
            }
            finally
            {
                if (lockTaken)
                {
                    Monitor.Exit(this);
                }
            }
        }
예제 #2
0
        /// <summary>
        /// Connects to the remote peer and keeps asking it for its vote for as long as the
        /// owning candidate is running and this ambassador has not been disposed.
        /// This method may run for a long while, as we are trying to get agreement
        /// from a majority of the cluster.
        /// </summary>
        /// <remarks>
        /// Nothing is returned; outcomes are published through <c>Status</c>, <c>StatusMessage</c>,
        /// <c>NotInTopology</c>, <c>TrialElectionWonAtTerm</c> and <c>ReadlElectionWonAtTerm</c>.
        /// </remarks>
        private void Run()
        {
            try
            {
                // Outer loop: one iteration per connection attempt
                while (_candidate.Running && _disposed == false)
                {
                    // reset per attempt; the finally block at the bottom disposes whatever got connected
                    _conenctToPeer = null;
                    try
                    {
                        try
                        {
                            using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                            {
                                // .Result blocks this thread until the connection attempt completes
                                _conenctToPeer = _engine.ConnectToPeer(_url, _certificate, context).Result;
                            }

                            // the candidate may have stopped while we were connecting
                            if (_candidate.Running == false)
                            {
                                break;
                            }
                        }
                        catch (Exception e)
                        {
                            Status        = AmbassadorStatus.FailedToConnect;
                            StatusMessage = $"Failed to connect with {_tag}.{Environment.NewLine} " + e.Message;
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to connect to remote peer: " + _url, e);
                            }
                            // wait for the candidate to signal a change in state before trying again
                            _candidate.WaitForChangeInState();
                            continue; // we'll retry connecting
                        }
                        Status        = AmbassadorStatus.Connected;
                        StatusMessage = $"Connected to {_tag}";
                        using (var connection = new RemoteConnection(_tag, _engine.Tag, _conenctToPeer))
                        {
                            try
                            {
                                _engine.AppendStateDisposable(_candidate, connection);
                            }
                            catch (ConcurrencyException)
                            {
                                // we probably lost the election, because someone else changed our state to follower
                                // we'll still return to the top of the loop to ensure that this is the case
                                continue;
                            }

                            // Inner loop: one iteration per voting round on this connection.
                            // Every round starts with a RachisHello followed by the vote request(s).
                            while (_candidate.Running)
                            {
                                using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                                {
                                    ClusterTopology topology;
                                    long            lastLogIndex;
                                    long            lastLogTerm;
                                    // snapshot the topology and the last log entry under a single read transaction
                                    using (context.OpenReadTransaction())
                                    {
                                        topology     = _engine.GetTopology(context);
                                        lastLogIndex = _engine.GetLastEntryIndex(context);
                                        lastLogTerm  = _engine.GetTermForKnownExisting(context, lastLogIndex);
                                    }
                                    Debug.Assert(topology.TopologyId != null);
                                    connection.Send(context, new RachisHello
                                    {
                                        TopologyId                 = topology.TopologyId,
                                        DebugSourceIdentifier      = _engine.Tag,
                                        DebugDestinationIdentifier = _tag,
                                        InitialMessageType         = InitialMessageType.RequestVote
                                    });

                                    RequestVoteResponse rvr;
                                    // capture both terms once so the whole round uses consistent values
                                    var currentElectionTerm = _candidate.ElectionTerm;
                                    var engineCurrentTerm   = _engine.CurrentTerm;
                                    // The trial round is skipped only when the election is forced AND
                                    // RunRealElectionAtTerm already equals the captured election term.
                                    if (_candidate.IsForcedElection == false ||
                                        _candidate.RunRealElectionAtTerm != currentElectionTerm)
                                    {
                                        connection.Send(context, new RequestVote
                                        {
                                            Source           = _engine.Tag,
                                            Term             = currentElectionTerm,
                                            IsForcedElection = false,
                                            IsTrialElection  = true,
                                            LastLogIndex     = lastLogIndex,
                                            LastLogTerm      = lastLogTerm
                                        });

                                        rvr = connection.Read <RequestVoteResponse>(context);

                                        if (rvr.Term > currentElectionTerm)
                                        {
                                            var message = "Found election term " + rvr.Term + " that is higher than ours " + currentElectionTerm;
                                            // we need to abort the current elections
                                            _engine.SetNewState(RachisConsensus.State.Follower, null, engineCurrentTerm, message);
                                            if (_engine.Log.IsInfoEnabled)
                                            {
                                                _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}");
                                            }
                                            _engine.FoundAboutHigherTerm(rvr.Term);
                                            // caught by the catch (Exception) of the enclosing try inside the outer loop
                                            throw new InvalidOperationException(message);
                                        }
                                        NotInTopology = rvr.NotInTopology;
                                        if (rvr.VoteGranted == false)
                                        {
                                            if (_engine.Log.IsInfoEnabled)
                                            {
                                                _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reseason:{rvr.Message}");
                                            }
                                            // we got a negative response here, so we can't proceed
                                            // we'll need to wait until the candidate has done something, like
                                            // change term or given up
                                            _candidate.WaitForChangeInState();
                                            continue;
                                        }
                                        // record the term at which the peer granted the trial vote
                                        TrialElectionWonAtTerm = rvr.Term;

                                        // wait for the candidate before moving on to the real vote request below
                                        _candidate.WaitForChangeInState();
                                    }

                                    // Real (non-trial) vote request; note it still uses the term captured
                                    // at the top of this round.
                                    connection.Send(context, new RequestVote
                                    {
                                        Source           = _engine.Tag,
                                        Term             = currentElectionTerm,
                                        IsForcedElection = _candidate.IsForcedElection,
                                        IsTrialElection  = false,
                                        LastLogIndex     = lastLogIndex,
                                        LastLogTerm      = lastLogTerm
                                    });

                                    rvr = connection.Read <RequestVoteResponse>(context);

                                    if (rvr.Term > currentElectionTerm)
                                    {
                                        var message = "Found election term " + rvr.Term + " that is higher than ours " + currentElectionTerm;
                                        if (_engine.Log.IsInfoEnabled)
                                        {
                                            _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: {message}");
                                        }
                                        // we need to abort the current elections
                                        _engine.SetNewState(RachisConsensus.State.Follower, null, engineCurrentTerm, message);
                                        _engine.FoundAboutHigherTerm(rvr.Term);
                                        throw new InvalidOperationException(message);
                                    }
                                    NotInTopology = rvr.NotInTopology;
                                    if (rvr.VoteGranted == false)
                                    {
                                        if (_engine.Log.IsInfoEnabled)
                                        {
                                            _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Got a negative response from {_tag} reseason:{rvr.Message}");
                                        }
                                        // we got a negative response here, so we can't proceed
                                        // we'll need to wait until the candidate has done something, like
                                        // change term or given up
                                        _candidate.WaitForChangeInState();
                                        continue;
                                    }
                                    // NOTE(review): "Readl" looks like a typo of "Real"; the member is declared
                                    // outside this method, so the name is left untouched here.
                                    ReadlElectionWonAtTerm = rvr.Term;
                                    _candidate.WaitForChangeInState();
                                }
                            }
                        }
                    }
                    catch (Exception e)
                    {
                        // Any failure while talking to the peer: record it, wait for the candidate,
                        // then let the outer loop reconnect.
                        Status        = AmbassadorStatus.FailedToConnect;
                        StatusMessage = $"Failed to get vote from {_tag}.{Environment.NewLine}" + e.Message;
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"CandidateAmbassador {_engine.Tag}: Failed to get vote from remote peer url={_url} tag={_tag}", e);
                        }
                        _candidate.WaitForChangeInState();
                    }
                    finally
                    {
                        // runs on every exit from the iteration (normal, continue, break or exception)
                        _conenctToPeer?.Dispose();
                    }
                }
            }
            // Cancellation and disposal that escape the loop are reported as a plain "Closed" status
            catch (OperationCanceledException)
            {
                Status        = AmbassadorStatus.Closed;
                StatusMessage = "Closed";
            }
            catch (ObjectDisposedException)
            {
                Status        = AmbassadorStatus.Closed;
                StatusMessage = "Closed";
            }
            // same as above, for the case where the exception arrives wrapped in an AggregateException
            catch (AggregateException ae)
                when(ae.InnerException is OperationCanceledException || ae.InnerException is ObjectDisposedException)
                {
                    Status        = AmbassadorStatus.Closed;
                    StatusMessage = "Closed";
                }
            catch (Exception e)
            {
                // anything else ends the ambassador with a failure status that carries the full exception text
                Status        = AmbassadorStatus.FailedToConnect;
                StatusMessage = $"Failed to talk to {_url}.{Environment.NewLine}" + e;
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info("Failed to talk to remote peer: " + _url, e);
                }
            }
        }
예제 #3
0
        /// <summary>
        /// Runs a single candidacy: reads the cluster topology, starts one ambassador per other
        /// member, then repeatedly tallies the ambassadors' trial / real election results until
        /// this node wins, is moved to passive, or stops being a running candidate.
        /// The candidate is always disposed when the method exits.
        /// </summary>
        private void Run()
        {
            // ambassadors that reported a topology mismatch; handed to StillHavePeers on every round
            var mismatchedAmbassadors = new List <CandidateAmbassador>();

            try
            {
                try
                {
                    // Raise() rather than RaiseOrDie, since this operation is allowed to fail
                    _running.Raise();
                    ElectionTerm = _engine.CurrentTerm;
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Candidate {_engine.Tag}: Starting elections");
                    }

                    ClusterTopology topology;
                    using (_engine.ContextPool.AllocateOperationContext(out ClusterOperationContext ctx))
                    {
                        using (ctx.OpenReadTransaction())
                        {
                            topology = _engine.GetTopology(ctx);
                        }
                    }

                    // A cluster of one needs no election at all
                    if (topology.Members.Count == 1)
                    {
                        CastVoteForSelf(ElectionTerm + 1, "Single member cluster, natural leader");
                        _engine.SwitchToLeaderState(ElectionTerm, ClusterCommandsVersionManager.CurrentClusterMinimalVersion,
                                                    "I'm the only one in the cluster, so no need for elections, I rule.");
                        return;
                    }

                    // a node that requested a snapshot must not become the leader
                    if (_engine.RequestSnapshot)
                    {
                        if (_engine.Log.IsOperationsEnabled)
                        {
                            _engine.Log.Operations("we aren't allowed to be elected for leadership if we requested a snapshot");
                        }
                        return;
                    }

                    if (IsForcedElection == false)
                    {
                        ElectionTerm++;
                    }
                    else
                    {
                        CastVoteForSelf(ElectionTerm + 1, "Voting for self in forced elections");
                    }

                    foreach (var member in topology.Members)
                    {
                        // skip ourselves, we already have our own vote
                        if (member.Key != _engine.Tag)
                        {
                            var newAmbassador = new CandidateAmbassador(_engine, this, member.Key, member.Value);

                            // publish a fresh list instead of mutating the current one
                            var extendedVoters = new List <CandidateAmbassador>(_voters);
                            extendedVoters.Add(newAmbassador);
                            _voters = extendedVoters;

                            // a concurrency exception here will dispose the current candidate and its ambassadors
                            _engine.AppendStateDisposable(this, newAmbassador);
                            newAmbassador.Start();
                        }
                    }

                    while (_running && _engine.CurrentState == RachisState.Candidate)
                    {
                        var peerSignaled = _peersWaiting.WaitOne(_engine.Timeout.TimeoutPeriod);
                        if (peerSignaled == false)
                        {
                            // timed out: move to the next term and try again
                            ElectionTerm = _engine.CurrentTerm + 1;
                            _engine.RandomizeTimeout(extend: true);

                            StateChange(); // wakes the ambassadors so they ping the peers again
                            continue;
                        }

                        if (_running == false)
                        {
                            return;
                        }

                        _peersWaiting.Reset();

                        // both tallies start at one (our own vote)
                        var realVotes  = 1;
                        var trialVotes = 1;
                        foreach (var voter in _voters)
                        {
                            if (voter.NotInTopology)
                            {
                                MoveCandidateToPassive("A leader node has indicated that I'm not in their topology, I was probably kicked out.");
                                return;
                            }

                            if (voter.TopologyMismatch)
                            {
                                mismatchedAmbassadors.Add(voter);
                            }
                            else
                            {
                                if (voter.RealElectionWonAtTerm == ElectionTerm)
                                {
                                    realVotes++;
                                }
                                if (voter.TrialElectionWonAtTerm == ElectionTerm)
                                {
                                    trialVotes++;
                                }
                            }
                        }

                        if (StillHavePeers(mismatchedAmbassadors) == false)
                        {
                            MoveCandidateToPassive("I'm left alone in the cluster.");
                            return;
                        }

                        // _voters excludes this node, hence the +1 for the cluster size
                        var majority = ((_voters.Count + 1) / 2) + 1;

                        if (realVotes >= majority)
                        {
                            ElectionResult = ElectionResult.Won;
                            _running.Lower();

                            // collect the peers' reported command versions and any published connections
                            var peerConnections = new Dictionary <string, RemoteConnection>();
                            var knownVersions   = new List <int>();
                            knownVersions.Add(ClusterCommandsVersionManager.MyCommandsVersion);

                            foreach (var voter in _voters)
                            {
                                if (voter.ClusterCommandsVersion > 0)
                                {
                                    knownVersions.Add(voter.ClusterCommandsVersion);
                                }

                                if (voter.TryGetPublishedConnection(out var published))
                                {
                                    peerConnections[voter.Tag] = published;
                                }
                            }
                            StateChange();

                            var    clusterVersion = ClusterCommandsVersionManager.GetClusterMinimalVersion(knownVersions, _engine.MaximalVersion);
                            string electedMessage = $"Was elected by {realVotes} nodes for leadership in term {ElectionTerm} with cluster version of {clusterVersion}";

                            _engine.SwitchToLeaderState(ElectionTerm, clusterVersion, electedMessage, peerConnections);
                            break;
                        }

                        // winning the trial round lets us cast the real vote for ourselves
                        if (RunRealElectionAtTerm != ElectionTerm &&
                            trialVotes >= majority)
                        {
                            CastVoteForSelf(ElectionTerm, "Won in the trial elections");
                        }
                    }
                }
                catch (Exception e)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Candidate {_engine.Tag}: Failure during candidacy run with current state of {_engine.CurrentState}", e);
                    }

                    if (_engine.CurrentState == RachisState.Candidate)
                    {
                        // still a candidate, so start the candidacy over
                        _engine.SwitchToCandidateState("An error occurred during the last candidacy: " + e);
                    }
                    else if (_engine.CurrentState != RachisState.Passive)
                    {
                        _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout);
                    }
                }
            }
            finally
            {
                try
                {
                    Dispose();
                }
                catch (Exception)
                {
                    // best effort: there is nothing useful to do if disposal fails
                }
            }
        }
예제 #4
0
        /// <summary>
        /// Runs a single candidacy: reads the cluster topology, starts one ambassador per other
        /// member, then repeatedly tallies the ambassadors' trial / real election results until
        /// this node wins, learns it is not in the topology, or stops being a running candidate.
        /// The candidate is always disposed when the method exits.
        /// </summary>
        private void Run()
        {
            try
            {
                try
                {
                    // Raise() rather than RaiseOrDie, since this operation is allowed to fail
                    _running.Raise();
                    ElectionTerm = _engine.CurrentTerm;
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Candidate {_engine.Tag}: Starting elections");
                    }

                    ClusterTopology topology;
                    using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext ctx))
                    {
                        using (ctx.OpenReadTransaction())
                        {
                            topology = _engine.GetTopology(ctx);
                        }
                    }

                    // A cluster of one needs no election at all
                    if (topology.Members.Count == 1)
                    {
                        CastVoteForSelf(ElectionTerm + 1, "Single member cluster, natural leader");
                        _engine.SwitchToLeaderState(ElectionTerm, "I'm the only one in the cluster, so no need for elections, I rule.");
                        return;
                    }

                    if (IsForcedElection == false)
                    {
                        ElectionTerm++;
                    }
                    else
                    {
                        CastVoteForSelf(ElectionTerm + 1, "Voting for self in forced elections");
                    }

                    foreach (var member in topology.Members)
                    {
                        // skip ourselves, we already have our own vote
                        if (member.Key != _engine.Tag)
                        {
                            var newAmbassador = new CandidateAmbassador(_engine, this, member.Key, member.Value,
                                                                        _engine.ClusterCertificate);
                            _voters.Add(newAmbassador);
                            try
                            {
                                _engine.AppendStateDisposable(this, newAmbassador);
                            }
                            catch (ConcurrencyException)
                            {
                                // we lost the election, because someone else changed our state to follower;
                                // tear down everything created so far
                                foreach (var created in _voters)
                                {
                                    created.Dispose();
                                }
                                return;
                            }
                            newAmbassador.Start();
                        }
                    }

                    while (_running && _engine.CurrentState == RachisState.Candidate)
                    {
                        var peerSignaled = _peersWaiting.WaitOne(_engine.Timeout.TimeoutPeriod);
                        if (peerSignaled == false)
                        {
                            // timed out: restart from the engine's current term
                            ElectionTerm = _engine.CurrentTerm;

                            if (IsForcedElection == false)
                            {
                                ElectionTerm++;
                            }
                            else
                            {
                                CastVoteForSelf(ElectionTerm + 1, "Timeout during forced elections");
                            }
                            _engine.RandomizeTimeout(extend: true);

                            StateChange(); // wakes the ambassadors so they ping the peers again
                            continue;
                        }

                        if (_running == false)
                        {
                            return;
                        }

                        _peersWaiting.Reset();

                        // both tallies start at one (our own vote)
                        var  realVotes  = 1;
                        var  trialVotes = 1;
                        bool kickedOut  = false;
                        foreach (var voter in _voters)
                        {
                            if (voter.NotInTopology)
                            {
                                kickedOut = true;
                                break;
                            }
                            if (voter.RealElectionWonAtTerm == ElectionTerm)
                            {
                                realVotes++;
                            }
                            if (voter.TrialElectionWonAtTerm == ElectionTerm)
                            {
                                trialVotes++;
                            }
                        }

                        // _voters excludes this node, hence the +1 for the cluster size
                        var majority = ((_voters.Count + 1) / 2) + 1;

                        if (kickedOut)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info(
                                    $"Candidate {_engine.Tag}: A leader node has indicated that I'm not in their topology, I was probably kicked out. Moving to passive mode");
                            }
                            _engine.SetNewState(RachisState.Passive, this, _engine.CurrentTerm,
                                                "I just learned from the leader that I'm not in their topology, moving to passive state");
                            break;
                        }

                        if (realVotes >= majority)
                        {
                            ElectionResult = ElectionResult.Won;
                            _running.Lower();
                            StateChange();

                            // hand each ambassador's connection over to the leader state
                            var peerConnections = new Dictionary <string, RemoteConnection>();
                            foreach (var voter in _voters)
                            {
                                peerConnections[voter.Tag] = voter.Connection;
                            }
                            _engine.SwitchToLeaderState(ElectionTerm, $"Was elected by {realVotes} nodes to leadership in {ElectionTerm}", peerConnections);

                            break;
                        }

                        // winning the trial round lets us cast the real vote for ourselves
                        if (RunRealElectionAtTerm != ElectionTerm &&
                            trialVotes >= majority)
                        {
                            CastVoteForSelf(ElectionTerm, "Won in the trial elections");
                        }
                    }
                }
                catch (Exception e)
                {
                    if (_engine.Log.IsInfoEnabled)
                    {
                        _engine.Log.Info($"Candidate {_engine.Tag}: Failure during candidacy run with current state of {_engine.CurrentState}", e);
                    }

                    if (_engine.CurrentState == RachisState.Candidate)
                    {
                        // still a candidate, so start the candidacy over
                        _engine.SwitchToCandidateState("An error occured during the last candidacy: " + e);
                    }
                    else if (_engine.CurrentState != RachisState.Passive)
                    {
                        _engine.Timeout.Start(_engine.SwitchToCandidateStateOnTimeout);
                    }
                }
            }
            finally
            {
                try
                {
                    Dispose();
                }
                catch (Exception)
                {
                    // best effort: there is nothing useful to do if disposal fails
                }
            }
        }