Exemple #1
0
        public void Apply(TransactionOperationContext context, long uptoInclusive, Leader leader, ServerStore serverStore)
        {
            Debug.Assert(context.Transaction != null);

            var lastAppliedIndex = _parent.GetLastCommitIndex(context);

            for (var index = lastAppliedIndex + 1; index <= uptoInclusive; index++)
            {
                var cmd = _parent.GetEntry(context, index, out RachisEntryFlags flags);
                if (cmd == null || flags == RachisEntryFlags.Invalid)
                {
                    throw new InvalidOperationException("Expected to apply entry " + index + " but it isn't stored");
                }

                if (flags != RachisEntryFlags.StateMachineCommand)
                {
                    continue;
                }

                Apply(context, cmd, index, leader, serverStore);
            }
            var term = _parent.GetTermForKnownExisting(context, uptoInclusive);

            _parent.SetLastCommitIndex(context, uptoInclusive, term);
        }
Exemple #2
0
 private long GetLastIndex(RachisConsensus <CountingStateMachine> rachis)
 {
     using (rachis.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
         using (context.OpenReadTransaction())
         {
             return(rachis.GetLastCommitIndex(context));
         }
 }
Exemple #3
0
        public long Apply(ClusterOperationContext context, long uptoInclusive, Leader leader, ServerStore serverStore, Stopwatch duration)
        {
            Debug.Assert(context.Transaction != null);

            var lastAppliedIndex             = _parent.GetLastCommitIndex(context);
            var maxTimeAllowedToWaitForApply = _parent.Timeout.TimeoutPeriod / 4;

            for (var index = lastAppliedIndex + 1; index <= uptoInclusive; index++)
            {
                var cmd = _parent.GetEntry(context, index, out RachisEntryFlags flags);
                if (cmd == null || flags == RachisEntryFlags.Invalid)
                {
                    throw new InvalidOperationException("Expected to apply entry " + index + " but it isn't stored");
                }

                lastAppliedIndex = index;

                if (flags != RachisEntryFlags.StateMachineCommand)
                {
                    _parent.LogHistory.UpdateHistoryLog(context, index, _parent.CurrentTerm, cmd, null, null);

                    var currentIndex = index;
                    context.Transaction.InnerTransaction.LowLevelTransaction.OnDispose += t =>
                    {
                        if (t is LowLevelTransaction llt && llt.Committed)
                        {
                            serverStore.Cluster.NotifyAndSetCompleted(currentIndex);
                        }
                    };
                    continue;
                }

                Apply(context, cmd, index, leader, serverStore);

                if (duration.ElapsedMilliseconds >= maxTimeAllowedToWaitForApply)
                {
                    // we don't want to spend so much time applying commands that we will time out the leader
                    // so we time this from the follower perspective and abort after applying a single command
                    // or 25% of the time has already passed
                    break;
                }
            }
            var term = _parent.GetTermForKnownExisting(context, lastAppliedIndex);

            _parent.SetLastCommitIndex(context, lastAppliedIndex, term);

            return(lastAppliedIndex);
        }
Exemple #4
0
        private void OnVoterConfirmation()
        {
            TransactionOperationContext context;

            if (_hasNewTopology.Lower())
            {
                ClusterTopology clusterTopology;
                using (_engine.ContextPool.AllocateOperationContext(out context))
                    using (context.OpenReadTransaction())
                    {
                        clusterTopology = _engine.GetTopology(context);
                    }
                if (clusterTopology.Contains(_engine.LeaderTag) == false)
                {
                    TaskExecutor.CompleteAndReplace(ref _newEntriesArrived);
                    _engine.SetNewState(RachisState.Passive, this, Term,
                                        "I was kicked out of the cluster and moved to passive mode");
                    return;
                }
                RefreshAmbassadors(clusterTopology);
            }

            var maxIndexOnQuorum = GetMaxIndexOnQuorum(VotersMajority);

            if (_lastCommit >= maxIndexOnQuorum ||
                maxIndexOnQuorum == 0)
            {
                return; // nothing to do here
            }
            bool changedFromLeaderElectToLeader;

            using (_engine.ContextPool.AllocateOperationContext(out context))
                using (context.OpenWriteTransaction())
                {
                    _lastCommit = _engine.GetLastCommitIndex(context);

                    if (_lastCommit >= maxIndexOnQuorum ||
                        maxIndexOnQuorum == 0)
                    {
                        return; // nothing to do here
                    }
                    if (_engine.GetTermForKnownExisting(context, maxIndexOnQuorum) < Term)
                    {
                        return;// can't commit until at least one entry from our term has been published
                    }
                    changedFromLeaderElectToLeader = _engine.TakeOffice();

                    maxIndexOnQuorum = _engine.Apply(context, maxIndexOnQuorum, this, Stopwatch.StartNew());

                    context.Transaction.Commit();

                    _lastCommit = maxIndexOnQuorum;
                }

            foreach (var kvp in _entries)
            {
                if (kvp.Key > _lastCommit)
                {
                    continue;
                }

                if (_entries.TryRemove(kvp.Key, out CommandState value))
                {
                    TaskExecutor.Execute(o =>
                    {
                        var tuple = (CommandState)o;
                        if (tuple.OnNotify != null)
                        {
                            tuple.OnNotify(tuple.TaskCompletionSource);
                            return;
                        }
                        tuple.TaskCompletionSource.TrySetResult((tuple.CommandIndex, tuple.Result));
                    }, value);
                }
            }

            if (_entries.Count != 0)
            {
                // we have still items to process, run them in 1 node cluster
                // and speed up the followers ambassadors if they can
                _newEntry.Set();
            }

            if (changedFromLeaderElectToLeader)
            {
                _engine.LeaderElectToLeaderChanged();
            }
        }
Exemple #5
0
        private (bool HasRemovedFromTopology, long LastAcknowledgedIndex, long LastTruncate, long LastCommit)  ApplyLeaderStateToLocalState(Stopwatch sp, ClusterOperationContext context, List <RachisEntry> entries, AppendEntries appendEntries)
        {
            long lastTruncate;
            long lastCommit;

            bool removedFromTopology = false;

            // we start the tx after we finished reading from the network
            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Ready to start tx in {sp.Elapsed}");
            }

            using (var tx = context.OpenWriteTransaction())
            {
                _engine.ValidateTerm(_term);

                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Tx running in {sp.Elapsed}");
                }

                if (entries.Count > 0)
                {
                    var(lastTopology, lastTopologyIndex) = _engine.AppendToLog(context, entries);
                    using (lastTopology)
                    {
                        if (lastTopology != null)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Topology changed to {lastTopology}");
                            }

                            var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology);

                            if (topology.Members.ContainsKey(_engine.Tag) ||
                                topology.Promotables.ContainsKey(_engine.Tag) ||
                                topology.Watchers.ContainsKey(_engine.Tag))
                            {
                                RachisConsensus.SetTopology(_engine, context, topology);
                            }
                            else
                            {
                                removedFromTopology = true;
                                _engine.ClearAppendedEntriesAfter(context, lastTopologyIndex);
                            }
                        }
                    }
                }

                var lastEntryIndexToCommit = Math.Min(
                    _engine.GetLastEntryIndex(context),
                    appendEntries.LeaderCommit);

                var lastAppliedIndex = _engine.GetLastCommitIndex(context);
                var lastAppliedTerm  = _engine.GetTermFor(context, lastEntryIndexToCommit);

                // we start to commit only after we have any log with a term of the current leader
                if (lastEntryIndexToCommit > lastAppliedIndex && lastAppliedTerm == appendEntries.Term)
                {
                    lastAppliedIndex = _engine.Apply(context, lastEntryIndexToCommit, null, sp);
                }

                lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastAppliedIndex);
                _engine.TruncateLogBefore(context, lastTruncate);

                lastCommit = lastAppliedIndex;
                if (_engine.Log.IsInfoEnabled)
                {
                    _engine.Log.Info($"{ToString()}: Ready to commit in {sp.Elapsed}");
                }

                tx.Commit();
            }

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"{ToString()}: Processing entries request with {entries.Count} entries took {sp.Elapsed}");
            }

            var lastAcknowledgedIndex = entries.Count == 0 ? appendEntries.PrevLogIndex : entries[entries.Count - 1].Index;

            return(HasRemovedFromTopology : removedFromTopology, LastAcknowledgedIndex : lastAcknowledgedIndex, LastTruncate : lastTruncate, LastCommit : lastCommit);
        }
Exemple #6
0
        private void FollowerSteadyState()
        {
            var  entries = new List <RachisEntry>();
            long lastCommit = 0, lastTruncate = 0;

            if (_engine.Log.IsInfoEnabled)
            {
                _engine.Log.Info($"Follower {_engine.Tag}: Entering steady state");
            }
            while (true)
            {
                entries.Clear();

                using (_engine.ContextPool.AllocateOperationContext(out TransactionOperationContext context))
                {
                    var appendEntries = _connection.Read <AppendEntries>(context);

                    if (appendEntries.Term != _engine.CurrentTerm)
                    {
                        _connection.Send(context, new AppendEntriesResponse
                        {
                            CurrentTerm = _engine.CurrentTerm,
                            Message     = "The current term that I have " + _engine.CurrentTerm + " doesn't match " + appendEntries.Term,
                            Success     = false
                        });
                        if (_engine.Log.IsInfoEnabled && entries.Count > 0)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got invalid term {appendEntries.Term} while the current term is {_engine.CurrentTerm}, aborting connection...");
                        }

                        return;
                    }


                    _engine.Timeout.Defer(_connection.Source);
                    var sp = Stopwatch.StartNew();
                    if (appendEntries.EntriesCount != 0)
                    {
                        for (int i = 0; i < appendEntries.EntriesCount; i++)
                        {
                            entries.Add(_connection.ReadRachisEntry(context));
                            _engine.Timeout.Defer(_connection.Source);
                        }
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got non empty append entries request with {entries.Count} entries. Last: ({entries[entries.Count - 1].Index} - {entries[entries.Count - 1].Flags})"
#if DEBUG
                                             + $"[{string.Join(" ,", entries.Select(x => x.ToString()))}]"
#endif
                                             );
                        }
                    }

                    var lastLogIndex = appendEntries.PrevLogIndex;

                    // don't start write transaction fro noop
                    if (lastCommit != appendEntries.LeaderCommit ||
                        lastTruncate != appendEntries.TruncateLogBefore ||
                        entries.Count != 0)
                    {
                        bool removedFromTopology = false;
                        // we start the tx after we finished reading from the network
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Ready to start tx in {sp.Elapsed}");
                        }
                        using (var tx = context.OpenWriteTransaction())
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Follower {_engine.Tag}: Tx running in {sp.Elapsed}");
                            }
                            if (entries.Count > 0)
                            {
                                using (var lastTopology = _engine.AppendToLog(context, entries))
                                {
                                    if (lastTopology != null)
                                    {
                                        if (_engine.Log.IsInfoEnabled)
                                        {
                                            _engine.Log.Info($"Topology changed to {lastTopology}");
                                        }

                                        var topology = JsonDeserializationRachis <ClusterTopology> .Deserialize(lastTopology);

                                        if (topology.Members.ContainsKey(_engine.Tag) ||
                                            topology.Promotables.ContainsKey(_engine.Tag) ||
                                            topology.Watchers.ContainsKey(_engine.Tag))
                                        {
                                            RachisConsensus.SetTopology(_engine, context, topology);
                                        }
                                        else
                                        {
                                            removedFromTopology = true;
                                        }
                                    }
                                }
                            }

                            lastLogIndex = _engine.GetLastEntryIndex(context);

                            var lastEntryIndexToCommit = Math.Min(
                                lastLogIndex,
                                appendEntries.LeaderCommit);


                            var lastAppliedIndex = _engine.GetLastCommitIndex(context);

                            if (lastEntryIndexToCommit > lastAppliedIndex)
                            {
                                _engine.Apply(context, lastEntryIndexToCommit, null);
                            }

                            lastTruncate = Math.Min(appendEntries.TruncateLogBefore, lastEntryIndexToCommit);
                            _engine.TruncateLogBefore(context, lastTruncate);
                            lastCommit = lastEntryIndexToCommit;
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info($"Follower {_engine.Tag}: Ready to commit in {sp.Elapsed}");
                            }
                            tx.Commit();
                        }

                        if (_engine.Log.IsInfoEnabled && entries.Count > 0)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Processing entries request with {entries.Count} entries took {sp.Elapsed}");
                        }

                        if (removedFromTopology)
                        {
                            if (_engine.Log.IsInfoEnabled)
                            {
                                _engine.Log.Info("Was notified that I was removed from the node topoloyg, will be moving to passive mode now.");
                            }
                            _engine.SetNewState(RachisState.Passive, null, appendEntries.Term,
                                                "I was kicked out of the cluster and moved to passive mode");
                            return;
                        }
                    }

                    if (appendEntries.ForceElections)
                    {
                        if (_engine.Log.IsInfoEnabled)
                        {
                            _engine.Log.Info($"Follower {_engine.Tag}: Got a request to become candidate from the leader.");
                        }
                        _engine.SwitchToCandidateState("Was asked to do so by my leader", forced: true);
                        return;
                    }

                    _connection.Send(context, new AppendEntriesResponse
                    {
                        CurrentTerm  = _engine.CurrentTerm,
                        LastLogIndex = lastLogIndex,
                        Success      = true
                    });

                    _engine.Timeout.Defer(_connection.Source);

                    _engine.ReportLeaderTime(appendEntries.TimeAsLeader);
                }
            }
        }