Esempio n. 1
0
        /// <summary>
        /// Runs one heartbeat round: starts a <c>Replicator</c> for every remote member,
        /// awaits all replies and then decides whether this node may keep leading.
        /// </summary>
        /// <param name="auditTrail">
        /// Audit trail used to read the last committed/uncommitted indices, to look up
        /// preceding terms that are not cached yet, and to commit entries.
        /// </param>
        /// <param name="token">Token passed to the audit trail calls and to every replicator.</param>
        /// <returns>
        /// <see langword="true"/> if the node was not demoted and no reply carried a term greater
        /// than <c>currentTerm</c>; <see langword="false"/> if awaiting a reply was canceled or the
        /// node was moved to the follower state.
        /// </returns>
        private async Task <bool> DoHeartbeats(IAuditTrail <IRaftLogEntry> auditTrail, CancellationToken token)
        {
            var timeStamp = Timestamp.Current;
            var tasks     = new AsyncResultSet();

            // minPrecedingIndex starts at long.MaxValue (the identity for Math.Min).
            // Seeding it with 0 pinned the minimum at 0 forever, because every
            // preceding index is clamped to >= 0 below.
            long commitIndex       = auditTrail.GetLastIndex(true),
                 currentIndex      = auditTrail.GetLastIndex(false),
                 term              = currentTerm,
                 minPrecedingIndex = long.MaxValue;

            // send heartbeat in parallel
            foreach (var member in stateMachine.Members)
            {
                if (!member.IsRemote)
                {
                    continue;
                }

                long precedingIndex = Math.Max(0, member.NextIndex - 1);
                minPrecedingIndex = Math.Min(minPrecedingIndex, precedingIndex);

                // try to get term from the cache to avoid touching audit trail for each member
                if (!precedingTermCache.TryGetValue(precedingIndex, out long precedingTerm))
                {
                    precedingTerm = await auditTrail.GetTermAsync(precedingIndex, token).ConfigureAwait(false);
                    precedingTermCache.Add(precedingIndex, precedingTerm);
                }

                tasks.AddLast(new Replicator(member, commitIndex, currentIndex, term, precedingIndex, precedingTerm, stateMachine.Logger, token).Start(auditTrail));
            }

            // clear cache
            if (precedingTermCache.Count > MaxTermCacheSize)
            {
                precedingTermCache.Clear();
            }
            else
            {
                // No remote member visited => the minimum was never updated; pass 0 as the method always did.
                // NOTE(review): RemoveHead presumably drops cached terms below the given index - confirm.
                precedingTermCache.RemoveHead(minPrecedingIndex == long.MaxValue ? 0L : minPrecedingIndex);
            }

            int quorum = 1, commitQuorum = 1; // because we know that the entry is replicated in this node

            // Each node's task is reset to default once it has been awaited.
#if NETSTANDARD2_1
            for (var task = tasks.First; task is not null; task.Value = default, task = task.Next)
#else
            for (var task = tasks.First; task is not null; task.ValueRef = default, task = task.Next)
#endif
            {
                try
                {
#if NETSTANDARD2_1
                    var result = await task.Value.ConfigureAwait(false);
#else
                    var result = await task.ValueRef.ConfigureAwait(false);
#endif
                    // remember the highest term reported by any member
                    term          = Math.Max(term, result.Term);
                    quorum       += 1;
                    commitQuorum += result.Value ? 1 : -1;
                }
                catch (MemberUnavailableException)
                {
                    // an unavailable member counts against both tallies
                    quorum       -= 1;
                    commitQuorum -= 1;
                }
                catch (OperationCanceledException)
                {
                    // leading was canceled
                    tasks.Clear();
                    Metrics?.ReportBroadcastTime(timeStamp.Elapsed);
                    return false;
                }
                catch (Exception e)
                {
                    // unexpected failures are logged and leave both tallies unchanged
                    stateMachine.Logger.LogError(e, ExceptionMessages.UnexpectedError);
                }
            }

            tasks.Clear();
            Metrics?.ReportBroadcastTime(timeStamp.Elapsed);

            if (commitQuorum > 0)
            {
                // majority of nodes accept entries with at least one entry from the current term
                var count = await auditTrail.CommitAsync(currentIndex, token).ConfigureAwait(false); // commit all entries started from first uncommitted index to the end

                stateMachine.Logger.CommitSuccessful(commitIndex + 1, count);
            }
            else
            {
                stateMachine.Logger.CommitFailed(quorum, commitIndex);

                // no majority replied and partitioning is not allowed: stop leading
                if (quorum <= 0 && !allowPartitioning)
                {
                    stateMachine.MoveToFollowerState(false, term);
                    return false;
                }
            }

            // continue leading only if no member reported a newer term
            if (term <= currentTerm)
            {
                return true;
            }

            stateMachine.MoveToFollowerState(false, term);
            return false;
        }
Esempio n. 2
0
        /// <summary>
        /// Sends a heartbeat to each remote member, awaits the replies one by one and
        /// decides whether this node keeps the leader role.
        /// </summary>
        /// <param name="auditTrail">
        /// Audit trail used to read the last committed/uncommitted indices and preceding terms,
        /// and to commit entries once enough members accepted them.
        /// </param>
        /// <param name="token">Token passed to the audit trail calls and to every replicator.</param>
        /// <returns>
        /// <see langword="false"/> if awaiting a reply was canceled or the node was moved to the
        /// follower state; otherwise the result of <c>CheckTerm</c> for the highest term observed.
        /// </returns>
        private async Task <bool> DoHeartbeats(IAuditTrail <IRaftLogEntry> auditTrail, CancellationToken token)
        {
            var startedAt = Timestamp.Current;
            var pending   = new LinkedList<ValueTask<Result<bool>>>();

            long committedIndex = auditTrail.GetLastIndex(true);
            long lastIndex      = auditTrail.GetLastIndex(false);
            var  highestTerm    = currentTerm;

            // start replication for every remote member without awaiting the replies yet
            foreach (var member in stateMachine.Members)
            {
                if (!member.IsRemote)
                {
                    continue;
                }

                long precedingIndex = Math.Max(0, member.NextIndex - 1);
                long precedingTerm  = await auditTrail.GetTermAsync(precedingIndex, token).ConfigureAwait(false);

                var heartbeat = new Replicator(member, committedIndex, lastIndex, highestTerm, precedingIndex, precedingTerm, stateMachine.Logger, token).Start(auditTrail);
                pending.AddLast(heartbeat);
            }

            // both tallies start at 1 because the entry is known to be replicated on this node
            var quorumBalance = 1;
            var commitBalance = 1;

            var node = pending.First;
            while (node != null)
            {
                try
                {
                    var reply = await node.Value.ConfigureAwait(false);

                    highestTerm = Math.Max(highestTerm, reply.Term);
                    quorumBalance++;
                    if (reply.Value)
                    {
                        commitBalance++;
                    }
                    else
                    {
                        commitBalance--;
                    }
                }
                catch (MemberUnavailableException)
                {
                    // an unavailable member counts against both tallies
                    quorumBalance--;
                    commitBalance--;
                }
                catch (OperationCanceledException)
                {
                    // leadership was canceled: report the elapsed time and give up
                    pending.Clear();
                    Metrics?.ReportBroadcastTime(startedAt.Elapsed);
                    return false;
                }
                catch (Exception e)
                {
                    stateMachine.Logger.LogError(e, ExceptionMessages.UnexpectedError);
                }

                // reset the awaited task stored in the node, then advance
                node.Value = default;
                node = node.Next;
            }

            pending.Clear();
            Metrics?.ReportBroadcastTime(startedAt.Elapsed);

            if (commitBalance <= 0)
            {
                stateMachine.Logger.CommitFailed(quorumBalance, committedIndex);

                // no majority replied and partitioning is not allowed: step down
                if (quorumBalance <= 0 && !allowPartitioning)
                {
                    stateMachine.MoveToFollowerState(false, highestTerm);
                    return false;
                }
            }
            else
            {
                // enough members accepted the entries: commit everything up to the last index
                var committed = await auditTrail.CommitAsync(lastIndex, token).ConfigureAwait(false);

                stateMachine.Logger.CommitSuccessful(committedIndex + 1, committed);
            }

            // continue leading only if the term check passes
            return CheckTerm(highestTerm);
        }