/// <summary>
/// Starts a <c>Replicator</c> for every remote cluster member, awaits all of the responses and
/// decides whether this node may keep leading.
/// </summary>
/// <remarks>
/// Two counters are maintained, both starting at 1 to account for the local node:
/// <c>quorum</c> is incremented for every member that answered and decremented for every member that
/// threw <c>MemberUnavailableException</c>; <c>commitQuorum</c> is incremented when the response value is
/// <see langword="true"/> and decremented when it is <see langword="false"/> or the member is unavailable.
/// Any other exception (except cancellation) is logged and leaves both counters untouched.
/// </remarks>
/// <param name="auditTrail">The audit trail used to read the last log indexes and terms and to commit entries.</param>
/// <param name="token">The token passed to the audit trail and to every replicator.</param>
/// <returns>
/// <see langword="true"/> if the node was not demoted and no response carried a term greater than <c>currentTerm</c>;
/// <see langword="false"/> if an <see cref="OperationCanceledException"/> was observed or the node was moved to the follower state.
/// </returns>
private async Task<bool> DoHeartbeats(IAuditTrail<IRaftLogEntry> auditTrail, CancellationToken token)
{
    var timeStamp = Timestamp.Current;
    var tasks = new AsyncResultSet();

    long commitIndex = auditTrail.GetLastIndex(true),
        currentIndex = auditTrail.GetLastIndex(false),
        term = currentTerm,

        // Seeded with the largest value so the first remote member establishes the real minimum.
        // Seeding with zero pins the result at zero because preceding indexes are never negative.
        minPrecedingIndex = long.MaxValue;

    // send heartbeat in parallel
    foreach (var member in stateMachine.Members)
    {
        if (member.IsRemote)
        {
            long precedingIndex = Math.Max(0, member.NextIndex - 1), precedingTerm;
            minPrecedingIndex = Math.Min(minPrecedingIndex, precedingIndex);

            // try to get term from the cache to avoid touching audit trail for each member
            if (!precedingTermCache.TryGetValue(precedingIndex, out precedingTerm))
            {
                precedingTermCache.Add(precedingIndex, precedingTerm = await auditTrail.GetTermAsync(precedingIndex, token).ConfigureAwait(false));
            }

            tasks.AddLast(new Replicator(member, commitIndex, currentIndex, term, precedingIndex, precedingTerm, stateMachine.Logger, token).Start(auditTrail));
        }
    }

    // No remote member was visited: fall back to zero, the value that was always used previously.
    if (minPrecedingIndex == long.MaxValue)
    {
        minPrecedingIndex = 0L;
    }

    // clear cache
    if (precedingTermCache.Count > MaxTermCacheSize)
    {
        precedingTermCache.Clear();
    }
    else
    {
        // NOTE(review): presumably evicts cached terms for indexes preceding the smallest index
        // still needed by any member - confirm against the RemoveHead implementation.
        precedingTermCache.RemoveHead(minPrecedingIndex);
    }

    int quorum = 1, commitQuorum = 1; // because we know that the entry is replicated in this node

    // Each node's slot is reset to default after it has been awaited.
#if NETSTANDARD2_1
    for (var task = tasks.First; task is not null; task.Value = default, task = task.Next)
#else
    for (var task = tasks.First; task is not null; task.ValueRef = default, task = task.Next)
#endif
    {
        try
        {
#if NETSTANDARD2_1
            var result = await task.Value.ConfigureAwait(false);
#else
            var result = await task.ValueRef.ConfigureAwait(false);
#endif
            term = Math.Max(term, result.Term);
            quorum += 1;
            commitQuorum += result.Value ? 1 : -1;
        }
        catch (MemberUnavailableException)
        {
            quorum -= 1;
            commitQuorum -= 1;
        }
        catch (OperationCanceledException)
        {
            // leading was canceled
            tasks.Clear();
            Metrics?.ReportBroadcastTime(timeStamp.Elapsed);
            return false;
        }
        catch (Exception e)
        {
            // Unexpected failures are logged only; they do not change either counter.
            stateMachine.Logger.LogError(e, ExceptionMessages.UnexpectedError);
        }
    }

    tasks.Clear();
    Metrics?.ReportBroadcastTime(timeStamp.Elapsed);

    bool mayKeepLeading;

    // majority of nodes accept entries with at least one entry from the current term
    if (commitQuorum > 0)
    {
        // commit all entries started from first uncommitted index to the end
        var count = await auditTrail.CommitAsync(currentIndex, token).ConfigureAwait(false);
        stateMachine.Logger.CommitSuccessful(commitIndex + 1, count);
        mayKeepLeading = true;
    }
    else
    {
        stateMachine.Logger.CommitFailed(quorum, commitIndex);

        // majority of nodes replicated, continue leading if current term is not changed
        mayKeepLeading = quorum > 0 || allowPartitioning;
    }

    // Leadership is retained only when no member reported a term newer than ours.
    if (mayKeepLeading && term <= currentTerm)
    {
        return true;
    }

    // it is partitioned network with absolute majority, not possible to have more than one leader
    stateMachine.MoveToFollowerState(false, term);
    return false;
}
/// <summary>
/// Starts a <c>Replicator</c> for each remote member, collects the responses and determines
/// whether the local node stays in the leader role.
/// </summary>
/// <param name="auditTrail">The audit trail queried for log indexes and terms and used to commit entries.</param>
/// <param name="token">The token forwarded to the audit trail and to every replicator.</param>
/// <returns>
/// The result of <c>CheckTerm</c> when the commit succeeded, or when it failed but enough members responded
/// (or partitioning is allowed); <see langword="false"/> when an <see cref="OperationCanceledException"/>
/// was observed or the node was moved to the follower state.
/// </returns>
private async Task<bool> DoHeartbeats(IAuditTrail<IRaftLogEntry> auditTrail, CancellationToken token)
{
    var startedAt = Timestamp.Current;
    var pending = new LinkedList<ValueTask<Result<bool>>>();
    long lastCommitted = auditTrail.GetLastIndex(true);
    long lastUncommitted = auditTrail.GetLastIndex(false);
    var highestTerm = currentTerm;

    // Launch replication for every remote member; responses are awaited afterwards.
    foreach (var node in stateMachine.Members)
    {
        if (!node.IsRemote)
        {
            continue;
        }

        long prevIndex = Math.Max(0, node.NextIndex - 1);
        long prevTerm = await auditTrail.GetTermAsync(prevIndex, token).ConfigureAwait(false);
        pending.AddLast(
            new Replicator(node, lastCommitted, lastUncommitted, highestTerm, prevIndex, prevTerm, stateMachine.Logger, token)
                .Start(auditTrail));
    }

    // Both counters start at one: the local node already holds the entry.
    var responded = 1;
    var accepted = 1;

    var current = pending.First;
    while (current != null)
    {
        try
        {
            var reply = await current.Value.ConfigureAwait(false);
            highestTerm = Math.Max(highestTerm, reply.Term);
            responded += 1;
            if (reply.Value)
            {
                accepted += 1;
            }
            else
            {
                accepted -= 1;
            }
        }
        catch (MemberUnavailableException)
        {
            responded -= 1;
            accepted -= 1;
        }
        catch (OperationCanceledException)
        {
            // Leadership was canceled: drop the remaining work and report the elapsed time.
            pending.Clear();
            Metrics?.ReportBroadcastTime(startedAt.Elapsed);
            return false;
        }
        catch (Exception e)
        {
            // Unexpected errors are logged only; the counters stay as they are.
            stateMachine.Logger.LogError(e, ExceptionMessages.UnexpectedError);
        }

        // Reset the awaited slot before advancing to the next node.
        current.Value = default;
        current = current.Next;
    }

    pending.Clear();
    Metrics?.ReportBroadcastTime(startedAt.Elapsed);

    // A positive balance of acceptances permits committing everything up to the last uncommitted index.
    if (accepted > 0)
    {
        var committedCount = await auditTrail.CommitAsync(lastUncommitted, token).ConfigureAwait(false);
        stateMachine.Logger.CommitSuccessful(lastCommitted + 1, committedCount);
        return CheckTerm(highestTerm);
    }

    stateMachine.Logger.CommitFailed(responded, lastCommitted);

    // Too few members responded and partitioning is not permitted: step down.
    if (responded <= 0 && !allowPartitioning)
    {
        stateMachine.MoveToFollowerState(false, highestTerm);
        return false;
    }

    // Enough members responded (or partitioning is allowed): keep leading if the term check passes.
    return CheckTerm(highestTerm);
}