/// <summary>
/// Creates the handler, capturing the logger and the resolved cluster membership options.
/// </summary>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="clusterMembershipOptions">
/// Options wrapper; its <c>Value</c> is read once here and the result is stored.
/// </param>
public FatalErrorHandler(
    ILogger<FatalErrorHandler> log,
    IOptions<ClusterMembershipOptions> clusterMembershipOptions)
{
    this.clusterMembershipOptions = clusterMembershipOptions.Value;
    this.log = log;
}
/// <summary>
/// Creates the membership table manager: stores its dependencies, builds the initial
/// snapshot, wires up the snapshot update stream and creates the table refresh timer.
/// </summary>
/// <param name="localSiloDetails">Details of the local silo; its <c>SiloAddress</c> is cached as this silo's address.</param>
/// <param name="clusterMembershipOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="membershipTable">The membership table, stored as the table provider.</param>
/// <param name="fatalErrorHandler">Fatal error handler stored on this instance.</param>
/// <param name="gossiper">Gossiper stored on this instance.</param>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="timerFactory">Factory used to create the membership update timer.</param>
/// <param name="siloLifecycle">Silo lifecycle stored on this instance.</param>
public MembershipTableManager(
    ILocalSiloDetails localSiloDetails,
    IOptions<ClusterMembershipOptions> clusterMembershipOptions,
    IMembershipTable membershipTable,
    IFatalErrorHandler fatalErrorHandler,
    IMembershipGossiper gossiper,
    ILogger<MembershipTableManager> log,
    IAsyncTimerFactory timerFactory,
    ISiloLifecycle siloLifecycle)
{
    // Capture dependencies first: the local silo entry below is built by an instance method,
    // so every field must already be populated when it runs.
    this.log = log;
    this.siloLifecycle = siloLifecycle;
    this.gossiper = gossiper;
    this.fatalErrorHandler = fatalErrorHandler;
    this.membershipTableProvider = membershipTable;
    this.clusterMembershipOptions = clusterMembershipOptions.Value;
    this.localSiloDetails = localSiloDetails;
    this.myAddress = this.localSiloDetails.SiloAddress;

    // Initial snapshot: only the local silo's entry, the minimum version, and no table entries.
    var localEntry = this.CreateLocalSiloEntry(this.CurrentStatus);
    var initialSnapshot = new MembershipTableSnapshot(
        localEntry,
        MembershipVersion.MinValue,
        ImmutableDictionary<SiloAddress, MembershipEntry>.Empty);
    this.snapshot = initialSnapshot;

    // The comparison delegate returns true only for a strictly newer version; each published
    // update atomically replaces the cached snapshot field.
    this.updates = new AsyncEnumerable<MembershipTableSnapshot>(
        (current, candidate) => candidate.Version > current.Version,
        initialSnapshot)
    {
        OnPublished = published => Interlocked.Exchange(ref this.snapshot, published)
    };

    var refreshPeriod = this.clusterMembershipOptions.TableRefreshTimeout;
    this.membershipUpdateTimer = timerFactory.Create(
        refreshPeriod,
        nameof(PeriodicallyRefreshMembershipTable));
}
/// <summary>
/// Runs a <see cref="MembershipTableCleanupAgent"/> through start, one completed timer tick and stop,
/// and checks whether <see cref="IMembershipTable.CleanupDefunctSiloEntries"/> was recorded on the table.
/// </summary>
/// <param name="enabled">
/// When <c>true</c> the cleanup period is set to 90 minutes and a cleanup call is expected;
/// when <c>false</c> the period is left <c>null</c> and no cleanup call is expected.
/// </param>
private async Task BasicScenario(bool enabled)
{
    const string CleanupMethodName = nameof(IMembershipTable.CleanupDefunctSiloEntries);

    TimeSpan? cleanupPeriod = null;
    if (enabled)
    {
        cleanupPeriod = TimeSpan.FromMinutes(90);
    }

    var options = new ClusterMembershipOptions
    {
        DefunctSiloCleanupPeriod = cleanupPeriod,
        DefunctSiloExpiration = TimeSpan.FromDays(1)
    };

    // Every timer wait is parked in this queue so the test decides when (and how) it completes.
    var createdTimers = new List<DelegateAsyncTimer>();
    var pendingWaits = new ConcurrentQueue<(TimeSpan? DelayOverride, TaskCompletionSource<bool> Completion)>();
    var timerFactory = new DelegateAsyncTimerFactory(
        (period, name) =>
        {
            Assert.Equal(options.DefunctSiloCleanupPeriod.Value, period);

            var created = new DelegateAsyncTimer(
                overridePeriod =>
                {
                    var completion = new TaskCompletionSource<bool>();
                    pendingWaits.Enqueue((overridePeriod, completion));
                    return completion.Task;
                });

            createdTimers.Add(created);
            return created;
        });

    var table = new InMemoryMembershipTable();
    var wrappedOptions = Options.Create(options);
    var agentLogger = this.loggerFactory.CreateLogger<MembershipTableCleanupAgent>();
    var cleanupAgent = new MembershipTableCleanupAgent(wrappedOptions, table, agentLogger, timerFactory);

    var lifecycleLogger = this.loggerFactory.CreateLogger<SiloLifecycleSubject>();
    var lifecycle = new SiloLifecycleSubject(lifecycleLogger);
    var participant = (ILifecycleParticipant<ISiloLifecycle>)cleanupAgent;
    participant.Participate(lifecycle);

    await lifecycle.OnStart();

    // Nothing may have been cleaned up before the first timer tick is released.
    Assert.DoesNotContain(table.Calls, c => c.Method.Equals(CleanupMethodName));

    // A timer wait is pending only when cleanup is enabled; release it with 'true'.
    var waitWasPending = pendingWaits.TryDequeue(out var pendingWait);
    Assert.Equal(enabled, waitWasPending);
    pendingWait.Completion?.TrySetResult(true);

    // Begin stopping, then complete any remaining waits with 'false'.
    var stopping = lifecycle.OnStop();
    while (pendingWaits.TryDequeue(out pendingWait))
    {
        pendingWait.Completion.TrySetResult(false);
    }

    if (!enabled)
    {
        Assert.DoesNotContain(table.Calls, c => c.Method.Equals(CleanupMethodName));
    }
    else
    {
        Assert.Contains(table.Calls, c => c.Method.Equals(CleanupMethodName));
    }

    await stopping;
}
/// <summary>
/// Creates the cleanup agent. The defunct-silo cleanup timer is created only when
/// <c>DefunctSiloCleanupPeriod</c> has a value; otherwise the timer field is left unassigned.
/// </summary>
/// <param name="clusterMembershipOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="membershipTableProvider">Membership table stored on this instance.</param>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="timerFactory">Factory used to create the cleanup timer when cleanup is configured.</param>
public MembershipTableCleanupAgent(
    IOptions<ClusterMembershipOptions> clusterMembershipOptions,
    IMembershipTable membershipTableProvider,
    ILogger<MembershipTableCleanupAgent> log,
    IAsyncTimerFactory timerFactory)
{
    this.log = log;
    this.membershipTableProvider = membershipTableProvider;
    this.clusterMembershipOptions = clusterMembershipOptions.Value;

    var cleanupPeriod = this.clusterMembershipOptions.DefunctSiloCleanupPeriod;
    if (!cleanupPeriod.HasValue)
    {
        // No period configured: no timer is created.
        return;
    }

    this.cleanupDefunctSilosTimer = timerFactory.Create(
        cleanupPeriod.Value,
        nameof(CleanupDefunctSilos));
}
/// <summary>
/// Creates the cluster health monitor: stores its dependencies, creates the health monitoring
/// timer from <c>ProbeTimeout</c>, and sets up the factory delegate for per-silo monitors.
/// </summary>
/// <param name="tableManager">Membership table manager stored on this instance.</param>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="clusterMembershipOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="fatalErrorHandler">Fatal error handler stored on this instance.</param>
/// <param name="serviceProvider">Service provider captured by the monitor factory delegate.</param>
/// <param name="timerFactory">Factory used to create the cluster health timer.</param>
public ClusterHealthMonitor(
    MembershipTableManager tableManager,
    ILogger<ClusterHealthMonitor> log,
    IOptions<ClusterMembershipOptions> clusterMembershipOptions,
    IFatalErrorHandler fatalErrorHandler,
    IServiceProvider serviceProvider,
    IAsyncTimerFactory timerFactory)
{
    this.log = log;
    this.tableManager = tableManager;
    this.fatalErrorHandler = fatalErrorHandler;
    this.clusterMembershipOptions = clusterMembershipOptions.Value;

    var probePeriod = this.clusterMembershipOptions.ProbeTimeout;
    this.monitorClusterHealthTimer = timerFactory.Create(probePeriod, nameof(MonitorClusterHealth));

    // Each monitor is built through the service provider, with the target silo as an extra argument.
    this.createMonitor = target =>
        ActivatorUtilities.CreateInstance<SiloHealthMonitor>(serviceProvider, target);
}
/// <summary>
/// Computes how long to wait for the silo liveness sub-system to detect and act on any recent
/// cluster membership changes.
/// </summary>
/// <param name="clusterMembershipOptions">Membership options the wait time is derived from.</param>
/// <param name="didKill">
/// <c>true</c> if a silo was hard-killed rather than stopped, in which case
/// <c>ProbeTimeout</c> multiplied by <c>NumMissedProbesLimit</c> is added to the result.
/// </param>
/// <returns>
/// The optional kill allowance plus either 5 seconds (when <c>UseLivenessGossip</c> is set)
/// or twice <c>TableRefreshTimeout</c>.
/// </returns>
/// <seealso cref="WaitForLivenessToStabilizeAsync"/>
public static TimeSpan GetLivenessStabilizationTime(ClusterMembershipOptions clusterMembershipOptions, bool didKill = false)
{
    // In case of a hard kill (kill and not Stop), silos need time to detect the failure first.
    TimeSpan failureDetectionTime = didKill
        ? TestingUtils.Multiply(clusterMembershipOptions.ProbeTimeout, clusterMembershipOptions.NumMissedProbesLimit)
        : TimeSpan.Zero;

    TimeSpan propagationTime = clusterMembershipOptions.UseLivenessGossip
        ? TimeSpan.FromSeconds(5)
        : TestingUtils.Multiply(clusterMembershipOptions.TableRefreshTimeout, 2);

    return failureDetectionTime + propagationTime;
}
/// <summary>
/// Creates the membership agent: stores its dependencies and creates the "I am alive" timer
/// from <c>IAmAliveTablePublishTimeout</c>.
/// </summary>
/// <param name="tableManager">Membership table manager stored on this instance.</param>
/// <param name="clusterHealthMonitor">Cluster health monitor stored on this instance.</param>
/// <param name="localSilo">Local silo details stored on this instance.</param>
/// <param name="fatalErrorHandler">Fatal error handler stored on this instance.</param>
/// <param name="options">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="timerFactory">Factory used to create the "I am alive" timer.</param>
public MembershipAgent(
    MembershipTableManager tableManager,
    ClusterHealthMonitor clusterHealthMonitor,
    ILocalSiloDetails localSilo,
    IFatalErrorHandler fatalErrorHandler,
    IOptions<ClusterMembershipOptions> options,
    ILogger<MembershipAgent> log,
    IAsyncTimerFactory timerFactory)
{
    var membershipOptions = options.Value;

    this.log = log;
    this.localSilo = localSilo;
    this.tableManager = tableManager;
    this.clusterHealthMonitor = clusterHealthMonitor;
    this.fatalErrorHandler = fatalErrorHandler;
    this.clusterMembershipOptions = membershipOptions;

    this.iAmAliveTimer = timerFactory.Create(
        membershipOptions.IAmAliveTablePublishTimeout,
        nameof(UpdateIAmAlive));
}
/// <summary>
/// Creates the cluster health monitor: stores its dependencies, points the probe-result
/// callback field at <c>OnProbeResultInternal</c>, and sets up the factory delegate for
/// per-silo monitors.
/// </summary>
/// <param name="localSiloDetails">Local silo details stored on this instance.</param>
/// <param name="membershipService">Membership table manager stored on this instance.</param>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="clusterMembershipOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="fatalErrorHandler">Fatal error handler stored on this instance.</param>
/// <param name="serviceProvider">Service provider stored on this instance and captured by the monitor factory delegate.</param>
public ClusterHealthMonitor(
    ILocalSiloDetails localSiloDetails,
    MembershipTableManager membershipService,
    ILogger<ClusterHealthMonitor> log,
    IOptions<ClusterMembershipOptions> clusterMembershipOptions,
    IFatalErrorHandler fatalErrorHandler,
    IServiceProvider serviceProvider)
{
    this.log = log;
    this.localSiloDetails = localSiloDetails;
    this.membershipService = membershipService;
    this.serviceProvider = serviceProvider;
    this.fatalErrorHandler = fatalErrorHandler;
    this.clusterMembershipOptions = clusterMembershipOptions.Value;

    this.onProbeResult = this.OnProbeResultInternal;

    // The forwarder reads the onProbeResult field on every invocation rather than binding to
    // the current handler, so monitors created here follow any later change to that field.
    Func<SiloHealthMonitor, ProbeResult, Task> forwardProbeResult =
        (monitor, result) => this.onProbeResult(monitor, result);

    this.createMonitor = target =>
        ActivatorUtilities.CreateInstance<SiloHealthMonitor>(serviceProvider, target, forwardProbeResult);
}
/// <summary>
/// Creates a health monitor for one silo: stores its dependencies, creates its logger and
/// ping timer (using <c>ProbeTimeout</c>), and starts the stopwatch that measures the time
/// since the last successful response.
/// </summary>
/// <param name="siloAddress">Address of the silo, exposed through <c>SiloAddress</c>.</param>
/// <param name="onProbeResult">Probe-result callback stored on this instance.</param>
/// <param name="clusterMembershipOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="loggerFactory">Factory used to create this instance's logger.</param>
/// <param name="remoteSiloProber">Prober stored on this instance.</param>
/// <param name="asyncTimerFactory">Factory used to create the ping timer.</param>
/// <param name="localSiloHealthMonitor">Local silo health monitor stored on this instance.</param>
public SiloHealthMonitor(
    SiloAddress siloAddress,
    Func<SiloHealthMonitor, ProbeResult, Task> onProbeResult,
    IOptions<ClusterMembershipOptions> clusterMembershipOptions,
    ILoggerFactory loggerFactory,
    IRemoteSiloProber remoteSiloProber,
    IAsyncTimerFactory asyncTimerFactory,
    ILocalSiloHealthMonitor localSiloHealthMonitor)
{
    var options = clusterMembershipOptions.Value;

    SiloAddress = siloAddress;
    _onProbeResult = onProbeResult;
    _prober = remoteSiloProber;
    _localSiloHealthMonitor = localSiloHealthMonitor;
    _clusterMembershipOptions = options;

    _log = loggerFactory.CreateLogger<SiloHealthMonitor>();
    _pingTimer = asyncTimerFactory.Create(options.ProbeTimeout, nameof(SiloHealthMonitor));

    // Started last, so the elapsed time is measured from the end of construction.
    _elapsedSinceLastSuccessfulResponse = ValueStopwatch.StartNew();
}
/// <summary>
/// Computes how long to wait for the silo liveness sub-system to detect and act on any recent
/// cluster membership changes, based on a <see cref="GlobalConfiguration"/>.
/// </summary>
/// <param name="config">Configuration whose membership settings are copied into a new <see cref="ClusterMembershipOptions"/>.</param>
/// <param name="didKill">Passed through unchanged to the <see cref="ClusterMembershipOptions"/>-based overload.</param>
/// <returns>The value returned by the <see cref="ClusterMembershipOptions"/>-based overload.</returns>
public static TimeSpan GetLivenessStabilizationTime(GlobalConfiguration config, bool didKill = false)
{
    // Copy the membership-related settings one by one, then delegate to the options-based overload.
    var options = new ClusterMembershipOptions();
    options.NumMissedTableIAmAliveLimit = config.NumMissedTableIAmAliveLimit;
    options.LivenessEnabled = config.LivenessEnabled;
    options.ProbeTimeout = config.ProbeTimeout;
    options.TableRefreshTimeout = config.TableRefreshTimeout;
    options.DeathVoteExpirationTimeout = config.DeathVoteExpirationTimeout;
    options.IAmAliveTablePublishTimeout = config.IAmAliveTablePublishTimeout;
    options.MaxJoinAttemptTime = config.MaxJoinAttemptTime;
    options.ValidateInitialConnectivity = config.ValidateInitialConnectivity;
    options.NumMissedProbesLimit = config.NumMissedProbesLimit;
    options.UseLivenessGossip = config.UseLivenessGossip;
    options.NumProbedSilos = config.NumProbedSilos;
    options.NumVotesForDeathDeclaration = config.NumVotesForDeathDeclaration;

    return TestCluster.GetLivenessStabilizationTime(options, didKill);
}
/// <summary>
/// Creates the membership table manager: stores its dependencies, computes the back-off maxima,
/// builds the initial snapshot, wires up the snapshot update stream and creates the table
/// refresh timer.
/// </summary>
/// <param name="localSiloDetails">Details of the local silo; its <c>SiloAddress</c> is cached as this silo's address.</param>
/// <param name="clusterMembershipOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="membershipTable">The membership table, stored as the table provider.</param>
/// <param name="fatalErrorHandler">Fatal error handler stored on this instance.</param>
/// <param name="gossiper">Gossiper stored on this instance.</param>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="timerFactory">Factory used to create the membership update timer.</param>
public MembershipTableManager(
    ILocalSiloDetails localSiloDetails,
    IOptions<ClusterMembershipOptions> clusterMembershipOptions,
    IMembershipTable membershipTable,
    IFatalErrorHandler fatalErrorHandler,
    IMembershipGossiper gossiper,
    ILogger<MembershipTableManager> log,
    IAsyncTimerFactory timerFactory)
{
    // Capture dependencies first: the local silo entry below is built by an instance method,
    // so every field must already be populated when it runs.
    this.log = log;
    this.gossiper = gossiper;
    this.fatalErrorHandler = fatalErrorHandler;
    this.membershipTableProvider = membershipTable;
    this.clusterMembershipOptions = clusterMembershipOptions.Value;
    this.localSiloDetails = localSiloDetails;
    this.myAddress = this.localSiloDetails.SiloAddress;

    // Both back-off maxima share one value: the larger of (step x expected cluster size)
    // and (2 x connection retry delay).
    var sizeBasedBackOff = EXP_BACKOFF_STEP.Multiply(this.clusterMembershipOptions.ExpectedClusterSize);
    var retryBasedBackOff = SiloMessageSender.CONNECTION_RETRY_DELAY.Multiply(2);
    var backOffCeiling = StandardExtensions.Max(sizeBasedBackOff, retryBasedBackOff);
    this.EXP_BACKOFF_CONTENTION_MAX = backOffCeiling;
    this.EXP_BACKOFF_ERROR_MAX = backOffCeiling;

    // Initial snapshot: only the local silo's entry, the minimum version, and no table entries.
    var localEntry = this.CreateLocalSiloEntry(this.CurrentStatus);
    var initialSnapshot = new MembershipTableSnapshot(
        localEntry,
        MembershipVersion.MinValue,
        ImmutableDictionary<SiloAddress, MembershipEntry>.Empty);
    this.snapshot = initialSnapshot;

    // The comparison delegate returns true only for a strictly newer version; each published
    // update atomically replaces the cached snapshot field.
    this.updates = new AsyncEnumerable<MembershipTableSnapshot>(
        (current, candidate) => candidate.Version > current.Version,
        initialSnapshot)
    {
        OnPublished = published => Interlocked.Exchange(ref this.snapshot, published)
    };

    var refreshPeriod = this.clusterMembershipOptions.TableRefreshTimeout;
    this.membershipUpdateTimer = timerFactory.Create(
        refreshPeriod,
        nameof(PeriodicallyRefreshMembershipTable));
}
/// <summary>
/// Creates the local silo health monitor: stores its dependencies, copies the health check
/// participants into a list, creates the degradation check timer from
/// <c>LocalHealthDegradationMonitoringPeriod</c>, and creates a thread pool monitor.
/// </summary>
/// <param name="healthCheckParticipants">Participants, enumerated once here and stored as a list.</param>
/// <param name="membershipTableManager">Membership table manager stored on this instance.</param>
/// <param name="connectionManager">Accepted by the constructor but not referenced in its body.</param>
/// <param name="clusterHealthMonitor">Cluster health monitor stored on this instance.</param>
/// <param name="localSiloDetails">Local silo details stored on this instance.</param>
/// <param name="log">Logger stored on this instance.</param>
/// <param name="clusterMembershipOptions">Options wrapper; its <c>Value</c> is read once and stored.</param>
/// <param name="timerFactory">Factory used to create the degradation check timer.</param>
/// <param name="loggerFactory">Factory used to create the thread pool monitor's logger.</param>
/// <param name="probeRequestMonitor">Probe request monitor stored on this instance.</param>
public LocalSiloHealthMonitor(
    IEnumerable<IHealthCheckParticipant> healthCheckParticipants,
    MembershipTableManager membershipTableManager,
    ConnectionManager connectionManager,
    ClusterHealthMonitor clusterHealthMonitor,
    ILocalSiloDetails localSiloDetails,
    ILogger<LocalSiloHealthMonitor> log,
    IOptions<ClusterMembershipOptions> clusterMembershipOptions,
    IAsyncTimerFactory timerFactory,
    ILoggerFactory loggerFactory,
    ProbeRequestMonitor probeRequestMonitor)
{
    // Materialise the participants once so later use does not re-enumerate the source.
    _healthCheckParticipants = healthCheckParticipants.ToList();

    _log = log;
    _localSiloDetails = localSiloDetails;
    _membershipTableManager = membershipTableManager;
    _clusterHealthMonitor = clusterHealthMonitor;
    _probeRequestMonitor = probeRequestMonitor;

    var options = clusterMembershipOptions.Value;
    _clusterMembershipOptions = options;

    _degradationCheckTimer = timerFactory.Create(
        options.LocalHealthDegradationMonitoringPeriod,
        nameof(LocalSiloHealthMonitor));

    var threadPoolLogger = loggerFactory.CreateLogger<ThreadPoolMonitor>();
    _threadPoolMonitor = new ThreadPoolMonitor(threadPoolLogger);
}
public static DateTime?HasMissedIAmAlivesSince(this MembershipEntry entry, ClusterMembershipOptions options, DateTime time) { var lastIAmAlive = entry.IAmAliveTime; if (entry.IAmAliveTime.Equals(default))