/// <summary>
/// Reacts to a membership status change reported for <paramref name="updatedSilo"/>.
/// For the local silo (<c>MyAddress</c>): Stopping/ShuttingDown queues <c>Stop(true)</c> and
/// Dead queues <c>Stop(false)</c>. For any other silo: a terminating status queues
/// <c>RemoveServer</c> and Active queues <c>AddServer</c>.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    // Every action below is queued through the scheduler on the cache validator's
    // scheduling context rather than being executed inline.
    if (!Equals(updatedSilo, MyAddress))
    {
        // Status change for some other silo.
        if (status.IsTerminating())
        {
            Scheduler.QueueAction(() => RemoveServer(updatedSilo, status), CacheValidator.SchedulingContext).Ignore();
        }
        else if (status.Equals(SiloStatus.Active))
        {
            // Earlier startup states are deliberately skipped: the silo is only added once it is Active.
            Scheduler.QueueAction(() => AddServer(updatedSilo), CacheValidator.SchedulingContext).Ignore();
        }

        return;
    }

    // This silo's own status has changed.
    switch (status)
    {
        case SiloStatus.Stopping:
        case SiloStatus.ShuttingDown:
            Scheduler.QueueAction(() => Stop(true), CacheValidator.SchedulingContext).Ignore();
            break;

        case SiloStatus.Dead:
            Scheduler.QueueAction(() => Stop(false), CacheValidator.SchedulingContext).Ignore();
            break;
    }
}
/// <summary>
/// When <paramref name="updatedSilo"/> enters a terminating status, removes its entry from
/// <c>periodicStats</c> and notifies statistics subscribers with a null statistics value.
/// Non-terminating statuses are ignored.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    if (status.IsTerminating())
    {
        // The removed value itself is not needed; only the removal matters.
        SiloRuntimeStatistics removedStats;
        periodicStats.TryRemove(updatedSilo, out removedStats);

        NotifyAllStatisticsChangeEventsSubscribers(updatedSilo, null);
    }
}
/// <summary>
/// Asserts that the oracle's view of <paramref name="address"/> matches <paramref name="expected"/>,
/// including the derived <c>IsDeadSilo</c> and <c>IsFunctionalDirectory</c> answers.
/// </summary>
/// <param name="address">The silo whose status is checked.</param>
/// <param name="expected">The status the oracle is expected to report.</param>
private void AssertStatus(SiloAddress address, SiloStatus expected)
{
    var observedStatus = this.oracle.GetApproximateSiloStatus(address);
    Assert.Equal(expected, observedStatus);

    var isLocalSilo = address.Equals(this.siloDetails.SiloAddress);
    if (isLocalSilo)
    {
        // For the local silo the approximate status must agree with CurrentStatus.
        Assert.Equal(observedStatus, this.oracle.CurrentStatus);
    }

    // Only a remote silo that is expected to be Dead should be reported as dead.
    var expectDead = !isLocalSilo && expected == SiloStatus.Dead;
    Assert.Equal(expectDead, this.oracle.IsDeadSilo(address));

    // The local silo is always expected to be functional; a remote one only while not terminating.
    var expectFunctional = isLocalSilo || !expected.IsTerminating();
    Assert.Equal(expectFunctional, this.oracle.IsFunctionalDirectory(address));
}
/// <summary>
/// Handles a silo status change: a terminating status stops this component when it concerns the
/// local silo (<c>MyAddress</c>) or removes the remote silo otherwise; an Active status for a
/// remote silo adds it. All other combinations are ignored.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    bool isLocalSilo = updatedSilo.Equals(MyAddress);

    if (status.IsTerminating())
    {
        if (isLocalSilo)
        {
            Stop();
        }
        else
        {
            RemoveServer(updatedSilo);
        }

        return;
    }

    // Do not act on SiloStatus.Created or SiloStatus.Joining -- wait until the silo actually becomes Active.
    if (!isLocalSilo && status.Equals(SiloStatus.Active))
    {
        AddServer(updatedSilo);
    }
}
/// <summary>
/// Queues a call to <c>OnClientRefreshTimer(null)</c> when either the local silo is terminating
/// (in which case the refresh timer is disposed first) or the reported status is Dead.
/// Any other notification is ignored.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    bool localSiloTerminating = status.IsTerminating() && updatedSilo.Equals(this.Silo);

    if (!localSiloTerminating && status != SiloStatus.Dead)
    {
        return;
    }

    if (localSiloTerminating)
    {
        // The timer may not have been created, hence the null-conditional dispose.
        refreshTimer?.Dispose();
    }

    scheduler.QueueTask(() => OnClientRefreshTimer(null), SchedulingContext).Ignore();
}
/// <summary>
/// When <paramref name="updatedSilo"/> enters a terminating status: disposes the publish timer if
/// the silo is the local one, removes the silo's entry from <c>periodicStats</c>, and notifies
/// statistics subscribers with a null statistics value. Other statuses are ignored.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
private void OnSiloStatusChange(SiloAddress updatedSilo, SiloStatus status)
{
    if (status.IsTerminating())
    {
        bool isLocalSilo = Equals(updatedSilo, this.Silo);
        if (isLocalSilo)
        {
            this.publishTimer.Dispose();
        }

        periodicStats.TryRemove(updatedSilo, out _);
        NotifyAllStatisticsChangeEventsSubscribers(updatedSilo, null);
    }
}
/// <summary>
/// Handles a status change for a silo other than the local one (<c>MyAddress</c>): a terminating
/// status queues <c>RemoveServer</c>, an Active status queues <c>AddServer</c>. Notifications about
/// the local silo are ignored.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    // Nothing to do for changes to this silo's own status.
    if (Equals(updatedSilo, MyAddress))
    {
        return;
    }

    // Work is queued on the cache validator's work item group rather than run inline.
    if (status.IsTerminating())
    {
        CacheValidator.WorkItemGroup.QueueAction(() => RemoveServer(updatedSilo, status));
    }
    else if (status == SiloStatus.Active)
    {
        // Earlier startup states are deliberately skipped: the silo is only added once it is Active.
        CacheValidator.WorkItemGroup.QueueAction(() => AddServer(updatedSilo));
    }
}
/// <summary>
/// Handles a silo status change. For the local silo (<c>myAddress</c>) a terminating status calls
/// <c>Stop</c>. For a remote silo a terminating status calls <c>RemoveServer</c> and an Active
/// status calls <c>AddServer</c>. Everything else is ignored.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    var concernsLocalSilo = updatedSilo.Equals(myAddress);
    var terminating = status.IsTerminating();

    if (concernsLocalSilo)
    {
        // This silo's own status has changed.
        if (terminating)
        {
            Stop();
        }

        return;
    }

    // Status change for some other silo.
    if (terminating)
    {
        RemoveServer(updatedSilo);
        return;
    }

    // Do not act on SiloStatus.Created or SiloStatus.Joining -- wait until the silo actually becomes Active.
    if (status.Equals(SiloStatus.Active))
    {
        AddServer(updatedSilo);
    }
}
/// <summary>
/// Writes this silo's new <paramref name="status"/> to the membership table and then gossips the
/// change to other silos, waiting at most <c>GossipTimeout</c> for the gossip to finish.
/// </summary>
/// <remarks>
/// When the status is Dead and the membership table is a <c>SystemTargetBasedMembershipTable</c>,
/// only one attempt bounded by five seconds is made, after which <c>CurrentStatus</c> is set and
/// the method returns without gossiping.
/// </remarks>
/// <param name="status">The status to publish for this silo.</param>
/// <exception cref="OrleansException">
/// Thrown when the table update does not succeed within the retry budget, or when any other
/// exception is raised while updating (the original exception becomes the inner exception).
/// </exception>
public async Task UpdateStatus(SiloStatus status)
{
    // Set only on the "write contention" path so the catch block can tell an exception
    // raised locally (already logged) from one bubbling up from the table/gossip calls.
    string errorString = null;
    int numCalls = 0;

    try
    {
        Func<int, Task<bool>> updateMyStatusTask = async counter =>
        {
            numCalls++;
            if (log.IsEnabled(LogLevel.Debug))
            {
                log.Debug("-Going to try to TryUpdateMyStatusGlobalOnce #{0}", counter);
            }

            return await TryUpdateMyStatusGlobalOnce(status); // function to retry
        };

        if (status == SiloStatus.Dead && this.membershipTableProvider is SystemTargetBasedMembershipTable)
        {
            // SystemTarget-based membership may not be accessible at this stage, so allow for one
            // quick attempt to update the status instead of going through the retry loop.
            var updateTask = updateMyStatusTask(0);
            updateTask.Ignore();

            var result = await Task.WhenAny(Task.Delay(TimeSpan.FromSeconds(5)), updateTask);
            if (ReferenceEquals(result, updateTask))
            {
                // The attempt finished in time; awaiting it surfaces a fault, which the catch below wraps.
                await result;
            }
            else
            {
                this.log.LogWarning("Failed to update status to dead in the alotted time during shutdown");
            }

            this.CurrentStatus = status;
            return;
        }

        bool ok = await MembershipExecuteWithRetries(updateMyStatusTask, this.clusterMembershipOptions.MaxJoinAttemptTime);
        if (!ok)
        {
            errorString = $"-Silo {myAddress} failed to update its status to {status} in the Membership table due to write contention on the table after {numCalls} attempts.";
            log.Error(ErrorCode.MembershipFailedToWriteConditional, errorString);
            throw new OrleansException(errorString);
        }

        if (log.IsEnabled(LogLevel.Debug))
        {
            log.Debug("-Silo {0} Successfully updated my Status in the Membership table to {1}", myAddress, status);
        }

        var gossipTask = this.GossipToOthers(this.myAddress, status);

        // The gossip task is only raced against the timeout below and never awaited directly, so a
        // fault in it would otherwise never be observed. Mark it as ignored, the same way the
        // quick-attempt task above is treated.
        // NOTE(review): assumes Ignore() observes the task's eventual exception -- confirm against the helper.
        gossipTask.Ignore();

        var timeoutTask = Task.Delay(GossipTimeout);
        var task = await Task.WhenAny(gossipTask, timeoutTask);
        if (ReferenceEquals(task, timeoutTask))
        {
            // A slow gossip is a warning while terminating, and only debug noise otherwise.
            if (status.IsTerminating())
            {
                this.log.LogWarning("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
            }
            else if (this.log.IsEnabled(LogLevel.Debug))
            {
                this.log.LogDebug("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
            }
        }
    }
    catch (Exception exc)
    {
        if (errorString == null)
        {
            // Not the locally thrown contention error: log and wrap it.
            errorString = $"-Silo {this.myAddress} failed to update its status to {status} in the table due to failures (socket failures or table read/write failures) after {numCalls} attempts: {exc.Message}";
            log.Error(ErrorCode.MembershipFailedToWrite, errorString);
            throw new OrleansException(errorString, exc);
        }

        // Already logged above; rethrow unchanged.
        throw;
    }
}
// TODO move this logic in the LocalGrainDirectory
/// <summary>
/// When a remote silo enters a terminating status, collects every local activation whose primary
/// directory partition is that silo and starts deactivating them. If the silo is Dead, outstanding
/// messages to it are broken first.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
private void OnSiloStatusChange(SiloAddress updatedSilo, SiloStatus status)
{
    // Events about the local silo are not handled here.
    if (updatedSilo.Equals(LocalSilo))
    {
        return;
    }

    // Per the original author's note: activations are deactivated as soon as the silo reaches any
    // of ShuttingDown/Stopping/Dead, because the directory removes a silo on all three and therefore
    // delivers a single "remove" notification per silo -- so the first notification must be acted on.
    // If the directory ever treats ShuttingDown differently ("drain only"), this code has to change as well.
    if (!status.IsTerminating())
    {
        return;
    }

    if (status == SiloStatus.Dead)
    {
        this.RuntimeClient.BreakOutstandingMessagesToDeadSilo(updatedSilo);
    }

    var affected = new List<IGrainContext>();
    try
    {
        // Scan the activation directory for activations whose primary partition owner is the removed silo.
        lock (activations)
        {
            foreach (var entry in activations)
            {
                try
                {
                    var grainContext = entry.Value;

                    // Only activations that use the grain directory and have no non-default
                    // directory configured for their grain type are considered.
                    var usesDefaultDirectory =
                        grainContext.PlacementStrategy.IsUsingGrainDirectory
                        && !grainDirectoryResolver.HasNonDefaultDirectory(grainContext.GrainId.Type);

                    if (usesDefaultDirectory && updatedSilo.Equals(directory.GetPrimaryForGrain(grainContext.GrainId)))
                    {
                        affected.Add(grainContext);
                    }
                }
                catch (Exception exc)
                {
                    // A failure on one activation must not stop the scan of the remaining ones.
                    logger.LogError(
                        (int)ErrorCode.Catalog_SiloStatusChangeNotification_Exception,
                        exc,
                        "Catalog has thrown an exception while handling removal of silo {Silo}",
                        updatedSilo.ToStringWithHashCode());
                }
            }
        }

        logger.LogInformation(
            (int)ErrorCode.Catalog_SiloStatusChangeNotification,
            "Catalog is deactivating {Count} activations due to a failure of silo {Silo}, since it is a primary directory partition to these grain ids.",
            affected.Count,
            updatedSilo.ToStringWithHashCode());
    }
    finally
    {
        // Runs outside the lock, and also when the scan or the log call above throws.
        if (affected.Count > 0)
        {
            DeactivateActivations(affected).Ignore();
        }
    }
}
/// <summary>
/// When a remote silo enters a terminating status, collects every local activation whose primary
/// directory partition is that silo, resets its keep-alive request, and starts deactivating it.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    // Events about the local silo are not handled here.
    if (updatedSilo.Equals(LocalSilo))
    {
        return;
    }

    // Per the original author's note: activations are deactivated as soon as the silo reaches any
    // of ShuttingDown/Stopping/Dead, because the directory removes a silo on all three and therefore
    // delivers a single "remove" notification per silo -- so the first notification must be acted on.
    // If the directory ever treats ShuttingDown differently ("drain only"), this code has to change as well.
    if (!status.IsTerminating())
    {
        return;
    }

    var activationsToShutdown = new List<ActivationData>();
    try
    {
        // Scan the activation directory for activations whose primary partition owner is the removed silo.
        lock (activations)
        {
            foreach (var activation in activations)
            {
                try
                {
                    var activationData = activation.Value;

                    // Compare from the known non-null side (updatedSilo was already dereferenced above)
                    // so that a missing primary owner simply counts as "not this silo" instead of
                    // throwing and producing one error log entry per activation.
                    // NOTE(review): whether GetPrimaryForGrain can return null is not visible here -- confirm.
                    if (!updatedSilo.Equals(directory.GetPrimaryForGrain(activationData.Grain)))
                    {
                        continue;
                    }

                    lock (activationData)
                    {
                        // Original note: adapted from InsideGrainClient.DeactivateOnIdle().
                        activationData.ResetKeepAliveRequest();
                        activationsToShutdown.Add(activationData);
                    }
                }
                catch (Exception exc)
                {
                    // A failure on one activation must not stop the scan of the remaining ones.
                    logger.Error(
                        ErrorCode.Catalog_SiloStatusChangeNotification_Exception,
                        String.Format("Catalog has thrown an exception while executing SiloStatusChangeNotification of silo {0}.", updatedSilo.ToStringWithHashCode()),
                        exc);
                }
            }
        }

        logger.Info(
            ErrorCode.Catalog_SiloStatusChangeNotification,
            String.Format("Catalog is deactivating {0} activations due to a failure of silo {1}, since it is a primary directory partiton to these grain ids.", activationsToShutdown.Count, updatedSilo.ToStringWithHashCode()));
    }
    finally
    {
        // Runs outside the lock, and also when the scan or the log call above throws.
        if (activationsToShutdown.Count > 0)
        {
            DeactivateActivations(activationsToShutdown).Ignore();
        }
    }
}
/// <summary>
/// Returns <c>true</c> when the silo is terminating or its status is <c>SiloStatus.None</c> (missing).
/// </summary>
/// <param name="siloStatus">The status to classify.</param>
/// <returns><c>true</c> if the status is terminating or <c>None</c>; otherwise <c>false</c>.</returns>
public static bool IsUnavailable(this SiloStatus siloStatus)
    => siloStatus.IsTerminating() || siloStatus == SiloStatus.None;
/// <summary>
/// Drops the cached statistics of a silo that has entered a terminating status and notifies
/// statistics subscribers with a null statistics value. Other statuses are ignored.
/// </summary>
/// <param name="updatedSilo">The silo whose status changed.</param>
/// <param name="status">The new status of that silo.</param>
public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
{
    bool siloIsGoingAway = status.IsTerminating();
    if (siloIsGoingAway)
    {
        // Only the removal matters; the removed value is discarded.
        SiloRuntimeStatistics discarded;
        periodicStats.TryRemove(updatedSilo, out discarded);

        NotifyAllStatisticsChangeEventsSubscribers(updatedSilo, null);
    }
}
/// <summary>
/// Writes this silo's new <paramref name="status"/> to the membership table and gossips the change
/// to other silos. When joining, this silo's outdated table entries are cleaned up first.
/// </summary>
/// <remarks>
/// When the status is Dead and the membership table is a <c>SystemTargetBasedMembershipTable</c>,
/// only <c>CurrentStatus</c> is updated locally and nothing is written or gossiped.
/// The gossip is waited on (up to <c>ShutdownGossipTimeout</c>) only for terminating statuses.
/// </remarks>
/// <param name="status">The status to publish for this silo.</param>
/// <exception cref="OrleansException">
/// Thrown when the table update does not succeed within the retry budget, or when any other
/// exception is raised while updating (the original exception becomes the inner exception).
/// </exception>
public async Task UpdateStatus(SiloStatus status)
{
    if (status == SiloStatus.Joining)
    {
        // First, clean up all outdated entries of this silo from the table.
        Func<int, Task<bool>> cleanupAttempt = async attempt =>
        {
            if (log.IsEnabled(LogLevel.Debug))
            {
                log.Debug("-Attempting CleanupTableEntries #{0}", attempt);
            }

            var membershipSnapshot = await this.membershipTableProvider.ReadAll();
            log.Info(ErrorCode.MembershipReadAll_Cleanup, "-CleanupTable called on silo startup. Membership table {0}", membershipSnapshot.ToString());

            return await CleanupMyTableEntries(membershipSnapshot);
        };

        await MembershipExecuteWithRetries(cleanupAttempt, this.clusterMembershipOptions.MaxJoinAttemptTime);
    }

    var usesSystemTargetTable = this.membershipTableProvider is SystemTargetBasedMembershipTable;
    if (status == SiloStatus.Dead && usesSystemTargetTable)
    {
        // SystemTarget-based clustering does not support transitioning to Dead locally since at
        // this point app scheduler turns have been stopped.
        this.CurrentStatus = status;
        return;
    }

    // Set only on the "write contention" path so the catch block can tell an exception
    // raised locally (already logged) from one bubbling up from the table/gossip calls.
    string errorString = null;
    int numCalls = 0;

    try
    {
        Func<int, Task<bool>> updateAttempt = async attempt =>
        {
            numCalls++;
            if (log.IsEnabled(LogLevel.Debug))
            {
                log.Debug("-Going to try to TryUpdateMyStatusGlobalOnce #{0}", attempt);
            }

            return await TryUpdateMyStatusGlobalOnce(status); // function to retry
        };

        bool updated = await MembershipExecuteWithRetries(updateAttempt, this.clusterMembershipOptions.MaxJoinAttemptTime);
        if (!updated)
        {
            errorString = $"-Silo {myAddress} failed to update its status to {status} in the Membership table due to write contention on the table after {numCalls} attempts.";
            log.Error(ErrorCode.MembershipFailedToWriteConditional, errorString);
            throw new OrleansException(errorString);
        }

        if (log.IsEnabled(LogLevel.Debug))
        {
            log.Debug("-Silo {0} Successfully updated my Status in the Membership table to {1}", myAddress, status);
        }

        // The gossip is always started, but only waited on (with a timeout) while terminating.
        var gossip = this.GossipToOthers(this.myAddress, status);
        if (status.IsTerminating())
        {
            var deadline = Task.Delay(ShutdownGossipTimeout);
            var firstCompleted = await Task.WhenAny(gossip, deadline);
            if (ReferenceEquals(firstCompleted, deadline))
            {
                this.log.LogWarning("Timed out while gossiping status to other silos after {Timeout}", ShutdownGossipTimeout);
            }
        }
    }
    catch (Exception exc)
    {
        if (errorString != null)
        {
            // Already logged on the contention path; rethrow unchanged.
            throw;
        }

        errorString = $"-Silo {this.myAddress} failed to update its status to {status} in the table due to failures (socket failures or table read/write failures) after {numCalls} attempts: {exc.Message}";
        log.Error(ErrorCode.MembershipFailedToWrite, errorString);
        throw new OrleansException(errorString, exc);
    }
}
/// <summary>
/// Writes this silo's new <paramref name="status"/> to the membership table and then gossips the
/// change to other silos, waiting at most <c>GossipTimeout</c> for the gossip to finish.
/// </summary>
/// <remarks>
/// When the status is Dead and the membership table is a <c>SystemTargetBasedMembershipTable</c>,
/// one table update and one gossip are each given 500 ms, their outcomes are ignored, and
/// <c>CurrentStatus</c> is set before returning.
/// </remarks>
/// <param name="status">The status to publish for this silo.</param>
/// <exception cref="OrleansException">
/// Thrown when the table update does not succeed within the retry budget, or when any other
/// exception is raised while updating (the original exception becomes the inner exception).
/// </exception>
public async Task UpdateStatus(SiloStatus status)
{
    // Set right before the locally thrown contention error so the exception filter below
    // does not log and wrap it a second time.
    bool wasThrownLocally = false;
    int numCalls = 0;

    try
    {
        Func<int, Task<bool>> updateAttempt = async attempt =>
        {
            numCalls++;
            if (log.IsEnabled(LogLevel.Debug))
            {
                log.LogDebug("Going to try to TryUpdateMyStatusGlobalOnce #{Attempt}", attempt);
            }

            return await TryUpdateMyStatusGlobalOnce(status); // function to retry
        };

        var usesSystemTargetTable = this.membershipTableProvider is SystemTargetBasedMembershipTable;
        if (status == SiloStatus.Dead && usesSystemTargetTable)
        {
            // SystemTarget-based membership may not be accessible at this stage, so allow for one
            // quick attempt to update the status before continuing regardless of the outcome.
            var quickWindow = TimeSpan.FromMilliseconds(500);

            var quickUpdate = updateAttempt(0);
            quickUpdate.Ignore();
            await Task.WhenAny(Task.Delay(quickWindow), quickUpdate);

            var quickGossip = this.GossipToOthers(this.myAddress, status);
            quickGossip.Ignore();
            await Task.WhenAny(Task.Delay(quickWindow), quickGossip);

            this.CurrentStatus = status;
            return;
        }

        bool updated = await MembershipExecuteWithRetries(updateAttempt, this.clusterMembershipOptions.MaxJoinAttemptTime);
        if (!updated)
        {
            wasThrownLocally = true;
            log.LogError(
                (int)ErrorCode.MembershipFailedToWriteConditional,
                "Silo {MyAddress} failed to update its status to {Status} in the membership table due to write contention on the table after {NumCalls} attempts.",
                myAddress,
                status,
                numCalls);
            throw new OrleansException($"Silo {myAddress} failed to update its status to {status} in the membership table due to write contention on the table after {numCalls} attempts.");
        }

        if (log.IsEnabled(LogLevel.Debug))
        {
            log.LogDebug("Silo {SiloAddress} Successfully updated my Status in the membership table to {Status}", myAddress, status);
        }

        var gossip = this.GossipToOthers(this.myAddress, status);
        gossip.Ignore();

        var timeoutCancellation = new CancellationTokenSource();
        var deadline = Task.Delay(GossipTimeout, timeoutCancellation.Token);
        var firstCompleted = await Task.WhenAny(gossip, deadline);

        if (!ReferenceEquals(firstCompleted, deadline))
        {
            // Gossip finished first: cancel the pending delay.
            timeoutCancellation.Cancel();
        }
        else if (status.IsTerminating())
        {
            // A slow gossip is a warning while terminating, and only debug noise otherwise.
            this.log.LogWarning("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
        }
        else if (this.log.IsEnabled(LogLevel.Debug))
        {
            this.log.LogDebug("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
        }
    }
    catch (Exception exc) when (!wasThrownLocally)
    {
        log.LogError(
            (int)ErrorCode.MembershipFailedToWrite,
            exc,
            "Silo {MyAddress} failed to update its status to {Status} in the table due to failures (socket failures or table read/write failures) after {NumCalls} attempts",
            myAddress,
            status,
            numCalls);
        throw new OrleansException($"Silo {myAddress} failed to update its status to {status} in the table due to failures (socket failures or table read/write failures) after {numCalls} attempts", exc);
    }
}