Esempio n. 1
0
 /// <summary>
 /// Reacts to a silo status change by queuing the matching operation through
 /// <c>Scheduler.QueueAction</c> with <c>CacheValidator.SchedulingContext</c>.
 /// </summary>
 /// <param name="updatedSilo">Address of the silo whose status changed.</param>
 /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
 public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
 {
     if (!Equals(updatedSilo, MyAddress))
     {
         // Some other silo changed state: queue its removal when it is terminating,
         // queue its addition once it is Active, and ignore every other status.
         if (status.IsTerminating())
         {
             Scheduler.QueueAction(() => RemoveServer(updatedSilo, status), CacheValidator.SchedulingContext).Ignore();
         }
         else if (status.Equals(SiloStatus.Active))
         {
             Scheduler.QueueAction(() => AddServer(updatedSilo), CacheValidator.SchedulingContext).Ignore();
         }

         return;
     }

     // This silo changed state: Dead queues Stop(false), Stopping/ShuttingDown queue Stop(true).
     if (status == SiloStatus.Dead)
     {
         Scheduler.QueueAction(() => Stop(false), CacheValidator.SchedulingContext).Ignore();
     }
     else if (status == SiloStatus.Stopping || status.Equals(SiloStatus.ShuttingDown))
     {
         Scheduler.QueueAction(() => Stop(true), CacheValidator.SchedulingContext).Ignore();
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Removes the cached statistics entry of a terminating silo and notifies the
        /// statistics-change subscribers with a <c>null</c> value for that silo.
        /// </summary>
        /// <param name="updatedSilo">Address of the silo whose status changed.</param>
        /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
        public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
        {
            // Non-terminating statuses need no handling here.
            if (status.IsTerminating())
            {
                // The removed value is not needed; subscribers are notified whether or not an entry existed.
                SiloRuntimeStatistics removedStats;
                periodicStats.TryRemove(updatedSilo, out removedStats);

                NotifyAllStatisticsChangeEventsSubscribers(updatedSilo, null);
            }
        }
        /// <summary>
        /// Asserts that the oracle reports <paramref name="expected"/> for <paramref name="address"/>
        /// and that <c>CurrentStatus</c>, <c>IsDeadSilo</c> and <c>IsFunctionalDirectory</c> agree with it.
        /// </summary>
        /// <param name="address">The silo whose status is checked.</param>
        /// <param name="expected">The status the oracle is expected to report.</param>
        private void AssertStatus(SiloAddress address, SiloStatus expected)
        {
            var reportedStatus = this.oracle.GetApproximateSiloStatus(address);
            Assert.Equal(expected, reportedStatus);

            var isLocalSilo = address.Equals(this.siloDetails.SiloAddress);
            if (isLocalSilo)
            {
                // For the local silo the approximate status must match the oracle's own current status.
                Assert.Equal(reportedStatus, this.oracle.CurrentStatus);
            }

            // Only a remote silo that is expected to be Dead should be reported as dead.
            var expectDead = !isLocalSilo && expected == SiloStatus.Dead;
            Assert.Equal(expectDead, this.oracle.IsDeadSilo(address));

            // The local silo, and any silo that is not terminating, should count as a functional directory.
            var expectFunctional = isLocalSilo || !expected.IsTerminating();
            Assert.Equal(expectFunctional, this.oracle.IsFunctionalDirectory(address));
        }
Esempio n. 4
0
 /// <summary>
 /// Reacts to a silo status change: stops on local termination, removes a terminating
 /// remote silo, and adds a remote silo once it becomes Active.
 /// </summary>
 /// <param name="updatedSilo">Address of the silo whose status changed.</param>
 /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
 public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
 {
     var concernsThisSilo = updatedSilo.Equals(MyAddress);

     if (status.IsTerminating())
     {
         if (concernsThisSilo)
         {
             Stop();
         }
         else
         {
             RemoveServer(updatedSilo);
         }

         return;
     }

     // A remote silo is only added once it reports Active; earlier statuses are ignored.
     if (!concernsThisSilo && status.Equals(SiloStatus.Active))
     {
         AddServer(updatedSilo);
     }
 }
Esempio n. 5
0
        /// <summary>
        /// Queues a call to <c>OnClientRefreshTimer(null)</c> when this silo is terminating or when any silo is
        /// reported Dead; in the former case the refresh timer is disposed first.
        /// </summary>
        /// <param name="updatedSilo">Address of the silo whose status changed.</param>
        /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
        public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
        {
            var thisSiloIsTerminating = status.IsTerminating() && updatedSilo.Equals(this.Silo);

            // Nothing to do unless this silo is terminating or the reported status is Dead.
            if (!thisSiloIsTerminating && status != SiloStatus.Dead)
            {
                return;
            }

            if (thisSiloIsTerminating)
            {
                refreshTimer?.Dispose();
            }

            scheduler.QueueTask(() => OnClientRefreshTimer(null), SchedulingContext).Ignore();
        }
        /// <summary>
        /// Handles a terminating silo: disposes the publish timer when the silo is this one, removes the
        /// silo's cached statistics and notifies subscribers with a <c>null</c> value.
        /// </summary>
        /// <param name="updatedSilo">Address of the silo whose status changed.</param>
        /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
        private void OnSiloStatusChange(SiloAddress updatedSilo, SiloStatus status)
        {
            // Non-terminating statuses need no handling here.
            if (status.IsTerminating())
            {
                var isThisSilo = Equals(updatedSilo, this.Silo);
                if (isThisSilo)
                {
                    this.publishTimer.Dispose();
                }

                // The removed value is not needed; subscribers are notified whether or not an entry existed.
                periodicStats.TryRemove(updatedSilo, out _);
                NotifyAllStatisticsChangeEventsSubscribers(updatedSilo, null);
            }
        }
Esempio n. 7
0
 /// <summary>
 /// Reacts to a status change of another silo by queuing its removal or addition on
 /// <c>CacheValidator.WorkItemGroup</c>. Changes to this silo's own status are ignored.
 /// </summary>
 /// <param name="updatedSilo">Address of the silo whose status changed.</param>
 /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
 public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
 {
     // Notifications about this silo itself are not handled here.
     if (Equals(updatedSilo, MyAddress))
     {
         return;
     }

     if (status.IsTerminating())
     {
         // Queue the removal of the terminating silo.
         CacheValidator.WorkItemGroup.QueueAction(() => RemoveServer(updatedSilo, status));
     }
     else if (status == SiloStatus.Active)
     {
         // Queue the addition only once the silo is Active; earlier statuses are ignored.
         CacheValidator.WorkItemGroup.QueueAction(() => AddServer(updatedSilo));
     }
 }
 /// <summary>
 /// Reacts to a silo status change: stops on local termination, removes a terminating
 /// remote silo, and adds a remote silo once it becomes Active.
 /// </summary>
 /// <param name="updatedSilo">Address of the silo whose status changed.</param>
 /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
 public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
 {
     var isLocalSilo = updatedSilo.Equals(myAddress);
     var terminating = status.IsTerminating();

     if (terminating && isLocalSilo)
     {
         Stop();
     }
     else if (terminating)
     {
         RemoveServer(updatedSilo);
     }
     else if (!isLocalSilo && status.Equals(SiloStatus.Active))
     {
         // A remote silo is only added once it reports Active; earlier statuses are ignored.
         AddServer(updatedSilo);
     }
 }
Esempio n. 9
0
        /// <summary>
        /// Writes this silo's new status to the membership table and then gossips it to the other silos,
        /// waiting at most <c>GossipTimeout</c> for the gossip to finish.
        /// </summary>
        /// <param name="status">The status to record for this silo.</param>
        /// <returns>A task that completes when the update and the bounded gossip wait have finished.</returns>
        /// <exception cref="OrleansException">
        /// Thrown when the retried table update reports failure, or when any other exception is raised
        /// during the update (the original exception is attached as the inner exception).
        /// </exception>
        public async Task UpdateStatus(SiloStatus status)
        {
            // Set only on the write-contention path so the catch block can tell that failure from all others.
            string errorString = null;
            int numCalls = 0;

            try
            {
                Func<int, Task<bool>> updateMyStatusTask = async counter =>
                {
                    numCalls++;
                    if (log.IsEnabled(LogLevel.Debug))
                    {
                        log.Debug("-Going to try to TryUpdateMyStatusGlobalOnce #{0}", counter);
                    }
                    return await TryUpdateMyStatusGlobalOnce(status);  // function to retry
                };

                if (status == SiloStatus.Dead && this.membershipTableProvider is SystemTargetBasedMembershipTable)
                {
                    // SystemTarget-based membership may not be accessible at this stage, so allow for one quick attempt to update
                    // the status before continuing regardless of the outcome.
                    var updateTask = updateMyStatusTask(0);
                    updateTask.Ignore();
                    var result = await Task.WhenAny(Task.Delay(TimeSpan.FromSeconds(5)), updateTask);

                    if (ReferenceEquals(result, updateTask))
                    {
                        // Rethrows a fault from the attempt; its boolean outcome is not inspected.
                        await result;
                    }
                    else
                    {
                        this.log.LogWarning(
                            "Failed to update status to dead in the alotted time during shutdown");
                    }

                    this.CurrentStatus = status;
                    return;
                }

                bool ok = await MembershipExecuteWithRetries(updateMyStatusTask, this.clusterMembershipOptions.MaxJoinAttemptTime);

                if (ok)
                {
                    if (log.IsEnabled(LogLevel.Debug))
                    {
                        log.Debug("-Silo {0} Successfully updated my Status in the Membership table to {1}", myAddress, status);
                    }

                    var gossipTask = this.GossipToOthers(this.myAddress, status);

                    // The gossip task is raced against a timeout and never awaited afterwards, so mark it as
                    // ignored (as is done for updateTask above) to keep a late fault from going unobserved.
                    gossipTask.Ignore();

                    var timeoutTask = Task.Delay(GossipTimeout);
                    var task = await Task.WhenAny(gossipTask, timeoutTask);

                    if (ReferenceEquals(task, timeoutTask))
                    {
                        // A timeout is a warning while terminating and only a debug message otherwise.
                        if (status.IsTerminating())
                        {
                            this.log.LogWarning("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
                        }
                        else if (this.log.IsEnabled(LogLevel.Debug))
                        {
                            this.log.LogDebug("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
                        }
                    }
                }
                else
                {
                    errorString = $"-Silo {myAddress} failed to update its status to {status} in the Membership table due to write contention on the table after {numCalls} attempts.";
                    log.Error(ErrorCode.MembershipFailedToWriteConditional, errorString);
                    throw new OrleansException(errorString);
                }
            }
            catch (Exception exc)
            {
                if (errorString == null)
                {
                    // Not the write-contention failure raised above: log it and wrap it.
                    errorString = $"-Silo {this.myAddress} failed to update its status to {status} in the table due to failures (socket failures or table read/write failures) after {numCalls} attempts: {exc.Message}";
                    log.Error(ErrorCode.MembershipFailedToWrite, errorString);
                    throw new OrleansException(errorString, exc);
                }

                // The write-contention exception was already logged; let it propagate unchanged.
                throw;
            }
        }
Esempio n. 10
0
        // TODO move this logic in the LocalGrainDirectory
        /// <summary>
        /// Handles a terminating remote silo by deactivating the local activations for which
        /// <c>directory.GetPrimaryForGrain</c> names that silo.
        /// </summary>
        /// <param name="updatedSilo">Address of the silo whose status changed.</param>
        /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
        private void OnSiloStatusChange(SiloAddress updatedSilo, SiloStatus status)
        {
            // Events about the local silo and non-terminating statuses are ignored.
            // We react to every terminating status (ShuttingDown/Stopping/Dead) because the directory removes a silo
            // on any of them and will therefore deliver a "remove" notification for a given silo only once, so we
            // must act the first time we are notified. If the directory ever treats ShuttingDown differently
            // ("drain only"), this code will have to change as well.
            if (updatedSilo.Equals(LocalSilo) || !status.IsTerminating())
            {
                return;
            }

            if (status == SiloStatus.Dead)
            {
                this.RuntimeClient.BreakOutstandingMessagesToDeadSilo(updatedSilo);
            }

            var affectedActivations = new List<IGrainContext>();

            try
            {
                // Scan the activation directory for activations whose primary partition owner is the removed silo.
                lock (activations)
                {
                    foreach (var entry in activations)
                    {
                        try
                        {
                            var candidate = entry.Value;

                            // Only activations that use the grain directory and have no non-default directory qualify.
                            var usesDefaultDirectory =
                                candidate.PlacementStrategy.IsUsingGrainDirectory
                                && !grainDirectoryResolver.HasNonDefaultDirectory(candidate.GrainId.Type);

                            if (usesDefaultDirectory && updatedSilo.Equals(directory.GetPrimaryForGrain(candidate.GrainId)))
                            {
                                affectedActivations.Add(candidate);
                            }
                        }
                        catch (Exception scanError)
                        {
                            // A failure on one activation is logged and the scan continues with the next one.
                            logger.LogError(
                                (int)ErrorCode.Catalog_SiloStatusChangeNotification_Exception,
                                scanError,
                                "Catalog has thrown an exception while handling removal of silo {Silo}", updatedSilo.ToStringWithHashCode());
                        }
                    }
                }

                logger.LogInformation(
                    (int)ErrorCode.Catalog_SiloStatusChangeNotification,
                    "Catalog is deactivating {Count} activations due to a failure of silo {Silo}, since it is a primary directory partition to these grain ids.",
                    affectedActivations.Count,
                    updatedSilo.ToStringWithHashCode());
            }
            finally
            {
                // Runs outside the lock, and even if the scan or the logging above threw.
                if (affectedActivations.Count > 0)
                {
                    DeactivateActivations(affectedActivations).Ignore();
                }
            }
        }
Esempio n. 11
0
        /// <summary>
        /// Handles a terminating remote silo by deactivating the local activations for which
        /// <c>directory.GetPrimaryForGrain</c> names that silo.
        /// </summary>
        /// <param name="updatedSilo">Address of the silo whose status changed.</param>
        /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
        public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
        {
            // Ignore events about this silo itself.
            if (updatedSilo.Equals(LocalSilo))
            {
                return;
            }

            // We deactivate those activations when a silo enters any of the ShuttingDown/Stopping/Dead states,
            // since this is what the Directory does as well. The Directory removes a silo on all three statuses,
            // so it will deliver a "remove" notification for a given silo only once; we must react the first time.
            // If the directory behaviour is changed to treat ShuttingDown differently ("drain only"), this code
            // will have to change as well.
            if (!status.IsTerminating())
            {
                return;
            }

            var activationsToShutdown = new List<ActivationData>();
            try
            {
                // Scan all activations in the activation directory and collect the ones whose primary
                // partition owner is the removed silo.
                lock (activations)
                {
                    foreach (var activation in activations)
                    {
                        try
                        {
                            var activationData = activation.Value;

                            // Compare from updatedSilo, which is known to be non-null at this point, rather than
                            // dereferencing the lookup result, so a missing primary skips the activation
                            // instead of throwing.
                            if (!updatedSilo.Equals(directory.GetPrimaryForGrain(activationData.Grain)))
                            {
                                continue;
                            }

                            lock (activationData)
                            {
                                // Adapted from InsideGrainClient.DeactivateOnIdle().
                                activationData.ResetKeepAliveRequest();
                                activationsToShutdown.Add(activationData);
                            }
                        }
                        catch (Exception exc)
                        {
                            // A failure on one activation is logged and the scan continues with the next one.
                            logger.Error(ErrorCode.Catalog_SiloStatusChangeNotification_Exception,
                                String.Format("Catalog has thrown an exception while executing SiloStatusChangeNotification of silo {0}.", updatedSilo.ToStringWithHashCode()), exc);
                        }
                    }
                }
                logger.Info(ErrorCode.Catalog_SiloStatusChangeNotification,
                    String.Format("Catalog is deactivating {0} activations due to a failure of silo {1}, since it is a primary directory partiton to these grain ids.",
                        activationsToShutdown.Count, updatedSilo.ToStringWithHashCode()));
            }
            finally
            {
                // Runs outside the lock, and even if the scan or the logging above threw.
                if (activationsToShutdown.Count > 0)
                {
                    DeactivateActivations(activationsToShutdown).Ignore();
                }
            }
        }
Esempio n. 12
0
 /// <summary>
 /// Tells whether a silo status counts as unavailable, i.e. it is terminating
 /// or it is <c>SiloStatus.None</c>.
 /// </summary>
 /// <param name="siloStatus">The status to classify.</param>
 /// <returns><c>true</c> when the status is terminating or <c>None</c>; otherwise <c>false</c>.</returns>
 public static bool IsUnavailable(this SiloStatus siloStatus)
 {
     if (siloStatus.IsTerminating())
     {
         return true;
     }

     return siloStatus == SiloStatus.None;
 }
Esempio n. 13
0
 /// <summary>
 /// Reacts to a silo status change by queuing the matching operation through
 /// <c>Scheduler.QueueAction</c> with <c>CacheValidator.SchedulingContext</c>.
 /// </summary>
 /// <param name="updatedSilo">Address of the silo whose status changed.</param>
 /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
 public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
 {
     var isThisSilo = Equals(updatedSilo, MyAddress);

     if (isThisSilo)
     {
         // Own status changed: Stopping/ShuttingDown queue Stop(true), Dead queues Stop(false).
         var isStoppingOrShuttingDown = status == SiloStatus.Stopping || status.Equals(SiloStatus.ShuttingDown);
         if (isStoppingOrShuttingDown)
         {
             Scheduler.QueueAction(() => Stop(true), CacheValidator.SchedulingContext).Ignore();
         }
         else if (status == SiloStatus.Dead)
         {
             Scheduler.QueueAction(() => Stop(false), CacheValidator.SchedulingContext).Ignore();
         }

         return;
     }

     // Another silo changed state.
     if (status.IsTerminating())
     {
         // Queue the removal of the terminating silo.
         Scheduler.QueueAction(() => RemoveServer(updatedSilo, status), CacheValidator.SchedulingContext).Ignore();
     }
     else if (status.Equals(SiloStatus.Active))
     {
         // Queue the addition only once the silo is Active; earlier statuses are ignored.
         Scheduler.QueueAction(() => AddServer(updatedSilo), CacheValidator.SchedulingContext).Ignore();
     }
 }
Esempio n. 14
0
        /// <summary>
        /// Removes the cached statistics entry of a terminating silo and notifies the
        /// statistics-change subscribers with a <c>null</c> value for that silo.
        /// </summary>
        /// <param name="updatedSilo">Address of the silo whose status changed.</param>
        /// <param name="status">The status reported for <paramref name="updatedSilo"/>.</param>
        public void SiloStatusChangeNotification(SiloAddress updatedSilo, SiloStatus status)
        {
            var terminating = status.IsTerminating();
            if (terminating)
            {
                // The removed value is not needed; subscribers are notified whether or not an entry existed.
                SiloRuntimeStatistics discarded;
                periodicStats.TryRemove(updatedSilo, out discarded);

                NotifyAllStatisticsChangeEventsSubscribers(updatedSilo, null);
            }
        }
Esempio n. 15
0
        /// <summary>
        /// Records this silo's new status in the membership table and gossips it to the other silos.
        /// When joining, this silo's table entries are cleaned up first.
        /// </summary>
        /// <param name="status">The status to record for this silo.</param>
        /// <returns>A task that completes when the update has finished.</returns>
        /// <exception cref="OrleansException">
        /// Thrown when the retried table update reports failure, or when any other exception is raised
        /// during the update (the original exception is attached as the inner exception).
        /// </exception>
        public async Task UpdateStatus(SiloStatus status)
        {
            if (status == SiloStatus.Joining)
            {
                // First, clean up all outdated entries of this silo from the table.
                Func<int, Task<bool>> cleanupAttempt = async attempt =>
                {
                    if (log.IsEnabled(LogLevel.Debug))
                    {
                        log.Debug("-Attempting CleanupTableEntries #{0}", attempt);
                    }

                    var tableSnapshot = await this.membershipTableProvider.ReadAll();
                    log.Info(ErrorCode.MembershipReadAll_Cleanup, "-CleanupTable called on silo startup. Membership table {0}",
                             tableSnapshot.ToString());

                    return await CleanupMyTableEntries(tableSnapshot);
                };

                // The outcome of the cleanup retries is not inspected.
                await MembershipExecuteWithRetries(cleanupAttempt, this.clusterMembershipOptions.MaxJoinAttemptTime);
            }

            if (status == SiloStatus.Dead && this.membershipTableProvider is SystemTargetBasedMembershipTable)
            {
                // SystemTarget-based clustering does not support transitioning to Dead locally since at this point
                // app scheduler turns have been stopped, so only the local status is recorded.
                this.CurrentStatus = status;
                return;
            }

            // Set only on the write-contention path so the catch block can tell that failure from all others.
            string failureMessage = null;
            int attempts = 0;

            try
            {
                Func<int, Task<bool>> updateAttempt = async attempt =>
                {
                    attempts++;
                    if (log.IsEnabled(LogLevel.Debug))
                    {
                        log.Debug("-Going to try to TryUpdateMyStatusGlobalOnce #{0}", attempt);
                    }

                    return await TryUpdateMyStatusGlobalOnce(status);  // function to retry
                };

                bool updated = await MembershipExecuteWithRetries(updateAttempt, this.clusterMembershipOptions.MaxJoinAttemptTime);

                if (!updated)
                {
                    failureMessage = $"-Silo {myAddress} failed to update its status to {status} in the Membership table due to write contention on the table after {attempts} attempts.";
                    log.Error(ErrorCode.MembershipFailedToWriteConditional, failureMessage);
                    throw new OrleansException(failureMessage);
                }

                if (log.IsEnabled(LogLevel.Debug))
                {
                    log.Debug("-Silo {0} Successfully updated my Status in the Membership table to {1}", myAddress, status);
                }

                // Gossip is always started, but only awaited (with a timeout) when this silo is terminating.
                var gossipTask = this.GossipToOthers(this.myAddress, status);
                if (status.IsTerminating())
                {
                    var timeoutTask = Task.Delay(ShutdownGossipTimeout);
                    var firstCompleted = await Task.WhenAny(gossipTask, timeoutTask);

                    if (ReferenceEquals(firstCompleted, timeoutTask))
                    {
                        this.log.LogWarning("Timed out while gossiping status to other silos after {Timeout}", ShutdownGossipTimeout);
                    }
                }
            }
            catch (Exception exc)
            {
                if (failureMessage != null)
                {
                    // The write-contention exception was already logged; let it propagate unchanged.
                    throw;
                }

                failureMessage = $"-Silo {this.myAddress} failed to update its status to {status} in the table due to failures (socket failures or table read/write failures) after {attempts} attempts: {exc.Message}";
                log.Error(ErrorCode.MembershipFailedToWrite, failureMessage);
                throw new OrleansException(failureMessage, exc);
            }
        }
Esempio n. 16
0
        /// <summary>
        /// Writes this silo's new status to the membership table and then gossips it to the other silos,
        /// waiting at most <c>GossipTimeout</c> for the gossip to finish.
        /// </summary>
        /// <param name="status">The status to record for this silo.</param>
        /// <returns>A task that completes when the update and the bounded gossip wait have finished.</returns>
        /// <exception cref="OrleansException">
        /// Thrown when the retried table update reports failure, or when any other exception is raised
        /// during the update (the original exception is attached as the inner exception).
        /// </exception>
        public async Task UpdateStatus(SiloStatus status)
        {
            // Set just before the write-contention exception is thrown so the catch filter lets it pass through.
            bool wasThrownLocally = false;
            int numCalls = 0;

            try
            {
                Func<int, Task<bool>> updateMyStatusTask = async counter =>
                {
                    numCalls++;
                    if (log.IsEnabled(LogLevel.Debug))
                    {
                        log.LogDebug("Going to try to TryUpdateMyStatusGlobalOnce #{Attempt}", counter);
                    }
                    return await TryUpdateMyStatusGlobalOnce(status);  // function to retry
                };

                if (status == SiloStatus.Dead && this.membershipTableProvider is SystemTargetBasedMembershipTable)
                {
                    // SystemTarget-based membership may not be accessible at this stage, so allow for one quick attempt to update
                    // the status before continuing regardless of the outcome.
                    var updateTask = updateMyStatusTask(0);
                    updateTask.Ignore();
                    await Task.WhenAny(Task.Delay(TimeSpan.FromMilliseconds(500)), updateTask);

                    // Same best-effort treatment for the gossip: wait briefly, then move on.
                    var gossipTask = this.GossipToOthers(this.myAddress, status);
                    gossipTask.Ignore();
                    await Task.WhenAny(Task.Delay(TimeSpan.FromMilliseconds(500)), gossipTask);

                    this.CurrentStatus = status;
                    return;
                }

                bool ok = await MembershipExecuteWithRetries(updateMyStatusTask, this.clusterMembershipOptions.MaxJoinAttemptTime);

                if (ok)
                {
                    if (log.IsEnabled(LogLevel.Debug))
                    {
                        log.LogDebug("Silo {SiloAddress} Successfully updated my Status in the membership table to {Status}", myAddress, status);
                    }

                    var gossipTask = this.GossipToOthers(this.myAddress, status);
                    gossipTask.Ignore();

                    // Dispose the token source once the race is decided so it is released on every path.
                    using (var cancellation = new CancellationTokenSource())
                    {
                        var timeoutTask = Task.Delay(GossipTimeout, cancellation.Token);
                        var task = await Task.WhenAny(gossipTask, timeoutTask);

                        if (ReferenceEquals(task, timeoutTask))
                        {
                            // A timeout is a warning while terminating and only a debug message otherwise.
                            if (status.IsTerminating())
                            {
                                this.log.LogWarning("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
                            }
                            else if (this.log.IsEnabled(LogLevel.Debug))
                            {
                                this.log.LogDebug("Timed out while gossiping status to other silos after {Timeout}", GossipTimeout);
                            }
                        }
                        else
                        {
                            // Gossip finished first: cancel the pending delay so it does not linger.
                            cancellation.Cancel();
                        }
                    }
                }
                else
                {
                    wasThrownLocally = true;
                    log.LogError(
                        (int)ErrorCode.MembershipFailedToWriteConditional,
                        "Silo {MyAddress} failed to update its status to {Status} in the membership table due to write contention on the table after {NumCalls} attempts.",
                        myAddress,
                        status,
                        numCalls);
                    throw new OrleansException($"Silo {myAddress} failed to update its status to {status} in the membership table due to write contention on the table after {numCalls} attempts.");
                }
            }
            catch (Exception exc) when (!wasThrownLocally)
            {
                // Any failure other than the write-contention exception above is logged and wrapped.
                log.LogError(
                    (int)ErrorCode.MembershipFailedToWrite,
                    exc,
                    "Silo {MyAddress} failed to update its status to {Status} in the table due to failures (socket failures or table read/write failures) after {NumCalls} attempts",
                    myAddress,
                    status,
                    numCalls);
                throw new OrleansException($"Silo {myAddress} failed to update its status to {status} in the table due to failures (socket failures or table read/write failures) after {numCalls} attempts", exc);
            }
        }