Beispiel #1
0
 internal static void OnClusApiHang(object context)
 {
     LatencyChecker.LatencyContext latencyContext = (LatencyChecker.LatencyContext)context;
     LatencyChecker.ClusDbHungInfo clusDbHungInfo = LatencyChecker.GatherHungNodesInformation(latencyContext);
     LatencyChecker.LastKnownHungInfo = clusDbHungInfo;
     LatencyChecker.ClusDbHungAction action = LatencyChecker.AnalyzeAndSuggestActionForClusDbHang(clusDbHungInfo);
     LatencyChecker.ActOnClusDbHang(action);
 }
Beispiel #2
0
        internal static TimeSpan Measure(string apiName, string hintStr, TimeSpan maxAllowedLatency, Action action)
        {
            TimeSpan result;

            LatencyChecker.Measure(apiName, hintStr, maxAllowedLatency, TimeSpan.MaxValue, null, delegate()
            {
                action();
                return(0);
            }, out result);
            return(result);
        }
Beispiel #3
0
        internal static void ReportClusApiHangLongLatency(object context)
        {
            LatencyChecker.LatencyContext latencyContext = (LatencyChecker.LatencyContext)context;
            TimeSpan timeSpan = ExDateTime.Now - latencyContext.StartTime;

            ReplayCrimsonEvents.ClusApiOperationAppearsToBeHungAlert.Log <string, ExDateTime, TimeSpan, string, TimeSpan>(latencyContext.ApiName, latencyContext.StartTime, timeSpan, latencyContext.HintStr, latencyContext.MaxAllowedLatency);
            ReplayEventLogConstants.Tuple_ClusterApiHungAlert.LogEvent(null, new object[]
            {
                latencyContext.ApiName,
                timeSpan.ToString()
            });
            LatencyChecker.RaiseRedEvent();
        }
Beispiel #4
0
        internal static int MeasureClusApiAndKillIfExceeds(string apiName, string hintStr, Func <int> func)
        {
            TimerCallback latencyCallback = null;

            if (LatencyChecker.EnableClusterKill)
            {
                latencyCallback = new TimerCallback(LatencyChecker.OnClusApiHang);
            }
            TimeSpan currentLatency;
            int      result = LatencyChecker.Measure(apiName, hintStr, TimeSpan.FromSeconds((double)RegistryParameters.ClusApiLatencyAllowedInSec), TimeSpan.FromSeconds((double)RegistryParameters.ClusApiHangActionLatencyAllowedInSec), latencyCallback, func, out currentLatency);

            LatencyChecker.RaiseGreenEventIfNeeded(currentLatency);
            return(result);
        }
Beispiel #5
0
        private static void RaiseRedEvent()
        {
            string arg  = string.Empty;
            string arg2 = string.Empty;

            if (LatencyChecker.LastKnownHungInfo != null)
            {
                arg  = LatencyChecker.ConvertAmServerNamesToString(LatencyChecker.LastKnownHungInfo.HungNodes);
                arg2 = LatencyChecker.ConvertAmServerNamesToString(LatencyChecker.LastKnownHungInfo.RpcFailedNodes);
            }
            new EventNotificationItem("MSExchangeRepl", "Cluster", "ClusterHung", string.Format("ClusDb write timed out. HungNodeInfo={0}", (LatencyChecker.LastKnownHungInfo == null) ? "NULL" : LatencyChecker.LastKnownHungInfo.ToString()), string.Format("HungNodeApi={0}", arg), ResultSeverityLevel.Critical)
            {
                StateAttribute2 = string.Format("RpcHungNode={0}", arg2)
            }.Publish(false);
        }
Beispiel #6
0
        // Token: 0x06000335 RID: 821 RVA: 0x00012410 File Offset: 0x00010610
        public int CallBack(ClusApiHooks api, string hintStr, Func <int> func)
        {
            this.CountEntry(api);
            int result;

            if (api == ClusApiHooks.ClusterRegCloseBatch)
            {
                ActiveManagerServerPerfmon.ClusterBatchWriteCalls.Increment();
                result = LatencyChecker.MeasureClusApiAndKillIfExceeds(api.ToString(), hintStr, func);
            }
            else
            {
                result = LatencyChecker.MeasureClusApi(api.ToString(), hintStr, func);
            }
            return(result);
        }
Beispiel #7
0
            public override string ToString()
            {
                string format = "CurrentGumId: '{0}' LockOwner: '{1}' ApiName: '{2}' ApiHungStartTime: '{3}' HungNodes: '{4}' RpcFailedNodes: '{5}' ClusterStatus: '{6}' HungNodeApiEx: '{7}' RpcExs: '{8}'";

                object[] array = new object[9];
                array[0] = this.CurrentGumId;
                array[1] = ((this.CurrentLockOwnerName == null) ? "NULL" : this.CurrentLockOwnerName.NetbiosName);
                array[2] = (string.IsNullOrEmpty(this.ApiName) ? "NULL" : this.ApiName);
                array[3] = this.ApiHungStartTime.ToString("o");
                array[4] = LatencyChecker.ConvertAmServerNamesToString(this.HungNodes);
                array[5] = LatencyChecker.ConvertAmServerNamesToString(this.RpcFailedNodes);
                object[] array2 = array;
                int      num    = 6;
                string   text;

                if (this.ClusterNodesStatus != null && this.ClusterNodesStatus.Count >= 1)
                {
                    text = string.Join(",", this.ClusterNodesStatus.Select((KeyValuePair <string, AmNodeState> pair, int sel) => string.Format("{0}={1}", pair.Key, pair.Value)));
                }
                else
                {
                    text = "NULL";
                }
                array2[num] = text;
                array[7]    = ((this.HungNodeApiException == null) ? "NULL" : this.HungNodeApiException.Message);
                object[] array3 = array;
                int      num2   = 8;
                string   text2;

                if (this.RpcExceptions != null && this.RpcExceptions.Length >= 1)
                {
                    text2 = string.Join(",", from e in this.RpcExceptions
                                        select e.Message);
                }
                else
                {
                    text2 = "NULL";
                }
                array3[num2] = text2;
                return(string.Format(format, array));
            }
Beispiel #8
0
 public override string ToString()
 {
     return(string.Format("TakeAction: '{0}' TargetNodes: '{1}' Reason: '{2}'", this.TakeAction, LatencyChecker.ConvertAmServerNamesToString(this.TargetNodes), string.IsNullOrEmpty(this.Reason) ? "NULL" : this.Reason));
 }
Beispiel #9
0
        internal static int Measure(string apiName, string hintStr, TimeSpan maxAllowedLatency, TimeSpan maxAllowedLatencyForTimer, TimerCallback latencyCallback, Func <int> func, out TimeSpan elapsed)
        {
            ExDateTime now    = ExDateTime.Now;
            int        num    = 0;
            bool       flag   = true;
            Timer      timer  = null;
            Timer      timer2 = null;

            try
            {
                if (latencyCallback != null && maxAllowedLatencyForTimer.TotalSeconds > 0.0)
                {
                    LatencyChecker.LatencyContext latencyContext = new LatencyChecker.LatencyContext(now, apiName, hintStr, maxAllowedLatencyForTimer);
                    timer = new Timer(latencyCallback, latencyContext, -1, -1);
                    latencyContext.Timer = timer;
                    timer.Change(maxAllowedLatencyForTimer, TimeSpan.FromMilliseconds(-1.0));
                    timer2 = new Timer(new TimerCallback(LatencyChecker.ReportClusApiHangLongLatency), latencyContext, -1, -1);
                    TimeSpan dueTime = TimeSpan.FromSeconds((double)RegistryParameters.ClusApiHangReportLongLatencyDurationInSec);
                    timer2.Change(dueTime, TimeSpan.FromMilliseconds(-1.0));
                }
                if (RegistryParameters.IsApiLatencyTestEnabled)
                {
                    LatencyChecker.DelayApiIfRequired(apiName);
                    num = RegistryParameters.GetApiSimulatedErrorCode(apiName);
                    if (num == 0)
                    {
                        num = func();
                    }
                    else
                    {
                        NativeMethods.SetLastError(num);
                    }
                }
                else
                {
                    num = func();
                }
                flag = false;
            }
            finally
            {
                if (flag)
                {
                    num = -1;
                }
                if (timer != null)
                {
                    timer.Change(-1, -1);
                    timer.Dispose();
                }
                if (timer2 != null)
                {
                    timer2.Change(-1, -1);
                    timer2.Dispose();
                }
                elapsed = ExDateTime.Now - now;
                ExTraceGlobals.LatencyCheckerTracer.TraceDebug(0L, "Api={0}, StartTime={1}, Elapsed={2}, Hint={3}, IsUnhandled={4}, RetCode={5}, MaxLatency={6}", new object[]
                {
                    apiName,
                    now,
                    elapsed,
                    hintStr,
                    flag,
                    num,
                    maxAllowedLatency
                });
                if (elapsed > maxAllowedLatency || (num != 0 && RegistryParameters.GetIsLogApiLatencyFailure()))
                {
                    ReplayCrimsonEvents.OperationTookVeryLongTimeToComplete.Log <string, ExDateTime, TimeSpan, string, bool, int, TimeSpan>(apiName, now, elapsed, hintStr, flag, num, maxAllowedLatency);
                }
            }
            return(num);
        }
Beispiel #10
0
        internal static int MeasureClusApi(string apiName, string hintStr, Func <int> func)
        {
            TimeSpan timeSpan;

            return(LatencyChecker.Measure(apiName, hintStr, TimeSpan.FromSeconds((double)RegistryParameters.ClusApiLatencyAllowedInSec), TimeSpan.MaxValue, null, func, out timeSpan));
        }
Beispiel #11
0
 internal static LatencyChecker.ClusDbHungAction AnalyzeAndSuggestActionForClusDbHang(LatencyChecker.ClusDbHungInfo hungInfo)
 {
     LatencyChecker.ClusDbHungAction clusDbHungAction = new LatencyChecker.ClusDbHungAction();
     clusDbHungAction.HungInfo    = hungInfo;
     clusDbHungAction.TakeAction  = false;
     clusDbHungAction.TargetNodes = hungInfo.HungNodes;
     clusDbHungAction.Reason      = "If you see this message, something is wrong...";
     if (hungInfo.HungNodeApiException != null)
     {
         if (hungInfo.HungNodeApiException is HungDetectionGumIdChangedException)
         {
             clusDbHungAction.TakeAction = false;
             clusDbHungAction.Reason     = "GumId changed.";
         }
         else if (hungInfo.HungNodeApiException is OpenClusterTimedoutException)
         {
             clusDbHungAction.TakeAction = true;
             OpenClusterTimedoutException ex = (OpenClusterTimedoutException)hungInfo.HungNodeApiException;
             clusDbHungAction.Reason = string.Format("OpenCluster timed-out for {0}", ex.ServerName);
         }
         else if (hungInfo.HungNodeApiException is ClusterException)
         {
             clusDbHungAction.TakeAction = false;
             clusDbHungAction.Reason     = "ClusterException was caught.";
         }
     }
     else
     {
         clusDbHungAction.TakeAction = true;
         clusDbHungAction.Reason     = "Hung node detected without any Exceptions caught.";
     }
     if (clusDbHungAction.TargetNodes == null || clusDbHungAction.TargetNodes.Length < 1)
     {
         clusDbHungAction.TakeAction = false;
         clusDbHungAction.Reason     = "No hung node detected, and Rpc timeout did not catch anything.";
         if (hungInfo.RpcFailedNodes != null && hungInfo.RpcFailedNodes.Length > 0)
         {
             AmServerName amServerName = null;
             foreach (AmServerName amServerName2 in hungInfo.RpcFailedNodes)
             {
                 AmNodeState amNodeState = AmNodeState.Unknown;
                 if (hungInfo.ClusterNodesStatus.TryGetValue(amServerName2.NetbiosName, out amNodeState) && amNodeState != AmNodeState.Unknown && amNodeState != AmNodeState.Down)
                 {
                     amServerName = amServerName2;
                     break;
                 }
             }
             if (amServerName != null)
             {
                 clusDbHungAction.TakeAction  = true;
                 clusDbHungAction.TargetNodes = new AmServerName[]
                 {
                     amServerName
                 };
                 clusDbHungAction.Reason = string.Format("Hung nodes detected via Rpc timeout. Node '{0}' chosen for action. Original list={1}", amServerName.NetbiosName, LatencyChecker.ConvertAmServerNamesToString(hungInfo.RpcFailedNodes));
             }
             else
             {
                 clusDbHungAction.TakeAction  = false;
                 clusDbHungAction.TargetNodes = null;
                 clusDbHungAction.Reason      = string.Format("No nodes in Rpc non-responsive list are UP according to cluster. Skipping reboot. Original list={0}", LatencyChecker.ConvertAmServerNamesToString(hungInfo.RpcFailedNodes));
             }
         }
         if (!clusDbHungAction.TakeAction && !AmServerName.IsNullOrEmpty(hungInfo.CurrentLockOwnerName))
         {
             clusDbHungAction.TakeAction  = true;
             clusDbHungAction.TargetNodes = new AmServerName[]
             {
                 hungInfo.CurrentLockOwnerName
             };
             clusDbHungAction.Reason = string.Format("Could not find any hung nodes, so taking restart/reboot action for the lock owner '{0}'", hungInfo.CurrentLockOwnerName.NetbiosName);
         }
     }
     ReplayCrimsonEvents.HungNodeAnalysisResult.Log <string>(clusDbHungAction.ToString());
     return(clusDbHungAction);
 }
Beispiel #12
0
        internal static LatencyChecker.ClusDbHungInfo GatherHungNodesInformation(LatencyChecker.LatencyContext latencyContext)
        {
            LatencyChecker.ClusDbHungInfo clusDbHungInfo = new LatencyChecker.ClusDbHungInfo();
            TimeSpan timeSpan = ExDateTime.Now - latencyContext.StartTime;

            clusDbHungInfo.ApiName          = latencyContext.ApiName;
            clusDbHungInfo.ApiHungStartTime = latencyContext.StartTime;
            ReplayCrimsonEvents.ClusApiOperationAppearsToBeHung.Log <string, ExDateTime, TimeSpan, string, TimeSpan>(latencyContext.ApiName, latencyContext.StartTime, timeSpan, latencyContext.HintStr, latencyContext.MaxAllowedLatency);
            clusDbHungInfo.HungNodeApiException = null;
            try
            {
                ReplayCrimsonEvents.AttemptingToGetHungNodes.Log <string, ExDateTime, LatencyChecker.LatencyContext>(latencyContext.ApiName, latencyContext.StartTime, latencyContext);
                HungNodesInfo nodesHungInClusDbUpdate = HungNodesInfo.GetNodesHungInClusDbUpdate();
                if (nodesHungInClusDbUpdate != null)
                {
                    ReplayCrimsonEvents.HungNodeDetectionCompleted.Log <int, AmServerName, HungNodesInfo>(nodesHungInClusDbUpdate.CurrentGumId, nodesHungInClusDbUpdate.CurrentLockOwnerName, nodesHungInClusDbUpdate);
                    clusDbHungInfo.CurrentGumId         = nodesHungInClusDbUpdate.CurrentGumId;
                    clusDbHungInfo.CurrentLockOwnerName = nodesHungInClusDbUpdate.CurrentLockOwnerName;
                    clusDbHungInfo.HungNodes            = nodesHungInClusDbUpdate.NodeMap.Values.ToArray <AmServerName>();
                }
            }
            catch (HungDetectionGumIdChangedException ex)
            {
                clusDbHungInfo.HungNodeApiException = ex;
                ReplayCrimsonEvents.HungActionSkippedSinceGumIdChanged.Log <int, int, string, long>(ex.LocalGumId, ex.RemoteGumId, ex.LockOwnerName, ex.HungNodesMask);
            }
            catch (OpenClusterTimedoutException ex2)
            {
                clusDbHungInfo.HungNodeApiException = ex2;
                clusDbHungInfo.HungNodes            = new AmServerName[]
                {
                    new AmServerName(ex2.ServerName)
                };
                ReplayCrimsonEvents.OpenClusterCallHung.Log <string, string, string>(ex2.ServerName, ex2.Message, ex2.Context);
            }
            catch (ClusterException ex3)
            {
                clusDbHungInfo.HungNodeApiException = ex3;
                ReplayCrimsonEvents.HungNodeDetectionFailed.Log <string, string>(ex3.Message, ex3.ToString());
            }
            List <AmServerName> dagServers = LatencyChecker.DagServers;

            ReplayCrimsonEvents.HungNodeRpcScanStart.Log <string>(LatencyChecker.ConvertAmServerNamesToString(dagServers));
            AmMultiNodeCopyStatusFetcher amMultiNodeCopyStatusFetcher = new AmMultiNodeCopyStatusFetcher(dagServers, LatencyChecker.DatabaseMap, RpcGetDatabaseCopyStatusFlags2.None, null, false, 60000);

            amMultiNodeCopyStatusFetcher.GetStatus();
            List <AmServerName> list  = new List <AmServerName>();
            List <Exception>    list2 = new List <Exception>();

            clusDbHungInfo.ClusterNodesStatus = LatencyChecker.QueryClusterNodeStatus(TimeSpan.FromSeconds(30.0), true);
            foreach (AmServerName amServerName in LatencyChecker.DagServers)
            {
                Exception possibleExceptionForServer = amMultiNodeCopyStatusFetcher.GetPossibleExceptionForServer(amServerName);
                if (possibleExceptionForServer != null)
                {
                    if (possibleExceptionForServer is ReplayServiceDownException)
                    {
                        list.Add(amServerName);
                    }
                    list2.Add(possibleExceptionForServer);
                }
            }
            clusDbHungInfo.RpcFailedNodes = list.ToArray();
            clusDbHungInfo.RpcExceptions  = list2.ToArray();
            ReplayCrimsonEvents.HungNodeInformationLog.Log <string>(clusDbHungInfo.ToString());
            return(clusDbHungInfo);
        }