Example #1
 /// <summary>
 /// Prevents a default instance of the BrokerPerformanceCounter class from being created
 /// </summary>
 private BrokerPerformanceCounter()
 {
     this.counters = new PerformanceCounter[5];
     this.counters[(int)BrokerPerformanceCounterKey.RequestMessages]  = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.RequestMessages);
     this.counters[(int)BrokerPerformanceCounterKey.ResponseMessages] = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.ResponseMessages);
     this.counters[(int)BrokerPerformanceCounterKey.Calculations]     = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.Calculations);
     this.counters[(int)BrokerPerformanceCounterKey.Faults]           = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.Faults);
 }
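
A private parameterless constructor like this is normally paired with a static singleton accessor on the same class. The fragment below is a minimal sketch of such an accessor, assuming it lives inside the BrokerPerformanceCounter class and that `using System;` is in scope; the `instance` field and `Instance` property are hypothetical names, not members shown in the example.

    // Minimal sketch (assumption): a lazily created singleton that is the only
    // way to reach the private constructor above. "instance" and "Instance"
    // are hypothetical names and are not part of the original example.
    private static readonly Lazy<BrokerPerformanceCounter> instance =
        new Lazy<BrokerPerformanceCounter>(() => new BrokerPerformanceCounter());

    /// <summary>
    /// Gets the singleton instance of the BrokerPerformanceCounter class
    /// </summary>
    public static BrokerPerformanceCounter Instance
    {
        get { return instance.Value; }
    }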
Example #2
        /// <summary>
        /// Starts broker initialization: creates the perf counters, connects to the head node, and recovers running brokers
        /// </summary>
        private async Task RecoverThreadProc(CancellationToken ct)
        {
            int retry = 0;

            BrokerRecoverInfo[] recoverInfoList;
            this.schedulerHelper = null;

            // TODO: Read Azure Storage Queue instead
            if (!SoaHelper.IsOnAzure())
            {
                while (!ct.IsCancellationRequested)
                {
                    TraceHelper.TraceEvent(TraceEventType.Information, "[BrokerManager] Try to create the perf counters, Retry count = {0}", retry);
                    try
                    {
                        this.requestQueueLengthCounter  = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.DurableRequestsQueueLength);
                        this.responseQueueLengthCounter = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.DurableResponsesQueueLength);
                        break;
                    }
                    catch (Exception e)
                    {
                        // Bug 8507 : Fix leak
                        if (this.requestQueueLengthCounter != null)
                        {
                            this.requestQueueLengthCounter.Close();
                            this.requestQueueLengthCounter = null;
                        }

                        TraceHelper.TraceEvent(TraceEventType.Error, "[BrokerManager] Failed to create the perf counters: {0}", e);
                        retry++;
                        await Task.Delay(RetryPeriod, ct);
                    }
                }
            }

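            // Connect to the head node and load the broker recover info for running sessions;
            // retry until it succeeds or the cancellation token is signaled.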
            while (true)
            {
                TraceHelper.TraceEvent(
                    System.Diagnostics.TraceEventType.Information,
                    "[BrokerManager] Try to connect to the headnode, Retry count = {0}.",
                    retry);
                try
                {
                    lock (this.brokerDic)
                    {
                        this.brokerDic.Clear();
                    }

                    // Bug 8507 : Fix leak
                    if (this.schedulerHelper == null)
                    {
                        this.schedulerHelper = SchedulerHelperFactory.GetSchedulerHelper(this.context);
                    }

                    recoverInfoList = await this.schedulerHelper.LoadBrokerRecoverInfo();

                    break;
                }
                catch (Exception e)
                {
                    TraceHelper.TraceEvent(
                        TraceEventType.Error,
                        "[BrokerManager] Exception throwed while connecting to head node {0}: {1}", this.headnode, e);

                    retry++;
                    await Task.Delay(RetryPeriod, ct);
                }
            }

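            // Connected to the head node: arm the stale session cleanup timer and, when it exists, the queue length update timer.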
            this.staleSessionCleanupTimer.Change(0, BrokerManager.StaleSessionCleanupPeriod);

            if (this.updateQueueLengthTimer != null)
            {
                // TODO: On Azure, MSMQ is not used; don't update the MSMQ queue lengths in the Azure cluster.
                this.updateQueueLengthTimer.Change(0, BrokerManager.UpdateQueueLengthPeriod);
            }

            List<BrokerRecoverInfo> failedList = new List<BrokerRecoverInfo>();
            List<Exception> exceptionList = new List<Exception>();

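            // Recover each running broker; transient failures are retried up to RecoverBrokerRetryLimit passes,
            // non-retryable failures are collected in failedList together with their exceptions.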
            for (int i = 0; i < RecoverBrokerRetryLimit; i++)
            {
                List<BrokerRecoverInfo> retryList = new List<BrokerRecoverInfo>();
                foreach (BrokerRecoverInfo recoverInfo in recoverInfoList)
                {
                    try
                    {
                        // Only running broker will be recovered here
                        // Should start the broker immediately
                        ClusterInfoContract clusterInfo = await this.schedulerHelper.GetClusterInfoAsync();

                        await this.CreateBrokerAndRun(recoverInfo, true, clusterInfo);

                        TraceHelper.TraceEvent(recoverInfo.SessionId, System.Diagnostics.TraceEventType.Information, "[BrokerManager] Succeeded starting broker {0} during initialization", recoverInfo.SessionId);
                        TraceHelper.RuntimeTrace.LogSessionRaisedUpFailover(recoverInfo.SessionId);
                    }
                    catch (Exception e)
                    {
                        TraceHelper.TraceEvent(recoverInfo.SessionId, System.Diagnostics.TraceEventType.Error, "[BrokerManager] Exception thrown while recovering broker {0} : {1}, Retry = {2}", recoverInfo.SessionId, e, ExceptionUtility.ShouldRetry(e));
                        lock (this.brokerDic)
                        {
                            if (this.brokerDic.ContainsKey(recoverInfo.SessionId))
                            {
                                this.brokerDic.Remove(recoverInfo.SessionId);
                            }
                        }

                        if (ExceptionUtility.ShouldRetry(e))
                        {
                            retryList.Add(recoverInfo);
                        }
                        else
                        {
                            failedList.Add(recoverInfo);
                            exceptionList.Add(e);
                        }
                    }
                }

                if (retryList.Count == 0)
                {
                    if (failedList.Count == 0)
                    {
                        this.connected = true;
                        TraceHelper.TraceEvent(
                            System.Diagnostics.TraceEventType.Information,
                            "[BrokerManager] Succeeded connecting to the headnode:{0}.",
                            this.schedulerHelper.HeadNode);
                        return;
                    }
                    else
                    {
                        break;
                    }
                }

                recoverInfoList = retryList.ToArray();
                await Task.Delay(RetryPeriod, ct);
            }

            TraceHelper.TraceEvent(System.Diagnostics.TraceEventType.Warning, "[BrokerManager] Connected to the headnode and recovered broker info, Failed = {0}", recoverInfoList.Length);

            // fail jobs that cannot be recovered
            for (int i = 0; i < failedList.Count; i++)
            {
                BrokerRecoverInfo recoverInfo = failedList[i];
                Exception         exception   = exceptionList[i];

                // Log the exception
                TraceHelper.TraceEvent(System.Diagnostics.TraceEventType.Error, "[BrokerManager] Failed to recover broker.  Exception: {0}", exception);

                // We do not pass exception detail to FailJob call because of the 128 byte reason message limitation, which is likely not enough for exception detail.
                await this.schedulerHelper.FailJob(recoverInfo.SessionId, "Failed to recover broker.  Check broker log for detail.");
            }

            this.connected = true;
        }
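
For context, here is a sketch of how a host class might start and stop this recovery task, assuming `using System.Threading;` and `using System.Threading.Tasks;` are in scope. The `recoverCts`/`recoverTask` fields and the Start/Stop methods are hypothetical and only illustrate the CancellationToken pattern that RecoverThreadProc expects; they are not part of the original example.

    // Minimal sketch (assumption): starting and stopping the recovery task.
    // "recoverCts" and "recoverTask" are hypothetical members.
    private CancellationTokenSource recoverCts;
    private Task recoverTask;

    private void StartRecovery()
    {
        this.recoverCts = new CancellationTokenSource();
        this.recoverTask = this.RecoverThreadProc(this.recoverCts.Token);
    }

    private async Task StopRecoveryAsync()
    {
        this.recoverCts.Cancel();
        try
        {
            // Task.Delay(RetryPeriod, ct) inside RecoverThreadProc may surface
            // the cancellation as an OperationCanceledException when awaited.
            await this.recoverTask;
        }
        catch (OperationCanceledException)
        {
            // Expected when the token is cancelled during a retry delay.
        }
    }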