/// <summary>
/// Prevents a default instance of the BrokerPerformanceCounter class from being created
/// </summary>
private BrokerPerformanceCounter()
{
    this.counters = new PerformanceCounter[5];

    // Resolve each per-key counter through the shared helper; the enum value
    // doubles as the slot index in the counters array.
    foreach (BrokerPerformanceCounterKey key in new[]
        {
            BrokerPerformanceCounterKey.RequestMessages,
            BrokerPerformanceCounterKey.ResponseMessages,
            BrokerPerformanceCounterKey.Calculations,
            BrokerPerformanceCounterKey.Faults,
        })
    {
        this.counters[(int)key] = BrokerPerformanceCounterHelper.GetPerfCounter(key);
    }
}
/// <summary>
/// Start broker init operations. Runs three phases:
/// 1) (non-Azure only) create the durable request/response queue-length perf
///    counters, retrying until success or cancellation;
/// 2) connect to the head node and load the recover info of running brokers;
/// 3) recover each broker, retrying transient failures up to
///    RecoverBrokerRetryLimit rounds, then fail the jobs of brokers that could
///    not be recovered. Sets this.connected = true on completion.
/// </summary>
/// <param name="ct">cancellation token; the retry loops observe it via
/// Task.Delay(RetryPeriod, ct), which throws once cancellation is requested</param>
private async Task RecoverThreadProc(CancellationToken ct)
{
    int retry = 0; // shared attempt counter for the trace messages of phases 1 and 2
    BrokerRecoverInfo[] recoverInfoList;
    // Reset so the connect loop below creates a fresh scheduler helper.
    this.schedulerHelper = null;

    // TODO: Read Azure Storage Queue instead
    if (!SoaHelper.IsOnAzure())
    {
        // Phase 1: create the queue-length perf counters, retrying on failure.
        while (!ct.IsCancellationRequested)
        {
            TraceHelper.TraceEvent(TraceEventType.Information, "[BrokerManager] Try to create the perf counters, Retry count = {0}", retry);
            try
            {
                this.requestQueueLengthCounter = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.DurableRequestsQueueLength);
                this.responseQueueLengthCounter = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.DurableResponsesQueueLength);
                break;
            }
            catch (Exception e)
            {
                // Bug 8507 : Fix leak - the request counter may have been
                // created before the response counter creation failed; close it
                // so the next retry does not leak a counter instance.
                if (this.requestQueueLengthCounter != null)
                {
                    this.requestQueueLengthCounter.Close();
                    this.requestQueueLengthCounter = null;
                }

                TraceHelper.TraceEvent(TraceEventType.Error, "[BrokerManager] Failed to create the perf counters: {0}", e);
                retry++;
                await Task.Delay(RetryPeriod, ct);
            }
        }
    }

    // Phase 2: connect to the head node and load the recover info of all
    // sessions. NOTE(review): cancellation is only observed here through the
    // Task.Delay in the catch path; a successful iteration always breaks out.
    while (true)
    {
        TraceHelper.TraceEvent(
            System.Diagnostics.TraceEventType.Information,
            "[BrokerManager] Try to connect to the headnode, Retry count = {0}.",
            retry);

        try
        {
            // Drop any brokers tracked by a previous attempt before reloading.
            lock (this.brokerDic)
            {
                this.brokerDic.Clear();
            }

            // Bug 8507 : Fix leak - reuse the helper created by an earlier
            // iteration instead of constructing a new one on every retry.
            if (this.schedulerHelper == null)
            {
                this.schedulerHelper = SchedulerHelperFactory.GetSchedulerHelper(this.context);
            }

            recoverInfoList = await this.schedulerHelper.LoadBrokerRecoverInfo();
            break;
        }
        catch (Exception e)
        {
            TraceHelper.TraceEvent(
                TraceEventType.Error,
                "[BrokerManager] Exception throwed while connecting to head node {0}: {1}",
                this.headnode,
                e);
            retry++;
            await Task.Delay(RetryPeriod, ct);
        }
    }

    // Connected: arm the periodic maintenance timers.
    this.staleSessionCleanupTimer.Change(0, BrokerManager.StaleSessionCleanupPeriod);
    if (this.updateQueueLengthTimer != null)
    {
        // TODO: on azure, about the MSMQ. Don't use the MSMQ in the Azure cluster.
        this.updateQueueLengthTimer.Change(0, BrokerManager.UpdateQueueLengthPeriod);
    }

    // Phase 3: recover brokers. failedList/exceptionList are parallel lists:
    // exceptionList[i] is the exception that permanently failed failedList[i].
    List<BrokerRecoverInfo> failedList = new List<BrokerRecoverInfo>();
    List<Exception> exceptionList = new List<Exception>();
    for (int i = 0; i < RecoverBrokerRetryLimit; i++)
    {
        List<BrokerRecoverInfo> retryList = new List<BrokerRecoverInfo>();
        foreach (BrokerRecoverInfo recoverInfo in recoverInfoList)
        {
            try
            {
                // Only running broker will be recovered here
                // Should start the broker immediately
                // NOTE(review): cluster info is re-fetched for every broker in
                // every round — confirm whether it could be hoisted.
                ClusterInfoContract clusterInfo = await this.schedulerHelper.GetClusterInfoAsync();
                await this.CreateBrokerAndRun(recoverInfo, true, clusterInfo);
                TraceHelper.TraceEvent(recoverInfo.SessionId, System.Diagnostics.TraceEventType.Information, "[BrokerManager] Succeeded start broker {0} during initialization", recoverInfo.SessionId);
                TraceHelper.RuntimeTrace.LogSessionRaisedUpFailover(recoverInfo.SessionId);
            }
            catch (Exception e)
            {
                TraceHelper.TraceEvent(recoverInfo.SessionId, System.Diagnostics.TraceEventType.Error, "[BrokerManager] Exception throwed while recovering broker {0} : {1}, Retry = {2}", recoverInfo.SessionId, e, ExceptionUtility.ShouldRetry(e));

                // Remove the partially-created broker entry so a retry (or a
                // later session with the same id) starts from a clean slate.
                lock (this.brokerDic)
                {
                    if (this.brokerDic.ContainsKey(recoverInfo.SessionId))
                    {
                        this.brokerDic.Remove(recoverInfo.SessionId);
                    }
                }

                // Transient failures are queued for another round; everything
                // else is recorded as permanently failed.
                if (ExceptionUtility.ShouldRetry(e))
                {
                    retryList.Add(recoverInfo);
                }
                else
                {
                    failedList.Add(recoverInfo);
                    exceptionList.Add(e);
                }
            }
        }

        if (retryList.Count == 0)
        {
            if (failedList.Count == 0)
            {
                // Everything recovered: mark connected and finish.
                this.connected = true;
                TraceHelper.TraceEvent(
                    System.Diagnostics.TraceEventType.Information,
                    "[BrokerManager] Succeeded connecting to the headnode:{0}.",
                    this.schedulerHelper.HeadNode);
                return;
            }
            else
            {
                // Nothing transient left to retry; go fail the rest below.
                break;
            }
        }

        recoverInfoList = retryList.ToArray();
        await Task.Delay(RetryPeriod, ct);
    }

    // NOTE(review): when the retry limit is exhausted, the logged count is the
    // size of the last retry list, not failedList — confirm that is intended.
    TraceHelper.TraceEvent(System.Diagnostics.TraceEventType.Warning, "[BrokerManager] Connected to the headnode and recover broker info, Failed = {0}", recoverInfoList.Length);

    // fail jobs that cannot be recovered
    for (int i = 0; i < failedList.Count; i++)
    {
        BrokerRecoverInfo recoverInfo = failedList[i];
        Exception exception = exceptionList[i];

        // Log the exception
        TraceHelper.TraceEvent(System.Diagnostics.TraceEventType.Error, "[BrokerManager] Failed to recover broker. Exception: {0}", exception);

        // We do not pass exception detail to FailJob call because of the 128 byte reason message limitation, which is likely not enough for exception detail.
        await this.schedulerHelper.FailJob(recoverInfo.SessionId, "Failed to recover broker. Check broker log for detail.");
    }

    this.connected = true;
}