예제 #1
0
        /// <summary>
        /// Starts a broker for a newly created session.
        /// </summary>
        /// <param name="info">start info of the session</param>
        /// <param name="sessionid">id of the session</param>
        /// <param name="durable">true when the session is durable</param>
        /// <returns>the initialization result produced by CreateBrokerAndRun</returns>
        public async Task <BrokerInitializationResult> CreateNewBrokerDomain(SessionStartInfoContract info, string sessionid, bool durable)
        {
            // Name of the calling Windows identity; stays empty when there is no operation
            // context, no security context, or no Windows identity attached to the call.
            string callerName = String.Empty;
            var operationContext = OperationContext.Current;
            if (operationContext != null)
            {
                var securityContext = operationContext.ServiceSecurityContext;
                if (securityContext != null && securityContext.WindowsIdentity != null)
                {
                    callerName = securityContext.WindowsIdentity.Name;
                }
            }

            TraceHelper.RuntimeTrace.LogSessionCreating(sessionid, callerName);
            TraceHelper.TraceEvent(sessionid, System.Diagnostics.TraceEventType.Information, "[BrokerManager] Create new broker domain: {0}", sessionid);

            // Bundle the session description in the shape CreateBrokerAndRun consumes.
            var newSessionInfo = new BrokerRecoverInfo
            {
                StartInfo = info,
                SessionId = sessionid,
                Durable = durable
            };

            // Create the scheduler helper on first use.
            if (this.schedulerHelper == null)
            {
                this.schedulerHelper = SchedulerHelperFactory.GetSchedulerHelper(this.context);
            }

            ClusterInfoContract clusterInfo = await this.schedulerHelper.GetClusterInfoAsync();
            BrokerInitializationResult result = await this.CreateBrokerAndRun(newSessionInfo, false, clusterInfo);

            return result;
        }
예제 #2
0
        /// <summary>
        /// Start broker init operations: create the queue length perf counters (when not on Azure),
        /// load the broker recover info through the scheduler helper, start the maintenance timers,
        /// then try to bring every listed broker back up. Brokers that fail with a non-retryable
        /// exception have their job failed through the scheduler helper.
        /// </summary>
        /// <param name="ct">
        /// cancellation token; checked by the perf counter retry loop and passed to every retry delay
        /// </param>
        /// <returns>a task that completes when the recovery pass has finished</returns>
        private async Task RecoverThreadProc(CancellationToken ct)
        {
            // Note: this counter is shared by the perf counter loop and the headnode loop below,
            // so the headnode trace continues counting from wherever the first loop stopped.
            int retry = 0;

            BrokerRecoverInfo[] recoverInfoList;

            // NOTE(review): any existing helper is dropped here without being disposed - confirm
            // that nothing else still owns it.
            this.schedulerHelper = null;

            // TODO: Read Azure Storage Queue instead
            if (!SoaHelper.IsOnAzure())
            {
                while (!ct.IsCancellationRequested)
                {
                    TraceHelper.TraceEvent(TraceEventType.Information, "[BrokerManager] Try to create the perf counters, Retry count = {0}", retry);
                    try
                    {
                        this.requestQueueLengthCounter  = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.DurableRequestsQueueLength);
                        this.responseQueueLengthCounter = BrokerPerformanceCounterHelper.GetPerfCounter(BrokerPerformanceCounterKey.DurableResponsesQueueLength);
                        break;
                    }
                    catch (Exception e)
                    {
                        // Bug 8507 : Fix leak
                        // The request counter may have been created before the failure; release it
                        // so the next attempt starts clean.
                        if (this.requestQueueLengthCounter != null)
                        {
                            this.requestQueueLengthCounter.Close();
                            this.requestQueueLengthCounter = null;
                        }

                        TraceHelper.TraceEvent(TraceEventType.Error, "[BrokerManager] Failed to create the perf counters: {0}", e);
                        retry++;
                        await Task.Delay(RetryPeriod, ct);
                    }
                }
            }

            // Keep trying until the recover info has been loaded; the only way out on failure is
            // the cancellation of the retry delay.
            while (true)
            {
                TraceHelper.TraceEvent(
                    System.Diagnostics.TraceEventType.Information,
                    "[BrokerManager] Try to connect to the headnode, Retry count = {0}.",
                    retry);
                try
                {
                    lock (this.brokerDic)
                    {
                        this.brokerDic.Clear();
                    }

                    // Bug 8507 : Fix leak
                    // Reuse the helper created by a previous attempt instead of creating a new one.
                    if (this.schedulerHelper == null)
                    {
                        this.schedulerHelper = SchedulerHelperFactory.GetSchedulerHelper(this.context);
                    }

                    recoverInfoList = await this.schedulerHelper.LoadBrokerRecoverInfo();

                    break;
                }
                catch (Exception e)
                {
                    TraceHelper.TraceEvent(
                        TraceEventType.Error,
                        "[BrokerManager] Exception throwed while connecting to head node {0}: {1}", this.headnode, e);

                    retry++;
                    await Task.Delay(RetryPeriod, ct);
                }
            }

            this.staleSessionCleanupTimer.Change(0, BrokerManager.StaleSessionCleanupPeriod);

            if (this.updateQueueLengthTimer != null)
            {
                // TODO: on azure, about the MSMQ. Don't use the MSMQ in the Azure cluster.
                this.updateQueueLengthTimer.Change(0, BrokerManager.UpdateQueueLengthPeriod);
            }

            // failedList and exceptionList are parallel: exceptionList[i] is the exception that
            // made failedList[i] unrecoverable.
            List <BrokerRecoverInfo> failedList    = new List <BrokerRecoverInfo>();
            List <Exception>         exceptionList = new List <Exception>();

            // Number of brokers that are neither recovered nor recorded in failedList. Starts as
            // the whole list and is refreshed after every pass with the size of that pass's retry list.
            int pendingRetryCount = recoverInfoList.Length;

            for (int i = 0; i < RecoverBrokerRetryLimit; i++)
            {
                List <BrokerRecoverInfo> retryList = new List <BrokerRecoverInfo>();
                foreach (BrokerRecoverInfo recoverInfo in recoverInfoList)
                {
                    try
                    {
                        // Only running broker will be recovered here
                        // Should start the broker immediately
                        ClusterInfoContract clusterInfo = await this.schedulerHelper.GetClusterInfoAsync();

                        await this.CreateBrokerAndRun(recoverInfo, true, clusterInfo);

                        TraceHelper.TraceEvent(recoverInfo.SessionId, System.Diagnostics.TraceEventType.Information, "[BrokerManager] Succeeded start broker {0} during initialization", recoverInfo.SessionId);
                        TraceHelper.RuntimeTrace.LogSessionRaisedUpFailover(recoverInfo.SessionId);
                    }
                    catch (Exception e)
                    {
                        // Decide once whether this failure is worth another attempt.
                        bool shouldRetry = ExceptionUtility.ShouldRetry(e);

                        TraceHelper.TraceEvent(recoverInfo.SessionId, System.Diagnostics.TraceEventType.Error, "[BrokerManager] Exception throwed while recovering broker {0} : {1}, Retry = {2}", recoverInfo.SessionId, e, shouldRetry);

                        // Drop any entry registered for this session before the failure.
                        lock (this.brokerDic)
                        {
                            if (this.brokerDic.ContainsKey(recoverInfo.SessionId))
                            {
                                this.brokerDic.Remove(recoverInfo.SessionId);
                            }
                        }

                        if (shouldRetry)
                        {
                            retryList.Add(recoverInfo);
                        }
                        else
                        {
                            failedList.Add(recoverInfo);
                            exceptionList.Add(e);
                        }
                    }
                }

                pendingRetryCount = retryList.Count;

                if (retryList.Count == 0)
                {
                    if (failedList.Count == 0)
                    {
                        // Everything was recovered: nothing to fail, nothing to retry.
                        this.connected = true;
                        TraceHelper.TraceEvent(
                            System.Diagnostics.TraceEventType.Information,
                            "[BrokerManager] Succeeded connecting to the headnode:{0}.",
                            this.schedulerHelper.HeadNode);
                        return;
                    }
                    else
                    {
                        // Nothing left to retry, but some brokers failed for good.
                        break;
                    }
                }

                // Next pass only revisits the brokers whose failure was retryable.
                recoverInfoList = retryList.ToArray();
                await Task.Delay(RetryPeriod, ct);
            }

            // Report the brokers that were not recovered: the ones that failed for good plus the
            // ones still waiting for a retry when the retry limit was reached.
            TraceHelper.TraceEvent(System.Diagnostics.TraceEventType.Warning, "[BrokerManager] Connected to the headnode and recover broker info, Failed = {0}", failedList.Count + pendingRetryCount);

            // fail jobs that cannot be recovered
            for (int i = 0; i < failedList.Count; i++)
            {
                BrokerRecoverInfo recoverInfo = failedList[i];
                Exception         exception   = exceptionList[i];

                // Log the exception
                TraceHelper.TraceEvent(System.Diagnostics.TraceEventType.Error, "[BrokerManager] Failed to recover broker.  Exception: {0}", exception);

                // We do not pass exception detail to FailJob call because of the 128 byte reason message limitation, which is likely not enough for exception detail.
                await this.schedulerHelper.FailJob(recoverInfo.SessionId, "Failed to recover broker.  Check broker log for detail.");
            }

            this.connected = true;
        }
예제 #3
0
        /// <summary>
        /// Entry point of the broker launcher. Configures the logger, parses the command line into
        /// the launcher settings, then hosts the launcher either in the current console process or
        /// through ServiceBase.Run, depending on BrokerLauncherSettings.Default.AsConsole.
        /// </summary>
        /// <param name="args">command line arguments</param>
        private static void Main(string[] args)
        {
            var log = new LoggerConfiguration().ReadFrom.AppSettings().Enrich.WithMachineName().CreateLogger();

            Log.Logger = log;

            // The finally block flushes the logger on every return path, including the early
            // return taken when argument parsing fails.
            try
            {
                if (!ParseAndSetBrokerLauncherSettings(args, BrokerLauncherSettings.Default))
                {
                    // parsing failed
                    return;
                }

                if (ConfigureLogging)
                {
                    Trace.TraceInformation("Log configuration for Broker Launcher has done successfully.");
                    return;
                }

                // clusterconnectionstring could be a machine name (for single headnode) or a connection string
                ITelepathyContext context;
                string            clusterConnectionString = SoaHelper.GetSchedulerName();

                context = TelepathyContext.GetOrAdd(clusterConnectionString);

                Trace.TraceInformation("Get diag trace enabled internal.");

                // The callback is synchronous, so the async scheduler call is blocked on here.
                // Any failure is traced and reported as "diag trace disabled".
                SoaDiagTraceHelper.IsDiagTraceEnabledInternal = (sessionId) =>
                {
                    try
                    {
                        using (ISchedulerHelper helper = SchedulerHelperFactory.GetSchedulerHelper(context))
                        {
                            return(helper.IsDiagTraceEnabled(sessionId).GetAwaiter().GetResult());
                        }
                    }
                    catch (Exception e)
                    {
                        Trace.TraceError("[SoaDiagTraceHelper] Failed to get IsDiagTraceEnabled property: {0}", e);
                        return(false);
                    }
                };

                TraceHelper.IsDiagTraceEnabled = SoaDiagTraceHelper.IsDiagTraceEnabled;

                LauncherHostService host             = null;
                BrokerManagement    brokerManagement = null;

                // richci : Run as a console application if user wants to debug (-D) or run in MSCS (-FAILOVER)
                if (BrokerLauncherSettings.Default.AsConsole)
                {
                    try
                    {
                        host = new LauncherHostService(true, context);

                        // This instance of HpcBroker is running as a failover generic application or in debug
                        // mode so startup the brokerManagement WCF service to accept management commands
                        brokerManagement = new BrokerManagement(host.BrokerLauncher);
                        brokerManagement.Open();

                        Console.WriteLine("Press any key to exit...");

                        // A timeout of -1 blocks this thread indefinitely; no key press is read here.
                        Thread.Sleep(-1);
                    }
                    finally
                    {
                        // Best-effort shutdown: a failure to stop one component is traced and must
                        // not prevent the other from being closed.
                        if (host != null)
                        {
                            try
                            {
                                host.Stop();
                            }
                            catch (Exception e)
                            {
                                Trace.TraceError("Exception stopping HpcBroker service - " + e);
                            }
                        }

                        if (brokerManagement != null)
                        {
                            try
                            {
                                brokerManagement.Close();
                            }
                            catch (Exception e)
                            {
                                Trace.TraceError("Exception closing broker managment WCF service - " + e);
                            }
                        }
                    }
                }
                else
                {
                    ServiceBase[] servicesToRun;
                    servicesToRun = new ServiceBase[] { new LauncherHostService(context) };
                    ServiceBase.Run(servicesToRun);
                }
            }
            finally
            {
                Log.CloseAndFlush();
            }
        }