示例#1
0
        /// <summary>
        /// Temporarily blocks a dispatcher: takes it out of the active set, records it in the
        /// blocked list, closes it, and then asks the monitor to re-check job health.
        /// </summary>
        /// <param name="dispatcherInfo">info of the dispatcher to block</param>
        public void BlockDispatcher(DispatcherInfo dispatcherInfo)
        {
            Dispatcher blockedDispatcher   = null;
            int        countForHealthCheck = 0;

            lock (this.lockThis)
            {
                blockedDispatcher = this.RemoveActiveDispatcher(dispatcherInfo.UniqueId);
                if (null == blockedDispatcher)
                {
                    // The dispatcher is not in the active set, so there is nothing to block.
                    BrokerTracing.TraceVerbose("[DispatcherManager] Block dispatcher failed: {0} is not active", dispatcherInfo.UniqueId);
                    return;
                }

                this.AddBlockedDispatcher(dispatcherInfo);

                // Blocked dispatchers that have not been retried yet still count as 'active'
                // for the health check, so add them to the live dispatcher count.
                countForHealthCheck = this.sharedData.DispatcherCount + this.youngBlockedDispatcherCount;
            }

            // Close outside the lock; only the bookkeeping above needs to be synchronized.
            blockedDispatcher.Close();

            // Let the monitor decide whether the number of active dispatchers has dropped
            // below the job's minimum resource unit.
            this.monitor.CheckJobHealthy(countForHealthCheck);
        }
示例#2
0
        /// <summary>
        /// Add a dispatcher to the blocked dispatcher list and queue it for a later unblock attempt.
        /// </summary>
        /// <remarks>
        /// If the dispatcher is already in the blocked dictionary, an error is traced and nothing
        /// is changed (in particular, its BlockTime is left untouched).
        /// NOTE(review): the visible caller invokes this while holding lockThis; this method does
        /// no locking of its own - confirm all callers hold the lock.
        /// </remarks>
        /// <param name="dispatcherInfo">dispatcher info</param>
        private void AddBlockedDispatcher(DispatcherInfo dispatcherInfo)
        {
            // Detect duplicates up front instead of catching ArgumentException from Add.
            // The former catch-all also swallowed argument errors raised by Timer.Change and
            // reported them as "already exist", after BlockTime and the counters had already
            // been modified.
            if (this.blockedDispatcherDic.ContainsKey(dispatcherInfo.UniqueId))
            {
                BrokerTracing.TraceError("[DispatcherManager] Dispatcher {0} already exist in the blocked dispatcher list.", dispatcherInfo.UniqueId);
                return;
            }

            dispatcherInfo.BlockTime = DateTime.Now;
            this.blockedDispatcherDic.Add(dispatcherInfo.UniqueId, dispatcherInfo);

            // if a dispatcher has never been blocked, take it as "young" blocked dispatcher.
            if (dispatcherInfo.BlockRetryCount <= 0)
            {
                BrokerTracing.TraceVerbose("[DispatcherManager] Increment youngBlockedDispatcherCount, task id={0}, BlockRetryCount={1}", dispatcherInfo.UniqueId, dispatcherInfo.BlockRetryCount);
                this.youngBlockedDispatcherCount++;
            }

            this.blockedDispatcherQueue.Enqueue(dispatcherInfo);

            // Only the first queued entry needs to arm the one-shot unblock timer.
            if (this.blockedDispatcherQueue.Count == 1)
            {
                BrokerTracing.TraceVerbose("[DispatcherManager] Block dispatcher: change unblock timer, task id = {0}", dispatcherInfo.UniqueId);
                this.unblockTimer.Change(this.blockTimeSpan, TimeSpan.FromMilliseconds(-1));
            }

            BrokerTracing.TraceInfo("[DispatcherManager] Add dispatcher {0} into the blocked dispatcher list.", dispatcherInfo.UniqueId);
        }
示例#3
0
 /// <summary>
 /// Initializes a new instance of the WssDispatcher class. When the supplied binding is a
 /// BasicHttpBinding it is switched to message security with certificate client credentials;
 /// otherwise a warning is traced and the binding is left as is.
 /// </summary>
 /// <param name="info">indicating the dispatcher info</param>
 /// <param name="binding">binding information</param>
 /// <param name="sharedData">indicating the shared data</param>
 /// <param name="observer">indicating the observer</param>
 /// <param name="queueFactory">indicating the queue factory</param>
 /// <param name="schedulerAdapterClientFactory">scheduler adapter client factory, passed through to the base constructor</param>
 /// <param name="dispatcherIdle">set when the dispatcher enters idle status</param>
 public WssDispatcher(DispatcherInfo info, Binding binding, SharedData sharedData, BrokerObserver observer, BrokerQueueFactory queueFactory, SchedulerAdapterClientFactory schedulerAdapterClientFactory, AutoResetEvent dispatcherIdle)
     : base(info, binding, sharedData, observer, queueFactory, schedulerAdapterClientFactory, dispatcherIdle)
 {
     BasicHttpBinding wssBinding = binding as BasicHttpBinding;
     if (wssBinding == null)
     {
         // Not a BasicHttpBinding: nothing to configure, just report the actual type.
         BrokerTracing.TraceWarning("[WssDispatcher]. The binding type is not HTTP {0}.", binding.GetType().ToString());
         return;
     }

     // Message-level security, authenticated with a client certificate.
     wssBinding.Security.Mode = BasicHttpSecurityMode.Message;

     var messageSecurity = wssBinding.Security.Message;
     messageSecurity.ClientCredentialType = BasicHttpMessageCredentialType.Certificate;
     messageSecurity.AlgorithmSuite       = System.ServiceModel.Security.SecurityAlgorithmSuite.Basic128;
 }
示例#4
0
        /// <summary>
        /// Callback that walks the blocked dispatcher queue and moves every entry whose block
        /// time has expired back to the active list by re-creating its dispatcher.
        /// </summary>
        /// <remarks>
        /// Presumably wired to unblockTimer - confirm at the timer's construction site.
        /// </remarks>
        /// <param name="state">null object</param>
        private void CallbackToQueryBlockedDispatcherList(object state)
        {
            List<DispatcherInfo> expired = new List<DispatcherInfo>();

            lock (this.lockThis)
            {
                BrokerTracing.TraceInfo("[DispatcherManager] Callback to query blocked dispatcher list.");
                Debug.Assert(this.blockedDispatcherQueue.Count > 0, "no blocked dispatcher");

                while (this.blockedDispatcherQueue.Count > 0)
                {
                    DispatcherInfo head   = this.blockedDispatcherQueue.Peek();
                    TimeSpan       waited = DateTime.Now.Subtract(head.BlockTime);

                    if (waited < this.blockTimeSpan)
                    {
                        // The head of the queue is not due yet: re-arm the one-shot timer for
                        // the remaining time and stop scanning.
                        BrokerTracing.TraceVerbose("[DispatcherManager] Unblock dispatcher: change unblock timer, task id = {0}", head.UniqueId);
                        this.unblockTimer.Change(this.blockTimeSpan - waited, TimeSpan.FromMilliseconds(-1));
                        break;
                    }

                    this.blockedDispatcherQueue.Dequeue();

                    // A queue entry without a match in blockedDispatcherDic is simply dropped.
                    if (this.RemoveBlockedDispatcher(head.UniqueId) == null)
                    {
                        continue;
                    }

                    // Forget the earlier failure so that a new failure of this task is not
                    // filtered out as a duplicate.
                    lock (this.lockFailedDispatcherList)
                    {
                        this.failedDispatcherList.Remove(head.UniqueId);
                    }

                    head.BlockRetryCount++;
                    BrokerTracing.TraceVerbose("[DispatcherManager] Increment BlockRetryCount: task id={0}, BlockRetryCount={1}", head.UniqueId, head.BlockRetryCount);
                    expired.Add(head);
                }
            }

            // Re-create the dispatchers outside the lock; NewDispatcherAsync takes lockThis itself.
            foreach (DispatcherInfo unblocked in expired)
            {
                BrokerTracing.TraceInfo("[DispatcherManager] Move dispatcher {0} from blocked list back to active.", unblocked.UniqueId);
                this.NewDispatcherAsync(unblocked).GetAwaiter().GetResult(); // TODO: change this to async
            }
        }
示例#5
0
        /// <summary>
        /// Handles a service instance failure reported by a dispatcher. Duplicate reports for the
        /// same task are ignored; otherwise the dispatcher is blocked, removed and blacklisted,
        /// or removed, depending on the fault code.
        /// </summary>
        /// <param name="sender">the Dispatcher that raised the event</param>
        /// <param name="e">event args carrying the fault whose code selects the action</param>
        private void ServiceInstanceFailed(object sender, ServiceInstanceFailedEventArgs e)
        {
            Dispatcher dispatcher = sender as Dispatcher;
            if (dispatcher == null)
            {
                // Previously this dereferenced the failed cast and died with a NullReferenceException.
                BrokerTracing.TraceError("[DispatcherManager] ServiceInstanceFailed raised by an unexpected sender: {0}", sender);
                return;
            }

            DispatcherInfo dispatcherInfo = dispatcher.Info;

            // Filter out duplicate failures
            // NOTE(review): the list is keyed by dispatcher.TaskId here but entries are removed by
            // UniqueId elsewhere - confirm both refer to the same value.
            lock (this.lockFailedDispatcherList)
            {
                if (this.failedDispatcherList.Contains(dispatcher.TaskId))
                {
                    return;
                }

                this.failedDispatcherList.Add(dispatcher.TaskId);
            }

            BrokerTracing.TraceError("[DispatcherManager] Service instance failed! Task id = {0}, node name = {1}", dispatcherInfo.UniqueId, dispatcherInfo.MachineName);

            // If the service is unavailable, just block the dispatcher. We cannot blacklist it because it may have just been preempted and the node is still good.
            // Furthermore the network may be temporarily out but the CN and its app install is fine
            if (e.Fault.Code == (int)SOAFaultCode.Service_Unreachable)
            {
                this.BlockDispatcher(dispatcherInfo);
            }

            // If the service cannot be initialized, remove and blacklist the dispatcher. We know the service cannot be loaded
            else if (e.Fault.Code == (int)SOAFaultCode.Service_InitializeFailed)
            {
                if (this.RemoveDispatcher(dispatcherInfo.UniqueId, /*exitServiceHost =*/ false, false))
                {
                    // Should use the machine virtual name for scheduler API to exclude a node.
                    this.monitor.BlacklistNode(SoaHelper.IsOnAzure() ? dispatcherInfo.MachineVirtualName : dispatcherInfo.MachineName).GetAwaiter().GetResult();
                }
            }

            // If the service host is preempted, remove the dispatcher.
            else if (e.Fault.Code == (int)SOAFaultCode.Service_Preempted)
            {
                this.RemoveDispatcher(dispatcherInfo.UniqueId, true, true);
            }

            // There should be no other possible failure codes
            else
            {
                // Debug.Assert is compiled out of release builds, so also leave a trace there.
                BrokerTracing.TraceError("[DispatcherManager] Invalid fault code sent to ServiceInstanceFailed - {0}, task id = {1}", e.Fault.Code, dispatcherInfo.UniqueId);
                Debug.Assert(false, String.Format("Invalid fault code sent to ServiceInstanceFailed - {0}", e.Fault.Code));
            }
        }
示例#6
0
        /// <summary>
        /// Asks the service host of the given dispatcher to exit.
        /// </summary>
        /// <remarks>
        /// A controller is created for the dispatcher and BeginExit is called on it with a
        /// callback that finishes the task through the monitor. Any exception thrown inside that
        /// callback is traced as a warning and not rethrown.
        /// NOTE(review): the trace text suggests the callback runs when the exit attempt fails -
        /// confirm against the controller's BeginExit contract.
        /// </remarks>
        /// <param name="dispatcherInfo">info of the dispatcher whose service host should exit</param>
        private void ExitServiceHost(DispatcherInfo dispatcherInfo)
        {
            var hostController = Dispatcher.CreateController(dispatcherInfo, this.defaultBinding, this.httpsBurst);

            BrokerTracing.TraceInfo("[DispatcherManager] .ExitServiceHost: Calling BeginExit for task id {0}, {1}", dispatcherInfo.UniqueId, dispatcherInfo.ServiceHostControllerAddress);

            hostController.BeginExit(
                () =>
                {
                    try
                    {
                        // Blocks the callback thread until the monitor has finished the task.
                        this.monitor.FinishTask(dispatcherInfo).GetAwaiter().GetResult();
                    }
                    catch (Exception finishError)
                    {
                        BrokerTracing.TraceWarning("[ServiceHostController] .BeginExit: onFailed callback exception for task id {0} : {1}", dispatcherInfo.UniqueId, finishError.ToString());
                    }
                });
        }
示例#7
0
        /// <summary>
        /// Remove a dispatcher (info) from the blocked dispatcher dictionary.
        /// </summary>
        /// <remarks>
        /// Only the dictionary is updated here; the blocked dispatcher queue is not touched.
        /// </remarks>
        /// <param name="taskId">task id related to the dispatcher</param>
        /// <returns>the removed dispatcher info, or the TryGetValue default (null) when the task id is not blocked</returns>
        private DispatcherInfo RemoveBlockedDispatcher(string taskId)
        {
            DispatcherInfo removedInfo = null;

            if (!this.blockedDispatcherDic.TryGetValue(taskId, out removedInfo))
            {
                // Unknown task id: nothing to remove.
                return removedInfo;
            }

            this.blockedDispatcherDic.Remove(taskId);
            BrokerTracing.TraceInfo("[DispatcherManager] Dispatcher {0} removed from the blocked dispatcher list.", taskId);

            // A dispatcher that was never retried was counted as "young" when it was blocked,
            // so take it out of that count again.
            if (removedInfo.BlockRetryCount <= 0)
            {
                BrokerTracing.TraceVerbose("[DispatcherManager] Decrement youngBlockedDispatcherCount, task id={0}", removedInfo.UniqueId);
                this.youngBlockedDispatcherCount--;
            }

            return removedInfo;
        }
示例#8
0
        /// <summary>
        /// Create an instance of Dispatcher for the given dispatcher info, register it in the
        /// active dispatcher dictionary and start it.
        /// </summary>
        /// <remarks>
        /// Nothing is created when the task id is already active or blocked, or when the node
        /// location is not supported. Exceptions raised while constructing or starting the
        /// dispatcher are caught and traced, not rethrown.
        /// </remarks>
        /// <param name="dispatcherInfo">the dispatcher info</param>
        /// <returns>a task that completes once the dispatcher has been started or the attempt has been abandoned</returns>
        public async Task NewDispatcherAsync(DispatcherInfo dispatcherInfo)
        {
            dispatcherInfo.ApplyDefaultCapacity(this.defaultCapacity);

            Dispatcher dispatcher = null;

            // This lock is to sync operations to the dispatcherDic
            lock (this.lockThis)
            {
                if (this.dispatcherDic.ContainsKey(dispatcherInfo.UniqueId) || this.blockedDispatcherDic.ContainsKey(dispatcherInfo.UniqueId))
                {
                    BrokerTracing.TraceEvent(TraceEventType.Warning, 0, "[DispatcherManager] Task race condition detected, taskid = {0}", dispatcherInfo.UniqueId);
                    return;
                }
            }

            try
            {
                BrokerTracing.TraceInfo("[DispatcherManager] Create new dispatcher: {0}", dispatcherInfo.AllocatedNodeLocation);
                if (dispatcherInfo.AllocatedNodeLocation == NodeLocation.OnPremise ||
                    dispatcherInfo.AllocatedNodeLocation == NodeLocation.Linux ||
                    dispatcherInfo.AllocatedNodeLocation == NodeLocation.AzureEmbedded ||
                    dispatcherInfo.AllocatedNodeLocation == NodeLocation.AzureEmbeddedVM ||
                    dispatcherInfo.AllocatedNodeLocation == NodeLocation.NonDomainJoined)
                {
                    // check if using backend-security (for java soa only)
                    if (dispatcherInfo is WssDispatcherInfo)
                    {
                        // use security mode
                        dispatcher = new WssDispatcher(
                            dispatcherInfo,
                            this.defaultBinding,
                            this.sharedData,
                            this.observer,
                            this.queueFactory,
                            this.monitor.SchedulerAdapterFactory,
                            this.monitor.NeedAdjustAllocation);
                    }
                    else
                    {
                        // normal mode
                        dispatcher = new Dispatcher(
                            dispatcherInfo,
                            this.defaultBinding,
                            this.sharedData,
                            this.observer,
                            this.queueFactory,
                            this.monitor.SchedulerAdapterFactory,
                            this.monitor.NeedAdjustAllocation);
                    }
                }
#if HPCPACK
                else if (dispatcherInfo.AllocatedNodeLocation == Scheduler.Session.Data.NodeLocation.AzureVM ||
                         dispatcherInfo.AllocatedNodeLocation == Scheduler.Session.Data.NodeLocation.Azure)
                {
                    // NodeLocation.Azure, NodeLocation.AzureVM
                    if (this.httpsBurst)
                    {
                        // for https connection
                        if (this.connectionStringValid == null)
                        {
                            // Await instead of blocking the calling thread inside an async method.
                            await this.ValidateConnectionString().ConfigureAwait(false);
                        }

                        if (this.connectionStringValid.Value)
                        {
                            dispatcher = new AzureHttpsDispatcher(
                                this.azureQueueManager,
                                dispatcherInfo,
                                this.defaultBinding,
                                this.sharedData,
                                this.observer,
                                this.queueFactory,
                                this.monitor.SchedulerAdapterFactory,
                                this.monitor.NeedAdjustAllocation);
                        }
                        else
                        {
                            // ValidateConnectionString method already writes trace
                            // for this case.
                            return;
                        }
                    }
                    else
                    {
                        // for nettcp connection
                        dispatcher = new AzureDispatcher(
                            dispatcherInfo,
                            this.defaultBinding,
                            this.sharedData,
                            this.observer,
                            this.queueFactory,
                            this.monitor.SchedulerAdapterFactory,
                            this.monitor.NeedAdjustAllocation);
                    }
                }
#endif
                else
                {
                    BrokerTracing.TraceError("Not supported NodeLocation {0} for dispatcher", dispatcherInfo.AllocatedNodeLocation);

                    // No dispatcher was created. Stop here instead of falling through to the
                    // event subscriptions below, which used to fail with a NullReferenceException
                    // that was then caught and traced as a creation failure.
                    return;
                }

                dispatcher.Failed    += new EventHandler(this.DispatcherFailed);
                dispatcher.Connected += new EventHandler(this.DispatcherConnected);
                dispatcher.OnServiceInstanceFailedEvent += new EventHandler<ServiceInstanceFailedEventArgs>(this.ServiceInstanceFailed);
            }
            catch (InvalidOperationException e)
            {
                BrokerTracing.TraceError("[DispatcherManager] Create dispatcher failed: {0}", e);
                return;
            }
            catch (Exception e)
            {
                BrokerTracing.TraceError("[DispatcherManager] Create dispatcher failed.  Exception: {0}", e);
                return;
            }

            // Re-check under the lock: another thread may have registered or blocked the same
            // task while the dispatcher was being constructed outside the lock.
            bool exist = false;
            lock (this.lockThis)
            {
                if (this.dispatcherDic.ContainsKey(dispatcherInfo.UniqueId) || this.blockedDispatcherDic.ContainsKey(dispatcherInfo.UniqueId))
                {
                    BrokerTracing.TraceEvent(TraceEventType.Warning, 0, "[DispatcherManager] Task race condition detected, taskid = {0}", dispatcherInfo.UniqueId);
                    exist = true;
                }
                else
                {
                    this.dispatcherDic.Add(dispatcherInfo.UniqueId, dispatcher);
                    this.sharedData.DispatcherCount = this.dispatcherDic.Count;
                }
            }

            if (exist)
            {
                try
                {
                    dispatcher.Close();
                }
                catch (Exception ex)
                {
                    // Abandon the dispatcher, don't care about the exception.
                    BrokerTracing.TraceWarning("[DispatcherManager].Dispose: Exception {0}", ex);
                }
            }
            else
            {
                try
                {
                    // Notice: it is expensive to start the azure dispatcher because the client.open is time consuming for azure.
                    BrokerTracing.TraceVerbose("[DispatcherManager.NewDispatcher] Begin: Start dispatcher.");
                    await dispatcher.StartAsync().ConfigureAwait(false);

                    BrokerTracing.TraceVerbose("[DispatcherManager.NewDispatcher] End: Start dispatcher.");
                }
                catch (Exception e)
                {
                    BrokerTracing.TraceError("[DispatcherManager] Create dispatcher failed.  Exception: {0}", e);
                }
            }
        }
示例#9
0
        /// <summary>
        /// Remove the dispatcher of a task, whether it is currently active or blocked, and
        /// optionally ask its service host to exit.
        /// </summary>
        /// <param name="taskId">task id related to the dispatcher</param>
        /// <param name="exitServiceHost">true to also call ExitServiceHost for the dispatcher</param>
        /// <param name="preemption">
        /// true to close the dispatcher before exiting the service host; false to exit the
        /// service host first and close the dispatcher afterwards
        /// </param>
        /// <returns>false if the task id is neither active nor blocked; otherwise true</returns>
        public bool RemoveDispatcher(string taskId, bool exitServiceHost, bool preemption)
        {
            Dispatcher     activeDispatcher = null;
            DispatcherInfo removedInfo      = null;

            lock (this.lockThis)
            {
                // Look in the active set first and fall back to the blocked list.
                activeDispatcher = this.RemoveActiveDispatcher(taskId);
                removedInfo = activeDispatcher != null
                    ? activeDispatcher.Info
                    : this.RemoveBlockedDispatcher(taskId);
            }

            if (removedInfo == null)
            {
                BrokerTracing.TraceWarning("[DispatcherManager].RemoveDispatcher: Remove dispatcher failed, the dispatcher info of task {0} does not exist", taskId);
                return false;
            }

            BrokerTracing.TraceVerbose(
                "[DispatcherManager].RemoveDispatcher: Attempt to remove dispatcher {0} for the node {1}",
                taskId,
                SoaHelper.IsOnAzure() ? removedInfo.MachineVirtualName : removedInfo.MachineName);

            // stop dispatching to prevent too many communication exceptions when exiting service host
            if (activeDispatcher != null)
            {
                activeDispatcher.Stop();
            }

            // Without preemption the service host exits before the dispatcher is closed.
            if (exitServiceHost && !preemption)
            {
                this.ExitServiceHost(removedInfo);
            }

            // The dispatcher waits for all the responses with a timeout before closing.
            // When preemption happens, the HpcServiceHost directly replies a fault message
            // without invoking the hosted service, so the responses come back quickly.
            if (activeDispatcher != null)
            {
                activeDispatcher.Close();
            }

            // With preemption the service host exits only after the dispatcher is closed.
            if (exitServiceHost && preemption)
            {
                this.ExitServiceHost(removedInfo);
            }

            // Also remember to remove it from failed dispatcher list
            lock (this.lockFailedDispatcherList)
            {
                this.failedDispatcherList.Remove(taskId);
            }

            return true;
        }