示例#1
0
        /// <summary>
        /// Asks the server at <c>serverAddress</c> for its status document and parses the reply
        /// as XML. Once a first status has been seen (<c>targetNumberOfWorkers</c> is no longer
        /// negative) the request carries predicates and a server-side timeout, so the server can
        /// hold it until something interesting changes.
        /// </summary>
        /// <returns>
        /// the parsed response, or null if issuing the request or parsing the reply threw
        /// (the exception is logged)
        /// </returns>
        private async Task <XContainer> GetStatus()
        {
            int enoughWorkers = Math.Max(1, (targetNumberOfWorkers * 3) / 4);
            int almostAll     = targetNumberOfWorkers - 3;

            // this method runs repeatedly, so use TrySetResult: it does not throw when the
            // completion source was already signalled by an earlier call
            if (knownWorkers.Count >= enoughWorkers)
            {
                reasonableReached.TrySetResult(true);
            }

            StringBuilder url = new StringBuilder(serverAddress);
            url.Append("status");

            // a negative target means no status has been seen yet: send no predicates at all so
            // the request returns immediately
            if (targetNumberOfWorkers >= 0)
            {
                // both waiting modes give up as soon as the epoch changes
                url.AppendFormat("?epochGreater={0}", epoch);

                if (knownWorkers.Count < almostAll)
                {
                    // still short of workers: also wake up when we get close to all of them,
                    // or after a couple of seconds
                    url.AppendFormat("&thresholdGreater=Worker:{0}", almostAll - 1);
                    url.Append("&timeout=2000");
                }
                else
                {
                    // nearly everything is here: also wake up on any machine state change,
                    // otherwise wait considerably longer
                    url.AppendFormat("&versionGreater={0}", version);
                    url.Append("&timeout=30000");
                }
            }

            IHttpRequest statusRequest = ClusterInterface.HttpClient.Create(url.ToString());

            // client-side limit; longer than either server-side timeout above, so hitting it
            // means something is wrong
            statusRequest.Timeout = 60 * 1000;

            try
            {
                using (IHttpResponse httpResponse = await statusRequest.GetResponseAsync())
                using (Stream body = httpResponse.GetResponseStream())
                using (var xml = System.Xml.XmlReader.Create(body))
                {
                    return XDocument.Load(xml);
                }
            }
            catch (Exception e)
            {
                logger.Log("Failed getting status: " + e.ToString());
                return null;
            }
        }
示例#2
0
        /// <summary>
        /// Creates the scheduler-side record for one daemon: stores its identity, addresses and
        /// queues, creates an empty local queue, and prepares the completion sources used to
        /// stop the command loop and to report that it has exited.
        /// </summary>
        /// <param name="n">the unique name of the daemon</param>
        /// <param name="host">the computer the daemon is running on</param>
        /// <param name="rn">the rack the daemon is running on</param>
        /// <param name="rack">the scheduling queue associated with the computer's rack</param>
        /// <param name="cluster">the global scheduling queue associated with the cluster</param>
        /// <param name="pServer">the address of the daemon's http server for process scheduling</param>
        /// <param name="fServer">the address of the daemon's http server for file proxying</param>
        /// <param name="directory">the daemon's local directory</param>
        /// <param name="log">connection to the logging subsystem</param>
        public Computer(string n, string host, string rn, ProcessQueue rack, ProcessQueue cluster,
                        string pServer, string fServer, string directory, ClusterInterface.ILogger log)
        {
            logger = log;

            // who and where the daemon is
            name           = n;
            computerName   = host;
            rackName       = rn;
            localDirectory = directory;

            // how to reach it
            processServer = pServer;
            fileServer    = fServer;

            // the queues it draws work from: its own, its rack's and the cluster's
            localQueue   = new ProcessQueue();
            rackQueue    = rack;
            clusterQueue = cluster;

            logger.Log(String.Format("Created computer {0} on host {1}:{2}:{3}:{4}",
                                     name, computerName, rackName, localDirectory, fileServer));

            // finishWaiter is the task the command loop is meant to block on (the loop itself is
            // not visible here); once it completes, the continuation below runs
            // SetChildFinishWaiters on the thread pool
            childFinishWaiters = new HashSet <TaskCompletionSource <Process> >();
            finishWaiter       = new TaskCompletionSource <Process>();
            finishWaiter.Task.ContinueWith(finished => Task.Run(() => SetChildFinishWaiters()));

            // signalled when the command loop exits
            exited = new TaskCompletionSource <bool>();

            nextTask = 1;
        }
示例#3
0
        /// <summary>
        /// Creates a scheduler that knows about no computers, localities or racks yet.
        /// </summary>
        /// <param name="l">logger used by the scheduler and by the placeholder computer it creates</param>
        public LocalScheduler(ClusterInterface.ILogger l)
        {
            logger = l;

            // lookup tables start out empty
            racks      = new Dictionary <string, Rack>();
            localities = new Dictionary <string, List <Computer> >();
            computers  = new Dictionary <string, Computer>();

            clusterQueue = new ProcessQueue();

            // created but deliberately not started; its body does nothing
            flusher = new Task(() => { });

            clusterInterface = new PeloponneseInterface();

            // placeholder computer with no rack or cluster queue
            dummyCancelComputer = new Computer("dummy for canceling", "nowhere", "no rack",
                                               null, null,
                                               "no server", "no server", "no directory",
                                               logger);

            logger.Log("LocalScheduler created");
        }
        /// <summary>
        /// Resets this object's bookkeeping, reads the job settings from environment variables
        /// and registers the process with the server through a synchronous HTTP POST.
        /// </summary>
        /// <param name="p">the scheduler remembered as this object's parent</param>
        /// <param name="l">the logger used for all diagnostics</param>
        /// <returns>
        /// true if the registration exchange completed without throwing; false if a required
        /// environment variable is missing or the exchange failed (the reason is logged)
        /// </returns>
        public bool Initialize(LocalScheduler p, ClusterInterface.ILogger l)
        {
            const string missingVariable = "Can't find environment variable {0}: exiting";

            parent = p;
            logger = l;

            // counters; a negative worker target means "no status seen yet"
            epoch                 = 0;
            version               = 0;
            targetNumberOfWorkers = -1;

            // fresh collections and completion sources
            knownWorkers       = new Dictionary<string, string>();
            waitingForComputer = new List<Task>();
            reasonableReached  = new TaskCompletionSource<bool>();
            shutdownTask       = new TaskCompletionSource<XContainer>();
            exited             = new TaskCompletionSource<bool>();

            // each setting is mandatory; stop at the first one that is absent
            jobGuid = Environment.GetEnvironmentVariable(Constants.EnvJobGuid);
            if (jobGuid == null)
            {
                logger.Log(String.Format(missingVariable, Constants.EnvJobGuid));
                return false;
            }

            serverAddress = Environment.GetEnvironmentVariable(Constants.EnvManagerServerUri);
            if (serverAddress == null)
            {
                logger.Log(String.Format(missingVariable, Constants.EnvManagerServerUri));
                return false;
            }

            string processGroup = Environment.GetEnvironmentVariable(Constants.EnvProcessGroup);
            if (processGroup == null)
            {
                logger.Log(String.Format(missingVariable, Constants.EnvProcessGroup));
                return false;
            }

            string processId = Environment.GetEnvironmentVariable(Constants.EnvProcessIdentifier);
            if (processId == null)
            {
                logger.Log(String.Format(missingVariable, Constants.EnvProcessIdentifier));
                return false;
            }

            // the request body is an empty ProcessDetails element
            string payload = new XElement("ProcessDetails").ToString();

            string registrationUri = serverAddress + "register?guid=" + jobGuid
                                     + "&group=" + processGroup
                                     + "&identifier=" + processId;

            IHttpRequest request = ClusterInterface.HttpClient.Create(registrationUri);
            request.Method  = "POST";
            request.Timeout = 30 * 1000; // a slow answer surfaces as an exception below and we give up

            try
            {
                using (Stream upload = request.GetRequestStream())
                using (StreamWriter writer = new StreamWriter(upload))
                {
                    writer.Write(payload);
                }

                // the response content is not inspected; getting one without an exception is success
                using (request.GetResponse())
                {
                    logger.Log("Server registration succeeded");
                    return true;
                }
            }
            catch (NotHttpException httpFailure)
            {
                // nothing more can be done here except report the server's status
                logger.Log("Server registration failed message " + httpFailure.Message + " status " + httpFailure.Response.StatusCode + ": " + httpFailure.Response.StatusDescription);
                return false;
            }
            catch (Exception failure)
            {
                // nothing more can be done here
                logger.Log("Server registration failed message " + failure.Message);
                return false;
            }
        }
示例#5
0
        /// <summary>
        /// Offers a process to progressively wider scheduling queues: first the local queues of
        /// the computers it has host-level affinity for, then the queues of its racks, and
        /// finally the cluster-wide queue. Between the levels it waits on a delay-scheduling
        /// blocker, but only if the process was actually added to a queue at the previous level.
        /// </summary>
        /// <param name="process">the process to schedule</param>
        /// <param name="affinities">
        /// the placement preferences of the process; if any entry is a hard constraint the
        /// process is never offered to the cluster-wide queue
        /// </param>
        /// <param name="callback">handed to <c>process.SetCallback</c> before any queueing happens</param>
        /// <remarks>
        /// The method is <c>async void</c>, so an exception that escapes it cannot be observed by
        /// the caller. The signature is left as it is because callers are not visible from here.
        /// </remarks>
        private async void ScheduleProcessInternal(Process process, List <ClusterInterface.Affinity> affinities,
                                                   ClusterInterface.RunProcess callback)
        {
            logger.Log("Scheduling process " + process.Id);

            process.SetCallback(callback);

            Task rackBlocker;
            Task clusterBlocker;

            // NOTE(review): the lock on 'this' is kept as-is; presumably whoever replaces
            // 'flusher' takes the same lock -- confirm before switching to a private lock object
            lock (this)
            {
                // each blocker completes when flusher completes or its delay elapses,
                // whichever happens first
                rackBlocker    = Task.WhenAny(flusher, Task.Delay(rackDelay));
                clusterBlocker = Task.WhenAny(flusher, Task.Delay(clusterDelay));
            }

            bool isHardConstraint = affinities.Any(a => a.isHardContraint);

            if (isHardConstraint)
            {
                // the constraint generator should have intersected the hard constraint into a single one
                Debug.Assert(affinities.Count == 1);
                logger.Log("Process " + process.Id + " has a hard constraint");
            }

            // get a snapshot of available computers. The lists are copied as well as the
            // dictionary: they are iterated below without the lock, and sharing the list
            // instances would expose that iteration to any in-place change made under the lock
            Dictionary <string, List <Computer> > localitySnapshot = new Dictionary <string, List <Computer> >();

            lock (localities)
            {
                foreach (var c in localities)
                {
                    List <Computer> members = (c.Value == null)
                        ? new List <Computer>()
                        : new List <Computer>(c.Value);
                    localitySnapshot.Add(c.Key, members);
                }
            }

            if (localitySnapshot.Count == 0)
            {
                await process.OnScheduled(null, -1, null, "No cluster computers available");

                return;
            }

            // flatten the affinities once; the result feeds both the host pass and the rack pass
            // below, so a deferred query would be re-evaluated for each of them
            var allAffinities      = affinities.SelectMany(a => a.affinities).Distinct().ToList();
            var computerAffinities = allAffinities.Where(a => a.level == ClusterInterface.AffinityResourceLevel.Host);

            bool addedAny = false;

            var racksUsed = new List <string>();

            foreach (var a in computerAffinities)
            {
                List <Computer> cl;
                if (!localitySnapshot.TryGetValue(a.locality, out cl) || cl.Count == 0)
                {
                    // unknown locality, or one with no computers in it: nothing to queue on,
                    // and no rack to remember
                    continue;
                }

                addedAny = true;
                logger.Log("Adding Process " + process.Id + " to queues for computers with locality " + a.locality);
                foreach (var c in cl)
                {
                    logger.Log("Adding Process " + process.Id + " to queue for computer " + c.Name);
                    if (c.LocalQueue.AddProcess(process))
                    {
                        // this returns true if p has been matched to a computer, in which case we
                        // can stop adding it to queues
                        logger.Log("Process " + process.Id + " claimed by computer " + c.Name);
                        return;
                    }
                }

                // remember the rack this computer was in, to include it for soft affinities below
                racksUsed.Add(cl[0].RackName);
            }

            if (addedAny)
            {
                // hacky delay scheduling; wait until the upper level has finished adding processes in
                // the current stage, or some time has passed, before relaxing affinities if the process
                // had affinities for particular computers
                logger.Log("Process " + process.Id + " delay scheduling for rack");
                await rackBlocker;
            }

            // reset flag before adding to racks
            addedAny = false;

            // get a snapshot of available racks
            Dictionary <string, Rack> rackSnapshot = new Dictionary <string, Rack>();

            lock (racks)
            {
                foreach (var r in racks)
                {
                    rackSnapshot.Add(r.Key, r.Value);
                }
            }

            var rackAffinities = allAffinities.Where(a => a.level == ClusterInterface.AffinityResourceLevel.Rack).Select(a => a.locality).Distinct();

            if (!isHardConstraint)
            {
                // soft affinities may also fall back to the racks of the computers tried above
                rackAffinities = rackAffinities.Concat(racksUsed).Distinct();
            }

            foreach (var a in rackAffinities)
            {
                Rack r;
                if (rackSnapshot.TryGetValue(a, out r))
                {
                    addedAny = true;
                    logger.Log("Adding Process " + process.Id + " to queue for rack " + a);
                    if (r.queue.AddProcess(process))
                    {
                        // this returns true if p has been matched to a computer, in which case we
                        // can stop adding it to queues
                        logger.Log("Process " + process.Id + " claimed by rack " + a);
                        return;
                    }
                }
            }

            if (isHardConstraint)
            {
                // let the process know it won't get added to any more queues. This will signal the
                // upper layer if it didn't get added to any queues
                process.FinishedScheduling();
                return;
            }

            if (addedAny)
            {
                // hacky delay scheduling; wait until the upper level has finished adding processes in
                // the current stage, or some time has passed, before relaxing affinities if the process
                // had affinities for particular racks
                logger.Log("Process " + process.Id + " delay scheduling for cluster");
                await clusterBlocker;
            }

            logger.Log("Adding Process " + process.Id + " to queue for cluster");
            clusterQueue.AddProcess(process);

            // let the process know it won't get added to any more queues
            process.FinishedScheduling();
        }
示例#6
0
        /// <summary>
        /// Shuts down the local queue so it accepts no more processes, then completes
        /// finishWaiter with a null result, which is what makes the CommandLoop exit.
        /// </summary>
        /// <remarks>
        /// Completing finishWaiter uses TrySetResult, so a second call does not throw
        /// InvalidOperationException from the completion source.
        /// NOTE(review): localQueue.ShutDown() is still invoked on every call -- confirm that
        /// the queue tolerates being shut down more than once.
        /// </remarks>
        public void ShutDown()
        {
            logger.Log("Computer " + name + " stopping local queue");
            // stop the local queue accepting any more processes
            localQueue.ShutDown();

            logger.Log("Computer " + name + " starting finishWaiter");
            // TrySetResult instead of SetResult: SetResult throws if the task has already
            // been completed by an earlier ShutDown
            finishWaiter.TrySetResult(null);
        }
示例#7
0
        /// <summary>
        /// Prepares this object for use: clears its state, picks up the job guid, server
        /// address, process group and process identifier from the environment, and POSTs a
        /// registration request to the server.
        /// </summary>
        /// <param name="p">the owning scheduler, stored in <c>parent</c></param>
        /// <param name="l">the logger, stored in <c>logger</c> and used for every message below</param>
        /// <returns>
        /// true only when every environment variable was present and the registration exchange
        /// finished without an exception; false otherwise
        /// </returns>
        public bool Initialize(LocalScheduler p, ClusterInterface.ILogger l)
        {
            parent = p;
            logger = l;

            // state starts from scratch; -1 marks the worker target as not yet known
            targetNumberOfWorkers = -1;
            epoch                 = 0;
            version               = 0;
            knownWorkers          = new Dictionary <string, string>();
            waitingForComputer    = new List <Task>();
            reasonableReached     = new TaskCompletionSource <bool>();
            shutdownTask          = new TaskCompletionSource <XContainer>();
            exited                = new TaskCompletionSource <bool>();

            // the four settings below are all required; the first missing one ends initialization
            jobGuid = Environment.GetEnvironmentVariable(Constants.EnvJobGuid);
            if (jobGuid == null)
            {
                logger.Log("Can't find environment variable " + Constants.EnvJobGuid + ": exiting");
                return false;
            }

            serverAddress = Environment.GetEnvironmentVariable(Constants.EnvManagerServerUri);
            if (serverAddress == null)
            {
                logger.Log("Can't find environment variable " + Constants.EnvManagerServerUri + ": exiting");
                return false;
            }

            string group = Environment.GetEnvironmentVariable(Constants.EnvProcessGroup);
            if (group == null)
            {
                logger.Log("Can't find environment variable " + Constants.EnvProcessGroup + ": exiting");
                return false;
            }

            string identifier = Environment.GetEnvironmentVariable(Constants.EnvProcessIdentifier);
            if (identifier == null)
            {
                logger.Log("Can't find environment variable " + Constants.EnvProcessIdentifier + ": exiting");
                return false;
            }

            // body of the POST: an empty ProcessDetails element
            string details = new XElement("ProcessDetails").ToString();

            IHttpRequest request = ClusterInterface.HttpClient.Create(
                String.Format("{0}register?guid={1}&group={2}&identifier={3}", serverAddress, jobGuid, group, identifier));

            request.Method  = "POST";
            request.Timeout = 30 * 1000; // if this doesn't come back quickly, we'll get an exception and quit

            bool registered;

            try
            {
                using (Stream upload = request.GetRequestStream())
                {
                    using (StreamWriter body = new StreamWriter(upload))
                    {
                        body.Write(details);
                    }
                }

                // only the absence of an exception matters; the response itself is not read
                using (IHttpResponse response = request.GetResponse())
                {
                    logger.Log("Server registration succeeded");
                    registered = true;
                }
            }
            catch (NotHttpException e)
            {
                // the server answered with a failure; report its status, there is no recovery
                logger.Log("Server registration failed message " + e.Message + " status " + e.Response.StatusCode + ": " + e.Response.StatusDescription);
                registered = false;
            }
            catch (Exception e)
            {
                // anything else that went wrong; there is no recovery either
                logger.Log("Server registration failed message " + e.Message);
                registered = false;
            }

            return registered;
        }