Beispiel #1
0
        /// <summary>
        /// Blocks the calling thread until the job referenced by <paramref name="clusterArgs"/> is
        /// reported as finished, keeping a heartbeat timer alive for the duration of the wait.
        /// </summary>
        /// <param name="clusterArgs">Provides the scheduler job (via GetV2Job), the job id and the cluster name.</param>
        /// <returns>
        /// The waiting parameters: the job, the job state as read on entry, and the wait handle
        /// that was used to block.
        /// </returns>
        private static JobWaitingParams WaitForJobInternal(ClusterSubmitterArgs clusterArgs)
        {
            v2008R2.ISchedulerJob schedulerJob = clusterArgs.GetV2Job();
            var stateOnEntry = schedulerJob.State;

            JobWaitingParams waitingParams = new JobWaitingParams();
            waitingParams.Job              = schedulerJob;
            waitingParams.JobState         = stateOnEntry;
            waitingParams.ManualResetEvent = new ManualResetEvent(false);

            // Hook up the job event handling before we look at the state again.
            SetupJobEventHandler(waitingParams);

            // One heartbeat per minute, expressed in milliseconds.
            const int heartBeatMilliseconds = 60 * 1000;

            // The using block guarantees the timer is disposed even if waiting throws.
            using (Timer heartbeat = HeartbeatTimer(clusterArgs.JobID, clusterArgs.Cluster, waitingParams, heartBeatMilliseconds))
            {
                // Re-read the job so we do not block on a job that has already completed.
                waitingParams.Job.Refresh();
                bool alreadyFinished = JobIsFinished(waitingParams.Job.State);
                if (!alreadyFinished)
                {
                    // Presumably signalled by the handler installed above or by the heartbeat — confirm against those helpers.
                    waitingParams.ManualResetEvent.WaitOne();
                }

                // Stop any further heartbeat callbacks before the timer is disposed.
                heartbeat.Change(Timeout.Infinite, Timeout.Infinite);
            }

            return waitingParams;
        }
Beispiel #2
0
        /// <summary>
        /// Creates a non-exclusive task on <paramref name="job"/> configured as a cleanup task.
        /// The task is created but not added to the job.
        /// </summary>
        /// <param name="job">Job used to create the task.</param>
        /// <param name="internalRemoteDirectoryName">Working directory assigned to the task.</param>
        /// <param name="stdErrDirName">Directory that receives "&lt;name&gt;.txt" for standard error.</param>
        /// <param name="stdOutDirName">Directory that receives "&lt;name&gt;.txt" for standard output.</param>
        /// <param name="name">Task name, also used as the output file base name.</param>
        /// <param name="isFinalCleanup">
        /// When false, a placeholder ECHO command line is assigned; when true, the command line is
        /// left unset for the caller to fill in.
        /// </param>
        /// <returns>The newly created cleanup task.</returns>
        private static v2008R2.ISchedulerTask CreateCleanupTask(v2008R2.ISchedulerJob job, string internalRemoteDirectoryName, string stdErrDirName, string stdOutDirName, string name, bool isFinalCleanup)
        {
            v2008R2.ISchedulerTask result = job.CreateTask();

            result.WorkDirectory = internalRemoteDirectoryName;
            result.Name          = name;
            result.IsExclusive   = false;

            string errorLogPath  = string.Format(@"{0}\{1}.txt", stdErrDirName, name);
            result.StdErrFilePath = errorLogPath;

            string outputLogPath = string.Format(@"{0}\{1}.txt", stdOutDirName, name);
            result.StdOutFilePath = outputLogPath;

            // A final cleanup task gets its real command line from the caller.
            if (!isFinalCleanup)
            {
                result.CommandLine = "ECHO The cleanup task is running.";
            }

            return result;
        }
Beispiel #3
0
        /// <summary>
        /// Builds a scheduler task that runs <paramref name="distributableObj"/> through a local
        /// distributor. The task is created on <paramref name="job"/> but not added to it.
        /// </summary>
        /// <param name="taskNumber">
        /// The single task index to run, or null, in which case the local distributor gets no task
        /// range and "*" is used in the task name and output file names.
        /// </param>
        /// <param name="clusterArgs">Submission settings (core counts, directories, names, MPI flag).</param>
        /// <param name="job">Job used to create the task.</param>
        /// <param name="distributableObj">The work to be distributed.</param>
        /// <param name="nodesToUse">Optional set of required nodes; ignored when null.</param>
        /// <returns>The configured task.</returns>
        private static v2008R2.ISchedulerTask CreateTask(int? taskNumber, ClusterSubmitterArgs clusterArgs, v2008R2.ISchedulerJob job, IDistributable distributableObj, v2008R2.IStringCollection nodesToUse)
        {
            Distribute.Locally localDistributor = new Distribute.Locally();
            localDistributor.Cleanup         = false;
            localDistributor.TaskCount       = clusterArgs.TaskCount;
            localDistributor.Tasks           = taskNumber.HasValue ? new RangeCollection(taskNumber.Value) : null;
            localDistributor.ParallelOptions = new ParallelOptions() { MaxDegreeOfParallelism = 1 };

            v2008R2.ISchedulerTask newTask = job.CreateTask();
            if (nodesToUse != null)
            {
                newTask.RequiredNodes = nodesToUse;
            }

            // An explicit core count pins the task to that many cores on one node and lets the
            // local distributor use the same degree of parallelism. Without it no per-task
            // resource limits are set here (also the case for exclusive jobs).
            if (clusterArgs.NumCoresPerTask != null)
            {
                var coresPerTask = clusterArgs.NumCoresPerTask.Value;
                newTask.MinimumNumberOfCores = coresPerTask;
                newTask.MaximumNumberOfCores = coresPerTask;
                newTask.MaximumNumberOfNodes = 1;
                localDistributor.ParallelOptions.MaxDegreeOfParallelism = coresPerTask;
            }

            newTask.WorkDirectory = clusterArgs.ExternalRemoteDirectoryName;

            Distribute.Distribute distributeExe = new Distribute.Distribute();
            distributeExe.Distributable = distributableObj;
            distributeExe.Distributor   = localDistributor;

            // The argument string is built after the distributor is fully configured.
            string argumentText = CreateTaskString(distributeExe, clusterArgs.MinimalCommandLine);

            // Wrapped distributables run the user's executable; otherwise the assembly that
            // defines Distribute is used as the executable name.
            string executable;
            if (distributeExe.Distributable is DistributableWrapper)
            {
                executable = clusterArgs.ExeName;
            }
            else
            {
                executable = distributeExe.GetType().Assembly.GetName().Name;
            }

            // NOTE(review): with UseMPI set and NumCoresPerTask null, "-n" receives an empty value — confirm callers always set both.
            newTask.CommandLine = clusterArgs.UseMPI
                ? string.Format("mpiexec -n {0} {1}\\{2} {3}", clusterArgs.NumCoresPerTask, clusterArgs.ExeRelativeDirectoryName, executable, argumentText)
                : string.Format("{0}\\{1} {2}", clusterArgs.ExeRelativeDirectoryName, executable, argumentText);

            string taskLabel;
            if (taskNumber.HasValue)
            {
                taskLabel = taskNumber.Value.ToString();
            }
            else
            {
                taskLabel = "*";
            }

            newTask.Name           = Helper.CreateDelimitedString(" ", clusterArgs.Name, taskLabel);
            newTask.StdErrFilePath = string.Format(@"{0}\{1}.txt", clusterArgs.StdErrDirName, taskLabel);
            newTask.StdOutFilePath = string.Format(@"{0}\{1}.txt", clusterArgs.StdOutDirName, taskLabel);

            // Warn (do not fail) about long error paths.
            int errorPathLength = newTask.StdErrFilePath.Length;
            if (errorPathLength >= 160)
            {
                Console.WriteLine("Caution, std error file path is {0} characters, which will probably cause HPC to crash.", newTask.StdErrFilePath.Length);
            }

            return newTask;
        }
Beispiel #4
0
        /// <summary>
        /// Creates the final cleanup task, makes it depend on the tasks already present in the job
        /// (unless only cleanup is being run), and adds it to <paramref name="job"/>.
        /// </summary>
        /// <param name="clusterArgs">Submission settings (directories, task count, executable names).</param>
        /// <param name="scheduler">Scheduler used to create the filter, sort and string collections.</param>
        /// <param name="job">Job that receives the cleanup task.</param>
        /// <param name="distributableJob">The distributable whose cleanup is to be run.</param>
        /// <returns>The cleanup task that was added to the job.</returns>
        private static v2008R2.ISchedulerTask AddCleanupTaskToJob(ClusterSubmitterArgs clusterArgs, v2008R2.IScheduler scheduler, v2008R2.ISchedulerJob job, IDistributable distributableJob)
        {
            v2008R2.IStringCollection dependencyTasks = scheduler.CreateStringCollection();

            if (!clusterArgs.OnlyDoCleanup)
            {
                // Depend on EVERY task currently in the job, not just the first one: a
                // non-contiguous task range is submitted as many individually named tasks, and
                // cleanup must not start until all of them are done. Iterating also avoids an
                // index-out-of-range failure when the job has no tasks yet.
                v2008R2.ISchedulerCollection taskList = job.GetTaskList(scheduler.CreateFilterCollection(), scheduler.CreateSortCollection(), true);
                foreach (v2008R2.ISchedulerTask existingTask in taskList)
                {
                    string existingName = existingTask.Name;
                    if (!dependencyTasks.Contains(existingName))
                    {
                        dependencyTasks.Add(existingName);
                    }
                }
            }

            v2008R2.ISchedulerTask cleanupTask = CreateCleanupTask(job, clusterArgs.ExternalRemoteDirectoryName, clusterArgs.StdErrDirName, clusterArgs.StdOutDirName, "cleanup", true);

            // Cleanup runs locally with an empty task range and a single thread.
            Distribute.Locally local = new Distribute.Locally()
            {
                Cleanup         = true,
                TaskCount       = clusterArgs.TaskCount,
                Tasks           = new RangeCollection(),
                ParallelOptions = new ParallelOptions()
                {
                    MaxDegreeOfParallelism = 1
                }
            };

            Distribute.Distribute distributeExe = new Distribute.Distribute()
            {
                Distributor   = local,
                Distributable = distributableJob
            };

            // Wrapped distributables run the user's executable; otherwise the assembly that
            // defines Distribute is used as the executable name.
            string exeName = distributableJob is DistributableWrapper ? clusterArgs.ExeName : distributeExe.GetType().Assembly.GetName().Name;

            string taskCommandLine = string.Format("{0}\\{1} {2}", clusterArgs.ExeRelativeDirectoryName, exeName, CreateTaskString(distributeExe, clusterArgs.MinimalCommandLine));

            cleanupTask.CommandLine = taskCommandLine;

            // Only declare dependencies when there is something to depend on.
            if (!clusterArgs.OnlyDoCleanup && dependencyTasks.Count > 0)
            {
                cleanupTask.DependsOn = dependencyTasks;
            }
            job.AddTask(cleanupTask);
            return cleanupTask;
        }
Beispiel #5
0
        /// <summary>
        /// Returns the names of all nodes known to <paramref name="scheduler"/> that are not in
        /// <c>clusterArgs.NodeExclusionList</c>, and disables automatic min/max calculation on the job.
        /// </summary>
        /// <param name="clusterArgs">Supplies the node exclusion list.</param>
        /// <param name="scheduler">Scheduler queried for the node list and used to create the result collection.</param>
        /// <param name="job">Job whose AutoCalculateMax/AutoCalculateMin flags are switched off.</param>
        /// <returns>The collection of node names that may be used.</returns>
        /// <remarks>
        /// Fails via <c>Helper.CheckCondition</c> when the number of excluded nodes actually found
        /// on the cluster differs from the size of the exclusion list (e.g. a misspelled node name).
        /// </remarks>
        private static v2008R2.IStringCollection GetNodesToUse(ClusterSubmitterArgs clusterArgs, v2008R2.IScheduler scheduler, v2008R2.ISchedulerJob job)
        {
            job.AutoCalculateMax = false;
            job.AutoCalculateMin = false;
            var availableNodes = scheduler.GetNodeList(null, null);

            v2008R2.IStringCollection nodesToUse = scheduler.CreateStringCollection();

            // Excluded node names that were actually seen on the cluster.
            List<string> excludedNodesFound = new List<string>();

            foreach (var node in availableNodes)
            {
                string nodeName = ((Microsoft.Hpc.Scheduler.SchedulerNode)node).Name;
                if (clusterArgs.NodeExclusionList.Contains(nodeName))
                {
                    excludedNodesFound.Add(nodeName);
                }
                else
                {
                    nodesToUse.Add(nodeName);
                }
            }

            // CheckCondition asserts that its condition HOLDS, so the counts must be EQUAL for
            // the exclusion list to be valid. (The previous "!=" rejected every correct list and
            // accepted lists containing unknown node names.)
            Helper.CheckCondition(excludedNodesFound.Count == clusterArgs.NodeExclusionList.Count, "not all nodes in exclusion list found: check for typo " + clusterArgs.NodeExclusionList);

            return nodesToUse;
        }
Beispiel #6
0
        /// <summary>
        /// Connects to the cluster named in <paramref name="clusterArgs"/>, builds a job for
        /// <paramref name="distributableObj"/> (work tasks plus an optional cleanup task), submits
        /// it, and stores the resulting job id in <c>clusterArgs.JobID</c>.
        /// </summary>
        /// <param name="clusterArgs">
        /// Submission settings. This method may modify it: <c>IsExclusive</c> is cleared when
        /// <c>NumCoresPerTask</c> is set, <c>TaskRange</c> may be clamped to <c>TaskCount - 1</c>,
        /// <c>Cleanup</c> is forced on when <c>OnlyDoCleanup</c> is set, and <c>JobID</c> is assigned.
        /// </param>
        /// <param name="distributableObj">The work to be distributed.</param>
        /// <remarks>
        /// Fails via <c>Helper.CheckCondition</c> when a minimum node/core count is supplied without
        /// the matching maximum.
        /// </remarks>
        private static void SubmitViaAPI3(ClusterSubmitterArgs clusterArgs, IDistributable distributableObj)
        {
            Console.WriteLine(string.Format("Connecting to cluster {0} using API version 3 .", clusterArgs.Cluster));

            using (v2008R2.IScheduler scheduler = new v2008R2.Scheduler())
            {
                scheduler.Connect(clusterArgs.Cluster);
                v2008R2.ISchedulerJob job = scheduler.CreateJob();
                job.Name     = clusterArgs.Name;
                job.Priority = (v2008R2.Properties.JobPriority)clusterArgs.ApiPriority;

                // Apply the requested job template if the cluster knows it; otherwise list the
                // available templates and continue with the default one.
                if (clusterArgs.JobTemplate != null)
                {
                    Microsoft.Hpc.Scheduler.IStringCollection jobTemplates = scheduler.GetJobTemplateList();
                    string decodedJobTemplate = HttpUtility.UrlDecode(clusterArgs.JobTemplate);
                    if (jobTemplates.Contains(decodedJobTemplate))
                    {
                        job.SetJobTemplate(decodedJobTemplate);
                    }
                    else
                    {
                        Console.WriteLine("Job template '" + decodedJobTemplate + "' does not exist at specified cluster. Existing templates are:");
                        foreach (var template in jobTemplates)
                        {
                            Console.Write("'" + template + "' ");
                        }
                        Console.WriteLine("\nUsing Default job template...");
                    }
                }

                // An explicit per-task core count overrides exclusive mode.
                if (clusterArgs.NumCoresPerTask != null)
                {
                    clusterArgs.IsExclusive = false;
                }

                v2008R2.IStringCollection nodesToUse = null;

                // Resource selection, in priority order: exclusion list, explicit node list,
                // per-task core count, exclusive nodes, explicit core range, automatic.
                if (clusterArgs.NodeExclusionList != null && clusterArgs.NodeExclusionList.Count > 0)
                {
                    nodesToUse = GetNodesToUse(clusterArgs, scheduler, job);
                }
                else if (clusterArgs.NodesToUseList != null && clusterArgs.NodesToUseList.Count > 0)
                {
                    nodesToUse = scheduler.CreateStringCollection();
                    foreach (string nodeName in clusterArgs.NodesToUseList)
                    {
                        nodesToUse.Add(nodeName);
                    }
                }
                else if (clusterArgs.NumCoresPerTask != null)
                {
                    job.AutoCalculateMax = true;
                    job.AutoCalculateMin = true;
                }
                else if (clusterArgs.IsExclusive)
                {
                    job.UnitType = Microsoft.Hpc.Scheduler.Properties.JobUnitType.Node;
                    if (clusterArgs.MinimumNumberOfNodes != null)
                    {
                        // Validate the maximum BEFORE dereferencing it, so a missing value gives
                        // a clear message instead of a nullable-access failure.
                        Helper.CheckCondition(clusterArgs.MaximumNumberOfNodes != null, "must provide both MinNodes and MaxNodes, not just one");
                        job.MaximumNumberOfNodes = clusterArgs.MaximumNumberOfNodes.Value;
                        job.MinimumNumberOfNodes = clusterArgs.MinimumNumberOfNodes.Value;
                    }
                }
                else if (clusterArgs.MinimumNumberOfCores != null)
                {
                    // The minimum is known to be set here; it is the maximum that must be
                    // checked, and it must be checked before it is used.
                    Helper.CheckCondition(clusterArgs.MaximumNumberOfCores != null, "must provide both MinCores and MaxCores, not just one");
                    job.MaximumNumberOfCores = clusterArgs.MaximumNumberOfCores.Value;
                    job.MinimumNumberOfCores = clusterArgs.MinimumNumberOfCores.Value;
                    job.AutoCalculateMax     = false;
                    job.AutoCalculateMin     = false;
                }
                else
                {
                    job.AutoCalculateMax = true;
                    job.AutoCalculateMin = true;
                }

                if (!clusterArgs.OnlyDoCleanup)
                {
                    if (clusterArgs.TaskRange.IsContiguous())
                    {
                        // Clamp the requested range to the last valid task index.
                        if (clusterArgs.TaskRange.LastElement > clusterArgs.TaskCount - 1)
                        {
                            clusterArgs.TaskRange = new RangeCollection(clusterArgs.TaskRange.FirstElement, clusterArgs.TaskCount - 1);
                        }

                        // A contiguous range is submitted as one parametric sweep task.
                        v2008R2.ISchedulerTask task = CreateTask(null, clusterArgs, job, distributableObj, nodesToUse);

                        task.IsParametric = true; // IsParametric is marked as obsolete. But is it necessary to submit to a v2 cluster??

                        // NOTE(review): the sweep always covers 0..TaskCount-1 and ignores the
                        // (possibly clamped) TaskRange bounds — confirm this is intended.
                        task.StartValue = 0;
                        task.EndValue   = clusterArgs.TaskCount - 1;

                        job.AddTask(task);
                    }
                    else
                    {
                        // A non-contiguous range is submitted as one task per task number.
                        job.AddTasks(clusterArgs.TaskRange.Select(taskNum => CreateTask((int)taskNum, clusterArgs, job, distributableObj, nodesToUse)).ToArray());
                    }
                }
                else
                {
                    // Cleanup-only submissions always need the cleanup task.
                    clusterArgs.Cleanup = true;
                }

                if (clusterArgs.Cleanup)
                {
                    AddCleanupTaskToJob(clusterArgs, scheduler, job, distributableObj);
                }

                Console.WriteLine("Submitting job.");
                scheduler.SubmitJob(job, null, null);
                clusterArgs.JobID = job.Id;
                Console.WriteLine(job.Name + " submitted.");
            }
        }