Exemplo n.º 1
0
 /// <summary>
 /// Fetches the current counters from <c>_scheduler</c>.
 /// </summary>
 /// <returns>
 /// The counters object returned by the scheduler, or <c>null</c> when the
 /// call throws for any reason.
 /// </returns>
 /// <remarks>
 /// On any exception the <c>_scheduler</c> field is reset to <c>null</c>.
 /// </remarks>
 private ISchedulerCounters GetCounters()
 {
     ISchedulerCounters result;

     try
     {
         result = _scheduler.GetCounters();
     }
     catch (Exception)
     {
         // Any failure drops the scheduler reference and yields no counters.
         _scheduler = null;
         result = null;
     }

     return result;
 }
Exemplo n.º 2
0
        /// <summary>
        /// Connects <paramref name="scheduler"/> to <paramref name="cluster"/> and reads
        /// its idle-core counter.
        /// </summary>
        /// <param name="scheduler">Scheduler instance used for the connection.</param>
        /// <param name="cluster">Name passed to <c>scheduler.Connect</c>.</param>
        /// <returns>
        /// The <c>IdleCores</c> counter value (note: cores, despite the method name),
        /// or 0 when connecting or reading the counters throws.
        /// </returns>
        protected static int GetFreeNodes(IScheduler scheduler, string cluster)
        {
            try
            {
                scheduler.Connect(cluster);
                return scheduler.GetCounters().IdleCores;
            }
            catch
            {
                // That's okay: any failure is reported as zero idle cores.
                return 0;
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Prints the number of running jobs for each hard-coded HPC head node,
        /// or a "Not accessible" line when connecting or querying fails.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            string[] hpcNames = { "server1", "server2" };

            // A single scheduler object is reused for every server in the list.
            Scheduler scheduler = new Scheduler();

            for (int index = 0; index < hpcNames.Length; index++)
            {
                string serverPath = hpcNames[index];

                try
                {
                    scheduler.Connect(serverPath);

                    int runningJobs = scheduler.GetCounters().RunningJobs;
                    Console.WriteLine("[{0}] running jobs (queue): {1}", serverPath, runningJobs);
                }
                catch (Exception)
                {
                    // Any failure for this server is reported and the loop moves on.
                    Console.WriteLine("[{0}] Not accessible", serverPath);
                }
            }
        }
Exemplo n.º 4
0
        /// <summary>
        /// Creates and submits the cluster job for experiment <paramref name="newID"/>.
        /// The job consists of one "Populate" task, one "Worker" task per allocated unit
        /// (each depending on "Populate"), a "Recovery" task depending on "Worker", and a
        /// "Delete worker" task depending on "Recovery".
        /// </summary>
        /// <param name="db">Database identifier passed to <c>Connect</c> and forwarded on the task command lines.</param>
        /// <param name="isNew">When true, the submitted job's ID is stored in the experiment's <c>ClusterJobID</c> column.</param>
        /// <param name="newID">Experiment ID; used in the job name, the task command lines and the SQL statements.</param>
        /// <param name="cluster">Name passed to <c>scheduler.Connect</c>.</param>
        /// <param name="nodegroup">Node group to add to the job; the literal <c>"&lt;Any&gt;"</c> adds none.</param>
        /// <param name="priority">Priority forwarded to <c>SetPriority</c>.</param>
        /// <param name="locality">Allocation unit: <c>"Socket"</c>, <c>"Core"</c> or <c>"Node"</c>.</param>
        /// <param name="limitsMin">Minimum number of units as text; blank or unparsable means no minimum is set.</param>
        /// <param name="limitsMax">Maximum number of units as text; blank, unparsable or 0 means the cluster total for the chosen unit.</param>
        /// <param name="sharedDir">Directory each task changes into (<c>pushd</c>) before running the executor.</param>
        /// <param name="executor">Path of the worker executable; only its file name is used on the command lines.</param>
        /// <param name="jobTemplate">Job template name, or <c>null</c> to leave the template unset.</param>
        /// <param name="jobTimeout">Value assigned to the job's <c>Runtime</c>; 0 leaves it unset.</param>
        /// <param name="taskTimeout">Value assigned to each task's <c>Runtime</c>; 0 leaves it unset.</param>
        /// <param name="nworkers">Not used by this method.</param>
        /// <exception cref="ArgumentException"><paramref name="locality"/> is not one of the supported values.</exception>
        /// <remarks>
        /// If anything fails after the job object has been created, the experiment's rows are
        /// deleted from <c>JobQueue</c> and <c>Experiments</c>, a best-effort cancel of the job is
        /// attempted, and the original exception is rethrown with its stack trace intact.
        /// </remarks>
        public void SubmitHPCJob(string db, bool isNew, int newID, string cluster, string nodegroup, int priority,
                                 string locality, string limitsMin, string limitsMax, string sharedDir,
                                 string executor,
                                 string jobTemplate,
                                 int jobTimeout, int taskTimeout,
                                 int nworkers = 0)
        {
            string limitsMinTrimmed = limitsMin.Trim();
            string limitsMaxTrimmed = limitsMax.Trim();

            // NOTE(review): the connection returned by Connect is not closed here; ownership
            // is not visible from this method, so it is left as-is.
            SqlConnection sql = Connect(db);

            scheduler.Connect(cluster);
            ISchedulerJob hpcJob = scheduler.CreateJob();

            if (jobTemplate != null)
            {
                hpcJob.SetJobTemplate(jobTemplate);
            }
            if (jobTimeout != 0)
            {
                hpcJob.Runtime = jobTimeout;
            }
            hpcJob.FailOnTaskFailure = false;

            try
            {
                if (nodegroup != "<Any>")
                {
                    hpcJob.NodeGroups.Add(nodegroup);
                }
                hpcJob.Name        = "Z3 Performance Test (" + newID + ")";
                hpcJob.IsExclusive = true;
                hpcJob.CanPreempt  = true;
                SetPriority(hpcJob, priority);
                hpcJob.Project = "Z3";

                // Validate the locality up front so that an unknown value fails before
                // any counters are queried or tasks are created.
                switch (locality)
                {
                    case "Socket":
                        hpcJob.UnitType = JobUnitType.Socket;
                        break;
                    case "Core":
                        hpcJob.UnitType = JobUnitType.Core;
                        break;
                    case "Node":
                        hpcJob.UnitType = JobUnitType.Node;
                        break;
                    default:
                        throw new ArgumentException("Unknown locality.");
                }

                // Blank or unparsable limits fall back to 0, which means "not specified".
                uint min;
                uint max;
                if (!uint.TryParse(limitsMinTrimmed, out min))
                {
                    min = 0;
                }
                if (!uint.TryParse(limitsMaxTrimmed, out max))
                {
                    max = 0;
                }

                ISchedulerCounters ctrs = scheduler.GetCounters();
                switch (locality)
                {
                    case "Socket":
                        if (min > 0)
                        {
                            hpcJob.MinimumNumberOfSockets = (int)min;
                        }
                        max = (max == 0) ? (uint)ctrs.TotalSockets : max;
                        hpcJob.MaximumNumberOfSockets = (int)max;
                        break;
                    case "Core":
                        if (min > 0)
                        {
                            hpcJob.MinimumNumberOfCores = (int)min;
                        }
                        max = (max == 0) ? (uint)ctrs.TotalCores : max;
                        hpcJob.MaximumNumberOfCores = (int)max;
                        break;
                    case "Node":
                        if (min > 0)
                        {
                            hpcJob.MinimumNumberOfNodes = (int)min;
                        }
                        max = (max == 0) ? (uint)ctrs.TotalNodes : max;
                        hpcJob.MaximumNumberOfNodes = (int)max;
                        break;
                }

                // Progress steps: populate (1) + one per worker (max) + recovery (1) + deletion (1).
                // Percentages are computed against this total so they never exceed 100 and
                // never divide by zero when the cluster reports no units.
                double progressTotal = max + 3.0;

                // Add population task.
                if (WorkerReportsProgress)
                {
                    ReportProgress(Convert.ToInt32(100.0 * 1 / progressTotal));
                }
                ISchedulerTask populateTask = hpcJob.CreateTask();
                SetResources(populateTask, locality);
                populateTask.IsRerunnable = false;
                populateTask.IsExclusive  = false;
                // Every task changes into the shared directory and runs the executor by file name.
                string pushdPrefix  = "pushd " + sharedDir + " & ";
                string executorName = Path.GetFileName(executor);
                populateTask.CommandLine = pushdPrefix + executorName + " " + newID + " ? \"" + db + "\"";
                populateTask.Name        = "Populate";
                if (taskTimeout != 0)
                {
                    populateTask.Runtime = taskTimeout;
                }
                populateTask.FailJobOnFailure = true;
                hpcJob.AddTask(populateTask);

                for (int i = 0; i < max; i++)
                {
                    // Add worker task (progress step i + 2: the populate task was step 1).
                    if (WorkerReportsProgress)
                    {
                        ReportProgress(Convert.ToInt32(100.0 * (i + 2) / progressTotal));
                    }
                    ISchedulerTask task = hpcJob.CreateTask();
                    SetResources(task, locality);
                    task.CommandLine  = pushdPrefix + executorName + " " + newID + " \"" + db + "\"";
                    task.IsExclusive  = false;
                    task.IsRerunnable = true;
                    task.DependsOn.Add("Populate");
                    task.Name = "Worker";
                    if (taskTimeout != 0)
                    {
                        task.Runtime = taskTimeout;
                    }
                    // A failing worker must not fail the job. This applies to the worker task
                    // itself; the populate task keeps FailJobOnFailure = true as set above.
                    task.FailJobOnFailure = false;
                    hpcJob.AddTask(task);
                }

                // Add recovery task.
                if (WorkerReportsProgress)
                {
                    ReportProgress(Convert.ToInt32(100.0 * (progressTotal - 1) / progressTotal));
                }
                ISchedulerTask rTask = hpcJob.CreateTask();
                SetResources(rTask, locality);
                rTask.IsRerunnable = true;
                rTask.IsExclusive  = false;
                rTask.CommandLine = pushdPrefix + executorName + " " + newID + " ! \"" + db + "\"";
                rTask.DependsOn.Add("Worker");
                rTask.Name = "Recovery";
                if (taskTimeout != 0)
                {
                    rTask.Runtime = taskTimeout;
                }
                rTask.FailJobOnFailure = true;
                hpcJob.AddTask(rTask);

                // Add deletion task (removes the worker executable from the shared directory).
                if (WorkerReportsProgress)
                {
                    ReportProgress(Convert.ToInt32(100.0 * progressTotal / progressTotal));
                }
                ISchedulerTask dTask = hpcJob.CreateTask();
                SetResources(dTask, locality);
                dTask.IsRerunnable = true;
                dTask.IsExclusive  = false;
                dTask.CommandLine = pushdPrefix + "del " + executorName;
                dTask.Name        = "Delete worker";
                dTask.DependsOn.Add("Recovery");
                if (taskTimeout != 0)
                {
                    dTask.Runtime = taskTimeout;
                }
                dTask.FailJobOnFailure = false;
                hpcJob.AddTask(dTask);

                scheduler.AddJob(hpcJob);
                scheduler.SubmitJob(hpcJob, null, null);

                if (isNew)
                {
                    // Both values are integers, so the concatenated statement cannot carry injected SQL.
                    using (SqlCommand cmd = new SqlCommand("UPDATE Experiments SET ClusterJobID=" + hpcJob.Id.ToString() + " WHERE ID=" + newID.ToString() + "; ", sql))
                    {
                        cmd.CommandTimeout = 0;
                        cmd.ExecuteNonQuery();
                    }
                }
            }
            catch (Exception)
            {
                // Roll back the experiment's database rows before dealing with the job.
                using (SqlCommand cmd = new SqlCommand("DELETE FROM JobQueue WHERE ExperimentID=" + newID + "; DELETE FROM Experiments WHERE ID=" + newID, sql))
                {
                    cmd.CommandTimeout = 0;
                    cmd.ExecuteNonQuery();
                }

                if (hpcJob.State == JobState.Configuring ||
                    hpcJob.State == JobState.ExternalValidation ||
                    hpcJob.State == JobState.Queued ||
                    hpcJob.State == JobState.Running ||
                    hpcJob.State == JobState.Submitted ||
                    hpcJob.State == JobState.Validating)
                {
                    // Best-effort cancel: a failure here must not hide the original error.
                    try { scheduler.CancelJob(hpcJob.Id, "Aborted."); }
                    catch (Exception) { }
                }

                // Rethrow without resetting the stack trace.
                throw;
            }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Creates and submits a "catchall" cluster job consisting of one "Worker" task per
        /// allocated unit, each running the executor against <paramref name="db"/>.
        /// </summary>
        /// <param name="db">Database identifier forwarded on each worker's command line.</param>
        /// <param name="cluster">Name passed to <c>scheduler.Connect</c>.</param>
        /// <param name="locality">
        /// Allocation unit: <c>"Socket"</c>, <c>"Core"</c> or <c>"Node"</c>. Any other value leaves
        /// the job's unit type and limits untouched.
        /// </param>
        /// <param name="priority">Priority forwarded to <c>SetPriority</c>.</param>
        /// <param name="nodegroup">Node group to add to the job; the literal <c>"&lt;Any&gt;"</c> adds none.</param>
        /// <param name="executor">Path of the worker executable; tasks run in its directory.</param>
        /// <param name="min">Minimum number of units as text; blank or unparsable means no minimum is set.</param>
        /// <param name="max">Maximum number of units as text; blank, unparsable or 0 means the cluster total for a recognised locality.</param>
        /// <param name="jobTemplate">Job template name, or <c>null</c> to leave the template unset.</param>
        /// <param name="jobTimeout">Value assigned to the job's <c>Runtime</c>; 0 leaves it unset.</param>
        /// <param name="taskTimeout">Value assigned to each task's <c>Runtime</c>; 0 leaves it unset.</param>
        /// <remarks>
        /// If building or submitting the job fails, a best-effort cancel is attempted and the
        /// original exception is rethrown with its stack trace intact.
        /// </remarks>
        public void SubmitCatchall(string db, string cluster, string locality, int priority, string nodegroup, string executor, string min, string max, string jobTemplate, int jobTimeout, int taskTimeout)
        {
            scheduler.Connect(cluster);
            ISchedulerJob hpcJob = scheduler.CreateJob();

            if (jobTemplate != null)
            {
                hpcJob.SetJobTemplate(jobTemplate);
            }
            if (jobTimeout != 0)
            {
                hpcJob.Runtime = jobTimeout;
            }
            try
            {
                if (nodegroup != "<Any>")
                {
                    hpcJob.NodeGroups.Add(nodegroup);
                }
                hpcJob.Name        = "Z3 Performance Test (catchall)";
                hpcJob.IsExclusive = true;
                hpcJob.CanPreempt  = true;
                SetPriority(hpcJob, priority);
                hpcJob.Project = "Z3";

                // Blank or unparsable limits fall back to 0, which means "not specified".
                uint fmin;
                uint fmax;
                if (!uint.TryParse(min.Trim(), out fmin))
                {
                    fmin = 0;
                }
                if (!uint.TryParse(max.Trim(), out fmax))
                {
                    fmax = 0;
                }

                ISchedulerCounters ctrs = scheduler.GetCounters();
                switch (locality)
                {
                    case "Socket":
                        hpcJob.UnitType = JobUnitType.Socket;
                        if (fmin > 0)
                        {
                            hpcJob.MinimumNumberOfSockets = (int)fmin;
                        }
                        fmax = (fmax == 0) ? (uint)ctrs.TotalSockets : fmax;
                        hpcJob.MaximumNumberOfSockets = (int)fmax;
                        break;
                    case "Core":
                        hpcJob.UnitType = JobUnitType.Core;
                        if (fmin > 0)
                        {
                            hpcJob.MinimumNumberOfCores = (int)fmin;
                        }
                        fmax = (fmax == 0) ? (uint)ctrs.TotalCores : fmax;
                        hpcJob.MaximumNumberOfCores = (int)fmax;
                        break;
                    case "Node":
                        hpcJob.UnitType = JobUnitType.Node;
                        if (fmin > 0)
                        {
                            hpcJob.MinimumNumberOfNodes = (int)fmin;
                        }
                        fmax = (fmax == 0) ? (uint)ctrs.TotalNodes : fmax;
                        hpcJob.MaximumNumberOfNodes = (int)fmax;
                        break;
                }

                for (int i = 0; i < fmax; i++)
                {
                    // Add worker task.
                    if (WorkerReportsProgress)
                    {
                        ReportProgress(Convert.ToInt32(100.0 * (i + 1) / (double)fmax));
                    }
                    ISchedulerTask task = hpcJob.CreateTask();
                    SetResources(task, locality);

                    // Resolve the executor's directory once; it is both the working directory
                    // and the pushd target on the command line.
                    string workDir = Path.GetDirectoryName(Path.GetFullPath(executor));
                    task.WorkDirectory = workDir;
                    task.CommandLine   = "pushd " + workDir + " & " + Path.GetFileName(executor) + " \"" + db + "\"";
                    task.IsExclusive  = false;
                    task.IsRerunnable = true;
                    task.Name         = "Worker";
                    // A timeout of 0 means "not specified", matching the job-level check above.
                    if (taskTimeout != 0)
                    {
                        task.Runtime = taskTimeout;
                    }

                    hpcJob.AddTask(task);
                }

                scheduler.AddJob(hpcJob);
                scheduler.SubmitJob(hpcJob, null, null);
            }
            catch (Exception)
            {
                // Best-effort cancel: a failure here must not hide the original error.
                try { scheduler.CancelJob(hpcJob.Id, "Aborted."); }
                catch (Exception) { }

                // Rethrow without resetting the stack trace.
                throw;
            }
        }