/// <summary>
/// Creates the view model holding the tasks of <paramref name="job"/> that were
/// allocated to at least one of the VMs in <paramref name="activeVMList"/>.
/// </summary>
/// <param name="job">Scheduler job whose tasks are listed; it is refreshed before its tasks are read.</param>
/// <param name="activeVMList">VMs whose domain names are matched against each task's allocated nodes.</param>
/// <param name="hpcSched">Not used by this constructor; kept so existing callers keep compiling.</param>
/// <param name="_CancelTaskListCommand">Command stored in <c>CancelTaskListCommand</c>.</param>
/// <param name="_RequeueTaskListCommand">Command stored in <c>RequeueTaskListCommand</c>.</param>
public TaskListViewModel(ISchedulerJob job, IEnumerable<VM> activeVMList, Scheduler hpcSched, CompositeCommand _CancelTaskListCommand, CompositeCommand _RequeueTaskListCommand)
    : base()
{
    TaskList = new ObservableCollection<TaskViewModel>();
    SelectAllCommand = new CompositeCommand();
    UnselectAllCommand = new CompositeCommand();
    CancelTaskListCommand = _CancelTaskListCommand;
    this.RequeueTaskListCommand = _RequeueTaskListCommand;

    try
    {
        job.Refresh();
        ISchedulerCollection tasks = job.GetTaskList(null, null, true);

        // Resolve the domain names once instead of once per (task, VM) pair.
        var activeNodeNames = activeVMList.Select(vm => vm.GetDomainName()).ToList();

        var tasksOnActiveNodes =
            from ISchedulerTask task in tasks
            where activeNodeNames.Any(name => task.AllocatedNodes.Contains(name))
            select task;

        foreach (ISchedulerTask task in tasksOnActiveNodes)
        {
            TaskList.Add(new TaskViewModel(task, job, this));
        }
    }
    catch (Exception ex)
    {
        // Best effort: the view model stays usable with whatever tasks were added,
        // but the failure is recorded instead of being silently discarded.
        System.Diagnostics.Trace.TraceError("TaskListViewModel: failed to load the task list: {0}", ex);
    }
}
/// <summary>
/// Sample entry point: submits a one-core job running an endless ping, then calls
/// <c>ISchedulerJobV3.Finish</c> on it and prints the job state and task output.
/// </summary>
/// <param name="args">Exactly two values: the cluster name and the user name used for submission.</param>
static void Main(string[] args)
{
    if (args.Length != 2)
    {
        Console.Error.WriteLine("Usage: Finish clustername username ");
        return;
    }

    string clustername = args[0];
    string username = args[1];

    // Dispose the scheduler so the connection is released on every exit path.
    using (IScheduler scheduler = new Scheduler())
    {
        scheduler.Connect(clustername);

        ISchedulerJob job = scheduler.CreateJob();
        job.UnitType = JobUnitType.Core;
        job.MinimumNumberOfCores = 1;
        job.MaximumNumberOfCores = 1;
        scheduler.AddJob(job);

        // "ping -t" never exits on its own, so the job keeps running until Finish is called.
        ISchedulerTask task = job.CreateTask();
        task.CommandLine = @"ping -t localhost";
        job.AddTask(task);

        scheduler.SubmitJob(job, username, null);
        Console.WriteLine("job {0} Submitted ", job.Id);

        // Pause before reading the job state back.
        Thread.Sleep(12 * 1000);
        job.Refresh();
        Console.WriteLine("Job {0} State {1}", job.Id, job.State);

        ((ISchedulerJobV3)job).Finish();

        // Pause again before reading the state and task output after Finish.
        Thread.Sleep(10000);
        job.Refresh();
        task.Refresh();
        Console.WriteLine("After finish Job {0} State {1} message {2}", job.Id, job.State, task.Output);
    }
}
/// <summary>
/// Waits up to one second for the <c>connected</c> signal and, when it is set,
/// submits a job equivalent to "job submit echo Hello World".
/// </summary>
/// <returns><c>true</c> when a job was submitted; <c>false</c> when the wait timed out.</returns>
static bool submitJobs()
{
    // Time out after one second if the scheduler connection has not been signalled.
    bool isConnected = connected.WaitOne(1000);
    if (!isConnected)
    {
        return false;
    }

    ISchedulerJob helloJob = scheduler.CreateJob();
    ISchedulerTask echoTask = helloJob.CreateTask();
    echoTask.CommandLine = "echo Hello World";
    helloJob.AddTask(echoTask);

    scheduler.SubmitJob(helloJob, null, null);
    helloJob.Refresh();
    Console.WriteLine("Job {0} was submitted", helloJob.Id);

    // Pause for two seconds before returning.
    Thread.Sleep(2000);
    return true;
}
/// <summary>
/// Requeues the failed and canceled tasks of the given job. Does nothing when the
/// refreshed job counters report no failed and no canceled tasks.
/// The whole operation runs while holding a lock on <paramref name="job"/>.
/// </summary>
/// <param name="scheduler">Scheduler used to look up the tasks and, if needed, to reconfigure and resubmit the job.</param>
/// <param name="job">Job whose failed and canceled tasks are requeued.</param>
public static void RequeueFailedAndCanceledTasks(IScheduler scheduler, ISchedulerJob job)
{
    lock (job)
    {
        job.Refresh();
        var taskCounters = job.GetCounters();

        bool nothingToRequeue = taskCounters.FailedTaskCount <= 0 && taskCounters.CanceledTaskCount <= 0;
        if (nothingToRequeue)
        {
            return;
        }

        var tasksToRequeue = GetFailedAndCanceledTasks(scheduler, job);
        foreach (ISchedulerTask pending in tasksToRequeue)
        {
            job.RequeueTask(pending.TaskId);
        }

        // A job that is not running is reconfigured and submitted again.
        bool isRunning = job.State == Microsoft.Hpc.Scheduler.Properties.JobState.Running;
        if (!isRunning)
        {
            scheduler.ConfigureJob(job.Id);
            scheduler.SubmitJob(job, null, null);
        }
    }
}
/// <summary>
/// Sample entry point: opens a durable session for EchoService, sends a batch of
/// requests, cancels every other running service task from a background work item,
/// and prints the responses that still come back.
/// </summary>
/// <param name="args">Not used.</param>
static void Main(string[] args)
{
    //change the headnode name here
    const string headnode = "[headnode]";
    const string serviceName = "EchoService";
    const int numRequests = 8;

    SessionStartInfo info = new SessionStartInfo(headnode, serviceName);

    //the sample code needs at least 2 cores in the cluster
    info.SessionResourceUnitType = SessionUnitType.Core;
    info.MaximumUnits = 2;
    info.MinimumUnits = 2;

    Console.Write("Creating a session for EchoService...");
    using (DurableSession session = DurableSession.CreateSession(info))
    {
        Console.WriteLine("done session id = {0}", session.Id);
        NetTcpBinding binding = new NetTcpBinding(SecurityMode.Transport);

        using (BrokerClient<IService1> client = new BrokerClient<IService1>(session, binding))
        {
            Console.Write("Sending {0} requests...", numRequests);
            for (int i = 0; i < numRequests; i++)
            {
                EchoOnExitRequest request = new EchoOnExitRequest(new TimeSpan(0, 0, 5));
                client.SendRequest<EchoOnExitRequest>(request, i);
            }

            client.EndRequests();
            Console.WriteLine("done");

            // cancel half of the service tasks while the requests are being processed
            ThreadPool.QueueUserWorkItem(delegate
            {
                // wait 3 seconds before trying to cancel the service tasks
                Thread.Sleep(3 * 1000);
                try
                {
                    // Dispose the scheduler so this work item does not leak its connection.
                    using (Scheduler scheduler = new Scheduler())
                    {
                        try
                        {
                            scheduler.Connect(headnode);
                        }
                        catch (Exception e)
                        {
                            Console.WriteLine("Error connecting store.{0}", e.ToString());
                            return;
                        }

                        int jobId = session.GetProperty<int>("HPC_ServiceJobId");
                        ISchedulerJob job = scheduler.OpenJob(jobId);
                        job.Refresh();
                        ISchedulerCollection taskList = job.GetTaskList(null, null, true);

                        int onFlag = 0;
                        foreach (ISchedulerTask task in taskList)
                        {
                            // only every other task (even positions) is considered for cancellation
                            if (onFlag++ % 2 != 0)
                            {
                                continue;
                            }

                            try
                            {
                                if (task.State == TaskState.Running)
                                {
                                    Console.WriteLine("Try to cancel task {0}", task.TaskId);
                                    job.CancelTask(task.TaskId);
                                    job.Commit();
                                }
                            }
                            catch (Exception ex)
                            {
                                Console.WriteLine("Got exception when trying to cancel task {0}:{1}", task.TaskId, ex.Message);
                            }
                        }
                    }
                }
                catch (Exception ex)
                {
                    Console.WriteLine("Exception when trying to cancel the service tasks. {0}", ex.Message);
                }
            });

            Console.WriteLine("Retrieving responses...");
            try
            {
                int count = 0;
                foreach (var response in client.GetResponses<EchoOnExitResponse>())
                {
                    try
                    {
                        // Reading Result happens inside the try so a failure on one
                        // response is reported without stopping the loop.
                        string reply = response.Result.EchoOnExitResult;
                        Console.WriteLine("\tReceived response for request {0}: {1}", response.GetUserData<int>(), reply);
                        count++;
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("Error occured while processing {0}-th request: {1}", response.GetUserData<int>(), ex.Message);
                    }
                }

                Console.WriteLine("Done retrieving responses.{0}/{1} responses retrieved ", count, numRequests);
            }
            catch (SessionException ex)
            {
                Console.WriteLine("SessionException while getting responses: {0}", ex.Message);
            }
            catch (Exception ex)
            {
                Console.WriteLine("Exception while getting responses: {0}", ex.Message);
            }
        }

        // Close connections and delete messages stored in the system
        session.Close();

        Console.WriteLine("Press any key to exit.");
        Console.ReadKey();
    }
}
//for best results, run this sample code in queued scheduling mode
/// <summary>
/// Sample entry point: submits a service-task job restricted to "NodeGroup1" and,
/// when that group has no idle cores left, adds a node from "NodeGroup2" to the
/// group through PowerShell, then reports whether the job's allocation grew.
/// </summary>
/// <param name="args">Not used; the cluster name comes from the CCP_SCHEDULER environment variable.</param>
static void Main(string[] args)
{
    string clusterName = Environment.GetEnvironmentVariable("CCP_SCHEDULER");

    using (IScheduler scheduler = new Scheduler())
    {
        Console.WriteLine("Connecting to {0}", clusterName);
        scheduler.Connect(clusterName);

        // The sample assumes two node groups exist: NodeGroup1 and NodeGroup2.
        IStringCollection primaryGroup = scheduler.GetNodesInNodeGroup("NodeGroup1");
        IStringCollection donorGroup = scheduler.GetNodesInNodeGroup("NodeGroup2");
        if (primaryGroup.Count == 0 || donorGroup.Count == 0)
        {
            Console.WriteLine("Node groups are not set up correctly");
            return;
        }

        // Pick the first NodeGroup2 node that is not already a member of NodeGroup1.
        string nodeToMove = "";
        foreach (string candidate in donorGroup)
        {
            if (primaryGroup.Contains(candidate))
            {
                continue;
            }

            nodeToMove = candidate;
            break;
        }

        if (string.IsNullOrEmpty(nodeToMove))
        {
            Console.WriteLine("No eligible nodes to move");
            return;
        }

        // Build a job bound to NodeGroup1; the unit type is Node and no explicit
        // minimum/maximum resource counts are set.
        ISchedulerJob growJob = scheduler.CreateJob();
        growJob.NodeGroups.Add("NodeGroup1");
        growJob.UnitType = JobUnitType.Node;

        ISchedulerTask serviceTask = growJob.CreateTask();
        serviceTask.CommandLine = "ver";
        serviceTask.Type = TaskType.Service;
        growJob.AddTask(serviceTask);

        growJob.OnTaskState += job_OnTaskState;

        Console.WriteLine("Submitting job on NodeGroup1");
        scheduler.SubmitJob(growJob, null, null);
        Console.WriteLine("Job {0} Submitted", growJob.Id);

        // Block until the `running` event is signalled.
        running.WaitOne();

        growJob.Refresh();
        int allocationCount = growJob.AllocatedNodes.Count;
        Console.WriteLine("Number of allocated nodes: {0}", allocationCount);

        // Sum the idle cores over every node currently in NodeGroup1.
        int idleCores = 0;
        foreach (string memberName in primaryGroup)
        {
            ISchedulerNode member = scheduler.OpenNodeByName(memberName);
            idleCores += member.GetCounters().IdleCoreCount;
        }

        if (idleCores != 0)
        {
            Console.WriteLine("There are still idle cores in the nodegroup");
            return;
        }

        // No idle cores remain, so add the NodeGroup2 node to NodeGroup1 to let the job grow.
        running.Reset();

        // Node-group membership is changed here by invoking the HPC PowerShell snap-in.
        string powershellScript = String.Format("add-pssnapin microsoft.hpc; " + "add-hpcgroup -scheduler {0} -name {1} -nodename {2}", clusterName, "NodeGroup1", nodeToMove);
        using (PowerShell shell = PowerShell.Create())
        {
            shell.AddScript(powershellScript, true);
            shell.Invoke();
        }

        running.WaitOne();

        Console.WriteLine("(Waiting 5 seconds for job to update the scheduler)");
        Thread.Sleep(5000);

        growJob.Refresh();
        int newAllocationCount = growJob.AllocatedNodes.Count;

        // Report only when the allocation actually increased.
        if (newAllocationCount > allocationCount)
        {
            Console.WriteLine("Job has grown to {0} nodes", newAllocationCount);
        }
    }
}
/// <summary>
/// Sample entry point: submits a 1-500 parametric sweep of "echo *", prints the job
/// progress once per second for five seconds, then overwrites the progress value and
/// progress message and waits for the next job-status signal.
/// </summary>
/// <param name="args">Not used; the cluster name comes from the CCP_SCHEDULER environment variable.</param>
static void Main(string[] args)
{
    string clusterName = Environment.GetEnvironmentVariable("CCP_SCHEDULER");

    using (IScheduler scheduler = new Scheduler())
    {
        Console.WriteLine("Connecting to cluster {0}", clusterName);
        scheduler.Connect(clusterName);

        // Equivalent command line: job submit /parametric:1-500 "echo *"
        Console.WriteLine("Creating parametric sweep job");
        ISchedulerJob sweepJob = scheduler.CreateJob();

        ISchedulerTask sweepTask = sweepJob.CreateTask();
        sweepTask.CommandLine = "echo *";
        sweepTask.Type = TaskType.ParametricSweep;
        sweepTask.StartValue = 1;
        sweepTask.IncrementValue = 1;
        sweepTask.EndValue = 500;
        sweepJob.AddTask(sweepTask);

        // Subscribe before submitting so job state changes reach job_OnJobState.
        sweepJob.OnJobState += job_OnJobState;

        // Submission may prompt for credentials when none are cached.
        Console.WriteLine("Submitting job...");
        scheduler.SubmitJob(sweepJob, null, null);
        Console.WriteLine("Job submitted");

        // Block until jobStatus is signalled, then re-arm it for the later wait.
        jobStatus.WaitOne();
        jobStatus.Reset();

        // Poll the progress five times, one second apart, rewriting the same console line.
        int pollsDone = 0;
        while (pollsDone < 5)
        {
            sweepJob.Refresh();
            Console.Write("Current job progress: " + sweepJob.Progress);
            Console.SetCursorPosition(0, Console.CursorTop);
            Thread.Sleep(1000);
            pollsDone++;
        }

        // Progress is writable: set a custom value and commit it to the server.
        Console.WriteLine();
        Console.WriteLine("Manually changing job progress");
        sweepJob.Progress = 0;
        sweepJob.Commit();
        Console.WriteLine("Current job progress: " + sweepJob.Progress);

        // A progress message can be set and committed the same way.
        Console.WriteLine("Setting job progress message");
        sweepJob.ProgressMessage = "Job is still running";
        sweepJob.Commit();
        Console.WriteLine("Progress message: " + sweepJob.ProgressMessage);

        Console.WriteLine("Waiting for the job to finish...");
        jobStatus.WaitOne();

        // The value is printed without refreshing the job object again.
        Console.WriteLine("Finished job progress: " + sweepJob.Progress);

        scheduler.Close();
    }
}
/// <summary>
/// Submits a parametric sweep job that runs the Aqsis render command for indices
/// 0..<paramref name="endValue"/> and blocks until the job finishes, fails or is canceled.
/// </summary>
/// <param name="endValue">Last index of the parametric sweep (the sweep starts at 0, step 1).</param>
/// <returns>
/// <c>true</c> only when the job reached the Finished state; <c>false</c> when the
/// "HeadNodeName" setting is missing, the job failed or was canceled, or an exception occurred.
/// </returns>
public static bool CreateJob(int endValue)
{
    string headnode = ConfigurationManager.AppSettings["HeadNodeName"];
    string targetNodes = ConfigurationManager.AppSettings["NodeGroup"];

    if (string.IsNullOrEmpty(headnode))
    {
        return false;
    }

    bool retVal = false;
    try
    {
        // Dispose the scheduler so the connection is released even when submission throws.
        using (Scheduler scheduler = new Scheduler())
        {
            scheduler.Connect(headnode);

            // Define job settings
            ISchedulerJob job = scheduler.CreateJob();
            job.Name = "Aqsis on Azure";
            job.MinimumNumberOfCores = 1;
            job.MaximumNumberOfCores = 1;
            job.UnitType = JobUnitType.Core;

            // Let the scheduler calculate the required resources for the job
            job.AutoCalculateMax = true;
            job.NodeGroups.Add(targetNodes);

            // Create a parametric sweep task
            ISchedulerTask task = job.CreateTask();
            task.Type = TaskType.ParametricSweep;
            task.StartValue = 0;
            task.EndValue = endValue;
            task.IncrementValue = 1;

            // Run the aqsis command to render the images.
            // The (*) wildcard is used as a placeholder for the current index value.
            task.CommandLine = @"%CCP_PACKAGE_ROOT%\Aqsis\bin\run.cmd frame-*";
            task.WorkDirectory = "%CCP_PACKAGE_ROOT%";

            Console.WriteLine("Running job");
            job.AddTask(task);
            scheduler.SubmitJob(job, username: null, password: null);

            // Poll every five seconds until the job reaches one of the three end states.
            job.Refresh();
            while (job.State != JobState.Finished && job.State != JobState.Canceled && job.State != JobState.Failed)
            {
                Thread.Sleep(5000);
                job.Refresh();
            }

            switch (job.State)
            {
                case JobState.Canceled:
                    Console.WriteLine("Job canceled");
                    break;
                case JobState.Finished:
                    Console.WriteLine("Job finished");
                    retVal = true;
                    break;
                case JobState.Failed:
                    Console.WriteLine("Job failed");
                    break;
            }
        }
    }
    catch (Exception ex)
    {
        Utility.Logger("CreateJob Failed. Exception Message: " + ex.Message);
    }

    return retVal;
}
/// <summary>
/// Blocks until the specified job is observed in the Finished, Failed or Canceled state.
/// </summary>
/// <param name="scheduler">Scheduler whose reconnect notifications trigger a re-check of the job state.</param>
/// <param name="job">Job to wait for.</param>
static void WaitForJob(IScheduler scheduler, ISchedulerJob job)
{
    const JobState terminalStates = JobState.Finished | JobState.Failed | JobState.Canceled;
    ManualResetEvent recheckSignal = new ManualResetEvent(false);

    // Job state handler: log every transition and wake the waiter on a terminal state.
    EventHandler<JobStateEventArg> onJobState = (sender, e) =>
    {
        string line = String.Format(" Job {0} state is now {1}.", job.Id, e.NewState);
        Console.WriteLine(line);

        bool reachedTerminalState = (e.NewState & terminalStates) != 0;
        if (reachedTerminalState)
        {
            recheckSignal.Set();
        }
    };

    // Connection handler: a reconnect means job state events may have been missed
    // while disconnected, so wake the waiter to re-read the job state.
    EventHandler<ConnectionEventArg> onConnectionEvent = (sender, e) =>
    {
        if (e.Code != ConnectionEventCode.EventReconnect)
        {
            Console.WriteLine(String.Format(" schedulerConnectionEvent {0}.", e.Code));
            return;
        }

        Console.WriteLine(" Reconnect event detected");
        recheckSignal.Set();
    };

    Console.WriteLine(String.Format("Waiting for job {0}...", job.Id));

    // Handlers are attached before the first Reset below.
    job.OnJobState += onJobState;
    scheduler.OnSchedulerReconnect += onConnectionEvent;

    try
    {
        while (true)
        {
            // Reset first, then Refresh: a transition that fires after the Reset sets
            // the event again, so the WaitOne below returns instead of missing it.
            recheckSignal.Reset();
            job.Refresh();

            if ((job.State & terminalStates) != 0)
            {
                Console.WriteLine(String.Format("Job {0} completed with state {1}.", job.Id, job.State));
                return;
            }

            recheckSignal.WaitOne();
        }
    }
    finally
    {
        // Detach from the same job and scheduler objects the handlers were attached to.
        job.OnJobState -= onJobState;
        scheduler.OnSchedulerReconnect -= onConnectionEvent;
    }
}
/// <summary>
/// Job monitor entry point. Connects to the scheduler, optionally prints the jobs
/// returned for the time recorded in the timestamp file, waits until at least one
/// job is running, and then repeatedly prints one log line per tracked job.
/// </summary>
/// <param name="args">
/// Command-line arguments parsed by <c>getArgs</c>. Recognised keys: "scheduler"
/// (cluster name, default "localhost"), "wait" (seconds between polls, default 60)
/// and "outputPath" (directory for the timestamp file, default ".").
/// </param>
/// <returns>-1 when the scheduler connection fails or the polling loop throws; the loop does not end otherwise.</returns>
static int Main(string[] args)
{
    Dictionary<string, string> arguments = getArgs(args);
    int secs = 60;
    string clusterName = "localhost";

    string schedulerArg;
    if (arguments.TryGetValue("scheduler", out schedulerArg))
    {
        clusterName = schedulerArg;
    }

    try
    {
        string waitArg;
        if (arguments.TryGetValue("wait", out waitArg))
        {
            secs = int.Parse(waitArg);
        }
    }
    catch (Exception e)
    {
        warn("" + DateTime.Now.ToString() + "\t Invalid sleep argument ! Switching ti default (" + secs + ")");
        warn(e.StackTrace);
    }

    IScheduler scheduler = new Scheduler();
    try
    {
        scheduler.Connect(clusterName);
    }
    catch (Exception e)
    {
        System.Console.Error.WriteLine("" + DateTime.Now.ToString() + "\t Could not connect to " + clusterName + " ! Exiting !!!");
        System.Console.Error.WriteLine(e.StackTrace);
        return(-1);
    }

    // outputPath is used as a directory below, so validate it with Directory.Exists;
    // File.Exists returns false for directories and always forced the "." fallback.
    string requestedPath;
    if (arguments.TryGetValue("outputPath", out requestedPath) && Directory.Exists(requestedPath))
    {
        outputPath = requestedPath;
    }
    else
    {
        outputPath = ".";
    }

    string timeStampFile = string.Format("{0}\\jobTimeStampFile.txt", outputPath);
    debug("\t timestampfile: path=" + timeStampFile + "");

    if (File.Exists(timeStampFile))
    {
        DateTime dt = getMinimumStartDateFromFile(timeStampFile);
        debug("\t Using date from timestampfile: path=" + timeStampFile + "\n" + "\t Time Used : " + dt.ToString());

        // TotalHours is the whole elapsed span; Hours is only the 0-23 component
        // and therefore was always below 24.
        if ((DateTime.Now - dt).TotalHours < 24)
        {
            Grid tempGrid = new Grid(scheduler);

            // The returned dictionary is not needed; the jobs are read from lastGetJobsRun.
            // NOTE(review): presumably GetJobs populates lastGetJobsRun - confirm in Grid.
            tempGrid.GetJobs(dt);
            ISchedulerCollection jobValues = tempGrid.lastGetJobsRun;
            foreach (ISchedulerJob job in jobValues)
            {
                System.Console.WriteLine(getLogLine(job).Trim());
            }
        }
        else
        {
            debug("\t Serice stop time is more than 24hours. [" + dt + "]");
        }
    }

    Grid grid = new Grid(scheduler);
    try
    {
        // Phase 1: poll until the grid reports at least one running job.
        Dictionary<int, ISchedulerJob> runningJobs = new Dictionary<int, ISchedulerJob>();
        bool pollingGridFlag = true;
        while (pollingGridFlag)
        {
            runningJobs = grid.GetJobs(JobState.Running);
            if (runningJobs.Count > 0)
            {
                pollingGridFlag = false;
                info("There are now running jobs. Sleeping (" + runningJobPollDuration + ")");
                continue;
            }

            warn("There are no running jobs. Sleeping (" + runningJobPollDuration + ")");
            Thread.Sleep(runningJobPollDuration * 1000);
        }

        // Phase 2: each cycle prints the jobs that left the running set since the
        // previous cycle, the jobs from GetJobsByStartTime, and the queued jobs.
        HashSet<int> curr = new HashSet<int>();
        while (true)
        {
            Dictionary<int, ISchedulerJob> allJobs = new Dictionary<int, ISchedulerJob>();
            List<int> jobIds = new List<int>();

            Dictionary<int, ISchedulerJob> prevRunningJobs = runningJobs;
            runningJobs = grid.GetJobsByStartTime();

            HashSet<int> prev = curr;
            curr = new HashSet<int>(runningJobs.Keys);

            // Ids present in the previous cycle but missing from the current one.
            HashSet<int> diff = grid.getSetDifference(prev, curr);
            foreach (int jobId in diff)
            {
                ISchedulerJob job = prevRunningJobs[jobId];
                job.Refresh();
                allJobs[job.Id] = job;
                jobIds.Add(job.Id);
            }

            foreach (ISchedulerJob job in runningJobs.Values)
            {
                allJobs[job.Id] = job;
                jobIds.Add(job.Id);
            }

            foreach (ISchedulerJob job in grid.GetJobs(JobState.Queued).Values)
            {
                allJobs[job.Id] = job;
                jobIds.Add(job.Id);
            }

            jobIds.Sort(new JobComparer(allJobs));
            foreach (int jobId in jobIds)
            {
                System.Console.WriteLine(getLogLine(allJobs[jobId]).Trim());
            }

            // Persist the grid's minimum job start time so the next start can read it back.
            File.WriteAllText(timeStampFile, grid.MinJobStartTime.ToString());
            Thread.Sleep(secs * 1000);
        }
    }
    catch (Exception e)
    {
        // The polling loop only ends through an exception: record it and signal
        // failure to the caller instead of logging a placeholder and returning 0.
        warn("" + DateTime.Now.ToString() + "\t Polling loop terminated: " + e.Message);
        debug(e.StackTrace);
        return(-1);
    }
}
/// <summary>
/// Refreshes the underlying scheduler job and updates this entry's state, task
/// counters, failed-task list and (once the job is finished) wall and CPU time.
/// Raises the task-state and/or job-state changed events when the corresponding
/// values differ from their values before the refresh. Does nothing when no job is attached.
/// </summary>
public void RefreshJobStatus()
{
    if (_job == null)
    {
        return;
    }

    // Snapshot of the current values, used below to detect changes.
    LogEntry before = (LogEntry)this.MemberwiseClone();

    _job.Refresh();
    JobState = _job.State;

    ISchedulerJobCounters jobCounters = _job.GetCounters();
    // Format: queued/running/failed/finished task counts.
    string counterSummary = string.Format("{0}/{1}/{2}/{3}", jobCounters.QueuedTaskCount, jobCounters.RunningTaskCount, jobCounters.FailedTaskCount, jobCounters.FinishedTaskCount);
    FailedTaskCount = jobCounters.FailedTaskCount;
    TaskStatus = counterSummary;

    // Render the failed task ids through RangeCollection; empty when there are none.
    string failedTasksText = "";
    if (FailedTaskCount > 0)
    {
        IEnumerable<ISchedulerTask> failed = GetFailedTasks(_job);
        string joinedIds = failed.Select(t => t.TaskId.JobTaskId).StringJoin(",");
        if (joinedIds != "")
        {
            failedTasksText = RangeCollection.Parse(joinedIds).ToString();
        }
    }
    FailedTasks = failedTasksText;

    if (JobState == JobState.Finished)
    {
        // Both durations are only computed while they are still zero.
        if (WallTime.Ticks == 0)
        {
            // Measured from the job's submit time to its end time.
            DateTime submittedAt = _job.SubmitTime;
            DateTime endedAt = _job.EndTime;
            WallTime = endedAt - submittedAt;
        }

        if (CpuTime.Ticks == 0)
        {
            // Sum of (EndTime - StartTime) over every task of the job.
            var allTasks = _job.GetTaskList(null, null, true).Cast<ISchedulerTask>();
            long summedTicks = allTasks.Select(t => (t.EndTime - t.StartTime).Ticks).Sum();
            CpuTime = new TimeSpan(summedTicks);
        }
    }

    bool taskStateChanged = FailedTasks != before.FailedTasks || TaskStatus != before.TaskStatus;
    bool jobStateChanged = JobState != before.JobState
        || (FailedTaskCount == 0) != (before.FailedTaskCount == 0)
        || string.IsNullOrEmpty(FailedTasks) != string.IsNullOrEmpty(before.FailedTasks)
        || CpuTime != before.CpuTime
        || WallTime != before.WallTime;

    if (taskStateChanged)
    {
        RaiseTaskStateChangedEvent();
    }

    if (jobStateChanged)
    {
        RaiseJobStateChangedEvent();
    }
}