/// <summary>
/// Rebuilds a <see cref="LogEntry"/> for a job that already exists on a cluster, filling the
/// entry from what the scheduler reports about that job.
/// </summary>
/// <param name="clustername">Name of the cluster whose scheduler owns the job.</param>
/// <param name="jobID">Scheduler ID of the job to recover.</param>
/// <returns>
/// A new entry whose cluster arguments, submit date, listener and local directory have been
/// populated from the job and its first task.
/// </returns>
public static LogEntry GetInstanceFromJobID(string clustername, int jobID)
{
    LogEntry entry = new LogEntry();
    entry.ClusterArgs = new ClusterSubmitterArgs();
    entry.ClusterArgs.Cluster = clustername;
    entry.ClusterArgs.JobID = jobID;

    // Enforce is applied to the connection result; presumably it raises an error carrying this
    // message when the result is false -- confirm against its definition.
    JobListener.TryConnect(clustername, jobID, null, out entry._jobListener).Enforce("Unable to recover job {0} from cluster {1}.", jobID, clustername);

    entry.Date = entry._jobListener.Job.SubmitTime;

    // The first task returned by the scheduler is used as the source of the job's directories.
    // NOTE(review): First() throws InvalidOperationException if the task list is empty.
    ISchedulerTask exampleTask = entry._jobListener.Job.GetTaskList(null, null, true).Cast<ISchedulerTask>().First();
    entry.ClusterArgs.StdErrDirName = exampleTask.StdErrFilePath;
    entry.ClusterArgs.StdOutDirName = exampleTask.StdOutFilePath;
    entry.ClusterArgs.Dir = exampleTask.WorkDirectory;

    // Paths are lower-cased so the root can be removed regardless of casing. The invariant
    // culture is used because culture-sensitive lower-casing can rewrite path characters
    // (for example 'I' under a Turkish culture) and corrupt the resulting directory.
    string clusterPath = entry.ClusterDir.ToLowerInvariant();

    // NOTE(review): the "username" segment and the d:\projects root below are hard-coded.
    string rootClusterPath = Path.Combine(HpcLibSettings.KnownClusters[entry.Cluster].StoragePath, "username");
    string relativeDir = clusterPath.Replace(rootClusterPath.ToLowerInvariant(), "");

    // A path separator is a literal character, so compare it ordinally.
    if (!relativeDir.StartsWith("\\", StringComparison.Ordinal))
    {
        relativeDir = "\\" + relativeDir;
    }

    entry.LocalDir = @"d:\projects" + relativeDir;

    return entry;
}
/// <summary>
/// Attempts to connect to a scheduler and find one of its jobs, wrapping both in a
/// <see cref="JobListener"/> when the two steps succeed.
/// </summary>
/// <param name="schedulerName">Name of the scheduler to connect to.</param>
/// <param name="jobID">ID of the job to look up on that scheduler.</param>
/// <param name="usernameOrNull">User name passed to the job lookup; may be null.</param>
/// <param name="jobListener">
/// Receives a new listener for the scheduler and job on success, or null on failure.
/// </param>
/// <returns>True when both the connection and the job lookup succeeded; otherwise false.</returns>
public static bool TryConnect(string schedulerName, int jobID, string usernameOrNull, out JobListener jobListener)
{
    IScheduler connectedScheduler;
    if (!HpcLib.TryConnect(schedulerName, out connectedScheduler))
    {
        jobListener = null;
        return false;
    }

    // The job lookup is only attempted once a scheduler connection exists.
    ISchedulerJob locatedJob;
    if (!HpcLib.TryGetJob(connectedScheduler, usernameOrNull, jobID, out locatedJob))
    {
        jobListener = null;
        return false;
    }

    jobListener = new JobListener(connectedScheduler, locatedJob);
    return true;
}
/// <summary>
/// Blocks the calling thread until the listener for the job identified by
/// <paramref name="clusterArgs"/> reports that the job is done, optionally requeueing
/// failed tasks while waiting.
/// </summary>
/// <param name="clusterArgs">Supplies the cluster name, job ID and user name used to locate the job.</param>
/// <param name="maxNumTimesToResubmitFailedTasks">
/// How many times failed tasks may be requeued. A value of zero or less means no requeue
/// handler is installed.
/// </param>
/// <returns>The job state reported by the listener once the wait is over.</returns>
private static JobState WaitForJobInternal(ClusterSubmitterArgs clusterArgs, int maxNumTimesToResubmitFailedTasks)
{
    JobListener listener;
    bool connected = JobListener.TryConnect(clusterArgs.Cluster, clusterArgs.JobID, clusterArgs.Username, out listener);
    connected.Enforce("Could not connect to scheduler {0} or find jobID {1} for user {2}.", clusterArgs.Cluster, clusterArgs.JobID, clusterArgs.Username);

    // NOTE(review): this wait handle is never disposed and the handlers below are never
    // detached within this method.
    ManualResetEvent jobFinished = new ManualResetEvent(false);

    // Release the waiting thread as soon as a job-state change leaves the job done.
    listener.OnJobStateChanged += (sender, args) =>
    {
        if (listener.JobIsDone)
        {
            jobFinished.Set();
        }
    };

    // Only watch task-state changes when the caller granted a requeue budget.
    bool requeueAllowed = maxNumTimesToResubmitFailedTasks > 0;
    if (requeueAllowed)
    {
        listener.OnTaskStateChanged += (sender, args) =>
        {
            if (listener.JobCounters.FailedTaskCount > 0)
            {
                // The budget is consumed only when failed tasks are actually present.
                if (maxNumTimesToResubmitFailedTasks-- > 0)
                {
                    Console.WriteLine(Resource.Tasks_failed);
                    RequeueFailedAndCanceledTasks(listener.Scheduler, listener.Job);
                }
            }
        };
    }

    // Skip the wait entirely when the job was already done by the time the handlers were attached.
    if (!listener.JobIsDone)
    {
        jobFinished.WaitOne();
    }

    return listener.JobState;
}
/// <summary>
/// Attaches this entry to its job on the cluster so that later job and task state changes
/// refresh the entry and are re-raised through its own events.
/// </summary>
/// <returns>
/// True when the entry's cluster is among the active clusters and a listener for the job
/// was obtained; otherwise false.
/// </returns>
public bool StartTrackingJob()
{
    bool clusterIsActive = HpcLibSettings.ActiveClusters.Contains(Cluster, StringComparer.CurrentCultureIgnoreCase);

    // No connection attempt is made for a cluster that is not active.
    if (!clusterIsActive || !JobListener.TryConnect(Cluster, ClusterArgs.JobID, ClusterArgs.Username, out _jobListener))
    {
        return false;
    }

    _jobListener.OnJobStateChanged += (sender, args) =>
    {
        RefreshJobStatus();
        RaiseJobStateChangedEvent();
    };

    _jobListener.OnTaskStateChanged += (sender, args) =>
    {
        RefreshJobStatus();
        RaiseTaskStateChangedEvent();
    };

    // Initial synchronisation: refresh when the recorded state differs from the listener's,
    // or when the job is recorded as running. The job-state event is raised only in the
    // former case; the task-state event is raised in both.
    bool recordedStateDiffers = JobState != _jobListener.JobState;
    if (recordedStateDiffers || JobState == Microsoft.Hpc.Scheduler.Properties.JobState.Running)
    {
        RefreshJobStatus();

        if (recordedStateDiffers)
        {
            RaiseJobStateChangedEvent();
        }

        RaiseTaskStateChangedEvent();
    }

    return true;
}