/// <summary>
/// Rebuild the clusterJobs field from the job summaries saved as XML files
/// in the "jobs" folder of the cache directory.
/// </summary>
/// <param name="virtualCluster">Unused.</param>
/// <param name="manager">Communication manager.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    string cacheRoot = CachedClusterResidentObject.CacheDirectory;
    if (string.IsNullOrEmpty(cacheRoot))
    {
        // No cache directory is configured: the job list stays empty.
        return;
    }

    string summaryFolder = Path.Combine(cacheRoot, "jobs");
    if (!Directory.Exists(summaryFolder))
    {
        Directory.CreateDirectory(summaryFolder);
    }

    foreach (string summaryFile in Directory.GetFiles(summaryFolder, "*.xml"))
    {
        manager.Token.ThrowIfCancellationRequested();

        DryadLinqJobSummary summary = Utilities.LoadXml<DryadLinqJobSummary>(summaryFile);

        // The key is prefixed with the cluster name because two clusters may use the same job id.
        string key = summary.Cluster + "-" + summary.ClusterJobId;
        ClusterJobInformation entry = new ClusterJobInformation(
            this.Config.Name,
            summary.Cluster,
            key,
            summary.Name,
            summary.User,
            summary.Date,
            summary.EndTime - summary.Date,
            summary.Status);
        entry.SetAssociatedSummary(summary);

        if (!this.clusterJobs.ContainsKey(key))
        {
            this.clusterJobs.Add(key, entry);
        }
        else
        {
            manager.Status("Duplicate job id, cannot insert in cache " + summary.AsIdentifyingString(), StatusKind.Error);
        }
    }

    manager.Progress(100);
}
/// <summary>
/// Rebuild the cluster job list from the subfolders of the configured jobs folder;
/// each subfolder name is used as the job id.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not referenced by this implementation.</param>
/// <param name="manager">Communication manager.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    string root = this.config.JobsFolder;
    if (!Directory.Exists(root))
    {
        // Nothing to enumerate: the list stays empty.
        return;
    }

    string[] jobFolders = Directory.GetDirectories(root);
    for (int index = 0; index < jobFolders.Length; index++)
    {
        manager.Token.ThrowIfCancellationRequested();

        string folder = jobFolders[index];
        string id = Path.GetFileName(folder);
        ClusterJobInformation found = this.GetJobInfo(folder, id);
        if (found != null)
        {
            // ReSharper disable once AssignNullToNotNullAttribute
            this.clusterJobs.Add(id, found);
        }

        // Progress reflects the number of folders completed before this one.
        manager.Progress(index * 100 / jobFolders.Length);
    }

    manager.Progress(100);
}
/// <summary>
/// Build the job information for a job from its log folder: the date comes from the
/// file status reported by the DFS client and the status from the YARN client.
/// </summary>
/// <param name="jobRootFolder">Folder with logs for the specified job.</param>
/// <returns>
/// The job information. The status is Unknown when the job has not started running yet
/// or when querying the job fails.
/// </returns>
private ClusterJobInformation GetJobInfo(string jobRootFolder)
{
    Uri uri = DfsFile.UriFromPath(this.config.JobsFolderUri, jobRootFolder);

    long time;
    long size; // required by GetFileStatus; the value is not used here
    this.config.DfsClient.GetFileStatus(uri, out time, out size);
    DateTime date = DfsFile.TimeFromLong(time);

    string jobName = Path.GetFileName(jobRootFolder);
    ClusterJobInformation.ClusterJobStatus status = ClusterJobInformation.ClusterJobStatus.Unknown;

    try
    {
        var jobinfo = this.yarnClient.QueryJob(jobName, uri);
        var jobstatus = jobinfo.GetStatus();
        switch (jobstatus)
        {
            case JobStatus.NotSubmitted:
            case JobStatus.Waiting:
                status = ClusterJobInformation.ClusterJobStatus.Unknown;
                break;
            case JobStatus.Running:
                status = ClusterJobInformation.ClusterJobStatus.Running;
                break;
            case JobStatus.Success:
                status = ClusterJobInformation.ClusterJobStatus.Succeeded;
                break;
            case JobStatus.Cancelled:
                status = ClusterJobInformation.ClusterJobStatus.Cancelled;
                break;
            case JobStatus.Failure:
                status = ClusterJobInformation.ClusterJobStatus.Failed;
                break;
            default:
                throw new ArgumentOutOfRangeException("jobstatus", jobstatus, "Unexpected job status");
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failed query must not prevent the job from being listed, so the
        // status stays Unknown. The failure is traced instead of being dropped silently.
        System.Diagnostics.Trace.TraceWarning("Could not determine the status of job {0}: {1}", jobName, ex.Message);
    }

    // The running time is not available from this source.
    TimeSpan running = TimeSpan.Zero;
    return new ClusterJobInformation(this.config.Name, "", jobName, jobName, Environment.UserName, date, running, status);
}
/// <summary>
/// Update the status stored in a job summary from freshly retrieved job information.
/// </summary>
/// <param name="summary">Summary to refresh.</param>
/// <param name="manager">Communication manager.</param>
public override void RefreshStatus(DryadLinqJobSummary summary, CommManager manager)
{
    ClusterJobInformation refreshed = this.GetJobInfo(summary.JobID);
    if (refreshed != null)
    {
        summary.Status = refreshed.Status;
    }
    else
    {
        // No information could be retrieved for this job.
        summary.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
    }
}
/// <summary>
/// Update the status stored in a job summary by looking up the corresponding cluster job.
/// </summary>
/// <param name="summary">Summary to refresh.</param>
/// <param name="manager">Communication manager.</param>
public virtual void RefreshStatus(DryadLinqJobSummary summary, CommManager manager)
{
    // Only this job is looked up; recomputing the whole cluster job list would be too expensive.
    ClusterJobInformation discovered = this.DiscoverClusterJob(summary, manager);
    if (discovered != null)
    {
        summary.Status = discovered.Status;
    }
    else
    {
        // The job could not be found on the cluster.
        summary.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
    }
}
/// <summary>
/// Build the job information for a job whose logs are stored in a local folder.
/// The job is recognized by the presence of a "jm" subfolder, whose creation time
/// is used as the job date.
/// </summary>
/// <param name="jobRootFolder">Folder with logs for the specified job.</param>
/// <param name="jobId">Job id; also used as the job name.</param>
/// <returns>The job information, or null if the "jm" subfolder does not exist.</returns>
private ClusterJobInformation GetJobInfo(string jobRootFolder, string jobId)
{
    string jmDirectory = Path.Combine(jobRootFolder, "jm");
    if (Directory.Exists(jmDirectory))
    {
        DateTime created = File.GetCreationTime(jmDirectory);
        return new ClusterJobInformation(
            this.config.Name,
            "",
            jobId,
            jobId,
            Environment.UserName,
            created,
            TimeSpan.Zero,
            ClusterJobInformation.ClusterJobStatus.Unknown);
    }

    return null;
}
/// <summary>
/// Refresh the job summary status by delegating to the cluster status object
/// created from the actual configuration of the job.
/// </summary>
/// <param name="job">Summary to refresh.</param>
/// <param name="manager">Communication manager.</param>
/// <exception cref="InvalidCastException">The configuration is not a <c>CacheClusterConfiguration</c>.</exception>
public override void RefreshStatus(DryadLinqJobSummary job, CommManager manager)
{
    // Direct cast instead of 'as': a configuration of the wrong type is reported as an
    // InvalidCastException here rather than as a NullReferenceException on the next call.
    CacheClusterConfiguration cacheConfig = (CacheClusterConfiguration)this.Config;
    ClusterConfiguration actual = cacheConfig.ActualConfig(job);
    ClusterStatus actualStatus = actual.CreateClusterStatus();
    actualStatus.RefreshStatus(job, manager);

    ClusterJobInformation info = actualStatus.DiscoverClusterJob(job, manager);
    if (info == null)
    {
        job.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
        return;
    }

    job.Status = info.Status;
}
/// <summary>
/// Build the (unique) dryadlinq job summary corresponding to a cluster job,
/// using only the fields of the cluster job itself.
/// </summary>
/// <param name="clusterJob">Cluster Job.</param>
/// <param name="reporter">Delegate used to report errors; not referenced by this implementation.</param>
/// <returns>The job description.</returns>
public override DryadLinqJobSummary DiscoverDryadLinqJobFromClusterJob(ClusterJobInformation clusterJob, StatusReporter reporter)
{
    // The cluster job id serves both as the job id and as the cluster job id.
    var id = clusterJob.ClusterJobID;
    var estimatedEnd = clusterJob.Date + clusterJob.EstimatedRunningTime;
    var jmProcess = new DryadProcessIdentifier("jm");

    return new DryadLinqJobSummary(
        clusterJob.Cluster,
        this.Config.TypeOfCluster,
        "",             // virtual cluster
        "",             // machine
        id,             // jobId
        id,             // clusterJobId
        jmProcess,      // jmProcessGuid
        clusterJob.Name,
        clusterJob.User,
        clusterJob.Date,
        estimatedEnd,
        clusterJob.Status);
}
/// <summary>
/// Rebuild the cluster job list from the entries returned by enumerating
/// the root directory through the Azure client.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not referenced by this implementation.</param>
/// <param name="manager">Communication manager.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    var folders = this.config.AzureClient.EnumerateDirectory("").ToList();
    for (int index = 0; index < folders.Count; index++)
    {
        manager.Token.ThrowIfCancellationRequested();

        var folder = folders[index];
        ClusterJobInformation details = this.GetJobInfo(folder);
        if (details != null)
        {
            // ReSharper disable once AssignNullToNotNullAttribute
            this.clusterJobs.Add(folder, details);
        }

        // Progress reflects the number of entries completed before this one.
        manager.Progress(100 * index / folders.Count);
    }

    manager.Progress(100);
}
/// <summary>
/// Rebuild the cluster job list from the subdirectories of the jobs folder on the DFS.
/// Jobs are keyed by the cluster job id of the retrieved information.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not referenced by this implementation.</param>
/// <param name="manager">Communication manager.</param>
// ReSharper disable once UnusedParameter.Global
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    var jobsRoot = DfsFile.UriFromPath(this.config.JobsFolderUri, "");
    var jobFolders = this.config.DfsClient.EnumerateSubdirectories(jobsRoot).ToList();
    int total = jobFolders.Count;
    int processed = 0;

    foreach (var jobFolder in jobFolders)
    {
        manager.Token.ThrowIfCancellationRequested();

        string relativePath = DfsFile.PathFromUri(this.config.JobsFolderUri, jobFolder);
        ClusterJobInformation details = this.GetJobInfo(relativePath);
        if (details != null)
        {
            // ReSharper disable once AssignNullToNotNullAttribute
            this.clusterJobs.Add(details.ClusterJobID, details);
        }

        // Report the number of folders completed before this one, then count this one.
        manager.Progress(100 * processed / total);
        processed++;
    }

    manager.Progress(100);
}
/// <summary>
/// Rebuild the cluster job list from the entries obtained by expanding the root
/// of the Azure storage location. Jobs are keyed by the absolute path of their URI.
/// </summary>
/// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not referenced by this implementation.</param>
/// <param name="manager">Communication manager.</param>
protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
{
    this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

    var rootUri = AzureDfsFile.UriFromPath(this.config, "");
    var jobUris = this.config.AzureClient.ExpandFileOrDirectory(rootUri).ToList();

    for (int index = 0; index < jobUris.Count; index++)
    {
        manager.Token.ThrowIfCancellationRequested();

        var jobUri = jobUris[index];
        string folder = AzureDfsFile.PathFromUri(this.config, jobUri);
        ClusterJobInformation details = this.GetJobInfo(folder);
        if (details != null)
        {
            // ReSharper disable once AssignNullToNotNullAttribute
            this.clusterJobs.Add(jobUri.AbsolutePath, details);
        }

        // Progress reflects the number of entries completed before this one.
        manager.Progress(100 * index / jobUris.Count);
    }

    manager.Progress(100);
}
/// <summary>
/// Extract the job information from the blobs found under the job folder in Azure storage.
/// Status, last heartbeat, job name and start time are read from the metadata of the
/// blob whose path ends in "heartbeat"; when the heartbeat does not provide a start time,
/// the last-modified time of the "DryadLinqProgram__" blob is used as the job date.
/// </summary>
/// <param name="jobRootFolder">Folder with logs for the specified job.</param>
/// <returns>The job information, or null if no heartbeat blob was found.</returns>
private ClusterJobInformation GetJobInfo(string jobRootFolder)
{
    DateTime date = DateTime.MinValue;
    DateTime lastHeartBeat = DateTime.MinValue;
    ClusterJobInformation.ClusterJobStatus status = ClusterJobInformation.ClusterJobStatus.Unknown;
    bool found = false;

    Uri uri = AzureDfsFile.UriFromPath(this.config, jobRootFolder);
    var jobsFolders = this.config.AzureClient.ExpandFileOrDirectory(uri).ToList();

    jobRootFolder = GetBlobName(this.config.Container, jobRootFolder);
    string jobName = jobRootFolder;

    foreach (var file in jobsFolders)
    {
        // Paths are identifiers, so they are compared ordinally rather than with the current culture.
        if (file.AbsolutePath.EndsWith("heartbeat", StringComparison.Ordinal))
        {
            string blobName = GetBlobName(this.config.Container, file.AbsolutePath);
            var blob = this.config.AzureClient.Container.GetPageBlobReference(blobName);
            blob.FetchAttributes();
            var props = blob.Metadata;

            string st;
            if (props.TryGetValue("status", out st))
            {
                switch (st)
                {
                    case "failure":
                        status = ClusterJobInformation.ClusterJobStatus.Failed;
                        break;
                    case "success":
                        status = ClusterJobInformation.ClusterJobStatus.Succeeded;
                        break;
                    case "running":
                        status = ClusterJobInformation.ClusterJobStatus.Running;
                        break;
                    case "killed":
                        status = ClusterJobInformation.ClusterJobStatus.Cancelled;
                        break;
                    default:
                        Console.WriteLine("Unknown status " + st);
                        break;
                }
            }

            string hb;
            if (props.TryGetValue("heartbeat", out hb) && DateTime.TryParse(hb, out lastHeartBeat))
            {
                lastHeartBeat = lastHeartBeat.ToLocalTime();
                if (status == ClusterJobInformation.ClusterJobStatus.Running &&
                    DateTime.Now - lastHeartBeat > TimeSpan.FromSeconds(40))
                {
                    // job has in fact crashed: it claims to be running but the heartbeat is stale
                    status = ClusterJobInformation.ClusterJobStatus.Failed;
                }
            }

            string name;
            if (props.TryGetValue("jobname", out name))
            {
                jobName = name;
            }

            string start;
            DateTime parsedStart;
            // Parse into a temporary so that an unparsable value does not erase a date
            // that was already obtained from the program blob.
            if (props.TryGetValue("starttime", out start) && DateTime.TryParse(start, out parsedStart))
            {
                date = parsedStart.ToLocalTime();
            }

            found = true;
        }
        else if (date == DateTime.MinValue && file.AbsolutePath.Contains("DryadLinqProgram__"))
        {
            // Fallback for the job date. Newer heartbeats contain the start time, so the
            // blob is only consulted while no date is known; a start time found later in
            // the heartbeat still takes precedence because it overwrites this value.
            var blob = this.config.AzureClient.Container.GetBlockBlobReference(AzureDfsFile.PathFromUri(this.config, file));
            blob.FetchAttributes();
            var props = blob.Properties;
            if (props.LastModified.HasValue)
            {
                // LocalDateTime converts using the stored offset.
                date = props.LastModified.Value.LocalDateTime;
            }
        }
    }

    if (!found)
    {
        return null;
    }

    // The running time can only be estimated when both the start and the last heartbeat are known.
    TimeSpan running = TimeSpan.Zero;
    if (date != DateTime.MinValue && lastHeartBeat != DateTime.MinValue)
    {
        running = lastHeartBeat - date;
    }

    return new ClusterJobInformation(this.config.Name, "", jobRootFolder, jobName, Environment.UserName, date, running, status);
}
/// <summary>
/// Not needed, all summaries are already known. This implementation always throws.
/// </summary>
/// <param name="clusterJob">Cluster job information.</param>
/// <param name="reporter">Delegate used to report errors.</param>
/// <returns>Never returns.</returns>
/// <exception cref="InvalidOperationException">Always thrown.</exception>
public override DryadLinqJobSummary DiscoverDryadLinqJobFromClusterJob(ClusterJobInformation clusterJob, StatusReporter reporter)
{
    // A message makes the failure understandable if this method is ever reached.
    throw new InvalidOperationException("Discovering a job summary from a cluster job is not supported here: all summaries are already known.");
}
/// <summary>
/// Discover the (unique) dryadlinq job corresponding to a cluster job.
/// Each concrete cluster status type supplies its own implementation.
/// </summary>
/// <param name="clusterJob">Cluster Job.</param>
/// <param name="reporter">Delegate used to report errors.</param>
/// <returns>The job description.</returns>
public abstract DryadLinqJobSummary DiscoverDryadLinqJobFromClusterJob(ClusterJobInformation clusterJob, StatusReporter reporter);