Exemplo n.º 1
0
        /// <summary>
        /// Rebuild the clusterJobs dictionary from the job summaries saved as XML files
        /// in the "jobs" subfolder of the cache directory.
        /// </summary>
        /// <param name="virtualCluster">Unused.</param>
        /// <param name="manager">Communication manager; supplies the cancellation token and receives status and progress.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            string cacheRoot = CachedClusterResidentObject.CacheDirectory;
            if (string.IsNullOrEmpty(cacheRoot))
            {
                // No cache directory configured: the job list stays empty.
                return;
            }

            string summaryFolder = Path.Combine(cacheRoot, "jobs");
            if (!Directory.Exists(summaryFolder))
            {
                Directory.CreateDirectory(summaryFolder);
            }

            foreach (string summaryFile in Directory.GetFiles(summaryFolder, "*.xml"))
            {
                manager.Token.ThrowIfCancellationRequested();

                DryadLinqJobSummary summary = Utilities.LoadXml<DryadLinqJobSummary>(summaryFile);

                // The key is prefixed with the cluster name because two clusters may each have a job with the same id.
                string key = summary.Cluster + "-" + summary.ClusterJobId;
                var entry = new ClusterJobInformation(
                    this.Config.Name,
                    summary.Cluster,
                    key,
                    summary.Name,
                    summary.User,
                    summary.Date,
                    summary.EndTime - summary.Date,
                    summary.Status);
                entry.SetAssociatedSummary(summary);

                if (!this.clusterJobs.ContainsKey(key))
                {
                    this.clusterJobs.Add(key, entry);
                }
                else
                {
                    // Keep the first entry; report the clash and move on to the next file.
                    manager.Status("Duplicate job id, cannot insert in cache " + summary.AsIdentifyingString(), StatusKind.Error);
                }
            }

            manager.Progress(100);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Rebuild the clusterJobs dictionary by scanning the subfolders of the configured jobs folder.
        /// Each subfolder name is used as the job id.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
        /// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            string jobsRoot = this.config.JobsFolder;
            if (!Directory.Exists(jobsRoot))
            {
                // Nothing to scan: the job list stays empty.
                return;
            }

            string[] jobFolders = Directory.GetDirectories(jobsRoot);
            for (int index = 0; index < jobFolders.Length; index++)
            {
                manager.Token.ThrowIfCancellationRequested();

                string folder = jobFolders[index];
                string jobId  = Path.GetFileName(folder);

                ClusterJobInformation info = this.GetJobInfo(folder, jobId);
                if (info != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(jobId, info);
                }

                // The percentage is based on the number of folders completed before this one.
                manager.Progress(index * 100 / jobFolders.Length);
            }

            manager.Progress(100);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Build the job information for a job folder: the date comes from the folder's file status
        /// on the DFS client, the status from querying this.yarnClient.
        /// </summary>
        /// <param name="jobRootFolder">Folder with logs for the specified job.</param>
        /// <returns>
        /// The job information (never null). The status is Unknown when the job is not submitted,
        /// is waiting, or when the status query fails.
        /// </returns>
        private ClusterJobInformation GetJobInfo(string jobRootFolder)
        {
            Uri  uri = DfsFile.UriFromPath(this.config.JobsFolderUri, jobRootFolder);
            long time;
            long size; // required by the GetFileStatus signature; the value is not used here

            this.config.DfsClient.GetFileStatus(uri, out time, out size);

            DateTime date = DfsFile.TimeFromLong(time);

            ClusterJobInformation.ClusterJobStatus status = ClusterJobInformation.ClusterJobStatus.Unknown;
            string jobName = Path.GetFileName(jobRootFolder);

            try
            {
                var jobinfo = this.yarnClient.QueryJob(jobName, uri);
                switch (jobinfo.GetStatus())
                {
                case JobStatus.NotSubmitted:
                case JobStatus.Waiting:
                    status = ClusterJobInformation.ClusterJobStatus.Unknown;
                    break;

                case JobStatus.Running:
                    status = ClusterJobInformation.ClusterJobStatus.Running;
                    break;

                case JobStatus.Success:
                    status = ClusterJobInformation.ClusterJobStatus.Succeeded;
                    break;

                case JobStatus.Cancelled:
                    status = ClusterJobInformation.ClusterJobStatus.Cancelled;
                    break;

                case JobStatus.Failure:
                    status = ClusterJobInformation.ClusterJobStatus.Failed;
                    break;

                default:
                    throw new ArgumentOutOfRangeException();
                }
            }
            catch (Exception ex)
            {
                // Best effort: a failed status query must not prevent the job from being listed,
                // so the status stays Unknown. The failure is reported instead of being silently dropped.
                Console.WriteLine("Could not determine status of job " + jobName + ": " + ex.Message);
            }

            // The running time is not computed here; it is always reported as zero.
            TimeSpan running = TimeSpan.Zero;
            var      info    = new ClusterJobInformation(this.config.Name, "", jobName, jobName, Environment.UserName, date, running, status);

            return(info);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Update the status stored in a job summary with the status currently reported for the job.
        /// </summary>
        /// <param name="summary">Summary to refresh; its Status property is overwritten.</param>
        /// <param name="manager">Communication manager (not used by this implementation).</param>
        public override void RefreshStatus(DryadLinqJobSummary summary, CommManager manager)
        {
            ClusterJobInformation current = this.GetJobInfo(summary.JobID);

            // When no information is available for the job the status becomes Unknown.
            summary.Status = current != null
                ? current.Status
                : ClusterJobInformation.ClusterJobStatus.Unknown;
        }
Exemplo n.º 5
0
        /// <summary>
        /// Update the status stored in a job summary by looking up the corresponding cluster job.
        /// </summary>
        /// <param name="summary">Summary to refresh; its Status property is overwritten.</param>
        /// <param name="manager">Communication manager.</param>
        public virtual void RefreshStatus(DryadLinqJobSummary summary, CommManager manager)
        {
            // Recomputing the whole job list (RecomputeClusterJobList) would be too expensive,
            // so only the job described by the summary is looked up.
            ClusterJobInformation discovered = this.DiscoverClusterJob(summary, manager);

            if (discovered != null)
            {
                summary.Status = discovered.Status;
            }
            else
            {
                summary.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
            }
        }
Exemplo n.º 6
0
        /// <summary>
        /// Extract the job information from a folder with logs on the local machine.
        /// The job is recognized by the presence of a "jm" subfolder, whose creation time is used as the job date.
        /// </summary>
        /// <param name="jobRootFolder">Folder with logs for the specified job.</param>
        /// <param name="jobId">Job id; also used as the job name.</param>
        /// <returns>The job information (with status Unknown), or null if the "jm" subfolder does not exist.</returns>
        private ClusterJobInformation GetJobInfo(string jobRootFolder, string jobId)
        {
            string jmPath = Path.Combine(jobRootFolder, "jm");

            if (Directory.Exists(jmPath))
            {
                DateTime created = File.GetCreationTime(jmPath);

                // Running time and status are not derived from the folder contents here.
                return new ClusterJobInformation(
                    this.config.Name,
                    "",
                    jobId,
                    jobId,
                    Environment.UserName,
                    created,
                    TimeSpan.Zero,
                    ClusterJobInformation.ClusterJobStatus.Unknown);
            }

            return null;
        }
Exemplo n.º 7
0
        /// <summary>
        /// Refresh the job summary status by delegating to the cluster status of the
        /// actual configuration associated with the job.
        /// </summary>
        /// <param name="job">Summary to refresh; its Status property is overwritten.</param>
        /// <param name="manager">Communication manager.</param>
        /// <exception cref="InvalidCastException">this.Config is not a CacheClusterConfiguration.</exception>
        public override void RefreshStatus(DryadLinqJobSummary job, CommManager manager)
        {
            // Direct cast instead of 'as': a configuration of the wrong type fails here with an
            // InvalidCastException naming the types, rather than with a NullReferenceException on the next call.
            CacheClusterConfiguration cacheConfig  = (CacheClusterConfiguration)this.Config;
            ClusterConfiguration      actual       = cacheConfig.ActualConfig(job);
            ClusterStatus             actualStatus = actual.CreateClusterStatus();

            actualStatus.RefreshStatus(job, manager);
            ClusterJobInformation info = actualStatus.DiscoverClusterJob(job, manager);

            if (info == null)
            {
                job.Status = ClusterJobInformation.ClusterJobStatus.Unknown;
                return;
            }
            job.Status = info.Status;
        }
Exemplo n.º 8
0
        /// <summary>
        /// Build the (unique) DryadLINQ job summary corresponding to a cluster job,
        /// using only the fields of the cluster job and of this cluster's configuration.
        /// </summary>
        /// <param name="clusterJob">Cluster Job.</param>
        /// <param name="reporter">Delegate used to report errors (not used by this implementation).</param>
        /// <returns>The job description.</returns>
        public override DryadLinqJobSummary DiscoverDryadLinqJobFromClusterJob(ClusterJobInformation clusterJob, StatusReporter reporter)
        {
            // The cluster job id serves both as the job id and as the cluster job id.
            string id = clusterJob.ClusterJobID;
            var jmProcess = new DryadProcessIdentifier("jm");
            var endTime = clusterJob.Date + clusterJob.EstimatedRunningTime;

            return new DryadLinqJobSummary(
                clusterJob.Cluster,
                this.Config.TypeOfCluster,
                "",          // virtual cluster
                "",          // machine
                id,          // jobId
                id,          // clusterJobId
                jmProcess,   // jmProcessGuid
                clusterJob.Name,
                clusterJob.User,
                clusterJob.Date,
                endTime,
                clusterJob.Status);
        }
Exemplo n.º 9
0
        /// <summary>
        /// Rebuild the clusterJobs dictionary from the entries enumerated by the Azure client
        /// at the root (empty path). Each entry is used as the dictionary key.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
        /// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            var entries = this.config.AzureClient.EnumerateDirectory("").ToList();
            int total = entries.Count;

            for (int position = 0; position < total; position++)
            {
                manager.Token.ThrowIfCancellationRequested();

                var entry = entries[position];
                ClusterJobInformation info = this.GetJobInfo(entry);
                if (info != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(entry, info);
                }

                // The percentage is based on the number of entries completed before this one.
                manager.Progress(100 * position / total);
            }

            manager.Progress(100);
        }
Exemplo n.º 10
0
        /// <summary>
        /// Rebuild the clusterJobs dictionary from the subdirectories of the jobs folder on the DFS.
        /// Entries are keyed by the ClusterJobID of the job information.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
        /// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
        // ReSharper disable once UnusedParameter.Global
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            var root           = DfsFile.UriFromPath(this.config.JobsFolderUri, "");
            var subdirectories = this.config.DfsClient.EnumerateSubdirectories(root).ToList();

            int finished = 0;
            foreach (var subdirectory in subdirectories)
            {
                manager.Token.ThrowIfCancellationRequested();

                string jobFolder = DfsFile.PathFromUri(this.config.JobsFolderUri, subdirectory);
                ClusterJobInformation info = this.GetJobInfo(jobFolder);
                if (info != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(info.ClusterJobID, info);
                }

                // Report first, count afterwards: the percentage reflects the jobs completed before this one.
                manager.Progress(100 * finished / subdirectories.Count);
                finished++;
            }

            manager.Progress(100);
        }
Exemplo n.º 11
0
        /// <summary>
        /// Rebuild the clusterJobs dictionary from the entries obtained by expanding the root
        /// (empty path) with the Azure client. Entries are keyed by the absolute path of their URI.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types).</param>
        /// <param name="manager">Communication manager; supplies the cancellation token and receives progress.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            var rootUri = AzureDfsFile.UriFromPath(this.config, "");
            var jobUris = this.config.AzureClient.ExpandFileOrDirectory(rootUri).ToList();

            for (int position = 0; position < jobUris.Count; position++)
            {
                manager.Token.ThrowIfCancellationRequested();

                var jobUri = jobUris[position];
                ClusterJobInformation info = this.GetJobInfo(AzureDfsFile.PathFromUri(this.config, jobUri));
                if (info != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(jobUri.AbsolutePath, info);
                }

                // The percentage is based on the number of entries completed before this one.
                manager.Progress(100 * position / jobUris.Count);
            }

            manager.Progress(100);
        }
Exemplo n.º 12
0
        /// <summary>
        /// Extract the job information for a job folder from the metadata of its "heartbeat" blob
        /// (keys "status", "heartbeat", "jobname", "starttime").
        /// </summary>
        /// <param name="jobRootFolder">Folder with logs for the specified job.</param>
        /// <returns>The job information, or null if the folder contains no entry ending in "heartbeat".</returns>
        private ClusterJobInformation GetJobInfo(string jobRootFolder)
        {
            DateTime date          = DateTime.MinValue;
            DateTime lastHeartBeat = DateTime.MinValue;

            // A job reported as running whose last heartbeat is older than this is treated as failed.
            TimeSpan heartBeatTimeout = TimeSpan.FromSeconds(40);

            ClusterJobInformation.ClusterJobStatus status = ClusterJobInformation.ClusterJobStatus.Unknown;
            bool found = false;

            Uri uri         = AzureDfsFile.UriFromPath(this.config, jobRootFolder);
            var jobsFolders = this.config.AzureClient.ExpandFileOrDirectory(uri).ToList();

            jobRootFolder = GetBlobName(this.config.Container, jobRootFolder);
            string jobName = jobRootFolder; // default name, replaced by the "jobname" metadata when present

            foreach (var file in jobsFolders)
            {
                // Ordinal comparison: this is a blob path, not text in a human language.
                if (file.AbsolutePath.EndsWith("heartbeat", StringComparison.Ordinal))
                {
                    string blobName = GetBlobName(this.config.Container, file.AbsolutePath);
                    var    blob     = this.config.AzureClient.Container.GetPageBlobReference(blobName);
                    blob.FetchAttributes();
                    var props = blob.Metadata;

                    string st;
                    if (props.TryGetValue("status", out st))
                    {
                        switch (st)
                        {
                        case "failure":
                            status = ClusterJobInformation.ClusterJobStatus.Failed;
                            break;

                        case "success":
                            status = ClusterJobInformation.ClusterJobStatus.Succeeded;
                            break;

                        case "running":
                            status = ClusterJobInformation.ClusterJobStatus.Running;
                            break;

                        case "killed":
                            status = ClusterJobInformation.ClusterJobStatus.Cancelled;
                            break;

                        default:
                            Console.WriteLine("Unknown status " + st);
                            break;
                        }
                    }

                    string   hb;
                    DateTime parsedHeartBeat;
                    if (props.TryGetValue("heartbeat", out hb) && DateTime.TryParse(hb, out parsedHeartBeat))
                    {
                        lastHeartBeat = parsedHeartBeat.ToLocalTime();
                        if (status == ClusterJobInformation.ClusterJobStatus.Running &&
                            DateTime.Now - lastHeartBeat > heartBeatTimeout)
                        {
                            // job has in fact crashed
                            status = ClusterJobInformation.ClusterJobStatus.Failed;
                        }
                    }

                    string name;
                    if (props.TryGetValue("jobname", out name))
                    {
                        jobName = name;
                    }

                    // Parse into a temporary so that an unparsable value does not wipe out
                    // a date already obtained from the program blob below.
                    string   startText;
                    DateTime parsedStart;
                    if (props.TryGetValue("starttime", out startText) && DateTime.TryParse(startText, out parsedStart))
                    {
                        date = parsedStart.ToLocalTime();
                    }

                    found = true;
                }
                else if (date == DateTime.MinValue &&
                         file.AbsolutePath.Contains("DryadLinqProgram__"))
                {
                    // Newer heartbeats contain the start date; fall back to the last-modified time of the
                    // program blob only while no start date is known. A "starttime" read from a heartbeat
                    // enumerated later still takes precedence.
                    var blob = this.config.AzureClient.Container.GetBlockBlobReference(AzureDfsFile.PathFromUri(this.config, file));
                    blob.FetchAttributes();
                    var props = blob.Properties;
                    if (props.LastModified.HasValue)
                    {
                        date = props.LastModified.Value.DateTime;
                        date = date.ToLocalTime();
                    }
                }
            }

            if (!found)
            {
                return(null);
            }

            // The running time can only be estimated when both the start date and a heartbeat are known.
            TimeSpan running = TimeSpan.Zero;

            if (date != DateTime.MinValue && lastHeartBeat != DateTime.MinValue)
            {
                running = lastHeartBeat - date;
            }
            var info = new ClusterJobInformation(this.config.Name, "", jobRootFolder, jobName, Environment.UserName, date, running, status);

            return(info);
        }
Exemplo n.º 13
0
 /// <summary>
 /// Not needed, all summaries are already known. Calling this method is an error.
 /// </summary>
 /// <param name="clusterJob">Cluster job information.</param>
 /// <param name="reporter">Delegate used to report errors.</param>
 /// <returns>Never returns; always throws.</returns>
 /// <exception cref="InvalidOperationException">Always thrown.</exception>
 public override DryadLinqJobSummary DiscoverDryadLinqJobFromClusterJob(ClusterJobInformation clusterJob, StatusReporter reporter)
 {
     // The message tells a caller who reaches this by mistake why the operation is invalid.
     throw new InvalidOperationException("DiscoverDryadLinqJobFromClusterJob should not be called: all job summaries are already known.");
 }
Exemplo n.º 14
0
 /// <summary>
 /// Discover the (unique) DryadLINQ job corresponding to a cluster job.
 /// Each concrete cluster status class supplies its own implementation.
 /// </summary>
 /// <param name="clusterJob">Cluster Job.</param>
 /// <param name="reporter">Delegate used to report errors.</param>
 /// <returns>The job description.</returns>
 public abstract DryadLinqJobSummary DiscoverDryadLinqJobFromClusterJob(ClusterJobInformation clusterJob, StatusReporter reporter);