Exemple #1
0
        /// <summary>
        /// Rebuild the cluster job list from scratch by scanning the sub-folders of the configured jobs folder.
        /// Each sub-folder name is used as the job id; folders for which no job information is obtained are skipped.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            // Without a jobs folder the list simply stays empty (no progress is reported in that case).
            if (Directory.Exists(this.config.JobsFolder))
            {
                string[] jobFolders = Directory.GetDirectories(this.config.JobsFolder);
                for (int index = 0; index < jobFolders.Length; index++)
                {
                    manager.Token.ThrowIfCancellationRequested();

                    string folder = jobFolders[index];
                    string folderName = Path.GetFileName(folder);
                    ClusterJobInformation jobInfo = this.GetJobInfo(folder, folderName);
                    if (jobInfo != null)
                    {
                        // ReSharper disable once AssignNullToNotNullAttribute
                        this.clusterJobs.Add(folderName, jobInfo);
                    }

                    // Reports the share of folders that were finished before this one.
                    manager.Progress(index * 100 / jobFolders.Length);
                }

                manager.Progress(100);
            }
        }
Exemple #2
0
        /// <summary>
        /// Diagnose a list of jobs.
        /// Null entries in the list are skipped; every other job contributes one log to the result.
        /// </summary>
        /// <param name="jobs">Jobs to diagnose.</param>
        /// <param name="config">Cluster configuration.</param>
        /// <param name="manager">Communication manager; used for cancellation, status and progress reporting.</param>
        /// <returns>One diagnosis log per non-null job, in input order.</returns>
        public static List<DiagnosisLog> DiagnoseJobs(IEnumerable<DryadLinqJobSummary> jobs, ClusterConfiguration config, CommManager manager)
        {
            // Materialize once so the sequence is not enumerated twice (count + loop).
            DryadLinqJobSummary[] dryadLinqJobSummaries = jobs as DryadLinqJobSummary[] ?? jobs.ToArray();
            int jobCount = dryadLinqJobSummaries.Length;

            List<DiagnosisLog> result = new List<DiagnosisLog>();
            int done = 0;

            foreach (DryadLinqJobSummary summary in dryadLinqJobSummaries)
            {
                if (summary != null)
                {
                    manager.Token.ThrowIfCancellationRequested();
                    JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(config, summary, manager);
                    manager.Status("Diagnosing " + summary.ShortName(), StatusKind.LongOp);
                    DiagnosisLog log = diagnosis.Diagnose();
                    result.Add(log);
                }

                // Skipped (null) entries are part of jobCount, so they must also advance the counter;
                // otherwise the reported progress could never reach 100%.
                done++;
                manager.Progress(done * 100 / jobCount);
            }

            manager.Status("Diagnosis complete", StatusKind.OK);
            return result;
        }
Exemple #3
0
        /// <summary>
        /// Recompute the list of jobs on the cluster and add them to the clusterJobs field.
        /// The jobs are loaded from the "*.xml" summary files in the "jobs" sub-folder of the cache directory,
        /// which is created if it does not exist yet.
        /// </summary>
        /// <param name="virtualCluster">Unused.</param>
        /// <param name="manager">Communication manager, used for cancellation, error status and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            // No cache directory configured: leave the list empty.
            if (string.IsNullOrEmpty(CachedClusterResidentObject.CacheDirectory))
            {
                return;
            }

            string cachedJobsFolder = Path.Combine(CachedClusterResidentObject.CacheDirectory, "jobs");
            if (!Directory.Exists(cachedJobsFolder))
            {
                Directory.CreateDirectory(cachedJobsFolder);
            }

            foreach (string summaryFile in Directory.GetFiles(cachedJobsFolder, "*.xml"))
            {
                manager.Token.ThrowIfCancellationRequested();

                DryadLinqJobSummary summary = Utilities.LoadXml<DryadLinqJobSummary>(summaryFile);

                // There may be two jobs with the same id from different clusters, so the key includes the cluster.
                string key = summary.Cluster + "-" + summary.ClusterJobId;
                ClusterJobInformation entry = new ClusterJobInformation(
                    this.Config.Name,
                    summary.Cluster,
                    key,
                    summary.Name,
                    summary.User,
                    summary.Date,
                    summary.EndTime - summary.Date,
                    summary.Status);
                entry.SetAssociatedSummary(summary);

                if (!this.clusterJobs.ContainsKey(key))
                {
                    this.clusterJobs.Add(key, entry);
                }
                else
                {
                    // The first job seen with a given key wins; later ones are only reported.
                    manager.Status("Duplicate job id, cannot insert in cache " + summary.AsIdentifyingString(), StatusKind.Error);
                }
            }

            manager.Progress(100);
        }
Exemple #4
0
        /// <summary>
        /// Look to see whether the vertices failed reading from some common set of machines.
        /// This is incomplete: e.g., it does not work for tidyfs streams.
        /// </summary>
        /// <param name="manager">Communication manager.</param>
        /// <returns>
        /// Yes if some machine accounts for more than 3 file read failures;
        /// No if there are fewer than 5 failed (non-manager) vertices or fewer than 5 channel read failures;
        /// Dontknow otherwise.
        /// </returns>
        protected Decision LookForCorrelatedReadFailures(CommManager manager)
        {
            // With fewer failures than this we do not worry.
            const int maxFailures = 5;

            List<ExecutedVertexInstance> failedVertices = this.Job.Vertices
                .Where(v => v.State == ExecutedVertexInstance.VertexState.Failed && !v.IsManager)
                .ToList();
            int failedCount = failedVertices.Count;
            if (failedCount < maxFailures)
            {
                return Decision.No;
            }

            // Ask the per-vertex diagnosis for the channel whose read failed, if any.
            List<ChannelEndpointDescription> badChannels = new List<ChannelEndpointDescription>();
            for (int examined = 1; examined <= failedCount; examined++)
            {
                ExecutedVertexInstance vertex = failedVertices[examined - 1];
                var readFailure = VertexFailureDiagnosis
                    .CreateVertexFailureDiagnosis(this.Job, this.StaticPlan, vertex, manager)
                    .ChannelReadFailure(manager);
                if (readFailure != null)
                {
                    badChannels.Add(readFailure);
                }

                manager.Progress(examined * 100 / failedCount);
            }

            if (badChannels.Count < maxFailures)
            {
                return Decision.No;
            }

            this.Log(DiagnosisMessage.Importance.Final, "There are " + badChannels.Count + " read failures in the job", "");

            // Only channels backed by plain files can be attributed to a machine.
            var fileChannels = badChannels.Where(ced => ced.UriType == "file").ToList();
            if (!fileChannels.Any())
            {
                this.Log(DiagnosisMessage.Importance.Final, "All channels with failures are distributed files", "No further information is available");
                return Decision.Dontknow;
            }

            var failuresByMachine = fileChannels
                .Select(f => new UNCPathname(f.LocalPath).Machine)
                .GroupBy(w => w)
                .ToList();

            Decision verdict = Decision.Dontknow;
            foreach (var machineGroup in failuresByMachine)
            {
                int failuresOnMachine = machineGroup.Count();
                if (failuresOnMachine <= 3)
                {
                    continue;
                }

                this.Log(DiagnosisMessage.Importance.Final, "There are " + failuresOnMachine + " read failures reading from machine", machineGroup.Key);
                verdict = Decision.Yes;
            }

            return verdict;
        }
Exemple #5
0
        /// <summary>
        /// Force the recomputation of the cluster job list.
        /// Every entry enumerated at the root of the Azure client becomes a job, keyed by the entry itself,
        /// provided job information can be obtained for it.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            var entries = this.config.AzureClient.EnumerateDirectory("").ToList();
            int total = entries.Count;
            int processed = 0;

            foreach (var entry in entries)
            {
                manager.Token.ThrowIfCancellationRequested();

                ClusterJobInformation details = this.GetJobInfo(entry);
                if (details != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(entry, details);
                }

                // Reports the share of entries that were finished before this one.
                int percent = 100 * processed / total;
                processed++;
                manager.Progress(percent);
            }

            manager.Progress(100);
        }
Exemple #6
0
        /// <summary>
        /// Force the recomputation of the cluster job list.
        /// The sub-directories of the jobs folder URI are enumerated through the DFS client; each one for which
        /// job information can be obtained is stored under the job's ClusterJobID.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        // ReSharper disable once UnusedParameter.Global
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            var jobsRoot = DfsFile.UriFromPath(this.config.JobsFolderUri, "");
            var jobFolders = this.config.DfsClient.EnumerateSubdirectories(jobsRoot).ToList();
            int total = jobFolders.Count;
            int processed = 0;

            foreach (var jobFolder in jobFolders)
            {
                manager.Token.ThrowIfCancellationRequested();

                var jobPath = DfsFile.PathFromUri(this.config.JobsFolderUri, jobFolder);
                ClusterJobInformation details = this.GetJobInfo(jobPath);
                if (details != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(details.ClusterJobID, details);
                }

                // Reports the share of folders that were finished before this one.
                manager.Progress(100 * processed / total);
                processed++;
            }

            manager.Progress(100);
        }
Exemple #7
0
        /// <summary>
        /// Force the recomputation of the cluster job list.
        /// The root URI of the Azure DFS is expanded into its entries; each entry for which job information
        /// can be obtained is stored under the entry's absolute path.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            var rootUri = AzureDfsFile.UriFromPath(this.config, "");
            var jobUris = this.config.AzureClient.ExpandFileOrDirectory(rootUri).ToList();
            int total = jobUris.Count;
            int processed = 0;

            foreach (var jobUri in jobUris)
            {
                manager.Token.ThrowIfCancellationRequested();

                ClusterJobInformation details = this.GetJobInfo(AzureDfsFile.PathFromUri(this.config, jobUri));
                if (details != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(jobUri.AbsolutePath, details);
                }

                // Reports the share of entries that were finished before this one.
                int percent = 100 * processed / total;
                processed++;
                manager.Progress(percent);
            }

            manager.Progress(100);
        }
        /// <summary>
        /// Force the recomputation of the cluster job list.
        /// Expands the Azure DFS root into its entries and registers, under its absolute path,
        /// every entry that yields job information.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();
            var candidates = this.config.AzureClient.ExpandFileOrDirectory(AzureDfsFile.UriFromPath(this.config, "")).ToList();

            int finished = 0;
            foreach (var candidate in candidates)
            {
                manager.Token.ThrowIfCancellationRequested();

                string rootFolder = AzureDfsFile.PathFromUri(this.config, candidate);
                ClusterJobInformation jobInfo = this.GetJobInfo(rootFolder);
                if (jobInfo != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(candidate.AbsolutePath, jobInfo);
                }

                // Progress is reported for the entries completed before the current one.
                manager.Progress(100 * finished / candidates.Count);
                finished++;
            }

            manager.Progress(100);
        }
Exemple #9
0
        /// <summary>
        /// Parse the stdout.txt file from the job manager.
        /// The stream reader is kept in <c>cachedStdoutReader</c> between calls (it is only closed once the
        /// job manager vertex is Failed or Successful), so a later call presumably resumes reading
        /// where the previous call stopped.
        /// </summary>
        /// <param name="file">File to parse.</param>
        /// <param name="manager">Communication manager; used for status, progress and cancellation.</param>
        /// <returns>
        /// True if the parsing succeeds; false if the stream reports an exception on opening or if any
        /// exception escapes the read loop.
        /// </returns>
        private bool ParseStdout(IClusterResidentObject file, CommManager manager)
        {
            // Number of lines consumed from the reader during this call.
            int currentLine = 0;
            if (this.stdoutLinesParsed == 0)
                // don't lose it if we are only parsing the tail.
                this.lastTimestampSeen = this.Summary.Date; // start from the job submission timestamp

            // we are reusing the stream
            // (with the counter reset to 0 the 'currentLine >= this.stdoutLinesParsed' test below is always true)
            this.stdoutLinesParsed = 0;

            try
            {
                long filesize = file.Size;
                long readbytes = 0;
                string message = "Scanning JM stdout " + file;
                // The size is only appended to the message when it is non-negative.
                if (filesize >= 0)
                    message += string.Format("({0:N0} bytes)", filesize);
                manager.Status(message, StatusKind.LongOp);

                // Open the stream only if no reader is cached from a previous call.
                if (this.cachedStdoutReader == null)
                    this.cachedStdoutReader = file.GetStream();
                if (this.cachedStdoutReader.Exception != null)
                {
                    manager.Status("Exception while opening stdout " + this.cachedStdoutReader.Exception.Message, StatusKind.Error);
                    return false;
                }

                while (!this.cachedStdoutReader.EndOfStream)
                {
                    string line = this.cachedStdoutReader.ReadLine();
                    // Only the characters of this first physical line are counted (no line terminators, no
                    // continuation lines), so the progress computed from readbytes is an approximation.
                    readbytes += line.Length;
                    if (currentLine >= this.stdoutLinesParsed)
                    {
                        // Keep appending physical lines until the parser reports that the logical line is complete.
                        while (true)
                        {
                            manager.Token.ThrowIfCancellationRequested();
                            // NOTE(review): startLine is reassigned on every retry, so after a continuation it no
                            // longer holds the line on which the logical line started - confirm this is intended.
                            int startLine = currentLine;
                            bool completeLine = true;
                            try
                            {
                                completeLine = this.ParseStdoutLineNew(line);
                            }
                            catch (Exception ex)
                            {
                                // A line that fails to parse is reported and then treated as complete
                                // (completeLine keeps its initial value), so scanning continues with the next line.
                                manager.Status(string.Format("Line {0}: Exception {1}", currentLine, ex.Message), StatusKind.Error);
                                Console.WriteLine("Line {0}: Exception {1}", currentLine, ex);
                            }
                            if (!completeLine)
                            {
                                if (this.cachedStdoutReader.EndOfStream)
                                {
                                    throw new Exception("File ended while scanning for closing quote started on line " + startLine);
                                }

                                // Join the next physical line to the current one and parse the combination again.
                                string extraline = this.cachedStdoutReader.ReadLine();
                                line += "\n" + extraline;
                                currentLine++;
                            }
                            else break;
                        }
                    }
                    currentLine++;
                    // Report progress every 100 lines, and only when the file size is positive.
                    if (currentLine % 100 == 0 && filesize > 0)
                    {
                        manager.Progress(Math.Min(100, (int)(100 * readbytes / filesize)));
                    }
                }

                // Remember how many lines this call consumed; a non-zero value keeps the next call from
                // resetting lastTimestampSeen.
                this.stdoutLinesParsed = currentLine;

                if (this.ManagerVertex != null)
                {
                    if (this.ManagerVertex.End == DateTime.MinValue)
                        // approximation
                        this.ManagerVertex.End = this.lastTimestampSeen;

                    // we are done with this stream
                    if (this.ManagerVertex.State == ExecutedVertexInstance.VertexState.Failed ||
                        this.ManagerVertex.State == ExecutedVertexInstance.VertexState.Successful)
                    {
                        this.cachedStdoutReader.Close();
                        this.cachedStdoutReader = null; // will force reopening if refreshed
                    }
                }
                return true;
            }
            catch (Exception e)
            {
                // NOTE(review): this also catches the exception thrown by ThrowIfCancellationRequested above,
                // so a cancellation is reported as a read error and turned into 'false' - confirm this is intended.
                manager.Status("Exception while reading stdout " + e.Message, StatusKind.Error);
                Trace.TraceInformation(e.ToString());
                return false;
            }
        }
Exemple #10
0
        /// <summary>
        /// Diagnose a list of jobs.
        /// Null entries in the list are skipped; every other job contributes one log to the result.
        /// </summary>
        /// <param name="jobs">Jobs to diagnose.</param>
        /// <param name="config">Cluster configuration.</param>
        /// <param name="manager">Communication manager; used for cancellation, status and progress reporting.</param>
        /// <returns>One diagnosis log per non-null job, in input order.</returns>
        public static List<DiagnosisLog> DiagnoseJobs(IEnumerable<DryadLinqJobSummary> jobs, ClusterConfiguration config, CommManager manager)
        {
            // Materialize once so the sequence is not enumerated twice (count + loop).
            DryadLinqJobSummary[] dryadLinqJobSummaries = jobs as DryadLinqJobSummary[] ?? jobs.ToArray();
            int jobCount = dryadLinqJobSummaries.Length;

            List<DiagnosisLog> result = new List<DiagnosisLog>();
            int done = 0;
            foreach (DryadLinqJobSummary summary in dryadLinqJobSummaries)
            {
                if (summary != null)
                {
                    manager.Token.ThrowIfCancellationRequested();
                    JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(config, summary, manager);
                    manager.Status("Diagnosing " + summary.ShortName(), StatusKind.LongOp);
                    DiagnosisLog log = diagnosis.Diagnose();
                    result.Add(log);
                }

                // Null entries are included in jobCount, so they have to advance the counter as well;
                // previously they were skipped before the increment and progress stopped short of 100%.
                done++;
                manager.Progress(done * 100 / jobCount);
            }
            manager.Status("Diagnosis complete", StatusKind.OK);
            return result;
        }
Exemple #11
0
        /// <summary>
        /// Parse a part of the 'originalInfo.txt' file to discover a set of channel endpoints.
        /// The part starts with a line whose leading token (up to the first space) is the channel count,
        /// followed by one line per channel.
        /// </summary>
        /// <param name="sr">Stream reader which contains the channel information.</param>
        /// <param name="uriprefix">If the channel is an output, prefix the path with this; this is null for inputs.</param>
        /// <param name="skip">If true, do not return anything (still useful to advance the stream reader).</param>
        /// <param name="fast">If true the channel sizes are not discovered; this is much faster, since no remote machines are queried for files.</param>
        /// <param name="manager">Communication manager.</param>
        /// <returns>The channels keyed by their index, or null on failure or when <paramref name="skip"/> is true.</returns>
        private Dictionary<int, ChannelEndpointDescription> DiscoverOriginalInfoChannels(ISharedStreamReader sr, string uriprefix, bool skip, bool fast, CommManager manager)
        {
            bool isInput = uriprefix == null;

            string countline = sr.ReadLine();
            if (countline == null)
            {
                return null;
            }

            // Only the text before the first space is the count.
            int spaceIndex = countline.IndexOf(' ');
            if (spaceIndex > 0)
            {
                countline = countline.Substring(0, spaceIndex);
            }

            int channelCount;
            if (!int.TryParse(countline, out channelCount))
            {
                return null;
            }

            // A negative count is malformed input: treat it as a failure instead of letting the
            // Dictionary constructor below throw ArgumentOutOfRangeException.
            if (channelCount < 0)
            {
                return null;
            }

            // When skipping, the lines are consumed but nothing is collected, so no dictionary is needed.
            Dictionary<int, ChannelEndpointDescription> channels =
                skip ? null : new Dictionary<int, ChannelEndpointDescription>(channelCount);

            for (int i = 0; i < channelCount; i++)
            {
                string channel = sr.ReadLine();
                if (channel == null)
                {
                    // The stream ended before all announced channels were read.
                    manager.Progress(100);
                    return null;
                }

                if (!skip)
                {
                    ChannelEndpointDescription desc = new ChannelEndpointDescription(isInput, i, channel, uriprefix, fast, manager.Status);
                    channels.Add(i, desc);
                    manager.Progress(i * 100 / channelCount);
                }
            }

            manager.Progress(100);
            return channels;
        }
Exemple #12
0
        /// <summary>
        /// Look to see whether the vertices failed reading from some common set of machines.
        /// This is incomplete: e.g., it does not work for tidyfs streams.
        /// </summary>
        /// <param name="manager">Communication manager.</param>
        /// <returns>
        /// Yes if at least one machine has more than 3 file read failures attributed to it;
        /// No if fewer than 5 non-manager vertices failed or fewer than 5 channel read failures were found;
        /// Dontknow in all other cases.
        /// </returns>
        protected Decision LookForCorrelatedReadFailures(CommManager manager)
        {
            // Below this number of failures there is nothing to investigate.
            const int maxFailures = 5;

            List<ExecutedVertexInstance> suspects = this.Job.Vertices
                .Where(v => v.State == ExecutedVertexInstance.VertexState.Failed)
                .Where(v => !v.IsManager)
                .ToList();
            if (suspects.Count < maxFailures)
            {
                return Decision.No;
            }

            // Collect the channel whose read failed for each failed vertex, when the diagnosis finds one.
            List<ChannelEndpointDescription> failedReads = new List<ChannelEndpointDescription>();
            int inspected = 0;
            foreach (ExecutedVertexInstance suspect in suspects)
            {
                var diagnosis = VertexFailureDiagnosis.CreateVertexFailureDiagnosis(this.Job, this.StaticPlan, suspect, manager);
                var failedChannel = diagnosis.ChannelReadFailure(manager);
                if (failedChannel != null)
                {
                    failedReads.Add(failedChannel);
                }

                inspected++;
                manager.Progress(inspected * 100 / suspects.Count);
            }

            if (failedReads.Count < maxFailures)
            {
                return Decision.No;
            }

            this.Log(DiagnosisMessage.Importance.Final, "There are " + failedReads.Count + " read failures in the job", "");

            // Only file channels carry a path from which a machine can be derived.
            var fileReads = failedReads.Where(ced => ced.UriType == "file").ToList();
            if (fileReads.Count == 0)
            {
                this.Log(DiagnosisMessage.Importance.Final, "All channels with failures are distributed files", "No further information is available");
                return Decision.Dontknow;
            }

            Decision outcome = Decision.Dontknow;
            var perMachine = fileReads.Select(f => new UNCPathname(f.LocalPath).Machine).GroupBy(w => w).ToList();
            foreach (var machineFailures in perMachine)
            {
                int count = machineFailures.Count();
                if (count > 3)
                {
                    this.Log(DiagnosisMessage.Importance.Final, "There are " + count + " read failures reading from machine", machineFailures.Key);
                    outcome = Decision.Yes;
                }
            }

            return outcome;
        }
Exemple #13
0
        /// <summary>
        /// Fill the job info by parsing the stdout.txt.
        /// Refreshes the job status, creates the job manager vertex on first use, parses the job manager
        /// stdout and finally updates the vertices that are still in the Started state according to the job status.
        /// </summary>
        /// <param name="manager">Communication manager.</param>
        /// <returns>True if the stdout was parsed; false if there is no stdout path or the parsing failed.</returns>
        public bool CollectEssentialInformation(CommManager manager)
        {
            this.RefreshJobStatus(manager);

            if (this.ManagerVertex == null)
            {
                // First call: synthesize a vertex that stands for the job manager itself.
                this.ManagerVertex = new ExecutedVertexInstance(this, -1, 0, "JobManager", "", this.Summary.Date);
                this.ManagerVertex.IsManager = true;
                this.ManagerVertex.SetStartInformation(this, this.Summary.Machine, this.Summary.Date, this.Summary.ManagerProcessGuid, "");
                this.ManagerVertex.StartCommandTime = this.ManagerVertex.CreationTime = this.ManagerVertex.VertexScheduleTime = this.Summary.Date;

                // Only a Failed job yields a Failed manager; any other status starts out as Started.
                // (A Succeeded -> Successful mapping used to exist here but is disabled.)
                ExecutedVertexInstance.VertexState initialState =
                    this.Summary.Status == ClusterJobInformation.ClusterJobStatus.Failed
                        ? ExecutedVertexInstance.VertexState.Failed
                        : ExecutedVertexInstance.VertexState.Started;
                this.ManagerVertex.SetState(initialState);
                this.jobVertices.Add(this.ManagerVertex);
            }

            if (this.stdoutpath == null)
            {
                return false;
            }

            bool parsed = this.ParseStdout(this.stdoutpath, manager);
            manager.Progress(100);
            if (!parsed)
            {
                return false;
            }

            this.JobInfoCannotBeCollected = false;
            manager.Status("Stdout parsed", StatusKind.OK);
            this.LastUpdatetime = DateTime.Now;

            if (this.Summary.Status == ClusterJobInformation.ClusterJobStatus.Running)
            {
                // Job still running: every started vertex (and the manager) was running as of now.
                foreach (var vertex in this.Vertices)
                {
                    if (vertex.State == ExecutedVertexInstance.VertexState.Started)
                    {
                        vertex.MarkVertexWasRunning(this.LastUpdatetime);
                    }
                }

                this.ManagerVertex.MarkVertexWasRunning(this.LastUpdatetime);
            }
            else if (this.jobSummary.Status == ClusterJobInformation.ClusterJobStatus.Failed)
            {
                // Job failed: a manager still marked as started failed too, and the started vertices
                // were running until the manager's end time.
                if (this.ManagerVertex.State == ExecutedVertexInstance.VertexState.Started)
                {
                    this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Failed);
                }

                foreach (var vertex in this.Vertices)
                {
                    if (vertex.State == ExecutedVertexInstance.VertexState.Started)
                    {
                        vertex.MarkVertexWasRunning(this.ManagerVertex.End);
                    }
                }
            }

            return true;
        }
        /// <summary>
        /// Force the recomputation of the cluster job list.
        /// Enumerates the sub-directories of the jobs folder URI through the DFS client and registers,
        /// under its ClusterJobID, every job for which information can be obtained.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        // ReSharper disable once UnusedParameter.Global
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            var rootUri = DfsFile.UriFromPath(this.config.JobsFolderUri, "");
            var subdirectories = this.config.DfsClient.EnumerateSubdirectories(rootUri).ToList();

            int finished = 0;
            foreach (var subdirectory in subdirectories)
            {
                manager.Token.ThrowIfCancellationRequested();

                var folderPath = DfsFile.PathFromUri(this.config.JobsFolderUri, subdirectory);
                ClusterJobInformation jobInfo = this.GetJobInfo(folderPath);
                if (jobInfo != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(jobInfo.ClusterJobID, jobInfo);
                }

                // Progress is reported for the folders completed before the current one.
                int percent = 100 * finished / subdirectories.Count;
                finished++;
                manager.Progress(percent);
            }

            manager.Progress(100);
        }
Exemple #15
0
        /// <summary>
        /// Force the recomputation of the cluster job list.
        /// Enumerates the root directory of the Azure client and registers, keyed by the enumerated entry,
        /// every entry for which job information can be obtained.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();
            var rootEntries = this.config.AzureClient.EnumerateDirectory("").ToList();

            int finished = 0;
            foreach (var rootEntry in rootEntries)
            {
                manager.Token.ThrowIfCancellationRequested();

                ClusterJobInformation jobInfo = this.GetJobInfo(rootEntry);
                if (jobInfo != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(rootEntry, jobInfo);
                }

                // Progress is reported for the entries completed before the current one.
                manager.Progress(100 * finished / rootEntries.Count);
                finished++;
            }

            manager.Progress(100);
        }
        /// <summary>
        /// Force the recomputation of the cluster job list.
        /// Scans the sub-folders of the configured jobs folder; the folder name is the job id, and folders
        /// for which no job information is obtained are left out.
        /// </summary>
        /// <param name="virtualCluster">Virtual cluster to use (defined only for some cluster types); not read by this implementation.</param>
        /// <param name="manager">Communication manager, used for cancellation and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            bool folderPresent = Directory.Exists(this.config.JobsFolder);
            if (!folderPresent)
            {
                // Nothing to scan: the list stays empty.
                return;
            }

            string[] candidates = Directory.GetDirectories(this.config.JobsFolder);
            int total = candidates.Length;
            int processed = 0;
            foreach (string path in candidates)
            {
                manager.Token.ThrowIfCancellationRequested();

                string id = Path.GetFileName(path);
                ClusterJobInformation details = this.GetJobInfo(path, id);
                if (details != null)
                {
                    // ReSharper disable once AssignNullToNotNullAttribute
                    this.clusterJobs.Add(id, details);
                }

                // Progress is reported for the folders completed before the current one.
                int percent = processed * 100 / total;
                processed++;
                manager.Progress(percent);
            }

            manager.Progress(100);
        }
        /// <summary>
        /// Recompute the list of jobs on the cluster and add them to the clusterJobs field.
        /// The jobs come from the "*.xml" summaries stored in the "jobs" sub-folder of the cache directory;
        /// that folder is created when missing.
        /// </summary>
        /// <param name="virtualCluster">Unused.</param>
        /// <param name="manager">Communication manager, used for cancellation, error status and progress reporting.</param>
        protected override void RecomputeClusterJobList(string virtualCluster, CommManager manager)
        {
            this.clusterJobs = new Dictionary<string, ClusterJobInformation>();

            bool cacheConfigured = !string.IsNullOrEmpty(CachedClusterResidentObject.CacheDirectory);
            if (!cacheConfigured)
            {
                return;
            }

            string jobsFolder = Path.Combine(CachedClusterResidentObject.CacheDirectory, "jobs");
            if (!Directory.Exists(jobsFolder))
            {
                Directory.CreateDirectory(jobsFolder);
            }

            string[] summaryFiles = Directory.GetFiles(jobsFolder, "*.xml");
            for (int index = 0; index < summaryFiles.Length; index++)
            {
                manager.Token.ThrowIfCancellationRequested();

                DryadLinqJobSummary loaded = Utilities.LoadXml<DryadLinqJobSummary>(summaryFiles[index]);

                // The cluster name is part of the key: two jobs from different clusters may share an id.
                string cacheKey = loaded.Cluster + "-" + loaded.ClusterJobId;
                ClusterJobInformation cached = new ClusterJobInformation(
                    this.Config.Name, loaded.Cluster, cacheKey, loaded.Name, loaded.User,
                    loaded.Date, loaded.EndTime - loaded.Date, loaded.Status);
                cached.SetAssociatedSummary(loaded);

                if (this.clusterJobs.ContainsKey(cacheKey))
                {
                    // Keep the entry that was inserted first and only report the clash.
                    manager.Status("Duplicate job id, cannot insert in cache " + loaded.AsIdentifyingString(), StatusKind.Error);
                    continue;
                }

                this.clusterJobs.Add(cacheKey, cached);
            }

            manager.Progress(100);
        }
Exemple #18
0
        /// <summary>
        /// Cache the vertices in the list; executed on the background thread.
        /// Reports the percentage of vertices handled so far and, at the end, the total number of cached files.
        /// </summary>
        /// <param name="config">Cluster configuration.</param>
        /// <param name="summary">Job to cache.</param>
        /// <param name="vertices">Vertices to cache.</param>
        /// <param name="manager">Communication manager.</param>
        /// <returns>True: success.</returns>
        private static bool CacheAllVertices(
            ClusterConfiguration config, DryadLinqJobSummary summary, List<ExecutedVertexInstance> vertices,
            CommManager manager)
        {
            int done = 0;
            int todo = vertices.Count;
            int files = 0;
            manager.Status("Caching data for " + todo + " vertices", StatusKind.LongOp);
            foreach (ExecutedVertexInstance v in vertices)
            {
                files += CacheVertexInfo(config, summary, v);
                done++;

                // Progress is a percentage. Multiply before dividing: with integer division
                // 'done / todo' is 0 for every vertex but the last one.
                manager.Progress(done * 100 / todo);
            }

            manager.Progress(100);
            manager.Status("Cached " + files + " files", StatusKind.OK);
            return true;
        }