/// <summary>
/// Create a form to show the diagnosis result.
/// </summary>
/// <param name="job">Job diagnosed; may be null, in which case the button_job control is disabled.</param>
/// <param name="summary">Job summary; supplies the name, date and identifying string shown by the form.</param>
/// <param name="log">Diagnosis log; every message it returns is appended to the message box on its own line.</param>
public DiagnosisResult(DryadLinqJobInfo job, DryadLinqJobSummary summary, DiagnosisLog log)
{
    this.InitializeComponent();

    this.job = job;
    if (this.job == null)
    {
        this.button_job.Enabled = false;
    }

    // ReSharper disable once DoNotCallOverridableMethodsInConstructor
    this.Text = "Diagnosis results for " + summary.Name + " " + summary.Date;

    // Fixed the misspelling "diangosed" in the user-visible label.
    this.textBox_job.Text = "Job being diagnosed: " + summary.AsIdentifyingString();

    foreach (string s in log.Message())
    {
        this.textBox_message.AppendText(s);
        this.textBox_message.AppendText(Environment.NewLine);
    }
}
/// <summary>
/// Factory for the job-level failure diagnosis object matching the cluster the job ran on.
/// In the code visible here no cluster type is handled: once a cached configuration has been
/// replaced by the actual one, the method always throws.
/// </summary>
/// <param name="job">Job to diagnose; its cluster configuration selects the diagnosis type.</param>
/// <param name="plan">Plan of the job being diagnosed (not used by the visible code).</param>
/// <param name="manager">Communication manager (not used by the visible code).</param>
/// <returns>A subclass of JobFailureDiagnosis with the type appropriate for the job.</returns>
/// <exception cref="InvalidOperationException">The cluster type is not supported for diagnosis.</exception>
public static JobFailureDiagnosis CreateJobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
{
    ClusterConfiguration config = job.ClusterConfiguration;

    // A cache configuration stands in for another one; diagnose against that one.
    CacheClusterConfiguration cached = config as CacheClusterConfiguration;
    if (cached != null)
    {
        config = cached.ActualConfig(job.Summary);
    }

    string message = "Configuration of type " + config.TypeOfCluster + " not supported for diagnosis";
    throw new InvalidOperationException(message);
}
/// <summary>
/// Factory for the vertex-level failure diagnosis object matching the cluster the job ran on.
/// In the code visible here no cluster type is handled: once a cached configuration has been
/// replaced by the actual one, the method always throws.
/// </summary>
/// <param name="job">Job containing the vertex; its cluster configuration selects the diagnosis type.</param>
/// <param name="plan">Plan of the executed job (not used by the visible code).</param>
/// <param name="vertex">Vertex to diagnose (not used by the visible code).</param>
/// <param name="manager">Communication manager (not used by the visible code).</param>
/// <returns>A subclass of VertexFailureDiagnosis.</returns>
/// <exception cref="InvalidOperationException">The cluster type is not handled.</exception>
public static VertexFailureDiagnosis CreateVertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
{
    ClusterConfiguration config = job.ClusterConfiguration;

    // A cache configuration stands in for another one; diagnose against that one.
    CacheClusterConfiguration cached = config as CacheClusterConfiguration;
    if (cached != null)
    {
        config = cached.ActualConfig(job.Summary);
    }

    string message = "Config of type " + config.TypeOfCluster + " not handled";
    throw new InvalidOperationException(message);
}
/// <summary>
/// Set up the state shared by all job-level diagnoses: a fresh diagnosis log for the job
/// and a reference to the job's manager vertex.
/// </summary>
/// <param name="job">Job to diagnose.</param>
/// <param name="plan">Plan of the diagnosed job.</param>
/// <param name="manager">Communication manager.</param>
protected JobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
    : base(job, plan, manager)
{
    var jobSummary = job.Summary;
    this.diagnosisLog = new DiagnosisLog(job, jobSummary);

    // Read through the Job member initialised by the base constructor.
    this.jobManager = this.Job.ManagerVertex;
}
/// <summary>
/// Record the inputs common to every failure diagnosis, together with the summary and
/// cluster configuration taken from the job.
/// </summary>
/// <param name="job">Job being diagnosed; must not be null since its summary and cluster configuration are read.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="manager">Communication manager.</param>
protected FailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
{
    // Values passed in by the caller.
    Job = job;
    StaticPlan = plan;
    Manager = manager;

    // Values derived from the job.
    Summary = job.Summary;
    cluster = job.ClusterConfiguration;
}
/// <summary>
/// Set up the state shared by all vertex-level diagnoses: the job, the vertex under
/// diagnosis and the name of the stack trace file.
/// </summary>
/// <param name="job">Job containing the vertex.</param>
/// <param name="plan">Plan of the executed job.</param>
/// <param name="vertex">Vertex to diagnose.</param>
/// <param name="manager">Communication manager.</param>
protected VertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
    : base(job, plan, manager)
{
    // The job is also handed to the base constructor; the assignment is repeated here as in the original.
    Job = job;
    Vertex = vertex;

    // ReSharper disable once DoNotCallOverridableMethodsInConstructor
    stackTraceFile = "dryadLinqStackTrace.txt";
}
/// <summary>
/// Initialize a job browser for a specified job.
/// Builds the form's controls, the background work queue, the status writer, the refresh timer,
/// the property enumerators and binding lists behind the job/stage/table/vertex grids, the graph
/// viewer and the schedule drawing surface, installs the tooltips, and finally assigns the job.
/// </summary>
/// <param name="job">Job to display; assigned to <c>Job</c> as the last step, after everything else is set up.</param>
public JobBrowser(DryadLinqJobInfo job)
{
    // Set before anything else; this constructor does not clear it again.
    this.doingStartup = true;
    this.InitializeComponent();

    this.queueWorker = new BackgroundWorker();
    this.queue = new BackgroundWorkQueue(this.queueWorker, this.toolStripStatusLabel_currentWork, this.toolStripStatusLabel_backgroundWork);
    this.WarnedAboutDebugging = false;
    this.status = new StatusWriter(this.toolStripStatusLabel, this.statusStrip, this.Status);

    this.refreshTimer = new System.Windows.Forms.Timer();
    this.refreshTimer.Interval = 30000; // 30 seconds
    this.refreshTimer.Tick += this.refreshTimer_Tick;

    #region SET_JOB_HEADER
    this.jobHeaderData = new BindingListSortable<PropertyEnumerator<DryadLinqJobInfo>.PropertyValue>();
    this.dataGridView_jobHeader.DataSource = this.jobHeaderData;
    this.SetDataGridViewColumnsSize(this.dataGridView_jobHeader);
    this.jobPropertyEnumerator = new PropertyEnumerator<DryadLinqJobInfo>();
    this.jobPropertyEnumerator.ValueFormatter = this.PropertyValueFormatter;
    List<string> jobPropertiesToSkip = new List<string>
    {
        "ClusterConfiguration",
        "Processes",
        "Vertices",
        "JM",
        "Name",
        "JobManagerVertex",
        "JMStdoutIncomplete",
        "JobInfoCannotBeCollected"
    };
    this.jobPropertyEnumerator.Skip(jobPropertiesToSkip);
    this.jobPropertyEnumerator.Expand("Summary");
    #endregion

    #region SET_STAGE_HEADER
    this.stageHeaderData = new BindingListSortable<PropertyEnumerator<DryadLinqJobStage>.PropertyValue>();
    this.stagePropertyEnumerator = new PropertyEnumerator<DryadLinqJobStage>();
    this.stagePropertyEnumerator.ValueFormatter = this.PropertyValueFormatter;
    List<string> stagePropertiesToSkip = new List<string>
    {
        "Vertices",
        "Name"
    };
    this.stagePropertyEnumerator.Skip(stagePropertiesToSkip);
    this.tableHeaderData = new BindingListSortable<PropertyEnumerator<StaticPartitionedTableInformation>.PropertyValue>();
    this.tablePropertyEnumerator = new PropertyEnumerator<StaticPartitionedTableInformation>();
    this.tablePropertyEnumerator.ValueFormatter = this.PropertyValueFormatter;
    this.tablePropertyEnumerator.Skip("Partitions", "Header", "Code");
    this.SetNoStageOrTable("", false);
    #endregion

    #region SET_STAGE_DATA
    this.stageData = new BindingListSortable<ExecutedVertexInstance>();
    this.tablePartitionsData = new BindingListSortable<StaticPartitionedTableInformation.StaticPartitionInformation>();
    #endregion

    #region SET_VERTEX_HEADER
    this.vertexHeaderData = new BindingListSortable<PropertyEnumerator<ExecutedVertexInstance>.PropertyValue>();
    this.dataGridView_vertexHeader.DataSource = this.vertexHeaderData;
    this.SetDataGridViewColumnsSize(this.dataGridView_vertexHeader);
    this.vertexPropertyEnumerator = new PropertyEnumerator<ExecutedVertexInstance>();
    this.vertexPropertyEnumerator.ValueFormatter = this.PropertyValueFormatter;
    List<string> vertexPropertiesToSkip = new List<string>
    {
        "JobSummary",
        "InputChannels",
        "OutputChannels",
        "Name",
        "LogFilesPattern",
        "IsManager"
    };
    this.vertexPropertyEnumerator.Skip(vertexPropertiesToSkip);
    #endregion

    // Disable the vertex context menu, since none of these operations work at this point
    this.contextMenu_stageVertex.Enabled = false;
    this.plansHaveBeenBuilt = false;

    // Viewer for the static plan graph, with its built-in navigation and toolbar hidden.
    this.graphViewer = new Msagl.GraphViewerGdi.GViewer();
    this.graphViewer.Dock = DockStyle.Fill;
    this.graphViewer.NavigationVisible = false;
    this.graphViewer.ToolBarIsVisible = false;
    this.graphViewer.MouseClick += this.graphViewer_MouseClick;
    this.graphViewer.MouseDoubleClick += this.graphViewer_MouseDoubleClick;
    this.graphViewer.InsertingEdge = false;
    this.staticGraphZoomLevel = 0;

    // Drawing surface layered over the job schedule panel.
    this.planDrawSurface = new DrawingSurface2D(this.panel_jobSchedule);
    this.planDrawSurface.SetMargins(4, 4, 4, 4);
    this.panel_jobSchedule.MouseDoubleClick += this.panel_jobSchedule_MouseDoubleClick;
    this.planDrawSurface.FastDrawing = false;
    this.colorByStagestatusToolStripMenuItem.Checked = true;
    this.defaultBackColor = this.label_job.BackColor;
    this.planVisible = PlanVisible.None;
    this.linkCache = new Dictionary<string, IClusterResidentObject>();
    this.mouseIsHeld = false;
    this.draggingMouse = false;
    this.drawingSurfaceSize = 0.0;

    #region TOOLTIPS
    ToolTip help = new ToolTip();
    help.SetToolTip(this.richTextBox_file, "Click on links to follow; control-click to open in explorer; alt-click to follow an input channel to its source.");
    help.SetToolTip(this.panel_scheduleContainer, "Displays the job schedule; click to select.");
    help.SetToolTip(this.checkBox_refresh, "Refreshes the job status every 30s.");
    help.SetToolTip(this.graphViewer, "Click to select stages; Ctrl +/- to zoom.");
    help.SetToolTip(this.dataGridView_jobHeader, "Selecting some rows filters the data.");
    help.SetToolTip(this.comboBox_plan, "Display the job plan in various forms.");
    help.SetToolTip(this.comboBox_vertexInformation, "Display more information about the vertex.");
    help.SetToolTip(this.label_job, "Global job information.");
    help.SetToolTip(this.label_stage, "Information about the selected stage/table. Select rows for filtering.");
    help.SetToolTip(this.label_Vertex, "Information about the selected vertex.");
    help.SetToolTip(this.panel_jobSchedule, "Click on the stages or tables for more information; drag to zoom.");
    help.SetToolTip(this.textBox_stageCode, "Code executed by the selected stage.");
    help.SetToolTip(this.textBox_find, "Type a string to find.");
    help.SetToolTip(this.button_clearFind, "Stop finding.");
    help.SetToolTip(this.button_filter, "Show only lines matching string to find (case-sensitive).");
    help.SetToolTip(this.button_findNext, "Find next occurrence of string.");
    help.SetToolTip(this.button_findPrev, "Find previous occurrence of string.");
    help.SetToolTip(this.label_title, "File currently displayed.");
    #endregion

    // Assigned last, once every control and helper above exists.
    this.Job = job;
}
/// <summary>
/// Create a new, empty diagnostic log for a job.
/// </summary>
/// <param name="job">Job the log refers to; stored as given.</param>
/// <param name="summary">Summary of the job; stored as given.</param>
public DiagnosisLog(DryadLinqJobInfo job, DryadLinqJobSummary summary)
{
    Job = job;
    Summary = summary;

    // The log starts out with no messages.
    messages = new List<DiagnosisMessage>();
}
/// <summary>
/// The vertex has started: record where and when it runs, move it to the Started state and
/// resolve its work directory, stdout file, log directory and log file pattern from the cluster configuration.
/// </summary>
/// <param name="job">Job containing the vertex.</param>
/// <param name="machine">Machine on which vertex is run.</param>
/// <param name="approxStartTime">Approximate starting time (the real value is known when the vertex is terminated).</param>
/// <param name="identifier">Id of process running this vertex (several vertices may share a process).</param>
/// <param name="uniqueId">Unique identifier.</param>
/// <exception cref="DryadException">
/// <paramref name="approxStartTime"/> is <see cref="DateTime.MinValue"/>. The check runs after the
/// state change, so the vertex is already marked Started when this is thrown.
/// </exception>
public void SetStartInformation(DryadLinqJobInfo job, string machine, DateTime approxStartTime, DryadProcessIdentifier identifier, string uniqueId)
{
    this.Machine = machine;
    this.Start = approxStartTime;
    this.ProcessIdentifier = identifier;

    var cluster = job.ClusterConfiguration;
    var jobSummary = job.Summary;

    this.WorkDirectory = cluster.ProcessWorkDirectory(this.ProcessIdentifier, false, machine, jobSummary);
    this.StdoutFile = cluster.ProcessStdoutFile(this.ProcessIdentifier, false, machine, jobSummary);
    this.SetState(VertexState.Started);

    if (approxStartTime == DateTime.MinValue)
    {
        throw new DryadException("Unexpected small start time for vertex");
    }

    this.LogDirectory = cluster.ProcessLogDirectory(this.ProcessIdentifier, false, machine, jobSummary);
    this.LogFilesPattern = cluster.VertexLogFilesPattern(false, jobSummary);
    this.UniqueID = uniqueId;

    // don't cache until vertex proved terminated
    if (this.StdoutFile != null)
    {
        this.StdoutFile.ShouldCacheLocally = false;
    }

    if (this.LogDirectory != null)
    {
        this.LogDirectory.ShouldCacheLocally = false;
    }
}
/// <summary>
/// Create information about a job run on the cluster.
/// Any exception raised while building or filling the job is traced, reported through
/// <paramref name="manager"/> as an error, and turned into a null result.
/// </summary>
/// <param name="cf">Cluster configuration.</param>
/// <param name="summary">Summary description of the job.</param>
/// <param name="fill">If true, fill all the information, otherwise the user will have to call FillInformation on the result later.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>The Dryad job description, or null if it could not be created.</returns>
public static DryadLinqJobInfo CreateDryadLinqJobInfo(ClusterConfiguration cf, DryadLinqJobSummary summary, bool fill, CommManager manager)
{
    DryadLinqJobInfo result;

    try
    {
        result = new DryadLinqJobInfo(cf, summary);
        if (fill)
        {
            result.CollectEssentialInformation(manager);
        }
    }
    catch (Exception e)
    {
        Trace.TraceInformation(e.ToString());
        string report = "Could not collect job information for " + summary.Name + ": " + e.Message;
        manager.Status(report, StatusKind.Error);
        return null;
    }

    return result;
}
/// <summary>
/// Create the record for a vertex that has just been created: identity fields come from the
/// arguments, everything else starts at an "unknown" default.
/// </summary>
/// <param name="job">Information about the current job; only the type of its cluster configuration is read here.</param>
/// <param name="number">Vertex number, unique in job.</param>
/// <param name="version">Vertex version.</param>
/// <param name="name">Name of vertex in graph.</param>
/// <param name="uniqueId">Unique vertex identifier; on some platforms the value is not correct at this point.</param>
/// <param name="timeStamp">Time when vertex was created; maybe MinValue if unknown.</param>
public ExecutedVertexInstance(DryadLinqJobInfo job, int number, int version, string name, string uniqueId, DateTime timeStamp)
{
    // Identity.
    this.Number = number;
    this.Name = name;
    this.Version = version;

    // Defaults for a vertex that has not run yet.
    this.ProcessIdentifier = new DryadProcessIdentifier();
    this.IsManager = false;
    this.DataRead = -1;
    this.DataWritten = -1;
    this.State = VertexState.Created;
    this.error = "";
    this.Machine = "";
    this.timingSet = false;

    this.UniqueID = uniqueId;
    this.ClusterConfigType = job.ClusterConfiguration.GetType().ToString();
    this.channelsAreFinal = false;

    // Called only after the assignments above, as in the original ordering.
    this.ComputeStageName();

    this.CreationTime = timeStamp;

    // No timing information is known yet.
    this.End = DateTime.MinValue;
    this.VertexScheduleTime = DateTime.MinValue;
    this.StartCommandTime = DateTime.MinValue;
    this.Start = DateTime.MinValue;
}
/// <summary>
/// Factory: create the plan for a given job by locating the job's query plan file and parsing it.
/// </summary>
/// <param name="dryadLinqJobInfo">Job to create plan for.</param>
/// <param name="manager">Communication manager; receives progress and error status messages.</param>
/// <returns>The parsed plan, or null if the query plan file reported an exception.</returns>
public static DryadJobStaticPlan CreatePlan(DryadLinqJobInfo dryadLinqJobInfo, CommManager manager)
{
    manager.Status("Trying to build static plan", StatusKind.LongOp);

    ClusterConfiguration config = dryadLinqJobInfo.ClusterConfiguration;

    // The plan file is looked up through the job's own configuration, before any cache unwrapping.
    IClusterResidentObject file = config.JobQueryPlan(dryadLinqJobInfo.Summary);

    CacheClusterConfiguration cached = config as CacheClusterConfiguration;
    if (cached != null)
    {
        config = cached.ActualConfig(dryadLinqJobInfo.Summary);
    }

    if (file.Exception != null)
    {
        manager.Status("Exception while looking for plan " + file.Exception.Message, StatusKind.Error);
        return null;
    }

    DryadJobStaticPlan plan = new DryadLinqJobStaticPlan(config, file.GetStream());
    plan.ParseQueryPlan(manager);
    return plan;
}
/// <summary>
/// Generate a new view of the same partitioned table by replaying the arguments it was built from.
/// </summary>
/// <param name="job">Job containing the table.</param>
/// <param name="statusReporter">Delegate used to report errors.</param>
/// <param name="showCancelled">Show the cancelled vertices.</param>
/// <returns>The new view.</returns>
public StaticPartitionedTableInformation Refresh(DryadLinqJobInfo job, StatusReporter statusReporter, bool showCancelled)
{
    var saved = this.constructorArguments;

    // No saved code: the table was derived from a plan stage, so recompute it from that stage.
    if (saved.code == null)
    {
        return StageOutput(job, saved.plan, saved.source, statusReporter, showCancelled);
    }

    return new StaticPartitionedTableInformation(this.Config, this.UriType, this.Uri, saved.code, statusReporter);
}
/// <summary>
/// Create a class representing the set of edges between two stages in the job plan.
/// These collectively look like a partitioned table.
/// If the source stage has multiple outputs there is not enough information to return meaningful information.
/// </summary>
/// <param name="job">Job whose slice we are displaying.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="source">Stage in the job which produces the data. Tee stages are followed back to their single input.</param>
/// <param name="status">Delegate used to report errors.</param>
/// <param name="showCancelled">If true include cancelled vertices.</param>
/// <returns>
/// The table description. When the stage has several outputs, or the job has no information
/// about the stage, the result carries an <c>Error</c> message and no partitions.
/// </returns>
/// <exception cref="DryadException">A Tee stage does not have exactly one input.</exception>
public static StaticPartitionedTableInformation StageOutput(
    DryadLinqJobInfo job,
    DryadJobStaticPlan plan,
    DryadJobStaticPlan.Stage source,
    StatusReporter status,
    bool showCancelled)
{
    // Captured before Tee stages are followed, so the header names the stage the caller asked about.
    string header = "Output of " + source.Name;

    // First check whether in the static plan this is virtual
    while (source.IsTee)
    {
        var sourceInputs = plan.GetStageConnections(source, true).ToList();
        if (sourceInputs.Count != 1)
        {
            throw new DryadException("Unexpected number of inputs for stage " + source.Name);
        }

        source = sourceInputs[0].From;
    }

    // If we reached the input return information about that input
    if (source.IsInput)
    {
        status("Scanning " + source.Name, StatusKind.LongOp);
        StaticPartitionedTableInformation inputTable = new StaticPartitionedTableInformation(job.ClusterConfiguration, source.UriType, source.Uri, source.Code, status);

        // 'header' already starts with "Output of "; the original prepended it a second time.
        inputTable.Header = header;
        inputTable.constructorArguments = new SaveConstructorArguments
        {
            code = null,
            source = source,
            plan = plan
        };
        return inputTable;
    }

    StaticPartitionedTableInformation result = new StaticPartitionedTableInformation();
    result.Name = "Output of vertices in stage " + source.Name;

    // 'header' already starts with "Output of "; the original prepended it a second time.
    result.Header = header;
    result.constructorArguments = new SaveConstructorArguments
    {
        code = null,
        source = source,
        plan = plan
    };

    // Check whether this stage has multiple outputs; this can only happen for 'Fork' operators.
    var destinations = plan.GetStageConnections(source, false);
    if (destinations.Count() > 1)
    {
        result.Error = "Cannot provide information about one of multiple outputs of a stage.";
        return result;
    }

    DryadLinqJobStage stage = job.GetStage(source.Name);
    if (stage == null)
    {
        result.Error = "There is no information about the output of stage " + source.Name;
        return result;
    }

    result.EstimatedSize = 0;
    result.PartitionCount = stage.TotalInitiatedVertices;

    int count = 0;
    foreach (ExecutedVertexInstance vi in stage.Vertices)
    {
        bool include = vi.State == ExecutedVertexInstance.VertexState.Successful
            || vi.State == ExecutedVertexInstance.VertexState.Failed
            || (showCancelled && vi.State == ExecutedVertexInstance.VertexState.Cancelled);
        if (!include)
        {
            continue;
        }

        StaticPartitionInformation spi = new StaticPartitionInformation(count++, vi.DataWritten, vi.Name + " v." + vi.Version);
        result.AddPartition(spi);

        // -1 marks an unknown amount of data; leave it out of the size estimate.
        if (vi.DataWritten != -1)
        {
            result.EstimatedSize += vi.DataWritten;
        }
    }

    return result;
}
/// <summary>
/// Create a DryadLinqJobSchedule object starting from the job information.
/// Every vertex of every stage is registered with the machine it ran on; the horizontal extent
/// is the job's running time in seconds and the vertical extent grows with the number of machines.
/// </summary>
/// <param name="jobInfo">Job whose schedule is computed.</param>
/// <param name="hideCancelledVertices">If true do not show the cancelled vertices.</param>
public DryadLinqJobSchedule(DryadLinqJobInfo jobInfo, bool hideCancelledVertices)
{
    this.utilization = new MachineUtilization();
    this.vertices = new List<ExecutedVertexInstance>();
    this.startTime = jobInfo.StartJMTime;
    this.X = jobInfo.RunningTime.TotalSeconds;

    // Iterate over a copy of the stage list, as the original does.
    var stages = jobInfo.AllStages().ToList();
    foreach (var stage in stages)
    {
        foreach (var vertex in stage.Vertices)
        {
            if (!hideCancelledVertices || vertex.State != ExecutedVertexInstance.VertexState.Cancelled)
            {
                this.utilization.Add(vertex.Machine, vertex);
                this.vertices.Add(vertex);
            }
        }
    }

    this.utilization.Sort();
    this.Y = spacing * this.utilization.MachineCount;
}