/// <summary>
/// Treat the table as a partitioned file: load the file metadata found at this.Uri and
/// record one partition entry for every partition listed there.
/// EstimatedSize is reset to zero and then accumulates each non-negative partition size.
/// A missing file sets Error to "File not found" and is reported through the delegate;
/// any exception raised along the way is caught and only its message is stored in Error.
/// </summary>
/// <param name="statusReporter">Delegate used to report errors.</param>
private void ParsePartitionedFile(StatusReporter statusReporter)
{
    this.EstimatedSize = 0;

    try
    {
        bool fileIsPresent = File.Exists(this.Uri);
        if (!fileIsPresent)
        {
            this.Error = "File not found";
            statusReporter("Cannot find file " + this.Uri, StatusKind.Error);
            return;
        }

        UNCPathname location = new UNCPathname(this.Uri);
        PartitionedFileMetadata metadata = new PartitionedFileMetadata(location);
        this.PartitionCount = metadata.NumberOfPartitions;

        foreach (var partition in metadata.Partitions)
        {
            StaticPartitionInformation info = new StaticPartitionInformation(
                partition.Number,
                partition.Size,
                partition.NumberOfReplicas);
            this.partitions.Add(info);

            // Negative sizes are left out of the running total.
            if (info.PartitionSize >= 0)
            {
                this.EstimatedSize += info.PartitionSize;
            }
        }
    }
    catch (Exception failure)
    {
        // The failure is not rethrown; only its message is kept.
        this.Error = failure.Message;
    }
}
/// <summary>
/// Fill in the partition list for a table that is stored as a partitioned file.
/// The metadata of the file at this.Uri supplies the partition count and, for each partition,
/// its number, size and replica count. Non-negative sizes are summed into EstimatedSize,
/// which starts from zero. When the file does not exist, Error becomes "File not found" and the
/// problem is reported through the delegate. Exceptions are caught and their message is stored in Error.
/// </summary>
/// <param name="statusReporter">Delegate used to report errors.</param>
private void ParsePartitionedFile(StatusReporter statusReporter)
{
    this.EstimatedSize = 0;
    try
    {
        if (File.Exists(this.Uri))
        {
            PartitionedFileMetadata fileMetadata = new PartitionedFileMetadata(new UNCPathname(this.Uri));
            this.PartitionCount = fileMetadata.NumberOfPartitions;

            foreach (var entry in fileMetadata.Partitions)
            {
                StaticPartitionInformation partitionInfo =
                    new StaticPartitionInformation(entry.Number, entry.Size, entry.NumberOfReplicas);
                this.partitions.Add(partitionInfo);

                // Only non-negative sizes contribute to the estimate.
                if (partitionInfo.PartitionSize >= 0)
                {
                    this.EstimatedSize += partitionInfo.PartitionSize;
                }
            }
        }
        else
        {
            this.Error = "File not found";
            statusReporter("Cannot find file " + this.Uri, StatusKind.Error);
        }
    }
    catch (Exception problem)
    {
        // Nothing is rethrown; the message is all that is recorded.
        this.Error = problem.Message;
    }
}
/// <summary>
/// Create a class representing the set of edges between two stages in the job plan.
/// These collectively look like a partitioned table, with one partition per reported vertex.
/// Tee stages are skipped by walking back to their single producer; if that walk reaches a job
/// input, the input itself is described instead.
/// If the source stage has multiple outputs there is not enough information to return meaningful
/// information, and the result only carries an error message.
/// </summary>
/// <param name="job">Job whose slice we are displaying.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="source">Stage in the job which produces the data.</param>
/// <param name="status">Delegate used to report errors; only invoked here when an input is scanned.</param>
/// <param name="showCancelled">If true include cancelled vertices.</param>
/// <returns>
/// The table description. Its Error member is set when the stage has several outputs or when the
/// job has no information about the stage.
/// </returns>
/// <exception cref="DryadException">A tee stage does not have exactly one input.</exception>
public static StaticPartitionedTableInformation StageOutput(
    DryadLinqJobInfo job,
    DryadJobStaticPlan plan,
    DryadJobStaticPlan.Stage source,
    StatusReporter status,
    bool showCancelled)
{
    // Captured before tee stages are skipped, so the header names the stage the caller asked about.
    string header = "Output of " + source.Name;

    // First check whether in the static plan this is virtual
    while (source.IsTee)
    {
        var sourceInputs = plan.GetStageConnections(source, true).ToList();
        if (sourceInputs.Count != 1)
        {
            throw new DryadException("Unexpected number of inputs for stage " + source.Name);
        }

        source = sourceInputs[0].From;
    }

    // If we reached the input return information about that input
    if (source.IsInput)
    {
        status("Scanning " + source.Name, StatusKind.LongOp);
        StaticPartitionedTableInformation inputTable = new StaticPartitionedTableInformation(
            job.ClusterConfiguration, source.UriType, source.Uri, source.Code, status);

        // 'header' already starts with "Output of "; do not prepend the prefix a second time.
        inputTable.Header = header;
        inputTable.constructorArguments = new SaveConstructorArguments
        {
            code = null,
            source = source,
            plan = plan
        };
        return inputTable;
    }

    StaticPartitionedTableInformation result = new StaticPartitionedTableInformation();
    result.Name = "Output of vertices in stage " + source.Name;

    // 'header' already starts with "Output of "; do not prepend the prefix a second time.
    result.Header = header;
    result.constructorArguments = new SaveConstructorArguments
    {
        code = null,
        source = source,
        plan = plan
    };

    // Check whether this stage has multiple outputs; this can only happen for 'Fork' operators.
    var destinations = plan.GetStageConnections(source, false);
    if (destinations.Count() > 1)
    {
        result.Error = "Cannot provide information about one of multiple outputs of a stage.";
        return result;
    }

    DryadLinqJobStage stage = job.GetStage(source.Name);
    if (stage == null)
    {
        result.Error = "There is no information about the output of stage " + source.Name;
        return result;
    }

    result.EstimatedSize = 0;

    // NOTE(review): PartitionCount counts all initiated vertices, while the loop below adds only
    // vertices in selected states, so the two numbers may differ - confirm this is intended.
    result.PartitionCount = stage.TotalInitiatedVertices;

    int count = 0;
    foreach (ExecutedVertexInstance vi in stage.Vertices)
    {
        bool include =
            vi.State == ExecutedVertexInstance.VertexState.Successful ||
            vi.State == ExecutedVertexInstance.VertexState.Failed ||
            (showCancelled && vi.State == ExecutedVertexInstance.VertexState.Cancelled);
        if (!include)
        {
            continue;
        }

        StaticPartitionInformation spi = new StaticPartitionInformation(count++, vi.DataWritten, vi.Name + " v." + vi.Version);
        result.AddPartition(spi);

        // Only non-negative sizes are summed, so a negative placeholder cannot shrink the estimate.
        if (vi.DataWritten >= 0)
        {
            result.EstimatedSize += vi.DataWritten;
        }
    }

    return result;
}
/// <summary>
/// Append one partition description to the list of partitions held by this table.
/// </summary>
/// <param name="spi">Partition to add.</param>
public void AddPartition(StaticPartitionInformation spi)
{
    partitions.Add(spi);
}
/// <summary>
/// Create a class representing the set of edges between two stages in the job plan.
/// These collectively look like a partitioned table, with one partition per reported vertex.
/// Tee stages are skipped by walking back to their single producer; if that walk reaches a job
/// input, the input itself is described instead.
/// If the source stage has multiple outputs there is not enough information to return meaningful
/// information, and the result only carries an error message.
/// </summary>
/// <param name="job">Job whose slice we are displaying.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="source">Stage in the job which produces the data.</param>
/// <param name="status">Delegate used to report errors; only invoked here when an input is scanned.</param>
/// <param name="showCancelled">If true include cancelled vertices.</param>
/// <returns>
/// The table description. Its Error member is set when the stage has several outputs or when the
/// job has no information about the stage.
/// </returns>
/// <exception cref="DryadException">A tee stage does not have exactly one input.</exception>
public static StaticPartitionedTableInformation StageOutput(
    DryadLinqJobInfo job,
    DryadJobStaticPlan plan,
    DryadJobStaticPlan.Stage source,
    StatusReporter status,
    bool showCancelled)
{
    // Captured before tee stages are skipped, so the header names the stage the caller asked about.
    string header = "Output of " + source.Name;

    // First check whether in the static plan this is virtual
    while (source.IsTee)
    {
        var sourceInputs = plan.GetStageConnections(source, true).ToList();
        if (sourceInputs.Count != 1)
        {
            throw new DryadException("Unexpected number of inputs for stage " + source.Name);
        }

        source = sourceInputs[0].From;
    }

    // If we reached the input return information about that input
    if (source.IsInput)
    {
        status("Scanning " + source.Name, StatusKind.LongOp);
        StaticPartitionedTableInformation result = new StaticPartitionedTableInformation(
            job.ClusterConfiguration, source.UriType, source.Uri, source.Code, status);

        // 'header' already carries the "Output of " prefix; adding it again would duplicate it.
        result.Header = header;
        result.constructorArguments = new SaveConstructorArguments
        {
            code = null,
            source = source,
            plan = plan
        };
        return result;
    }
    else
    {
        StaticPartitionedTableInformation result = new StaticPartitionedTableInformation();
        result.Name = "Output of vertices in stage " + source.Name;

        // 'header' already carries the "Output of " prefix; adding it again would duplicate it.
        result.Header = header;
        result.constructorArguments = new SaveConstructorArguments
        {
            code = null,
            source = source,
            plan = plan
        };

        // Check whether this stage has multiple outputs; this can only happen for 'Fork' operators.
        var destinations = plan.GetStageConnections(source, false);
        if (destinations.Count() > 1)
        {
            result.Error = "Cannot provide information about one of multiple outputs of a stage.";
            return result;
        }

        DryadLinqJobStage stage = job.GetStage(source.Name);
        if (stage == null)
        {
            result.Error = "There is no information about the output of stage " + source.Name;
            return result;
        }

        result.EstimatedSize = 0;

        // NOTE(review): PartitionCount counts all initiated vertices, while the loop below adds only
        // vertices in selected states, so the two numbers may differ - confirm this is intended.
        result.PartitionCount = stage.TotalInitiatedVertices;

        int count = 0;
        foreach (ExecutedVertexInstance vi in stage.Vertices)
        {
            if (vi.State == ExecutedVertexInstance.VertexState.Successful ||
                vi.State == ExecutedVertexInstance.VertexState.Failed ||
                (showCancelled && vi.State == ExecutedVertexInstance.VertexState.Cancelled))
            {
                StaticPartitionInformation spi = new StaticPartitionInformation(count++, vi.DataWritten, vi.Name + " v." + vi.Version);
                result.AddPartition(spi);

                // Only non-negative sizes are summed, so a negative placeholder cannot shrink the estimate.
                if (vi.DataWritten >= 0)
                {
                    result.EstimatedSize += vi.DataWritten;
                }
            }
        }

        return result;
    }
}