/// <summary>
/// Checks that enumerating a query output that was already executed via
/// SubmitAndWait reuses the stored result instead of launching a second Dryad job.
/// </summary>
/// <returns>True when no DryadLinqException is raised.</returns>
public static bool ToStoreSubmitGetEnumerator() // pass
{
    var ctx = new DryadLinqContext(Config.cluster);
    ctx.LocalExecution = false;
    bool ok = true;
    try
    {
        string target = "unittest/output/ToStoreSubmitGetEnumerator.txt";

        IQueryable<LineRecord> source = ctx.FromStore<LineRecord>(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, "unittest/inputdata/SimpleFile.txt"));
        IQueryable<IEnumerable<int>> fileSets = source.Apply(x => DataGenerator.CreateSimpleFileSets());
        IQueryable<int> firsts = fileSets.Select(x => x.First());

        var shifted = firsts.Select(x => 100 + x).HashPartition(x => x);
        var filtered = shifted.Where(x => true);
        IQueryable<int> stored = filtered.ToStore(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, target), true);
        DryadLinqJobInfo info = stored.SubmitAndWait();

        // Enumerating the materialized output below should not run a new dryad job.
        foreach (int x in stored)
        {
            //Console.WriteLine(x);
        }
    }
    catch (DryadLinqException)
    {
        ok = false;
    }
    return ok;
}
/// <summary>
/// Copies plain data through ToStore, materializes via Submit/Wait, then checks
/// that the output file exists in the Azure container.
/// </summary>
/// <returns>True when the job runs and the output file is present.</returns>
public static bool CopyPlainDataViaToStoreMaterialize()
{
    var ctx = new DryadLinqContext(Config.cluster);
    ctx.LocalExecution = false;
    bool ok = true;
    try
    {
        string target = "unittest/output/CopyPlainDataViaToStoreMaterialize.txt";

        IQueryable<LineRecord> source = ctx.FromStore<LineRecord>(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, "unittest/inputdata/SimpleFile.txt"));
        IQueryable<IEnumerable<int>> fileSets = source.Apply(x => DataGenerator.CreateSimpleFileSets());
        IQueryable<int> firsts = fileSets.Select(x => x.First());
        var stored = firsts.ToStore(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, target), true);

        DryadLinqJobInfo info = DryadLinqQueryable.Submit(stored);
        info.Wait();

        // Enumeration reads back the stored result.
        foreach (int x in stored)
        {
            //Console.WriteLine(x);
        }

        ok &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, target);
    }
    catch (DryadLinqException)
    {
        ok = false;
    }
    return ok;
}
/// <summary>
/// Regression test for bug 11782: Aggregate() and AggregateAsQuery() on a cluster query.
/// </summary>
/// <returns>True if the test passed.</returns>
public static bool Bug11782_Aggregate()
{
    var context = new DryadLinqContext(Config.cluster);
    context.LocalExecution = false;
    bool passed = true;
    try
    {
        string outFile = "unittest/output/Bug11782_Aggregate.out";

        // Fix: pass Config.storageKey to ToAzureUri, matching every other call
        // in this suite; the original omitted it, shifting the argument meanings.
        IQueryable<LineRecord> input = context.FromStore<LineRecord>(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, "unittest/inputdata/SimpleFile.txt"));
        IQueryable<IEnumerable<int>> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets());
        IQueryable<int> pt1 = simple.Select(x => x.First());

        //test Aggregate()
        var c = pt1.Select(x => x).Aggregate((x, y) => x + y);

        //test AggregateAsQuery()
        // NOTE(review): ToStore is given the bare path while FileExists below checks the
        // Azure container — confirm ToStore(string) resolves against the same container.
        var q = pt1.Select(x => x).AggregateAsQuery((x, y) => x + y).ToStore(outFile);
        DryadLinqJobInfo info = DryadLinqQueryable.Submit(q);
        info.Wait();

        passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile);
    }
    catch (DryadLinqException)
    {
        passed &= false;
    }
    return passed;
}
/// <summary>
/// Create a DryadLinqJobSchedule object starting from the job information.
/// </summary>
/// <param name="jobInfo">Job whose schedule is computed.</param>
/// <param name="hideCancelledVertices">If true do not show the cancelled vertices.</param>
public DryadLinqJobSchedule(DryadLinqJobInfo jobInfo, bool hideCancelledVertices)
{
    this.utilization = new MachineUtilization();
    this.startTime = jobInfo.StartJMTime;
    this.X = jobInfo.RunningTime.TotalSeconds;
    this.vertices = new List<ExecutedVertexInstance>();

    // Bucket every (optionally non-cancelled) vertex by the machine it ran on.
    foreach (var stage in jobInfo.AllStages().ToList())
    {
        foreach (var vertex in stage.Vertices)
        {
            bool skip = hideCancelledVertices &&
                        vertex.State == ExecutedVertexInstance.VertexState.Cancelled;
            if (skip)
            {
                continue;
            }
            this.utilization.Add(vertex.Machine, vertex);
            this.vertices.Add(vertex);
        }
    }

    this.utilization.Sort();
    // Vertical extent: one band of height 'spacing' per machine.
    this.Y = spacing * this.utilization.MachineCount;
}
/// <summary>
/// Create a FailureDiagnosis object.
/// </summary>
/// <param name="job">Job being diagnosed.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="manager">Communication manager.</param>
protected FailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
{
    // Cache the pieces of the job we need; Summary and cluster are
    // derived from the job itself.
    this.Job = job;
    this.Summary = job.Summary;
    this.cluster = job.ClusterConfiguration;
    this.StaticPlan = plan;
    this.Manager = manager;
}
/// <summary>
/// Create a class to diagnose the problems of a vertex.
/// </summary>
/// <param name="job">Job containing the vertex.</param>
/// <param name="plan">Plan of the executed job.</param>
/// <param name="vertex">Vertex to diagnose.</param>
/// <param name="manager">Communication manager.</param>
protected VertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
    : base(job, plan, manager)
{
    this.Vertex = vertex;
    this.Job = job;
    // ReSharper disable once DoNotCallOverridableMethodsInConstructor
    this.stackTraceFile = "dryadLinqStackTrace.txt";
}
/// <summary>
/// Generate a new view of the same partitioned table.
/// </summary>
/// <param name="job">Job containing the table.</param>
/// <param name="statusReporter">Delegate used to report errors.</param>
/// <param name="showCancelled">Show the cancelled vertices.</param>
/// <returns>The new view.</returns>
public StaticPartitionedTableInformation Refresh(DryadLinqJobInfo job, StatusReporter statusReporter, bool showCancelled)
{
    // Without saved code we must recompute the view from the stage output.
    if (this.constructorArguments.code == null)
    {
        return StageOutput(job, this.constructorArguments.plan,
                           this.constructorArguments.source, statusReporter, showCancelled);
    }
    return new StaticPartitionedTableInformation(
        this.Config, this.UriType, this.Uri, this.constructorArguments.code, statusReporter);
}
/// <summary>
/// Create a suitable Job Failure diagnosis object for the job being analyzed.
/// </summary>
/// <param name="job">Job to diagnose.</param>
/// <param name="plan">Plan of the job being diagnosed.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>A subclass of JobFailureDiagnosis with the type appropriate for the job.</returns>
public static JobFailureDiagnosis CreateJobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
{
    ClusterConfiguration config = job.ClusterConfiguration;
    // A cache cluster is a wrapper; diagnose against the configuration it wraps.
    CacheClusterConfiguration cached = config as CacheClusterConfiguration;
    if (cached != null)
    {
        config = cached.ActualConfig(job.Summary);
    }
    // No cluster type is currently supported by this factory.
    throw new InvalidOperationException("Configuration of type " + config.TypeOfCluster + " not supported for diagnosis");
}
/*
 * public static bool PlainEnumerableAsDryadQueryToStoreSubmit()
 * {
 *     var context = new DryadLinqContext(Config.cluster);
 *     context.LocalExecution = false;
 *     bool passed = true;
 *     try
 *     {
 *         string outFile = "unittest/output/PlainEnumerableAsDryadQueryToStoreSubmit.txt";
 *
 *         int[] plainData = { 5, 6, 7 };
 *
 *         var q = context.AsDryadQuery(plainData, CompressionScheme.None).ToStore(AzureUtils.ToAzureUri(Config.accountName, Config.storageKey, Config.containerName, outFile);
 *         DryadLinqJobInfo info = q.Submit();
 *         info.Wait();
 *
 *         foreach (int x in q)
 *         {
 *             //Console.WriteLine(x);
 *         }
 *
 *         passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile);
 *     }
 *     catch (DryadLinqException e)
 *     {
 *         passed &= false;
 *     }
 *     return passed;
 * }
 */
/// <summary>
/// Verifies that submitting the same query object twice throws an
/// ArgumentException when running on the cluster (local-debug mode may allow it).
/// </summary>
/// <returns>True when the expected behavior is observed.</returns>
public static bool RepeatSubmit()
{
    var ctx = new DryadLinqContext(Config.cluster);
    ctx.LocalExecution = false;
    bool ok = true;
    try
    {
        string target = "unittest/output/RepeatSubmit.txt";

        IQueryable<LineRecord> source = ctx.FromStore<LineRecord>(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, "unittest/inputdata/SimpleFile.txt"));
        IQueryable<IEnumerable<int>> fileSets = source.Apply(x => DataGenerator.CreateSimpleFileSets());
        IQueryable<int> firsts = fileSets.Select(x => x.First());
        var stored = firsts.ToStore(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, target), true);

        DryadLinqJobInfo first = null;
        DryadLinqJobInfo second = null;
        try
        {
            first = stored.Submit();
            second = stored.Submit(); // does not throw
            if (!ctx.LocalDebug)
            {
                // On the cluster the second Submit should have thrown.
                ok = false;
            }
        }
        catch (ArgumentException)
        {
            // Expected on cluster execution.
        }

        //wait for any jobs to complete.
        if (first != null)
        {
            first.Wait();
        }
        if (second != null)
        {
            second.Wait();
        }
    }
    catch (DryadLinqException)
    {
        ok = false;
    }
    return ok;
}
/// <summary>
/// Try to find the job information from cluster and summary.
/// </summary>
/// <param name="manager">Communication manager.</param>
protected void FindJobInfo(CommManager manager)
{
    DryadLinqJobInfo info = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.cluster, this.Summary, true, manager);
    if (info == null)
    {
        // Nothing to diagnose without job details; report and bail out.
        manager.Status("Cannot collect information for " + Summary.ShortName() + " to diagnose", StatusKind.Error);
        return;
    }
    this.Job = info;
    this.StaticPlan = JobObjectModel.DryadJobStaticPlan.CreatePlan(info, manager);
}
/// <summary>
/// Create a VertexFailureDiagnosis of the appropriate type.
/// </summary>
/// <param name="job">Job containing the vertex.</param>
/// <param name="plan">Plan of the executed job.</param>
/// <param name="vertex">Vertex to diagnose.</param>
/// <param name="manager">Communication manager.</param>
/// <returns>A subclass of VertexFailureDiagnosis.</returns>
public static VertexFailureDiagnosis CreateVertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
{
    ClusterConfiguration config = job.ClusterConfiguration;
    // Unwrap a cache cluster to reach the configuration it fronts.
    CacheClusterConfiguration cached = config as CacheClusterConfiguration;
    if (cached != null)
    {
        config = cached.ActualConfig(job.Summary);
    }
    // No cluster type is currently handled by this factory.
    throw new InvalidOperationException("Config of type " + config.TypeOfCluster + " not handled");
}
/// <summary>
/// Create a form to show the diagnosis result.
/// </summary>
/// <param name="job">Job diagnosed; may be null.</param>
/// <param name="summary">Job summary.</param>
/// <param name="log">Diagnosis log.</param>
public DiagnosisResult(DryadLinqJobInfo job, DryadLinqJobSummary summary, DiagnosisLog log)
{
    this.InitializeComponent();
    this.job = job;
    if (this.job == null)
    {
        // Without detailed job information the job button has nothing to show.
        this.button_job.Enabled = false;
    }
    // ReSharper disable once DoNotCallOverridableMethodsInConstructor
    this.Text = "Diagnosis results for " + summary.Name + " " + summary.Date;
    // Fix: message previously misspelled "diangosed".
    this.textBox_job.Text = "Job being diagnosed: " + summary.AsIdentifyingString();
    foreach (string s in log.Message())
    {
        this.textBox_message.AppendText(s);
        this.textBox_message.AppendText(Environment.NewLine);
    }
}
/// <summary>
/// Verifies that materializing the same query twice in a single Submit call
/// throws an ArgumentException on cluster execution.
/// </summary>
/// <returns>True when the expected exception is observed.</returns>
public static bool MaterializeMentionsSameQueryTwice() // pass
{
    var ctx = new DryadLinqContext(Config.cluster);
    ctx.LocalExecution = false;
    bool ok = true;
    try
    {
        string target = "unittest/output/MaterializeMentionsSameQueryTwice.txt";

        IQueryable<LineRecord> source = ctx.FromStore<LineRecord>(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, "unittest/inputdata/SimpleFile.txt"));
        IQueryable<IEnumerable<int>> fileSets = source.Apply(x => DataGenerator.CreateSimpleFileSets());
        IQueryable<int> firsts = fileSets.Select(x => x.First());
        var stored = firsts.ToStore(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, target), true);

        DryadLinqJobInfo info = null;
        try
        {
            // Mentioning the same query twice should throw for cluster execution.
            info = DryadLinqQueryable.Submit(stored, stored); //materialize
            ok = false;
        }
        catch (ArgumentException)
        {
            // Expected.
        }

        //wait for any jobs to complete.
        if (info != null)
        {
            info.Wait();
        }
    }
    catch (DryadLinqException)
    {
        ok = false;
    }
    return ok;
}
/// <summary>
/// Start a job browser on the specified job.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="e">Unused.</param>
private void jobBrowserToolStripMenuItem_Click(object sender, EventArgs e)
{
    IEnumerable<ClusterJobInformation> selected = this.SelectedJobs();
    this.Status("Starting job browser...", StatusKind.LongOp);
    IEnumerable<DryadLinqJobSummary> summaries =
        selected.Select(t => t.DiscoverDryadLinqJob(this.clusterStatus, this.Status)).ToList();
    CommManager manager = new CommManager(
        this.Status, delegate { }, new System.Threading.CancellationTokenSource().Token);
    IEnumerable<DryadLinqJobInfo> details = summaries.Select(
        s => DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, s, false, manager));
    // Open one browser window per job we could resolve.
    foreach (DryadLinqJobInfo info in details)
    {
        if (info == null)
        {
            continue;
        }
        new JobBrowser(info).Show();
    }
    this.Status("OK", StatusKind.OK);
}
/// <summary>
/// Start the job browser from a job summary.
/// </summary>
/// <param name="js">Job summary to browse.</param>
private void browseFromJobSummary(DryadLinqJobSummary js)
{
    if (js == null)
    {
        return;
    }
    // TODO: this should run in the background
    CommManager manager = new CommManager(
        this.Status, delegate { }, new System.Threading.CancellationTokenSource().Token);
    DryadLinqJobInfo info = DryadLinqJobInfo.CreateDryadLinqJobInfo(this.clusterStatus.Config, js, false, manager);
    if (info == null)
    {
        this.Status("Could not find information about job", StatusKind.Error);
        return;
    }
    JobBrowser browser = new JobBrowser(info);
    browser.Show();
    this.Status("OK", StatusKind.OK);
}
/// <summary>
/// Regression test for bug 11781: Count(), CountAsQuery() and FirstOrDefault().
/// </summary>
/// <returns>True if the test passed.</returns>
public static bool Bug11781_CountandFirstOrDefault()
{
    var context = new DryadLinqContext(Config.cluster);
    context.LocalExecution = false;
    bool passed = true;
    try
    {
        string outFile = "unittest/output/Bug11781.out";

        // Fix: pass Config.storageKey to ToAzureUri, matching every other call
        // in this suite; the original omitted it, shifting the argument meanings.
        IQueryable<LineRecord> input = context.FromStore<LineRecord>(
            AzureUtils.ToAzureUri(Config.accountName, Config.storageKey,
                                  Config.containerName, "unittest/inputdata/SimpleFile.txt"));
        IQueryable<IEnumerable<int>> simple = input.Apply(x => DataGenerator.CreateSimpleFileSets());
        IQueryable<int> pt1 = simple.Select(x => x.First());

        //Test Count()
        var c = pt1.Count();

        //Test CountAsQuery()
        // NOTE(review): ToStore is given the bare path while FileExists below checks the
        // Azure container — confirm ToStore(string) resolves against the same container.
        var q = pt1.CountAsQuery().ToStore(outFile);
        DryadLinqJobInfo info = q.Submit();
        info.Wait();

        passed &= Utils.FileExists(Config.accountName, Config.storageKey, Config.containerName, outFile);

        // Also test FirstOrDefault
        // the affected code for dlq.Execute() also has a branch for FirstOrDefault() and friends.
        int y = pt1.FirstOrDefault();
    }
    catch (DryadLinqException)
    {
        passed &= false;
    }
    return passed;
}
/// <summary>
/// Create a class to diagnose the problems of a job.
/// </summary>
/// <param name="job">Job to diagnose.</param>
/// <param name="plan">Plan of the diagnosed job.</param>
/// <param name="manager">Communication manager.</param>
protected JobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
    : base(job, plan, manager)
{
    // The base constructor stored the job; grab its manager vertex and
    // start a fresh diagnosis log for it.
    this.jobManager = this.Job.ManagerVertex;
    this.diagnosisLog = new DiagnosisLog(job, job.Summary);
}
/// <summary>
/// Create a new diagnostic log.
/// </summary>
/// <param name="job">Job the log refers to.</param>
/// <param name="summary">Summary of that job.</param>
public DiagnosisLog(DryadLinqJobInfo job, DryadLinqJobSummary summary)
{
    this.Job = job;
    this.Summary = summary;
    this.messages = new List<DiagnosisMessage>();
}
/// <summary>
/// Create a class representing the set of edges between two stages in the job plan.
/// These collectively look like a partitioned table.
/// If the source stage has multiple outputs there is not enough information to return meaningful information.
/// </summary>
/// <param name="job">Job whose slice we are displaying.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="source">Stage in the job which produces the data.</param>
/// <param name="status">Delegate used to report errors.</param>
/// <param name="showCancelled">If true include cancelled vertices.</param>
public static StaticPartitionedTableInformation StageOutput(
    DryadLinqJobInfo job,
    DryadJobStaticPlan plan,
    DryadJobStaticPlan.Stage source,
    StatusReporter status,
    bool showCancelled)
{
    // 'header' captures the ORIGINAL stage name; 'source' may be rebound below.
    // NOTE(review): result.Header is later set to "Output of " + header, which yields
    // a doubled "Output of Output of ..." prefix — confirm this is intentional.
    string header = "Output of " + source.Name;

    // First check whether in the static plan this is virtual
    // Walk through Tee stages (single-input pass-throughs) to the real producer.
    while (source.IsTee)
    {
        var sourceInputs = plan.GetStageConnections(source, true).ToList();
        if (sourceInputs.Count() != 1)
        {
            throw new DryadException("Unexpected number of inputs for stage " + source.Name);
        }
        source = sourceInputs.First().From;
    }

    // If we reached the input return information about that input
    if (source.IsInput)
    {
        status("Scanning " + source.Name, StatusKind.LongOp);
        StaticPartitionedTableInformation result = new StaticPartitionedTableInformation(
            job.ClusterConfiguration, source.UriType, source.Uri, source.Code, status);
        result.Header = "Output of " + header;
        // Save enough state so Refresh() can rebuild this view later.
        result.constructorArguments = new SaveConstructorArguments
        {
            code = null,
            source = source,
            plan = plan
        };
        return(result);
    }
    else
    {
        StaticPartitionedTableInformation result = new StaticPartitionedTableInformation();
        result.Name = "Output of vertices in stage " + source.Name;
        result.Header = "Output of " + header;
        result.constructorArguments = new SaveConstructorArguments
        {
            code = null,
            source = source,
            plan = plan
        };

        // Check whether this stage has multiple outputs; this can only happen for 'Fork' operators.
        var destinations = plan.GetStageConnections(source, false);
        if (destinations.Count() > 1)
        {
            result.Error = "Cannot provide information about one of multiple outputs of a stage.";
            return(result);
        }

        DryadLinqJobStage stage = job.GetStage(source.Name);
        if (stage == null)
        {
            result.Error = "There is no information about the output of stage " + source.Name;
            return(result);
        }

        result.EstimatedSize = 0;
        result.PartitionCount = stage.TotalInitiatedVertices;
        int count = 0;
        // One partition per vertex that produced output (successful/failed, and
        // cancelled only when showCancelled is set).
        foreach (ExecutedVertexInstance vi in stage.Vertices)
        {
            if (vi.State == ExecutedVertexInstance.VertexState.Successful ||
                vi.State == ExecutedVertexInstance.VertexState.Failed ||
                (showCancelled && vi.State == ExecutedVertexInstance.VertexState.Cancelled))
            {
                StaticPartitionInformation spi = new StaticPartitionInformation(
                    count++, vi.DataWritten, vi.Name + " v." + vi.Version);
                result.AddPartition(spi);
                // DataWritten == -1 means "unknown"; don't fold it into the estimate.
                if (vi.DataWritten != -1)
                {
                    result.EstimatedSize += vi.DataWritten;
                }
            }
        }
        return(result);
    }
}