/// <summary>
/// Create a cluster status and register it in the shared <c>ClusterStatuses</c> table
/// under the name of its configuration.
/// </summary>
/// <param name="config">Cluster configuration.</param>
protected ClusterStatus(ClusterConfiguration config)
{
    this.Config = config;
    this.Initialize();

    // Setting through the indexer inserts a new entry or overwrites the one already
    // stored for this cluster name, so the newest status always wins.
    ClusterStatuses[config.Name] = this;
}
/// <summary>
/// See if a status is already cached for this configuration.
/// </summary>
/// <param name="config">Cluster configuration.</param>
/// <returns>
/// The cached status registered under <c>config.Name</c> when its stored configuration
/// equals <paramref name="config"/>; null otherwise.
/// </returns>
public static ClusterStatus LookupStatus(ClusterConfiguration config)
{
    // Single lookup instead of ContainsKey followed by the indexer.
    ClusterStatus cached;
    if (!ClusterStatuses.TryGetValue(config.Name, out cached))
        return null;

    // A status registered under the same name but built from a different
    // configuration is not a match.
    return cached.Config.Equals(config) ? cached : null;
}
/// <summary>
/// Creates a new instance of this class in preparation for debugging a vertex locally.
/// </summary>
/// <param name="config">Cluster where job debugged is running.</param>
/// <param name="guid">Guid for the vertex to debug.</param>
/// <param name="number">Vertex number.</param>
/// <param name="version">Vertex version to debug.</param>
/// <param name="vertexWorkingDirPath">Path to the (remote) working directory of the vertex; must be a UNCFile.</param>
/// <param name="managed">If true debug managed version; false is not supported.</param>
/// <param name="cpuSampling">If true perform cpu sampling based profiling.</param>
/// <param name="statusWriter">Used to display status messages nicely.</param>
/// <exception cref="NotSupportedException">Thrown when <paramref name="managed"/> is false.</exception>
/// <exception cref="ArgumentException">Thrown when <paramref name="vertexWorkingDirPath"/> is not a UNCFile.</exception>
public LocalDebuggingAndProfiling(ClusterConfiguration config, string guid, int number, int version, IClusterResidentObject vertexWorkingDirPath, bool managed, bool cpuSampling, StatusReporter statusWriter)
{
    // Reject unsupported requests before touching any state.
    if (!managed)
        throw new NotSupportedException("Unmanaged debugging not supported");

    // Only UNC-based working directories expose the Pathname used below; fail with a
    // descriptive error instead of a NullReferenceException on the cast result.
    UNCFile workingDir = vertexWorkingDirPath as UNCFile;
    if (workingDir == null)
        throw new ArgumentException("Expected the vertex working directory to be a UNCFile", "vertexWorkingDirPath");

    this.cluster = config;
    this.workingDirPath = workingDir.Pathname;
    this.guid = guid;
    this.reporter = statusWriter;
    this.cpuSampling = cpuSampling;
    this.number = number;
    this.version = version;
}
/// <summary>
/// Cache the interesting files of this vertex: every text file in the vertex work
/// directory that is marked as locally cacheable is read to the end.
/// </summary>
/// <param name="config">Cluster configuration.</param>
/// <param name="summary">Job summary.</param>
/// <param name="v">Vertex whose files should be cached.</param>
/// <returns>Number of files cached; 0 when the work directory is unavailable.</returns>
private static int CacheVertexInfo(ClusterConfiguration config, DryadLinqJobSummary summary, ExecutedVertexInstance v)
{
    IClusterResidentObject workDir = config.ProcessWorkDirectory(v.ProcessIdentifier, v.VertexIsCompleted, v.Machine, summary);
    if (workDir == null || workDir.Exception != null)
        return 0;

    int cachedCount = 0;
    foreach (IClusterResidentObject entry in workDir.GetFilesAndFolders("*"))
    {
        // Skip folders, non-text files and files that must not be cached.
        bool skip = entry.RepresentsAFolder
                    || !Utilities.FileNameIndicatesTextFile(entry.Name)
                    || !entry.ShouldCacheLocally;
        if (skip)
            continue;

        ISharedStreamReader reader = entry.GetStream();
        // ReSharper disable once UnusedVariable
        foreach (string line in reader.ReadAllLines())
        {
            // discard; reading through the stream is what causes the caching
        }

        cachedCount++;
    }

    return cachedCount;
}
/// <summary>
/// Create a fake cluster status backed by a cache cluster configuration.
/// </summary>
/// <param name="config">Configuration to use for this cluster; must be a CacheClusterConfiguration.</param>
/// <exception cref="ArgumentException">Thrown when the configuration is not for a cache cluster.</exception>
public CacheClusterStatus(ClusterConfiguration config)
    : base(config)
{
    // Note: the type check runs after the base constructor has already executed.
    if (config is CacheClusterConfiguration)
        return;

    throw new ArgumentException("Expected configuration to be for a cache cluster");
}
/// <summary>
/// Create a suitable Job Failure diagnosis object for the job being analyzed.
/// </summary>
/// <remarks>
/// As written, no cluster type is handled: after resolving a cache configuration to the
/// actual configuration of the job, the method always throws.
/// </remarks>
/// <param name="config">Cluster where job resides.</param>
/// <param name="summary">Job to diagnose.</param>
/// <param name="manager">Communication manager (not used by the visible code).</param>
/// <returns>A subclass of JobFailureDiagnosis with the type appropriate for the job.</returns>
/// <exception cref="InvalidOperationException">Thrown for every configuration type.</exception>
public static JobFailureDiagnosis CreateJobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager)
{
    // A cache cluster only mirrors another cluster; diagnose against the real one.
    CacheClusterConfiguration cacheConfig = config as CacheClusterConfiguration;
    if (cacheConfig != null)
        config = cacheConfig.ActualConfig(summary);

    throw new InvalidOperationException("Configuration of type " + config.TypeOfCluster + " not supported for diagnosis");
}
/// <summary>
/// Empty information about a partitioned table: no configuration, empty strings for all
/// textual fields, no partitions, and -1 for the partition count and estimated size.
/// </summary>
private StaticPartitionedTableInformation()
{
    const int notSet = -1;

    constructorArguments = null;
    Config = null;

    // Textual fields start out empty rather than null.
    UriType = "";
    Error = "";
    Name = "";
    Header = "";

    PartitionCount = notSet;
    partitions = new List<StaticPartitionInformation>();
    EstimatedSize = notSet;
}
/// <summary>
/// Create the status object for a cluster described by a WebHdfsClusterConfiguration,
/// together with the YARN client used to talk to its status node.
/// </summary>
/// <param name="conf">Cluster configuration; must be a WebHdfsClusterConfiguration.</param>
/// <exception cref="ArgumentException">Thrown when the configuration has a different type.</exception>
public WebHdfsClusterStatus(ClusterConfiguration conf)
    : base(conf)
{
    WebHdfsClusterConfiguration webHdfsConfig = conf as WebHdfsClusterConfiguration;
    if (webHdfsConfig == null)
        throw new ArgumentException("Expected a WebHdfsClusterConfiguration, got a " + conf.GetType());

    this.config = webHdfsConfig;
    this.yarnClient = new NativeYarnClient(
        webHdfsConfig.StatusNode,
        webHdfsConfig.StatusNodePort,
        new HdfsClient(webHdfsConfig.UserName));
}
/// <summary>
/// Create a DfsFile that only carries an error.
/// </summary>
/// <param name="config">Cluster configuration.</param>
/// <param name="job">Job associated with this file.</param>
/// <param name="ex">Exception to record on the object.</param>
private DfsFile(ClusterConfiguration config, DryadLinqJobSummary job, Exception ex)
    : base(config, job)
{
    // No other field is initialized here; the object just records the failure.
    this.Exception = ex;
}
/// <summary>
/// Initialize an empty cached cluster resident object: no cache writer and no
/// temporary file yet.
/// </summary>
/// <param name="config">Cluster where the file resides.</param>
/// <param name="job">Job who owns these files.</param>
protected CachedClusterResidentObject(ClusterConfiguration config, DryadLinqJobSummary job)
{
    // Nothing is being written to the cache at construction time.
    cacheWriter = null;
    tempFileName = null;

    Job = job;
    Config = config;
}
/// <summary>
/// Create the status object for a cluster described by an AzureDfsClusterConfiguration.
/// </summary>
/// <param name="config">Cluster configuration; must be an AzureDfsClusterConfiguration.</param>
/// <exception cref="ArgumentException">Thrown when the configuration has a different type.</exception>
public AzureDfsClusterStatus(ClusterConfiguration config)
    : base(config)
{
    AzureDfsClusterConfiguration azureConfig = config as AzureDfsClusterConfiguration;
    if (azureConfig == null)
        throw new ArgumentException("Expected a AzureDfsClusterConfiguration, got a " + config.GetType());

    // Keep the strongly typed configuration for later use.
    this.config = azureConfig;
}
/// <summary>
/// Create information about a job run on the cluster.
/// </summary>
/// <param name="cf">Cluster configuration.</param>
/// <param name="summary">Summary description of the job.</param>
/// <param name="fill">If true, fill all the information, otherwise the user will have to call FillInformation on the result later.</param>
/// <param name="manager">Communication manager; receives an error status on failure.</param>
/// <returns>The Dryad job description, or null if any exception occurred while building it.</returns>
public static DryadLinqJobInfo CreateDryadLinqJobInfo(ClusterConfiguration cf, DryadLinqJobSummary summary, bool fill, CommManager manager)
{
    DryadLinqJobInfo info;
    try
    {
        info = new DryadLinqJobInfo(cf, summary);
        if (fill)
            info.CollectEssentialInformation(manager);
    }
    catch (Exception failure)
    {
        // Best effort: log the full exception, tell the user, and report "no info".
        Trace.TraceInformation(failure.ToString());
        string message = "Could not collect job information for " + summary.Name + ": " + failure.Message;
        manager.Status(message, StatusKind.Error);
        return null;
    }

    return info;
}
/// <summary>
/// Read the information about a job which ran the JM on the cluster.
/// </summary>
/// <param name="cf">Configuration of the cluster.</param>
/// <param name="summary">Summary of the job.</param>
protected DryadLinqJobInfo(ClusterConfiguration cf, DryadLinqJobSummary summary)
{
    // Starts out true; presumably cleared once collection succeeds (not visible here).
    this.JobInfoCannotBeCollected = true;
    this.ClusterConfiguration = cf;

    // For a cache cluster, remember the configuration of the cluster the job really ran on.
    CacheClusterConfiguration cacheConfig = cf as CacheClusterConfiguration;
    if (cacheConfig == null)
    {
        this.OriginalClusterConfiguration = cf;
    }
    else
    {
        this.OriginalClusterConfiguration = cacheConfig.ActualConfig(summary);
    }

    this.Initialize(summary);
}
/// <summary>
/// Create a ScopeJobStaticPlan from a plan file and its vertex definition file.
/// </summary>
/// <param name="config">Cluster configuration (currently unused).</param>
/// <param name="planFile">Stream containing the plan file; handed to the base class.</param>
/// <param name="vertexDef">File containing the vertex definition (ScopeVertexDef.xml).</param>
// ReSharper disable once UnusedParameter.Local
public ScopeJobStaticPlan(ClusterConfiguration config, ISharedStreamReader planFile, ISharedStreamReader vertexDef)
    : base(planFile)
{
    // The vertex definition is only stored here; nothing is read from it in this constructor.
    this.vertexDef = vertexDef;
}
/// <summary>
/// Create a DryadLinqJobStaticPlan; all the work is delegated to the base class.
/// </summary>
/// <param name="config">Cluster configuration (currently unused).</param>
/// <param name="planFile">Stream containing the plan file.</param>
// ReSharper disable once UnusedParameter.Local
public DryadLinqJobStaticPlan(ClusterConfiguration config, ISharedStreamReader planFile)
    : base(planFile)
{
    // Intentionally empty.
}
/// <summary>
/// Register a new cluster configuration: add it to the known clusters, add its name to
/// the menu and report the addition. When the new configuration is a cache cluster, any
/// already-known cache cluster must first be replaced, subject to user confirmation.
/// </summary>
/// <param name="conf">Configuration to add; null is ignored.</param>
private void ConfigurationChanged(ClusterConfiguration conf)
{
    if (conf == null)
        return;

    // you cannot have two cache clusters at the same time
    CacheClusterConfiguration newCache = conf as CacheClusterConfiguration;
    if (newCache != null)
    {
        // Collect the conflicting clusters first, so that removing one below never
        // modifies the collection of known names while it is being enumerated.
        var conflicts = new System.Collections.Generic.List<CacheClusterConfiguration>();
        foreach (var name in ClusterConfiguration.GetKnownClusterNames())
        {
            CacheClusterConfiguration known = ClusterConfiguration.KnownClusterByName(name) as CacheClusterConfiguration;
            if (known != null)
                conflicts.Add(known);
        }

        foreach (CacheClusterConfiguration oldCache in conflicts)
        {
            // OK/Cancel buttons are required: a message box with the default single OK
            // button can only ever return OK, which made declining impossible.
            DialogResult res = MessageBox.Show(
                "You cannot have two cache clusters at once: " + conf.Name + " and " + oldCache.Name + "\nPress OK to use " + conf.Name + " instead of " + oldCache.Name,
                string.Empty,
                MessageBoxButtons.OKCancel);
            if (res != System.Windows.Forms.DialogResult.OK)
                return;

            ClusterConfiguration.RemoveKnownCluster(oldCache.Name);
            oldCache.StopCaching();
            newCache.StartCaching();
        }
    }

    ClusterConfiguration.AddKnownCluster(conf);
    this.AddClusterNameToMenu(conf.Name);
    this.Status("Added cluster " + conf.Name, StatusKind.OK);
}
/// <summary>
/// Create the status object for a cluster described by a LocalEmulator configuration.
/// </summary>
/// <param name="config">Cluster configuration; must be a LocalEmulator.</param>
/// <exception cref="ArgumentException">Thrown when the configuration has a different type.</exception>
public YarnEmulatedClusterStatus(ClusterConfiguration config)
    : base(config)
{
    LocalEmulator emulatorConfig = config as LocalEmulator;

    // NOTE(review): the message names LocalMachineConfiguration while the check is for
    // LocalEmulator - confirm which name is intended before changing the text.
    if (emulatorConfig == null)
        throw new ArgumentException("Expected a LocalMachineConfiguration, got a " + config.GetType());

    this.config = emulatorConfig;
}
/// <summary>
/// Create a cluster resident object corresponding to a given pathname.
/// </summary>
/// <param name="config">Cluster where the file resides.</param>
/// <param name="job">Job who owns this file.</param>
/// <param name="path">Path to the cluster-resident object.</param>
/// <param name="shouldCache">If true the file should be cached.</param>
public UNCFile(ClusterConfiguration config, DryadLinqJobSummary job, UNCPathname path, bool shouldCache)
    : base(config, job)
{
    Pathname = path;
    Exception = null;
    ShouldCacheLocally = shouldCache;

    // The cache path is computed unconditionally; a former "not a folder" guard is disabled.
    //if (! this.RepresentsAFolder)
    LocalCachePath = CachePath(Pathname);
}
/// <summary>
/// Base constructor for the status of clusters described by a DfsClusterConfiguration.
/// </summary>
/// <param name="config">Cluster configuration; must be a DfsClusterConfiguration.</param>
/// <exception cref="ArgumentException">Thrown when the configuration has a different type.</exception>
protected DfsClusterStatus(ClusterConfiguration config)
    : base(config)
{
    // Note: the type check runs after the base constructor has already executed.
    if (config is DfsClusterConfiguration)
        return;

    throw new ArgumentException("Expected a DfsClusterConfiguration, got a " + config.GetType());
}
/// <summary>
/// Create the status object for a cluster described by an HdfsClusterConfiguration,
/// touching the DFS client once and creating the YARN client for the status node.
/// </summary>
/// <param name="conf">Cluster configuration; must be an HdfsClusterConfiguration.</param>
/// <exception cref="ArgumentException">Thrown when the configuration has a different type.</exception>
public HdfsClusterStatus(ClusterConfiguration conf)
    : base(conf)
{
    HdfsClusterConfiguration hdfsConfig = conf as HdfsClusterConfiguration;
    if (hdfsConfig == null)
        throw new ArgumentException("Expected an HdfsClusterConfiguration, got a " + conf.GetType());
    this.config = hdfsConfig;

    // make a fake call to initialize the cluster on the foreground thread
    // HDFS does not work if initialized on the background thread.
    Uri probeUri = DfsFile.UriFromPath(hdfsConfig.JobsFolderUri, "");
    hdfsConfig.DfsClient.IsFileExists(probeUri); // ignore result

    this.yarnClient = new NativeYarnClient(
        hdfsConfig.StatusNode,
        hdfsConfig.StatusNodePort,
        new HdfsClient(hdfsConfig.UserName));
}
/// <summary>
/// Create a class to discover the information about a partitioned table given its uri.
/// </summary>
/// <remarks>
/// The uri is split at the first '?' into <c>Uri</c> and <c>Options</c>. Because the
/// options may have been stripped from <paramref name="uri"/>, the first line of
/// <paramref name="code"/> is used instead whenever it starts with the uri.
/// </remarks>
/// <param name="config">Cluster where the job accessing the stream resides.</param>
/// <param name="uriType">Type of URI; only "PartitionedFile" triggers further parsing.</param>
/// <param name="uri">Partitioned table uri.</param>
/// <param name="code">Code associated to the stage (the uri does not contain the options, they may still be in the code).</param>
/// <param name="statusReporter">Delegate used to report errors.</param>
public StaticPartitionedTableInformation(ClusterConfiguration config, string uriType, string uri, string[] code, StatusReporter statusReporter)
{
    // URIs and the markers around them are identifiers, not linguistic text, so all
    // comparisons below are ordinal rather than culture-sensitive.
    const System.StringComparison ordinal = System.StringComparison.Ordinal;
    const string tablePrefix = "PartitionedTable:";

    this.Config = config;
    this.UriType = uriType;
    this.Error = "";
    this.constructorArguments = new SaveConstructorArguments
    {
        // this is all we need
        code = code
    };

    // really ugly, but the uri in the table no longer contains the options; they were stripped by the DryadLINQ compiler.
    // A missing code array or a missing first line simply means there is nothing to recover.
    if (code != null && code.Length > 0 && code[0] != null)
    {
        string firstline = code[0].Trim('[', ']');
        if (firstline.StartsWith(tablePrefix, ordinal))
            firstline = firstline.Substring(tablePrefix.Length).Trim();
        if (firstline.StartsWith(uri, ordinal))
            uri = firstline; // this may contain the options.
    }

    // A '?' in the very first position is not treated as an options separator.
    int indexoptions = uri.IndexOf('?');
    if (indexoptions > 0)
    {
        this.Uri = uri.Substring(0, indexoptions);
        this.Options = uri.Substring(indexoptions + 1);
    }
    else
    {
        this.Uri = uri;
        this.Options = "";
    }
    this.Name = Path.GetFileName(this.Uri);

    // default values
    this.PartitionCount = -1;
    this.partitions = new List<StaticPartitionInformation>();
    this.EstimatedSize = -1;
    this.Header = Path.GetFileName(this.Uri);

    if (uriType == "PartitionedFile")
        this.ParsePartitionedFile(statusReporter);
}
/// <summary>
/// Create a failure diagnosis when the job info is not yet known; the job information
/// is looked up through <c>FindJobInfo</c> once the fields have been set.
/// </summary>
/// <param name="config">Cluster where job resides.</param>
/// <param name="summary">Job summary.</param>
/// <param name="manager">Communication manager.</param>
protected FailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager)
{
    cluster = config;
    Summary = summary;
    Manager = manager;

    // Runs last, after the cluster, summary and manager are stored.
    FindJobInfo(manager);
}
/// <summary>
/// A file (or folder) with the specified path in Azure DFS.
/// </summary>
/// <param name="config">Cluster configuration.</param>
/// <param name="job">Job accessing this file.</param>
/// <param name="client">Azure client.</param>
/// <param name="path">Path to the file.</param>
/// <param name="canCache">True if the file can be cached (it is immutable for sure).</param>
/// <param name="isFolder">If true this must be a folder.</param>
public AzureDfsFile(ClusterConfiguration config, DryadLinqJobSummary job, AzureDfsClient client, string path, bool canCache, bool isFolder)
    : base(config, job)
{
    this.client = client;
    this.path = path;
    this.ShouldCacheLocally = canCache;
    this.RepresentsAFolder = isFolder;
    this.size = -1; // presumably "size not known yet" - confirm against the readers of this field

    // A local cache path exists only when a cache directory is configured.
    // NOTE(review): Path.Combine ignores its first argument when the second is rooted;
    // confirm that paths passed here are always relative.
    string cacheRoot = CachedClusterResidentObject.CacheDirectory;
    if (string.IsNullOrEmpty(cacheRoot))
        return;

    this.LocalCachePath = Path.Combine(cacheRoot, this.path);
}
/// <summary>
/// A file (or folder) with the specified path, relative to a base folder uri.
/// </summary>
/// <param name="config">Cluster configuration.</param>
/// <param name="jobFolderUri">Uri to base folder.</param>
/// <param name="job">Job accessing this file.</param>
/// <param name="client">DFS client.</param>
/// <param name="path">Path to the file.</param>
/// <param name="canCache">True if the file can be cached (it is immutable for sure).</param>
/// <param name="isFolder">If true this must be a folder.</param>
public DfsFile(ClusterConfiguration config, Uri jobFolderUri, DryadLinqJobSummary job, HdfsClientBase client, string path, bool canCache, bool isFolder)
    : base(config, job)
{
    this.client = client;
    this.Exception = null;
    this.baseUri = jobFolderUri;
    this.uri = UriFromPath(jobFolderUri, path);
    this.ShouldCacheLocally = canCache;
    this.RepresentsAFolder = isFolder;
    this.size = -1; // presumably "size not known yet" - confirm against the readers of this field

    // NOTE(review): this writes to the console for every file object created;
    // looks like leftover diagnostics - confirm whether it is still wanted.
    Console.WriteLine("DfsFile Uri={0}", this.uri);

    // A local cache path exists only when a cache directory is configured.
    string cacheRoot = CachedClusterResidentObject.CacheDirectory;
    if (string.IsNullOrEmpty(cacheRoot))
        return;

    this.LocalCachePath = Path.Combine(cacheRoot, PathFromUri(this.baseUri, this.uri));
}
/// <summary>
/// Create a FailureDiagnosis object for a job whose information is already available;
/// the summary and the cluster configuration are taken from the job itself.
/// </summary>
/// <param name="job">Job being diagnosed.</param>
/// <param name="plan">Static plan of the job.</param>
/// <param name="manager">Communication manager.</param>
protected FailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
{
    Job = job;
    StaticPlan = plan;
    Manager = manager;

    // Derived from the job rather than passed in separately.
    Summary = job.Summary;
    cluster = job.ClusterConfiguration;
}
/// <summary>
/// Handles a change of the selected cluster type: creates a fresh configuration of the
/// selected type and shows its properties for editing. Does nothing when a
/// configuration is already set.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="e">Unused.</param>
private void comboBox_clusterType_SelectedIndexChanged(object sender, EventArgs e)
{
    // initial setting for an existing configuration
    if (this.config != null)
        return;

    this.RemoveAddedProperties();

    // The combo box text is expected to be the name of a ClusterType enum member.
    string selectedTypeName = this.comboBox_clusterType.Text;
    object parsedType = Enum.Parse(typeof(ClusterConfiguration.ClusterType), selectedTypeName);
    this.config = ClusterConfiguration.CreateConfiguration((ClusterConfiguration.ClusterType)parsedType);

    var editableProperties = this.config.ExtractData().Properties;
    this.AddPropertiesToEdit(editableProperties);
}
/// <summary>
/// Create a class to diagnose the problems of a job.
/// </summary>
/// <param name="config">Cluster where job resides.</param>
/// <param name="summary">Job summary.</param>
/// <param name="manager">Communication manager.</param>
protected JobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager)
    : base(config, summary, manager)
{
    // The log is created even when no job information is available (Job may be null here).
    this.diagnosisLog = new DiagnosisLog(this.Job, summary);

    if (this.Job == null)
        return;

    this.jobManager = this.Job.ManagerVertex;
}
/// <summary>
/// Set the configuration to edit. If null a new configuration will be created.
/// </summary>
/// <param name="configToEdit">Configuration to edit, or null to create a new one.</param>
public void SetConfigToEdit(ClusterConfiguration configToEdit)
{
    // The cluster type can be chosen only when starting from scratch.
    bool creatingNew = (configToEdit == null);

    this.config = configToEdit;
    this.canChangeType = creatingNew;
    this.BindProperties();
}
/// <summary>
/// Diagnose a list of jobs, reporting status and percentage progress as it goes.
/// </summary>
/// <param name="jobs">Jobs to diagnose; null entries are skipped.</param>
/// <param name="config">Cluster configuration.</param>
/// <param name="manager">Communication manager; also supplies the cancellation token.</param>
/// <returns>One diagnosis log per non-null job, in input order.</returns>
public static List<DiagnosisLog> DiagnoseJobs(IEnumerable<DryadLinqJobSummary> jobs, ClusterConfiguration config, CommManager manager)
{
    // Materialize once so the sequence is not enumerated twice.
    DryadLinqJobSummary[] summaries = jobs as DryadLinqJobSummary[] ?? jobs.ToArray();
    int total = summaries.Length;

    var logs = new List<DiagnosisLog>();
    int completed = 0;
    for (int i = 0; i < summaries.Length; i++)
    {
        DryadLinqJobSummary current = summaries[i];
        if (current == null)
            continue;

        manager.Token.ThrowIfCancellationRequested();
        JobFailureDiagnosis diagnosis = JobFailureDiagnosis.CreateJobFailureDiagnosis(config, current, manager);
        manager.Status("Diagnosing " + current.ShortName(), StatusKind.LongOp);
        logs.Add(diagnosis.Diagnose());

        // Progress is a percentage of all entries, counting only the diagnosed ones.
        completed++;
        manager.Progress(completed * 100 / total);
    }

    manager.Status("Diagnosis complete", StatusKind.OK);
    return logs;
}
/// <summary>
/// Cache the vertices in the list; executed on the background thread.
/// </summary>
/// <param name="config">Cluster configuration.</param>
/// <param name="summary">Job to cache.</param>
/// <param name="vertices">Vertices to cache.</param>
/// <param name="manager">Communication manager; receives status and percentage progress.</param>
/// <returns>True: success.</returns>
private static bool CacheAllVertices(
    ClusterConfiguration config,
    DryadLinqJobSummary summary,
    List<ExecutedVertexInstance> vertices,
    CommManager manager)
{
    int done = 0;
    int todo = vertices.Count;
    int files = 0;
    manager.Status("Caching data for " + todo + " vertices", StatusKind.LongOp);
    foreach (ExecutedVertexInstance v in vertices)
    {
        files += CacheVertexInfo(config, summary, v);
        done++;

        // Report a percentage. The previous "done / todo" was an integer division that
        // yielded 0 for every vertex but the last. todo cannot be 0 inside the loop.
        manager.Progress(done * 100 / todo);
    }

    manager.Progress(100);
    manager.Status("Cached " + files + " files", StatusKind.OK);
    return true;
}