Пример #1
0
 /// <summary>
 /// Attach a vertex to the information kept for a machine, registering the machine
 /// the first time it is seen.
 /// </summary>
 /// <param name="machine">Machine name; used as the key into the machine index map.</param>
 /// <param name="vertex">Vertex to add to that machine's information.</param>
 internal void Add(string machine, ExecutedVertexInstance vertex)
 {
     int index;
     // Single lookup: TryGetValue replaces the ContainsKey + indexer pair.
     if (!this.machines.TryGetValue(machine, out index))
     {
         // Unknown machine: it is given the current MachineCount as its index
         // (presumably the position the new entry gets in machineInfo - confirm).
         index = this.MachineCount;
         this.machines.Add(machine, index);
         this.machineInfo.Add(new MachineInformation(machine));
     }
     this.machineInfo[index].AddVertex(vertex);
 }
Пример #2
0
 /// <summary>
 /// Build a job failure diagnosis starting from a cluster configuration and a job summary.
 /// </summary>
 /// <param name="config">Cluster where job resides.</param>
 /// <param name="summary">Job summary.</param>
 /// <param name="manager">Communication manager.</param>
 protected JobFailureDiagnosis(ClusterConfiguration config, DryadLinqJobSummary summary, CommManager manager)
     : base(config, summary, manager)
 {
     // The log is created even when no job is available.
     this.diagnosisLog = new DiagnosisLog(this.Job, summary);

     if (this.Job == null)
     {
         // Without a job there is no manager vertex to remember.
         return;
     }

     this.jobManager = this.Job.ManagerVertex;
 }
Пример #3
0
 /// <summary>
 /// Build a job failure diagnosis for a job whose information is already available.
 /// </summary>
 /// <param name="job">Job to diagnose.</param>
 /// <param name="plan">Plan of the diagnosed job.</param>
 /// <param name="manager">Communication manager.</param>
 protected JobFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, CommManager manager)
     : base(job, plan, manager)
 {
     // The diagnosis log is keyed on the job and on the summary the job carries.
     var jobSummary = job.Summary;
     this.diagnosisLog = new DiagnosisLog(job, jobSummary);

     // Remember the job manager vertex of the job held by this diagnosis.
     this.jobManager = this.Job.ManagerVertex;
 }
Пример #4
0
        /// <summary>
        /// Factory for the VertexFailureDiagnosis subclass that matches the job's cluster type.
        /// </summary>
        /// <param name="job">Job containing the vertex.</param>
        /// <param name="plan">Plan of the executed job.</param>
        /// <param name="vertex">Vertex to diagnose.</param>
        /// <param name="manager">Communication manager.</param>
        /// <returns>A subclass of VertexFailureDiagnosis.</returns>
        /// <exception cref="InvalidOperationException">
        /// The cluster type is not handled. As written, no cluster type is handled,
        /// so this is thrown on every call.
        /// </exception>
        public static VertexFailureDiagnosis CreateVertexFailureDiagnosis(
            DryadLinqJobInfo job,
            DryadJobStaticPlan plan,
            ExecutedVertexInstance vertex,
            CommManager manager)
        {
            ClusterConfiguration config = job.ClusterConfiguration;

            // For a CacheClusterConfiguration, continue with the configuration that
            // ActualConfig returns for this job's summary.
            var cacheConfig = config as CacheClusterConfiguration;
            if (cacheConfig != null)
            {
                config = cacheConfig.ActualConfig(job.Summary);
            }

            string message = "Config of type " + config.TypeOfCluster + " not handled";
            throw new InvalidOperationException(message);
        }
Пример #5
0
 /// <summary>
 /// Create a class to diagnose the problems of a vertex.
 /// Stores the job and the vertex, and sets the stack trace file name to "dryadLinqStackTrace.txt".
 /// </summary>
 /// <param name="job">Job containing the vertex; also passed to the base constructor.</param>
 /// <param name="plan">Plan of the executed job; passed to the base constructor.</param>
 /// <param name="vertex">Vertex to diagnose.</param>
 /// <param name="manager">Communication manager; passed to the base constructor.</param>
 protected VertexFailureDiagnosis(DryadLinqJobInfo job, DryadJobStaticPlan plan, ExecutedVertexInstance vertex, CommManager manager)
     : base(job, plan, manager)
 {
     // Job is assigned here even though the base constructor already received the same job.
     this.Job = job;
     this.Vertex = vertex;
     // NOTE(review): the suppression below concerns overridable members used from a constructor;
     // which member it is meant for is not visible here - confirm it is still needed.
     // ReSharper disable once DoNotCallOverridableMethodsInConstructor
     this.stackTraceFile = "dryadLinqStackTrace.txt";
 }
Пример #6
0
 /// <summary>
 /// Map a vertex state to the color that represents it.
 /// </summary>
 /// <param name="state">State of the vertex.</param>
 /// <returns>The color associated with the state.</returns>
 /// <exception cref="DryadException">The state is not one of the values listed below.</exception>
 private static Color VertexStateColor(ExecutedVertexInstance.VertexState state)
 {
     Color color;
     switch (state)
     {
         // States that share the neutral color.
         case ExecutedVertexInstance.VertexState.Unknown:
         case ExecutedVertexInstance.VertexState.Abandoned:
         case ExecutedVertexInstance.VertexState.Created:
             color = Color.White;
             break;

         case ExecutedVertexInstance.VertexState.Started:
             color = Color.Cyan;
             break;

         case ExecutedVertexInstance.VertexState.Successful:
             color = Color.LightGreen;
             break;

         case ExecutedVertexInstance.VertexState.Failed:
             color = Color.Tomato;
             break;

         case ExecutedVertexInstance.VertexState.Cancelled:
             color = Color.Yellow;
             break;

         case ExecutedVertexInstance.VertexState.Invalidated:
             color = Color.YellowGreen;
             break;

         case ExecutedVertexInstance.VertexState.Revoked:
             color = Color.Brown;
             break;

         default:
             throw new DryadException("Unexpected vertex state " + state);
     }

     return color;
 }
Пример #7
0
        /// <summary>
        /// Cache the interesting files of this vertex by reading each of them to the end.
        /// Only non-folder entries whose name indicates a text file and which are flagged
        /// for local caching are read.
        /// </summary>
        /// <param name="config">Cluster configuration.</param>
        /// <param name="summary">Job summary.</param>
        /// <param name="v">Vertex whose files should be cached.</param>
        /// <returns>
        /// Number of files read; 0 when the work directory is unavailable or reports an error.
        /// </returns>
        private static int CacheVertexInfo(ClusterConfiguration config, DryadLinqJobSummary summary, ExecutedVertexInstance v)
        {
            IClusterResidentObject folder = config.ProcessWorkDirectory(v.ProcessIdentifier, v.VertexIsCompleted, v.Machine, summary);
            if (folder == null || folder.Exception != null)
                return 0;

            int cached = 0;
            foreach (IClusterResidentObject file in folder.GetFilesAndFolders("*"))
            {
                if (file.RepresentsAFolder
                    || !Utilities.FileNameIndicatesTextFile(file.Name)
                    || !file.ShouldCacheLocally)
                {
                    continue;
                }

                ISharedStreamReader reader = file.GetStream();
                if (reader.Exception != null)
                {
                    // The stream could not be opened: do not read it and do not count the file as cached.
                    continue;
                }

                // NOTE(review): the reader is not closed explicitly here; presumably ReadAllLines
                // releases it once fully enumerated - confirm.
                // ReSharper disable once UnusedVariable
                foreach (string line in reader.ReadAllLines())
                {
                    // discard; causes caching
                }
                cached++;
            }
            return cached;
        }
Пример #8
0
        /// <summary>
        /// Scan the JM stdout looking for the specified vertex; display the lines in the file view.
        /// Run in the background.
        /// </summary>
        /// <param name="vertex">Vertex to look for.</param>
        /// <param name="stdout">Job standard output stream.</param>
        /// <param name="logViewer">Viewer to use to display the logs.</param>
        /// <returns>
        /// true if the scan was performed (even when no line matched or the scan was cancelled);
        /// false if there is no vertex, the vertex is the manager, or the stdout is empty or cannot be opened.
        /// </returns>
        private static bool ScanJMStdout(ExecutedVertexInstance vertex, IClusterResidentObject stdout, LogViewer logViewer)
        {
            if (vertex == null || vertex.IsManager)
                return false;

            // A line is interesting if it matches any of these alternatives:
            //   "vertex <number> (...) v.<version> ", " <number>.<version> ", or the vertex unique id.
            string regexstring = string.Format(@"vertex\s{0}\s(.*)\sv.{1}\s", vertex.Number, vertex.Version);
            regexstring += "|" + string.Format(@"\s{0}.{1}\s", vertex.Number, vertex.Version); // the dot could match a space too.

            // Only add the id when there is one: an empty alternative would match every line.
            string vertexId = vertex.UniqueID;
            if (!string.IsNullOrEmpty(vertexId))
                regexstring += "|" + vertexId;

            Regex regex = new Regex(regexstring, RegexOptions.Compiled);
            Trace.TraceInformation(regex.ToString());

            long length = stdout.Size;
            logViewer.Status("Looking for " + vertex.Name, StatusKind.LongOp);
            if (length == 0)
            {
                logViewer.Status("JM stdout is empty.", StatusKind.Error);
                logViewer.Done();
                return false;
            }

            ISharedStreamReader sr = stdout.GetStream();
            if (sr.Exception != null)
            {
                logViewer.Status("Error opening JM stdout: " + sr.Exception.Message, StatusKind.Error);
                logViewer.Done();
                return false;
            }

            try
            {
                long read = 0;
                long lines = 0;
                while (!sr.EndOfStream)
                {
                    string line = sr.ReadLine();
                    read += line.Length;
                    if (regex.IsMatch(line))
                        logViewer.AddLine(stdout.ToString(), lines, line);
                    lines++;

                    // Check for cancellation and report progress only every 100 lines.
                    if (lines % 100 == 0 && length > 0)
                    {
                        if (logViewer.Cancelled)
                            break;
                        logViewer.UpdateProgress(Math.Min((int)(read * 100 / length), 100)); // the length can grow while the file is being read
                    }
                }
            }
            finally
            {
                // Close the reader even if the scan throws, and always tell the viewer we are done.
                try
                {
                    sr.Close();
                }
                finally
                {
                    logViewer.Done();
                }
            }
            return true;
        }
Пример #9
0
        /// <summary>
        /// Scan the JM logs looking for the specified vertex; display the lines in the file view.
        /// Run in the background.
        /// </summary>
        /// <param name="vertex">Vertex to look for.</param>
        /// <param name="logViewer">Viewer used to display the logs.</param>
        /// <returns>
        /// true if the log files were scanned (even when no line matched or the scan was cancelled);
        /// false if there is no vertex or no manager vertex, the vertex is the manager itself,
        /// the log directory reports an error, or no log file was found.
        /// </returns>
        private bool ScanJMLogs(ExecutedVertexInstance vertex, LogViewer logViewer)
        {
            if (vertex == null || this.Job.ManagerVertex == null)
                return false;
            if (vertex == this.Job.ManagerVertex)
                return false;

            // Lines are selected by matching the vertex unique id (used as a regular expression).
            string vertexId = vertex.UniqueID;
            Regex regex = new Regex(vertexId, RegexOptions.Compiled);
            Trace.TraceInformation(regex.ToString());
            IClusterResidentObject logdir = this.Job.ManagerVertex.LogDirectory;

            if (logdir.Exception != null)
            {
                this.Status(logdir.ToString(), StatusKind.Error);
                return false;
            }

            List<IClusterResidentObject> files = logdir.GetFilesAndFolders(this.Job.ManagerVertex.LogFilesPattern).ToList();
            if (files.Count == 0)
            {
                this.Status("No log files found", StatusKind.Error);
                return false;
            }

            try
            {
                // Total size of the files, used as the denominator of the progress percentage.
                // Negative sizes are ignored; it may legitimately end up being 0.
                long totalWork = 0;
                foreach (var file in files)
                {
                    if (totalWork >= 0 && file.Size >= 0)
                        totalWork += file.Size;
                }

                long done = 0;
                foreach (var file in files)
                {
                    ISharedStreamReader sr = file.GetStream();
                    if (sr.Exception != null)
                    {
                        logViewer.Status("Error opening file: " + sr.Exception.Message, StatusKind.Error);
                        continue;
                    }
                    logViewer.Status("Scanning " + file, StatusKind.LongOp);

                    try
                    {
                        long lineno = 0;
                        while (!sr.EndOfStream)
                        {
                            if (logViewer.Cancelled)
                                break;
                            string line = sr.ReadLine();
                            done += line.Length;
                            if (regex.IsMatch(line))
                            {
                                logViewer.AddLine(file.Name, lineno, line);
                            }
                            lineno++;

                            // Skip progress when the total is unknown (avoids dividing by zero)
                            // and never report more than 100%.
                            if (totalWork > 0)
                                logViewer.UpdateProgress((int)Math.Min(100 * done / totalWork, 100));
                        }
                    }
                    finally
                    {
                        // Close the reader even if scanning this file throws.
                        sr.Close();
                    }

                    if (logViewer.Cancelled)
                        break;
                }
            }
            finally
            {
                logViewer.Done();
            }

            return true;
        }
Пример #10
0
        /// <summary>
        /// Show a vertex in the vertex view panes, or reset the panes when there is no vertex.
        /// </summary>
        /// <param name="executedVertexInstance">Vertex to display; null clears the display.</param>
        private void DisplayVertex(ExecutedVertexInstance executedVertexInstance)
        {
            this.currentVertex = executedVertexInstance;
            this.vertexHeaderData.Clear();
            this.label_Vertex.BackColor = this.defaultBackColor;

            if (executedVertexInstance == null)
            {
                // Nothing selected: disable every vertex-specific control.
                this.vertexToolStripMenuItem.Enabled = false;

                this.textBox_find.Enabled = false;
                this.label_Vertex.Text = "Vertex";
                this.comboBox_vertexInformation.Enabled = false;
            }
            else
            {
                var choices = this.comboBox_vertexInformation.Items;
                string[] managerOnly = { "XML Plan", "Job log" };
                const string vertexOnly = "stdout";

                if (this.currentVertex.IsManager)
                {
                    // The manager offers the plan and the job log, but no stdout entry.
                    foreach (string entry in managerOnly)
                    {
                        if (!choices.Contains(entry))
                        {
                            choices.Add(entry);
                        }
                    }

                    if (choices.Contains(vertexOnly))
                    {
                        choices.Remove(vertexOnly);
                    }
                }
                else
                {
                    // Any other vertex offers stdout, but neither the plan nor the job log.
                    foreach (string entry in managerOnly)
                    {
                        if (choices.Contains(entry))
                        {
                            choices.Remove(entry);
                        }
                    }

                    if (!choices.Contains(vertexOnly))
                    {
                        choices.Add(vertexOnly);
                    }
                }

                this.Status("Loading vertex data...", StatusKind.LongOp);
                this.textBox_find.Enabled = true;

                // Fill the header grid with the properties of the vertex.
                this.vertexPropertyEnumerator.Data = executedVertexInstance;
                this.vertexPropertyEnumerator.PopulateWithProperties(this.vertexHeaderData);

                this.label_Vertex.Text = "Vertex: " + executedVertexInstance.Name;
                this.comboBox_vertexInformation.Enabled = true;
                this.label_Vertex.BackColor = VertexStateColor(executedVertexInstance.State);

                // The vertex menu stays disabled even when a vertex is shown.
                this.vertexToolStripMenuItem.Enabled = false;
            }

            this.vertexHeaderData.ResetBindings();
            this.ChooseVertexInformation();
            this.dataGridView_vertexHeader.ClearSelection();
            this.Status("OK", StatusKind.OK);
        }
Пример #11
0
        /// <summary>
        /// Move the selection to the specified vertex, switching to the vertex's stage first
        /// when needed.
        /// </summary>
        /// <param name="vertex">Vertex to select; null clears the stage/table display.</param>
        private void SelectVertex(ExecutedVertexInstance vertex)
        {
            if (vertex == null)
            {
                this.SetNoStageOrTable("", false);
                return;
            }

            DryadLinqJobStage executedstage = this.Job.GetStage(vertex.StageName);

            // Switch to the vertex's stage unless it is already displayed or the stage named
            // "All vertices" is displayed. currentStage is null-checked so that selecting a
            // vertex while no stage is displayed switches stage instead of throwing.
            if (executedstage != null
                && this.currentStage != executedstage
                && (this.currentStage == null || this.currentStage.Name != "All vertices"))
            {
                this.SetStage(executedstage);
            }

            // let us move selection to this vertex
            for (int i = 0; i < this.dataGridView_stageContents.Rows.Count; i++)
            {
                DataGridViewRow row = this.dataGridView_stageContents.Rows[i];
                if (row.DataBoundItem == vertex)
                {
                    row.Selected = true;
                    // Scroll so that the selected row is the first one displayed.
                    this.dataGridView_stageContents.FirstDisplayedScrollingRowIndex = i;
                    break;
                }
            }
        }
Пример #12
0
        /// <summary>
        /// Parse one line from the JM standard output.
        /// </summary>
        /// <param name="line">The line to parse.</param>
        private void ParseStdoutLine(string line)
        {
            DateTime lineTimeStamp = DateTime.MinValue;

            if (line.Contains("Created process execution record"))
            {
                Match m = vertexCreatedRegex.Match(line);
                if (m.Success)
                {
                    lineTimeStamp = ParseLineTimestamp(line);

                    // Created process execution record for vertex (\d+) \((.*)\) v.(\d+) GUID \{?([-A-F0-9]+)\}?
                    int number = Int32.Parse(m.Groups[1].Value);
                    string name = m.Groups[2].Value;
                    int version = Int32.Parse(m.Groups[3].Value);
                    string guid = m.Groups[4].Value; // on some platforms, e.g. HPC, this identifier is not yet assigned properly

                    // the vertex may be already there, sometimes numbers are reused...
                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    if (vi == null)
                    {
                        vi = new ExecutedVertexInstance(this, number, version, name, guid, lineTimeStamp);
                        this.jobVertices.Add(vi);
                    }
                }
                else
                {
                    m = verticesCreatedRegex.Match(line);
                    if (m.Success)
                    {
                        lineTimeStamp = ParseLineTimestamp(line);

                        // Created process execution record for vertices (.*) v.(\d+) GUID \{?([-A-F0-9]+)\}?
                        // Created process execution record for vertices 192 (Merge__41[0]) 223 (Union__45[0]) v.0 GUID {0297A91C-FFEA-42EA-94AF-CD0163A04D45}
                        int version = Int32.Parse(m.Groups[2].Value);
                        string vertices = m.Groups[1].Value;
                        string guid = m.Groups[3].Value; // on some platforms, e.g. HPC, this identifier is not yet assigned properly

                        IEnumerable<Tuple<string, int>> vertexList = DryadLinqJobInfo.ParseVertices(vertices);
                        foreach (var p in vertexList)
                        {
                            int number = p.Item2;
                            ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                            if (vi == null)
                            {
                                vi = new ExecutedVertexInstance(this, number, version, p.Item1, guid, lineTimeStamp);
                                this.jobVertices.Add(vi);
                            }
                        }
                    }
                }
            }
            else if (line.StartsWith("Creating process"))
            {
                Match m = processCreatingRegex.Match(line);
                if (m.Success)
                {
                    lineTimeStamp = ParseLineTimestamp(line);

                    // Creating process for vertex (\d+) \((.*)\\) v.(\d+) GUID \{?([-A-F0-9]+)\}? machine (\w+)
                    int number = Int32.Parse(m.Groups[1].Value);
                    //string name = m.Groups[2].Value;
                    int version = Int32.Parse(m.Groups[3].Value);
                    string guid = m.Groups[4].Value; 

                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    if (vi != null)
                    {
                        this.jobVertices.Remap(vi, guid);
                    }
                }
            }
            else if (line.StartsWith("Process was revoked"))
            {
                Match m = revokedRegex.Match(line);
                if (m.Success)
                {
                    string oldGuid = m.Groups[1].Value;
                    ExecutedVertexInstance vi = this.jobVertices.FindVertexByGuid(oldGuid);
                    if (vi != null)
                    {
                        vi.SetState(ExecutedVertexInstance.VertexState.Revoked);
                        string newGuid = m.Groups[2].Value;
                        this.jobVertices.Remap(vi, newGuid);
                    }
                    else
                    {
                        Trace.TraceInformation("Could not find revoked vertex with guid " + oldGuid);
                    }
                }
            }
            else if (line.StartsWith("---HiPriTime"))
            {
                // Scope-specific line which we use to get the i/o information
                // ---HiPriTime D7D51A1F-6693-4378-95FD-FC778A67C632,F52CA694-0202-411E-85E9-0C883E770A0E,SV4_Extract_Split[0],Completed,ch1sch010331112,2011-05-03 15:26:01.681 PDT,2011-05-03 15:26:01.696 PDT,2011-05-03 15:26:02.118 PDT,2011-05-03 15:26:04.286 PDT,2011-05-03 15:26:07.656 PDT,2011-05-03 15:26:01.696 PDT,97390825,1498630
                string info = line.Substring(13);
                string[] parts = info.Split(',');
                if (parts.Length >= 13)
                {
                    long read = long.Parse(parts[11]);
                    long written = long.Parse(parts[12]);
                    string guid = parts[1];

                    ExecutedVertexInstance vi = this.jobVertices.FindVertexByGuid(guid);
                    if (vi != null)
                    {
                        vi.DataRead = read;
                        vi.DataWritten = written;
                        this.TotalDataRead += read;
                    }
                }
            }
            else if (line.Contains("Io information"))
            {
                // HPC-specific line
                Match m = ioRegex.Match(line);
                if (m.Success)
                {
                    int number = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);
                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    if (vi != null)
                    {
                        vi.DataRead = long.Parse(m.Groups[4].Value);
                        vi.DataWritten = long.Parse(m.Groups[5].Value);
                        this.TotalDataRead += vi.DataRead;
                    }
                }
            }
            else if (line.Contains("Process started"))
            {
                //those vertices which are being canceled may not be here
                Match m = vertexStartRegex.Match(line);
                if (m.Success)
                {
                    lineTimeStamp = ParseLineTimestamp(line);

                    string version = m.Groups[3].Value;
                    string guid = m.Groups[4].Value;
                    string pid = this.ClusterConfiguration.ExtractPidFromGuid(guid, this.Summary);
                    DryadProcessIdentifier identifier = new DryadProcessIdentifier(pid);
                    string machine = m.Groups[5].Value;

                    // Process started for vertex 4 (Super__0[0]) v.0 GUID {9DDD0B00-C93F-46D2-9073-1CFD27829300} machine sherwood-255
                    // Process started for vertices 23 (Merge__29) 24 (Apply__33) v.0 GUID {E945DC5D-9AF6-4732-8770-2A6BF7FA3041} machine sherwood-237

                    string vertices = m.Groups[2].Value;
                    // This is a list of (number \(name\))* pairs
                    // we will assume that the parantheses are matched, or we can't do much

                    bool onevertex;
                    if (m.Groups[1].Value == "ex")  // one vertEX
                        onevertex = true;
                    else if (m.Groups[1].Value == "ices")
                        onevertex = false;
                    else
                        throw new DryadException("Can't figure out if one or many vertices");

                    IEnumerable<Tuple<string, int>> vertexList = DryadLinqJobInfo.ParseVertices(vertices);

                    int vertexcount = 0;
                    int iversion = int.Parse(version);

                    if (lineTimeStamp > this.lastTimestampSeen)
                        this.lastTimestampSeen = lineTimeStamp;
                    foreach (var p in vertexList)
                    {
                        int number = p.Item2;
                        ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, iversion);
                        //new ExecutedVertexInstance(this, number, version, name, identifier, machine, this.lastTimestampSeen);
                        if (vi == null)
                            Trace.TraceInformation("Could not find information for vertex {0}.{1}", number, version);
                        else
                            vi.SetStartInformation(this, machine, this.lastTimestampSeen, identifier, guid);
                        vertexcount++;
                    }

                    if (vertexcount > 1 && onevertex)
                        throw new DryadException("Expected one vertex, found " + vertexcount);
                }
                else
                {
                    Trace.TraceInformation("Unexpected parsing error on line {0}", line);
                }
            }
            else if (line.Contains("Abandoning"))
            {
                Match m = vertexAbandonedRegex.Match(line);
                if (m.Success)
                {
                    int number = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);
                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    if (vi != null)
                        vi.SetState(ExecutedVertexInstance.VertexState.Abandoned);
                }
            }
            else if (line.Contains("Setting"))
            {
                Match m = setToFailedlRegex.Match(line);
                if (m.Success)
                {
                    // Setting vertex 1461.0 (Merge__13[258]) to failed
                    // Setting vertex (\d+)\.(\d+) \((.+)\) to failed(.*)
                    int number = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);

                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    if (vi != null)
                    {
                        vi.SetState(ExecutedVertexInstance.VertexState.Failed);
                        //vi.ErrorString = m.Groups[4].Value;
                    }
                }
            }
            else if (line.Contains("Process was terminated"))
            {
                // terminatedRegex = new Regex(@"Process was terminated Vertex (\d+)\.(\d+) \((.+)\) GUID \{?([-A-F0-9]+)\}? machine (\S+) status (.*)",
                // Process was terminated Vertex 11.0 (Select__6[1]) GUID {C1E35A88-F5AD-4A26-BE5F-46B6D515623F} machine sherwood-118 status The operation succeeded
                Match m = terminatedRegex.Match(line);
                if (m.Success)
                {
                    lineTimeStamp = ParseLineTimestamp(line);

                    int number = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);

                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    if (vi != null)
                    {
                        // sometimes successful processes are terminated, because they don't report quickly enough being done
                        if (vi.State != ExecutedVertexInstance.VertexState.Successful)
                        {
                            vi.SetState(ExecutedVertexInstance.VertexState.Cancelled);
                        }
                        vi.ErrorString = m.Groups[6].Value;
                        if (lineTimeStamp != DateTime.MinValue)
                            vi.End = lineTimeStamp;
                    }
                }
            }
            else if (line.Contains("Timing Information Graph Start Time"))
            {
                // Cosmos-specific line
                // Timing Information Graph Start Time 128654556581866096
                Match m = Regex.Match(line, @"Timing Information Graph Start Time (\d+)");
                DateTime createTime = Utilities.Convert64time(ClusterConfiguration.GetClusterTimeZone(this.Summary), m.Groups[1].Value);
                this.ManagerVertex.SetStartInformation(this, this.Summary.Machine, createTime, this.Summary.ManagerProcessGuid, "");
                this.ManagerVertex.StartCommandTime = this.ManagerVertex.CreationTime = this.ManagerVertex.VertexScheduleTime = createTime;
                this.lastTimestampSeen = createTime;
            }
            else if (line.StartsWith("Start time: "))
            {
                // HPC L2H specific line
                // Start time: 04/05/2011 17:25:42.223
                DateTime createTime;
                bool parse = DateTime.TryParse(line.Substring("Start time: ".Length), out createTime);

                if (parse)
                {
                    this.ManagerVertex.SetStartInformation(this, this.Summary.Machine, createTime, this.Summary.ManagerProcessGuid, "");
                    this.ManagerVertex.StartCommandTime = this.ManagerVertex.CreationTime = this.ManagerVertex.VertexScheduleTime = createTime;
                    this.lastTimestampSeen = createTime;
                }
            }
            else if (line.Contains("JM Finish time:"))
            {
                // Cosmos-specific line
                // JM Finish time: 129140295499437263 2010-03-25T22:25:49.943726Z
                Match m = Regex.Match(line, @"JM Finish time: (\d+)");
                DateTime time = Utilities.Convert64time(ClusterConfiguration.GetClusterTimeZone(this.Summary), m.Groups[1].Value);
                this.lastTimestampSeen = time;
                this.ManagerVertex.End = time;
            }
            else if (line.StartsWith("Stop time "))
            {
                // HPC L2H specific line
                // Stop time (Exit code = 2148734208): 04/05/2011 17:25:46.614
                Regex regex = new Regex(@"Stop time \(Exit code = (.*)\): (.*)");
                Match m = regex.Match(line);
                if (m.Success)
                {
                    this.ManagerStdoutIncomplete = false;

                    DateTime time;
                    bool parse = DateTime.TryParse(m.Groups[2].Value, out time);
                    if (parse)
                    {
                        this.lastTimestampSeen = time;
                        this.ManagerVertex.End = time;
                    }

                    this.ErrorCode = m.Groups[1].Value;
                    if (this.ErrorCode == "0")
                    {
                        this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Successful);
                    }
                    else
                    {
                        this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Failed);
                    }
                }
            }
            else if (line.Contains("Timing Information"))
            {
                // Timing Information 4 1 Super__0[0] 128654556603428182 0.0000 0.0000 0.0000 0.0000 0.2500 
                Match m = timingInfoRegex.Match(line);
                if (m.Success)
                {
                    int vertex = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);
                    DateTime createtime = Utilities.Convert64time(ClusterConfiguration.GetClusterTimeZone(this.Summary), m.Groups[4].Value);
                    ExecutedVertexInstance vi = jobVertices.FindVertex(vertex, version);
                    if (vi == null)
                        return; // we do not keep track of vertices with duplicate scheduling, so these won't show up here

                    if (vi.State == ExecutedVertexInstance.VertexState.Started)
                    {
                        Console.WriteLine("Timing information while vertex is still running " + vi);
                        //throw new ClusterException("Timing information for vertex still running: " + vi);
                    }
                    DateTime last = vi.SetTiming(createtime, m.Groups[5].Value, m.Groups[6].Value, m.Groups[7].Value, m.Groups[8].Value, m.Groups[9].Value);
                    if (last > this.lastTimestampSeen)
                        this.lastTimestampSeen = last;
                    this.ManagerVertex.MarkVertexWasRunning(last);

                    try
                    {
                        if (vi.State == ExecutedVertexInstance.VertexState.Successful)
                            this.UsefulCPUTime += vi.RunningTime;
                        else if (vi.RunningTime > TimeSpan.Zero)
                            this.WastedCPUTime += vi.RunningTime;
                    }
                    catch (Exception ex)
                    {
                        Console.WriteLine("Time value exception: " + ex.Message);
                    }
                }
                else
                    throw new DryadException("Unmatched timing information line " + line);
            }
            else if (line.Contains("Process has failed"))
            {
                // Process has failed Vertex 11.0 (Select__6[1]) GUID {C1E35A88-F5AD-4A26-BE5F-46B6D515623F} machine sherwood-118 Exitcode 0 status The operation succeeded
                // failedRegex = new Regex(@"Process has failed Vertex (\d+)\.(\d+) \((.+)\) GUID \{?([-A-F0-9]+)\}? machine (\S+) Exitcode (.*)",
                Match m = failedRegex.Match(line);
                if (m.Success)
                {
                    lineTimeStamp = ParseLineTimestamp(line);

                    int vertex = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);
                    string exitcode = m.Groups[6].Value;
                    //string status = m.Groups[7].Value;
                    ExecutedVertexInstance vi = jobVertices.FindVertex(vertex, version);
                    if (vi != null)
                    {
                        vi.SetState(ExecutedVertexInstance.VertexState.Failed);
                        vi.ExitCode = exitcode;
                        if (lineTimeStamp != DateTime.MinValue)
                            vi.End = lineTimeStamp;
                        //vi.ErrorString = status;
                    }
                }
            }
            else if (line.Contains("ABORTING:"))
            {
                this.AbortingMsg = line.Substring(10);
                this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Failed);
            }
            else if (line.Contains("Accurate read data"))
            {
                Match m = datareadRegex.Match(line);
                if (m.Success)
                {
                    this.TotalDataRead = long.Parse(m.Groups[1].Value);
                    this.LocalReadData = long.Parse(m.Groups[2].Value);
                    this.IntraPodDataRead = long.Parse(m.Groups[3].Value);
                    this.CrossPodDataRead = long.Parse(m.Groups[4].Value);
                }
            }
            else if (line.Contains("<ErrorString>"))
            {
                //some errors contains "Error returned from managed runtime invocation"
                //which shows the error is from application code
                Match m = Regex.Match(line, @"\<ErrorString\>(.*)\</ErrorString\>");
                if (m.Success && lastFailedVertex != null)
                {
                    lastFailedVertex.AddErrorString(System.Web.HttpUtility.HtmlDecode(m.Groups[1].Value));
                }
            }
            else if (line.Contains("Canceling"))
            {
                // Canceling vertex 1461.0 (Merge__13[258]) due to dependent failure
                Match m = cancelRegex.Match(line);
                if (m.Success)
                {
                    lineTimeStamp = ParseLineTimestamp(line);

                    int vertex = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);
                    string name = m.Groups[3].Value;

                    ExecutedVertexInstance vi = jobVertices.FindVertex(vertex, version);
                    if (vi != null)
                    {
                        if (vi.State == ExecutedVertexInstance.VertexState.Successful)
                            vi.SetState(ExecutedVertexInstance.VertexState.Invalidated);
                        else 
                            vi.SetState(ExecutedVertexInstance.VertexState.Cancelled);
                        if (lineTimeStamp != DateTime.MinValue)
                            vi.End = lineTimeStamp;
                    }
                    else
                    {
                        // TODO: this should not be needed, but this is a workaround for a bug in the HPC L2H software
                        vi = new ExecutedVertexInstance(this, vertex, version, name, "", lineTimeStamp);
                        vi.SetState(ExecutedVertexInstance.VertexState.Cancelled);
                        this.jobVertices.Add(vi);
                    }
                    // Process wasn't even started, so there is nothing to cancel
                }
            }
            else if (line.Contains("Application"))
            {
                //the job ends successfully
                Regex endSuccessRegex = new Regex(@"Application completed successfully.");
                //the job failed
                Regex endFailRegex = new Regex(@"Application failed with error code (.*)");

                Match m1 = endFailRegex.Match(line);

                if (m1.Success)
                {
                    this.ErrorCode = m1.Groups[1].Value;
                    this.ManagerStdoutIncomplete = false;
                    this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Failed);
                }
                else
                {
                    Match m2 = endSuccessRegex.Match(line);
                    if (m2.Success)
                    {
                        this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Successful);
                        this.ManagerStdoutIncomplete = false;
                    }
                }
            }
            else if (line.StartsWith("Input"))
            {
                // Input vertex %u (%s) had %u read failure%s\n
                Match m = inputFailureRegex.Match(line);
                if (m.Success)
                {
                    this.AbortingMsg = line;
                }
            }
            else if (line.Contains("Vertex"))
            {
                // terminationRegex = new Regex(@"Vertex (\d+)\.(\d+) \((.+)\) machine (\S+) guid \{?([-0-9A-F]+)\}? status (.*)"
                Match m = terminationRegex.Match(line);
                if (m.Success)
                {
                    lineTimeStamp = ParseLineTimestamp(line);

                    int vertex = Int32.Parse(m.Groups[1].Value);
                    int version = Int32.Parse(m.Groups[2].Value);
                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(vertex, version);
                    if (vi == null)
                    {
                        Trace.TraceInformation("Could not find vertex {0}.{1} line {2}", vertex, version, line);
                    }
                    else
                    {
                        bool failed = vi.SetTermination(m.Groups[6].Value, lineTimeStamp);
                        if (failed)
                            this.lastFailedVertex = vi;
                    }
                }
            }

            if (lineTimeStamp != DateTime.MinValue)
                this.lastTimestampSeen = lineTimeStamp;
        }
Пример #13
0
        /// <summary>
        /// New JM stdout parsing code, for YARN-based DryadLINQ.
        /// The line is split into key/value pairs; a "job" key describes a job manager event
        /// ("start" or "stop"), a "vertex" key describes a vertex event (a state "transition",
        /// a channel error, or an "io" report).
        /// </summary>
        /// <remarks>
        /// Every non-blank, parseable line must carry a "logtimelocal" key; the indexer lookup throws
        /// <see cref="KeyNotFoundException"/> when it is missing. The parsed timestamp always
        /// overwrites <c>lastTimestampSeen</c>.
        /// </remarks>
        /// <param name="line">Line to parse.</param>
        /// <returns>
        /// False if the line terminated in a quoted string and has to be combined with the next line
        /// (the key/value parser returned null); true otherwise, including for blank lines.
        /// </returns>
        private bool ParseStdoutLineNew(string line)
        {
            if (string.IsNullOrWhiteSpace(line)) return true;

            Dictionary<string, string> kvp = Utilities.ParseCSVKVP(line);
            if (kvp == null) return false;

            // Drop a trailing "UTC" marker (and anything after it) so DateTime.Parse accepts the text.
            var strTs = kvp["logtimelocal"];
            int cutOff = strTs.IndexOf("UTC");
            if (cutOff >= 0)
            {
                strTs = strTs.Substring(0, cutOff);
            }
            DateTime timeStamp = DateTime.Parse(strTs, CultureInfo.InvariantCulture);
            // ToLocalTime treats a DateTime of unspecified kind as UTC.
            timeStamp = timeStamp.ToLocalTime();
            this.lastTimestampSeen = timeStamp;

            string operation;
            string vertex;
            if (kvp.TryGetValue("job", out operation))
            {
                switch (operation)
                {
                    case "start":
                        this.ManagerVertex.SetStartInformation(this, this.Summary.Machine, timeStamp, this.Summary.ManagerProcessGuid, "");
                        this.ManagerVertex.StartCommandTime = this.ManagerVertex.CreationTime = this.ManagerVertex.VertexScheduleTime = timeStamp;
                        break;
                    case "stop":
                        this.ManagerVertex.End = timeStamp;
                        string exitcode;

                        if (kvp.TryGetValue("exitcode", out exitcode))
                        {
                            this.ErrorCode = exitcode;
                            // The exit code is logged in hexadecimal.
                            int numCode = Convert.ToInt32(exitcode, 16);
                            if (numCode == 0)
                            {
                                this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Successful);
                            }
                            else
                            {
                                this.ManagerVertex.SetState(ExecutedVertexInstance.VertexState.Failed);
                            }
                        }

                        string errorstring;
                        if (kvp.TryGetValue("errorstring", out errorstring))
                        {
                            this.ManagerVertex.AddErrorString(errorstring);
                            this.AbortingMsg = errorstring;
                        }

                        break;
                }
            }
            else if (kvp.TryGetValue("vertex", out vertex))
            {
                int number;
                int version;

                // The vertex is logged either as "number.version" or as a bare number
                // accompanied by a separate "version" key.
                int dot = vertex.IndexOf('.');
                if (dot < 0)
                {
                    number = int.Parse(vertex);
                    version = int.Parse(kvp["version"]);
                }
                else
                {
                    number = int.Parse(vertex.Substring(0, dot));
                    version = int.Parse(vertex.Substring(dot + 1));
                }

                string transition;
                string ioPhase;
                if (kvp.TryGetValue("transition", out transition))
                {
                    switch (transition)
                    {
                        case "created":
                        {
                            string name = kvp["name"];
                            ExecutedVertexInstance vi = new ExecutedVertexInstance(this, number, version, name, "", timeStamp);
                            this.jobVertices.Add(vi);
                            break;
                        }
                        case "starting":
                        {
                            // not doing anything
                            break;
                        }
                        case "running":
                        {
                            // Prefer the "id" key for the process guid; fall back to "process".
                            string process;
                            kvp.TryGetValue("id", out process);
                            if (process == null)
                                kvp.TryGetValue("process", out process);
                            string machine = kvp["computer"];
                            ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                            this.jobVertices.Remap(vi, process);
                            string pid = this.ClusterConfiguration.ExtractPidFromGuid(process, this.Summary);
                            DryadProcessIdentifier identifier = new DryadProcessIdentifier(pid);
                            vi.SetStartInformation(this, machine, timeStamp, identifier, process);
                            break;
                        }
                        case "completed":
                        {
                            ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                            vi.SetState(ExecutedVertexInstance.VertexState.Successful);
                            vi.End = timeStamp;
                            vi.ExitCode = "";
                            this.UsefulCPUTime += vi.RunningTime;
                            break;
                        }
                        case "failed":
                        {
                            ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                            // A vertex that is not in the Started state is recorded as cancelled rather than failed.
                            if (vi.State != ExecutedVertexInstance.VertexState.Started)
                                vi.SetState(ExecutedVertexInstance.VertexState.Cancelled);
                            else
                            {
                                vi.SetState(ExecutedVertexInstance.VertexState.Failed);
                                // NOTE(review): RunningTime is read here before End is assigned below, whereas the
                                // "completed" case assigns End first - confirm this ordering is intended.
                                if (vi.RunningTime > TimeSpan.Zero)
                                    this.WastedCPUTime += vi.RunningTime;
                            }
                            string failureText;
                            if (kvp.TryGetValue("errorstring", out failureText))
                                vi.AddErrorString(failureText);
                            string errorCode;
                            if (kvp.TryGetValue("errorcode", out errorCode))
                                vi.ExitCode = errorCode;
                            vi.End = timeStamp;
                            break;
                        }
                    }
                }
                else if (kvp.ContainsKey("outputChannel"))
                {
                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    string channelError;
                    if (kvp.TryGetValue("errorstring", out channelError))
                        vi.AddErrorString(channelError);
                }
                else if (kvp.ContainsKey("inputChannel"))
                {
                    ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                    string channelError;
                    if (kvp.TryGetValue("errorstring", out channelError))
                        vi.AddErrorString(channelError);
                }
                else if (kvp.TryGetValue("io", out ioPhase))
                {
                    if (ioPhase == "starting")
                    {
                        ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);
                        int numberOfInputs = (int)TryGetNumeric(kvp, "numberOfInputs");
                        int numberOfOutputs = (int)TryGetNumeric(kvp, "numberOfOutputs");

                        if (vi.InputChannels == null)
                            vi.InputChannels = new Dictionary<int, ChannelEndpointDescription>();

                        // Only channels whose "uriIn.<index>" key is present are registered.
                        for (int i = 0; i < numberOfInputs; i++)
                        {
                            string uri;
                            if (kvp.TryGetValue("uriIn." + i, out uri))
                            {
                                var ched = new ChannelEndpointDescription(false, i, uri, 0);
                                vi.InputChannels[i] = ched;
                            }
                        }

                        if (vi.OutputChannels == null)
                            vi.OutputChannels = new Dictionary<int, ChannelEndpointDescription>();

                        // Only channels whose "uriOut.<index>" key is present are registered.
                        for (int i = 0; i < numberOfOutputs; i++)
                        {
                            string uri;
                            if (kvp.TryGetValue("uriOut." + i, out uri))
                            {
                                // NOTE(review): the first constructor argument is false here, same as for
                                // input channels - confirm that is intended for outputs.
                                var ched = new ChannelEndpointDescription(false, i, uri, 0);
                                vi.OutputChannels[i] = ched;
                            }
                        }
                    }
                    else if (ioPhase == "total")
                    {
                        ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);

                        long totalRead = TryGetNumeric(kvp, "totalRead");
                        long tempRead = TryGetNumeric(kvp, "tempRead");
                        long tempReadInRack = TryGetNumeric(kvp, "tempReadInRack");
                        long tempReadCrossRack = TryGetNumeric(kvp, "tempReadCrossRack");
                        long localRead = TryGetNumeric(kvp, "localRead");
                        long totalWritten = TryGetNumeric(kvp, "totalWritten");

                        vi.DataRead = totalRead;
                        vi.DataWritten = totalWritten;

                        if (vi.InputChannels != null)
                        {
                            foreach (int ch in vi.InputChannels.Keys)
                            {
                                long bytes = TryGetNumeric(kvp, "rb." + ch);
                                vi.InputChannels[ch].Size = bytes;
                            }
                        }

                        if (vi.OutputChannels != null)
                        {
                            foreach (int ch in vi.OutputChannels.Keys)
                            {
                                long bytes = TryGetNumeric(kvp, "wb." + ch);
                                vi.OutputChannels[ch].Size = bytes;
                            }
                        }

                        // Final per-vertex totals are accumulated into the job-wide counters.
                        this.TotalDataRead += totalRead;
                        this.LocalReadData += localRead;
                        this.CrossPodDataRead += tempReadCrossRack;
                        this.IntraPodDataRead += tempReadInRack;
                    }
                    else if (ioPhase == "running")
                    {
                        ExecutedVertexInstance vi = this.jobVertices.FindVertex(number, version);

                        if (vi.InputChannels != null)
                        {
                            foreach (int ch in vi.InputChannels.Keys)
                            {
                                long bytes = TryGetNumeric(kvp, "rb." + ch);
                                vi.InputChannels[ch].Size = bytes;

                                bytes = TryGetNumeric(kvp, "tb." + ch);
                                vi.InputChannels[ch].TotalSize = bytes;
                            }
                        }

                        // Guard on the collection that is actually enumerated: output channels may be
                        // null even when input channels are present, and vice versa.
                        if (vi.OutputChannels != null)
                        {
                            foreach (int ch in vi.OutputChannels.Keys)
                            {
                                long bytes = TryGetNumeric(kvp, "wb." + ch);
                                vi.OutputChannels[ch].Size = bytes;
                            }
                        }

                        long totalRead = TryGetNumeric(kvp, "totalRead");
                        long totalWritten = TryGetNumeric(kvp, "totalWritten");

                        vi.DataRead = totalRead;
                        vi.DataWritten = totalWritten;
                    }
                }
            }
            return true;
        }
Пример #14
0
 /// <summary>
 /// Register a vertex instance under a guid it has newly received.
 /// A guid that is already registered keeps its existing mapping.
 /// </summary>
 /// <param name="vi">Executed vertex instance.</param>
 /// <param name="newGuid">New guid to associate with the vertex.</param>
 internal void Remap(ExecutedVertexInstance vi, string newGuid)
 {
     bool alreadyRegistered = this.vertexByGuid.ContainsKey(newGuid);
     if (alreadyRegistered)
     {
         return;
     }

     this.vertexByGuid.Add(newGuid, vi);
 }
Пример #15
0
        /// <summary>
        /// Add a new vertex to this job.
        /// The vertex is filed under its number, under its stage name, under its unique id
        /// (unless that id is already registered) and under the "All vertices" stage.
        /// </summary>
        /// <param name="vi">Vertex description to add.</param>
        public void Add(ExecutedVertexInstance vi)
        {
            // Index by vertex number; several instances may share one number.
            int vertexNumber = vi.Number;
            List<ExecutedVertexInstance> sameNumber;
            if (!vertices.TryGetValue(vertexNumber, out sameNumber))
            {
                sameNumber = new List<ExecutedVertexInstance>();
                vertices.Add(vertexNumber, sameNumber);
            }
            sameNumber.Add(vi);
            this.count++;

            // Index by stage name, creating the stage entry on first use.
            string stageName = vi.StageName;
            List<ExecutedVertexInstance> stageMembers;
            if (!this.jobStages.TryGetValue(stageName, out stageMembers))
            {
                stageMembers = new List<ExecutedVertexInstance>();
                this.jobStages.Add(stageName, stageMembers);
            }
            stageMembers.Add(vi);

            // The first vertex registered under a given unique id wins.
            string uniqueId = vi.UniqueID;
            if (!this.vertexByGuid.ContainsKey(uniqueId))
            {
                this.vertexByGuid.Add(uniqueId, vi);
            }

            // The "All vertices" entry must already exist; it is not created here.
            this.jobStages["All vertices"].Add(vi);
        }
Пример #16
0
 /// <summary>
 /// Add a vertex instance to this object's vertex collection.
 /// </summary>
 /// <param name="e">Executed vertex instance to add.</param>
 public void AddVertex(ExecutedVertexInstance e)
 {
     vertices.Add(e);
 }