/// <summary>
/// Scans the tag text for attributes and adds a <c>MarkupAttribute</c> to the
/// attribute collection for every match that has both a non-empty name and a non-empty value.
/// </summary>
private void ParseAttributes()
{
    foreach (Match attributeMatch in MarkupRegexHelper.ParseTagAttributes(Text))
    {
        // An empty match, or one without the name (1) and value (2) groups, is of no use.
        if (attributeMatch.Length <= 0 || attributeMatch.Groups.Count < 3)
        {
            continue;
        }

        var nameGroup = attributeMatch.Groups[1];
        var valueGroup = attributeMatch.Groups[2];

        // Both the attribute name and the attribute value must contain text.
        if (nameGroup.Length <= 0 || valueGroup.Length <= 0)
        {
            continue;
        }

        // Match positions are relative to Text, so every start index is shifted by
        // tagIndexBegin (presumably the tag's start offset in the document - confirm).
        attributes.Add(new MarkupAttribute(
            this,
            attributeMatch.Index + tagIndexBegin,
            attributeMatch.Length,
            nameGroup.Index + tagIndexBegin,
            nameGroup.Length,
            valueGroup.Index + tagIndexBegin,
            valueGroup.Length));
    }
}
/// <summary>
/// Parses the markup text and builds the document objects. The work is done in phases:
/// (1) create a <c>MarkupTag</c> for every tag found in the text, (2) optionally flag opening
/// tags that have no closing tag as inline, (3) build the parent/child hierarchy and nesting
/// levels, (4) link each normal opening tag to its closing tag, (5) validate the result,
/// (6) remove closing tags from the root and child collections, and (7) clear the parsing
/// cache when caching is enabled.
/// </summary>
/// <exception cref="Exception">Thrown when <c>IsValidMarkup</c> rejects the parsed tags.</exception>
private void ParseMarkup()
{
#if DEBUG
    Stopwatch sw1, sw2, sw3, sw4, sw5, sw6, sw7;
    TimeSpan totalElapsed;
#endif

    #region Create initial tag objects
#if DEBUG
    sw1 = Stopwatch.StartNew();
#endif
    Match n = null;

    // parse the text into markup tag objects
    var tags = new List<MarkupTag>();
    foreach (Match m in MarkupRegexHelper.ParseMarkupTags(text))
    {
        n = MarkupRegexHelper.GetMarkupTagName(m.Value);

        // Group 0 is always present, so group 1 (the tag name) only exists when the
        // collection holds more than one group; anything less indicates an empty tag (<>).
        if (n != null && n.Groups.Count > 1)
        {
            tags.Add(new MarkupTag(this, m.Index, m.Length, m.Index + n.Groups[1].Index, n.Groups[1].Length));
        }
    }
#if DEBUG
    sw1.Stop();
#endif
    #endregion

    #region Correct faulty inline tags
#if DEBUG
    sw2 = Stopwatch.StartNew();
#endif
    // NOTE: tagCount is the index of the LAST tag (-1 when no tags were found), not the number of tags.
    int tagCount = tags.Count - 1;
    MarkupTag ti = null, tj = null;
    var usedClosingTags = new List<int>();

    if (fixBadlyFormedInlineTags)
    {
        // handle faulty inline tags
        bool closingTagFound;
        for (int i = 0; i <= tagCount; i++)
        {
            closingTagFound = false;

            // zip through tags that we aren't interested in
            do
            {
                ti = tags[i];
                if (ti.Inline || ti.Comment || ti.IsClosingTag)
                {
                    i++;
                }
                else
                {
                    break;
                }
            } while (i <= tagCount);

            // Ran off the end without finding a normal opening tag: nothing left to examine.
            // (Falling through here would flag a trailing comment tag as inline.)
            if (i > tagCount)
            {
                break;
            }

            for (int j = i + 1; j <= tagCount; j++)
            {
                // zip through tags that we aren't interested in; we want to find a closing tag that hasn't been used
                do
                {
                    tj = tags[j];
                    if (tj.IsClosingTag && !usedClosingTags.Contains(j))
                    {
                        break;
                    }
                    else
                    {
                        j++;
                    }
                } while (j <= tagCount);

                // No unused closing tag remains. tj still refers to the last tag in the list,
                // which must not be compared again (it may be a closing tag that is already used).
                if (j > tagCount)
                {
                    break;
                }

                if (tj.Tag.Equals("/" + ti.Tag, stringComparison))
                {
                    usedClosingTags.Add(j);
                    closingTagFound = true;

                    // skip ahead one if we have a closing tag immediately after its opening tag
                    if (j == i + 1)
                    {
                        i++;
                    }

                    break;
                }
            }

            // no closing tag was found, so we'll mark the tag as an inline tag
            if (!closingTagFound)
            {
                ti.Inline = true;
            }
        }

        usedClosingTags.Clear();
    }
#if DEBUG
    sw2.Stop();
#endif
    #endregion

    #region Construct document hierarchy
#if DEBUG
    sw3 = Stopwatch.StartNew();
#endif
    // now process the tag objects to form the document hierarchy
    var tagQueue = new Queue<MarkupTag>(tags);
    var tagStack = new Stack<MarkupTag>(1);
    var processedTags = new List<MarkupTag>(tagCount + 1);
    MarkupTag currentTag = null, parentTag = null;
    var closedTagIndices = new List<int>(tagCount + 1);
    int nestingLevel = 0;

    while (tagQueue.Count > 0)
    {
        currentTag = tagQueue.Dequeue();

        // the stack holds the tag that is currently open (if any); it becomes the parent candidate
        if (tagStack.Count > 0)
        {
            parentTag = tagStack.Pop();
        }

        if (currentTag.Inline || currentTag.Comment) // inline and comment tags
        {
            currentTag.Parent = parentTag;
            currentTag.NestingLevel = nestingLevel;
            processedTags.Add(currentTag);

            if (parentTag != null)
            {
                parentTag.Children.Add(currentTag);
                tagStack.Push(parentTag);
            }
            else
            {
                rootTags.Add(currentTag);
            }
        }
        else if (currentTag.IsClosingTag) // normal close tag
        {
            // find the corresponding opening tag and use its parent for the parent of this closing tag
            for (int i = processedTags.Count - 1; i >= 0; i--)
            {
                // skip tags that cannot be the opening tag, without walking past the start of the list
                while (i >= 0 && (processedTags[i].Inline || processedTags[i].Comment || processedTags[i].IsClosingTag))
                {
                    i--;
                }

                // no candidate opening tag is left
                if (i < 0)
                {
                    break;
                }

                if (currentTag.Tag.Equals("/" + processedTags[i].Tag, stringComparison) && !closedTagIndices.Contains(i))
                {
                    parentTag = processedTags[i].Parent;
                    closedTagIndices.Add(i);
                    break;
                }
            }

            if (parentTag != null)
            {
                nestingLevel = parentTag.NestingLevel + 1;
            }
            else
            {
                nestingLevel = 0;
            }

            currentTag.NestingLevel = nestingLevel;

            if (parentTag == null) // end of processing!
            {
                rootTags.Add(currentTag);
                break;
            }

            currentTag.Parent = parentTag;
            parentTag.Children.Add(currentTag);
            processedTags.Add(currentTag);
            tagStack.Push(parentTag);
        }
        else // normal open tag
        {
            currentTag.NestingLevel = nestingLevel;
            nestingLevel++;
            currentTag.Parent = parentTag;
            processedTags.Add(currentTag);

            if (parentTag != null)
            {
                parentTag.Children.Add(currentTag);
            }
            else
            {
                rootTags.Add(currentTag);
            }

            tagStack.Push(currentTag);
        }
    }

    // these items are no longer needed
    processedTags.Clear();
    processedTags = null;
    tagStack.Clear();
    tagStack = null;
    tagQueue.Clear();
    tagQueue = null;
    closedTagIndices.Clear();
    closedTagIndices = null;
    currentTag = null;
    parentTag = null;
#if DEBUG
    sw3.Stop();
#endif
    #endregion

    #region Associate normal opening tags with their corresponding closing tags
#if DEBUG
    sw4 = Stopwatch.StartNew();
#endif
    usedClosingTags = new List<int>();

    // use the initial list that still has references to all of the tag objects for this work
    for (int i = 0; i < tagCount; i++)
    {
        // skip these kinds of tags; we're looking for a normal opening tag to process
        do
        {
            ti = tags[i];
        } while ((ti.Inline || ti.Comment || ti.ClosingTag != null || ti.IsClosingTag) && ++i < tagCount);

        for (int j = i + 1; j <= tagCount; j++)
        {
            // skip these kinds of tags; we're looking for a normal closing tag for the current normal opening tag
            do
            {
                tj = tags[j];
            } while ((!tj.IsClosingTag || tj.NestingLevel != ti.NestingLevel || usedClosingTags.Contains(j)) && ++j <= tagCount);

            // Every remaining tag was rejected. tj still refers to the last rejected tag, so it
            // must not be linked (it may be already used or sit at a different nesting level).
            if (j > tagCount)
            {
                break;
            }

            // this will be a matching closing tag for the current opening tag
            if (tj.Tag.Equals("/" + ti.Tag, stringComparison))
            {
                ti.ClosingTag = tj;
                usedClosingTags.Add(j);
                break;
            }
        }
    }

    usedClosingTags.Clear();
    usedClosingTags = null;
    ti = null;
    tj = null;
#if DEBUG
    sw4.Stop();
#endif
    #endregion

    #region Validate the markup
#if DEBUG
    sw5 = Stopwatch.StartNew();
#endif
    bool valid = IsValidMarkup(tags);
#if DEBUG
    sw5.Stop();
#endif
    if (!valid)
    {
        throw new Exception("The supplied markup text is invalid.");
    }
    #endregion

    #region Remove closing tags from the root tags collection and from all child objects
#if DEBUG
    sw6 = Stopwatch.StartNew();
#endif
    // handle the root tags; collect first, then remove, so rootTags is not modified while enumerating
    var tagsToRemove = new List<MarkupTag>();
    foreach (MarkupTag t in rootTags)
    {
        if (t.IsClosingTag)
        {
            tagsToRemove.Add(t);
        }
    }

    foreach (MarkupTag t in tagsToRemove)
    {
        rootTags.Remove(t);
    }

    // now all the child tags
    tagStack = new Stack<MarkupTag>();

    // sized from tags.Count: tagCount is -1 when no tags were parsed, which is not a valid capacity
    processedTags = new List<MarkupTag>(tags.Count);

    foreach (MarkupTag rootTag in rootTags)
    {
        if (rootTag.Children.Count > 0)
        {
            tagStack.Push(rootTag);
        }
    }

    while (tagStack.Count > 0)
    {
        currentTag = tagStack.Pop();

        // NOTE(review): nothing is added to processedTags in this phase, so this check never fires
        if (processedTags.Contains(currentTag))
        {
            continue;
        }

        tagsToRemove.Clear();
        foreach (MarkupTag child in currentTag.Children)
        {
            // if it's a closing tag flag it to be removed, otherwise if it has children we'll put it on the stack
            if (child.IsClosingTag)
            {
                tagsToRemove.Add(child);
            }
            else if (child.Children.Count > 0)
            {
                tagStack.Push(child);
            }
        }

        foreach (MarkupTag t in tagsToRemove)
        {
            currentTag.Children.Remove(t);
        }
    }
#if DEBUG
    sw6.Stop();
#endif
    #endregion

    #region Clear the cache that was used for parsing
#if DEBUG
    sw7 = Stopwatch.StartNew();
#endif
    if (useCaching)
    {
        ClearCache();
    }
#if DEBUG
    sw7.Stop();
#endif
    #endregion

#if DEBUG
    totalElapsed = sw1.Elapsed + sw2.Elapsed + sw3.Elapsed + sw4.Elapsed + sw5.Elapsed + sw6.Elapsed + sw7.Elapsed;
#endif
}