/* Function: SkipBlock
 * Moves the iterator past the opening tag it is currently on, everything nested inside it, and the matching
 * closing tag.  The iterator must be on an opening tag when this is called; DEBUG builds throw if it is not.
 * Nothing is returned.
 */
protected void SkipBlock(ref XMLIterator iterator)
{
    #if DEBUG
    if (!iterator.IsOnTag(TagForm.Opening))
    {
        throw new Exception("Can only call SkipBlock() when the iterator is on an opening tag.");
    }
    #endif

    // Seed the stack with the tag being skipped.  Once the stack empties again we are past its closing tag.
    TagStack openTags = new TagStack();
    openTags.OpenTag(iterator.TagType);
    iterator.Next();

    while (iterator.IsInBounds && !openTags.IsEmpty)
    {
        if (iterator.Type == XMLElementType.Tag)
        {
            var form = iterator.TagForm;

            if (form == TagForm.Opening)
            {
                openTags.OpenTag(iterator.TagType);
            }
            else if (form == TagForm.Closing)
            {
                openTags.CloseTag(iterator.TagType);
            }
            // Standalone tags don't affect nesting, so they are ignored.
        }

        iterator.Next();
    }
}
/// <summary>
/// Pops the top entry off <c>TagStack</c> and verifies that it is the tag the caller expected.
/// </summary>
/// <param name="tag">The tag expected to be on top of the stack.</param>
/// <exception cref="ArgumentException">
/// Thrown when the popped entry differs from <paramref name="tag" />. The entry has already been
/// removed from the stack by the time the exception is thrown.
/// </exception>
protected virtual void PopTag(string tag)
{
    var top = TagStack.Pop();
    if (top == tag)
    {
        return;
    }

    var message = string.Format("stack top tag {0} is different with {1}.", top, tag);
    throw new ArgumentException(message, "tag");
}
/// <summary>
/// Renders the children of <paramref name="node" /> into <paramref name="parentContainer" />.
/// Nodes that are not elements are ignored entirely.
/// </summary>
/// <remarks>
/// Text children become <c>Run</c> entries; line breaks are stripped from the text unless a "pre" tag is
/// on the tag stack. Element children are handed to the render registered for their tag name. When no
/// render is registered, a message dialog is shown (after breaking into an attached debugger).
/// The method is <c>async void</c>, so callers cannot await it or observe its exceptions.
/// </remarks>
/// <param name="node">The node whose children are rendered.</param>
/// <param name="parentContainer">The container that receives the rendered text.</param>
public async void RenderNode(INode node, ITextContainer parentContainer)
{
    var element = node as IElement;
    if (element == null)
    {
        return;
    }

    PushTag(element);

    foreach (var child in node.ChildNodes)
    {
        if (child is IText)
        {
            var text = child.TextContent ?? string.Empty;

            var insidePre = TagStack.Contains("pre", StringComparer.OrdinalIgnoreCase);
            if (!insidePre)
            {
                // Not in pre, replace all line break to empty.
                text = text.Replace("\r", string.Empty).Replace("\n", string.Empty);
            }

            if (!string.IsNullOrEmpty(text))
            {
                parentContainer.Add(new Run { Text = text });
            }

            continue;
        }

        var childElement = child as IElement;
        if (childElement == null)
        {
            // Neither text nor element: nothing to render.
            continue;
        }

        var elementRender = ElementRenders.GetRender(childElement.TagName);
        if (elementRender != null)
        {
            elementRender.RenderElement(childElement, parentContainer, this);
            continue;
        }

        // No render registered for this tag.
        if (Debugger.IsAttached)
        {
            Debugger.Break();
        }

        var dialog = new MessageDialog(string.Format("{0} can't render", childElement.TagName));
        await dialog.ShowAsync();
    }

    PopTag(element);
}
/* Function: TryToGetTopLevelTextBlock
 * If the iterator is on an opening summary, remark, remarks, example, returns, or value tag it will convert the
 * content to NDMarkup, add it to the matching text block of the comment ("remarks" shares the "remark" block),
 * move the iterator past it, and return true.  Otherwise it returns false and nothing is changed.
 */
protected bool TryToGetTopLevelTextBlock(ref XMLIterator iterator, XMLComment comment)
{
    bool onTextBlockTag = iterator.IsOnTag("summary", TagForm.Opening) ||
                          iterator.IsOnTag("remark", TagForm.Opening) ||
                          iterator.IsOnTag("remarks", TagForm.Opening) ||
                          iterator.IsOnTag("example", TagForm.Opening) ||
                          iterator.IsOnTag("returns", TagForm.Opening) ||
                          iterator.IsOnTag("value", TagForm.Opening);

    if (!onTextBlockTag)
    {
        return false;
    }

    // The keyword is the tag as written; the block type folds "remarks" into "remark".
    string keyword = iterator.TagType;
    string blockType = (keyword == "remarks" ? "remark" : keyword);

    XMLComment.TextBlock block = comment.GetTextBlock(blockType);

    TagStack tagStack = new TagStack();
    tagStack.OpenTag(keyword);
    iterator.Next();

    GetText(ref iterator, block.Text, tagStack);
    tagStack.CloseAllTags(block.Text);

    // Step past the closing tag if that is where GetText() stopped.
    if (iterator.IsOnTag(keyword, TagForm.Closing))
    {
        iterator.Next();
    }

    return true;
}
/// <summary>
/// Pushes <paramref name="tag" /> onto <c>TagStack</c>.
/// </summary>
/// <param name="tag">The tag to push.</param>
protected virtual void PushTag(string tag)
{
    TagStack.Push(tag);
}
/// <summary>
/// Removes the top entry from <c>TagStack</c>. The removed entry is discarded without being checked.
/// </summary>
protected virtual void PopTag()
{
    TagStack.Pop();
}
/// <summary>
/// Parses the given document to find all tags between the beginning of the document and the specified
/// end line.
/// </summary>
/// <remarks>
/// The <paramref name="p_pctCompleteTagCallback" /> is called whenever a complete tag has been parsed.
/// A complete tag is a tag whose opening and closing tags have been found (for example, &lt;b&gt;..&lt;/b&gt;).
/// The <paramref name="p_pctUnclosedTagCallback" /> is called for every tag that has to be discarded
/// from the stack because a closing tag for one of its ancestors was found first.
/// The stack that is returned contains all the unclosed tags found, and so represents where in the
/// document hierarchy the line falls.
/// </remarks>
/// <param name="p_docDocument">The document to parse.</param>
/// <param name="p_intEndLine">The line of the document at which to stop parsing.</param>
/// <param name="p_pctCompleteTagCallback">The method to call whenever a complete tag is parsed. May be <c>null</c>.</param>
/// <param name="p_pctUnclosedTagCallback">The method to call for each tag popped without a matching closing tag. May be <c>null</c>.</param>
/// <returns>A stack containing all the unclosed tags found.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// Thrown if <paramref name="p_intEndLine" /> is greater than
/// or equal to the <see cref="IDocument.TotalNumberOfLines" /> of <paramref name="p_docDocument" />.
/// </exception>
public static TagStack ParseTags(IDocument p_docDocument, Int32 p_intEndLine, ParsedTag p_pctCompleteTagCallback, ParsedTag p_pctUnclosedTagCallback)
{
    if (p_intEndLine >= p_docDocument.TotalNumberOfLines)
    {
        throw new ArgumentOutOfRangeException("p_intEndLine", p_intEndLine, "The given end line paramater is outside of the range of lines in the given document.");
    }

    //parse the buffer
    var stkTags = new TagStack();
    for (var i = 0; i <= p_intEndLine; i++)
    {
        var strLine = p_docDocument.GetText(p_docDocument.GetLineSegment(i));
        var intLineNum = i;
        var intLastOpenPos = strLine.LastIndexOf('<');
        if (intLastOpenPos < 0)
        {
            continue;
        }
        var intLastClosePos = strLine.LastIndexOf('>');
        // NOTE(review): a line whose only bracket is an unterminated '<' (no '>' at all) is not joined
        // with the following lines; that condition is kept as it was - confirm whether it is intended.
        if ((intLastClosePos > -1) && (intLastOpenPos > intLastClosePos))
        {
            var stbLines = new StringBuilder(strLine);
            //there is an open tag on this line - append the FOLLOWING lines until it is closed.
            // The current line is already in the builder, so reading starts at the next line. Reading
            // stops at the first appended line that does not itself end inside another open tag.
            while (i < p_intEndLine)
            {
                i++;
                var strNextLine = p_docDocument.GetText(p_docDocument.GetLineSegment(i));
                stbLines.Append(strNextLine);
                if (strNextLine.LastIndexOf('>') > strNextLine.LastIndexOf('<'))
                {
                    break;
                }
            }
            // i now indexes the last line consumed; the outer loop's increment moves past it.
            // Positions of tags found on appended lines are offsets into the joined text and are
            // reported against intLineNum (the first line of the run).
            strLine = stbLines.ToString();
        }

        var mclLineTags = rgxTagContents.Matches(strLine);
        foreach (Match mtcTag in mclLineTags)
        {
            var strTag = mtcTag.Groups[1].Value.Trim();
            var strTagName = rgxTagName.Match(strTag).Groups[1].Value;
            if (strTag.StartsWith("/"))
            {
                // Closing tag: only act on it if a matching opening tag is on the stack.
                if (stkTags.Contains(strTagName))
                {
                    // Everything above the matching opening tag was never closed.
                    while (!stkTags.Peek().Equals(strTagName))
                    {
                        var tpsTag = stkTags.Pop();
                        var tlcStart = new TextLocation(tpsTag.Column, tpsTag.LineNumber);
                        var tlcEnd = new TextLocation(tpsTag.Column + tpsTag.Name.Length, tpsTag.LineNumber);
                        if (p_pctUnclosedTagCallback != null)
                        {
                            p_pctUnclosedTagCallback(p_docDocument, tpsTag.Name, tlcStart, tlcEnd);
                        }
                    }

                    var tpsCompleteTag = stkTags.Pop();
                    if (p_pctCompleteTagCallback != null)
                    {
                        var tlcStart = new TextLocation(tpsCompleteTag.Column, tpsCompleteTag.LineNumber);
                        var intEndFoldPos = mtcTag.Groups[1].Index;
                        var tlcEnd = new TextLocation(intEndFoldPos, intLineNum);
                        p_pctCompleteTagCallback(p_docDocument, strTagName, tlcStart, tlcEnd);
                    }
                }
            }
            else
            {
                // Opening tag: self-closing tags (ending in '/') never go on the stack.
                if (!strTag.EndsWith("/"))
                {
                    stkTags.Push(strTagName, intLineNum, mtcTag.Groups[1].Index);
                }
            }
        }
    }
    return (stkTags);
}
/* Function: GetList
 * Converts the contents of a list tag to NDMarkup and adds it to the output.  The iterator should be on an opening
 * list tag and when it ends it will be past the closing tag.
 *
 * Items with both terms and descriptions somewhere in the list produce a definition list.  Otherwise a bullet
 * list is produced.  Items with neither a term nor a description are dropped, and an empty list produces no output.
 */
protected void GetList(ref XMLIterator iterator, StringBuilder output, TagStack tagStack)
{
    #if DEBUG
    if (!iterator.IsOnTag("list", TagForm.Opening))
    {
        throw new Exception("GetList() can only be called when the iterator is on an opening list tag.");
    }
    #endif

    tagStack.OpenTag("list");
    int listTagIndex = tagStack.Count - 1;
    iterator.Next();

    List<ListItem> items = new List<ListItem>();
    ListItem currentItem = new ListItem();
    StringBuilder scratch = new StringBuilder();  // Reused for every term and description

    while (iterator.IsInBounds)
    {
        if (iterator.IsOnTag("list", TagForm.Closing))
        {
            iterator.Next();
            break;
        }

        if (iterator.IsOnTag("item") || iterator.IsOnTag("listheader"))
        {
            if (iterator.TagForm == TagForm.Opening)
            {
                currentItem = new ListItem();
                currentItem.IsHeading = (iterator.TagType == "listheader");
            }
            else if (iterator.TagForm == TagForm.Closing)
            {
                // Clean up the collected text.  Headings are rendered in bold.
                if (currentItem.Term != null)
                {
                    string term = Normalize(currentItem.Term.Trim());

                    if (term == "")
                    {
                        term = null;
                    }
                    else if (currentItem.IsHeading)
                    {
                        term = "<b>" + term + "</b>";
                    }

                    currentItem.Term = term;
                }

                if (currentItem.Description != null)
                {
                    string description = Normalize(currentItem.Description.Trim());

                    if (description == "")
                    {
                        description = null;
                    }
                    else if (currentItem.IsHeading)
                    {
                        description = description.Replace("<p>", "<p><b>").Replace("</p>", "</b></p>");
                    }

                    currentItem.Description = description;
                }

                if (currentItem.Term != null || currentItem.Description != null)
                {
                    items.Add(currentItem);
                }

                currentItem = new ListItem();
            }

            iterator.Next();
        }
        else if (iterator.IsOnTag("term", TagForm.Opening))
        {
            tagStack.OpenTag("term");
            iterator.Next();

            scratch.Length = 0;
            GetSimpleText(ref iterator, scratch, tagStack);
            currentItem.Term = scratch.ToString();

            if (iterator.TagType == "term" && iterator.TagForm == TagForm.Closing)
            {
                iterator.Next();
            }

            tagStack.CloseTag("term");
        }
        else if (iterator.IsOnTag("description", TagForm.Opening))
        {
            tagStack.OpenTag("description");
            iterator.Next();

            scratch.Length = 0;
            GetText(ref iterator, scratch, tagStack);
            currentItem.Description = scratch.ToString();

            if (iterator.TagType == "description" && iterator.TagForm == TagForm.Closing)
            {
                iterator.Next();
            }

            tagStack.CloseTag("description");
        }
        else if (iterator.IsOnTag(TagForm.Opening))
        {
            // Any other opening tag inside a list is skipped along with its content.
            SkipBlock(ref iterator);
        }
        else
        {
            iterator.Next();
        }
    }

    tagStack.CloseTag(listTagIndex);

    if (items.Count == 0)
    {
        return;
    }

    // Find out whether terms and descriptions both occur anywhere in the list.
    bool hasTerms = false;
    bool hasDescriptions = false;

    foreach (var item in items)
    {
        if (item.Term != null)
        {
            hasTerms = true;
        }
        if (item.Description != null)
        {
            hasDescriptions = true;
        }
        if (hasTerms && hasDescriptions)
        {
            break;
        }
    }

    if (hasTerms && hasDescriptions)
    {
        output.Append("<dl>");

        foreach (var item in items)
        {
            output.Append("<de>");
            if (item.Term != null)
            {
                output.Append(item.Term);
            }

            output.Append("</de><dd>");
            if (item.Description != null)
            {
                output.Append(item.Description);
            }

            output.Append("</dd>");
        }

        output.Append("</dl>");
    }
    else
    {
        output.Append("<ul>");

        foreach (var item in items)
        {
            output.Append("<li>");

            // The format only allows for descriptions without terms, but terms without descriptions are supported too.
            if (item.Term != null)
            {
                output.Append(item.Term);
            }
            if (item.Description != null)
            {
                output.Append(item.Description);
            }

            output.Append("</li>");
        }

        output.Append("</ul>");
    }
}
/* Function: GetCode
 * Converts the contents of a code tag to NDMarkup and adds it to the output.  The iterator should be on an opening
 * code tag.  It stops when it runs out of bounds or reaches a closing tag whose opening tag is the code tag itself or
 * anything below it on the tag stack; the iterator is left on that closing tag.  Other closing tags are treated as text.
 */
protected void GetCode(ref XMLIterator iterator, StringBuilder output, TagStack tagStack)
{
    #if DEBUG
    if (!iterator.IsOnTag("code", TagForm.Opening))
    {
        throw new Exception("GetCode() can only be called when the iterator is on an opening code tag.");
    }
    #endif

    output.Append("<pre type=\"code\">");
    tagStack.OpenTag("code", "</pre>");
    int codeTagIndex = tagStack.Count - 1;
    iterator.Next();

    List<CodeLine> lines = new List<CodeLine>();

    // Indent -1: don't use text immediately following the code tag to figure out the shared indent.
    CodeLine currentLine = new CodeLine { Indent = -1, Text = null };

    while (true)
    {
        // Decide whether the code section ends here.
        bool atEnd = false;

        if (!iterator.IsInBounds)
        {
            atEnd = true;
        }
        else if (iterator.IsOnTag(TagForm.Closing))
        {
            int openingTagIndex = tagStack.FindTag(iterator.TagType);
            atEnd = (openingTagIndex != -1 && openingTagIndex <= codeTagIndex);
        }

        if (atEnd)
        {
            lines.Add(currentLine);
            break;
        }

        if (iterator.IsOn(XMLElementType.LineBreak))
        {
            lines.Add(currentLine);
            currentLine = new CodeLine { Indent = 0, Text = null };
        }
        else if (iterator.IsOn(XMLElementType.Indent))
        {
            currentLine.Indent = iterator.Indent;
        }
        else
        {
            // Entity, unhandled tag, or text: all kept verbatim.
            currentLine.Text = (currentLine.Text == null ? iterator.String : currentLine.Text + iterator.String);
        }

        iterator.Next();
    }

    Normalize(lines);

    // Build the output, separating lines with <br>.
    int lastIndex = lines.Count - 1;

    for (int i = 0; i <= lastIndex; i++)
    {
        CodeLine line = lines[i];

        if (line.Indent >= 1)
        {
            output.Append(' ', line.Indent);
        }

        if (line.Text != null)
        {
            output.EntityEncodeAndAppend(line.Text);
        }

        if (i < lastIndex)
        {
            output.Append("<br>");
        }
    }

    tagStack.CloseTag(codeTagIndex, output);
}
/* Function: GetSimpleText
 * Converts a block of plain unformatted text to NDMarkup and adds it to the output.  Unlike <GetText()> this will not
 * surround the output in paragraph tags.  It ends when it reaches the closing tag for anything already on the tag
 * stack, leaving the iterator on that closing tag, or when the iterator goes out of bounds.  Any tags it opened
 * itself are closed before it returns.
 */
protected void GetSimpleText(ref XMLIterator iterator, StringBuilder output, TagStack tagStack)
{
    int surroundingTagCount = tagStack.Count;

    while (iterator.IsInBounds)
    {
        if (iterator.IsOn(XMLElementType.Text))
        {
            output.EntityEncodeAndAppend(iterator.String);
            iterator.Next();
        }
        else if (iterator.IsOn(XMLElementType.EntityChar))
        {
            output.EntityEncodeAndAppend(iterator.EntityValue);
            iterator.Next();
        }
        else if (iterator.IsOn(XMLElementType.LineBreak))
        {
            // Add a literal line break.  We'll replace these with spaces or double spaces later.  Right now we can't
            // decide which it should be because you can't run a regex directly on a StringBuilder and it would be
            // inefficient to convert it to a string on every line break.
            output.Append('\n');
            iterator.Next();
        }
        else if (iterator.IsOnTag("paramref") || iterator.IsOnTag("typeparamref"))
        {
            // Can't assume all the properties are set
            string name = iterator.TagProperty("name");

            if (name != null)
            {
                // The name ends up in NDMarkup like any other text, so it is entity encoded just as the text and
                // entity branches above do, and as GetText() does for the same tags.
                output.EntityEncodeAndAppend(name);
            }

            iterator.Next();
        }
        else if (iterator.IsOnTag(TagForm.Opening))
        {
            tagStack.OpenTag(iterator.TagType);
            iterator.Next();
        }
        else if (iterator.IsOnTag(TagForm.Closing))
        {
            int openingTagIndex = tagStack.FindTag(iterator.TagType);

            if (openingTagIndex == -1)
            {
                // Closing tag without an opening tag: ignore it.
            }
            else if (openingTagIndex <= surroundingTagCount - 1)
            {
                // Closes something that was open before we started, so we're done.  Don't move past it.
                break;
            }
            else
            {
                tagStack.CloseTag(openingTagIndex, output);
            }

            iterator.Next();
        }
        else
        {
            // Ignore indent.  Spaces between words will be handled by line breaks.
            // Ignore unrecognized standalone tags.
            iterator.Next();
        }
    }

    // Close anything we opened that is still open.
    if (tagStack.Count > surroundingTagCount)
    {
        tagStack.CloseTag(surroundingTagCount, output);
    }
}
/* Function: GetText
 * Converts a block of formatted text to NDMarkup and adds it to the output, wrapped in paragraph tags.  It ends when
 * it reaches the closing tag for anything that was already on the tag stack, leaving the iterator on that closing tag,
 * or when the iterator goes out of bounds.
 */
protected void GetText(ref XMLIterator iterator, StringBuilder output, TagStack tagStack)
{
    output.Append("<p>");
    tagStack.OpenTag(null, "</p>");
    int paragraphTagIndex = tagStack.Count - 1;

    while (iterator.IsInBounds)
    {
        if (iterator.IsOn(XMLElementType.Text))
        {
            output.EntityEncodeAndAppend(iterator.String);
            iterator.Next();
        }
        else if (iterator.IsOn(XMLElementType.EntityChar))
        {
            output.EntityEncodeAndAppend(iterator.EntityValue);
            iterator.Next();
        }
        else if (iterator.IsOn(XMLElementType.LineBreak))
        {
            // Literal line breaks are emitted for now and turned into spaces or double spaces later.  Deciding here
            // would need a regex, which can't be run on a StringBuilder without converting it on every line break.
            output.Append('\n');
            iterator.Next();
        }
        else if (iterator.IsOnTag("para"))
        {
            // Text can appear both inside and outside of <para> tags, and whitespace between <para> tags can be
            // mistaken for content.  Rather than handle all that, every <para> tag--opening, closing, or standalone
            // (technically invalid)--simply causes a paragraph break and Normalize() cleans it up afterwards.
            tagStack.CloseTag(paragraphTagIndex + 1, output);  // Reuse our surrounding tag
            output.Append("</p><p>");
            iterator.Next();
        }
        else if (iterator.IsOnTag("code", TagForm.Opening))
        {
            output.Append("</p>");
            GetCode(ref iterator, output, tagStack);
            output.Append("<p>");
        }
        else if (iterator.IsOnTag("example", TagForm.Opening))
        {
            // <example> can be nested in addition to being a top-level tag.
            string heading = Engine.Locale.Get("NaturalDocs.Engine", "XML.Heading.example");

            output.Append("</p><h>");
            output.EntityEncodeAndAppend(heading);
            output.Append("</h><p>");

            tagStack.OpenTag("example", "</p><p>");
            iterator.Next();
        }
        else if (iterator.IsOnTag("list", TagForm.Opening))
        {
            output.Append("</p>");
            GetList(ref iterator, output, tagStack);
            output.Append("<p>");
        }
        else if (iterator.IsOnTag("paramref") || iterator.IsOnTag("typeparamref"))
        {
            // Can't assume all the properties are set
            string name = iterator.TagProperty("name");

            if (name != null)
            {
                output.EntityEncodeAndAppend(name);
            }

            iterator.Next();
        }
        else if (iterator.IsOnTag("see", TagForm.Standalone))
        {
            // Can't assume all the properties are set.  A cref becomes a link; failing that, a langword is
            // written as plain text.
            string cref = iterator.TagProperty("cref");

            if (cref == null)
            {
                string langword = iterator.TagProperty("langword");

                if (langword != null)
                {
                    output.EntityEncodeAndAppend(langword);
                }
            }
            else
            {
                output.Append("<link type=\"naturaldocs\" originaltext=\"");
                output.EntityEncodeAndAppend(cref);
                output.Append("\">");
            }

            iterator.Next();
        }
        else if (iterator.IsOnTag(TagForm.Opening))
        {
            tagStack.OpenTag(iterator.TagType);
            iterator.Next();
        }
        else if (iterator.IsOnTag(TagForm.Closing))
        {
            int openingTagIndex = tagStack.FindTag(iterator.TagType);

            if (openingTagIndex != -1)
            {
                if (openingTagIndex < paragraphTagIndex)
                {
                    // Closes something that was open before we started, so we're done.  Don't move past it.
                    break;
                }

                tagStack.CloseTag(openingTagIndex, output);
            }

            // Closing tags without an opening tag are simply skipped.
            iterator.Next();
        }
        else
        {
            // Ignore indent.  Spaces between words will be handled by line breaks.
            // Ignore unrecognized standalone tags.
            iterator.Next();
        }
    }

    tagStack.CloseTag(paragraphTagIndex, output);
}
/* Function: TryToGetTopLevelListItem
 * If the iterator is on an opening param, typeparam, exception, or permission tag, or on an opening or standalone see
 * or seealso tag, it will convert it to NDMarkup, add it to the comment in a list block, move the iterator past it,
 * and return true.  Otherwise it returns false and nothing is changed.
 *
 * see tags are stored in the seealso list block.  An item is only added when the tag supplies a name: the name
 * property for param and typeparam, otherwise cref or, failing that, langword.
 */
protected bool TryToGetTopLevelListItem(ref XMLIterator iterator, XMLComment comment)
{
    if (iterator.IsOnTag("param", TagForm.Opening) == false &&
        iterator.IsOnTag("exception", TagForm.Opening) == false &&
        iterator.IsOnTag("permission", TagForm.Opening) == false &&
        iterator.IsOnTag("typeparam", TagForm.Opening) == false &&
        iterator.IsOnTag("see", TagForm.Opening) == false &&
        iterator.IsOnTag("see", TagForm.Standalone) == false &&
        iterator.IsOnTag("seealso", TagForm.Opening) == false &&
        iterator.IsOnTag("seealso", TagForm.Standalone) == false)
    {
        return (false);
    }

    // tagType is the tag as written in the comment; keyword selects the list block.  They are kept separate
    // because the closing tag has to be matched against what was actually written: a <see> block ends with
    // </see>, not </seealso>.
    string tagType = iterator.TagType;
    string keyword = (tagType == "see" ? "seealso" : tagType);

    XMLComment.ListBlock block = comment.GetListBlock(keyword);

    string name = null;
    string description = null;

    if (keyword == "param" || keyword == "typeparam")
    {
        name = iterator.TagProperty("name");
    }
    else
    {
        name = iterator.TagProperty("cref") ?? iterator.TagProperty("langword");
    }

    if (iterator.TagForm == TagForm.Opening)
    {
        TagStack tagStack = new TagStack();
        tagStack.OpenTag(tagType);

        iterator.Next();

        StringBuilder descriptionBuilder = new StringBuilder();
        GetText(ref iterator, descriptionBuilder, tagStack);
        tagStack.CloseAllTags(descriptionBuilder);

        description = Normalize(descriptionBuilder.ToString());

        // Step past the closing tag if that is where GetText() stopped.
        if (iterator.IsOnTag(tagType, TagForm.Closing))
        {
            iterator.Next();
        }
    }
    else
    {
        // Standalone tag: there is no description to read.
        iterator.Next();
    }

    if (name != null)
    {
        block.Add(name, description);
    }

    return (true);
}
/// <summary>
/// Simple parsing to check if input fragment is well-formed,
/// HTML elements that do not required end tags (i.e. &lt;BR&gt;)
/// will be ignored by this parser.
/// </summary>
/// <param name="text">
/// text being parsed
/// </param>
/// <returns>
/// <c>true</c> when every end tag finds its start tag and every start tag left open at the end of
/// the text has an optional end tag; otherwise <c>false</c>.
/// </returns>
internal static bool IsWellFormed(String text)
{
    int textPos = 0;
    TagStack stack = new TagStack();

    for (;;)
    {
        Match match = null;

        // 1: scan for text up to the next tag.
        if ((match = _textRegex.Match(text, textPos)).Success)
        {
            textPos = match.Index + match.Length;
        }

        // we might be done now
        if (textPos == text.Length)
        {
            // Anything still open must be a tag whose end tag is optional.
            while (!stack.IsEmpty())
            {
                if (!IsEndTagOptional(stack.Pop()))
                {
                    return (false);
                }
            }
            return (true);
        }

        // First check if it's a unclosed tag (i.e. <mobile:Form >)
        if ((match = _unclosedTagRegex.Match(text, textPos)).Success)
        {
            String startTag = match.Groups["tagname"].Value;
            stack.Push(startTag);
        }
        // Check to see if it's a tag
        else if ((match = _tagRegex.Match(text, textPos)).Success)
        {
            // skip
        }
        // Check to see if it's an end tag
        else if ((match = _endtagRegex.Match(text, textPos)).Success)
        {
            String endTag = match.Groups["tagname"].Value;
            bool matched = false;

            // Unwind the stack until the matching start tag is found. Start tags with optional
            // end tags may be skipped over on the way.
            while (!stack.IsEmpty())
            {
                String startTag = stack.Pop();

                // Tag names are identifiers, so they are compared ordinally rather than by culture.
                if (String.Compare(endTag, startTag, StringComparison.OrdinalIgnoreCase) != 0)
                {
                    if (IsEndTagOptional(startTag))
                    {
                        continue;
                    }

                    // no match against start tag that requires an end tag
                    return (false);
                }

                // we found a match here.
                matched = true;
                break;
            }

            if (!matched && stack.IsEmpty())
            {
                return (false);
            }
        }
        // Check to see if it's a directive (i.e. <%@ %> block)
        else if ((match = _directiveRegex.Match(text, textPos)).Success)
        {
            // skip
        }
        // Check to see if it's a server side include
        // e.g. <!-- #include file="foo.inc" -->
        else if ((match = _includeRegex.Match(text, textPos)).Success)
        {
            // skip it
        }
        // Check to see if it's a comment (<%-- --%> block)
        // e.g. <!-- Blah! -->
        else if ((match = _commentRegex.Match(text, textPos)).Success)
        {
            // skip
        }
        // Check to see if it's an asp expression block (i.e. <%= %> block)
        else if ((match = _aspExprRegex.Match(text, textPos)).Success)
        {
            // skip
        }
        // Check to see if it's a databinding expression block (i.e. <%# %> block)
        // This does not include <%# %> blocks used as values for
        // attributes of server tags.
        else if ((match = _databindExprRegex.Match(text, textPos)).Success)
        {
            // skip
        }
        // Check to see if it's an asp code block
        else if ((match = _aspCodeRegex.Match(text, textPos)).Success)
        {
            // skip
        }

        // Did we process the block that started with a '<'?
        if (match == null || !match.Success)
        {
            // Skip the '<'
            textPos++;
        }
        else
        {
            textPos = match.Index + match.Length;
        }

        // we might be done now
        if (textPos == text.Length)
        {
            while (!stack.IsEmpty())
            {
                if (!IsEndTagOptional(stack.Pop()))
                {
                    return (false);
                }
            }
            return (true);
        }
    }
}
/* Function: GetText
 *
 * Converts a stretch of formatted Javadoc text to NDMarkup and appends it to the output wrapped in paragraph tags.
 *
 * Modes:
 *
 *    Normal - The iterator continues until it goes out of bounds.
 *    ListItem - The iterator continues until it reaches a closing li, ul, or ol tag, which it is left on.  It also
 *               skips pre sections, which are only converted in Normal mode.
 */
protected void GetText(ref JavadocIterator iterator, StringBuilder output, GetTextMode mode = GetTextMode.Normal)
{
    output.Append("<p>");

    TagStack tagStack = new TagStack();
    tagStack.OpenTag(null, "</p>");

    bool inListItem = (mode == GetTextMode.ListItem);

    while (iterator.IsInBounds)
    {
        if (iterator.IsOn(JavadocElementType.Text))
        {
            output.EntityEncodeAndAppend(iterator.String);
            iterator.Next();
        }
        else if (iterator.IsOn(JavadocElementType.EntityChar))
        {
            output.EntityEncodeAndAppend(iterator.EntityValue);
            iterator.Next();
        }
        else if (iterator.IsOn(JavadocElementType.LineBreak))
        {
            // Literal line breaks are emitted for now and turned into spaces or double spaces later.  Deciding here
            // would need a regex, which can't be run on a StringBuilder without converting it on every line break.
            output.Append('\n');
            iterator.Next();
        }
        else if (iterator.IsOnHTMLTag("p"))
        {
            // Text can appear both inside and outside of <p> tags, whitespace between <p> tags can be mistaken for
            // content, and people use <p> as a standalone tag.  Rather than account for all of that, every <p>
            // tag--opening, closing, or standalone--causes a paragraph break and Normalize() cleans it up afterwards.
            tagStack.CloseTag(1, output);  // Reuse our surrounding tag
            output.Append("</p><p>");
            iterator.Next();
        }
        else if (iterator.IsOnHTMLTag("b") || iterator.IsOnHTMLTag("strong"))
        {
            var form = iterator.HTMLTagForm;

            if (form == TagForm.Opening)
            {
                tagStack.OpenTag(iterator.TagType, "</b>");
                output.Append("<b>");
            }
            else if (form == TagForm.Closing)
            {
                tagStack.CloseTag(iterator.TagType, output);
            }

            iterator.Next();
        }
        else if (iterator.IsOnHTMLTag("i") || iterator.IsOnHTMLTag("em"))
        {
            var form = iterator.HTMLTagForm;

            if (form == TagForm.Opening)
            {
                tagStack.OpenTag(iterator.TagType, "</i>");
                output.Append("<i>");
            }
            else if (form == TagForm.Closing)
            {
                tagStack.CloseTag(iterator.TagType, output);
            }

            iterator.Next();
        }
        else if (iterator.IsOnHTMLTag("u"))
        {
            var form = iterator.HTMLTagForm;

            if (form == TagForm.Opening)
            {
                tagStack.OpenTag(iterator.TagType, "</u>");
                output.Append("<u>");
            }
            else if (form == TagForm.Closing)
            {
                tagStack.CloseTag(iterator.TagType, output);
            }

            iterator.Next();
        }
        else if (iterator.IsOnHTMLTag("pre", TagForm.Opening) && mode == GetTextMode.Normal)
        {
            // Pre sections are ignored in list items.
            output.Append("</p>");
            GetPre(ref iterator, output);
            output.Append("<p>");
        }
        else if (iterator.IsOnHTMLTag("ul") || iterator.IsOnHTMLTag("ol"))
        {
            var form = iterator.HTMLTagForm;

            if (form == TagForm.Opening)
            {
                output.Append("</p>");
                GetList(ref iterator, output);
                output.Append("<p>");
            }
            else if (form == TagForm.Closing && inListItem)
            {
                // The enclosing list ended, so the list item did too.
                break;
            }
            else
            {
                iterator.Next();
            }
        }
        else if (iterator.IsOnHTMLTag("li", TagForm.Closing) && inListItem)
        {
            break;
        }
        else if (iterator.IsOnHTMLTag("a", TagForm.Opening))
        {
            string href = iterator.HTMLTagProperty("href");

            // Anchors that don't lead anywhere usable are dropped; their content is still read as normal text.
            bool skipLink = string.IsNullOrEmpty(href) ||
                            href == "#" ||
                            href.StartsWith("{@docRoot}") ||
                            href.StartsWith("javascript:", StringComparison.OrdinalIgnoreCase);

            if (skipLink)
            {
                iterator.Next();
            }
            else
            {
                GetHTMLLink(ref iterator, output);
            }
        }
        else if (iterator.IsOnJavadocTag("code") || iterator.IsOnJavadocTag("literal"))
        {
            // These get added without searching the contents for nested tags.
            output.EntityEncodeAndAppend(iterator.JavadocTagValue);
            iterator.Next();
        }
        else if (iterator.IsOnJavadocTag("link") || iterator.IsOnJavadocTag("linkPlain"))
        {
            Tokenizer linkContent = new Tokenizer(iterator.JavadocTagValue);
            TokenIterator linkIterator = linkContent.FirstToken;

            string symbol = GetJavadocLinkSymbol(ref linkIterator);
            linkIterator.NextPastWhitespace();

            string description = Normalize(GetSimpleText(linkIterator, linkContent.LastToken));

            // With a description the link text is "description at symbol", otherwise just the symbol.
            output.Append("<link type=\"naturaldocs\" originaltext=\"");

            if (!string.IsNullOrEmpty(description))
            {
                output.EntityEncodeAndAppend(description);
                output.Append(" at ");
            }

            output.EntityEncodeAndAppend(symbol);
            output.Append("\">");

            iterator.Next();
        }
        else if (iterator.IsOnJavadocTag("value"))
        {
            string symbol = iterator.JavadocTagValue;

            if (string.IsNullOrEmpty(symbol))
            {
                output.EntityEncodeAndAppend(Locale.Get("NaturalDocs.Engine", "Javadoc.Substitution.value"));
            }
            else
            {
                // \x1F is passed to the locale string and marks where the symbol link goes in the result.
                string substitution = Locale.Get("NaturalDocs.Engine", "Javadoc.Substitution.value(symbol)", '\x1F');
                int markerIndex = substitution.IndexOf('\x1F');

                if (markerIndex == -1)
                {
                    output.EntityEncodeAndAppend(substitution);
                }
                else
                {
                    if (markerIndex > 0)
                    {
                        output.EntityEncodeAndAppend(substitution, 0, markerIndex);
                    }

                    output.Append("<link type=\"naturaldocs\" originaltext=\"");
                    output.EntityEncodeAndAppend(symbol);
                    output.Append("\">");

                    int afterMarker = markerIndex + 1;

                    if (afterMarker < substitution.Length)
                    {
                        output.EntityEncodeAndAppend(substitution, afterMarker, substitution.Length - afterMarker);
                    }
                }
            }

            iterator.Next();
        }
        else
        {
            // Ignore indent.  Spaces between words will be handled by line breaks.
            // Ignore HTML comments.
            // Ignore unrecognized HTML tags.
            iterator.Next();
        }
    }

    tagStack.CloseAllTags(output);
}
/// <summary>
/// Simple parsing to check whether the input fragment is well-formed. HTML elements that do not
/// require end tags (i.e. &lt;BR&gt;) are ignored by this parser.
/// </summary>
/// <param name="text">
/// text being parsed
/// </param>
/// <returns>
/// <c>true</c> when every end tag finds its start tag and every start tag left open at the end of
/// the text has an optional end tag; otherwise <c>false</c>.
/// </returns>
internal static bool IsWellFormed(String text)
{
    // Constructs that are recognised only so they can be stepped over, in the order they are tried:
    // directive (<%@ %>), server side include (<!-- #include file="foo.inc" -->), comment
    // (<%-- --%> or <!-- Blah! -->), asp expression (<%= %>), databinding expression (<%# %>, not
    // including those used as attribute values of server tags), and asp code block.
    Regex[] skippedConstructs =
    {
        _directiveRegex,
        _includeRegex,
        _commentRegex,
        _aspExprRegex,
        _databindExprRegex,
        _aspCodeRegex
    };

    TagStack stack = new TagStack();
    int textPos = 0;

    while (true)
    {
        // 1: scan for text up to the next tag.
        Match match = _textRegex.Match(text, textPos);
        if (match.Success)
        {
            textPos = match.Index + match.Length;
        }

        // we might be done now
        if (textPos == text.Length)
        {
            // Anything still open must be a tag whose end tag is optional.
            while (!stack.IsEmpty())
            {
                if (!IsEndTagOptional(stack.Pop()))
                {
                    return false;
                }
            }
            return true;
        }

        // First check if it's a unclosed tag (i.e. <mobile:Form >)
        match = _unclosedTagRegex.Match(text, textPos);
        if (match.Success)
        {
            stack.Push(match.Groups["tagname"].Value);
        }
        else
        {
            // Check to see if it's a tag; those are simply skipped.
            match = _tagRegex.Match(text, textPos);

            if (!match.Success)
            {
                // Check to see if it's an end tag
                match = _endtagRegex.Match(text, textPos);

                if (match.Success)
                {
                    String endTag = match.Groups["tagname"].Value;
                    bool matched = false;

                    // Unwind until the matching start tag turns up. Start tags with optional end
                    // tags may be discarded on the way; any other mismatch is an error.
                    while (!matched && !stack.IsEmpty())
                    {
                        String startTag = stack.Pop();

                        if (String.Equals(endTag, startTag, StringComparison.OrdinalIgnoreCase))
                        {
                            matched = true;
                        }
                        else if (!IsEndTagOptional(startTag))
                        {
                            // no match against start tag that requires an end tag
                            return false;
                        }
                    }

                    if (!matched && stack.IsEmpty())
                    {
                        return false;
                    }
                }
                else
                {
                    foreach (Regex construct in skippedConstructs)
                    {
                        match = construct.Match(text, textPos);
                        if (match.Success)
                        {
                            break;
                        }
                    }
                }
            }
        }

        // Did we process the block that started with a '<'?
        if (match != null && match.Success)
        {
            textPos = match.Index + match.Length;
        }
        else
        {
            // Skip the '<'
            textPos++;
        }

        // we might be done now
        if (textPos == text.Length)
        {
            while (!stack.IsEmpty())
            {
                if (!IsEndTagOptional(stack.Pop()))
                {
                    return false;
                }
            }
            return true;
        }
    }
}