/// <summary>
///     Pushes <paramref name="tag" /> onto <c>TagStack</c>.
/// </summary>
/// <remarks>
///     Declared <c>virtual</c>, so a derived class can override how a tag is recorded.
/// </remarks>
/// <param name="tag">The tag to push.</param>
protected virtual void PushTag(string tag)
 {
     TagStack.Push(tag);
 }
        /// <summary>
        ///     Simple parsing to check whether the input fragment is well-formed.
        ///     HTML elements that do not require end tags (e.g. &lt;BR&gt;)
        ///     are ignored by this parser.
        /// </summary>
        /// <param name="text">
        ///     text being parsed
        /// </param>
        /// <returns>
        ///     <c>false</c> if an end tag is found that has no matching start tag, if an end tag
        ///     skips over a start tag whose end tag is not optional, or if such a start tag is
        ///     still open when the end of <paramref name="text" /> is reached; otherwise <c>true</c>.
        /// </returns>
        internal static bool IsWellFormed(String text)
        {
            int      textPos = 0;
            TagStack stack   = new TagStack();

            for (;;)
            {
                Match match = null;

                // 1: scan for text up to the next tag.
                if ((match = _textRegex.Match(text, textPos)).Success)
                {
                    textPos = match.Index + match.Length;
                }

                // we might be done now: every tag still open must be one
                // whose end tag is optional.
                if (textPos == text.Length)
                {
                    while (!stack.IsEmpty())
                    {
                        if (!IsEndTagOptional(stack.Pop()))
                        {
                            return false;
                        }
                    }
                    return true;
                }

                // First check if it's an unclosed tag (i.e. <mobile:Form >);
                // this is the only construct that is pushed onto the stack.
                if ((match = _unclosedTagRegex.Match(text, textPos)).Success)
                {
                    String startTag = match.Groups["tagname"].Value;
                    stack.Push(startTag);
                }

                // Check to see if it's a tag
                else if ((match = _tagRegex.Match(text, textPos)).Success)
                {
                    // skip
                }

                // Check to see if it's an end tag
                else if ((match = _endtagRegex.Match(text, textPos)).Success)
                {
                    String endTag  = match.Groups["tagname"].Value;
                    bool   matched = false;

                    // Unwind the stack until the matching start tag is found.
                    // Start tags with optional end tags may be skipped over.
                    while (!stack.IsEmpty())
                    {
                        String startTag = stack.Pop();

                        // Tag names are identifiers, so compare them ordinally
                        // (case-insensitive) rather than with culture rules.
                        if (String.Compare(endTag, startTag, StringComparison.OrdinalIgnoreCase) != 0)
                        {
                            if (IsEndTagOptional(startTag))
                            {
                                continue;
                            }

                            // no match against start tag that requires an end tag
                            return false;
                        }

                        // we found a match here.
                        matched = true;
                        break;
                    }

                    // The stack ran out without finding a start tag for this end tag.
                    if (!matched && stack.IsEmpty())
                    {
                        return false;
                    }
                }

                // Check to see if it's a directive (i.e. <%@ %> block)
                else if ((match = _directiveRegex.Match(text, textPos)).Success)
                {
                    // skip
                }

                // Check to see if it's a server side include
                // e.g. <!-- #include file="foo.inc" -->
                else if ((match = _includeRegex.Match(text, textPos)).Success)
                {
                    // skip it
                }

                // Check to see if it's a comment (<%-- --%> block)
                // e.g. <!-- Blah! -->
                else if ((match = _commentRegex.Match(text, textPos)).Success)
                {
                    // skip
                }

                // Check to see if it's an asp expression block (i.e. <%= %> block)
                else if ((match = _aspExprRegex.Match(text, textPos)).Success)
                {
                    // skip
                }

                // Check to see if it's a databinding expression block (i.e. <%# %> block)
                // This does not include <%# %> blocks used as values for
                // attributes of server tags.
                else if ((match = _databindExprRegex.Match(text, textPos)).Success)
                {
                    // skip
                }

                // Check to see if it's an asp code block
                else if ((match = _aspCodeRegex.Match(text, textPos)).Success)
                {
                    // skip
                }

                // Did we process the block that started with a '<'?
                if (match == null || !match.Success)
                {
                    // Nothing recognized it: skip the '<' so the scan always advances.
                    textPos++;
                }
                else
                {
                    textPos = match.Index + match.Length;
                }

                // we might be done now
                if (textPos == text.Length)
                {
                    while (!stack.IsEmpty())
                    {
                        if (!IsEndTagOptional(stack.Pop()))
                        {
                            return false;
                        }
                    }
                    return true;
                }
            }
        }
Example #3
0
        /// <summary>
        ///   Parses the given document to find all tags between the beginning of the document and the specified
        ///   end line.
        /// </summary>
        /// <remarks>
        ///   The <paramref name="p_pctCompleteTagCallback" /> is called whenever a complete tag has been parsed.
        ///   A complete tag is a tag whose opening and closing tags have been found (for example, &lt;b&gt;..&lt;/b&gt;).
        ///   The <paramref name="p_pctUnclosedTagCallback" /> is called for each tag that is popped off the stack
        ///   while unwinding to the opening tag that matches a closing tag.
        ///   The stack that is returned contains all the unclosed tags found, and so represents where in the
        ///   document hierarchy the line falls.
        /// </remarks>
        /// <param name="p_docDocument">The document to parse.</param>
        /// <param name="p_intEndLine">The line of the document at which to stop parsing.</param>
        /// <param name="p_pctCompleteTagCallback">
        ///   The method to call whenever a complete tag is parsed. May be <c>null</c>.
        /// </param>
        /// <param name="p_pctUnclosedTagCallback">
        ///   The method to call for each tag that is discarded because a closing tag for one of its ancestors
        ///   was found first. May be <c>null</c>.
        /// </param>
        /// <returns>A stack containing all the unclosed tags found.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        ///   Thrown if <paramref name="p_intEndLine" /> is greater than
        ///   or equal to the <see cref="IDocument.TotalNumberOfLines" /> of <paramref name="p_docDocument" />.
        /// </exception>
        public static TagStack ParseTags(IDocument p_docDocument, Int32 p_intEndLine, ParsedTag p_pctCompleteTagCallback,
                                         ParsedTag p_pctUnclosedTagCallback)
        {
            if (p_intEndLine >= p_docDocument.TotalNumberOfLines)
            {
                throw new ArgumentOutOfRangeException("p_intEndLine", p_intEndLine,
                                                      "The given end line paramater is outside of the range of lines in the given document.");
            }
            //parse the buffer
            var stkTags = new TagStack();

            for (var i = 0; i <= p_intEndLine; i++)
            {
                var strLine        = p_docDocument.GetText(p_docDocument.GetLineSegment(i));
                // Tags found in a multi-line span are reported against the line the span started on.
                var intLineNum     = i;
                var intLastOpenPos = strLine.LastIndexOf('<');
                if (intLastOpenPos < 0)
                {
                    continue;
                }
                var intLastClosePos = strLine.LastIndexOf('>');
                // NOTE(review): a line that contains '<' but no '>' at all is not treated as an
                // open tag by this condition - confirm that is intended.
                if ((intLastClosePos > -1) && (intLastOpenPos > intLastClosePos))
                {
                    var stbLines = new StringBuilder(strLine);
                    //there is an open tag on this line - read the following lines until it is closed.
                    //Start at the NEXT line (the current one is already in the builder) and inspect
                    //each appended line, not the first one, to decide when to stop.
                    while (i < p_intEndLine)
                    {
                        i++;
                        var strNextLine = p_docDocument.GetText(p_docDocument.GetLineSegment(i));
                        stbLines.Append(strNextLine);
                        //stop once this line ends outside of a tag: its last '>' comes after its
                        //last '<' (LastIndexOf yields -1 when the line has no '<').
                        if (strNextLine.LastIndexOf('>') > strNextLine.LastIndexOf('<'))
                        {
                            break;
                        }
                    }
                    //i now indexes the last line consumed; the outer loop's i++ moves past it.
                    strLine = stbLines.ToString();
                }

                var mclLineTags = rgxTagContents.Matches(strLine);
                foreach (Match mtcTag in mclLineTags)
                {
                    var strTag     = mtcTag.Groups[1].Value.Trim();
                    var strTagName = rgxTagName.Match(strTag).Groups[1].Value;
                    if (strTag.StartsWith("/", StringComparison.Ordinal))
                    {
                        //closing tag: ignore it unless a matching opening tag is on the stack.
                        if (stkTags.Contains(strTagName))
                        {
                            //everything above the matching opening tag was never closed.
                            // NOTE(review): Peek() yields a tag entry (it exposes Name/Column/LineNumber)
                            // that is compared against a string here; this relies on the entry type's
                            // Equals accepting a tag name - confirm.
                            while (!stkTags.Peek().Equals(strTagName))
                            {
                                var tpsTag   = stkTags.Pop();
                                var tlcStart = new TextLocation(tpsTag.Column, tpsTag.LineNumber);
                                var tlcEnd   = new TextLocation(tpsTag.Column + tpsTag.Name.Length, tpsTag.LineNumber);
                                if (p_pctUnclosedTagCallback != null)
                                {
                                    p_pctUnclosedTagCallback(p_docDocument, tpsTag.Name, tlcStart, tlcEnd);
                                }
                            }
                            var tpsCompleteTag = stkTags.Pop();
                            if (p_pctCompleteTagCallback != null)
                            {
                                var tlcStart      = new TextLocation(tpsCompleteTag.Column, tpsCompleteTag.LineNumber);
                                var intEndFoldPos = mtcTag.Groups[1].Index;
                                var tlcEnd        = new TextLocation(intEndFoldPos, intLineNum);
                                p_pctCompleteTagCallback(p_docDocument, strTagName, tlcStart, tlcEnd);
                            }
                        }
                    }
                    else
                    {
                        //opening tag: self-closing tags (ending in '/') never go on the stack.
                        if (!strTag.EndsWith("/", StringComparison.Ordinal))
                        {
                            stkTags.Push(strTagName, intLineNum, mtcTag.Groups[1].Index);
                        }
                    }
                }
            }
            return(stkTags);
        }
        /// <summary>
        ///     Lightweight well-formedness check for a markup fragment.
        ///     Start tags are tracked on a stack and matched against end tags;
        ///     elements whose end tag is optional (e.g. &lt;BR&gt;) never cause a failure.
        /// </summary>
        /// <param name="text">
        ///     text being parsed
        /// </param>
        /// <returns>
        ///     <c>false</c> when an end tag has no matching start tag, when an end tag skips a
        ///     start tag that requires its own end tag, or when such a start tag is still open
        ///     at the end of <paramref name="text" />; otherwise <c>true</c>.
        /// </returns>
        internal static bool IsWellFormed(String text)
        {
            TagStack openTags = new TagStack();
            int pos = 0;

            while (true)
            {
                // Consume plain text up to the next tag, if there is any.
                Match current = _textRegex.Match(text, pos);
                if (current.Success)
                {
                    pos = current.Index + current.Length;
                }

                // End of input: only tags with an optional end tag may remain open.
                if (pos == text.Length)
                {
                    bool onlyOptionalLeft = true;
                    while (onlyOptionalLeft && !openTags.IsEmpty())
                    {
                        onlyOptionalLeft = IsEndTagOptional(openTags.Pop());
                    }
                    return onlyOptionalLeft;
                }

                // An unclosed tag (i.e. <mobile:Form >) is remembered on the stack.
                current = _unclosedTagRegex.Match(text, pos);
                if (current.Success)
                {
                    openTags.Push(current.Groups["tagname"].Value);
                }
                else if ((current = _tagRegex.Match(text, pos)).Success)
                {
                    // a tag matched by the tag regex needs no tracking
                }
                else if ((current = _endtagRegex.Match(text, pos)).Success)
                {
                    String closingName = current.Groups["tagname"].Value;
                    bool found = false;

                    // Unwind until the start tag for this end tag shows up.
                    while (!found && !openTags.IsEmpty())
                    {
                        String openName = openTags.Pop();

                        if (String.Compare(closingName, openName, StringComparison.OrdinalIgnoreCase) == 0)
                        {
                            found = true;
                        }
                        else if (!IsEndTagOptional(openName))
                        {
                            // a start tag that requires an end tag was skipped over
                            return false;
                        }
                    }

                    // The end tag had no start tag at all.
                    if (!found && openTags.IsEmpty())
                    {
                        return false;
                    }
                }
                else if ((current = _directiveRegex.Match(text, pos)).Success        // <%@ %> directive
                         || (current = _includeRegex.Match(text, pos)).Success       // <!-- #include file="foo.inc" -->
                         || (current = _commentRegex.Match(text, pos)).Success       // comment block
                         || (current = _aspExprRegex.Match(text, pos)).Success       // <%= %> expression
                         || (current = _databindExprRegex.Match(text, pos)).Success  // <%# %> outside attribute values
                         || (current = _aspCodeRegex.Match(text, pos)).Success)      // asp code block
                {
                    // none of these constructs affect the tag stack
                }

                // Step past whatever was recognized, or past the lone '<' otherwise.
                if (current != null && current.Success)
                {
                    pos = current.Index + current.Length;
                }
                else
                {
                    pos++;
                }

                // End of input may have been reached by the step above.
                if (pos == text.Length)
                {
                    bool onlyOptionalLeft = true;
                    while (onlyOptionalLeft && !openTags.IsEmpty())
                    {
                        onlyOptionalLeft = IsEndTagOptional(openTags.Pop());
                    }
                    return onlyOptionalLeft;
                }
            }
        }