Mutable Boolean (c) 1998-2000 (W3C) MIT, INRIA, Keio University See Tidy.cs for the copyright notice. Derived from HTML Tidy Release 4 Aug 2000
Пример #1
0
        /* values start with "=" or " = " etc. */
        /* doesn't consume the ">" at end of start tag */
        /*
        Reads the value part of an attribute ("= value") from the input.

        name      -- attribute name; only used to decide whether the value is a
                     URL or a script (see the "/>" and missing-quote checks)
        foldCase  -- when true, characters that Map() flags as UPPERCASE are
                     folded to lower case
        isempty   -- Val is set to true when an unquoted value of a non-URL
                     attribute is terminated by "/>"
        pdelim    -- receives the quote character that delimited the value;
                     '"' when the value was unquoted, or the result of
                     ParseServerInstruction() when the value starts with '<'

        Returns the value text, or null when there is no '=' (the character
        read instead is pushed back) or when an unquoted value is empty.
        A quoted empty value yields a string of length 0 rather than null.
        The lexer buffer position (Lexsize) is restored before returning.
        */
        public virtual string ParseValue(string name, bool foldCase, MutableBoolean isempty, MutableInteger pdelim)
        {
            /* scheme prefix that exempts URL attributes from the missing-quote warning */
            const string javascriptScheme = "javascript:";

            int len;
            int start;
            short map;
            bool seenGt = false;
            bool munge = true;
            int c;
            int delim, quotewarning;
            string val;

            delim = 0;
            pdelim.Val = '"';

            /*
            Henry Zrepa reports that some folk are using the
            embed element with script attributes where newlines
            are significant and must be preserved
            */
            if (Options.LiteralAttribs)
                munge = false;

            /* skip white space before the '=' */

            for (;;)
            {
                c = Input.ReadChar();

                if (c == StreamIn.END_OF_STREAM)
                {
                    /*
                    NOTE(review): END_OF_STREAM is pushed back here and again by
                    the "c != '='" branch below -- confirm UngetChar tolerates that
                    */
                    Input.UngetChar(c);
                    break;
                }

                map = Map((char) c);

                if ((map & WHITE) == 0)
                {
                    break;
                }
            }

            /*
            c should be '=' if there is a value
            other legal possibilities are white
            space, '/' and '>'
            */

            if (c != '=')
            {
                Input.UngetChar(c);
                return null;
            }

            /* skip white space after '=' */

            for (;;)
            {
                c = Input.ReadChar();
                if (c == StreamIn.END_OF_STREAM)
                {
                    Input.UngetChar(c);
                    break;
                }

                map = Map((char) c);

                if ((map & WHITE) == 0)
                    break;
            }

            /* check for quote marks */

            if (c == '"' || c == '\'')
                delim = c;
            else if (c == '<')
            {
                /* value starts with '<': hand over to the server instruction parser */
                start = Lexsize;
                AddCharToLexer(c);
                pdelim.Val = ParseServerInstruction();
                len = Lexsize - start;
                Lexsize = start;
                return (len > 0 ? GetString(Lexbuf, start, len) : null);
            }
            else
            {
                /* unquoted value: its first character is re-read by the loop below */
                Input.UngetChar(c);
            }

            /*
            and read the value string
            check for quote mark if needed
            */

            quotewarning = 0;
            start = Lexsize;
            c = '\x0000';

            for (;;)
            {
                /* previous character as stored, used to collapse runs of spaces */
                int lastc = c;
                c = Input.ReadChar();

                if (c == StreamIn.END_OF_STREAM)
                {
                    Report.AttrError(this, Token, null, Report.UNEXPECTED_END_OF_FILE);
                    Input.UngetChar(c);
                    break;
                }

                if (delim == (char) 0)
                {
                    /* unquoted value: '>' ends it and is left for the caller */
                    if (c == '>')
                    {
                        Input.UngetChar(c);
                        break;
                    }

                    if (c == '"' || c == '\'')
                    {
                        Report.AttrError(this, Token, null, Report.UNEXPECTED_QUOTEMARK);
                        break;
                    }

                    if (c == '<')
                    {
                        /* reported, but the '<' is still kept as part of the value */
                        /* in.UngetChar(c); */
                        Report.AttrError(this, Token, null, Report.UNEXPECTED_GT);
                        /* break; */
                    }

                    /*
                    For cases like <br clear=all/> need to avoid treating /> as
                    part of the attribute value, however care is needed to avoid
                    so treating <a href=http://www.acme.com/> in this way, which
                    would map the <a> tag to <a href="http://www.acme.com"/>
                    */
                    if (c == '/')
                    {
                        /* peek ahead in case of /> */
                        c = Input.ReadChar();
                        if (c == '>' && !AttributeTable.DefaultAttributeTable.IsUrl(name))
                        {
                            isempty.Val = true;
                            Input.UngetChar(c);
                            break;
                        }

                        /* unget peeked char */
                        Input.UngetChar(c);
                        c = '/';
                    }
                }
                    /* delim is '\'' or '"' */
                else
                {
                    if (c == delim)
                    {
                        break;
                    }

                    /* treat CRLF, CR and LF as single line break */

                    if (c == '\r')
                    {
                        c = Input.ReadChar();
                        if (c != '\n')
                        {
                            Input.UngetChar(c);
                        }

                        c = '\n';
                    }

                    /* evidence for the missing-quote heuristic after the loop */
                    if (c == '\n' || c == '<' || c == '>')
                        ++quotewarning;

                    if (c == '>')
                        seenGt = true;
                }

                if (c == '&')
                {
                    AddCharToLexer(c);
                    ParseEntity(0);
                    continue;
                }

                /*
                kludge for JavaScript attribute values
                with line continuations in string literals
                */
                if (c == '\\')
                {
                    c = Input.ReadChar();

                    /* backslash + newline: the backslash is dropped, the newline kept */
                    if (c != '\n')
                    {
                        Input.UngetChar(c);
                        c = '\\';
                    }
                }

                map = Map((char) c);

                if ((map & WHITE) != 0)
                {
                    /* white space terminates an unquoted value */
                    if (delim == (char) 0)
                        break;

                    if (munge)
                    {
                        c = ' ';

                        /* collapse consecutive white space into one space */
                        if (lastc == ' ')
                            continue;
                    }
                }
                else if (foldCase && (map & UPPERCASE) != 0)
                    c += ('a' - 'A');

                AddCharToLexer(c);
            }

            if (quotewarning > 10 && seenGt && munge)
            {
                /*
                there is almost certainly a missing trailing quote mark
                as we have seen too many newlines, < or > characters.

                an exception is made for Javascript attributes and the
                javascript URL scheme which may legitimately include < and >

                The scheme prefix is only compared when at least that many
                bytes were collected: munging collapses runs of newlines, so
                the value can be shorter than the prefix even though
                quotewarning is above the threshold, and reading a fixed 11
                bytes would then run past the collected value.
                */
                if (!AttributeTable.DefaultAttributeTable.IsScript(name) &&
                    !(AttributeTable.DefaultAttributeTable.IsUrl(name) &&
                      Lexsize - start >= javascriptScheme.Length &&
                      (GetString(Lexbuf, start, javascriptScheme.Length)).Equals(javascriptScheme)))
                    Report.Error(this, null, null, Report.SUSPECTED_MISSING_QUOTE);
            }

            /* take the collected text and give the buffer space back */
            len = Lexsize - start;
            Lexsize = start;

            if (len > 0 || delim != 0)
            {
                val = GetString(Lexbuf, start, len);
            }
            else
            {
                val = null;
            }

            /* note delimiter if given */
            pdelim.Val = delim != 0 ? delim : '"';

            return val;
        }
Пример #2
0
        /* consumes the '>' terminating start tags */
        /*
        Reads the next attribute name from the input.

        isempty -- Val is set to true when "/>" is found before a name
        asp     -- Object receives the result of ParseAsp() when "<%" is found
        php     -- Object receives the result of ParsePhp() when "<?" is found

        Returns the attribute name, or null when no name characters were
        collected (closing '>', "/>", a '<', an ASP/PHP section or end of
        input). A terminating '=', '>', '<' or end-of-stream marker is pushed
        back; terminating white space is consumed. Unless Options.XmlTags is
        set, characters flagged UPPERCASE by Map() are folded to lower case.
        */
        public virtual string ParseAttribute(MutableBoolean isempty, MutableObject asp, MutableObject php)
        {
            /* forget any server-side section reported by an earlier call */
            asp.Object = null;
            php.Object = null;

            /* phase 1: locate the first character of the name */
            int ch;
            while (true)
            {
                ch = Input.ReadChar();

                if (ch == '/')
                {
                    int afterSlash = Input.ReadChar();
                    if (afterSlash == '>')
                    {
                        isempty.Val = true;
                        return null;
                    }

                    /* a lone '/' becomes the first character of the name */
                    Input.UngetChar(afterSlash);
                    break;
                }

                if (ch == '>')
                {
                    return null;
                }

                if (ch == '<')
                {
                    int marker = Input.ReadChar();

                    if (marker == '%')
                    {
                        asp.Object = ParseAsp();
                    }
                    else if (marker == '?')
                    {
                        php.Object = ParsePhp();
                    }
                    else
                    {
                        Input.UngetChar(marker);
                        Report.AttrError(this, Token, null, Report.UNEXPECTED_GT);
                    }

                    return null;
                }

                if (ch == '"' || ch == '\'')
                {
                    /* stray quote mark: report it and keep scanning */
                    Report.AttrError(this, Token, null, Report.UNEXPECTED_QUOTEMARK);
                    continue;
                }

                if (ch == StreamIn.END_OF_STREAM)
                {
                    Report.AttrError(this, Token, null, Report.UNEXPECTED_END_OF_FILE);
                    Input.UngetChar(ch);
                    return null;
                }

                if ((Map((char) ch) & WHITE) == 0)
                {
                    break;
                }
            }

            /* phase 2: collect the name into the lexer buffer */
            int nameStart = Lexsize;

            while (true)
            {
                /* these end the name and are left in the input ('=' is for ParseValue) */
                if (ch == '=' || ch == '>' || ch == '<' || ch == StreamIn.END_OF_STREAM)
                {
                    Input.UngetChar(ch);
                    break;
                }

                short flags = Map((char) ch);

                /* white space ends the name too, but is not pushed back */
                if ((flags & WHITE) != 0)
                {
                    break;
                }

                /* non-namechar characters are currently incorporated into the name */
                if (!Options.XmlTags && (flags & UPPERCASE) != 0)
                {
                    ch += ('a' - 'A');
                }

                AddCharToLexer(ch);
                ch = Input.ReadChar();
            }

            /* phase 3: extract the name and release the buffer space (BUG 126265: length comes from Lexsize) */
            int nameLength = Lexsize - nameStart;
            string attributeName = null;
            if (nameLength > 0)
            {
                attributeName = GetString(Lexbuf, nameStart, nameLength);
            }

            Lexsize = nameStart;
            return attributeName;
        }
Пример #3
0
        /* swallows closing '>' */
        /*
        Parses attributes until ParseAttribute() reports that there are no
        more, or until EndOfInput() is true.

        isempty -- passed through to ParseAttribute() and ParseValue()

        Each new AttVal is constructed with the list built so far as its first
        argument, and the most recently created AttVal is returned (null when
        nothing was created). ASP and PHP sections found between attributes
        get an AttVal of their own. An attribute whose name fails
        IsValidAttrName() is reported and not added.
        */
        public virtual AttVal ParseAttrs(MutableBoolean isempty)
        {
            var quote = new MutableInteger();
            var aspSection = new MutableObject();
            var phpSection = new MutableObject();

            AttVal head = null;

            while (!EndOfInput())
            {
                string attrName = ParseAttribute(isempty, aspSection, phpSection);

                if (attrName != null)
                {
                    /* ordinary attribute: the value is read even when the name is bad */
                    string attrValue = ParseValue(attrName, false, isempty, quote);

                    if (!IsValidAttrName(attrName))
                    {
                        Report.AttrError(this, Token, attrValue, Report.BAD_ATTRIBUTE_VALUE);
                        continue;
                    }

                    var entry = new AttVal(head, null, null, null, quote.Val, attrName, attrValue);
                    entry.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(entry);
                    head = entry;
                    continue;
                }

                /* no name: an ASP section may stand in for an attribute */
                if (aspSection.Object != null)
                {
                    head = new AttVal(head, null, (Node) aspSection.Object, null, '\x0000', null, null);
                    continue;
                }

                /* ... and so may a PHP section */
                if (phpSection.Object != null)
                {
                    head = new AttVal(head, null, null, (Node) phpSection.Object, '\x0000', null, null);
                    continue;
                }

                /* neither a name nor a server-side section: stop */
                break;
            }

            return head;
        }
Пример #4
0
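        /*
        modes for GetToken()

        IGNORE_WHITESPACE -- for elements which don't have mixed content;
                             leading white space is discarded
        MIXED_CONTENT     -- runs of white space are collapsed to a single space
        PREFORMATTED      -- white space preserved as is
        IGNORE_MARKUP     -- for CDATA elements such as script, style
        */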
        public virtual Node GetToken(short mode)
        {
            int c;
            int badcomment = 0;
            var isempty = new MutableBoolean();

            if (Pushed)
            {
                /* duplicate inlines in preference to pushed text nodes when appropriate */
                if (Token.Type != Node.TEXT_NODE || (Insert == - 1 && Inode == null))
                {
                    Pushed = false;
                    return Token;
                }
            }

            /* at start of block elements, unclosed inline
            elements are inserted into the token stream */

            if (Insert != - 1 || Inode != null)
            {
                return InsertedToken();
            }

            Lines = Input.CursorLine;
            Columns = Input.CursorColumn;
            Waswhite = false;

            Txtstart = Lexsize;
            Txtend = Lexsize;

            while (true)
            {
                c = Input.ReadChar();
                if (c == StreamIn.END_OF_STREAM)
                {
                    break;
                }

                if (Insertspace && mode != IGNORE_WHITESPACE)
                {
                    AddCharToLexer(' ');
                    Waswhite = true;
                    Insertspace = false;
                }

                /* treat \r\n as \n and \r as \n */

                if (c == '\r')
                {
                    c = Input.ReadChar();

                    if (c != '\n')
                    {
                        Input.UngetChar(c);
                    }

                    c = '\n';
                }

                AddCharToLexer(c);

                short map;
                switch (State)
                {
                    case LEX_CONTENT:
                        map = Map((char) c);

                        /*
                        Discard white space if appropriate. Its cheaper
                        to do this here rather than in parser methods
                        for elements that don't have mixed content.
                        */
                        if (((map & WHITE) != 0) && (mode == IGNORE_WHITESPACE) && Lexsize == Txtstart + 1)
                        {
                            --Lexsize;
                            Waswhite = false;
                            Lines = Input.CursorLine;
                            Columns = Input.CursorColumn;
                            continue;
                        }

                        if (c == '<')
                        {
                            State = LEX_GT;
                            continue;
                        }

                        if ((map & WHITE) != 0)
                        {
                            /* was previous char white? */
                            if (Waswhite)
                            {
                                if (mode != PREFORMATTED && mode != IGNORE_MARKUP)
                                {
                                    --Lexsize;
                                    Lines = Input.CursorLine;
                                    Columns = Input.CursorColumn;
                                }
                            }
                                /* prev char wasn't white */
                            else
                            {
                                Waswhite = true;

                                if (mode != PREFORMATTED && mode != IGNORE_MARKUP && c != ' ')
                                {
                                    ChangeChar((byte) ' ');
                                }
                            }

                            continue;
                        }
                        if (c == '&' && mode != IGNORE_MARKUP)
                        {
                            ParseEntity(mode);
                        }

                        /* this is needed to avoid trimming trailing whitespace */
                        if (mode == IGNORE_WHITESPACE)
                            mode = MIXED_CONTENT;

                        Waswhite = false;
                        continue;

                    case LEX_GT:
                        if (c == '/')
                        {
                            c = Input.ReadChar();
                            if (c == StreamIn.END_OF_STREAM)
                            {
                                Input.UngetChar(c);
                                continue;
                            }

                            AddCharToLexer(c);
                            map = Map((char) c);

                            if ((map & LETTER) != 0)
                            {
                                Lexsize -= 3;
                                Txtend = Lexsize;
                                Input.UngetChar(c);
                                State = LEX_ENDTAG;
                                Lexbuf[Lexsize] = (byte) '\x0000'; /* debug */
                                Input.CursorColumn -= 2;

                                /* if some text before the </ return it now */
                                if (Txtend > Txtstart)
                                {
                                    /* trim space char before end tag */
                                    if (mode == IGNORE_WHITESPACE && Lexbuf[Lexsize - 1] == (byte) ' ')
                                    {
                                        Lexsize -= 1;
                                        Txtend = Lexsize;
                                    }

                                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                    return Token;
                                }

                                continue; /* no text so keep going */
                            }

                            /* otherwise treat as CDATA */
                            Waswhite = false;
                            State = LEX_CONTENT;
                            continue;
                        }

                        if (mode == IGNORE_MARKUP)
                        {
                            /* otherwise treat as CDATA */
                            Waswhite = false;
                            State = LEX_CONTENT;
                            continue;
                        }

                        /*
                        look out for comments, doctype or marked sections
                        this isn't quite right, but its getting there ...
                        */
                        if (c == '!')
                        {
                            c = Input.ReadChar();
                            if (c == '-')
                            {
                                c = Input.ReadChar();
                                if (c == '-')
                                {
                                    State = LEX_COMMENT; /* comment */
                                    Lexsize -= 2;
                                    Txtend = Lexsize;

                                    /* if some text before < return it now */
                                    if (Txtend > Txtstart)
                                    {
                                        Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                        return Token;
                                    }

                                    Txtstart = Lexsize;
                                    continue;
                                }

                                Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                            }
                            else if (c == 'd' || c == 'D')
                            {
                                State = LEX_DOCTYPE; /* doctype */
                                Lexsize -= 2;
                                Txtend = Lexsize;
                                mode = IGNORE_WHITESPACE;

                                /* skip until white space or '>' */

                                for (;;)
                                {
                                    c = Input.ReadChar();

                                    if (c == StreamIn.END_OF_STREAM || c == '>')
                                    {
                                        Input.UngetChar(c);
                                        break;
                                    }

                                    map = Map((char) c);
                                    if ((map & WHITE) == 0)
                                    {
                                        continue;
                                    }

                                    /* and skip to end of whitespace */

                                    for (;;)
                                    {
                                        c = Input.ReadChar();

                                        if (c == StreamIn.END_OF_STREAM || c == '>')
                                        {
                                            Input.UngetChar(c);
                                            break;
                                        }

                                        map = Map((char) c);

                                        if ((map & WHITE) != 0)
                                        {
                                            continue;
                                        }

                                        Input.UngetChar(c);
                                        break;
                                    }

                                    break;
                                }

                                /* if some text before < return it now */
                                if (Txtend > Txtstart)
                                {
                                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                    return Token;
                                }

                                Txtstart = Lexsize;
                                continue;
                            }
                            else if (c == '[')
                            {
                                /* Word 2000 embeds <![if ...]> ... <![endif]> sequences */
                                Lexsize -= 2;
                                State = LEX_SECTION;
                                Txtend = Lexsize;

                                /* if some text before < return it now */
                                if (Txtend > Txtstart)
                                {
                                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                    return Token;
                                }

                                Txtstart = Lexsize;
                                continue;
                            }

                            /* otherwise swallow chars up to and including next '>' */
                            while (true)
                            {
                                c = Input.ReadChar();
                                if (c == '>')
                                {
                                    break;
                                }
                                if (c == - 1)
                                {
                                    Input.UngetChar(c);
                                    break;
                                }
                            }

                            Lexsize -= 2;
                            Lexbuf[Lexsize] = (byte) '\x0000';
                            State = LEX_CONTENT;
                            continue;
                        }

                        /*
                        processing instructions
                        */

                        if (c == '?')
                        {
                            Lexsize -= 2;
                            State = LEX_PROCINSTR;
                            Txtend = Lexsize;

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            Txtstart = Lexsize;
                            continue;
                        }

                        /* Microsoft ASP's e.g. <% ... server-code ... %> */
                        if (c == '%')
                        {
                            Lexsize -= 2;
                            State = LEX_ASP;
                            Txtend = Lexsize;

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            Txtstart = Lexsize;
                            continue;
                        }

                        /* Netscapes JSTE e.g. <# ... server-code ... #> */
                        if (c == '#')
                        {
                            Lexsize -= 2;
                            State = LEX_JSTE;
                            Txtend = Lexsize;

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            Txtstart = Lexsize;
                            continue;
                        }

                        map = Map((char) c);

                        /* check for start tag */
                        if ((map & LETTER) != 0)
                        {
                            Input.UngetChar(c); /* push back letter */
                            Lexsize -= 2; /* discard "<" + letter */
                            Txtend = Lexsize;
                            State = LEX_STARTTAG; /* ready to read tag name */

                            /* if some text before < return it now */
                            if (Txtend > Txtstart)
                            {
                                Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                                return Token;
                            }

                            continue; /* no text so keep going */
                        }

                        /* otherwise treat as CDATA */
                        State = LEX_CONTENT;
                        Waswhite = false;
                        continue;

                    case LEX_ENDTAG:
                        Txtstart = Lexsize - 1;
                        Input.CursorColumn += 2;
                        c = ParseTagName();
                        Token = NewNode(Node.END_TAG, Lexbuf, Txtstart, Txtend,
                                        GetString(Lexbuf, Txtstart, Txtend - Txtstart));
                        Lexsize = Txtstart;
                        Txtend = Txtstart;

                        /* skip to '>' */
                        while (c != '>')
                        {
                            c = Input.ReadChar();
                            if (c == StreamIn.END_OF_STREAM)
                            {
                                break;
                            }
                        }

                        if (c == StreamIn.END_OF_STREAM)
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        State = LEX_CONTENT;
                        Waswhite = false;
                        return Token; /* the endtag token */

                    case LEX_STARTTAG:
                        Txtstart = Lexsize - 1; /* set txtstart to first letter */
                        c = ParseTagName();
                        isempty.Val = false;
                        AttVal attributes = null;
                        Token = NewNode((isempty.Val ? Node.START_END_TAG : Node.START_TAG), Lexbuf, Txtstart, Txtend,
                                        GetString(Lexbuf, Txtstart, Txtend - Txtstart));

                        /* parse attributes, consuming closing ">" */
                        if (c != '>')
                        {
                            if (c == '/')
                            {
                                Input.UngetChar(c);
                            }

                            attributes = ParseAttrs(isempty);
                        }

                        if (isempty.Val)
                        {
                            Token.Type = Node.START_END_TAG;
                        }

                        Token.Attributes = attributes;
                        Lexsize = Txtstart;
                        Txtend = Txtstart;

                        /* swallow newline following start tag */
                        /* special check needed for CRLF sequence */
                        /* this doesn't apply to empty elements */

                        if (ExpectsContent(Token) || Token.Tag == Options.TagTable.TagBr)
                        {
                            c = Input.ReadChar();
                            if (c == '\r')
                            {
                                c = Input.ReadChar();

                                if (c != '\n')
                                {
                                    Input.UngetChar(c);
                                }
                            }
                            else if (c != '\n' && c != '\f')
                            {
                                Input.UngetChar(c);
                            }

                            Waswhite = true; /* to swallow leading whitespace */
                        }
                        else
                        {
                            Waswhite = false;
                        }

                        State = LEX_CONTENT;

                        if (Token.Tag == null)
                        {
                            Report.Error(this, null, Token, Report.UNKNOWN_ELEMENT);
                        }
                        else if (!Options.XmlTags)
                        {
                            Versions &= Token.Tag.Versions;

                            if ((Token.Tag.Versions & HtmlVersion.Proprietary) != 0)
                            {
                                if (!Options.MakeClean &&
                                    (Token.Tag == Options.TagTable.TagNobr || Token.Tag == Options.TagTable.TagWbr))
                                {
                                    Report.Warning(this, null, Token, Report.PROPRIETARY_ELEMENT);
                                }
                            }

                            if (Token.Tag.CheckAttribs != null)
                            {
                                Token.CheckUniqueAttributes(this);
                                Token.Tag.CheckAttribs.Check(this, Token);
                            }
                            else
                            {
                                Token.CheckAttributes(this);
                            }
                        }
                        return Token; /* return start tag */

                    case LEX_COMMENT:
                        if (c != '-')
                        {
                            continue;
                        }

                        c = Input.ReadChar();
                        AddCharToLexer(c);
                        if (c != '-')
                        {
                            continue;
                        }

                        while (true)
                        {
                            c = Input.ReadChar();

                            if (c == '>')
                            {
                                if (badcomment != 0)
                                {
                                    Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                                }

                                Txtend = Lexsize - 2; // AQ 8Jul2000
                                Lexbuf[Lexsize] = (byte) '\x0000';
                                State = LEX_CONTENT;
                                Waswhite = false;
                                Token = NewNode(Node.COMMENT_TAG, Lexbuf, Txtstart, Txtend);

                                /* now look for a line break */

                                c = Input.ReadChar();

                                if (c == '\r')
                                {
                                    c = Input.ReadChar();

                                    if (c != '\n')
                                    {
                                        Token.Linebreak = true;
                                    }
                                }

                                if (c == '\n')
                                {
                                    Token.Linebreak = true;
                                }
                                else
                                {
                                    Input.UngetChar(c);
                                }

                                return Token;
                            }

                            /* note position of first such error in the comment */
                            if (badcomment == 0)
                            {
                                Lines = Input.CursorLine;
                                Columns = Input.CursorColumn - 3;
                            }

                            badcomment++;
                            if (Options.FixComments)
                            {
                                Lexbuf[Lexsize - 2] = (byte) '=';
                            }

                            AddCharToLexer(c);

                            /* if '-' then look for '>' to end the comment */
                            if (c != '-')
                            {
                                break;
                            }
                        }

                        /* otherwise continue to look for --> */
                        Lexbuf[Lexsize - 2] = (byte) '=';
                        continue;

                    case LEX_DOCTYPE:
                        map = Map((char) c);

                        if ((map & WHITE) != 0)
                        {
                            if (Waswhite)
                            {
                                Lexsize -= 1;
                            }

                            Waswhite = true;
                        }
                        else
                        {
                            Waswhite = false;
                        }

                        if (c != '>')
                        {
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.DOC_TYPE_TAG, Lexbuf, Txtstart, Txtend);
                        /* make a note of the version named by the doctype */
                        Doctype = FindGivenVersion(Token);
                        return Token;

                    case LEX_PROCINSTR:

                        if (Lexsize - Txtstart == 3)
                        {
                            if ((GetString(Lexbuf, Txtstart, 3)).Equals("php"))
                            {
                                State = LEX_PHP;
                                continue;
                            }
                        }

                        if (Options.XmlPIs)
                        {
                            /* insist on ?> as terminator */
                            if (c != '?')
                            {
                                continue;
                            }

                            /* now look for '>' */
                            c = Input.ReadChar();

                            if (c == StreamIn.END_OF_STREAM)
                            {
                                Report.Warning(this, null, null, Report.UNEXPECTED_END_OF_FILE);
                                Input.UngetChar(c);
                                continue;
                            }

                            AddCharToLexer(c);
                        }

                        if (c != '>')
                        {
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.PROC_INS_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_ASP:
                        if (c != '%')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();

                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.ASP_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_JSTE:
                        if (c != '#')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.JSTE_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_PHP:
                        if (c != '?')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.PHP_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_SECTION:
                        if (c == '[')
                        {
                            if (Lexsize == (Txtstart + 6) && (GetString(Lexbuf, Txtstart, 6)).Equals("CDATA["))
                            {
                                State = LEX_CDATA;
                                Lexsize -= 6;
                                continue;
                            }
                        }

                        if (c != ']')
                        {
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.SECTION_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;

                    case LEX_CDATA:
                        if (c != ']')
                        {
                            continue;
                        }

                        /* now look for ']' */
                        c = Input.ReadChar();
                        if (c != ']')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        /* now look for '>' */
                        c = Input.ReadChar();
                        if (c != '>')
                        {
                            Input.UngetChar(c);
                            continue;
                        }

                        Lexsize -= 1;
                        Txtend = Lexsize;
                        Lexbuf[Lexsize] = (byte) '\x0000';
                        State = LEX_CONTENT;
                        Waswhite = false;
                        Token = NewNode(Node.CDATA_TAG, Lexbuf, Txtstart, Txtend);
                        return Token;
                }
            }

            if (State == LEX_CONTENT)
            {
                /* text string */
                Txtend = Lexsize;
                if (Txtend > Txtstart)
                {
                    Input.UngetChar(c);
                    if (Lexbuf[Lexsize - 1] == (byte) ' ')
                    {
                        Lexsize -= 1;
                        Txtend = Lexsize;
                    }

                    Token = NewNode(Node.TEXT_NODE, Lexbuf, Txtstart, Txtend);
                    return Token;
                }
            }
            else if (State == LEX_COMMENT)
            {
                /* comment */
                if (c == StreamIn.END_OF_STREAM)
                {
                    Report.Warning(this, null, null, Report.MALFORMED_COMMENT);
                }

                Txtend = Lexsize;
                Lexbuf[Lexsize] = (byte) '\x0000';
                State = LEX_CONTENT;
                Waswhite = false;
                Token = NewNode(Node.COMMENT_TAG, Lexbuf, Txtstart, Txtend);
                return Token;
            }

            return null;
        }