InlineDup() public method

public InlineDup ( Node node ) : int
node Node
return int
Example #1
0
            /// <summary>
            /// Parses the content of the element passed as <paramref name="body"/>
            /// (the document body, judging by the parameter name and the
            /// CONTENT_AFTER_BODY warning). Tokens are pulled from the lexer one at a
            /// time and are either attached beneath <paramref name="body"/>, discarded
            /// with a warning, moved to the head, or pushed back before the loop stops.
            /// </summary>
            /// <param name="lexer">
            /// Token source. Its Versions, SeenBodyEndTag and ExcludeBlocks members are
            /// modified while parsing.
            /// </param>
            /// <param name="body">Node that receives the parsed children.</param>
            /// <param name="mode">
            /// Overwritten with Lexer.IGNORE_WHITESPACE before it is first read, so the
            /// value supplied by the caller has no effect.
            /// </param>
            public virtual void Parse(Lexer lexer, Node body, short mode)
            {
                /* true while the next text or inline token should be offered to lexer.InlineDup() */
                bool checkstack;

                mode = Lexer.IGNORE_WHITESPACE;
                checkstack = true;
                TagCollection tt = lexer.Options.TagTable;

                while (true)
                {
                    Node node = lexer.GetToken(mode);
                    if (node == null)
                    {
                        /* no more tokens */
                        break;
                    }
                    /* end tag matching this element's own tag */
                    if (node.Tag == body.Tag && node.Type == Node.END_TAG)
                    {
                        body.Closed = true;
                        Node.TrimSpaces(lexer, body);
                        /* 1 = end tag seen, nothing reported yet (see CONTENT_AFTER_BODY below) */
                        lexer.SeenBodyEndTag = 1;
                        mode = Lexer.IGNORE_WHITESPACE;

                        /* inside noframes the end tag really terminates parsing */
                        if (body.Parent.Tag == tt.TagNoframes)
                        {
                            break;
                        }

                        /* otherwise keep reading so that trailing content is still processed */
                        continue;
                    }

                    if (node.Tag == tt.TagNoframes)
                    {
                        /* <noframes> start tag: attach it and parse its content as a block */
                        if (node.Type == Node.START_TAG)
                        {
                            Node.InsertNodeAtEnd(body, node);
                            ParseBlock.Parse(lexer, node, mode);
                            continue;
                        }

                        /* </noframes> while this element is a child of noframes: push it back and stop */
                        if (node.Type == Node.END_TAG && body.Parent.Tag == tt.TagNoframes)
                        {
                            Node.TrimSpaces(lexer, body);
                            lexer.UngetToken();
                            break;
                        }
                    }

                    /* frame/frameset token while inside noframes: push it back and stop */
                    if ((node.Tag == tt.TagFrame || node.Tag == tt.TagFrameset) && body.Parent.Tag == tt.TagNoframes)
                    {
                        Node.TrimSpaces(lexer, body);
                        lexer.UngetToken();
                        break;
                    }

                    /* html tags are dropped here; only start forms produce a warning */
                    if (node.Tag == tt.TagHtml)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    /* text node spanning at most one character whose first character is a space */
                    bool iswhitenode = node.Type == Node.TEXT_NODE && node.End <= node.Start + 1 &&
                                       node.Textarray[node.Start] == (sbyte) ' ';

                    /* deal with comments etc. */
                    if (Node.InsertMisc(body, node))
                    {
                        continue;
                    }

                    /*
                    first non-whitespace content after the end tag: warn, and bump the
                    counter past 1 so the warning is not repeated unless another
                    matching end tag resets it
                    */
                    if (lexer.SeenBodyEndTag == 1 && !iswhitenode)
                    {
                        ++lexer.SeenBodyEndTag;
                        Report.Warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
                    }

                    /* mixed content model permits text */
                    if (node.Type == Node.TEXT_NODE)
                    {
                        /* drop a lone space while whitespace is being ignored */
                        if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE)
                        {
                            continue;
                        }

                        /* EncloseText option: push the text back and let an inferred <p> consume it */
                        if (lexer.Options.EncloseText && !iswhitenode)
                        {
                            lexer.UngetToken();
                            Node para = lexer.InferredTag("p");
                            Node.InsertNodeAtEnd(body, para);
                            ParseTag(lexer, para, mode);
                            mode = Lexer.MIXED_CONTENT;
                            continue;
                        }
                        /* strict doesn't allow text here */
                        lexer.Versions &= ~ (HtmlVersion.Html40Strict | HtmlVersion.Html20);

                        if (checkstack)
                        {
                            checkstack = false;

                            /*
                            a positive result skips insertion of this node here
                            NOTE(review): presumably the lexer re-delivers the token after
                            inferred inline tags - confirm in Lexer.InlineDup
                            */
                            if (lexer.InlineDup(node) > 0)
                            {
                                continue;
                            }
                        }

                        Node.InsertNodeAtEnd(body, node);
                        mode = Lexer.MIXED_CONTENT;
                        continue;
                    }

                    if (node.Type == Node.DOC_TYPE_TAG)
                    {
                        Node.InsertDocType(lexer, body, node);
                        continue;
                    }
                    /* discard unknown  and PARAM tags */
                    if (node.Tag == null || node.Tag == tt.TagParam)
                    {
                        Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    Netscape allows LI and DD directly in BODY
                    We infer UL or DL respectively and use this
                    boolean to exclude block-level elements so as
                    to match Netscape's observed behaviour.
                    */
                    lexer.ExcludeBlocks = false;

                    /* tag whose content model is neither BLOCK nor INLINE */
                    if ((node.Tag.Model & ContentModel.BLOCK) == 0 && (node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        /* avoid this error message being issued twice */
                        if ((node.Tag.Model & ContentModel.HEAD) == 0)
                        {
                            Report.Warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
                        }

                        /* tags with the HTML model bit are dropped */
                        if ((node.Tag.Model & ContentModel.HTML) != 0)
                        {
                            /* copy body attributes if current body was inferred */
                            if (node.Tag == tt.TagBody && body.Isimplicit && body.Attributes == null)
                            {
                                body.Attributes = node.Attributes;
                                node.Attributes = null;
                            }

                            continue;
                        }

                        /* head-only content is relocated rather than kept here */
                        if ((node.Tag.Model & ContentModel.HEAD) != 0)
                        {
                            MoveToHead(lexer, body, node);
                            continue;
                        }

                        /*
                        for list, definition-list and table parts: push the token back,
                        replace node with an inferred container and fall through to the
                        tag handling below
                        */
                        if ((node.Tag.Model & ContentModel.LIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("ul");
                            Node.AddClass(node, "noindent");
                            lexer.ExcludeBlocks = true;
                        }
                        else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("dl");
                            lexer.ExcludeBlocks = true;
                        }
                        else if ((node.Tag.Model & (ContentModel.TABLE | ContentModel.ROWGRP | ContentModel.ROW)) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("table");
                            lexer.ExcludeBlocks = true;
                        }
                        else
                        {
                            /* AQ: The following line is from the official C
                            version of tidy.  It doesn't make sense to me
                            because the '!' operator has higher precedence
                            than the '&' operator.  It seems to me that the
                            expression always evaluates to 0.

                            if (!node->tag->model & (CM_ROW | CM_FIELD))

                            AQ: 13Jan2000 fixed in C tidy
                            */
                            /* neither ROW nor FIELD: push the token back and leave the method entirely */
                            if ((node.Tag.Model & (ContentModel.ROW | ContentModel.FIELD)) == 0)
                            {
                                lexer.UngetToken();
                                return;
                            }

                            /* ignore </td> </th> <option> etc. */
                            continue;
                        }
                    }

                    if (node.Type == Node.END_TAG)
                    {
                        /* </br> is treated as <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            node.Type = Node.START_TAG;
                        }
                        /* stray </p>: coerce it to br, insert it, then continue with a second inferred br */
                        else if (node.Tag == tt.TagP)
                        {
                            Node.CoerceNode(lexer, node, tt.TagBr);
                            Node.InsertNodeAtEnd(body, node);
                            node = lexer.InferredTag("br");
                        }
                        /* end tag of an inline element: hand it to the lexer's inline stack */
                        else if ((node.Tag.Model & ContentModel.INLINE) != 0)
                        {
                            lexer.PopInline(node);
                        }
                    }

                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        /* inline element that is not also flagged MIXED */
                        if (((node.Tag.Model & ContentModel.INLINE) != 0) &&
                            (node.Tag.Model & ContentModel.MIXED) == 0)
                        {
                            /* HTML4 strict doesn't allow inline content here */
                            /* but HTML2 does allow img elements as children of body */
                            if (node.Tag == tt.TagImg)
                            {
                                lexer.Versions &= ~ HtmlVersion.Html40Strict;
                            }
                            else
                            {
                                lexer.Versions &= ~ (HtmlVersion.Html40Strict | HtmlVersion.Html20);
                            }

                            /* only explicit tags are offered to InlineDup, and only once per inline run */
                            if (checkstack && !node.Isimplicit)
                            {
                                checkstack = false;

                                if (lexer.InlineDup(node) > 0)
                                {
                                    continue;
                                }
                            }

                            mode = Lexer.MIXED_CONTENT;
                        }
                        else
                        {
                            /* any other element re-arms the InlineDup check and whitespace skipping */
                            checkstack = true;
                            mode = Lexer.IGNORE_WHITESPACE;
                        }

                        if (node.Isimplicit)
                        {
                            Report.Warning(lexer, body, node, Report.INSERTING_TAG);
                        }

                        Node.InsertNodeAtEnd(body, node);
                        ParseTag(lexer, node, mode);
                        continue;
                    }

                    /* discard unexpected tags */
                    Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                }
            }
Example #2
0
            /// <summary>
            /// Parses the content of a preformatted element. Tokens are always requested
            /// in Lexer.PREFORMATTED mode and are attached beneath
            /// <paramref name="pre"/> until its end tag, an ancestor's end tag, or the
            /// end of input is reached. A non-inline start tag ends the current element
            /// and parsing continues in a newly inferred "pre" placed after it.
            /// </summary>
            /// <param name="lexer">
            /// Token source. Its BadForm and ExcludeBlocks members may be modified while
            /// parsing.
            /// </param>
            /// <param name="pre">
            /// Element whose content is parsed. The local reference is reassigned when a
            /// new "pre" is inferred after a non-inline element.
            /// </param>
            /// <param name="mode">Not read by this method.</param>
            public virtual void Parse(Lexer lexer, Node pre, short mode)
            {
                TagCollection tt = lexer.Options.TagTable;

                /* elements with the EMPTY content model have nothing to parse */
                if ((pre.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                /* elements flagged OBSOLETE are coerced to the pre tag */
                if ((pre.Tag.Model & ContentModel.OBSOLETE) != 0)
                {
                    Node.CoerceNode(lexer, pre, tt.TagPre);
                }

                lexer.InlineDup(null); /* tell lexer to insert inlines if needed */

                while (true)
                {
                    Node node = lexer.GetToken(Lexer.PREFORMATTED);
                    if (node == null)
                    {
                        /* input exhausted: falls through to the missing-end-tag warning after the loop */
                        break;
                    }
                    /* matching end tag: close the element and finish */
                    if (node.Tag == pre.Tag && node.Type == Node.END_TAG)
                    {
                        Node.TrimSpaces(lexer, pre);
                        pre.Closed = true;
                        Node.TrimEmptyElement(lexer, pre);
                        return;
                    }

                    /* html tags are dropped here; only start forms produce a warning */
                    if (node.Tag == tt.TagHtml)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    if (node.Type == Node.TEXT_NODE)
                    {
                        /* if first check for initial newline */
                        if (pre.Content == null)
                        {
                            /* skip one leading newline in the first text node */
                            if (node.Textarray[node.Start] == (sbyte) '\n')
                            {
                                ++node.Start;
                            }

                            /* nothing left in the text node: drop it */
                            if (node.Start >= node.End)
                            {
                                continue;
                            }
                        }

                        Node.InsertNodeAtEnd(pre, node);
                        continue;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(pre, node))
                    {
                        continue;
                    }

                    /* discard unknown  and PARAM tags */
                    if (node.Tag == null || node.Tag == tt.TagParam)
                    {
                        Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (node.Tag == tt.TagP)
                    {
                        if (node.Type == Node.START_TAG)
                        {
                            Report.Warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);

                            /* trim white space before <p> in <pre>*/
                            Node.TrimSpaces(lexer, pre);

                            /* coerce <p> to <br>; any other form (e.g. </p>) is discarded in the else branch */
                            Node.CoerceNode(lexer, node, tt.TagBr);
                            Node.InsertNodeAtEnd(pre, node);
                        }
                        else
                        {
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                        }
                        continue;
                    }

                    /* head content that is not also block content is relocated to the head */
                    if ((node.Tag.Model & ContentModel.HEAD) != 0 && (node.Tag.Model & ContentModel.BLOCK) == 0)
                    {
                        MoveToHead(lexer, pre, node);
                        continue;
                    }

                    /*
                    if this is the end tag for an ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        /* </form> is never treated as an ancestor end tag: flag it and discard */
                        if (node.Tag == tt.TagForm)
                        {
                            lexer.BadForm = 1;
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        Node parent;
                        for (parent = pre.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag == parent.Tag)
                            {
                                Report.Warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);

                                /* push the end tag back so the caller sees it */
                                lexer.UngetToken();
                                Node.TrimSpaces(lexer, pre);
                                Node.TrimEmptyElement(lexer, pre);
                                return;
                            }
                        }
                    }

                    /* what about head content, HEAD, BODY tags etc? */
                    if ((node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        /* only plain start tags of non-inline elements are handled; everything else is discarded */
                        if (node.Type != Node.START_TAG)
                        {
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        Report.Warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
                        /* set for the duration of the ParseTag call below, reset afterwards */
                        lexer.ExcludeBlocks = true;

                        /* check if we need to infer a container */
                        if ((node.Tag.Model & ContentModel.LIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("ul");
                            Node.AddClass(node, "noindent");
                        }
                        else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("dl");
                        }
                        else if ((node.Tag.Model & ContentModel.TABLE) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("table");
                        }

                        /*
                        place the element after the current pre, then a new inferred
                        pre after that element; later tokens are attached to the new pre
                        */
                        Node.InsertNodeAfterElement(pre, node);
                        pre = lexer.InferredTag("pre");
                        Node.InsertNodeAfterElement(node, pre);
                        ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                        lexer.ExcludeBlocks = false;
                        continue;
                    }
                    /*
                    if (!((node.Tag.Model & ContentModel.INLINE) != 0))
                    {
                    Report.Warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
                    lexer.UngetToken();
                    return;
                    }
                    */
                    /* inline start tags are attached and parsed in preformatted mode */
                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        /* trim white space before <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            Node.TrimSpaces(lexer, pre);
                        }

                        Node.InsertNodeAtEnd(pre, node);
                        ParseTag(lexer, node, Lexer.PREFORMATTED);
                        continue;
                    }

                    /* discard unexpected tags */
                    Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                }

                /* reached only when the token stream ended without a closing tag */
                Report.Warning(lexer, pre, null, Report.MISSING_ENDTAG_FOR);
                Node.TrimEmptyElement(lexer, pre);
            }
Example #3
0
            /*
            element is node created by the lexer
            upon seeing the start tag, or by the
            parser when the start tag is inferred
            */
            public virtual void Parse(Lexer lexer, Node element, short mode)
            {
                Node node;
                bool checkstack;
                int istackbase = 0;
                TagCollection tt = lexer.Options.TagTable;

                checkstack = true;

                if ((element.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                if (element.Tag == tt.TagForm && element.IsDescendantOf(tt.TagForm))
                {
                    Report.Warning(lexer, element, null, Report.ILLEGAL_NESTING);
                }

                /*
                InlineDup() asks the lexer to insert inline emphasis tags
                currently pushed on the istack, but take care to avoid
                propagating inline emphasis inside OBJECT or APPLET.
                For these elements a fresh inline stack context is created
                and disposed of upon reaching the end of the element.
                They thus behave like table cells in this respect.
                */
                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                {
                    istackbase = lexer.Istackbase;
                    lexer.Istackbase = lexer.Istack.Count;
                }

                if ((element.Tag.Model & ContentModel.MIXED) == 0)
                {
                    lexer.InlineDup(null);
                }

                mode = Lexer.IGNORE_WHITESPACE;

                while (true)
                {
                    node = lexer.GetToken(mode);
                    if (node == null)
                    {
                        break;
                    }

                    /* end tag for this element */
                    if (node.Type == Node.END_TAG && node.Tag != null &&
                        (node.Tag == element.Tag || element.Was == node.Tag))
                    {
                        if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                        {
                            /* pop inline stack */
                            while (lexer.Istack.Count > lexer.Istackbase)
                            {
                                lexer.PopInline(null);
                            }
                            lexer.Istackbase = istackbase;
                        }

                        element.Closed = true;
                        Node.TrimSpaces(lexer, element);
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    if (node.Tag == tt.TagHtml || node.Tag == tt.TagHead || node.Tag == tt.TagBody)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    if (node.Type == Node.END_TAG)
                    {
                        if (node.Tag == null)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);

                            continue;
                        }
                        if (node.Tag == tt.TagBr)
                        {
                            node.Type = Node.START_TAG;
                        }
                        else if (node.Tag == tt.TagP)
                        {
                            Node.CoerceNode(lexer, node, tt.TagBr);
                            Node.InsertNodeAtEnd(element, node);
                            node = lexer.InferredTag("br");
                        }
                        else
                        {
                            /*
                            if this is the end tag for an ancestor element
                            then infer end tag for this element
                            */
                            Node parent;
                            for (parent = element.Parent; parent != null; parent = parent.Parent)
                            {
                                if (node.Tag != parent.Tag) continue;
                                if ((element.Tag.Model & ContentModel.OPT) == 0)
                                {
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                }

                                lexer.UngetToken();

                                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                                {
                                    /* pop inline stack */
                                    while (lexer.Istack.Count > lexer.Istackbase)
                                    {
                                        lexer.PopInline(null);
                                    }
                                    lexer.Istackbase = istackbase;
                                }

                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                            /* special case </tr> etc. for stuff moved in front of table */
                            if (lexer.Exiled && node.Tag.Model != 0 && (node.Tag.Model & ContentModel.TABLE) != 0)
                            {
                                lexer.UngetToken();
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                    }

                    /* mixed content model permits text */
                    if (node.Type == Node.TEXT_NODE)
                    {
                        bool iswhitenode = node.Type == Node.TEXT_NODE && node.End <= node.Start + 1 &&
                                           lexer.Lexbuf[node.Start] == (sbyte) ' ';

                        if (lexer.Options.EncloseBlockText && !iswhitenode)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("p");
                            Node.InsertNodeAtEnd(element, node);
                            ParseTag(lexer, node, Lexer.MIXED_CONTENT);
                            continue;
                        }

                        if (checkstack)
                        {
                            checkstack = false;

                            if ((element.Tag.Model & ContentModel.MIXED) == 0)
                            {
                                if (lexer.InlineDup(node) > 0)
                                {
                                    continue;
                                }
                            }
                        }

                        Node.InsertNodeAtEnd(element, node);
                        mode = Lexer.MIXED_CONTENT;
                        /*
                        HTML4 strict doesn't allow mixed content for
                        elements with %block; as their content model
                        */
                        lexer.Versions &= ~ HtmlVersion.Html40Strict;
                        continue;
                    }

                    if (Node.InsertMisc(element, node))
                    {
                        continue;
                    }

                    /* allow PARAM elements? */
                    if (node.Tag == tt.TagParam)
                    {
                        if (((element.Tag.Model & ContentModel.PARAM) != 0) &&
                            (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG))
                        {
                            Node.InsertNodeAtEnd(element, node);
                            continue;
                        }

                        /* otherwise discard it */
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* allow AREA elements? */
                    if (node.Tag == tt.TagArea)
                    {
                        if ((element.Tag == tt.TagMap) &&
                            (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG))
                        {
                            Node.InsertNodeAtEnd(element, node);
                            continue;
                        }

                        /* otherwise discard it */
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* ignore unknown start/end tags */
                    if (node.Tag == null)
                    {
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    Allow ContentModel.INLINE elements here.

                    Allow ContentModel.BLOCK elements here unless
                    lexer.excludeBlocks is yes.

                    LI and DD are special cased.

                    Otherwise infer end tag for this element.
                    */

                    if ((node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (element.Tag == tt.TagTd || element.Tag == tt.TagTh)
                        {
                            /* if parent is a table cell, avoid inferring the end of the cell */

                            if ((node.Tag.Model & ContentModel.HEAD) != 0)
                            {
                                MoveToHead(lexer, element, node);
                                continue;
                            }

                            if ((node.Tag.Model & ContentModel.LIST) != 0)
                            {
                                lexer.UngetToken();
                                node = lexer.InferredTag("ul");
                                Node.AddClass(node, "noindent");
                                lexer.ExcludeBlocks = true;
                            }
                            else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                            {
                                lexer.UngetToken();
                                node = lexer.InferredTag("dl");
                                lexer.ExcludeBlocks = true;
                            }

                            /* infer end of current table cell */
                            if ((node.Tag.Model & ContentModel.BLOCK) == 0)
                            {
                                lexer.UngetToken();
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                        else if ((node.Tag.Model & ContentModel.BLOCK) != 0)
                        {
                            if (lexer.ExcludeBlocks)
                            {
                                if ((element.Tag.Model & ContentModel.OPT) == 0)
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);

                                lexer.UngetToken();

                                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                                    lexer.Istackbase = istackbase;

                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                            /* things like list items */
                        else
                        {
                            if ((element.Tag.Model & ContentModel.OPT) == 0 && !element.Isimplicit)
                                Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);

                            if ((node.Tag.Model & ContentModel.HEAD) != 0)
                            {
                                MoveToHead(lexer, element, node);
                                continue;
                            }

                            lexer.UngetToken();

                            if ((node.Tag.Model & ContentModel.LIST) != 0)
                            {
                                if (element.Parent != null && element.Parent.Tag != null &&
                                    element.Parent.Tag.Parser == ParseList)
                                {
                                    Node.TrimSpaces(lexer, element);
                                    Node.TrimEmptyElement(lexer, element);
                                    return;
                                }

                                node = lexer.InferredTag("ul");
                                Node.AddClass(node, "noindent");
                            }
                            else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                            {
                                if (element.Parent.Tag == tt.TagDl)
                                {
                                    Node.TrimSpaces(lexer, element);
                                    Node.TrimEmptyElement(lexer, element);
                                    return;
                                }

                                node = lexer.InferredTag("dl");
                            }
                            else if ((node.Tag.Model & ContentModel.TABLE) != 0 ||
                                     (node.Tag.Model & ContentModel.ROW) != 0)
                            {
                                node = lexer.InferredTag("table");
                            }
                            else if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                            {
                                /* pop inline stack */
                                while (lexer.Istack.Count > lexer.Istackbase)
                                {
                                    lexer.PopInline(null);
                                }
                                lexer.Istackbase = istackbase;
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                            else
                            {
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                    }

                    /* parse known element */
                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        if ((node.Tag.Model & ContentModel.INLINE) != 0)
                        {
                            if (checkstack && !node.Isimplicit)
                            {
                                checkstack = false;

                                if (lexer.InlineDup(node) > 0)
                                    continue;
                            }

                            mode = Lexer.MIXED_CONTENT;
                        }
                        else
                        {
                            checkstack = true;
                            mode = Lexer.IGNORE_WHITESPACE;
                        }

                        /* trim white space before <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        Node.InsertNodeAtEnd(element, node);

                        if (node.Isimplicit)
                        {
                            Report.Warning(lexer, element, node, Report.INSERTING_TAG);
                        }

                        ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                        continue;
                    }

                    /* discard unexpected tags */
                    if (node.Type == Node.END_TAG)
                        lexer.PopInline(node);
                    /* if inline end tag */

                    Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                }

                if ((element.Tag.Model & ContentModel.OPT) == 0)
                {
                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
                }

                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                {
                    /* pop inline stack */
                    while (lexer.Istack.Count > lexer.Istackbase)
                    {
                        lexer.PopInline(null);
                    }
                    lexer.Istackbase = istackbase;
                }

                Node.TrimSpaces(lexer, element);
                Node.TrimEmptyElement(lexer, element);
            }
Beispiel #4
0
            /// <summary>
            /// Parses the content of an inline element, appending child nodes to
            /// <paramref name="element"/> until its end tag is seen or can be inferred.
            /// Per the comment in the body, this parser is also used for some
            /// block-level elements such as H1 to H6 (and the code special-cases DT).
            /// </summary>
            /// <param name="lexer">
            /// Supplies tokens via GetToken/UngetToken, receives warnings, and is asked to
            /// push/pop entries on its inline stack.
            /// </param>
            /// <param name="element">
            /// The element whose content is parsed. Note that the local reference is
            /// re-pointed at a clone when a heading or DT is split around a CENTER/DIV/HR.
            /// </param>
            /// <param name="mode">
            /// Lexer mode on entry; any value other than Lexer.PREFORMATTED is replaced
            /// by Lexer.MIXED_CONTENT before tokens are read.
            /// </param>
            public virtual void Parse(Lexer lexer, Node element, short mode)
            {
                Node node;
                TagCollection tt = lexer.Options.TagTable;

                /* elements with an EMPTY content model have nothing to parse */
                if ((element.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                /* an anchor without any attributes is reported and dropped */
                if (element.Tag == tt.TagA)
                {
                    if (element.Attributes == null)
                    {
                        Report.Warning(lexer, element.Parent, element, Report.DISCARDING_UNEXPECTED);
                        Node.DiscardElement(element);
                        return;
                    }
                }

                /*
                ParseInline is used for some block level elements like H1 to H6
                For such elements we need to insert inline emphasis tags currently
                on the inline stack. For Inline elements, we normally push them
                onto the inline stack provided they aren't implicit or OBJECT/APPLET.
                This test is carried out in PushInline and PopInline, see istack.c
                We don't push A or SPAN to replicate current browser behavior
                */
                if (((element.Tag.Model & ContentModel.BLOCK) != 0) || (element.Tag == tt.TagDt))
                {
                    lexer.InlineDup(null);
                }
                else if ((element.Tag.Model & ContentModel.INLINE) != 0 && element.Tag != tt.TagA &&
                         element.Tag != tt.TagSpan)
                {
                    lexer.PushInline(element);
                }

                /* record use of presentational NOBR / FONT in the lexer's BadLayout flags */
                if (element.Tag == tt.TagNobr)
                {
                    lexer.BadLayout |= Report.USING_NOBR;
                }
                else if (element.Tag == tt.TagFont)
                {
                    lexer.BadLayout |= Report.USING_FONT;
                }

                /* Inline elements may or may not be within a preformatted element */
                if (mode != Lexer.PREFORMATTED)
                {
                    mode = Lexer.MIXED_CONTENT;
                }

                /*
                Main token loop. The only 'break' is the null-token check below;
                every other way out of the loop is an explicit 'return'.
                */
                while (true)
                {
                    node = lexer.GetToken(mode);
                    if (node == null)
                    {
                        break;
                    }
                    /* end tag for current element */
                    if (node.Tag == element.Tag && node.Type == Node.END_TAG)
                    {
                        if ((element.Tag.Model & ContentModel.INLINE) != 0 && element.Tag != tt.TagA)
                        {
                            lexer.PopInline(node);
                        }

                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        /*
                        if a font element wraps an anchor and nothing else
                        then move the font element inside the anchor since
                        otherwise it won't alter the anchor text color
                        */
                        if (element.Tag == tt.TagFont && element.Content != null && element.Content == element.Last)
                        {
                            Node child = element.Content;

                            if (child.Tag == tt.TagA)
                            {
                                /* splice the anchor into the font element's place among its siblings */
                                child.Parent = element.Parent;
                                child.Next = element.Next;
                                child.Prev = element.Prev;

                                if (child.Prev != null)
                                {
                                    child.Prev.Next = child;
                                }
                                else
                                {
                                    child.Parent.Content = child;
                                }

                                if (child.Next != null)
                                {
                                    child.Next.Prev = child;
                                }
                                else
                                {
                                    child.Parent.Last = child;
                                }

                                /* the font element becomes the anchor's only child and adopts the anchor's former children */
                                element.Next = null;
                                element.Prev = null;
                                element.Parent = child;
                                element.Content = child.Content;
                                element.Last = child.Last;
                                child.Content = element;
                                child.Last = element;
                                /* re-parent the adopted children ('child' is reused as the loop cursor) */
                                for (child = element.Content; child != null; child = child.Next)
                                {
                                    child.Parent = element;
                                }
                            }
                        }
                        element.Closed = true;
                        /* NOTE(review): this TrimSpaces runs even in preformatted mode, unlike the guarded call above - confirm intended */
                        Node.TrimSpaces(lexer, element);
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
                    /* otherwise emphasis nesting is probably unintentional */
                    /* big and small have cumulative effect so leave them alone */
                    if (node.Type == Node.START_TAG && node.Tag == element.Tag && lexer.IsPushed(node) &&
                        !node.Isimplicit && !element.Isimplicit && node.Tag != null &&
                        ((node.Tag.Model & ContentModel.INLINE) != 0) && node.Tag != tt.TagA && node.Tag != tt.TagFont &&
                        node.Tag != tt.TagBig && node.Tag != tt.TagSmall)
                    {
                        /* only coerce when the element already has content and the repeated tag carries no attributes */
                        if (element.Content != null && node.Attributes == null)
                        {
                            Report.Warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
                            node.Type = Node.END_TAG;
                            /* push the rewritten token back so the next iteration handles it as the end tag */
                            lexer.UngetToken();
                            continue;
                        }

                        Report.Warning(lexer, element, node, Report.NESTED_EMPHASIS);
                    }

                    if (node.Type == Node.TEXT_NODE)
                    {
                        /* only called for 1st child */
                        if (element.Content == null && (mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        /* skip text nodes whose range is empty */
                        if (node.Start >= node.End)
                        {
                            continue;
                        }

                        Node.InsertNodeAtEnd(element, node);
                        continue;
                    }

                    /* mixed content model so allow text */
                    if (Node.InsertMisc(element, node))
                    {
                        continue;
                    }

                    /* deal with HTML tags */
                    if (node.Tag == tt.TagHtml)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        /* otherwise infer end of inline element */
                        lexer.UngetToken();
                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* within <dt> or <pre> map <p> to <br> */
                    if (node.Tag == tt.TagP && node.Type == Node.START_TAG &&
                        ((mode & Lexer.PREFORMATTED) != 0 || element.Tag == tt.TagDt || element.IsDescendantOf(tt.TagDt)))
                    {
                        node.Tag = tt.TagBr;
                        node.Element = "br";
                        Node.TrimSpaces(lexer, element);
                        Node.InsertNodeAtEnd(element, node);
                        continue;
                    }

                    /* ignore unknown and PARAM tags */
                    /* (from here on node.Tag is known to be non-null) */
                    if (node.Tag == null || node.Tag == tt.TagParam)
                    {
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* coerce </br> to <br> before any of the end-tag handling below */
                    if (node.Tag == tt.TagBr && node.Type == Node.END_TAG)
                    {
                        node.Type = Node.START_TAG;
                    }

                    if (node.Type == Node.END_TAG)
                    {
                        /* coerce </br> to <br> */
                        /* NOTE(review): appears unreachable - a </br> was already turned into a start tag just above */
                        if (node.Tag == tt.TagBr)
                        {
                            node.Type = Node.START_TAG;
                        }
                        else if (node.Tag == tt.TagP)
                        {
                            /* coerce unmatched </p> to <br><br> */
                            if (!element.IsDescendantOf(tt.TagP))
                            {
                                Node.CoerceNode(lexer, node, tt.TagBr);
                                Node.TrimSpaces(lexer, element);
                                Node.InsertNodeAtEnd(element, node);
                                //node = lexer.InferredTag("br");
                                continue;
                            }
                        }
                        else if ((node.Tag.Model & ContentModel.INLINE) != 0 && node.Tag != tt.TagA &&
                                 (node.Tag.Model & ContentModel.OBJECT) == 0 &&
                                 (element.Tag.Model & ContentModel.INLINE) != 0)
                        {
                            /* allow any inline end tag to end current element */
                            lexer.PopInline(element);

                            if (element.Tag != tt.TagA)
                            {
                                /* NOTE(review): node.Tag == tt.TagA cannot hold here since this branch requires node.Tag != tt.TagA, so the 'else' arm always runs */
                                if (node.Tag == tt.TagA && node.Tag != element.Tag)
                                {
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                    lexer.UngetToken();
                                }
                                else
                                {
                                    Report.Warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
                                }

                                if ((mode & Lexer.PREFORMATTED) == 0)
                                {
                                    Node.TrimSpaces(lexer, element);
                                }
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }

                            /* if parent is <a> then discard unexpected inline end tag */
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }
                        /* special case </tr> etc. for stuff moved in front of table */
                        else if (lexer.Exiled && node.Tag.Model != 0 && (node.Tag.Model & ContentModel.TABLE) != 0)
                        {
                            lexer.UngetToken();
                            Node.TrimSpaces(lexer, element);
                            Node.TrimEmptyElement(lexer, element);
                            return;
                        }
                    }

                    /* allow any header tag to end current header */
                    if ((node.Tag.Model & ContentModel.HEADING) != 0 && (element.Tag.Model & ContentModel.HEADING) != 0)
                    {
                        if (node.Tag == element.Tag)
                        {
                            /* same heading tag: the token is consumed, not pushed back */
                            Report.Warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
                        }
                        else
                        {
                            /* different heading tag: push it back for the caller to handle */
                            Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                            lexer.UngetToken();
                        }
                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /*
                    an <A> tag ends any open <A> element
                    but <A href=...> is mapped to </A><A href=...>
                    */
                    if (node.Tag == tt.TagA && !node.Isimplicit && lexer.IsPushed(node))
                    {
                        /* coerce <a> to </a> unless it has some attributes */
                        if (node.Attributes == null)
                        {
                            node.Type = Node.END_TAG;
                            Report.Warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
                            lexer.PopInline(node);
                            lexer.UngetToken();
                            continue;
                        }

                        /* anchor with attributes: close this element and let the caller see the new <a> */
                        lexer.UngetToken();
                        Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                        lexer.PopInline(element);
                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* CENTER, DIV and HR are not allowed inside a heading: move or split around them */
                    if ((element.Tag.Model & ContentModel.HEADING) != 0)
                    {
                        if (node.Tag == tt.TagCenter || node.Tag == tt.TagDiv)
                        {
                            if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                            {
                                Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                continue;
                            }

                            Report.Warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

                            /* insert center as parent if heading is empty */
                            if (element.Content == null)
                            {
                                Node.InsertNodeAsParent(element, node);
                                continue;
                            }

                            /* split heading and make center parent of 2nd part */
                            Node.InsertNodeAfterElement(element, node);

                            if ((mode & Lexer.PREFORMATTED) == 0)
                            {
                                Node.TrimSpaces(lexer, element);
                            }

                            /* continue parsing into a fresh clone of the heading placed inside the new parent */
                            element = lexer.CloneNode(element);
                            element.Start = lexer.Lexsize;
                            element.End = lexer.Lexsize;
                            Node.InsertNodeAtEnd(node, element);
                            continue;
                        }

                        if (node.Tag == tt.TagHr)
                        {
                            if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                            {
                                Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                continue;
                            }

                            Report.Warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

                            /* insert hr before heading if heading is empty */
                            if (element.Content == null)
                            {
                                Node.InsertNodeBeforeElement(element, node);
                                continue;
                            }

                            /* split heading and insert hr before 2nd part */
                            Node.InsertNodeAfterElement(element, node);

                            if ((mode & Lexer.PREFORMATTED) == 0)
                            {
                                Node.TrimSpaces(lexer, element);
                            }

                            /* continue parsing into a fresh clone of the heading placed after the hr */
                            element = lexer.CloneNode(element);
                            element.Start = lexer.Lexsize;
                            element.End = lexer.Lexsize;
                            Node.InsertNodeAfterElement(node, element);
                            continue;
                        }
                    }

                    /* HR inside DT: wrap the hr in an inferred DD placed next to the dt */
                    if (element.Tag == tt.TagDt)
                    {
                        if (node.Tag == tt.TagHr)
                        {
                            if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                            {
                                Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                continue;
                            }

                            Report.Warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
                            Node dd = lexer.InferredTag("dd");

                            /* insert hr within dd before dt if dt is empty */
                            if (element.Content == null)
                            {
                                Node.InsertNodeBeforeElement(element, dd);
                                Node.InsertNodeAtEnd(dd, node);
                                continue;
                            }

                            /* split dt and insert hr within dd before 2nd part */
                            Node.InsertNodeAfterElement(element, dd);
                            Node.InsertNodeAtEnd(dd, node);

                            if ((mode & Lexer.PREFORMATTED) == 0)
                            {
                                Node.TrimSpaces(lexer, element);
                            }

                            /* continue parsing into a fresh clone of the dt placed after the dd */
                            element = lexer.CloneNode(element);
                            element.Start = lexer.Lexsize;
                            element.End = lexer.Lexsize;
                            Node.InsertNodeAfterElement(dd, element);
                            continue;
                        }
                    }

                    /*
                    if this is the end tag for an ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        Node parent;
                        for (parent = element.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag == parent.Tag)
                            {
                                if ((element.Tag.Model & ContentModel.OPT) == 0 && !element.Isimplicit)
                                {
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                }

                                if (element.Tag == tt.TagA)
                                {
                                    lexer.PopInline(element);
                                }

                                /* hand the ancestor's end tag back so the ancestor's parser sees it */
                                lexer.UngetToken();

                                if ((mode & Lexer.PREFORMATTED) == 0)
                                {
                                    Node.TrimSpaces(lexer, element);
                                }

                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                    }

                    /* block level tags end this element */
                    if ((node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        if (node.Type != Node.START_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if ((element.Tag.Model & ContentModel.OPT) == 0)
                        {
                            Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                        }

                        /* head-only tags are relocated via MoveToHead instead of ending this element */
                        if ((node.Tag.Model & ContentModel.HEAD) != 0 && (node.Tag.Model & ContentModel.BLOCK) == 0)
                        {
                            MoveToHead(lexer, element, node);
                            continue;
                        }

                        /*
                        prevent anchors from propagating into block tags
                        except for headings h1 to h6
                        */
                        if (element.Tag == tt.TagA)
                        {
                            if (node.Tag != null && (node.Tag.Model & ContentModel.HEADING) == 0)
                            {
                                lexer.PopInline(element);
                            }
                            else if (element.Content == null)
                            {
                                /* empty anchor directly before a heading: drop the anchor entirely */
                                Node.DiscardElement(element);
                                lexer.UngetToken();
                                return;
                            }
                        }

                        lexer.UngetToken();

                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* parse inline element */
                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        if (node.Isimplicit)
                        {
                            Report.Warning(lexer, element, node, Report.INSERTING_TAG);
                        }

                        /* trim white space before <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        Node.InsertNodeAtEnd(element, node);
                        /* recurse into the child using the current mode */
                        ParseTag(lexer, node, mode);
                        continue;
                    }

                    /* discard unexpected tags */
                    Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                }

                /*
                Reached only when the lexer ran out of tokens, so 'node' is null here
                and the warning below is issued with a null node argument.
                */
                if ((element.Tag.Model & ContentModel.OPT) == 0)
                {
                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
                }

                Node.TrimEmptyElement(lexer, element);
            }