InferredTag() public method

public InferredTag ( string name ) : Node
name string
return Node
Beispiel #1
0
            /*
            element is node created by the lexer
            upon seeing the start tag, or by the
            parser when the start tag is inferred
            */
            /// <summary>
            /// Parses the content of a block-level element, appending accepted child
            /// nodes to <paramref name="element"/> until the element's end tag is
            /// seen, the end of the element is inferred, or the lexer runs out of
            /// tokens.
            /// </summary>
            /// <param name="lexer">Lexer that supplies the tokens, the options (tag table) and the inline stack.</param>
            /// <param name="element">
            /// Node created by the lexer upon seeing the start tag, or by the parser
            /// when the start tag is inferred.
            /// </param>
            /// <param name="mode">
            /// Not used as an input: the value is overwritten with
            /// <c>Lexer.IGNORE_WHITESPACE</c> before the first token is read and is
            /// then used as the local whitespace mode.
            /// </param>
            public virtual void Parse(Lexer lexer, Node element, short mode)
            {
                Node node;
                bool checkstack;
                int istackbase = 0;
                TagCollection tt = lexer.Options.TagTable;

                checkstack = true;

                /* empty elements have no content to parse */
                if ((element.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                if (element.Tag == tt.TagForm && element.IsDescendantOf(tt.TagForm))
                {
                    Report.Warning(lexer, element, null, Report.ILLEGAL_NESTING);
                }

                /*
                InlineDup() asks the lexer to insert inline emphasis tags
                currently pushed on the istack, but take care to avoid
                propagating inline emphasis inside OBJECT or APPLET.
                For these elements a fresh inline stack context is created
                and disposed of upon reaching the end of the element.
                They thus behave like table cells in this respect.
                */
                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                {
                    /* remember the caller's base so it can be restored on exit */
                    istackbase = lexer.Istackbase;
                    lexer.Istackbase = lexer.Istack.Count;
                }

                if ((element.Tag.Model & ContentModel.MIXED) == 0)
                {
                    lexer.InlineDup(null);
                }

                mode = Lexer.IGNORE_WHITESPACE;

                while (true)
                {
                    node = lexer.GetToken(mode);
                    if (node == null)
                    {
                        break;
                    }

                    /* end tag for this element */
                    if (node.Type == Node.END_TAG && node.Tag != null &&
                        (node.Tag == element.Tag || element.Was == node.Tag))
                    {
                        if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                        {
                            /* pop inline stack */
                            while (lexer.Istack.Count > lexer.Istackbase)
                            {
                                lexer.PopInline(null);
                            }
                            lexer.Istackbase = istackbase;
                        }

                        element.Closed = true;
                        Node.TrimSpaces(lexer, element);
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* html, head and body tags are dropped here; only start tags are reported */
                    if (node.Tag == tt.TagHtml || node.Tag == tt.TagHead || node.Tag == tt.TagBody)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    if (node.Type == Node.END_TAG)
                    {
                        /* end tag of an unknown element */
                        if (node.Tag == null)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);

                            continue;
                        }

                        if (node.Tag == tt.TagBr)
                        {
                            /* treat a br end tag as a br start tag */
                            node.Type = Node.START_TAG;
                        }
                        else if (node.Tag == tt.TagP)
                        {
                            /* a stray p end tag becomes a br, followed by an inferred br */
                            Node.CoerceNode(lexer, node, tt.TagBr);
                            Node.InsertNodeAtEnd(element, node);
                            node = lexer.InferredTag("br");
                        }
                        else
                        {
                            /*
                            if this is the end tag for an ancestor element
                            then infer end tag for this element
                            */
                            Node parent;
                            for (parent = element.Parent; parent != null; parent = parent.Parent)
                            {
                                if (node.Tag != parent.Tag)
                                {
                                    continue;
                                }

                                if ((element.Tag.Model & ContentModel.OPT) == 0)
                                {
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                }

                                /* let the ancestor's parser see its own end tag */
                                lexer.UngetToken();

                                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                                {
                                    /* pop inline stack */
                                    while (lexer.Istack.Count > lexer.Istackbase)
                                    {
                                        lexer.PopInline(null);
                                    }
                                    lexer.Istackbase = istackbase;
                                }

                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }

                            /* special case </tr> etc. for stuff moved in front of table */
                            if (lexer.Exiled && (node.Tag.Model & ContentModel.TABLE) != 0)
                            {
                                lexer.UngetToken();
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                    }

                    /* mixed content model permits text */
                    if (node.Type == Node.TEXT_NODE)
                    {
                        /* at most one character long and that character is a space */
                        bool iswhitenode = node.End <= node.Start + 1 &&
                                           lexer.Lexbuf[node.Start] == (sbyte) ' ';

                        if (lexer.Options.EncloseBlockText && !iswhitenode)
                        {
                            /* wrap the text in an inferred paragraph and let that parse it */
                            lexer.UngetToken();
                            node = lexer.InferredTag("p");
                            Node.InsertNodeAtEnd(element, node);
                            ParseTag(lexer, node, Lexer.MIXED_CONTENT);
                            continue;
                        }

                        if (checkstack)
                        {
                            checkstack = false;

                            if ((element.Tag.Model & ContentModel.MIXED) == 0)
                            {
                                if (lexer.InlineDup(node) > 0)
                                {
                                    continue;
                                }
                            }
                        }

                        Node.InsertNodeAtEnd(element, node);
                        mode = Lexer.MIXED_CONTENT;
                        /*
                        HTML4 strict doesn't allow mixed content for
                        elements with %block; as their content model
                        */
                        lexer.Versions &= ~ HtmlVersion.Html40Strict;
                        continue;
                    }

                    if (Node.InsertMisc(element, node))
                    {
                        continue;
                    }

                    /* allow PARAM elements? */
                    if (node.Tag == tt.TagParam)
                    {
                        if (((element.Tag.Model & ContentModel.PARAM) != 0) &&
                            (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG))
                        {
                            Node.InsertNodeAtEnd(element, node);
                            continue;
                        }

                        /* otherwise discard it */
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* allow AREA elements? */
                    if (node.Tag == tt.TagArea)
                    {
                        if ((element.Tag == tt.TagMap) &&
                            (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG))
                        {
                            Node.InsertNodeAtEnd(element, node);
                            continue;
                        }

                        /* otherwise discard it */
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* ignore unknown start/end tags */
                    if (node.Tag == null)
                    {
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    Allow ContentModel.INLINE elements here.

                    Allow ContentModel.BLOCK elements here unless
                    lexer.ExcludeBlocks is set.

                    LI and DD are special cased.

                    Otherwise infer end tag for this element.
                    */

                    if ((node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (element.Tag == tt.TagTd || element.Tag == tt.TagTh)
                        {
                            /* if parent is a table cell, avoid inferring the end of the cell */

                            if ((node.Tag.Model & ContentModel.HEAD) != 0)
                            {
                                MoveToHead(lexer, element, node);
                                continue;
                            }

                            if ((node.Tag.Model & ContentModel.LIST) != 0)
                            {
                                lexer.UngetToken();
                                node = lexer.InferredTag("ul");
                                Node.AddClass(node, "noindent");
                                lexer.ExcludeBlocks = true;
                            }
                            else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                            {
                                lexer.UngetToken();
                                node = lexer.InferredTag("dl");
                                lexer.ExcludeBlocks = true;
                            }

                            /* infer end of current table cell */
                            if ((node.Tag.Model & ContentModel.BLOCK) == 0)
                            {
                                lexer.UngetToken();
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                        else if ((node.Tag.Model & ContentModel.BLOCK) != 0)
                        {
                            if (lexer.ExcludeBlocks)
                            {
                                if ((element.Tag.Model & ContentModel.OPT) == 0)
                                {
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                }

                                lexer.UngetToken();

                                /*
                                NOTE(review): unlike the other exits, this one restores the
                                stack base without popping the inline stack first - confirm
                                that this is intended before changing it.
                                */
                                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                                {
                                    lexer.Istackbase = istackbase;
                                }

                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                        /* things like list items */
                        else
                        {
                            if ((element.Tag.Model & ContentModel.OPT) == 0 && !element.Isimplicit)
                            {
                                Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                            }

                            if ((node.Tag.Model & ContentModel.HEAD) != 0)
                            {
                                MoveToHead(lexer, element, node);
                                continue;
                            }

                            lexer.UngetToken();

                            if ((node.Tag.Model & ContentModel.LIST) != 0)
                            {
                                /* already inside a list: end this element, the list takes the item */
                                if (element.Parent != null && element.Parent.Tag != null &&
                                    element.Parent.Tag.Parser == ParseList)
                                {
                                    Node.TrimSpaces(lexer, element);
                                    Node.TrimEmptyElement(lexer, element);
                                    return;
                                }

                                node = lexer.InferredTag("ul");
                                Node.AddClass(node, "noindent");
                            }
                            else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                            {
                                /*
                                Guard against a missing parent, as the LIST branch
                                above does, instead of dereferencing it blindly.
                                */
                                if (element.Parent != null && element.Parent.Tag == tt.TagDl)
                                {
                                    Node.TrimSpaces(lexer, element);
                                    Node.TrimEmptyElement(lexer, element);
                                    return;
                                }

                                node = lexer.InferredTag("dl");
                            }
                            else if ((node.Tag.Model & ContentModel.TABLE) != 0 ||
                                     (node.Tag.Model & ContentModel.ROW) != 0)
                            {
                                node = lexer.InferredTag("table");
                            }
                            else if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                            {
                                /* pop inline stack */
                                while (lexer.Istack.Count > lexer.Istackbase)
                                {
                                    lexer.PopInline(null);
                                }
                                lexer.Istackbase = istackbase;
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                            else
                            {
                                Node.TrimSpaces(lexer, element);
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                    }

                    /* parse known element */
                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        if ((node.Tag.Model & ContentModel.INLINE) != 0)
                        {
                            if (checkstack && !node.Isimplicit)
                            {
                                checkstack = false;

                                if (lexer.InlineDup(node) > 0)
                                {
                                    continue;
                                }
                            }

                            mode = Lexer.MIXED_CONTENT;
                        }
                        else
                        {
                            checkstack = true;
                            mode = Lexer.IGNORE_WHITESPACE;
                        }

                        /* trim white space before <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        Node.InsertNodeAtEnd(element, node);

                        if (node.Isimplicit)
                        {
                            Report.Warning(lexer, element, node, Report.INSERTING_TAG);
                        }

                        ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                        continue;
                    }

                    /* discard unexpected tags; an end tag is also handed to PopInline */
                    if (node.Type == Node.END_TAG)
                    {
                        lexer.PopInline(node);
                    }

                    Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                }

                /* the loop only exits via break, i.e. the lexer returned no more tokens (node is null) */
                if ((element.Tag.Model & ContentModel.OPT) == 0)
                {
                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
                }

                if ((element.Tag.Model & ContentModel.OBJECT) != 0)
                {
                    /* pop inline stack */
                    while (lexer.Istack.Count > lexer.Istackbase)
                    {
                        lexer.PopInline(null);
                    }
                    lexer.Istackbase = istackbase;
                }

                Node.TrimSpaces(lexer, element);
                Node.TrimEmptyElement(lexer, element);
            }
Beispiel #2
0
            /// <summary>
            /// Parses the content of a table row group, appending rows to
            /// <paramref name="rowgroup"/> until its end tag is seen, its end is
            /// inferred, or the lexer runs out of tokens. Cells without a row get an
            /// inferred <c>tr</c>; text, block and inline content is moved in front of
            /// the table; head content is moved to the head.
            /// </summary>
            /// <param name="lexer">Lexer that supplies the tokens and the options (tag table).</param>
            /// <param name="rowgroup">The row group node whose content is parsed.</param>
            /// <param name="mode">Not used by this parser; tokens are always read with <c>Lexer.IGNORE_WHITESPACE</c>.</param>
            public virtual void Parse(Lexer lexer, Node rowgroup, short mode)
            {
                TagCollection tt = lexer.Options.TagTable;

                /* empty elements have no content to parse */
                if ((rowgroup.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                while (true)
                {
                    Node node = lexer.GetToken(Lexer.IGNORE_WHITESPACE);
                    if (node == null)
                    {
                        break;
                    }

                    if (node.Tag == rowgroup.Tag)
                    {
                        if (node.Type == Node.END_TAG)
                        {
                            rowgroup.Closed = true;
                            Node.TrimEmptyElement(lexer, rowgroup);
                            return;
                        }

                        /* a new start tag of the same kind: push it back and end this group */
                        lexer.UngetToken();
                        return;
                    }

                    /* if </table> infer end tag */
                    if (node.Tag == tt.TagTable && node.Type == Node.END_TAG)
                    {
                        lexer.UngetToken();
                        Node.TrimEmptyElement(lexer, rowgroup);
                        return;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(rowgroup, node))
                    {
                        continue;
                    }

                    /* discard unknown tags */
                    if (node.Tag == null && node.Type != Node.TEXT_NODE)
                    {
                        Report.Warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    if TD or TH then infer <TR>
                    if text or inline or block move before table
                    if head content move to head
                    */

                    if (node.Type != Node.END_TAG)
                    {
                        if (node.Tag == tt.TagTd || node.Tag == tt.TagTh)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("tr");
                            Report.Warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
                        }
                        /*
                        The null check only protects the content model test. Applying it
                        to the whole condition would keep text nodes without a tag from
                        ever reaching this branch.
                        */
                        else if (node.Type == Node.TEXT_NODE ||
                                 (node.Tag != null &&
                                  (node.Tag.Model & (ContentModel.BLOCK | ContentModel.INLINE)) != 0))
                        {
                            Node.MoveBeforeTable(rowgroup, node, tt);
                            Report.Warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
                            lexer.Exiled = true;

                            /* only elements have content of their own to parse */
                            if (node.Type != Node.TEXT_NODE)
                            {
                                ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                            }

                            lexer.Exiled = false;
                            continue;
                        }
                        else if (node.Tag != null && (node.Tag.Model & ContentModel.HEAD) != 0)
                        {
                            Report.Warning(lexer, rowgroup, node, Report.TAG_NOT_ALLOWED_IN);
                            MoveToHead(lexer, rowgroup, node);
                            continue;
                        }
                    }

                    /*
                    if this is the end tag for ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        if (node.Tag == tt.TagForm)
                        {
                            lexer.BadForm = 1;
                            Report.Warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (node.Tag == tt.TagTr || node.Tag == tt.TagTd || node.Tag == tt.TagTh)
                        {
                            Report.Warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        Node parent;
                        for (parent = rowgroup.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag == parent.Tag)
                            {
                                /* let the ancestor's parser see its own end tag */
                                lexer.UngetToken();
                                Node.TrimEmptyElement(lexer, rowgroup);
                                return;
                            }
                        }
                    }

                    /*
                    if THEAD, TFOOT or TBODY then implied end tag
                    */
                    if (node.Tag != null && (node.Tag.Model & ContentModel.ROWGRP) != 0)
                    {
                        if (node.Type != Node.END_TAG)
                        {
                            lexer.UngetToken();
                        }

                        Node.TrimEmptyElement(lexer, rowgroup);
                        return;
                    }

                    if (node.Type == Node.END_TAG)
                    {
                        Report.Warning(lexer, rowgroup, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (node.Tag != tt.TagTr)
                    {
                        /* anything else gets wrapped in an inferred row; the token is pushed back for it */
                        node = lexer.InferredTag("tr");
                        Report.Warning(lexer, rowgroup, node, Report.MISSING_STARTTAG);
                        lexer.UngetToken();
                    }

                    /* node should be <TR> */
                    Node.InsertNodeAtEnd(rowgroup, node);
                    ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                }

                /* ran out of tokens */
                Node.TrimEmptyElement(lexer, rowgroup);
            }
Beispiel #3
0
            /// <summary>
            /// Parses the content of a table element, appending accepted children to
            /// <paramref name="table"/> until its end tag is seen, its end is
            /// inferred, or the lexer runs out of tokens. The lexer's inline stack
            /// base is raised for the duration of the table and restored on every exit.
            /// </summary>
            /// <param name="lexer">Lexer that supplies the tokens, the options (tag table) and the inline stack.</param>
            /// <param name="table">The table node whose content is parsed.</param>
            /// <param name="mode">Not used by this parser; tokens are always read with <c>Lexer.IGNORE_WHITESPACE</c>.</param>
            public virtual void Parse(Lexer lexer, Node table, short mode)
            {
                int istackbase;
                TagCollection tt = lexer.Options.TagTable;

                lexer.DeferDup();

                /* remember the caller's base so it can be restored on exit */
                istackbase = lexer.Istackbase;
                lexer.Istackbase = lexer.Istack.Count;

                while (true)
                {
                    Node node = lexer.GetToken(Lexer.IGNORE_WHITESPACE);
                    if (node == null)
                    {
                        break;
                    }

                    if (node.Tag == table.Tag && node.Type == Node.END_TAG)
                    {
                        lexer.Istackbase = istackbase;
                        table.Closed = true;
                        Node.TrimEmptyElement(lexer, table);
                        return;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(table, node))
                    {
                        continue;
                    }

                    /* discard unknown tags */
                    if (node.Tag == null && node.Type != Node.TEXT_NODE)
                    {
                        Report.Warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    if TD or TH (or a nested TABLE) then infer <TR>
                    if text or inline or block move before table
                    if head content move to head
                    */

                    if (node.Type != Node.END_TAG)
                    {
                        if (node.Tag == tt.TagTd || node.Tag == tt.TagTh || node.Tag == tt.TagTable)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("tr");
                            Report.Warning(lexer, table, node, Report.MISSING_STARTTAG);
                        }
                        /*
                        The null check only protects the content model test. Applying it
                        to the whole condition would keep text nodes without a tag from
                        ever reaching this branch, so they would be discarded below
                        instead of being moved in front of the table.
                        */
                        else if (node.Type == Node.TEXT_NODE ||
                                 (node.Tag != null &&
                                  (node.Tag.Model & (ContentModel.BLOCK | ContentModel.INLINE)) != 0))
                        {
                            Node.InsertNodeBeforeElement(table, node);
                            Report.Warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
                            lexer.Exiled = true;

                            /*
                            NOTE(review): the moved node is not handed to ParseTag here.
                            The C original this was ported from read
                                if (!node->type == TextNode)
                            which never holds, so the call was left disabled in the port.
                            The intended test was presumably "node is not a text node" -
                            confirm before enabling the call.
                            */

                            lexer.Exiled = false;
                            continue;
                        }
                        else if (node.Tag != null && (node.Tag.Model & ContentModel.HEAD) != 0)
                        {
                            MoveToHead(lexer, table, node);
                            continue;
                        }
                    }

                    /*
                    if this is the end tag for an ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        if (node.Tag == tt.TagForm)
                        {
                            lexer.BadForm = 1;
                            Report.Warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (node.Tag != null && (node.Tag.Model & (ContentModel.TABLE | ContentModel.ROW)) != 0)
                        {
                            Report.Warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        Node parent;
                        for (parent = table.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag != parent.Tag)
                            {
                                continue;
                            }

                            Report.Warning(lexer, table, node, Report.MISSING_ENDTAG_BEFORE);

                            /* let the ancestor's parser see its own end tag */
                            lexer.UngetToken();
                            lexer.Istackbase = istackbase;
                            Node.TrimEmptyElement(lexer, table);
                            return;
                        }
                    }

                    /* a known tag that is not table content ends the table */
                    if (node.Tag != null && (node.Tag.Model & ContentModel.TABLE) == 0)
                    {
                        lexer.UngetToken();
                        Report.Warning(lexer, table, node, Report.TAG_NOT_ALLOWED_IN);
                        lexer.Istackbase = istackbase;
                        Node.TrimEmptyElement(lexer, table);
                        return;
                    }

                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        Node.InsertNodeAtEnd(table, node);
                        ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                        continue;
                    }

                    /* discard whatever else turns up here */
                    Report.Warning(lexer, table, node, Report.DISCARDING_UNEXPECTED);
                }

                /* ran out of tokens before the table was closed */
                Report.Warning(lexer, table, null, Report.MISSING_ENDTAG_FOR);
                Node.TrimEmptyElement(lexer, table);
                lexer.Istackbase = istackbase;
            }
Beispiel #4
0
            public virtual void Parse(Lexer lexer, Node pre, short mode)
            {
                TagCollection tt = lexer.Options.TagTable;

                if ((pre.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                if ((pre.Tag.Model & ContentModel.OBSOLETE) != 0)
                {
                    Node.CoerceNode(lexer, pre, tt.TagPre);
                }

                lexer.InlineDup(null); /* tell lexer to insert inlines if needed */

                while (true)
                {
                    Node node = lexer.GetToken(Lexer.PREFORMATTED);
                    if (node == null)
                    {
                        break;
                    }
                    if (node.Tag == pre.Tag && node.Type == Node.END_TAG)
                    {
                        Node.TrimSpaces(lexer, pre);
                        pre.Closed = true;
                        Node.TrimEmptyElement(lexer, pre);
                        return;
                    }

                    if (node.Tag == tt.TagHtml)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    if (node.Type == Node.TEXT_NODE)
                    {
                        /* if first check for inital newline */
                        if (pre.Content == null)
                        {
                            if (node.Textarray[node.Start] == (sbyte) '\n')
                            {
                                ++node.Start;
                            }

                            if (node.Start >= node.End)
                            {
                                continue;
                            }
                        }

                        Node.InsertNodeAtEnd(pre, node);
                        continue;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(pre, node))
                    {
                        continue;
                    }

                    /* discard unknown  and PARAM tags */
                    if (node.Tag == null || node.Tag == tt.TagParam)
                    {
                        Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (node.Tag == tt.TagP)
                    {
                        if (node.Type == Node.START_TAG)
                        {
                            Report.Warning(lexer, pre, node, Report.USING_BR_INPLACE_OF);

                            /* trim white space before <p> in <pre>*/
                            Node.TrimSpaces(lexer, pre);

                            /* coerce both <p> and </p> to <br> */
                            Node.CoerceNode(lexer, node, tt.TagBr);
                            Node.InsertNodeAtEnd(pre, node);
                        }
                        else
                        {
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                        }
                        continue;
                    }

                    if ((node.Tag.Model & ContentModel.HEAD) != 0 && (node.Tag.Model & ContentModel.BLOCK) == 0)
                    {
                        MoveToHead(lexer, pre, node);
                        continue;
                    }

                    /*
                    if this is the end tag for an ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        if (node.Tag == tt.TagForm)
                        {
                            lexer.BadForm = 1;
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        Node parent;
                        for (parent = pre.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag == parent.Tag)
                            {
                                Report.Warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);

                                lexer.UngetToken();
                                Node.TrimSpaces(lexer, pre);
                                Node.TrimEmptyElement(lexer, pre);
                                return;
                            }
                        }
                    }

                    /* what about head content, HEAD, BODY tags etc? */
                    if ((node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        if (node.Type != Node.START_TAG)
                        {
                            Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        Report.Warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
                        lexer.ExcludeBlocks = true;

                        /* check if we need to infer a container */
                        if ((node.Tag.Model & ContentModel.LIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("ul");
                            Node.AddClass(node, "noindent");
                        }
                        else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("dl");
                        }
                        else if ((node.Tag.Model & ContentModel.TABLE) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("table");
                        }

                        Node.InsertNodeAfterElement(pre, node);
                        pre = lexer.InferredTag("pre");
                        Node.InsertNodeAfterElement(node, pre);
                        ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                        lexer.ExcludeBlocks = false;
                        continue;
                    }
                    /*
                    if (!((node.Tag.Model & ContentModel.INLINE) != 0))
                    {
                    Report.Warning(lexer, pre, node, Report.MISSING_ENDTAG_BEFORE);
                    lexer.UngetToken();
                    return;
                    }
                    */
                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        /* trim white space before <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            Node.TrimSpaces(lexer, pre);
                        }

                        Node.InsertNodeAtEnd(pre, node);
                        ParseTag(lexer, node, Lexer.PREFORMATTED);
                        continue;
                    }

                    /* discard unexpected tags */
                    Report.Warning(lexer, pre, node, Report.DISCARDING_UNEXPECTED);
                }

                Report.Warning(lexer, pre, null, Report.MISSING_ENDTAG_FOR);
                Node.TrimEmptyElement(lexer, pre);
            }
Beispiel #5
0
            /*
            Parses the children of a table row. "row" is the row element
            whose content is being read. "mode" is not consulted: tokens are
            always fetched with Lexer.IGNORE_WHITESPACE.

            The loop ends when the end tag of the row is seen, when a token
            that has to be handled by an enclosing element is pushed back to
            the lexer, or when the lexer has no more tokens.
            */
            public virtual void Parse(Lexer lexer, Node row, short mode)
            {
                TagCollection tt = lexer.Options.TagTable;

                if ((row.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                while (true)
                {
                    Node node = lexer.GetToken(Lexer.IGNORE_WHITESPACE);
                    if (node == null)
                    {
                        break;
                    }

                    if (node.Tag == row.Tag)
                    {
                        /*
                        the matching end tag closes the row; any other token
                        with the same tag is pushed back so the caller sees it
                        */
                        if (node.Type == Node.END_TAG)
                        {
                            row.Closed = true;
                        }
                        else
                        {
                            lexer.UngetToken();
                        }

                        Node.FixEmptyRow(lexer, row);
                        return;
                    }

                    /*
                    if this is the end tag for an ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        if (node.Tag == tt.TagForm)
                        {
                            lexer.BadForm = 1;
                            Report.Warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (node.Tag == tt.TagTd || node.Tag == tt.TagTh)
                        {
                            Report.Warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        for (Node parent = row.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag == parent.Tag)
                            {
                                lexer.UngetToken();
                                Node.TrimEmptyElement(lexer, row);
                                return;
                            }
                        }
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(row, node))
                    {
                        continue;
                    }

                    /* discard unknown tags */
                    if (node.Tag == null && node.Type != Node.TEXT_NODE)
                    {
                        Report.Warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* discard unexpected <table> element */
                    if (node.Tag == tt.TagTable)
                    {
                        Report.Warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* THEAD, TFOOT or TBODY */
                    if (node.Tag != null && (node.Tag.Model & ContentModel.ROWGRP) != 0)
                    {
                        lexer.UngetToken();
                        Node.TrimEmptyElement(lexer, row);
                        return;
                    }

                    /* any end tag that got this far is discarded, so everything below sees non-end tokens only */
                    if (node.Type == Node.END_TAG)
                    {
                        Report.Warning(lexer, row, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    if text or inline or block move before table
                    if head content move to head
                    */
                    if (node.Tag == tt.TagForm)
                    {
                        /* push the form back and wrap it in an inferred cell */
                        lexer.UngetToken();
                        node = lexer.InferredTag("td");
                        Report.Warning(lexer, row, node, Report.MISSING_STARTTAG);
                    }
                    else if (node.Type == Node.TEXT_NODE ||
                             (node.Tag != null &&
                              (node.Tag.Model & (ContentModel.BLOCK | ContentModel.INLINE)) != 0))
                    {
                        /*
                        The text test comes first and the null guard covers only
                        the content model lookup, so a text token whose Tag is
                        null is moved as well instead of being discarded below.
                        NOTE(review): assumes text tokens carry no Tag entry -
                        confirm against the lexer.
                        */
                        Node.MoveBeforeTable(row, node, tt);
                        Report.Warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
                        lexer.Exiled = true;

                        if (node.Type != Node.TEXT_NODE)
                        {
                            ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                        }

                        lexer.Exiled = false;
                        continue;
                    }
                    else if (node.Tag != null && (node.Tag.Model & ContentModel.HEAD) != 0)
                    {
                        Report.Warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
                        MoveToHead(lexer, row, node);
                        continue;
                    }

                    if (!(node.Tag == tt.TagTd || node.Tag == tt.TagTh))
                    {
                        Report.Warning(lexer, row, node, Report.TAG_NOT_ALLOWED_IN);
                        continue;
                    }

                    /* node should be <TD> or <TH> */
                    Node.InsertNodeAtEnd(row, node);

                    /* block exclusion is switched off while the cell is parsed and restored afterwards */
                    bool excludeState = lexer.ExcludeBlocks;
                    lexer.ExcludeBlocks = false;
                    ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                    lexer.ExcludeBlocks = excludeState;

                    /* pop inline stack */
                    while (lexer.Istack.Count > lexer.Istackbase)
                    {
                        lexer.PopInline(null);
                    }
                }

                Node.TrimEmptyElement(lexer, row);
            }
Beispiel #6
0
        /*
        Symptom: <center>
        Action: replace <center> by <div style="text-align: center">
        */
        private bool Center2Div(Lexer lexer, Node node, MutableObject pnode)
        {
            /* nothing to do unless this is a <center> element; false tells the caller so */
            if (node.Tag != _tt.TagCenter)
            {
                return false;
            }

            if (!lexer.Options.DropFontTags)
            {
                /* keep the element but rename it to <div> and add the centering style */
                node.Tag = _tt.TagDiv;
                node.Element = "div";
                AddStyleProperty(node, "text-align: center");
                return true;
            }

            /*
            DropFontTags is set: the <center> container is discarded and an
            inferred <br> is linked into the sibling chain in its place.
            The links are captured before DiscardContainer is called.
            */
            Node owner = node.Parent;

            if (node.Content != null)
            {
                /* link the <br> in directly after the former last child */
                Node tail = node.Last;

                DiscardContainer(node, pnode);

                Node lineBreak = lexer.InferredTag("br");

                if (tail.Next != null)
                {
                    tail.Next.Prev = lineBreak;
                }

                lineBreak.Next = tail.Next;
                tail.Next = lineBreak;
                lineBreak.Prev = tail;

                if (owner.Last == tail)
                {
                    owner.Last = lineBreak;
                }

                lineBreak.Parent = owner;
                return true;
            }

            /* empty <center>: the <br> takes over the element's own position */
            Node before = node.Prev;
            Node after = node.Next;

            DiscardContainer(node, pnode);

            Node replacement = lexer.InferredTag("br");
            replacement.Next = after;
            replacement.Prev = before;
            replacement.Parent = owner;

            if (after == null)
            {
                owner.Last = replacement;
            }
            else
            {
                after.Prev = replacement;
            }

            if (before == null)
            {
                owner.Content = replacement;
            }
            else
            {
                before.Next = replacement;
            }

            return true;
        }
Beispiel #7
0
 /*
 if a table row is empty then insert an empty cell
 this practice is consistent with browser behavior
 and avoids potential problems with row spanning cells
 */
 public static void FixEmptyRow(Lexer lexer, Node row)
 {
     /* only a row without any content gets an inferred <td> appended */
     if (row.Content == null)
     {
         Node inferredCell = lexer.InferredTag("td");
         InsertNodeAtEnd(row, inferredCell);

         /* report that the start tag of the cell was missing */
         Report.Warning(lexer, row, inferredCell, Report.MISSING_STARTTAG);
     }
 }
Beispiel #8
0
        /*
        Builds the document tree: creates a root node, attaches leading
        comments etc. and the first doctype to it, then hands the (possibly
        inferred) <html> element to the html parser. Returns the root node.
        */
        public static Node ParseDocument(Lexer lexer)
        {
            TagCollection tags = lexer.Options.TagTable;

            Node root = lexer.NewNode();
            root.Type = Node.ROOT_NODE;

            Node firstDoctype = null;
            Node token;

            while ((token = lexer.GetToken(Lexer.IGNORE_WHITESPACE)) != null)
            {
                /* deal with comments etc. */
                if (Node.InsertMisc(root, token))
                {
                    continue;
                }

                if (token.Type == Node.DOC_TYPE_TAG)
                {
                    /* only the first doctype is kept, later ones are reported */
                    if (firstDoctype != null)
                    {
                        Report.Warning(lexer, root, token, Report.DISCARDING_UNEXPECTED);
                    }
                    else
                    {
                        Node.InsertNodeAtEnd(root, token);
                        firstDoctype = token;
                    }

                    continue;
                }

                /* an end tag ahead of <html> is reported and skipped (marked TODO in the original) */
                if (token.Type == Node.END_TAG)
                {
                    Report.Warning(lexer, root, token, Report.DISCARDING_UNEXPECTED);
                    continue;
                }

                Node htmlElement;
                bool isHtmlStartTag = token.Type == Node.START_TAG && token.Tag == tags.TagHtml;

                if (isHtmlStartTag)
                {
                    htmlElement = token;
                }
                else
                {
                    /* anything else is pushed back and an <html> element is inferred */
                    lexer.UngetToken();
                    htmlElement = lexer.InferredTag("html");
                }

                Node.InsertNodeAtEnd(root, htmlElement);
                ParseHtml.Parse(lexer, htmlElement, 0); /* mode argument 0, marked TODO in the original */

                /* no further tokens are read once the html element has been parsed */
                break;
            }

            return root;
        }
Beispiel #9
0
            public virtual void Parse(Lexer lexer, Node element, short mode)
            {
                Node node;
                TagCollection tt = lexer.Options.TagTable;

                if ((element.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                if (element.Tag == tt.TagA)
                {
                    if (element.Attributes == null)
                    {
                        Report.Warning(lexer, element.Parent, element, Report.DISCARDING_UNEXPECTED);
                        Node.DiscardElement(element);
                        return;
                    }
                }

                /*
                ParseInline is used for some block level elements like H1 to H6
                For such elements we need to insert inline emphasis tags currently
                on the inline stack. For Inline elements, we normally push them
                onto the inline stack provided they aren't implicit or OBJECT/APPLET.
                This test is carried out in PushInline and PopInline, see istack.c
                We don't push A or SPAN to replicate current browser behavior
                */
                if (((element.Tag.Model & ContentModel.BLOCK) != 0) || (element.Tag == tt.TagDt))
                {
                    lexer.InlineDup(null);
                }
                else if ((element.Tag.Model & ContentModel.INLINE) != 0 && element.Tag != tt.TagA &&
                         element.Tag != tt.TagSpan)
                {
                    lexer.PushInline(element);
                }

                if (element.Tag == tt.TagNobr)
                {
                    lexer.BadLayout |= Report.USING_NOBR;
                }
                else if (element.Tag == tt.TagFont)
                {
                    lexer.BadLayout |= Report.USING_FONT;
                }

                /* Inline elements may or may not be within a preformatted element */
                if (mode != Lexer.PREFORMATTED)
                {
                    mode = Lexer.MIXED_CONTENT;
                }

                while (true)
                {
                    node = lexer.GetToken(mode);
                    if (node == null)
                    {
                        break;
                    }
                    /* end tag for current element */
                    if (node.Tag == element.Tag && node.Type == Node.END_TAG)
                    {
                        if ((element.Tag.Model & ContentModel.INLINE) != 0 && element.Tag != tt.TagA)
                        {
                            lexer.PopInline(node);
                        }

                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        /*
                        if a font element wraps an anchor and nothing else
                        then move the font element inside the anchor since
                        otherwise it won't alter the anchor text color
                        */
                        if (element.Tag == tt.TagFont && element.Content != null && element.Content == element.Last)
                        {
                            Node child = element.Content;

                            if (child.Tag == tt.TagA)
                            {
                                child.Parent = element.Parent;
                                child.Next = element.Next;
                                child.Prev = element.Prev;

                                if (child.Prev != null)
                                {
                                    child.Prev.Next = child;
                                }
                                else
                                {
                                    child.Parent.Content = child;
                                }

                                if (child.Next != null)
                                {
                                    child.Next.Prev = child;
                                }
                                else
                                {
                                    child.Parent.Last = child;
                                }

                                element.Next = null;
                                element.Prev = null;
                                element.Parent = child;
                                element.Content = child.Content;
                                element.Last = child.Last;
                                child.Content = element;
                                child.Last = element;
                                for (child = element.Content; child != null; child = child.Next)
                                {
                                    child.Parent = element;
                                }
                            }
                        }
                        element.Closed = true;
                        Node.TrimSpaces(lexer, element);
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* <u>...<u>  map 2nd <u> to </u> if 1st is explicit */
                    /* otherwise emphasis nesting is probably unintentional */
                    /* big and small have cumulative effect to leave them alone */
                    if (node.Type == Node.START_TAG && node.Tag == element.Tag && lexer.IsPushed(node) &&
                        !node.Isimplicit && !element.Isimplicit && node.Tag != null &&
                        ((node.Tag.Model & ContentModel.INLINE) != 0) && node.Tag != tt.TagA && node.Tag != tt.TagFont &&
                        node.Tag != tt.TagBig && node.Tag != tt.TagSmall)
                    {
                        if (element.Content != null && node.Attributes == null)
                        {
                            Report.Warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
                            node.Type = Node.END_TAG;
                            lexer.UngetToken();
                            continue;
                        }

                        Report.Warning(lexer, element, node, Report.NESTED_EMPHASIS);
                    }

                    if (node.Type == Node.TEXT_NODE)
                    {
                        /* only called for 1st child */
                        if (element.Content == null && (mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        if (node.Start >= node.End)
                        {
                            continue;
                        }

                        Node.InsertNodeAtEnd(element, node);
                        continue;
                    }

                    /* mixed content model so allow text */
                    if (Node.InsertMisc(element, node))
                    {
                        continue;
                    }

                    /* deal with HTML tags */
                    if (node.Tag == tt.TagHtml)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        /* otherwise infer end of inline element */
                        lexer.UngetToken();
                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* within <dt> or <pre> map <p> to <br> */
                    if (node.Tag == tt.TagP && node.Type == Node.START_TAG &&
                        ((mode & Lexer.PREFORMATTED) != 0 || element.Tag == tt.TagDt || element.IsDescendantOf(tt.TagDt)))
                    {
                        node.Tag = tt.TagBr;
                        node.Element = "br";
                        Node.TrimSpaces(lexer, element);
                        Node.InsertNodeAtEnd(element, node);
                        continue;
                    }

                    /* ignore unknown and PARAM tags */
                    if (node.Tag == null || node.Tag == tt.TagParam)
                    {
                        Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (node.Tag == tt.TagBr && node.Type == Node.END_TAG)
                    {
                        node.Type = Node.START_TAG;
                    }

                    if (node.Type == Node.END_TAG)
                    {
                        /* coerce </br> to <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            node.Type = Node.START_TAG;
                        }
                        else if (node.Tag == tt.TagP)
                        {
                            /* coerce unmatched </p> to <br><br> */
                            if (!element.IsDescendantOf(tt.TagP))
                            {
                                Node.CoerceNode(lexer, node, tt.TagBr);
                                Node.TrimSpaces(lexer, element);
                                Node.InsertNodeAtEnd(element, node);
                                //node = lexer.InferredTag("br");
                                continue;
                            }
                        }
                        else if ((node.Tag.Model & ContentModel.INLINE) != 0 && node.Tag != tt.TagA &&
                                 (node.Tag.Model & ContentModel.OBJECT) == 0 &&
                                 (element.Tag.Model & ContentModel.INLINE) != 0)
                        {
                            /* allow any inline end tag to end current element */
                            lexer.PopInline(element);

                            if (element.Tag != tt.TagA)
                            {
                                if (node.Tag == tt.TagA && node.Tag != element.Tag)
                                {
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                    lexer.UngetToken();
                                }
                                else
                                {
                                    Report.Warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
                                }

                                if ((mode & Lexer.PREFORMATTED) == 0)
                                {
                                    Node.TrimSpaces(lexer, element);
                                }
                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }

                            /* if parent is <a> then discard unexpected inline end tag */
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }
                            /* special case </tr> etc. for stuff moved in front of table */
                        else if (lexer.Exiled && node.Tag.Model != 0 && (node.Tag.Model & ContentModel.TABLE) != 0)
                        {
                            lexer.UngetToken();
                            Node.TrimSpaces(lexer, element);
                            Node.TrimEmptyElement(lexer, element);
                            return;
                        }
                    }

                    /* allow any header tag to end current header */
                    if ((node.Tag.Model & ContentModel.HEADING) != 0 && (element.Tag.Model & ContentModel.HEADING) != 0)
                    {
                        if (node.Tag == element.Tag)
                        {
                            Report.Warning(lexer, element, node, Report.NON_MATCHING_ENDTAG);
                        }
                        else
                        {
                            Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                            lexer.UngetToken();
                        }
                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /*
                    an <A> tag to ends any open <A> element
                    but <A href=...> is mapped to </A><A href=...>
                    */
                    if (node.Tag == tt.TagA && !node.Isimplicit && lexer.IsPushed(node))
                    {
                        /* coerce <a> to </a> unless it has some attributes */
                        if (node.Attributes == null)
                        {
                            node.Type = Node.END_TAG;
                            Report.Warning(lexer, element, node, Report.COERCE_TO_ENDTAG);
                            lexer.PopInline(node);
                            lexer.UngetToken();
                            continue;
                        }

                        lexer.UngetToken();
                        Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                        lexer.PopInline(element);
                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }
                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    if ((element.Tag.Model & ContentModel.HEADING) != 0)
                    {
                        if (node.Tag == tt.TagCenter || node.Tag == tt.TagDiv)
                        {
                            if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                            {
                                Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                continue;
                            }

                            Report.Warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

                            /* insert center as parent if heading is empty */
                            if (element.Content == null)
                            {
                                Node.InsertNodeAsParent(element, node);
                                continue;
                            }

                            /* split heading and make center parent of 2nd part */
                            Node.InsertNodeAfterElement(element, node);

                            if ((mode & Lexer.PREFORMATTED) == 0)
                            {
                                Node.TrimSpaces(lexer, element);
                            }

                            element = lexer.CloneNode(element);
                            element.Start = lexer.Lexsize;
                            element.End = lexer.Lexsize;
                            Node.InsertNodeAtEnd(node, element);
                            continue;
                        }

                        if (node.Tag == tt.TagHr)
                        {
                            if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                            {
                                Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                continue;
                            }

                            Report.Warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);

                            /* insert hr before heading if heading is empty */
                            if (element.Content == null)
                            {
                                Node.InsertNodeBeforeElement(element, node);
                                continue;
                            }

                            /* split heading and insert hr before 2nd part */
                            Node.InsertNodeAfterElement(element, node);

                            if ((mode & Lexer.PREFORMATTED) == 0)
                            {
                                Node.TrimSpaces(lexer, element);
                            }

                            element = lexer.CloneNode(element);
                            element.Start = lexer.Lexsize;
                            element.End = lexer.Lexsize;
                            Node.InsertNodeAfterElement(node, element);
                            continue;
                        }
                    }

                    if (element.Tag == tt.TagDt)
                    {
                        if (node.Tag == tt.TagHr)
                        {
                            if (node.Type != Node.START_TAG && node.Type != Node.START_END_TAG)
                            {
                                Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                                continue;
                            }

                            Report.Warning(lexer, element, node, Report.TAG_NOT_ALLOWED_IN);
                            Node dd = lexer.InferredTag("dd");

                            /* insert hr within dd before dt if dt is empty */
                            if (element.Content == null)
                            {
                                Node.InsertNodeBeforeElement(element, dd);
                                Node.InsertNodeAtEnd(dd, node);
                                continue;
                            }

                            /* split dt and insert hr within dd before 2nd part */
                            Node.InsertNodeAfterElement(element, dd);
                            Node.InsertNodeAtEnd(dd, node);

                            if ((mode & Lexer.PREFORMATTED) == 0)
                            {
                                Node.TrimSpaces(lexer, element);
                            }

                            element = lexer.CloneNode(element);
                            element.Start = lexer.Lexsize;
                            element.End = lexer.Lexsize;
                            Node.InsertNodeAfterElement(dd, element);
                            continue;
                        }
                    }

                    /*
                    if this is the end tag for an ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        Node parent;
                        for (parent = element.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag == parent.Tag)
                            {
                                if ((element.Tag.Model & ContentModel.OPT) == 0 && !element.Isimplicit)
                                {
                                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                                }

                                if (element.Tag == tt.TagA)
                                {
                                    lexer.PopInline(element);
                                }

                                lexer.UngetToken();

                                if ((mode & Lexer.PREFORMATTED) == 0)
                                {
                                    Node.TrimSpaces(lexer, element);
                                }

                                Node.TrimEmptyElement(lexer, element);
                                return;
                            }
                        }
                    }

                    /* block level tags end this element */
                    if ((node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        if (node.Type != Node.START_TAG)
                        {
                            Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if ((element.Tag.Model & ContentModel.OPT) == 0)
                        {
                            Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_BEFORE);
                        }

                        if ((node.Tag.Model & ContentModel.HEAD) != 0 && (node.Tag.Model & ContentModel.BLOCK) == 0)
                        {
                            MoveToHead(lexer, element, node);
                            continue;
                        }

                        /*
                        prevent anchors from propagating into block tags
                        except for headings h1 to h6
                        */
                        if (element.Tag == tt.TagA)
                        {
                            if (node.Tag != null && (node.Tag.Model & ContentModel.HEADING) == 0)
                            {
                                lexer.PopInline(element);
                            }
                            else if (element.Content == null)
                            {
                                Node.DiscardElement(element);
                                lexer.UngetToken();
                                return;
                            }
                        }

                        lexer.UngetToken();

                        if ((mode & Lexer.PREFORMATTED) == 0)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        Node.TrimEmptyElement(lexer, element);
                        return;
                    }

                    /* parse inline element */
                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        if (node.Isimplicit)
                        {
                            Report.Warning(lexer, element, node, Report.INSERTING_TAG);
                        }

                        /* trim white space before <br> */
                        if (node.Tag == tt.TagBr)
                        {
                            Node.TrimSpaces(lexer, element);
                        }

                        Node.InsertNodeAtEnd(element, node);
                        ParseTag(lexer, node, mode);
                        continue;
                    }

                    /* discard unexpected tags */
                    Report.Warning(lexer, element, node, Report.DISCARDING_UNEXPECTED);
                }

                if ((element.Tag.Model & ContentModel.OPT) == 0)
                {
                    Report.Warning(lexer, element, node, Report.MISSING_ENDTAG_FOR);
                }

                Node.TrimEmptyElement(lexer, element);
            }
Beispiel #10
0
            /*
            Parses the children of the head element.

            Tokens are read until the matching end tag, the end of input, a text
            node, or a tag whose content model lacks ContentModel.HEAD; in the
            last two cases the token is pushed back to the lexer. If no title
            start tag was seen, a warning is issued and a title is inferred.

            The mode argument is not consulted here: Lexer.IGNORE_WHITESPACE is
            always used, both for tokenizing and for parsing child elements.
            */
            public virtual void Parse(Lexer lexer, Node head, short mode)
            {
                int titleCount = 0;
                int baseCount = 0;
                TagCollection tags = lexer.Options.TagTable;
                Node token;

                while ((token = lexer.GetToken(Lexer.IGNORE_WHITESPACE)) != null)
                {
                    /* the matching end tag closes the head */
                    if (token.Tag == head.Tag && token.Type == Node.END_TAG)
                    {
                        head.Closed = true;
                        break;
                    }

                    /* text ends the head: hand the token back and stop */
                    if (token.Type == Node.TEXT_NODE)
                    {
                        lexer.UngetToken();
                        break;
                    }

                    /* comments etc. are taken care of by InsertMisc */
                    if (Node.InsertMisc(head, token))
                    {
                        continue;
                    }

                    if (token.Type == Node.DOC_TYPE_TAG)
                    {
                        Node.InsertDocType(lexer, head, token);
                        continue;
                    }

                    /* unknown tags are dropped with a warning */
                    if (token.Tag == null)
                    {
                        Report.Warning(lexer, head, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* a tag outside the HEAD content model ends the head */
                    if ((token.Tag.Model & ContentModel.HEAD) == 0)
                    {
                        lexer.UngetToken();
                        break;
                    }

                    bool opensElement = token.Type == Node.START_TAG || token.Type == Node.START_END_TAG;
                    if (!opensElement)
                    {
                        /* whatever is left at this point (e.g. end tags) is discarded */
                        Report.Warning(lexer, head, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (token.Tag == tags.TagTitle)
                    {
                        /* warn on the second and every later title */
                        if (++titleCount > 1)
                        {
                            Report.Warning(lexer, head, token, Report.TOO_MANY_ELEMENTS);
                        }
                    }
                    else if (token.Tag == tags.TagBase)
                    {
                        /* warn on the second and every later base */
                        if (++baseCount > 1)
                        {
                            Report.Warning(lexer, head, token, Report.TOO_MANY_ELEMENTS);
                        }
                    }
                    else if (token.Tag == tags.TagNoscript)
                    {
                        Report.Warning(lexer, head, token, Report.TAG_NOT_ALLOWED_IN);
                    }

                    /* the element is kept in the head even when a warning was issued */
                    Node.InsertNodeAtEnd(head, token);
                    ParseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                }

                if (titleCount != 0)
                {
                    return;
                }

                /* no title seen: report it and supply an inferred one */
                Report.Warning(lexer, head, null, Report.MISSING_TITLE_ELEMENT);
                Node.InsertNodeAtEnd(head, lexer.InferredTag("title"));
            }
Beispiel #11
0
            /*
            Parses the children of the html element.

            Phase 1 locates the head: tokens are read until a head tag is seen;
            stray </html> end tags are discarded, comments etc. are attached
            via InsertMisc, and any other token (or end of input) causes a
            head element to be inferred. The head is then appended to html and
            parsed with ParseHead.

            Phase 2 looks for body / frameset / noframes. Frameset elements are
            parsed in place; once a frameset has been seen, body and other
            content are routed into a (possibly inferred) noframes element.
            Otherwise the loop is left with node holding a real or inferred
            body, which is appended to html and parsed.

            If the input ends during phase 2 the method returns without
            creating a body (the inference is commented out below).
            */
            public virtual void Parse(Lexer lexer, Node html, short mode)
            {
                Node node;
                Node frameset = null; /* first frameset start tag seen, if any */
                Node noframes = null; /* noframes element that receives non-frame content */

                lexer.Options.XmlTags = false;
                lexer.SeenBodyEndTag = 0;
                TagCollection tt = lexer.Options.TagTable;

                /* phase 1: find or infer the head element */
                for (;;)
                {
                    node = lexer.GetToken(Lexer.IGNORE_WHITESPACE);

                    if (node == null)
                    {
                        node = lexer.InferredTag("head");
                        break;
                    }

                    if (node.Tag == tt.TagHead)
                        break;

                    if (node.Tag == html.Tag && node.Type == Node.END_TAG)
                    {
                        Report.Warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(html, node))
                    {
                        continue;
                    }

                    /* anything else belongs after the head: push it back and infer <head> */
                    lexer.UngetToken();
                    node = lexer.InferredTag("head");
                    break;
                }

                Node head = node;
                Node.InsertNodeAtEnd(html, head);
                ParseHead.Parse(lexer, head, mode);

                /* phase 2: find body, frameset or noframes */
                for (;;)
                {
                    node = lexer.GetToken(Lexer.IGNORE_WHITESPACE);

                    if (node == null)
                    {
                        if (frameset == null)
                        {
                            /* create an empty body */
                            //node = lexer.InferredTag("body");
                        }

                        return;
                    }

                    /* robustly handle html tags */
                    if (node.Tag == html.Tag)
                    {
                        /* warn only for non-start html tags, and only when no frameset has been seen */
                        if (node.Type != Node.START_TAG && frameset == null)
                        {
                            Report.Warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(html, node))
                    {
                        continue;
                    }

                    /* if frameset document coerce <body> to <noframes> */
                    if (node.Tag == tt.TagBody)
                    {
                        if (node.Type != Node.START_TAG)
                        {
                            Report.Warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (frameset != null)
                        {
                            /* push <body> back so it is read again while parsing noframes */
                            lexer.UngetToken();

                            if (noframes == null)
                            {
                                noframes = lexer.InferredTag("noframes");
                                Node.InsertNodeAtEnd(frameset, noframes);
                                Report.Warning(lexer, html, noframes, Report.INSERTING_TAG);
                            }

                            ParseTag(lexer, noframes, mode);
                            continue;
                        }

                        break; /* to parse body */
                    }

                    /* flag an error if we see more than one frameset */
                    if (node.Tag == tt.TagFrameset)
                    {
                        if (node.Type != Node.START_TAG)
                        {
                            Report.Warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (frameset != null)
                        {
                            Report.Error(lexer, html, node, Report.DUPLICATE_FRAMESET);
                        }
                        else
                        {
                            frameset = node;
                        }

                        /* a duplicate frameset is still inserted and parsed after the error */
                        Node.InsertNodeAtEnd(html, node);
                        ParseTag(lexer, node, mode);

                        /*
                        see if it includes a noframes element so
                        that we can merge subsequent noframes elements
                        */

                        /* note: scans the first frameset's children; the last noframes found wins */
                        for (node = frameset.Content; node != null; node = node.Next)
                        {
                            if (node.Tag == tt.TagNoframes)
                            {
                                noframes = node;
                            }
                        }
                        continue;
                    }

                    /* if not a frameset document coerce <noframes> to <body> */
                    if (node.Tag == tt.TagNoframes)
                    {
                        if (node.Type != Node.START_TAG)
                        {
                            Report.Warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        if (frameset == null)
                        {
                            /* the noframes tag itself is dropped; an inferred body takes its place */
                            Report.Warning(lexer, html, node, Report.DISCARDING_UNEXPECTED);
                            node = lexer.InferredTag("body");
                            break;
                        }

                        if (noframes == null)
                        {
                            noframes = node;
                            Node.InsertNodeAtEnd(frameset, noframes);
                        }

                        /* later noframes tags are merged into the one already recorded */
                        ParseTag(lexer, noframes, mode);
                        continue;
                    }

                    /* start tags with the HEAD content model are relocated via MoveToHead */
                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        if (node.Tag != null && (node.Tag.Model & ContentModel.HEAD) != 0)
                        {
                            MoveToHead(lexer, html, node);
                            continue;
                        }
                    }

                    /* any other token is pushed back for the noframes/body parser to read */
                    lexer.UngetToken();

                    /* insert other content into noframes element */

                    if (frameset != null)
                    {
                        if (noframes == null)
                        {
                            noframes = lexer.InferredTag("noframes");
                            Node.InsertNodeAtEnd(frameset, noframes);
                        }
                        else
                        {
                            Report.Warning(lexer, html, node, Report.NOFRAMES_CONTENT);
                        }

                        ParseTag(lexer, noframes, mode);
                        continue;
                    }

                    node = lexer.InferredTag("body");
                    break;
                }

                /* node must be body */

                Node.InsertNodeAtEnd(html, node);
                ParseTag(lexer, node, mode);
            }
Beispiel #12
0
            /*
            Parses the children of a frameset element.

            Report.USING_FRAMES is recorded in lexer.BadAccess. Tokens are then
            read until the matching end tag (the frameset is marked closed and
            the method returns) or the end of input (a MISSING_ENDTAG_FOR
            warning is issued). Start tags with the HEAD content model are
            relocated via MoveToHead, a body tag is replaced by an inferred
            noframes, elements with the FRAMES content model are appended, and
            everything else is discarded with a warning.

            The mode argument is not consulted by this method.
            */
            public virtual void Parse(Lexer lexer, Node frameset, short mode)
            {
                TagCollection tags = lexer.Options.TagTable;
                Node token;

                lexer.BadAccess |= Report.USING_FRAMES;

                while ((token = lexer.GetToken(Lexer.IGNORE_WHITESPACE)) != null)
                {
                    /* the matching end tag finishes the frameset */
                    if (token.Tag == frameset.Tag && token.Type == Node.END_TAG)
                    {
                        frameset.Closed = true;
                        Node.TrimSpaces(lexer, frameset);
                        return;
                    }

                    /* comments etc. are taken care of by InsertMisc */
                    if (Node.InsertMisc(frameset, token))
                    {
                        continue;
                    }

                    /* tokens without a known tag are dropped */
                    if (token.Tag == null)
                    {
                        Report.Warning(lexer, frameset, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* token.Tag is known to be non-null from here on */
                    bool opensElement = token.Type == Node.START_TAG || token.Type == Node.START_END_TAG;
                    if (opensElement && (token.Tag.Model & ContentModel.HEAD) != 0)
                    {
                        MoveToHead(lexer, frameset, token);
                        continue;
                    }

                    /* a body here is pushed back and replaced by an inferred noframes */
                    if (token.Tag == tags.TagBody)
                    {
                        lexer.UngetToken();
                        token = lexer.InferredTag("noframes");
                        Report.Warning(lexer, frameset, token, Report.INSERTING_TAG);
                    }

                    if ((token.Tag.Model & ContentModel.FRAMES) != 0)
                    {
                        if (token.Type == Node.START_TAG)
                        {
                            /* container element: append it and parse its content */
                            Node.InsertNodeAtEnd(frameset, token);
                            lexer.ExcludeBlocks = false;
                            ParseTag(lexer, token, Lexer.MIXED_CONTENT);
                            continue;
                        }

                        if (token.Type == Node.START_END_TAG)
                        {
                            /* self-closed element: append only, there is nothing to parse */
                            Node.InsertNodeAtEnd(frameset, token);
                            continue;
                        }
                    }

                    /* discard unexpected tags */
                    Report.Warning(lexer, frameset, token, Report.DISCARDING_UNEXPECTED);
                }

                /* input ended before the frameset end tag was seen */
                Report.Warning(lexer, frameset, null, Report.MISSING_ENDTAG_FOR);
            }
Beispiel #13
0
            /*
            Parses the children of a definition list element (the code infers
            "dl", "dt" and "dd" tags, so list is presumably a <dl>).

            Tokens are read until the list's own end tag, the end tag of an
            ancestor (pushed back so the ancestor can consume it), a token that
            is neither block nor inline, or the end of input. Text and other
            content that is not dt/dd is pushed back and wrapped in an inferred
            dt (text) or dd (elements). A center element splits the list in
            two: list is re-pointed at a freshly inferred dl placed after the
            center, so later items go into that new list.

            Child dt/dd elements are always parsed with Lexer.IGNORE_WHITESPACE;
            the mode argument is only forwarded when parsing a center element.
            */
            public virtual void Parse(Lexer lexer, Node list, short mode)
            {
                TagCollection tt = lexer.Options.TagTable;

                /* elements with the EMPTY content model have no content to parse */
                if ((list.Tag.Model & ContentModel.EMPTY) != 0)
                    return;

                lexer.Insert = - 1; /* defer implicit inline start tags */

                while (true)
                {
                    Node node = lexer.GetToken(Lexer.IGNORE_WHITESPACE);
                    if (node == null)
                    {
                        break;
                    }
                    /* matching end tag: close the list and trim it if it ended up empty */
                    if (node.Tag == list.Tag && node.Type == Node.END_TAG)
                    {
                        list.Closed = true;
                        Node.TrimEmptyElement(lexer, list);
                        return;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(list, node))
                    {
                        continue;
                    }

                    /* bare text: push it back and wrap it in an inferred <dt> */
                    if (node.Type == Node.TEXT_NODE)
                    {
                        lexer.UngetToken();
                        node = lexer.InferredTag("dt");
                        Report.Warning(lexer, list, node, Report.MISSING_STARTTAG);
                    }

                    /* discard tokens without a known tag */
                    if (node.Tag == null)
                    {
                        Report.Warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    if this is the end tag for an ancestor element
                    then infer end tag for this element
                    */
                    if (node.Type == Node.END_TAG)
                    {
                        /* a </form> is never treated as an ancestor end tag: flag it and drop it */
                        if (node.Tag == tt.TagForm)
                        {
                            lexer.BadForm = 1;
                            Report.Warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        Node parent;
                        for (parent = list.Parent; parent != null; parent = parent.Parent)
                        {
                            if (node.Tag == parent.Tag)
                            {
                                Report.Warning(lexer, list, node, Report.MISSING_ENDTAG_BEFORE);

                                /* leave the end tag for the ancestor's parser */
                                lexer.UngetToken();
                                Node.TrimEmptyElement(lexer, list);
                                return;
                            }
                        }
                    }

                    /* center in a dt or a dl breaks the dl list in two */
                    /* NOTE(review): node.Type is not checked here, so a </center> reaching this point takes the same path - confirm intended */
                    if (node.Tag == tt.TagCenter)
                    {
                        if (list.Content != null)
                        {
                            Node.InsertNodeAfterElement(list, node);
                        }
                        else
                        {
                            /* trim empty dl list */
                            Node.InsertNodeBeforeElement(list, node);
                            Node.DiscardElement(list);
                        }

                        /* and parse contents of center */
                        ParseTag(lexer, node, mode);

                        /* now create a new dl element */
                        list = lexer.InferredTag("dl");
                        Node.InsertNodeAfterElement(node, list);
                        continue;
                    }

                    /* anything other than dt/dd is pushed back and, if acceptable, wrapped in an inferred <dd> */
                    if (!(node.Tag == tt.TagDt || node.Tag == tt.TagDd))
                    {
                        lexer.UngetToken();

                        /* neither block nor inline: not allowed here, so end the list */
                        if ((node.Tag.Model & (ContentModel.BLOCK | ContentModel.INLINE)) == 0)
                        {
                            Report.Warning(lexer, list, node, Report.TAG_NOT_ALLOWED_IN);
                            Node.TrimEmptyElement(lexer, list);
                            return;
                        }

                        /* if DD appeared directly in BODY then exclude blocks */
                        if ((node.Tag.Model & ContentModel.INLINE) == 0 && lexer.ExcludeBlocks)
                        {
                            Node.TrimEmptyElement(lexer, list);
                            return;
                        }

                        node = lexer.InferredTag("dd");
                        Report.Warning(lexer, list, node, Report.MISSING_STARTTAG);
                    }

                    /* a stray </dt> or </dd> with no open element to close is discarded */
                    if (node.Type == Node.END_TAG)
                    {
                        Report.Warning(lexer, list, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /* node should be <DT> or <DD>*/
                    Node.InsertNodeAtEnd(list, node);
                    ParseTag(lexer, node, Lexer.IGNORE_WHITESPACE);
                }

                /* input ended before the list's end tag was seen */
                Report.Warning(lexer, list, null, Report.MISSING_ENDTAG_FOR);
                Node.TrimEmptyElement(lexer, list);
            }
Beispiel #14
0
        /*
        Removes or rewrites an element that ended up empty.

        When lexer.CanPrune(element) is true the element is discarded; a
        TRIM_EMPTY_ELEMENT warning is issued first unless it is a text node.
        Otherwise, a p element without content is coerced to br and followed
        by a second, inferred br. Any other element is left untouched.
        */
        public static void TrimEmptyElement(Lexer lexer, Node element)
        {
            TagCollection tags = lexer.Options.TagTable;

            if (lexer.CanPrune(element))
            {
                /* text nodes are dropped silently, everything else is reported */
                bool isText = element.Type == TEXT_NODE;
                if (!isText)
                {
                    Report.Warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
                }

                DiscardElement(element);
                return;
            }

            bool isEmptyParagraph = element.Tag == tags.TagP && element.Content == null;
            if (!isEmptyParagraph)
            {
                return;
            }

            /* replace <p></p> by <br><br> to preserve formatting */
            Node secondBreak = lexer.InferredTag("br");
            CoerceNode(lexer, element, tags.TagBr);
            InsertNodeAfterElement(element, secondBreak);
        }
Beispiel #15
0
            public virtual void Parse(Lexer lexer, Node body, short mode)
            {
                bool checkstack;

                mode = Lexer.IGNORE_WHITESPACE;
                checkstack = true;
                TagCollection tt = lexer.Options.TagTable;

                while (true)
                {
                    Node node = lexer.GetToken(mode);
                    if (node == null)
                    {
                        break;
                    }
                    if (node.Tag == body.Tag && node.Type == Node.END_TAG)
                    {
                        body.Closed = true;
                        Node.TrimSpaces(lexer, body);
                        lexer.SeenBodyEndTag = 1;
                        mode = Lexer.IGNORE_WHITESPACE;

                        if (body.Parent.Tag == tt.TagNoframes)
                        {
                            break;
                        }

                        continue;
                    }

                    if (node.Tag == tt.TagNoframes)
                    {
                        if (node.Type == Node.START_TAG)
                        {
                            Node.InsertNodeAtEnd(body, node);
                            ParseBlock.Parse(lexer, node, mode);
                            continue;
                        }

                        if (node.Type == Node.END_TAG && body.Parent.Tag == tt.TagNoframes)
                        {
                            Node.TrimSpaces(lexer, body);
                            lexer.UngetToken();
                            break;
                        }
                    }

                    if ((node.Tag == tt.TagFrame || node.Tag == tt.TagFrameset) && body.Parent.Tag == tt.TagNoframes)
                    {
                        Node.TrimSpaces(lexer, body);
                        lexer.UngetToken();
                        break;
                    }

                    if (node.Tag == tt.TagHtml)
                    {
                        if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                        {
                            Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    bool iswhitenode = node.Type == Node.TEXT_NODE && node.End <= node.Start + 1 &&
                                       node.Textarray[node.Start] == (sbyte) ' ';

                    /* deal with comments etc. */
                    if (Node.InsertMisc(body, node))
                    {
                        continue;
                    }

                    if (lexer.SeenBodyEndTag == 1 && !iswhitenode)
                    {
                        ++lexer.SeenBodyEndTag;
                        Report.Warning(lexer, body, node, Report.CONTENT_AFTER_BODY);
                    }

                    /* mixed content model permits text */
                    if (node.Type == Node.TEXT_NODE)
                    {
                        if (iswhitenode && mode == Lexer.IGNORE_WHITESPACE)
                        {
                            continue;
                        }

                        if (lexer.Options.EncloseText && !iswhitenode)
                        {
                            lexer.UngetToken();
                            Node para = lexer.InferredTag("p");
                            Node.InsertNodeAtEnd(body, para);
                            ParseTag(lexer, para, mode);
                            mode = Lexer.MIXED_CONTENT;
                            continue;
                        }
                        /* strict doesn't allow text here */
                        lexer.Versions &= ~ (HtmlVersion.Html40Strict | HtmlVersion.Html20);

                        if (checkstack)
                        {
                            checkstack = false;

                            if (lexer.InlineDup(node) > 0)
                            {
                                continue;
                            }
                        }

                        Node.InsertNodeAtEnd(body, node);
                        mode = Lexer.MIXED_CONTENT;
                        continue;
                    }

                    if (node.Type == Node.DOC_TYPE_TAG)
                    {
                        Node.InsertDocType(lexer, body, node);
                        continue;
                    }
                    /* discard unknown  and PARAM tags */
                    if (node.Tag == null || node.Tag == tt.TagParam)
                    {
                        Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    /*
                    Netscape allows LI and DD directly in BODY
                    We infer UL or DL respectively and use this
                    boolean to exclude block-level elements so as
                    to match Netscape's observed behaviour.
                    */
                    lexer.ExcludeBlocks = false;

                    if ((node.Tag.Model & ContentModel.BLOCK) == 0 && (node.Tag.Model & ContentModel.INLINE) == 0)
                    {
                        /* avoid this error message being issued twice */
                        if ((node.Tag.Model & ContentModel.HEAD) == 0)
                        {
                            Report.Warning(lexer, body, node, Report.TAG_NOT_ALLOWED_IN);
                        }

                        if ((node.Tag.Model & ContentModel.HTML) != 0)
                        {
                            /* copy body attributes if current body was inferred */
                            if (node.Tag == tt.TagBody && body.Isimplicit && body.Attributes == null)
                            {
                                body.Attributes = node.Attributes;
                                node.Attributes = null;
                            }

                            continue;
                        }

                        if ((node.Tag.Model & ContentModel.HEAD) != 0)
                        {
                            MoveToHead(lexer, body, node);
                            continue;
                        }

                        if ((node.Tag.Model & ContentModel.LIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("ul");
                            Node.AddClass(node, "noindent");
                            lexer.ExcludeBlocks = true;
                        }
                        else if ((node.Tag.Model & ContentModel.DEFLIST) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("dl");
                            lexer.ExcludeBlocks = true;
                        }
                        else if ((node.Tag.Model & (ContentModel.TABLE | ContentModel.ROWGRP | ContentModel.ROW)) != 0)
                        {
                            lexer.UngetToken();
                            node = lexer.InferredTag("table");
                            lexer.ExcludeBlocks = true;
                        }
                        else
                        {
                            /* AQ: The following line is from the official C
                            version of tidy.  It doesn't make sense to me
                            because the '!' operator has higher precedence
                            than the '&' operator.  It seems to me that the
                            expression always evaluates to 0.

                            if (!node->tag->model & (CM_ROW | CM_FIELD))

                            AQ: 13Jan2000 fixed in C tidy
                            */
                            if ((node.Tag.Model & (ContentModel.ROW | ContentModel.FIELD)) == 0)
                            {
                                lexer.UngetToken();
                                return;
                            }

                            /* ignore </td> </th> <option> etc. */
                            continue;
                        }
                    }

                    if (node.Type == Node.END_TAG)
                    {
                        if (node.Tag == tt.TagBr)
                        {
                            node.Type = Node.START_TAG;
                        }
                        else if (node.Tag == tt.TagP)
                        {
                            Node.CoerceNode(lexer, node, tt.TagBr);
                            Node.InsertNodeAtEnd(body, node);
                            node = lexer.InferredTag("br");
                        }
                        else if ((node.Tag.Model & ContentModel.INLINE) != 0)
                        {
                            lexer.PopInline(node);
                        }
                    }

                    if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                    {
                        if (((node.Tag.Model & ContentModel.INLINE) != 0) &&
                            (node.Tag.Model & ContentModel.MIXED) == 0)
                        {
                            /* HTML4 strict doesn't allow inline content here */
                            /* but HTML2 does allow img elements as children of body */
                            if (node.Tag == tt.TagImg)
                            {
                                lexer.Versions &= ~ HtmlVersion.Html40Strict;
                            }
                            else
                            {
                                lexer.Versions &= ~ (HtmlVersion.Html40Strict | HtmlVersion.Html20);
                            }

                            if (checkstack && !node.Isimplicit)
                            {
                                checkstack = false;

                                if (lexer.InlineDup(node) > 0)
                                {
                                    continue;
                                }
                            }

                            mode = Lexer.MIXED_CONTENT;
                        }
                        else
                        {
                            checkstack = true;
                            mode = Lexer.IGNORE_WHITESPACE;
                        }

                        if (node.Isimplicit)
                        {
                            Report.Warning(lexer, body, node, Report.INSERTING_TAG);
                        }

                        Node.InsertNodeAtEnd(body, node);
                        ParseTag(lexer, node, mode);
                        continue;
                    }

                    /* discard unexpected tags */
                    Report.Warning(lexer, body, node, Report.DISCARDING_UNEXPECTED);
                }
            }
Beispiel #16
0
            /*
            Parses the content of a list element.

            lexer  - token source; also receives warnings and inline-stack updates
            list   - the list element whose children are being collected
            mode   - not consulted here: tokens are always fetched with
                     Lexer.IGNORE_WHITESPACE

            Tokens are read until the list's own end tag, the end tag of an
            ancestor, or end of input. Anything that is not an <li> is pushed
            back and wrapped in an inferred <li style="list-style: none">.
            Lists whose tag is flagged OBSOLETE are coerced to <ul> on exit,
            except on the ExcludeBlocks early return.
            */
            public virtual void Parse(Lexer lexer, Node list, short mode)
            {
                TagCollection tt = lexer.Options.TagTable;

                /* an EMPTY content model means there is nothing to parse */
                if ((list.Tag.Model & ContentModel.EMPTY) != 0)
                {
                    return;
                }

                lexer.Insert = - 1; /* defer implicit inline start tags */

                Node token;
                while ((token = lexer.GetToken(Lexer.IGNORE_WHITESPACE)) != null)
                {
                    /* explicit end tag for this list: close it and stop */
                    if (token.Tag == list.Tag && token.Type == Node.END_TAG)
                    {
                        CoerceObsoleteListToUl(lexer, list, tt);
                        list.Closed = true;
                        Node.TrimEmptyElement(lexer, list);
                        return;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(list, token))
                    {
                        continue;
                    }

                    /* non-text token with no known tag: nothing useful can be done with it */
                    if (token.Type != Node.TEXT_NODE && token.Tag == null)
                    {
                        Report.Warning(lexer, list, token, Report.DISCARDING_UNEXPECTED);
                        continue;
                    }

                    if (token.Type == Node.END_TAG)
                    {
                        if (token.Tag == tt.TagForm)
                        {
                            /* stray </form>: flag it on the lexer and drop the token */
                            lexer.BadForm = 1;
                            Report.Warning(lexer, list, token, Report.DISCARDING_UNEXPECTED);
                        }
                        else if (token.Tag != null && (token.Tag.Model & ContentModel.INLINE) != 0)
                        {
                            /* stray inline end tag: drop it and pop it from the inline stack */
                            Report.Warning(lexer, list, token, Report.DISCARDING_UNEXPECTED);
                            lexer.PopInline(token);
                        }
                        else
                        {
                            /*
                            if this is the end tag for an ancestor element
                            then infer end tag for this element
                            */
                            Node ancestor = list.Parent;
                            while (ancestor != null && !(token.Tag == ancestor.Tag))
                            {
                                ancestor = ancestor.Parent;
                            }

                            if (ancestor != null)
                            {
                                Report.Warning(lexer, list, token, Report.MISSING_ENDTAG_BEFORE);
                                lexer.UngetToken();
                                CoerceObsoleteListToUl(lexer, list, tt);
                                Node.TrimEmptyElement(lexer, list);
                                return;
                            }

                            /* matches nothing that is open above us */
                            Report.Warning(lexer, list, token, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    if (token.Tag != tt.TagLi)
                    {
                        /* push the token back so it is re-read as content of whatever follows */
                        lexer.UngetToken();

                        bool isBlock = token.Tag != null && (token.Tag.Model & ContentModel.BLOCK) != 0;
                        if (isBlock && lexer.ExcludeBlocks)
                        {
                            /* block-level content ends the list while the lexer's ExcludeBlocks flag is set */
                            Report.Warning(lexer, list, token, Report.MISSING_ENDTAG_BEFORE);
                            Node.TrimEmptyElement(lexer, list);
                            return;
                        }

                        /* wrap the stray content in an inferred, bullet-less list item */
                        token = lexer.InferredTag("li");
                        token.AddAttribute("style", "list-style: none");
                        Report.Warning(lexer, list, token, Report.MISSING_STARTTAG);
                    }

                    /* token is now an <LI>, either real or inferred */
                    Node.InsertNodeAtEnd(list, token);
                    ParseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                }

                /* ran out of input before the list was closed */
                CoerceObsoleteListToUl(lexer, list, tt);
                Report.Warning(lexer, list, null, Report.MISSING_ENDTAG_FOR);
                Node.TrimEmptyElement(lexer, list);
            }

            /* Coerces the list to <ul> when its current tag is flagged OBSOLETE; otherwise does nothing. */
            private static void CoerceObsoleteListToUl(Lexer lexer, Node list, TagCollection tt)
            {
                if ((list.Tag.Model & ContentModel.OBSOLETE) != 0)
                {
                    Node.CoerceNode(lexer, list, tt.TagUl);
                }
            }
Beispiel #17
0
        /*
        This is a major clean up to strip out all the extra stuff you get
        when you save as web page from Word 2000. It doesn't yet know what
        to do with VML tags, but these will appear as errors unless you
        declare them as new tags, such as o:p which needs to be declared
        as inline.

        lexer - used to create inferred elements and newline nodes
        node  - first node of the sibling chain to clean; siblings are
                walked via Next and children are cleaned recursively

        The walk stops immediately when it meets an <html> element that has
        no "xmlns:o" attribute, i.e. the document is not treated as Word 2000
        output.
        */
        public virtual void CleanWord2000(Lexer lexer, Node node)
        {
            /*
            container (<ul> or <pre>) being built from a run of adjacent
            bulleted / code paragraphs; reset to null as soon as the run
            is interrupted by any other node
            */
            Node list = null;

            while (node != null)
            {
                /* discard Word's style verbiage */
                if (node.Tag == _tt.TagStyle || node.Tag == _tt.TagMeta || node.Type == Node.COMMENT_TAG)
                {
                    node = Node.DiscardElement(node);
                    continue;
                }

                /* strip out all span tags Word scatters so liberally! */
                if (node.Tag == _tt.TagSpan)
                {
                    node = StripSpan(lexer, node);
                    continue;
                }

                /* get rid of Word's xmlns attributes */
                if (node.Tag == _tt.TagHtml)
                {
                    /* check that it's a Word 2000 document */
                    if (node.GetAttrByName("xmlns:o") == null)
                    {
                        return;
                    }
                }

                /* drop <link rel="File-List"> elements */
                if (node.Tag == _tt.TagLink)
                {
                    AttVal attr = node.GetAttrByName("rel");

                    if (attr != null && attr.Val != null && attr.Val.Equals("File-List"))
                    {
                        node = Node.DiscardElement(node);
                        continue;
                    }
                }

                /* discard empty paragraphs */
                if (node.Content == null && node.Tag == _tt.TagP)
                {
                    node = Node.DiscardElement(node);
                    continue;
                }

                if (node.Tag == _tt.TagP)
                {
                    AttVal attr = node.GetAttrByName("class");

                    /* map sequence of <p class="MsoListBullet"> to <ul>...</ul> */
                    if (attr != null && attr.Val != null && attr.Val.Equals("MsoListBullet"))
                    {
                        Node.CoerceNode(lexer, node, _tt.TagLi);

                        /* start a new <ul> unless the previous sibling run already opened one */
                        if (list == null || list.Tag != _tt.TagUl)
                        {
                            list = lexer.InferredTag("ul");
                            Node.InsertNodeBeforeElement(node, list);
                        }

                        PurgeAttributes(node);

                        if (node.Content != null)
                        {
                            CleanWord2000(lexer, node.Content);
                        }

                        /* remove node and append to contents of list */
                        Node.RemoveNode(node);
                        Node.InsertNodeAtEnd(list, node);

                        /*
                        Resume with the sibling that now follows the list.
                        It must go back through the top of the loop: falling
                        through to the generic tail below would dereference
                        null when the paragraph was the last sibling, and
                        would otherwise purge the next sibling's class and
                        skip it before it could be examined.
                        */
                        node = list.Next;
                        continue;
                    }
                    else if (attr != null && attr.Val != null && attr.Val.Equals("Code"))
                    {
                        /* map sequence of <p class="Code"> to <pre>...</pre> */
                        Node br = lexer.NewLineNode();
                        NormalizeSpaces(node);

                        /* start a new <pre> unless the previous sibling run already opened one */
                        if (list == null || list.Tag != _tt.TagPre)
                        {
                            list = lexer.InferredTag("pre");
                            Node.InsertNodeBeforeElement(node, list);
                        }

                        /* remove node and append to contents of list */
                        Node.RemoveNode(node);
                        Node.InsertNodeAtEnd(list, node);
                        StripSpan(lexer, node);
                        Node.InsertNodeAtEnd(list, br);

                        /* same reasoning as for the bullet case above */
                        node = list.Next;
                        continue;
                    }
                    else
                    {
                        list = null;
                    }
                }
                else
                {
                    list = null;
                }

                /* strip out style and class attributes */
                if (node.Type == Node.START_TAG || node.Type == Node.START_END_TAG)
                {
                    PurgeAttributes(node);
                }

                if (node.Content != null)
                {
                    CleanWord2000(lexer, node.Content);
                }

                node = node.Next;
            }
        }
Beispiel #18
0
            /*
            Parses the content of a <noframes> element.

            lexer    - token source; its BadAccess flags gain Report.USING_NOFRAMES
            noframes - the element whose children are being collected
            mode     - overwritten with Lexer.IGNORE_WHITESPACE before use

            The content is expected to be a single <body>. An explicit <body>
            start tag is parsed as is; any other text or known tag is pushed
            back and wrapped in an inferred <body>. A <frame> or <frameset>
            token ends the element with a missing-end-tag warning.
            */
            public virtual void Parse(Lexer lexer, Node noframes, short mode)
            {
                TagCollection tt = lexer.Options.TagTable;

                lexer.BadAccess |= Report.USING_NOFRAMES;
                mode = Lexer.IGNORE_WHITESPACE;

                Node token;
                while ((token = lexer.GetToken(mode)) != null)
                {
                    /* explicit </noframes> */
                    if (token.Tag == noframes.Tag && token.Type == Node.END_TAG)
                    {
                        noframes.Closed = true;
                        Node.TrimSpaces(lexer, noframes);
                        return;
                    }

                    /* frame markup cannot live inside noframes: close here and let the caller re-read it */
                    if (token.Tag == tt.TagFrame || token.Tag == tt.TagFrameset)
                    {
                        Report.Warning(lexer, noframes, token, Report.MISSING_ENDTAG_BEFORE);
                        Node.TrimSpaces(lexer, noframes);
                        lexer.UngetToken();
                        return;
                    }

                    /* <html> tokens are dropped; only start tags are reported */
                    if (token.Tag == tt.TagHtml)
                    {
                        bool isStart = token.Type == Node.START_TAG || token.Type == Node.START_END_TAG;
                        if (isStart)
                        {
                            Report.Warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
                        }

                        continue;
                    }

                    /* deal with comments etc. */
                    if (Node.InsertMisc(noframes, token))
                    {
                        continue;
                    }

                    bool explicitBody = token.Tag == tt.TagBody && token.Type == Node.START_TAG;
                    if (!explicitBody)
                    {
                        /* discard unexpected end tags */
                        if (token.Type != Node.TEXT_NODE && token.Tag == null)
                        {
                            Report.Warning(lexer, noframes, token, Report.DISCARDING_UNEXPECTED);
                            continue;
                        }

                        /* implicit body element inferred */
                        lexer.UngetToken();
                        token = lexer.InferredTag("body");
                        if (lexer.Options.XmlOut)
                        {
                            Report.Warning(lexer, noframes, token, Report.INSERTING_TAG);
                        }
                    }

                    /* token is a <body>, either explicit or inferred */
                    Node.InsertNodeAtEnd(noframes, token);
                    ParseTag(lexer, token, Lexer.IGNORE_WHITESPACE);
                }

                /* input ended without a closing </noframes> */
                Report.Warning(lexer, noframes, null, Report.MISSING_ENDTAG_FOR);
            }
Beispiel #19
0
        /*
        Add meta element for page transition effect, this works on IE but not NS.

        lexer    - supplies the tag table and creates the inferred <meta>
        root     - document node searched for its head element
        effect   - values 0..23 produce a revealTrans filter with that
                   Transition number; any other value produces blendTrans
        duration - written into the filter's Duration argument

        Nothing is added when root has no head element.
        */
        public virtual void AddTransitionEffect(Lexer lexer, Node root, short effect, double duration)
        {
            Node head = root.FindHead(lexer.Options.TagTable);
            if (head == null)
            {
                return;
            }

            /*
            Format numbers with the invariant culture: the value ends up in
            markup, and a culture that uses ',' as decimal separator would
            otherwise produce "Duration=1,5,Transition=..." which reads as
            an extra argument.
            */
            System.Globalization.CultureInfo invariant = System.Globalization.CultureInfo.InvariantCulture;
            string durationText = duration.ToString(invariant);

            string transition;
            if (0 <= effect && effect <= 23)
            {
                transition = "revealTrans(Duration=" + durationText + ",Transition=" + effect.ToString(invariant) + ")";
            }
            else
            {
                transition = "blendTrans(Duration=" + durationText + ")";
            }

            Node meta = lexer.InferredTag("meta");
            meta.AddAttribute("http-equiv", "Page-Enter");
            meta.AddAttribute("content", transition);
            Node.InsertNodeAtStart(head, meta);
        }
Beispiel #20
0
 /*
 Turns node into an implicit start tag of the given tag definition.

 An OBSOLETE_ELEMENT warning is reported first, while node still carries
 its old tag, using a freshly inferred element of the new tag. The
 previous tag is kept in node.Was.
 */
 public static void CoerceNode(Lexer lexer, Node node, Dict tag)
 {
     string replacementName = tag.Name;

     /* report before mutating, so the warning sees the original node */
     Report.Warning(lexer, node, lexer.InferredTag(replacementName), Report.OBSOLETE_ELEMENT);

     /* remember the old tag, then rewrite the node in place */
     node.Was = node.Tag;
     node.Tag = tag;
     node.Element = replacementName;
     node.Isimplicit = true;
     node.Type = START_TAG;
 }