Mutable Object (c) 1998-2000 (W3C) MIT, INRIA, Keio University See Tidy.cs for the copyright notice. Derived from HTML Tidy Release 4 Aug 2000
Beispiel #1
0
        /*
        Applies the clean-up rules to a node, in a fixed order, and starts
        over with the replacement node every time a rule reports a change.

        lexer: passed through to the rules that need it.
        node:  the node to clean.

        Returns the node left in the MutableObject holder after the last
        rule ran, or the node itself when it is not an element. The result
        is null when the rules discarded the node and nothing follows it.
        */
        private Node CleanNode(Lexer lexer, Node node)
        {
            Node next = node;
            var o = new MutableObject();

            /*
            The null test matters: DiscardContainer stores element.Next in
            the holder, which is null for an empty element that is the last
            sibling, and Center2Div still reports a change in that case.
            */
            while (node != null && node.IsElement)
            {
                o.Object = next;

                /* rules run in order; the first one that fires ends the pass */
                bool changed = Dir2Div(node)
                               || NestedList(lexer, node, o)
                               || Center2Div(lexer, node, o)
                               || MergeDivs(node)
                               || BlockStyle(node)
                               || InlineStyle(lexer, node)
                               || Font2Span(lexer, node, o);

                /* pick up whatever node the rules left for us */
                next = (Node) o.Object;

                if (!changed)
                {
                    break;
                }

                node = next;
            }

            return next;
        }
Beispiel #2
0
        /*
        Simplifies <b><b> ... </b> ...</b> and the <i> equivalent: a B or I
        element whose parent carries the same tag is discarded and its
        children are kept. Walks the sibling chain starting at node and
        recurses into the content of every node that is kept.
        */
        public virtual void NestedEmphasis(Node node)
        {
            var holder = new MutableObject();

            while (node != null)
            {
                Node following = node.Next;

                bool redundant = (node.Tag == _tt.TagB || node.Tag == _tt.TagI)
                                 && node.Parent != null
                                 && node.Parent.Tag == node.Tag;

                if (!redundant)
                {
                    if (node.Content != null)
                    {
                        NestedEmphasis(node.Content);
                    }

                    node = following;
                    continue;
                }

                /* strip the redundant inner element and resume at the node
                   DiscardContainer leaves in the holder */
                holder.Object = following;
                DiscardContainer(node, holder);
                node = (Node) holder.Object;
            }
        }
Beispiel #3
0
        /*
        Symptom: <center>
        Action:  replace <center> by <div style="text-align: center">,
                 or, when the DropFontTags option is set, discard the
                 <center> container and insert an inferred <br> in its
                 place (after its former content when it had any).

        pnode is handed to DiscardContainer, which stores the node to
        continue with. Returns true when node was a <center> element.
        */
        private bool Center2Div(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagCenter)
            {
                return false;
            }

            if (!lexer.Options.DropFontTags)
            {
                node.Tag = _tt.TagDiv;
                node.Element = "div";
                AddStyleProperty(node, "text-align: center");
                return true;
            }

            /* capture the neighbours before DiscardContainer clears the links */
            Node owner = node.Parent;

            if (node.Content == null)
            {
                Node before = node.Prev;
                Node after = node.Next;

                DiscardContainer(node, pnode);

                /* put the <br> exactly where the empty <center> used to be */
                Node lineBreak = lexer.InferredTag("br");
                lineBreak.Next = after;
                lineBreak.Prev = before;
                lineBreak.Parent = owner;

                if (after == null)
                {
                    owner.Last = lineBreak;
                }
                else
                {
                    after.Prev = lineBreak;
                }

                if (before == null)
                {
                    owner.Content = lineBreak;
                }
                else
                {
                    before.Next = lineBreak;
                }
            }
            else
            {
                Node tail = node.Last;

                DiscardContainer(node, pnode);

                /* append the <br> right after the former last child */
                Node lineBreak = lexer.InferredTag("br");
                Node after = tail.Next;

                if (after != null)
                {
                    after.Prev = lineBreak;
                }

                lineBreak.Next = after;
                tail.Next = lineBreak;
                lineBreak.Prev = tail;

                if (owner.Last == tail)
                {
                    owner.Last = lineBreak;
                }

                lineBreak.Parent = owner;
            }

            return true;
        }
Beispiel #4
0
        /*
        Symptom: <ul><li><ul>...</ul></li></ul>
        Action:  discard the outer list

        Applies only when the outer list has exactly one child and the
        first child of that child is a list with the same tag as the outer
        one. pnode receives the outer list's next sibling. Returns true
        when the tree was rewritten.
        */
        private bool NestedList(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagUl && node.Tag != _tt.TagOl)
            {
                return false;
            }

            /* the outer list must hold a single item ... */
            Node item = node.Content;

            if (item == null || item.Next != null)
            {
                return false;
            }

            /* ... whose first child is a list of the same kind */
            Node inner = item.Content;

            if (inner == null || inner.Tag != node.Tag)
            {
                return false;
            }

            pnode.Object = node.Next;

            /* move the inner list into the position of the outer one */
            inner.Prev = node.Prev;
            inner.Next = node.Next;
            inner.Parent = node.Parent;
            FixNodeLinks(inner);

            /* detach the outer list and its single item */
            item.Content = null;
            node.Content = null;
            node.Next = null;

            /* node handed to CleanNode below: the moved inner list when it
               has a previous sibling, otherwise the detached outer list */
            Node target = node;
            Node preceding = inner.Prev;

            /*
            If the previous node was a list the chances are this node
            should be appended to that list. Word has no way of
            recognizing nested lists and just uses indents
            */
            if (preceding != null)
            {
                target = inner;

                if (preceding.Tag == _tt.TagUl || preceding.Tag == _tt.TagOl)
                {
                    /* unlink the inner list from the sibling chain */
                    preceding.Next = inner.Next;

                    if (preceding.Next != null)
                    {
                        preceding.Next.Prev = preceding;
                    }

                    /* and hang it under the last child of the previous list */
                    Node lastItem = preceding.Last; /* <li> */

                    inner.Parent = lastItem;
                    inner.Next = null;
                    inner.Prev = lastItem.Last;
                    FixNodeLinks(inner);
                }
            }

            CleanNode(lexer, target);
            return true;
        }
Beispiel #5
0
        /*
        Replace font elements by span elements, deleting
        the font element's attributes and replacing them
        by a single style attribute.

        lexer: supplies the DropFontTags option.
        node:  the candidate element.
        pnode: receives the node to continue with when the font element
               is discarded (set by DiscardContainer).

        Returns true only when the element was turned into a span. When
        DropFontTags is set the font container is discarded and false is
        returned. A font element that is the only child of its parent is
        left untouched.
        */
        private bool Font2Span(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagFont)
            {
                return false;
            }

            if (lexer.Options.DropFontTags)
            {
                DiscardContainer(node, pnode);
                return false;
            }

            /* if FONT is only child of parent element then leave alone */
            if (node.Parent.Content == node && node.Next == null)
            {
                return false;
            }

            AddFontStyles(node, node.Attributes);

            /* extract style attribute and free the rest */
            AttVal av = node.Attributes;
            AttVal style = null;

            while (av != null)
            {
                AttVal next = av.Next;

                /*
                Compare from the literal side: ParseAttrs builds entries
                with a null attribute name for ASP/PHP pseudo-attributes,
                and calling Equals on that name would throw.
                */
                if ("style".Equals(av.Attribute))
                {
                    av.Next = null;
                    style = av;
                }

                av = next;
            }

            node.Attributes = style;

            node.Tag = _tt.TagSpan;
            node.Element = "span";

            return true;
        }
Beispiel #6
0
        /*
        Used to strip font start and end tags: unlinks element from the
        tree and splices its children, if any, into its place.

        pnode receives the first former child, or the element's next
        sibling when it had no content. The element's Next and Content
        links are cleared afterwards.
        */
        private void DiscardContainer(Node element, MutableObject pnode)
        {
            Node owner = element.Parent;
            Node firstChild = element.Content;
            Node before = element.Prev;
            Node after = element.Next;

            if (firstChild == null)
            {
                /* nothing to keep: close the gap between the neighbours */
                if (after == null)
                {
                    owner.Last = before;
                }
                else
                {
                    after.Prev = before;
                }

                if (before == null)
                {
                    owner.Content = after;
                }
                else
                {
                    before.Next = after;
                }

                pnode.Object = after;
            }
            else
            {
                Node lastChild = element.Last;

                /* join the tail of the children to what followed the element */
                lastChild.Next = after;

                if (after == null)
                {
                    owner.Last = lastChild;
                }
                else
                {
                    after.Prev = lastChild;
                }

                /* join the head of the children to what preceded the element */
                if (before == null)
                {
                    owner.Content = firstChild;
                }
                else
                {
                    firstChild.Prev = before;
                    before.Next = firstChild;
                }

                /* re-parent along the chain; it now runs on into the
                   siblings that followed the element */
                for (Node child = firstChild; child != null; child = child.Next)
                {
                    child.Parent = owner;
                }

                pnode.Object = firstChild;
            }

            element.Next = null;
            element.Content = null;
        }
Beispiel #7
0
        /*
        Parses the attributes of a start tag until ParseAttribute yields
        no further name and no ASP/PHP node, or the input ends; swallows
        the closing '>'.

        Each accepted attribute is created with the list built so far as
        its first constructor argument and then becomes the new list
        value, which is what gets returned (null when nothing was
        accepted). isempty is passed through to ParseAttribute and
        ParseValue. Names rejected by IsValidAttrName are reported and
        left out of the list.
        */
        public virtual AttVal ParseAttrs(MutableBoolean isempty)
        {
            var quote = new MutableInteger();
            var aspHolder = new MutableObject();
            var phpHolder = new MutableObject();

            AttVal head = null;

            while (!EndOfInput())
            {
                string name = ParseAttribute(isempty, aspHolder, phpHolder);

                if (name != null)
                {
                    string text = ParseValue(name, false, isempty, quote);

                    if (!IsValidAttrName(name))
                    {
                        /* invalid name: report it, add nothing to the list */
                        Report.AttrError(this, Token, text, Report.BAD_ATTRIBUTE_VALUE);
                        continue;
                    }

                    var parsed = new AttVal(head, null, null, null, quote.Val, name, text);
                    parsed.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(parsed);
                    head = parsed;
                    continue;
                }

                /* no name: the "attribute" may have been ASP markup ... */
                if (aspHolder.Object != null)
                {
                    head = new AttVal(head, null, (Node) aspHolder.Object, null, '\x0000', null, null);
                    continue;
                }

                /* ... or PHP markup */
                if (phpHolder.Object != null)
                {
                    head = new AttVal(head, null, null, (Node) phpHolder.Object, '\x0000', null, null);
                    continue;
                }

                /* neither: the tag is finished */
                break;
            }

            return head;
        }
Beispiel #8
0
        /*
        Reads the next attribute name from the input; consumes the '>'
        terminating start tags.

        isempty: set to true when the tag is closed with "/>".
        asp:     cleared on entry; receives the result of ParseAsp() when
                 "<%" is found where an attribute name was expected.
        php:     cleared on entry; receives the result of ParsePhp() when
                 "<?" is found where an attribute name was expected.

        Returns the attribute name, or null when no name was read: the tag
        ended ('>' or "/>"), ASP/PHP markup was found, an unexpected '<'
        or the end of the stream was hit, or the name would be empty.
        The name is lower-cased unless Options.XmlTags is set.
        */
        public virtual string ParseAttribute(MutableBoolean isempty, MutableObject asp, MutableObject php)
        {
            int start;
            // int len = 0;   Removed by BUGFIX for 126265
            short map;
            int c;

            asp.Object = null; /* clear asp pointer */
            php.Object = null; /* clear php pointer */
            /* skip white space before the attribute */

            for (;;)
            {
                c = Input.ReadChar();
                if (c == '/')
                {
                    /* "/>" closes an empty element */
                    c = Input.ReadChar();
                    if (c == '>')
                    {
                        isempty.Val = true;
                        return null;
                    }

                    /* a lone '/': push back what followed it and let the
                       '/' start the attribute name */
                    Input.UngetChar(c);
                    c = '/';
                    break;
                }

                /* end of the start tag */
                if (c == '>')
                {
                    return null;
                }

                if (c == '<')
                {
                    c = Input.ReadChar();

                    /* "<%" introduces ASP markup */
                    if (c == '%')
                    {
                        asp.Object = ParseAsp();
                        return null;
                    }
                    /* "<?" introduces PHP markup */
                    if (c == '?')
                    {
                        php.Object = ParsePhp();
                        return null;
                    }

                    /* any other '<' here is reported (with the
                       UNEXPECTED_GT code); the character after it is
                       pushed back */
                    Input.UngetChar(c);
                    Report.AttrError(this, Token, null, Report.UNEXPECTED_GT);
                    return null;
                }

                /* stray quote marks are reported and skipped */
                if (c == '"' || c == '\'')
                {
                    Report.AttrError(this, Token, null, Report.UNEXPECTED_QUOTEMARK);
                    continue;
                }

                if (c == StreamIn.END_OF_STREAM)
                {
                    Report.AttrError(this, Token, null, Report.UNEXPECTED_END_OF_FILE);
                    Input.UngetChar(c);
                    return null;
                }

                map = Map((char) c);

                /* first non-white character starts the name */
                if ((map & WHITE) == 0)
                {
                    break;
                }
            }

            /* the name is accumulated in the lexer buffer from here on */
            start = Lexsize;

            for (;;)
            {
                /* but push back '=' for parseValue() */
                if (c == '=' || c == '>')
                {
                    Input.UngetChar(c);
                    break;
                }

                /* '<' and end of stream also end the name and are pushed back */
                if (c == '<' || c == StreamIn.END_OF_STREAM)
                {
                    Input.UngetChar(c);
                    break;
                }

                map = Map((char) c);

                /* white space ends the name and is not pushed back */
                if ((map & WHITE) != 0)
                    break;

                /* what should be done about non-namechar characters? */
                /* currently these are incorporated into the attr name */

                /* fold upper case to lower case unless parsing XML tags */
                if (!Options.XmlTags && (map & UPPERCASE) != 0)
                {
                    c += ('a' - 'A');
                }

                //  ++len;    Removed by BUGFIX for 126265
                AddCharToLexer(c);

                c = Input.ReadChar();
            }

            // Following line added by GLP to fix BUG 126265.  This is a temporary comment
            // and should be removed when Tidy is fixed.
            // The length is taken from the lexer buffer growth rather than a
            // separate counter.
            int len = Lexsize - start;
            string attr = (len > 0 ? GetString(Lexbuf, start, len) : null);
            /* rewind the lexer buffer to where it was before the name */
            Lexsize = start;

            return attr;
        }