Пример #1
0
        /*
        Applies all matching clean-up rules to a node.

        The rules are tried in a fixed order: Dir2Div, NestedList,
        Center2Div, MergeDivs, BlockStyle, InlineStyle, Font2Span.
        As soon as one of them reports a change, the whole sequence is
        restarted on the node that rule handed back through its
        MutableObject argument. The loop ends when no rule matches, or
        when the current node is null or is not an element.

        lexer: passed through unchanged to every rule.
        node:  the node to start with; may be null.

        Returns the node the caller should continue with. This can differ
        from the node passed in, and can be null, when a rule replaced or
        removed the original node.
        */
        private Node CleanNode(Lexer lexer, Node node)
        {
            Node next = node;
            MutableObject o = new MutableObject();

            /*
            A rule may hand back null (NestedList stores node.Next and
            DiscardContainer stores element.Next, either of which is null
            for the last sibling), so the null test must come before
            IsElement is dereferenced.
            */
            while (node != null && node.IsElement)
            {
                o.Object = next;

                /* short-circuit: later rules only run while earlier ones report no change */
                bool changed = Dir2Div(lexer, node, o)
                    || NestedList(lexer, node, o)
                    || Center2Div(lexer, node, o)
                    || MergeDivs(lexer, node, o)
                    || BlockStyle(lexer, node, o)
                    || InlineStyle(lexer, node, o)
                    || Font2Span(lexer, node, o);

                /*
                Pick up whatever the last rule that ran left in the holder.
                This is done even when nothing matched, because a rule can
                replace the node and still report false.
                */
                next = (Node) o.Object;

                if (!changed)
                {
                    break;
                }

                node = next;
            }

            return next;
        }
Пример #2
0
        /*
        Symptom: the only child of a block-level element is a
        presentation element: B, I or FONT.

        Action: move the presentation onto the parent as style
        properties and strip the child element, keeping its children.

        example:

        <p>
        <b><font face="Arial" size="6">Draft Recommended Practice</font></b>
        </p>

        becomes:

        <p style="font-weight: bold; font-family: Arial; font-size: 6">
        Draft Recommended Practice
        </p>

        The rule applies to nodes whose content model includes Block,
        List, Deflist or Table, except table, tr and li elements.
        For every node that gets this far, other than caption, TextAlign
        is invoked first (the original comment describes this as replacing
        the align attribute by a style attribute, skipped for some
        elements to avoid CSS problems with Navigator 4).

        Returns true when a child was stripped, false otherwise. Note that
        TextAlign may already have run even when false is returned.
        The pnode argument is not used by this rule.
        */
        private bool BlockStyle(Lexer lexer, Node node, MutableObject pnode)
        {
            bool hasEligibleModel =
                (node.Tag.Model & (ContentModel.Block | ContentModel.List | ContentModel.Deflist | ContentModel.Table)) != 0;

            if (!hasEligibleModel)
            {
                return false;
            }

            if (node.Tag == _tt.tagTable || node.Tag == _tt.TagTr || node.Tag == _tt.TagLi)
            {
                return false;
            }

            /* align handling is skipped for caption only */
            if (node.Tag != _tt.TagCaption)
            {
                TextAlign(lexer, node);
            }

            /* the rule only fires for a child that has no siblings */
            Node onlyChild = node.Content;

            if (onlyChild == null || onlyChild.Next != null)
            {
                return false;
            }

            if (onlyChild.Tag == _tt.TagB)
            {
                MergeStyles(node, onlyChild);
                AddStyleProperty(node, "font-weight: bold");
            }
            else if (onlyChild.Tag == _tt.TagI)
            {
                MergeStyles(node, onlyChild);
                AddStyleProperty(node, "font-style: italic");
            }
            else if (onlyChild.Tag == _tt.TagFont)
            {
                MergeStyles(node, onlyChild);
                AddFontStyles(node, onlyChild.Attributes);
            }
            else
            {
                return false;
            }

            StripOnlyChild(node);
            return true;
        }
Пример #3
0
        /*
        Symptom: <center>

        Action: normally replace <center> by
        <div style="text-align: center">.

        When the DropFontTags option is set the <center> container is
        discarded instead and an inferred <br> is linked into the tree:
        after the last former child if the element had content, or in
        the position the element occupied if it was empty.

        Returns true whenever node is a <center> element, false otherwise.
        In the DropFontTags case pnode receives whatever DiscardContainer
        stores in it.
        */
        private bool Center2Div(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagCenter)
            {
                return false;
            }

            if (!lexer.Options.DropFontTags)
            {
                node.Tag = _tt.TagDiv;
                node.Element = "div";
                AddStyleProperty(node, "text-align: center");
                return true;
            }

            /* remember the surroundings before the container is unlinked */
            Node parent = node.Parent;
            Node lineBreak;

            if (node.Content != null)
            {
                Node tail = node.Last;

                DiscardContainer(node, pnode);

                lineBreak = lexer.InferredTag("br");

                /* splice the <br> in directly after the last former child */
                Node following = tail.Next;

                if (following != null)
                {
                    following.Prev = lineBreak;
                }

                lineBreak.Next = following;
                tail.Next = lineBreak;
                lineBreak.Prev = tail;

                if (parent.Last == tail)
                {
                    parent.Last = lineBreak;
                }

                lineBreak.Parent = parent;
            }
            else
            {
                Node before = node.Prev;
                Node after = node.Next;

                DiscardContainer(node, pnode);

                /* put the <br> where the empty <center> used to be */
                lineBreak = lexer.InferredTag("br");
                lineBreak.Next = after;
                lineBreak.Prev = before;
                lineBreak.Parent = parent;

                if (after == null)
                {
                    parent.Last = lineBreak;
                }
                else
                {
                    after.Prev = lineBreak;
                }

                if (before == null)
                {
                    parent.Content = lineBreak;
                }
                else
                {
                    before.Next = lineBreak;
                }
            }

            return true;
        }
Пример #4
0
        /*
        Symptom: <ul><li><ul>...</ul></li></ul>
        Action: discard outer list

        The rule fires when node is a ul or ol whose only child has, as
        its first child, a list with the same tag as node. The inner list
        is then linked into the outer list's position. If the node before
        it is itself a ul or ol, the inner list is moved to the end of the
        last child of that preceding list.

        On a match pnode receives the outer list's original next sibling
        and true is returned; otherwise false is returned and pnode is
        left untouched.
        */
        private bool NestedList(Lexer lexer, Node node, MutableObject pnode)
        {
            Node child, list;

            if (node.Tag == _tt.TagUl || node.Tag == _tt.TagOl)
            {
                child = node.Content;

                if (child == null)
                {
                    return false;
                }

                /* check child has no peers */

                if (child.Next != null)
                {
                    return false;
                }

                /* only the first child of the single item is inspected */
                list = child.Content;

                if (list == null)
                {
                    return false;
                }

                /* inner list must be of the same kind (ul/ol) as the outer one */
                if (list.Tag != node.Tag)
                {
                    return false;
                }

                /* caller continues with the sibling that followed the outer list */
                pnode.Object = node.Next;

                /* move inner list node into position of outer node */
                list.Prev = node.Prev;
                list.Next = node.Next;
                list.Parent = node.Parent;
                FixNodeLinks(list);

                /* get rid of outer ul and its li */
                child.Content = null;
                node.Content = null;
                node.Next = null;

                /*
                If prev node was a list the chances are this node
                should be appended to that list. Word has no way of
                recognizing nested lists and just uses indents
                */

                if (list.Prev != null)
                {
                    /* from here on "node" is the inner list and "list" its previous sibling */
                    node = list;
                    list = node.Prev;

                    if (list.Tag == _tt.TagUl || list.Tag == _tt.TagOl)
                    {
                        /* unlink the inner list from the sibling chain */
                        list.Next = node.Next;

                        if (list.Next != null)
                        {
                            list.Next.Prev = list;
                        }

                        /* NOTE(review): assumes the preceding list has at least one child; list.Last == null would fail on child.Last below - confirm */
                        child = list.Last; /* <li> */

                        /* re-attach it as the last child of that item */
                        node.Parent = child;
                        node.Next = null;
                        node.Prev = child.Last;
                        FixNodeLinks(node);
                    }
                }

                /*
                NOTE(review): when list.Prev was null, "node" still refers to
                the detached outer list (its Content and Next were cleared
                above), so this call does not clean the inner list in that
                case. The return value of CleanNode is discarded. Confirm
                whether this is intended.
                */
                CleanNode(lexer, node);
                return true;
            }

            return false;
        }
Пример #5
0
        /*
        Simplifies <b><b> ... </b> ...</b> etc.

        Walks the sibling chain starting at node. A b or i element whose
        parent carries the same tag is redundant: its container is
        discarded and the walk resumes at the node DiscardContainer hands
        back. Every other node that has content is processed recursively.
        */
        public virtual void NestedEmphasis(Node node)
        {
            MutableObject holder = new MutableObject();
            Node current = node;

            while (current != null)
            {
                /* captured up front, before anything below can relink the tree */
                Node following = current.Next;

                bool isEmphasis = current.Tag == _tt.TagB || current.Tag == _tt.TagI;
                bool repeatsParent = isEmphasis && current.Parent != null && current.Parent.Tag == current.Tag;

                if (repeatsParent)
                {
                    /* strip redundant inner element */
                    holder.Object = following;
                    DiscardContainer(current, holder);
                    current = (Node) holder.Object;
                }
                else
                {
                    if (current.Content != null)
                    {
                        NestedEmphasis(current.Content);
                    }

                    current = following;
                }
            }
        }
Пример #6
0
        /*
        Handles a presentation element that is the only child of a table
        cell or of an inline element such as em.

        The rule applies to any node other than font whose content model
        includes Inline or Row. If its single child is b or i (and the
        LogicalEmphasis option is set), or font, the child's presentation
        is merged into the node's style and the child element is stripped.

        Returns true when a child was stripped, false otherwise.
        The pnode argument is not used by this rule.
        */
        private bool InlineStyle(Lexer lexer, Node node, MutableObject pnode)
        {
            bool applies = node.Tag != _tt.TagFont
                && (node.Tag.Model & (ContentModel.Inline | ContentModel.Row)) != 0;

            if (!applies)
            {
                return false;
            }

            Node onlyChild = node.Content;

            /* the child must exist and have no peers */
            if (onlyChild == null || onlyChild.Next != null)
            {
                return false;
            }

            if (onlyChild.Tag == _tt.TagB && lexer.Options.LogicalEmphasis)
            {
                MergeStyles(node, onlyChild);
                AddStyleProperty(node, "font-weight: bold");
            }
            else if (onlyChild.Tag == _tt.TagI && lexer.Options.LogicalEmphasis)
            {
                MergeStyles(node, onlyChild);
                AddStyleProperty(node, "font-style: italic");
            }
            else if (onlyChild.Tag == _tt.TagFont)
            {
                MergeStyles(node, onlyChild);
                AddFontStyles(node, onlyChild.Attributes);
            }
            else
            {
                return false;
            }

            StripOnlyChild(node);
            return true;
        }
Пример #7
0
        /*
        Symptom <div><div>...</div></div>
        Action: merge the two divs

        This is useful after nested <dir>s used by Word
        for indenting have been converted to <div>s

        The rule fires only when node is a div whose first child is a div
        with no following sibling. The child's styles are merged into the
        node and the child element is stripped.

        Returns true when the divs were merged, false otherwise.
        The lexer and pnode arguments are not used by this rule.
        */
        private bool MergeDivs(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagDiv)
            {
                return false;
            }

            Node inner = node.Content;

            bool loneInnerDiv = inner != null
                && inner.Tag == _tt.TagDiv
                && inner.Next == null;

            if (!loneInnerDiv)
            {
                return false;
            }

            MergeStyles(node, inner);
            StripOnlyChild(node);
            return true;
        }
Пример #8
0
        /*
        Replace font elements by span elements, deleting
        the font element's attributes and replacing them
        by a single style attribute.

        Behaviour by case, for a node that is a font element:
        - DropFontTags option set: the font container is discarded.
          The rule still returns false; the node to continue with is
          whatever DiscardContainer stored in pnode.
        - the font element is the only child of its parent: left alone,
          returns false.
        - otherwise: the font attributes are turned into style properties,
          every attribute except "style" is dropped, the element becomes a
          span and true is returned.

        Returns false for any node that is not a font element.
        */
        private bool Font2Span(Lexer lexer, Node node, MutableObject pnode)
        {
            if (node.Tag != _tt.TagFont)
            {
                return false;
            }

            if (lexer.Options.DropFontTags)
            {
                DiscardContainer(node, pnode);
                return false;
            }

            /* if FONT is only child of parent element then leave alone */
            if (node.Parent.Content == node && node.Next == null)
            {
                return false;
            }

            AddFontStyles(node, node.Attributes);

            /* extract style attribute and drop the rest */
            AttVal style = null;
            AttVal av = node.Attributes;

            while (av != null)
            {
                AttVal next = av.Next;

                /*
                The literal is the receiver on purpose: the attribute list
                can contain entries without a name (ParseAttrs creates them
                for ASP/PHP markup), and calling Equals on a null name
                would throw.
                */
                if ("style".Equals(av.Attribute))
                {
                    av.Next = null;
                    style = av;
                }

                av = next;
            }

            node.Attributes = style;

            node.Tag = _tt.TagSpan;
            node.Element = "span";

            return true;
        }
Пример #9
0
        /*
        Removes an element from the tree while keeping its children
        (used to strip font start and end tags).

        If the element has content, its children are linked into the
        parent's child chain in the element's place and pnode receives the
        first of them. If the element is empty it is simply unlinked and
        pnode receives the element's former next sibling, which may be
        null.

        Afterwards the element's Next and Content links are cleared.
        The element is expected to have a parent.
        */
        private void DiscardContainer(Node element, MutableObject pnode)
        {
            Node parent = element.Parent;
            Node firstChild = element.Content;
            Node before = element.Prev;
            Node after = element.Next;

            if (firstChild == null)
            {
                /* empty element: join its neighbours directly */
                if (after == null)
                {
                    parent.Last = before;
                }
                else
                {
                    after.Prev = before;
                }

                if (before == null)
                {
                    parent.Content = after;
                }
                else
                {
                    before.Next = after;
                }

                pnode.Object = after;
            }
            else
            {
                Node lastChild = element.Last;

                /* tail of the child chain joins the following sibling */
                lastChild.Next = after;

                if (after == null)
                {
                    parent.Last = lastChild;
                }
                else
                {
                    after.Prev = lastChild;
                }

                /* head of the child chain joins the preceding sibling */
                if (before == null)
                {
                    parent.Content = firstChild;
                }
                else
                {
                    firstChild.Prev = before;
                    before.Next = firstChild;
                }

                /*
                The chain is already joined to the following siblings, so
                this walk runs to the end of the parent's child list.
                */
                for (Node moved = firstChild; moved != null; moved = moved.Next)
                {
                    moved.Parent = parent;
                }

                pnode.Object = firstChild;
            }

            element.Next = null;
            element.Content = null;
        }
Пример #10
0
        /*
        The clean-up rules use the pnode argument to return the
        next node when the original node has been deleted.
        */
        /*
        Symptom: <dir> <li> where <li> is only child
        Action: coerce <dir> <li> to <div> with indent.

        The rule is applied to dir, ul and ol elements alike. It fires
        only when the element has exactly one child, that child is an li,
        and the li is flagged as implicit. The element is then turned
        into a div with "margin-left: 2em" and the li is stripped.

        Returns true when the element was converted, false otherwise.
        The lexer and pnode arguments are not used by this rule.
        */
        private bool Dir2Div(Lexer lexer, Node node, MutableObject pnode)
        {
            bool isListContainer = node.Tag == _tt.TagDir
                || node.Tag == _tt.TagUl
                || node.Tag == _tt.TagOl;

            if (!isListContainer)
            {
                return false;
            }

            Node item = node.Content;

            /* need exactly one child */
            if (item == null || item.Next != null)
            {
                return false;
            }

            /* ... and it has to be an implicit <li> */
            if (item.Tag != _tt.TagLi || !item.Isimplicit)
            {
                return false;
            }

            /* coerce dir to div */
            node.Tag = _tt.TagDiv;
            node.Element = "div";
            AddStyleProperty(node, "margin-left: 2em");
            StripOnlyChild(node);
            return true;
        }
		/*
		Parses the attributes of the tag currently being scanned and
		swallows the closing '>' (ParseAttribute consumes it).

		Attributes are read until ParseAttribute returns null without
		having found ASP or PHP markup, or until the input ends. Embedded
		ASP / PHP blocks are kept as AttVal entries that have no name.
		An attribute whose name fails IsValidAttrName is reported and
		left out of the list.

		isempty: forwarded to ParseAttribute and ParseValue;
		         ParseAttribute sets it when it meets "/>".

		Returns the most recently created AttVal, with the previously
		built list passed as the first constructor argument (presumably
		the "next" link - confirm against AttVal), or null when no
		attribute was found.
		*/
		public virtual AttVal ParseAttrs(MutableBoolean isempty)
		{
			AttVal list = null;
			MutableInteger delim = new MutableInteger();
			MutableObject asp = new MutableObject();
			MutableObject php = new MutableObject();

			while (!EndOfInput())
			{
				string attribute = ParseAttribute(isempty, asp, php);

				if (attribute == null)
				{
					/* check if attributes are created by ASP markup */
					if (asp.Object != null)
					{
						list = new AttVal(list, null, (Node) asp.Object, null, '\x0000', null, null);
						continue;
					}

					/* check if attributes are created by PHP markup */
					if (php.Object != null)
					{
						list = new AttVal(list, null, null, (Node) php.Object, '\x0000', null, null);
						continue;
					}

					/* end of the tag, stray '<' or end of input */
					break;
				}

				/* the value is always consumed, even if the name turns out to be invalid */
				string val = ParseValue(attribute, false, isempty, delim);

				if (IsValidAttrName(attribute))
				{
					AttVal av = new AttVal(list, null, null, null, delim.Val, attribute, val);
					av.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(av);
					list = av;
				}
				else
				{
					/* invalid name: report it and drop the attribute */
					Report.AttrError(this, token, val, Report.BAD_ATTRIBUTE_VALUE);
				}
			}

			return list;
		}
		/*
		Reads the next attribute name from the input.

		Consumes the '>' terminating start tags. Both output holders are
		cleared on entry.

		isempty: set to true when "/>" is encountered.
		asp:     receives the result of ParseAsp() when "<%" is met.
		php:     receives the result of ParsePhp() when "<?" is met.

		Returns the attribute name, or null when no name was read: at
		'>', at "/>", at '<' (whether or not ASP/PHP markup follows), at
		end of input, or when the name turns out to be empty. Upper-case
		characters are shifted to lower case unless Options.XmlTags is set.
		The characters are collected in the lexer buffer, which is restored
		to its previous size before returning.
		*/
		public virtual string ParseAttribute(MutableBoolean isempty, MutableObject asp, MutableObject php)
		{
			int start = 0;
			// int len = 0;   Removed by BUGFIX for 126265
			short map;
			string attr;
			int c = 0;
			
			asp.Object = null; /* clear asp pointer */
			php.Object = null; /* clear php pointer */
			/* skip white space before the attribute */
			
			for (;;)
			{
				c = input.ReadChar();
				if (c == '/')
				{
					c = input.ReadChar();
					if (c == '>')
					{
						/* "/>" : empty-element syntax, no further attributes */
						isempty.Val = true;
						return null;
					}
					
					/* a lone '/' : push back the look-ahead and treat '/' as the first name character */
					input.UngetChar(c);
					c = '/';
					break;
				}
				
				if (c == '>')
				{
					/* end of the start tag; the '>' stays consumed */
					return null;
				}
				
				if (c == '<')
				{
					c = input.ReadChar();
					
					if (c == '%')
					{
						asp.Object = ParseAsp();
						return null;
					}
					else if (c == '?')
					{
						php.Object = ParsePhp();
						return null;
					}
					
					/* NOTE(review): the character met here is '<' but the code reported is UNEXPECTED_GT - confirm this is intended */
					input.UngetChar(c);
					Report.AttrError(this, token, null, Report.UNEXPECTED_GT);
					return null;
				}
				
				if (c == '"' || c == '\'')
				{
					/* stray quote mark: report it and keep scanning */
					Report.AttrError(this, token, null, Report.UNEXPECTED_QUOTEMARK);
					continue;
				}
				
				if (c == StreamIn.EndOfStream)
				{
					Report.AttrError(this, token, null, Report.UNEXPECTED_END_OF_FILE);
					input.UngetChar(c);
					return null;
				}
				
				map = MAP((char) c);
				
				/* first non-white character starts the name */
				if ((map & WHITE) == 0)
				{
					break;
				}
			}
			
			/* remember where the name starts in the lexer buffer */
			start = lexsize;
			
			for (;;)
			{
				/* but push back '=' for parseValue() */
				if (c == '=' || c == '>')
				{
					input.UngetChar(c);
					break;
				}
				
				if (c == '<' || c == StreamIn.EndOfStream)
				{
					input.UngetChar(c);
					break;
				}
				
				map = MAP((char) c);
				
				/* white space ends the name and is not pushed back */
				if ((map & WHITE) != 0)
					break;
				
				/* what should be done about non-namechar characters? */
				/* currently these are incorporated into the attr name */
				
				if (!Options.XmlTags && (map & UPPERCASE) != 0)
				{
					/* shift the character by the 'A' to 'a' offset */
					c += (int) ('a' - 'A');
				}
				
				//  ++len;    Removed by BUGFIX for 126265 
				AddCharToLexer(c);
				
				c = input.ReadChar();
			}
			
			// Following line added by GLP to fix BUG 126265.  This is a temporary comment
			// and should be removed when Tidy is fixed.
			// The length is derived from how far the lexer buffer grew while the name was read.
			int len = lexsize - start;
			attr = (len > 0?GetString(lexbuf, start, len):null);
			/* give the buffer space used for the name back */
			lexsize = start;
			
			return attr;
		}