예제 #1
0
        /// <summary>
        /// Locates the body element by first scanning this node's children for
        /// the html element and then scanning that element's children for body.
        /// </summary>
        /// <param name="tt">Tag table whose TagHtml and TagBody entries are compared against each child's Tag.</param>
        /// <returns>The matching body node, or null when no html child or no body child is found.</returns>
        public virtual Node FindBody(TagTable tt)
        {
            /* pass 1: first child whose tag is html */
            Node html;
            for (html = _content; html != null; html = html.Next)
            {
                if (html.Tag == tt.TagHtml)
                {
                    break;
                }
            }

            if (html == null)
            {
                return null;
            }

            /* pass 2: first child of html whose tag is body */
            Node body;
            for (body = html.Content; body != null; body = body.Next)
            {
                if (body.Tag == tt.TagBody)
                {
                    break;
                }
            }

            return body;
        }
예제 #2
0
        /*
         * Content that is not expected inside a table row is relocated so
         * that it becomes the sibling immediately preceding the enclosing
         * table, in accordance with Netscape and IE. The node must not
         * have been inserted into the row already.
         *
         * Nothing happens when no ancestor of the row is a table.
         */
        public static void MoveBeforeTable(Node row, Node node, TagTable tt)
        {
            /* climb from the row until the enclosing table element is reached */
            Node table = row.Parent;
            while (table != null && table.Tag != tt.tagTable)
            {
                table = table.Parent;
            }

            if (table == null)
            {
                return;
            }

            Node container = table.Parent;

            /* if the table headed its parent's child list, the moved node is the new head */
            if (container.Content == table)
            {
                container.Content = node;
            }

            /* link the node in between the table's previous sibling and the table */
            node.Prev   = table.Prev;
            node.Next   = table;
            table.Prev  = node;
            node.Parent = container;

            if (node.Prev != null)
            {
                node.Prev.Next = node;
            }
        }
예제 #3
0
        /// <summary>
        /// Creates a Tidy instance with a fresh set of options and, when the
        /// default attribute table is available, a new tag table that is
        /// cross-linked with those options.
        /// </summary>
        public Tidy()
        {
            _options = new TidyOptions();

            /* without the default attribute table no tag table is attached to the options */
            AttributeTable at = AttributeTable.DefaultAttributeTable;

            if (at == null)
            {
                return;
            }

            /* an object creation expression yields an instance or throws, so no null check is needed */
            TagTable tt = new TagTable();

            tt.Options  = _options;
            _options.tt = tt;

            /* the default entity table is read here but its value is not used;
               presumably this forces it to be created up front - TODO confirm */
            EntityTable et = EntityTable.DefaultEntityTable;
        }
예제 #4
0
        /*
         * Scan the children of this node and return the first one whose
         * tag is the html entry of the given tag table (null if none).
         */
        public virtual Node FindHtml(TagTable tt)
        {
            Node child = _content;

            while (child != null)
            {
                if (child.Tag == tt.TagHtml)
                {
                    break;
                }

                child = child.Next;
            }

            return child;
        }
예제 #5
0
        /*
         * A doctype was encountered after other tags. A warning is
         * reported, then the doctype is inserted before the html element
         * found by walking up from the given element.
         *
         * NOTE(review): the upward walk has no null check, so it relies on
         * the element being the html element or a descendant of it.
         */
        public static void InsertDocType(Lexer lexer, Node element, Node doctype)
        {
            TagTable tt = lexer.Options.tt;

            Report.Warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);

            /* follow parent links until the html element is reached */
            Node html = element;
            while (html.Tag != tt.TagHtml)
            {
                html = html.Parent;
            }

            InsertNodeBeforeElement(html, doctype);
        }
예제 #6
0
        /// <summary>
        /// Locates the head element among the children of the html element
        /// returned by <c>FindHtml</c>.
        /// </summary>
        /// <param name="tt">Tag table whose TagHead entry is compared against each child's Tag.</param>
        /// <returns>The head node, or null when there is no html element or it has no head child.</returns>
        public virtual Node FindHead(TagTable tt)
        {
            Node html = FindHtml(tt);
            if (html == null)
            {
                return null;
            }

            Node child = html.Content;
            while (child != null && child.Tag != tt.TagHead)
            {
                child = child.Next;
            }

            return child;
        }
예제 #7
0
        /*
         * Moves leading and trailing white space out of an element, so that
         *
         *   hello<em> world</em>
         * becomes
         *   hello <em>world</em>
         *
         * and
         *   <em>hello </em><strong>world</strong>
         * becomes
         *   <em>hello</em> <strong>world</strong>
         *
         * The leading trim is skipped when the element's tag is pre; the
         * trailing trim is applied whenever the last child is a text node.
         */
        public static void TrimSpaces(Lexer lexer, Node element)
        {
            Node     first = element.Content;
            TagTable tt    = lexer.Options.tt;

            bool startsWithText = first != null && first.Type == Node.TextNode;
            if (startsWithText && element.Tag != tt.TagPre)
            {
                TrimInitialSpace(lexer, element, first);
            }

            /* read the last child only now, after the leading trim has run */
            Node final = element.Last;

            bool endsWithText = final != null && final.Type == Node.TextNode;
            if (endsWithText)
            {
                TrimTrailingSpace(lexer, element, final);
            }
        }
예제 #8
0
파일: AttVal.cs 프로젝트: r-win/TidyNet
        /*
         * Checks this attribute against its dictionary entry and returns
         * that entry (null when the attribute is unknown).
         *
         * Known attributes narrow lexer.versions and run their AttrCheck,
         * if any. Unknown attributes are reported, except for XML input,
         * nodes without a tag, ASP attributes and proprietary elements.
         */
        public virtual Attribute CheckAttribute(Lexer lexer, Node node)
        {
            TagTable tt = lexer.Options.tt;

            /* the uniqueness check is skipped for ASP and PHP attributes */
            if (Asp == null && Php == null)
            {
                CheckUniqueAttribute(lexer, node);
            }

            Attribute entry = Dict;

            if (entry == null)
            {
                if (!lexer.Options.XmlTags && node.Tag != null && _asp == null)
                {
                    bool proprietary = (node.Tag.Versions & HtmlVersion.Proprietary) != HtmlVersion.Unknown;

                    if (!proprietary)
                    {
                        Report.AttrError(lexer, node, Attribute, Report.UNKNOWN_ATTRIBUTE);
                    }
                }

                return entry;
            }

            /* title is vers 2.0 for A and LINK otherwise vers 4.0 */
            if (entry == AttributeTable.AttrTitle && (node.Tag == tt.TagA || node.Tag == tt.TagLink))
            {
                lexer.versions &= HtmlVersion.All;
            }
            else if ((entry.Versions & HtmlVersion.Xml) != 0)
            {
                /* XML-only attributes are reported unless XML input or output is selected */
                if (!lexer.Options.XmlTags && !lexer.Options.XmlOut)
                {
                    Report.AttrError(lexer, node, Attribute, Report.XML_ATTRIBUTE_VALUE);
                }
            }
            else
            {
                lexer.versions &= entry.Versions;
            }

            if (entry.AttrCheck != null)
            {
                entry.AttrCheck.Check(lexer, node, this);
            }

            return entry;
        }
예제 #9
0
        /// <summary>
        /// Removes an element the lexer says can be pruned, warning first unless
        /// it is a text node. An element that cannot be pruned but is an empty
        /// p is coerced to br and followed by a second, inferred br.
        /// </summary>
        /// <param name="lexer">Lexer supplying the options, the prune test and the inferred tag.</param>
        /// <param name="element">The node to examine.</param>
        public static void TrimEmptyElement(Lexer lexer, Node element)
        {
            TagTable tt = lexer.Options.tt;

            if (!lexer.CanPrune(element))
            {
                if (element.Tag == tt.TagP && element.Content == null)
                {
                    /* replace <p></p> by <br><br> to preserve formatting */
                    Node lineBreak = lexer.InferredTag("br");
                    Node.CoerceNode(lexer, element, tt.TagBr);
                    Node.InsertNodeAfterElement(element, lineBreak);
                }

                return;
            }

            if (element.Type != TextNode)
            {
                Report.Warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
            }

            DiscardElement(element);
        }
예제 #10
0
		/// <summary>
		/// Creates a detached node of the given type that refers to the range
		/// [start, end) of the supplied text buffer.
		/// </summary>
		/// <param name="type">Node type; for StartTag, StartEndTag and EndTag the tag table is consulted.</param>
		/// <param name="textarray">Buffer stored as the node's text array.</param>
		/// <param name="start">Stored start offset.</param>
		/// <param name="end">Stored end offset.</param>
		/// <param name="element">Element name stored on the node.</param>
		/// <param name="tt">Tag table whose FindTag is called with this node for tag-type nodes.</param>
		public Node(short type, byte[] textarray, int start, int end, string element, TagTable tt)
		{
			/* values supplied by the caller */
			_type = type;
			_textarray = textarray;
			_start = start;
			_end = end;
			_element = element;

			/* tree links: the node starts out unattached and childless */
			_parent = null;
			_prev = null;
			_next = null;
			_last = null;
			_content = null;

			/* remaining state starts out cleared */
			_closed = false;
			_isimplicit = false;
			_linebreak = false;
			_was = null;
			_tag = null;
			_attributes = null;

			/* presumably FindTag resolves _tag from the element name - TODO confirm */
			bool isTag = type == StartTag || type == StartEndTag || type == EndTag;
			if (isTag)
			{
				tt.FindTag(this);
			}
		}
예제 #11
0
        /*
         * Maps
         *   <em>hello </em><strong>world</strong>
         * to
         *   <em>hello</em> <strong>world</strong>
         *
         * When the last child of the element is a non-empty text node that
         * ends in a space (or byte 160), that character is dropped from the
         * text. For inline, non-field elements the lexer is told to insert
         * a space afterwards. Table cells keep a lone trailing character.
         */
        public static void TrimTrailingSpace(Lexer lexer, Node element, Node last)
        {
            TagTable tt = lexer.Options.tt;

            if (last == null || last.Type != Node.TextNode || last.End <= last.Start)
            {
                return;
            }

            byte trailing = lexer.lexbuf[last.End - 1];

            if (trailing != 160 && trailing != (byte)' ')
            {
                return;
            }

            /* take care with <td>&nbsp;</td> */
            bool isCell = element.Tag == tt.TagTd || element.Tag == tt.TagTh;
            if (isCell)
            {
                /* only trim when at least one character would remain */
                if (last.End > last.Start + 1)
                {
                    last.End = last.End - 1;
                }

                return;
            }

            last.End = last.End - 1;

            bool isInline = (element.Tag.Model & ContentModel.Inline) != 0;
            if (isInline && (element.Tag.Model & ContentModel.Field) == 0)
            {
                lexer.insertspace = true;
            }

            /* if empty string then delete from parse tree */
            if (last.Start == last.End)
            {
                TrimEmptyElement(lexer, last);
            }
        }
예제 #12
0
파일: Clean.cs 프로젝트: AlfieJ/TidyNet
 /// <summary>
 /// Creates a cleaner that keeps a reference to the supplied tag table.
 /// </summary>
 /// <param name="tt">Tag table stored in the <c>_tt</c> field.</param>
 public Clean(TagTable tt)
 {
     this._tt = tt;
 }
예제 #13
0
파일: Clean.cs 프로젝트: AlfieJ/TidyNet
        /// <summary>
        /// Reports whether the html element under <paramref name="root"/>
        /// carries an <c>xmlns:o</c> attribute.
        /// </summary>
        /// <param name="root">Node whose FindHtml is used to locate the html element.</param>
        /// <param name="tt">Tag table passed through to FindHtml.</param>
        /// <returns>true when an html element exists and has the attribute; otherwise false.</returns>
        public virtual bool IsWord2000(Node root, TagTable tt)
        {
            Node html = root.FindHtml(tt);

            if (html == null)
            {
                return false;
            }

            return html.GetAttrByName("xmlns:o") != null;
        }
예제 #14
0
        /// <summary>
        /// Initializes the options and, provided the default attribute table
        /// exists, creates a tag table and links it with the options in both
        /// directions.
        /// </summary>
        public Tidy()
        {
            _options = new TidyOptions();

            /* bail out before wiring a tag table if the default attribute table is missing */
            AttributeTable at = AttributeTable.DefaultAttributeTable;
            if (at == null)
            {
                return;
            }

            /* 'new' never evaluates to null, so the result is used directly */
            TagTable tt = new TagTable();
            tt.Options = _options;
            _options.tt = tt;

            /* value unused; the read is kept in case the getter creates the
               default entity table on first access - TODO confirm */
            EntityTable et = EntityTable.DefaultEntityTable;
        }
예제 #15
0
        /// <summary>
        /// Adds the warning message that corresponds to <paramref name="code"/>
        /// to the lexer's messages. Nothing is reported when the lexer has no
        /// message collection or when more than six errors have been recorded.
        /// </summary>
        /// <param name="lexer">Lexer whose messages, position and layout flags are updated.</param>
        /// <param name="element">Enclosing or affected element; which codes use it is visible in the branches below.</param>
        /// <param name="node">Node the warning is about; may be unused for codes whose message takes no arguments.</param>
        /// <param name="code">One of the warning codes tested below; unrecognized codes are ignored.</param>
        public static void Warning(Lexer lexer, Node element, Node node, short code)
        {
            TagTable tt = lexer.Options.tt;

            /* keep quiet after 6 errors */
            if (lexer.messages == null)
            {
                return;
            }
            if (lexer.messages.Errors > 6)
            {
                return;
            }

            /* on end of file adjust reported position to end of input */
            if (code == UNEXPECTED_END_OF_FILE)
            {
                lexer.lines   = lexer.input.curline;
                lexer.columns = lexer.input.curcol;
            }

            if (code == MISSING_ENDTAG_FOR)
            {
                AddMessage(lexer, String.Format(GetMessage("missing_endtag_for"), element.Element), MessageLevel.Warning);
            }
            else if (code == MISSING_ENDTAG_BEFORE)
            {
                AddMessage(lexer, String.Format(GetMessage("missing_endtag_before"), element.Element, Tag(lexer, node)), MessageLevel.Warning);
            }
            else if (code == DISCARDING_UNEXPECTED)
            {
                AddMessage(lexer, String.Format(GetMessage("discarding_unexpected"), Tag(lexer, node)), MessageLevel.Warning);
            }
            else if (code == NESTED_EMPHASIS)
            {
                AddMessage(lexer, String.Format(GetMessage("nested_emphasis"), Tag(lexer, node)), MessageLevel.Warning);
            }
            else if (code == COERCE_TO_ENDTAG)
            {
                AddMessage(lexer, String.Format(GetMessage("coerce_to_endtag"), element.Element), MessageLevel.Warning);
            }
            else if (code == NON_MATCHING_ENDTAG)
            {
                AddMessage(lexer, String.Format(GetMessage("non_matching_endtag_1"), Tag(lexer, node), element.Element), MessageLevel.Warning);
            }
            else if (code == TAG_NOT_ALLOWED_IN)
            {
                AddMessage(lexer, String.Format(GetMessage("tag_not_allowed_in"), Tag(lexer, node), element.Element), MessageLevel.Warning);
            }
            else if (code == DOCTYPE_AFTER_TAGS)
            {
                AddMessage(lexer, GetMessage("doctype_after_tags"), MessageLevel.Warning);
            }
            else if (code == MISSING_STARTTAG)
            {
                AddMessage(lexer, String.Format(GetMessage("missing_starttag"), node.Element), MessageLevel.Warning);
            }
            else if (code == UNEXPECTED_ENDTAG)
            {
                /* the suffixed variant also names the enclosing element when one is given */
                string message;
                if (element != null)
                {
                    message = String.Format(GetMessage("unexpected_endtag_suffix"), node.Element, element.Element);
                }
                else
                {
                    message = String.Format(GetMessage("unexpected_endtag"), node.Element);
                }

                AddMessage(lexer, message, MessageLevel.Warning);
            }
            else if (code == TOO_MANY_ELEMENTS)
            {
                string message;
                if (element != null)
                {
                    message = String.Format(GetMessage("too_many_elements_suffix"), node.Element, element.Element);
                }
                else
                {
                    message = String.Format(GetMessage("too_many_elements"), node.Element);
                }

                AddMessage(lexer, message, MessageLevel.Warning);
            }
            else if (code == USING_BR_INPLACE_OF)
            {
                AddMessage(lexer, GetMessage("using_br_inplace_of") + Tag(lexer, node), MessageLevel.Warning);
            }
            else if (code == INSERTING_TAG)
            {
                AddMessage(lexer, String.Format(GetMessage("inserting_tag"), node.Element), MessageLevel.Warning);
            }
            else if (code == CANT_BE_NESTED)
            {
                AddMessage(lexer, String.Format(GetMessage("cant_be_nested"), Tag(lexer, node)), MessageLevel.Warning);
            }
            else if (code == PROPRIETARY_ELEMENT)
            {
                AddMessage(lexer, String.Format(GetMessage("proprietary_element"), Tag(lexer, node)), MessageLevel.Warning);

                /* remember which layout-related proprietary element was seen */
                if (node.Tag == tt.TagLayer)
                {
                    lexer.badLayout |= USING_LAYER;
                }
                else if (node.Tag == tt.TagSpacer)
                {
                    lexer.badLayout |= USING_SPACER;
                }
                else if (node.Tag == tt.TagNobr)
                {
                    lexer.badLayout |= USING_NOBR;
                }
            }
            else if (code == OBSOLETE_ELEMENT)
            {
                /* element is the one being replaced and node is its replacement,
                   so the element is reported first and the node second */
                string message;
                if (element.Tag != null && (element.Tag.Model & ContentModel.Obsolete) != 0)
                {
                    message = String.Format(GetMessage("obsolete_element"), Tag(lexer, element), Tag(lexer, node));
                }
                else
                {
                    message = String.Format(GetMessage("replacing_element"), Tag(lexer, element), Tag(lexer, node));
                }

                AddMessage(lexer, message, MessageLevel.Warning);
            }
            else if (code == TRIM_EMPTY_ELEMENT)
            {
                AddMessage(lexer, String.Format(GetMessage("trim_empty_element"), Tag(lexer, node)), MessageLevel.Warning);
            }
            else if (code == MISSING_TITLE_ELEMENT)
            {
                AddMessage(lexer, GetMessage("missing_title_element"), MessageLevel.Warning);
            }
            else if (code == ILLEGAL_NESTING)
            {
                AddMessage(lexer, String.Format(GetMessage("illegal_nesting"), Tag(lexer, node)), MessageLevel.Warning);
            }
            else if (code == NOFRAMES_CONTENT)
            {
                AddMessage(lexer, String.Format(GetMessage("noframes_content"), Tag(lexer, node)), MessageLevel.Warning);
            }
            else if (code == INCONSISTENT_VERSION)
            {
                AddMessage(lexer, GetMessage("inconsistent_version"), MessageLevel.Warning);
            }
            else if (code == MALFORMED_DOCTYPE)
            {
                AddMessage(lexer, GetMessage("malformed_doctype"), MessageLevel.Warning);
            }
            else if (code == CONTENT_AFTER_BODY)
            {
                AddMessage(lexer, GetMessage("content_after_body"), MessageLevel.Warning);
            }
            else if (code == MALFORMED_COMMENT)
            {
                AddMessage(lexer, GetMessage("malformed_comment"), MessageLevel.Warning);
            }
            else if (code == BAD_COMMENT_CHARS)
            {
                AddMessage(lexer, GetMessage("bad_comment_chars"), MessageLevel.Warning);
            }
            else if (code == BAD_XML_COMMENT)
            {
                AddMessage(lexer, GetMessage("bad_xml_comment"), MessageLevel.Warning);
            }
            else if (code == BAD_CDATA_CONTENT)
            {
                AddMessage(lexer, GetMessage("bad_cdata_content"), MessageLevel.Warning);
            }
            else if (code == INCONSISTENT_NAMESPACE)
            {
                AddMessage(lexer, GetMessage("inconsistent_namespace"), MessageLevel.Warning);
            }
            else if (code == DTYPE_NOT_UPPER_CASE)
            {
                AddMessage(lexer, GetMessage("dtype_not_upper_case"), MessageLevel.Warning);
            }
            else if (code == UNEXPECTED_END_OF_FILE)
            {
                AddMessage(lexer, GetMessage("unexpected_end_of_file") + Tag(lexer, node), MessageLevel.Warning);
            }
        }
		/// <summary>
		/// Indicates whether whitespace should be preserved for this element.
		/// If an <c>xml:space</c> attribute is found, the result is <c>true</c>
		/// only when its value is exactly <c>preserve</c>; any other value,
		/// including a missing value, yields <c>false</c>. Without such an
		/// attribute, the element names <c>pre</c>, <c>script</c> and
		/// <c>style</c> yield <c>true</c>, as does an element for which the
		/// supplied tag table's <c>FindParser</c> returns <c>ParsePre</c>, and
		/// finally the element name <c>xsl:text</c>. Otherwise the result is
		/// <c>false</c>.
		/// </summary>
		/// <param name="element">The <c>Node</c> to test.</param>
		/// <param name="tt">The <c>TagTable</c> used for the parser lookup. This may be
		/// <c>null</c>, in which case that test is bypassed.
		/// </param>
		/// <returns> <c>true</c> or <c>false</c>, as explained above.</returns>
		public static bool XMLPreserveWhiteSpace(Node element, TagTable tt)
		{
			AttVal attribute;

			/* search attributes for xml:space */
			for (attribute = element.Attributes; attribute != null; attribute = attribute.Next)
			{
				/* static String.Equals is ordinal and tolerates a null name or value */
				if (String.Equals(attribute.Attribute, "xml:space"))
				{
					return String.Equals(attribute.Val, "preserve");
				}
			}

			/* kludge for html docs without explicit xml:space attribute */
			if (String.Equals(element.Element, "pre") || String.Equals(element.Element, "script") || String.Equals(element.Element, "style"))
			{
				return true;
			}

			if ((tt != null) && (tt.FindParser(element) == ParsePre))
			{
				return true;
			}

			/* kludge for XSL docs */
			if (String.Equals(element.Element, "xsl:text"))
			{
				return true;
			}

			return false;
		}
예제 #17
0
 /// <summary>
 /// Wraps the given node, passing it to the base constructor, and gives
 /// this document a newly created tag table.
 /// </summary>
 /// <param name="Adaptee">Node forwarded to the base constructor.</param>
 protected internal DomDocumentImpl(Node Adaptee)
     : base(Adaptee)
 {
     this.tt = new TagTable();
 }
예제 #18
0
파일: Node.cs 프로젝트: AlfieJ/TidyNet
        /// <summary>
        /// Finds the body element: the first body-tagged child of the first
        /// html-tagged child of this node.
        /// </summary>
        /// <param name="tt">Tag table providing the TagHtml and TagBody entries to match against.</param>
        /// <returns>The body node, or null if the html element or the body element is absent.</returns>
        public virtual Node FindBody(TagTable tt)
        {
            for (Node child = _content; child != null; child = child.Next)
            {
                if (child.Tag != tt.TagHtml)
                {
                    continue;
                }

                /* html found: only its children are searched for body */
                for (Node candidate = child.Content; candidate != null; candidate = candidate.Next)
                {
                    if (candidate.Tag == tt.TagBody)
                    {
                        return candidate;
                    }
                }

                return null;
            }

            return null;
        }
예제 #19
0
파일: Node.cs 프로젝트: AlfieJ/TidyNet
        /*
        Unexpected content found in a table row is placed directly in
        front of the table that contains the row, in accordance with
        Netscape and IE. The node is assumed not to have been inserted
        into the row. If the row has no table ancestor nothing is done.
        */
        public static void MoveBeforeTable(Node row, Node node, TagTable tt)
        {
            for (Node ancestor = row.Parent; ancestor != null; ancestor = ancestor.Parent)
            {
                /* keep climbing until the table element is found */
                if (ancestor.Tag != tt.tagTable)
                {
                    continue;
                }

                Node owner = ancestor.Parent;

                /* a table that was the first child hands that position to the node */
                if (owner.Content == ancestor)
                {
                    owner.Content = node;
                }

                node.Prev = ancestor.Prev;
                node.Next = ancestor;
                ancestor.Prev = node;
                node.Parent = owner;

                /* repair the forward link of the former previous sibling */
                if (node.Prev != null)
                {
                    node.Prev.Next = node;
                }

                return;
            }
        }
예제 #20
0
파일: Node.cs 프로젝트: AlfieJ/TidyNet
        /* return the first child of this node tagged as html, or null when there is none */
        public virtual Node FindHtml(TagTable tt)
        {
            for (Node child = _content; child != null; child = child.Next)
            {
                if (child.Tag == tt.TagHtml)
                {
                    return child;
                }
            }

            return null;
        }
예제 #21
0
파일: Node.cs 프로젝트: AlfieJ/TidyNet
        /// <summary>
        /// Returns the first head-tagged child of the html element found by
        /// <c>FindHtml</c>.
        /// </summary>
        /// <param name="tt">Tag table providing the TagHead entry and passed on to FindHtml.</param>
        /// <returns>The head node, or null if the html element or its head child is missing.</returns>
        public virtual Node FindHead(TagTable tt)
        {
            Node html = FindHtml(tt);

            if (html != null)
            {
                for (Node child = html.Content; child != null; child = child.Next)
                {
                    if (child.Tag == tt.TagHead)
                    {
                        return child;
                    }
                }
            }

            return null;
        }
예제 #22
0
 /// <summary>
 /// Creates the document wrapper around a node and assigns it its own,
 /// newly constructed tag table.
 /// </summary>
 /// <param name="Adaptee">Node handed to the base class constructor.</param>
 protected internal DomDocumentImpl(Node Adaptee) : base(Adaptee)
 {
     this.tt = new TagTable();
 }
예제 #23
0
 /// <summary>
 /// Builds an unlinked node over the given slice of a text buffer and, for
 /// tag-type nodes, hands the node to the tag table's FindTag.
 /// </summary>
 /// <param name="type">Node type; StartTag, StartEndTag and EndTag trigger the FindTag call.</param>
 /// <param name="textarray">Buffer kept as the node's text array.</param>
 /// <param name="start">Start offset kept on the node.</param>
 /// <param name="end">End offset kept on the node.</param>
 /// <param name="element">Element name kept on the node.</param>
 /// <param name="tt">Tag table used only for tag-type nodes.</param>
 public Node(short type, byte[] textarray, int start, int end, string element, TagTable tt)
 {
     /* caller-provided state */
     _type      = type;
     _element   = element;
     _textarray = textarray;
     _start     = start;
     _end       = end;

     /* no relatives, children or attributes yet */
     _parent     = null;
     _prev       = null;
     _next       = null;
     _content    = null;
     _last       = null;
     _attributes = null;

     /* dictionary references and flags start out cleared */
     _tag        = null;
     _was        = null;
     _closed     = false;
     _isimplicit = false;
     _linebreak  = false;

     if (type == StartTag || type == StartEndTag || type == EndTag)
     {
         /* presumably fills in _tag from the element name - TODO confirm */
         tt.FindTag(this);
     }
 }