/// <summary>
/// Locates the body element by scanning this node's children for the html
/// element and then scanning the html element's children for the body element.
/// </summary>
/// <param name="tt">Tag table supplying the html and body tag definitions.</param>
/// <returns>The body node, or <c>null</c> when no html or no body element is found.</returns>
public virtual Node FindBody(TagTable tt)
{
    /* first pass: find the html element among our children */
    Node html;
    for (html = _content; html != null; html = html.Next)
    {
        if (html.Tag == tt.TagHtml)
        {
            break;
        }
    }

    if (html == null)
    {
        return null;
    }

    /* second pass: find the body element among html's children */
    Node body;
    for (body = html.Content; body != null; body = body.Next)
    {
        if (body.Tag == tt.TagBody)
        {
            break;
        }
    }

    return body;
}
/*
 * Unexpected content in a table row is moved to just before
 * the table, in accordance with Netscape and IE. This code
 * assumes that node hasn't been inserted into the row.
 *
 * Nothing happens when no ancestor of row carries the table tag.
 */
public static void MoveBeforeTable(Node row, Node node, TagTable tt)
{
    /* walk up from the row until the enclosing table element is reached */
    Node enclosingTable = row.Parent;
    while (enclosingTable != null && enclosingTable.Tag != tt.tagTable)
    {
        enclosingTable = enclosingTable.Parent;
    }

    if (enclosingTable == null)
    {
        return;
    }

    Node container = enclosingTable.Parent;

    /* if the table was the first child, node becomes the new first child */
    if (container.Content == enclosingTable)
    {
        container.Content = node;
    }

    /* splice node into the sibling chain immediately before the table;
       node.Prev must be taken from the table before the table's Prev is overwritten */
    node.Prev = enclosingTable.Prev;
    node.Next = enclosingTable;
    enclosingTable.Prev = node;
    node.Parent = container;

    if (node.Prev != null)
    {
        node.Prev.Next = node;
    }
}
/// <summary>
/// Creates a Tidy instance with a fresh <c>TidyOptions</c> and, when the default
/// attribute table is available, a new <c>TagTable</c> linked to those options
/// in both directions.
/// </summary>
public Tidy()
{
    _options = new TidyOptions();

    /* without the default attribute table no tag table is set up */
    AttributeTable at = AttributeTable.DefaultAttributeTable;
    if (at == null)
    {
        return;
    }

    /* 'new' on a class never yields null, so no null check is needed here */
    TagTable tt = new TagTable();
    tt.Options = _options;
    _options.tt = tt;

    /* The default entity table is still read, as before, but its value is not used.
       NOTE(review): presumably this read exists to force the table's initialization - confirm. */
    EntityTable et = EntityTable.DefaultEntityTable;
}
/// <summary>
/// Finds the html element among this node's children.
/// </summary>
/// <param name="tt">Tag table supplying the html tag definition.</param>
/// <returns>The first child whose tag is the html tag, or <c>null</c> if there is none.</returns>
public virtual Node FindHtml(TagTable tt)
{
    Node candidate = _content;

    while (candidate != null && candidate.Tag != tt.TagHtml)
    {
        candidate = candidate.Next;
    }

    return candidate;
}
/*
 * The doctype has been found after other tags,
 * and needs moving to before the html element.
 *
 * A DOCTYPE_AFTER_TAGS warning is reported first; then the parent chain
 * is followed from element up to the html element, and the doctype is
 * inserted before it.
 */
public static void InsertDocType(Lexer lexer, Node element, Node doctype)
{
    TagTable tags = lexer.Options.tt;

    Report.Warning(lexer, element, doctype, Report.DOCTYPE_AFTER_TAGS);

    /* climb from the reporting element to its html ancestor */
    Node htmlAncestor;
    for (htmlAncestor = element; htmlAncestor.Tag != tags.TagHtml; htmlAncestor = htmlAncestor.Parent)
    {
        /* nothing to do: the loop header performs the climb */
    }

    InsertNodeBeforeElement(htmlAncestor, doctype);
}
/// <summary>
/// Finds the head element: locates the html element via <c>FindHtml</c> and
/// then scans its children for the head tag.
/// </summary>
/// <param name="tt">Tag table supplying the html and head tag definitions.</param>
/// <returns>The head node, or <c>null</c> when no html or no head element is found.</returns>
public virtual Node FindHead(TagTable tt)
{
    Node html = FindHtml(tt);
    if (html == null)
    {
        return null;
    }

    Node child = html.Content;
    while (child != null && child.Tag != tt.TagHead)
    {
        child = child.Next;
    }

    return child;
}
/*
 * Move initial and trailing space out.
 * This routine maps:
 *
 *     hello<em> world</em>
 * to
 *     hello <em>world</em>
 * and
 *     <em>hello </em><strong>world</strong>
 * to
 *     <em>hello</em> <strong>world</strong>
 *
 * The leading trim is skipped for the pre element; the trailing
 * trim is applied whenever the last child is a text node.
 */
public static void TrimSpaces(Lexer lexer, Node element)
{
    Node firstChild = element.Content;
    TagTable tags = lexer.Options.tt;

    bool startsWithText = firstChild != null && firstChild.Type == Node.TextNode;
    if (startsWithText && element.Tag != tags.TagPre)
    {
        TrimInitialSpace(lexer, element, firstChild);
    }

    /* re-read the last child only after the leading trim has run */
    Node lastChild = element.Last;
    bool endsWithText = lastChild != null && lastChild.Type == Node.TextNode;
    if (endsWithText)
    {
        TrimTrailingSpace(lexer, element, lastChild);
    }
}
/// <summary>
/// Checks this attribute value against its dictionary entry (<c>Dict</c>),
/// narrowing <c>lexer.versions</c> and running the entry's checker when there is one.
/// Unknown attributes are reported, except on proprietary elements.
/// </summary>
/// <param name="lexer">Lexer whose options and version mask are consulted and updated.</param>
/// <param name="node">Node carrying this attribute.</param>
/// <returns>The dictionary entry for this attribute, or <c>null</c> when it has none.</returns>
public virtual Attribute CheckAttribute(Lexer lexer, Node node)
{
    TagTable tags = lexer.Options.tt;

    /* uniqueness is only checked when neither ASP nor PHP content is attached */
    if (Asp == null && Php == null)
    {
        CheckUniqueAttribute(lexer, node);
    }

    Attribute definition = Dict;

    if (definition == null)
    {
        /* ignore unknown attributes for proprietary elements */
        bool reportUnknown =
            !lexer.Options.XmlTags
            && node.Tag != null
            && _asp == null
            && (node.Tag.Versions & HtmlVersion.Proprietary) == HtmlVersion.Unknown;

        if (reportUnknown)
        {
            Report.AttrError(lexer, node, Attribute, Report.UNKNOWN_ATTRIBUTE);
        }

        return definition;
    }

    /* title is vers 2.0 for A and LINK otherwise vers 4.0 */
    bool titleOnAnchorOrLink =
        definition == AttributeTable.AttrTitle
        && (node.Tag == tags.TagA || node.Tag == tags.TagLink);

    if (titleOnAnchorOrLink)
    {
        lexer.versions &= HtmlVersion.All;
    }
    else if ((definition.Versions & HtmlVersion.Xml) != 0)
    {
        /* XML-only attribute: complain unless XML input or output is enabled */
        if (!lexer.Options.XmlTags && !lexer.Options.XmlOut)
        {
            Report.AttrError(lexer, node, Attribute, Report.XML_ATTRIBUTE_VALUE);
        }
    }
    else
    {
        lexer.versions &= definition.Versions;
    }

    if (definition.AttrCheck != null)
    {
        definition.AttrCheck.Check(lexer, node, this);
    }

    return definition;
}
/// <summary>
/// Removes an element the lexer says can be pruned, or turns an empty
/// <c>p</c> element into two <c>br</c> elements.
/// </summary>
/// <param name="lexer">Lexer used for the prune test, warnings and inferred tags.</param>
/// <param name="element">Element to examine.</param>
public static void TrimEmptyElement(Lexer lexer, Node element)
{
    TagTable tags = lexer.Options.tt;

    if (lexer.CanPrune(element))
    {
        /* text nodes are discarded silently; anything else gets a warning */
        if (element.Type != TextNode)
        {
            Report.Warning(lexer, element, null, Report.TRIM_EMPTY_ELEMENT);
        }

        DiscardElement(element);
        return;
    }

    bool isEmptyParagraph = element.Tag == tags.TagP && element.Content == null;
    if (!isEmptyParagraph)
    {
        return;
    }

    /* replace <p></p> by <br><br> to preserve formatting */
    Node secondBreak = lexer.InferredTag("br");
    Node.CoerceNode(lexer, element, tags.TagBr);
    Node.InsertNodeAfterElement(element, secondBreak);
}
/// <summary>
/// Creates a detached node (no parent, siblings, children or attributes) of the
/// given type, and looks the node up in the tag table when it is a tag.
/// </summary>
/// <param name="type">Node type; start, start-end and end tags trigger the tag lookup.</param>
/// <param name="textarray">Byte buffer stored on the node.</param>
/// <param name="start">Start position stored on the node (presumably an offset into the buffer - confirm).</param>
/// <param name="end">End position stored on the node (presumably an offset into the buffer - confirm).</param>
/// <param name="element">Element name stored on the node.</param>
/// <param name="tt">Tag table whose <c>FindTag</c> is called for tag nodes.</param>
public Node(short type, byte[] textarray, int start, int end, string element, TagTable tt)
{
    /* values supplied by the caller */
    _type = type;
    _textarray = textarray;
    _start = start;
    _end = end;
    _element = element;

    /* tree links: the node starts out unattached and childless */
    _parent = null;
    _prev = null;
    _next = null;
    _content = null;
    _last = null;

    /* flags and lookup results start out cleared */
    _closed = false;
    _isimplicit = false;
    _linebreak = false;
    _was = null;
    _tag = null;
    _attributes = null;

    bool isTagNode = type == StartTag || type == StartEndTag || type == EndTag;
    if (isTagNode)
    {
        tt.FindTag(this);
    }
}
/*
 * This maps
 *     <em>hello </em><strong>world</strong>
 * to
 *     <em>hello</em> <strong>world</strong>
 *
 * If the last child of element is a text node, then trim one trailing
 * white space character (space or byte 160), moving it to after the
 * element's end tag.
 */
public static void TrimTrailingSpace(Lexer lexer, Node element, Node last)
{
    TagTable tags = lexer.Options.tt;

    /* only a non-empty text node can carry a trailing space */
    if (last == null || last.Type != Node.TextNode || last.End <= last.Start)
    {
        return;
    }

    byte trailing = lexer.lexbuf[last.End - 1];
    if (trailing != 160 && trailing != (byte)' ')
    {
        return;
    }

    /* take care with <td> </td>: never trim a cell's text down to nothing */
    bool isTableCell = element.Tag == tags.TagTd || element.Tag == tags.TagTh;
    if (isTableCell)
    {
        if (last.End > last.Start + 1)
        {
            last.End -= 1;
        }

        return;
    }

    last.End -= 1;

    /* inline, non-field elements ask the lexer to re-insert the space afterwards */
    if ((element.Tag.Model & ContentModel.Inline) != 0
        && (element.Tag.Model & ContentModel.Field) == 0)
    {
        lexer.insertspace = true;
    }

    /* if empty string then delete from parse tree */
    if (last.Start == last.End)
    {
        TrimEmptyElement(lexer, last);
    }
}
/// <summary>
/// Creates a cleaner that keeps a reference to the given tag table.
/// </summary>
/// <param name="tt">Tag table stored for later use by this instance.</param>
public Clean(TagTable tt)
{
    _tt = tt;
}
/// <summary>
/// Tests whether the document's html element carries an <c>xmlns:o</c> attribute.
/// </summary>
/// <param name="root">Node on which <c>FindHtml</c> is called.</param>
/// <param name="tt">Tag table passed through to <c>FindHtml</c>.</param>
/// <returns><c>true</c> when an html element exists and has an <c>xmlns:o</c> attribute.</returns>
public virtual bool IsWord2000(Node root, TagTable tt)
{
    Node htmlElement = root.FindHtml(tt);
    if (htmlElement == null)
    {
        return false;
    }

    return htmlElement.GetAttrByName("xmlns:o") != null;
}
/// <summary>
/// Reports the warning identified by <paramref name="code"/> through
/// <c>AddMessage</c> at <c>MessageLevel.Warning</c>.
/// </summary>
/// <remarks>
/// Nothing is reported when <c>lexer.messages</c> is null or when its error
/// count exceeds six. Codes not handled by the chain below are ignored.
/// </remarks>
/// <param name="lexer">Lexer providing options, the message sink and the reported position.</param>
/// <param name="element">
/// Element the warning relates to. Several codes dereference it, so it must
/// not be null for those codes.
/// </param>
/// <param name="node">
/// Node the warning relates to. Several codes dereference it. It may be null for
/// TRIM_EMPTY_ELEMENT, in which case <paramref name="element"/> is named instead.
/// </param>
/// <param name="code">Warning code constant selecting the message.</param>
public static void Warning(Lexer lexer, Node element, Node node, short code)
{
    TagTable tt = lexer.Options.tt;

    /* keep quiet after 6 errors */
    if (lexer.messages == null)
    {
        return;
    }
    if (lexer.messages.Errors > 6)
    {
        return;
    }

    /* on end of file adjust reported position to end of input */
    if (code == UNEXPECTED_END_OF_FILE)
    {
        lexer.lines = lexer.input.curline;
        lexer.columns = lexer.input.curcol;
    }

    if (code == MISSING_ENDTAG_FOR)
    {
        AddMessage(lexer, String.Format(GetMessage("missing_endtag_for"), element.Element), MessageLevel.Warning);
    }
    else if (code == MISSING_ENDTAG_BEFORE)
    {
        AddMessage(lexer, String.Format(GetMessage("missing_endtag_before"), element.Element, Tag(lexer, node)), MessageLevel.Warning);
    }
    else if (code == DISCARDING_UNEXPECTED)
    {
        AddMessage(lexer, String.Format(GetMessage("discarding_unexpected"), Tag(lexer, node)), MessageLevel.Warning);
    }
    else if (code == NESTED_EMPHASIS)
    {
        AddMessage(lexer, String.Format(GetMessage("nested_emphasis"), Tag(lexer, node)), MessageLevel.Warning);
    }
    else if (code == COERCE_TO_ENDTAG)
    {
        AddMessage(lexer, String.Format(GetMessage("coerce_to_endtag"), element.Element), MessageLevel.Warning);
    }
    else if (code == NON_MATCHING_ENDTAG)
    {
        AddMessage(lexer, String.Format(GetMessage("non_matching_endtag_1"), Tag(lexer, node), element.Element), MessageLevel.Warning);
    }
    else if (code == TAG_NOT_ALLOWED_IN)
    {
        AddMessage(lexer, String.Format(GetMessage("tag_not_allowed_in"), Tag(lexer, node), element.Element), MessageLevel.Warning);
    }
    else if (code == DOCTYPE_AFTER_TAGS)
    {
        AddMessage(lexer, GetMessage("doctype_after_tags"), MessageLevel.Warning);
    }
    else if (code == MISSING_STARTTAG)
    {
        AddMessage(lexer, String.Format(GetMessage("missing_starttag"), node.Element), MessageLevel.Warning);
    }
    else if (code == UNEXPECTED_ENDTAG)
    {
        /* the "_suffix" variant also names the enclosing element when one is given */
        string message;
        if (element != null)
        {
            message = String.Format(GetMessage("unexpected_endtag_suffix"), node.Element, element.Element);
        }
        else
        {
            message = String.Format(GetMessage("unexpected_endtag"), node.Element);
        }
        AddMessage(lexer, message, MessageLevel.Warning);
    }
    else if (code == TOO_MANY_ELEMENTS)
    {
        string message;
        if (element != null)
        {
            message = String.Format(GetMessage("too_many_elements_suffix"), node.Element, element.Element);
        }
        else
        {
            message = String.Format(GetMessage("too_many_elements"), node.Element);
        }
        AddMessage(lexer, message, MessageLevel.Warning);
    }
    else if (code == USING_BR_INPLACE_OF)
    {
        AddMessage(lexer, GetMessage("using_br_inplace_of") + Tag(lexer, node), MessageLevel.Warning);
    }
    else if (code == INSERTING_TAG)
    {
        AddMessage(lexer, String.Format(GetMessage("inserting_tag"), node.Element), MessageLevel.Warning);
    }
    else if (code == CANT_BE_NESTED)
    {
        AddMessage(lexer, String.Format(GetMessage("cant_be_nested"), Tag(lexer, node)), MessageLevel.Warning);
    }
    else if (code == PROPRIETARY_ELEMENT)
    {
        AddMessage(lexer, String.Format(GetMessage("proprietary_element"), Tag(lexer, node)), MessageLevel.Warning);

        /* remember which proprietary layout element was seen */
        if (node.Tag == tt.TagLayer)
        {
            lexer.badLayout |= USING_LAYER;
        }
        else if (node.Tag == tt.TagSpacer)
        {
            lexer.badLayout |= USING_SPACER;
        }
        else if (node.Tag == tt.TagNobr)
        {
            lexer.badLayout |= USING_NOBR;
        }
    }
    else if (code == OBSOLETE_ELEMENT)
    {
        /* element is the one being replaced (its tag decides the wording) and node is
           the replacement, so the message names element first and node second */
        string message;
        if (element.Tag != null && (element.Tag.Model & ContentModel.Obsolete) != 0)
        {
            message = String.Format(GetMessage("obsolete_element"), Tag(lexer, element), Tag(lexer, node));
        }
        else
        {
            message = String.Format(GetMessage("replacing_element"), Tag(lexer, element), Tag(lexer, node));
        }
        AddMessage(lexer, message, MessageLevel.Warning);
    }
    else if (code == TRIM_EMPTY_ELEMENT)
    {
        /* callers may report the trimmed element with a null node; name the element then */
        Node trimmed = (node != null) ? node : element;
        AddMessage(lexer, String.Format(GetMessage("trim_empty_element"), Tag(lexer, trimmed)), MessageLevel.Warning);
    }
    else if (code == MISSING_TITLE_ELEMENT)
    {
        AddMessage(lexer, GetMessage("missing_title_element"), MessageLevel.Warning);
    }
    else if (code == ILLEGAL_NESTING)
    {
        AddMessage(lexer, String.Format(GetMessage("illegal_nesting"), Tag(lexer, node)), MessageLevel.Warning);
    }
    else if (code == NOFRAMES_CONTENT)
    {
        AddMessage(lexer, String.Format(GetMessage("noframes_content"), Tag(lexer, node)), MessageLevel.Warning);
    }
    else if (code == INCONSISTENT_VERSION)
    {
        AddMessage(lexer, GetMessage("inconsistent_version"), MessageLevel.Warning);
    }
    else if (code == MALFORMED_DOCTYPE)
    {
        AddMessage(lexer, GetMessage("malformed_doctype"), MessageLevel.Warning);
    }
    else if (code == CONTENT_AFTER_BODY)
    {
        AddMessage(lexer, GetMessage("content_after_body"), MessageLevel.Warning);
    }
    else if (code == MALFORMED_COMMENT)
    {
        AddMessage(lexer, GetMessage("malformed_comment"), MessageLevel.Warning);
    }
    else if (code == BAD_COMMENT_CHARS)
    {
        AddMessage(lexer, GetMessage("bad_comment_chars"), MessageLevel.Warning);
    }
    else if (code == BAD_XML_COMMENT)
    {
        AddMessage(lexer, GetMessage("bad_xml_comment"), MessageLevel.Warning);
    }
    else if (code == BAD_CDATA_CONTENT)
    {
        AddMessage(lexer, GetMessage("bad_cdata_content"), MessageLevel.Warning);
    }
    else if (code == INCONSISTENT_NAMESPACE)
    {
        AddMessage(lexer, GetMessage("inconsistent_namespace"), MessageLevel.Warning);
    }
    else if (code == DTYPE_NOT_UPPER_CASE)
    {
        AddMessage(lexer, GetMessage("dtype_not_upper_case"), MessageLevel.Warning);
    }
    else if (code == UNEXPECTED_END_OF_FILE)
    {
        AddMessage(lexer, GetMessage("unexpected_end_of_file") + Tag(lexer, node), MessageLevel.Warning);
    }
}
/// <summary>
/// Indicates whether or not whitespace should be preserved for this element.
/// If an <c>xml:space</c> attribute is found, the result is <c>true</c> only when
/// its value is exactly <c>preserve</c>; any other value, including a missing
/// value, gives <c>false</c>. If no <c>xml:space</c> attribute is found, the
/// element names <c>pre</c>, <c>script</c>, <c>style</c> and <c>xsl:text</c>
/// give <c>true</c>, as does an element for which the supplied tag table's
/// <c>FindParser</c> returns <c>ParsePre</c>. Otherwise the result is <c>false</c>.
/// All name and value comparisons are ordinal and case-sensitive.
/// </summary>
/// <param name="element">The <c>Node</c> to test to see if whitespace should be preserved.</param>
/// <param name="tt">
/// The <c>TagTable</c> used for the <c>FindParser</c> test. This may be
/// <c>null</c>, in which case that test is bypassed.
/// </param>
/// <returns><c>true</c> or <c>false</c>, as explained above.</returns>
public static bool XMLPreserveWhiteSpace(Node element, TagTable tt)
{
    /* search attributes for xml:space; the first match decides the result */
    for (AttVal attribute = element.Attributes; attribute != null; attribute = attribute.Next)
    {
        /* static String.Equals is ordinal and tolerates a null name or value */
        if (String.Equals(attribute.Attribute, "xml:space"))
        {
            return String.Equals(attribute.Val, "preserve");
        }
    }

    /* kludge for html docs without explicit xml:space attribute */
    string name = element.Element;
    if (String.Equals(name, "pre") || String.Equals(name, "script") || String.Equals(name, "style"))
    {
        return true;
    }

    if (tt != null && tt.FindParser(element) == ParsePre)
    {
        return true;
    }

    /* kludge for XSL docs */
    return String.Equals(name, "xsl:text");
}
/// <summary>
/// Wraps the given node as a DOM document and gives the document its own,
/// newly created tag table.
/// </summary>
/// <param name="Adaptee">Node passed to the base constructor.</param>
protected internal DomDocumentImpl(Node Adaptee)
    : base(Adaptee)
{
    TagTable freshTable = new TagTable();
    tt = freshTable;
}
/// <summary>
/// Finds the body element: the first child of this node tagged html is
/// located, and its children are then searched for the body tag.
/// </summary>
/// <param name="tt">Tag table supplying the html and body tag definitions.</param>
/// <returns>The body node, or <c>null</c> when no html or no body element is found.</returns>
public virtual Node FindBody(TagTable tt)
{
    Node htmlElement = _content;
    while (htmlElement != null)
    {
        if (htmlElement.Tag == tt.TagHtml)
        {
            break;
        }

        htmlElement = htmlElement.Next;
    }

    if (htmlElement == null)
    {
        return null;
    }

    for (Node child = htmlElement.Content; child != null; child = child.Next)
    {
        if (child.Tag == tt.TagBody)
        {
            return child;
        }
    }

    return null;
}
/*
 * Unexpected content in a table row is moved to just before the table,
 * in accordance with Netscape and IE. This code assumes that node
 * hasn't been inserted into the row.
 *
 * Only the nearest ancestor of row carrying the table tag is used;
 * when there is none, nothing is changed.
 */
public static void MoveBeforeTable(Node row, Node node, TagTable tt)
{
    for (Node current = row.Parent; current != null; current = current.Parent)
    {
        /* keep climbing until the table element is reached */
        if (current.Tag != tt.tagTable)
        {
            continue;
        }

        Node tableParent = current.Parent;

        /* the table was the first child: node takes over that position */
        if (tableParent.Content == current)
        {
            tableParent.Content = node;
        }

        /* link node in directly ahead of the table; read the table's old
           Prev before replacing it */
        node.Prev = current.Prev;
        node.Next = current;
        current.Prev = node;
        node.Parent = tableParent;

        if (node.Prev != null)
        {
            node.Prev.Next = node;
        }

        return;
    }
}
/// <summary>
/// Searches this node's children, in order, for the html element.
/// </summary>
/// <param name="tt">Tag table supplying the html tag definition.</param>
/// <returns>The first child tagged html, or <c>null</c> when none is present.</returns>
public virtual Node FindHtml(TagTable tt)
{
    for (Node child = _content; child != null; child = child.Next)
    {
        if (child.Tag == tt.TagHtml)
        {
            return child;
        }
    }

    return null;
}
/// <summary>
/// Searches the children of the html element (as returned by <c>FindHtml</c>)
/// for the head element.
/// </summary>
/// <param name="tt">Tag table supplying the html and head tag definitions.</param>
/// <returns>The head node, or <c>null</c> when no html or no head element is found.</returns>
public virtual Node FindHead(TagTable tt)
{
    Node htmlElement = FindHtml(tt);

    if (htmlElement != null)
    {
        for (Node child = htmlElement.Content; child != null; child = child.Next)
        {
            if (child.Tag == tt.TagHead)
            {
                return child;
            }
        }
    }

    return null;
}