/*
 * Applies the clean-up rules to a node, repeatedly, until none of them matches.
 *
 * The rules are tried in a fixed order: Dir2Div, NestedList, Center2Div, MergeDivs,
 * BlockStyle, InlineStyle, Font2Span. As soon as one rule reports a change, the
 * whole sequence is restarted on the node that the rule handed back through the
 * shared MutableObject (the same node when the rule left the slot untouched).
 *
 * lexer - passed through to every rule.
 * node  - node to start from; may be null.
 *
 * Returns the node at which the caller should resume. This is null when a rule
 * removed the node and handed back a null successor (e.g. DiscardContainer on an
 * empty element that had no next sibling).
 */
private Node CleanNode(Lexer lexer, Node node)
{
    MutableObject cursor = new MutableObject();
    Node next = node;

    /* A rule may report success and hand back null (nothing follows the removed
       node), so the node must be null-checked before asking whether it is an
       element. */
    while (node != null && node.IsElement)
    {
        cursor.Object = node;

        /* Short-circuit evaluation: the first rule that fires stops the chain,
           exactly like restarting the loop right after it. */
        bool changed =
            Dir2Div(lexer, node, cursor)
            || NestedList(lexer, node, cursor)
            || Center2Div(lexer, node, cursor)
            || MergeDivs(lexer, node, cursor)
            || BlockStyle(lexer, node, cursor)
            || InlineStyle(lexer, node, cursor)
            || Font2Span(lexer, node, cursor);

        /* Read the slot even when nothing fired: Font2Span can replace the node
           (font tags dropped) and still return false. */
        next = (Node) cursor.Object;

        if (!changed)
        {
            break;
        }

        node = next;
    }

    return next;
}
/*
 * Symptom: the only child of a block-level element is a presentation element
 * such as B, I or FONT.
 *
 * Action: fold the child's presentation into a style property on the block and
 * strip the child element, leaving its children in place.
 *
 * Example:
 *
 *   <p>
 *     <b><font face="Arial" size="6">Draft Recommended Practice</font></b>
 *   </p>
 *
 * becomes:
 *
 *   <p style="font-weight: bold; font-family: Arial; font-size: 6">
 *     Draft Recommended Practice
 *   </p>
 *
 * The rule applies to elements whose content model includes Block, List,
 * Deflist or Table, except table, tr and li, which are skipped entirely.
 * For every other matching element except caption, TextAlign is called first
 * (the align attribute is replaced by a style; the original comment cites CSS
 * problems in Navigator 4 as the reason for the exclusions).
 *
 * lexer - passed to TextAlign.
 * node  - candidate block element.
 * pnode - not used by this rule.
 *
 * Returns true when a child element was merged into the block.
 */
private bool BlockStyle(Lexer lexer, Node node, MutableObject pnode)
{
    bool isBlockLike =
        (node.Tag.Model & (ContentModel.Block | ContentModel.List | ContentModel.Deflist | ContentModel.Table)) != 0;

    if (!isBlockLike)
    {
        return false;
    }

    if (node.Tag == _tt.tagTable || node.Tag == _tt.TagTr || node.Tag == _tt.TagLi)
    {
        return false;
    }

    /* The align attribute is handled even when no child ends up being merged. */
    if (node.Tag != _tt.TagCaption)
    {
        TextAlign(lexer, node);
    }

    /* The child must exist and must have no peers. */
    Node onlyChild = node.Content;
    if (onlyChild == null || onlyChild.Next != null)
    {
        return false;
    }

    if (onlyChild.Tag == _tt.TagB)
    {
        MergeStyles(node, onlyChild);
        AddStyleProperty(node, "font-weight: bold");
    }
    else if (onlyChild.Tag == _tt.TagI)
    {
        MergeStyles(node, onlyChild);
        AddStyleProperty(node, "font-style: italic");
    }
    else if (onlyChild.Tag == _tt.TagFont)
    {
        MergeStyles(node, onlyChild);
        AddFontStyles(node, onlyChild.Attributes);
    }
    else
    {
        return false;
    }

    StripOnlyChild(node);
    return true;
}
/*
 * Symptom: <center>
 *
 * Action: replace <center> by <div style="text-align: center">.
 *
 * When the DropFontTags option is set the element is not converted. Instead it
 * is removed with DiscardContainer and an inferred <br> is linked in: after the
 * last of its former children, or, when it had no content, at the position the
 * element used to occupy.
 *
 * lexer - supplies the options and creates the inferred <br>.
 * node  - candidate element.
 * pnode - set by DiscardContainer when the element is removed (first former
 *         child, or the next sibling for an empty element); the inserted <br>
 *         itself is not written to it.
 *
 * Returns true when node was a <center> element.
 */
private bool Center2Div(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag != _tt.TagCenter)
    {
        return false;
    }

    if (!lexer.Options.DropFontTags)
    {
        node.Tag = _tt.TagDiv;
        node.Element = "div";
        AddStyleProperty(node, "text-align: center");
        return true;
    }

    /* Neighbours are captured up front: DiscardContainer clears the element's
       Next and Content links. */
    Node parent = node.Parent;
    Node lineBreak;

    if (node.Content != null)
    {
        Node lastChild = node.Last;

        DiscardContainer(node, pnode);
        lineBreak = lexer.InferredTag("br");

        /* Splice the <br> in directly after the last former child. */
        if (lastChild.Next != null)
        {
            lastChild.Next.Prev = lineBreak;
        }

        lineBreak.Next = lastChild.Next;
        lastChild.Next = lineBreak;
        lineBreak.Prev = lastChild;

        if (parent.Last == lastChild)
        {
            parent.Last = lineBreak;
        }

        lineBreak.Parent = parent;
    }
    else
    {
        Node before = node.Prev;
        Node after = node.Next;

        DiscardContainer(node, pnode);
        lineBreak = lexer.InferredTag("br");

        /* The <br> takes over the slot of the empty element. */
        lineBreak.Next = after;
        lineBreak.Prev = before;
        lineBreak.Parent = parent;

        if (after != null)
        {
            after.Prev = lineBreak;
        }
        else
        {
            parent.Last = lineBreak;
        }

        if (before != null)
        {
            before.Next = lineBreak;
        }
        else
        {
            parent.Content = lineBreak;
        }
    }

    return true;
}
/*
 * Symptom: <ul><li><ul>...</ul></li></ul>
 *
 * Action: discard the outer list and its single <li>, moving the inner list
 * into the outer list's position.
 *
 * The rule only fires when the outer list has exactly one child, that child
 * has exactly one child, and that grandchild is a list of the same kind as the
 * outer one.
 *
 * If the node preceding the relocated list is itself a non-empty list, the
 * relocated list is appended to that list's last item instead (Word has no way
 * of recognizing nested lists and just uses indents).
 *
 * lexer - passed to the recursive CleanNode call.
 * node  - candidate outer list.
 * pnode - receives the outer list's next sibling when the rule fires.
 *
 * Returns true when the outer list was discarded.
 */
private bool NestedList(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag != _tt.TagUl && node.Tag != _tt.TagOl)
    {
        return false;
    }

    Node child = node.Content;

    if (child == null)
    {
        return false;
    }

    /* check child has no peers */
    if (child.Next != null)
    {
        return false;
    }

    Node list = child.Content;

    if (list == null)
    {
        return false;
    }

    if (list.Tag != node.Tag)
    {
        return false;
    }

    /* check list has no peers: its Next link is overwritten below and the
       item's content is cleared, so any sibling would be dropped from the
       document */
    if (list.Next != null)
    {
        return false;
    }

    pnode.Object = node.Next;

    /* move inner list node into position of outer node */
    list.Prev = node.Prev;
    list.Next = node.Next;
    list.Parent = node.Parent;
    FixNodeLinks(list);

    /* get rid of outer ul and its li */
    child.Content = null;
    node.Content = null;
    node.Next = null;

    if (list.Prev != null)
    {
        node = list;
        list = node.Prev;

        /* An empty previous list has no item to append to; leave the
           relocated list where it is in that case. */
        if ((list.Tag == _tt.TagUl || list.Tag == _tt.TagOl) && list.Last != null)
        {
            /* unlink the relocated list from the sibling chain */
            list.Next = node.Next;

            if (list.Next != null)
            {
                list.Next.Prev = list;
            }
            else if (list.Parent != null)
            {
                /* the previous list is now the last child of its parent */
                list.Parent.Last = list;
            }

            child = list.Last; /* <li> */

            node.Parent = child;
            node.Next = null;
            node.Prev = child.Last;
            FixNodeLinks(node);
        }
    }

    CleanNode(lexer, node);
    return true;
}
/*
 * Simplifies <b><b> ... </b> ...</b> etc.
 *
 * Walks the sibling chain starting at node and recurses into children. A B or
 * I element whose parent carries the same tag is redundant: it is removed with
 * DiscardContainer and the walk resumes at the node DiscardContainer hands
 * back (the first former child, or the next sibling if it was empty).
 *
 * node - first node of the sibling chain to process; may be null.
 */
public virtual void NestedEmphasis(Node node)
{
    MutableObject cursor = new MutableObject();
    Node current = node;

    while (current != null)
    {
        Node following = current.Next;
        bool isEmphasis = current.Tag == _tt.TagB || current.Tag == _tt.TagI;

        if (isEmphasis && current.Parent != null && current.Parent.Tag == current.Tag)
        {
            /* strip redundant inner element */
            cursor.Object = following;
            DiscardContainer(current, cursor);
            current = (Node) cursor.Object;
        }
        else
        {
            if (current.Content != null)
            {
                NestedEmphasis(current.Content);
            }

            current = following;
        }
    }
}
/*
 * Symptom: a presentation element (B, I or FONT) is the only child of an
 * element whose content model includes Inline or Row (described by the
 * original comment as a table cell or an inline element such as em).
 *
 * Action: fold the child's presentation into a style property on the parent
 * and strip the child element. B and I are only folded when the
 * LogicalEmphasis option is set; FONT is always folded. FONT elements
 * themselves are never treated as the parent here.
 *
 * lexer - supplies the options.
 * node  - candidate parent element.
 * pnode - not used by this rule.
 *
 * Returns true when a child element was merged into node.
 */
private bool InlineStyle(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag == _tt.TagFont
        || (node.Tag.Model & (ContentModel.Inline | ContentModel.Row)) == 0)
    {
        return false;
    }

    /* The child must exist and must have no peers. */
    Node onlyChild = node.Content;
    if (onlyChild == null || onlyChild.Next != null)
    {
        return false;
    }

    if (onlyChild.Tag == _tt.TagB && lexer.Options.LogicalEmphasis)
    {
        MergeStyles(node, onlyChild);
        AddStyleProperty(node, "font-weight: bold");
    }
    else if (onlyChild.Tag == _tt.TagI && lexer.Options.LogicalEmphasis)
    {
        MergeStyles(node, onlyChild);
        AddStyleProperty(node, "font-style: italic");
    }
    else if (onlyChild.Tag == _tt.TagFont)
    {
        MergeStyles(node, onlyChild);
        AddFontStyles(node, onlyChild.Attributes);
    }
    else
    {
        return false;
    }

    StripOnlyChild(node);
    return true;
}
/*
 * Symptom: <div><div>...</div></div>
 *
 * Action: merge the two divs. This is useful after nested <dir>s used by Word
 * for indenting have been converted to <div>s.
 *
 * The rule fires only when the inner div is the sole child of the outer one.
 *
 * lexer - not used by this rule.
 * node  - candidate outer div.
 * pnode - not used by this rule.
 *
 * Returns true when the inner div was merged into node.
 */
private bool MergeDivs(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag == _tt.TagDiv)
    {
        Node inner = node.Content;

        if (inner != null && inner.Tag == _tt.TagDiv && inner.Next == null)
        {
            MergeStyles(node, inner);
            StripOnlyChild(node);
            return true;
        }
    }

    return false;
}
/*
 * Replaces a font element by a span element, deleting the font element's
 * attributes and replacing them by a single style attribute.
 *
 * When the DropFontTags option is set the font element is removed with
 * DiscardContainer instead; the rule then returns false even though the tree
 * changed, and the replacement node is available through pnode.
 *
 * A font element that is the only child of its parent is left alone
 * (presumably so that BlockStyle/InlineStyle can fold it into the parent -
 * TODO confirm against the caller's traversal order).
 *
 * lexer - supplies the options.
 * node  - candidate element.
 * pnode - written only when the element is discarded.
 *
 * Returns true when node was converted to a span.
 */
private bool Font2Span(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag != _tt.TagFont)
    {
        return false;
    }

    if (lexer.Options.DropFontTags)
    {
        DiscardContainer(node, pnode);
        return false;
    }

    /* if FONT is only child of parent element then leave alone */
    if (node.Parent.Content == node && node.Next == null)
    {
        return false;
    }

    AddFontStyles(node, node.Attributes);

    /* extract style attribute and drop the rest */
    AttVal style = null;
    AttVal av = node.Attributes;

    while (av != null)
    {
        AttVal following = av.Next;

        /* Null-safe comparison: attribute entries created for ASP/PHP markup
           carry no name, and calling Equals on a null name would throw. */
        if (string.Equals(av.Attribute, "style"))
        {
            av.Next = null;
            style = av;
        }

        av = following;
    }

    node.Attributes = style;
    node.Tag = _tt.TagSpan;
    node.Element = "span";
    return true;
}
/*
 * Removes an element from the tree while keeping its children, which take the
 * element's place among its siblings. Used to strip font start and end tags.
 *
 * element - the container to remove; its Next and Content links are cleared
 *           on return.
 * pnode   - receives the node that now occupies the element's position: its
 *           first former child, or its next sibling (possibly null) when the
 *           element had no content.
 */
private void DiscardContainer(Node element, MutableObject pnode)
{
    Node parent = element.Parent;
    Node before = element.Prev;
    Node after = element.Next;
    Node firstChild = element.Content;

    if (firstChild != null)
    {
        Node lastChild = element.Last;

        /* Join the tail of the child chain to whatever followed the element. */
        lastChild.Next = after;

        if (after != null)
        {
            after.Prev = lastChild;
        }
        else
        {
            parent.Last = lastChild;
        }

        /* Join the head of the child chain to whatever preceded the element. */
        if (before != null)
        {
            firstChild.Prev = before;
            before.Next = firstChild;
        }
        else
        {
            parent.Content = firstChild;
        }

        /* Re-parent the promoted children. Because the chain is already
           joined, this walk also runs over the siblings that followed the
           element; their parent is simply assigned again. */
        for (Node moved = firstChild; moved != null; moved = moved.Next)
        {
            moved.Parent = parent;
        }

        pnode.Object = firstChild;
    }
    else
    {
        /* Nothing to promote: just unlink the element. */
        if (after != null)
        {
            after.Prev = before;
        }
        else
        {
            parent.Last = before;
        }

        if (before != null)
        {
            before.Next = after;
        }
        else
        {
            parent.Content = after;
        }

        pnode.Object = after;
    }

    element.Next = null;
    element.Content = null;
}
/*
 * The clean-up rules use the pnode argument to return the next node when the
 * original node has been deleted.
 */

/*
 * Symptom: <dir> <li> where <li> is the only child.
 *
 * Action: coerce <dir> <li> to <div> with indent.
 *
 * The rule is applied to dir, ul and ol elements alike, and only when the
 * single <li> is flagged Isimplicit (presumably an item inferred by the parser
 * rather than written in the source - TODO confirm).
 *
 * lexer - not used by this rule.
 * node  - candidate list element.
 * pnode - not used by this rule; the node is rewritten in place.
 *
 * Returns true when node was turned into a div.
 */
private bool Dir2Div(Lexer lexer, Node node, MutableObject pnode)
{
    bool isListContainer =
        node.Tag == _tt.TagDir || node.Tag == _tt.TagUl || node.Tag == _tt.TagOl;

    if (!isListContainer)
    {
        return false;
    }

    /* The item must exist and must have no peers. */
    Node item = node.Content;
    if (item == null || item.Next != null)
    {
        return false;
    }

    if (item.Tag != _tt.TagLi || !item.Isimplicit)
    {
        return false;
    }

    /* coerce dir to div */
    node.Tag = _tt.TagDiv;
    node.Element = "div";
    AddStyleProperty(node, "margin-left: 2em");
    StripOnlyChild(node);
    return true;
}
/*
 * Parses the attributes of the current start tag, up to and including the
 * closing '>' (which ParseAttribute swallows).
 *
 * Each parsed entry is created with the list built so far as its first
 * constructor argument, i.e. new entries are put in front of the existing ones
 * (assuming that argument is the entry's next link - TODO confirm against
 * AttVal). ASP and PHP markup found inside the tag is kept as an entry without
 * a name.
 *
 * An attribute whose name is rejected by IsValidAttrName is reported through
 * Report.AttrError and left out of the list.
 *
 * isempty - set by the callee when the tag ends in "/>".
 *
 * Returns the head of the attribute list, or null when the tag has no
 * attributes.
 */
public virtual AttVal ParseAttrs(MutableBoolean isempty)
{
    AttVal list = null;
    MutableInteger delim = new MutableInteger();
    MutableObject asp = new MutableObject();
    MutableObject php = new MutableObject();

    while (!EndOfInput())
    {
        string attribute = ParseAttribute(isempty, asp, php);

        if (attribute == null)
        {
            /* check if attributes are created by ASP markup */
            if (asp.Object != null)
            {
                list = new AttVal(list, null, (Node) asp.Object, null, '\x0000', null, null);
                continue;
            }

            /* check if attributes are created by PHP markup */
            if (php.Object != null)
            {
                list = new AttVal(list, null, null, (Node) php.Object, '\x0000', null, null);
                continue;
            }

            /* no name and no embedded markup: the tag is finished */
            break;
        }

        string val = ParseValue(attribute, false, isempty, delim);

        /* attribute cannot be null here, so only the name check remains */
        if (IsValidAttrName(attribute))
        {
            AttVal av = new AttVal(list, null, null, null, delim.Val, attribute, val);
            av.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(av);
            list = av;
        }
        else
        {
            /* rejected attributes are only reported; nothing is allocated
               for them */
            Report.AttrError(this, token, val, Report.BAD_ATTRIBUTE_VALUE);
        }
    }

    return list;
}
/*
 * Reads the next attribute name of the current start tag. Consumes the '>'
 * terminating start tags.
 *
 * isempty - set to true when the tag is closed with "/>".
 * asp     - cleared on entry; receives the parsed node when "<%" is found.
 * php     - cleared on entry; receives the parsed node when "<?" is found.
 *
 * Returns the attribute name, or null when no name was read: the tag ended
 * ('>' or "/>"), ASP/PHP markup was found, a stray '<' or the end of the
 * stream was hit, or the name turned out to be empty. Unless the XmlTags
 * option is set, characters classified as UPPERCASE are shifted to lower case.
 * The name is collected in the lexer buffer and removed from it again before
 * returning.
 */
public virtual string ParseAttribute(MutableBoolean isempty, MutableObject asp, MutableObject php)
{
    int ch = 0;
    short charClass;

    asp.Object = null; /* clear asp pointer */
    php.Object = null; /* clear php pointer */

    /* skip white space before the attribute */
    while (true)
    {
        ch = input.ReadChar();

        if (ch == '/')
        {
            ch = input.ReadChar();

            if (ch == '>')
            {
                isempty.Val = true;
                return null;
            }

            /* a lone '/' becomes the first character of the name */
            input.UngetChar(ch);
            ch = '/';
            break;
        }

        if (ch == '>')
        {
            return null;
        }

        if (ch == '<')
        {
            ch = input.ReadChar();

            if (ch == '%')
            {
                asp.Object = ParseAsp();
                return null;
            }

            if (ch == '?')
            {
                php.Object = ParsePhp();
                return null;
            }

            input.UngetChar(ch);
            Report.AttrError(this, token, null, Report.UNEXPECTED_GT);
            return null;
        }

        if (ch == '"' || ch == '\'')
        {
            Report.AttrError(this, token, null, Report.UNEXPECTED_QUOTEMARK);
            continue;
        }

        if (ch == StreamIn.EndOfStream)
        {
            Report.AttrError(this, token, null, Report.UNEXPECTED_END_OF_FILE);
            input.UngetChar(ch);
            return null;
        }

        charClass = MAP((char) ch);

        if ((charClass & WHITE) == 0)
        {
            break;
        }
    }

    int nameStart = lexsize;

    while (true)
    {
        /* push back '=' for ParseValue(), and likewise any character that
           ends the tag or the input */
        if (ch == '=' || ch == '>' || ch == '<' || ch == StreamIn.EndOfStream)
        {
            input.UngetChar(ch);
            break;
        }

        charClass = MAP((char) ch);

        /* white space ends the name and is consumed */
        if ((charClass & WHITE) != 0)
        {
            break;
        }

        /* what should be done about non-namechar characters? */
        /* currently these are incorporated into the attr name */
        if (!Options.XmlTags && (charClass & UPPERCASE) != 0)
        {
            ch += (int) ('a' - 'A');
        }

        AddCharToLexer(ch);
        ch = input.ReadChar();
    }

    /* The length is derived from the lexer buffer rather than counted per
       character (fix for bug 126265). */
    int nameLength = lexsize - nameStart;
    string name = nameLength > 0 ? GetString(lexbuf, nameStart, nameLength) : null;

    /* drop the name from the lexer buffer again */
    lexsize = nameStart;
    return name;
}