/*
 * Applies all matching clean-up rules to a node.
 *
 * The rules are tried in a fixed order. Whenever a rule reports that it
 * changed something (returns true), the whole rule list is re-applied,
 * starting again from the node currently held in the holder "o". Rules
 * that receive "o" may replace that node (for example by way of
 * DiscardContainer, which stores the node to continue with in it).
 *
 * lexer: lexer passed through to the rules that need it.
 * node:  node to clean; a null node is returned unchanged.
 *
 * Returns the node the caller should continue with. This may be null
 * when a rule removed the node and nothing follows it.
 */
private Node CleanNode(Lexer lexer, Node node)
{
    Node next;
    var o = new MutableObject();

    /*
     * The null check matters: a rule may leave nothing to continue with
     * (DiscardContainer stores element.Next, which is null for a last
     * child), and dereferencing node.IsElement would then throw.
     */
    for (next = node; node != null && node.IsElement; node = next)
    {
        o.Object = next;

        bool b = Dir2Div(node);
        next = (Node) o.Object;
        if (b)
        {
            continue;
        }

        b = NestedList(lexer, node, o);
        next = (Node) o.Object;
        if (b)
        {
            continue;
        }

        b = Center2Div(lexer, node, o);
        next = (Node) o.Object;
        if (b)
        {
            continue;
        }

        b = MergeDivs(node);
        next = (Node) o.Object;
        if (b)
        {
            continue;
        }

        b = BlockStyle(node);
        next = (Node) o.Object;
        if (b)
        {
            continue;
        }

        b = InlineStyle(lexer, node);
        next = (Node) o.Object;
        if (b)
        {
            continue;
        }

        b = Font2Span(lexer, node, o);
        next = (Node) o.Object;
        if (b)
        {
            continue;
        }

        /* no rule applied: this node is done */
        break;
    }

    return next;
}
/*
 * Simplifies <b><b> ... </b> ...</b> etc.
 *
 * Walks the sibling chain starting at the given node. A <b> or <i>
 * element whose parent carries the same tag is redundant, so its
 * start/end tags are stripped with DiscardContainer and the walk resumes
 * at the node DiscardContainer reports. Every other node has its
 * children processed recursively.
 */
public virtual void NestedEmphasis(Node node)
{
    var holder = new MutableObject();
    Node current = node;

    while (current != null)
    {
        Node following = current.Next;

        bool isEmphasis = current.Tag == _tt.TagB || current.Tag == _tt.TagI;
        bool isRedundant = isEmphasis
                           && current.Parent != null
                           && current.Parent.Tag == current.Tag;

        if (isRedundant)
        {
            /* strip redundant inner element and continue where it tells us to */
            holder.Object = following;
            DiscardContainer(current, holder);
            current = (Node) holder.Object;
            continue;
        }

        if (current.Content != null)
        {
            NestedEmphasis(current.Content);
        }

        current = following;
    }
}
/*
 * Symptom: <center>
 * Action:  replace <center> by <div style="text-align: center">
 *
 * When the DropFontTags option is set, the <center> container is
 * discarded instead and an inferred <br> is linked in: after the last
 * former child if the element had content, otherwise at the position
 * the element occupied.
 *
 * pnode receives the node to continue with from DiscardContainer.
 * Returns true when the node was a <center> element, false otherwise.
 */
private bool Center2Div(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag != _tt.TagCenter)
    {
        return false;
    }

    if (!lexer.Options.DropFontTags)
    {
        /* plain conversion: keep the element, restyle it as a div */
        node.Tag = _tt.TagDiv;
        node.Element = "div";
        AddStyleProperty(node, "text-align: center");
        return true;
    }

    Node owner = node.Parent;

    if (node.Content != null)
    {
        /* remember the last child before the container is unlinked */
        Node tail = node.Last;

        DiscardContainer(node, pnode);
        Node lineBreak = lexer.InferredTag("br");

        /* splice the <br> in directly after the former last child */
        Node after = tail.Next;
        if (after != null)
        {
            after.Prev = lineBreak;
        }

        lineBreak.Next = after;
        tail.Next = lineBreak;
        lineBreak.Prev = tail;

        if (owner.Last == tail)
        {
            owner.Last = lineBreak;
        }

        lineBreak.Parent = owner;
    }
    else
    {
        /* remember the neighbours before the empty element is unlinked */
        Node before = node.Prev;
        Node after = node.Next;

        DiscardContainer(node, pnode);
        Node lineBreak = lexer.InferredTag("br");

        /* put the <br> where the empty <center> used to be */
        lineBreak.Next = after;
        lineBreak.Prev = before;
        lineBreak.Parent = owner;

        if (after == null)
        {
            owner.Last = lineBreak;
        }
        else
        {
            after.Prev = lineBreak;
        }

        if (before == null)
        {
            owner.Content = lineBreak;
        }
        else
        {
            before.Next = lineBreak;
        }
    }

    return true;
}
/*
 * Symptom: <ul><li><ul>...</ul></li></ul>
 * Action:  discard outer list
 *
 * Applies only when the outer list has exactly one item and that item
 * contains exactly one node: a list with the same tag as the outer list.
 * The inner list is moved into the outer list's position. If the node
 * in front of it is a non-empty list, the inner list is appended to that
 * list's last item instead.
 *
 * pnode receives the outer list's former next sibling as the node to
 * continue with.
 * Returns true when the outer list was discarded, false otherwise.
 */
private bool NestedList(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag == _tt.TagUl || node.Tag == _tt.TagOl)
    {
        Node child = node.Content;

        if (child == null)
        {
            return false;
        }

        /* check child has no peers */
        if (child.Next != null)
        {
            return false;
        }

        Node list = child.Content;

        if (list == null)
        {
            return false;
        }

        if (list.Tag != node.Tag)
        {
            return false;
        }

        /*
         * check list has no peers: list.Next is overwritten below and the
         * item's content is cleared, so any sibling following the inner
         * list would silently be lost
         */
        if (list.Next != null)
        {
            return false;
        }

        pnode.Object = node.Next;

        /* move inner list node into position of outer node */
        list.Prev = node.Prev;
        list.Next = node.Next;
        list.Parent = node.Parent;
        FixNodeLinks(list);

        /* get rid of outer ul and its li */
        child.Content = null;
        node.Content = null;
        node.Next = null;

        /*
         * If prev node was a list the chances are this node should be
         * appended to that list. Word has no way of recognizing nested
         * lists and just uses indents
         */
        if (list.Prev != null)
        {
            node = list;
            list = node.Prev;

            /*
             * An empty preceding list has no item to append to
             * (list.Last would be null), so leave the node where it is.
             */
            if ((list.Tag == _tt.TagUl || list.Tag == _tt.TagOl) && list.Last != null)
            {
                /* unlink node from the sibling chain */
                list.Next = node.Next;

                if (list.Next != null)
                {
                    list.Next.Prev = list;
                }
                else if (list.Parent != null)
                {
                    /* node was the last sibling; the preceding list is now */
                    list.Parent.Last = list;
                }

                child = list.Last; /* <li> */

                /* append node to the content of that last item */
                node.Parent = child;
                node.Next = null;
                node.Prev = child.Last;
                FixNodeLinks(node);
            }
        }

        /*
         * NOTE(review): when list.Prev was null above, node still refers
         * to the emptied outer list here rather than to the relocated
         * inner list - confirm this is intended.
         */
        CleanNode(lexer, node);
        return true;
    }

    return false;
}
/*
 * Replace font elements by span elements, deleting the font element's
 * attributes and replacing them by a single style attribute.
 *
 * When the DropFontTags option is set, the font container is discarded
 * instead (pnode then receives the node to continue with) and false is
 * returned. A font element that is the only child of its parent is left
 * alone.
 *
 * Returns true only when the node was converted to a span.
 */
private bool Font2Span(Lexer lexer, Node node, MutableObject pnode)
{
    if (node.Tag == _tt.TagFont)
    {
        if (lexer.Options.DropFontTags)
        {
            DiscardContainer(node, pnode);
            return false;
        }

        /* if FONT is only child of parent element then leave alone */
        if (node.Parent.Content == node && node.Next == null)
        {
            return false;
        }

        AddFontStyles(node, node.Attributes);

        /* extract style attribute and free the rest */
        AttVal av = node.Attributes;
        AttVal style = null;

        while (av != null)
        {
            AttVal next = av.Next;

            /*
             * Compare from the literal side: attribute entries created
             * for ASP/PHP markup carry a null name, and calling Equals
             * on that would throw.
             */
            if ("style".Equals(av.Attribute))
            {
                av.Next = null;
                style = av;
            }

            av = next;
        }

        node.Attributes = style;
        node.Tag = _tt.TagSpan;
        node.Element = "span";
        return true;
    }

    return false;
}
/*
 * Used to strip font start and end tags.
 *
 * Unlinks the element from its parent's child chain. If the element has
 * content, its children take its place and pnode receives the first
 * child; otherwise the neighbours are joined and pnode receives the
 * element's former next sibling (possibly null). The element's Next and
 * Content links are cleared afterwards.
 */
private void DiscardContainer(Node element, MutableObject pnode)
{
    Node parent = element.Parent;
    Node before = element.Prev;
    Node after = element.Next;
    Node firstChild = element.Content;

    if (firstChild == null)
    {
        /* empty element: simply close the gap between its neighbours */
        if (after == null)
        {
            parent.Last = before;
        }
        else
        {
            after.Prev = before;
        }

        if (before == null)
        {
            parent.Content = after;
        }
        else
        {
            before.Next = after;
        }

        pnode.Object = after;
    }
    else
    {
        Node lastChild = element.Last;

        /* hook the tail of the children onto whatever followed the element */
        lastChild.Next = after;
        if (after == null)
        {
            parent.Last = lastChild;
        }
        else
        {
            after.Prev = lastChild;
        }

        /* hook the head of the children onto whatever preceded the element */
        if (before == null)
        {
            parent.Content = firstChild;
        }
        else
        {
            firstChild.Prev = before;
            before.Next = firstChild;
        }

        /* walk the chain from the first child onwards and re-parent each node */
        for (Node walker = firstChild; walker != null; walker = walker.Next)
        {
            walker.Parent = parent;
        }

        pnode.Object = firstChild;
    }

    element.Next = null;
    element.Content = null;
}
/*
 * Parses the attributes of the current tag; swallows the closing '>'.
 *
 * Repeatedly asks ParseAttribute for the next attribute name until the
 * input ends or no further attribute is found. ASP and PHP blocks found
 * in attribute position are recorded as entries without a name. Names
 * rejected by IsValidAttrName are reported and not added.
 *
 * isempty: passed through to ParseAttribute and ParseValue.
 *
 * Returns the most recently created AttVal, or null if none was created.
 * Each AttVal is constructed with the previously created one as its
 * first argument (presumably its Next link - confirm against AttVal).
 */
public virtual AttVal ParseAttrs(MutableBoolean isempty)
{
    var quote = new MutableInteger();
    var aspHolder = new MutableObject();
    var phpHolder = new MutableObject();
    AttVal head = null;

    while (!EndOfInput())
    {
        string name = ParseAttribute(isempty, aspHolder, phpHolder);

        if (name != null)
        {
            string text = ParseValue(name, false, isempty, quote);

            if (!IsValidAttrName(name))
            {
                Report.AttrError(this, Token, text, Report.BAD_ATTRIBUTE_VALUE);
                continue;
            }

            var parsed = new AttVal(head, null, null, null, quote.Val, name, text);
            parsed.Dict = AttributeTable.DefaultAttributeTable.FindAttribute(parsed);
            head = parsed;
            continue;
        }

        /* check if attributes are created by ASP markup */
        if (aspHolder.Object != null)
        {
            head = new AttVal(head, null, (Node) aspHolder.Object, null, '\x0000', null, null);
            continue;
        }

        /* check if attributes are created by PHP markup */
        if (phpHolder.Object != null)
        {
            head = new AttVal(head, null, null, (Node) phpHolder.Object, '\x0000', null, null);
            continue;
        }

        /* neither a name nor embedded markup: attribute list is finished */
        break;
    }

    return head;
}
/*
 * Reads the next attribute name; consumes the '>' terminating start tags.
 *
 * isempty: set to true when "/>" is encountered.
 * asp:     cleared on entry; receives the result of ParseAsp() when "<%"
 *          is found in attribute position.
 * php:     cleared on entry; receives the result of ParsePhp() when "<?"
 *          is found in attribute position.
 *
 * Returns the attribute name (upper-case characters are lowered unless
 * Options.XmlTags is set), or null when no name was read: at the end of
 * the tag, at embedded ASP/PHP markup, at a stray '<', or at end of
 * input. The name is accumulated in the lexer buffer, which is restored
 * to its previous size before returning.
 */
public virtual string ParseAttribute(MutableBoolean isempty, MutableObject asp, MutableObject php)
{
    int ch;
    short charClass;

    asp.Object = null; /* clear asp pointer */
    php.Object = null; /* clear php pointer */

    /* skip white space before the attribute */
    while (true)
    {
        ch = Input.ReadChar();

        if (ch == '/')
        {
            ch = Input.ReadChar();
            if (ch == '>')
            {
                isempty.Val = true;
                return null;
            }

            /* a lone '/' becomes the first character of the name */
            Input.UngetChar(ch);
            ch = '/';
            break;
        }
        else if (ch == '>')
        {
            return null;
        }
        else if (ch == '<')
        {
            ch = Input.ReadChar();

            if (ch == '%')
            {
                asp.Object = ParseAsp();
                return null;
            }

            if (ch == '?')
            {
                php.Object = ParsePhp();
                return null;
            }

            Input.UngetChar(ch);
            Report.AttrError(this, Token, null, Report.UNEXPECTED_GT);
            return null;
        }
        else if (ch == '"' || ch == '\'')
        {
            /* stray quote mark: report it and keep skipping */
            Report.AttrError(this, Token, null, Report.UNEXPECTED_QUOTEMARK);
            continue;
        }
        else if (ch == StreamIn.END_OF_STREAM)
        {
            Report.AttrError(this, Token, null, Report.UNEXPECTED_END_OF_FILE);
            Input.UngetChar(ch);
            return null;
        }

        charClass = Map((char) ch);
        if ((charClass & WHITE) == 0)
        {
            break;
        }
    }

    int nameStart = Lexsize;

    /* collect the name into the lexer buffer */
    while (true)
    {
        /* these end the name and are pushed back for the caller ('=' for ParseValue) */
        bool endsName = ch == '=' || ch == '>' || ch == '<' || ch == StreamIn.END_OF_STREAM;
        if (endsName)
        {
            Input.UngetChar(ch);
            break;
        }

        /* white space ends the name too, but is not pushed back */
        charClass = Map((char) ch);
        if ((charClass & WHITE) != 0)
        {
            break;
        }

        /* what should be done about non-namechar characters? */
        /* currently these are incorporated into the attr name */
        if (!Options.XmlTags && (charClass & UPPERCASE) != 0)
        {
            ch = ch + ('a' - 'A');
        }

        AddCharToLexer(ch);
        ch = Input.ReadChar();
    }

    /* length is derived from the buffer growth (BUGFIX for 126265) */
    int nameLength = Lexsize - nameStart;
    string name = null;
    if (nameLength > 0)
    {
        name = GetString(Lexbuf, nameStart, nameLength);
    }

    /* give the buffer space back */
    Lexsize = nameStart;
    return name;
}