/// <summary>
/// Closes the open element that corresponds to an end tag. Using the stack's
/// descending enumerator, finds the first entry whose node name equals the tag
/// name, then removes every entry visited up to and including that one.
/// Does nothing when no entry matches.
/// </summary>
/// <param name="endTag">End tag whose name selects the element to close.</param>
private void PopStackToClose(Token.EndTag endTag)
{
    String targetName = endTag.Name;

    // Pass 1: locate the first matching entry without touching the stack.
    DomContainer match = null;
    var finder = stack.GetDescendingEnumerator();
    while (match == null && finder.MoveNext())
    {
        DomContainer candidate = finder.Current;
        if (candidate.NodeName.Equals(targetName))
        {
            match = candidate;
        }
    }

    if (match == null)
    {
        return; // not found, skip
    }

    // Pass 2: remove entries in the same order until the match itself is gone.
    var remover = stack.GetDescendingEnumerator();
    while (remover.MoveNext())
    {
        DomContainer visited = remover.Current;
        bool reachedMatch = visited == match;
        remover.Remove();
        if (reachedMatch)
        {
            break;
        }
    }
}
/// <summary>
/// Stores <paramref name="token"/> as the pending emitted token and performs
/// per-type bookkeeping: a start tag is remembered as the last start tag (and a
/// self-closing one resets the acknowledged flag); an end tag carrying
/// attributes is reported as a parse error.
/// </summary>
/// <param name="token">The token to emit.</param>
public void Emit(Token token)
{
    // A previous token has not been consumed yet; raise the warning first.
    if (isEmitPending)
    {
        HtmlWarning.UnreadTokenPending();
    }

    emitPending = token;
    isEmitPending = true;

    switch (token.Type)
    {
        case TokenType.StartTag:
        {
            Token.StartTag opened = (Token.StartTag)token;
            lastStartTag = opened;
            if (opened.selfClosing)
            {
                selfClosingFlagAcknowledged = false;
            }
            break;
        }

        case TokenType.EndTag:
        {
            Token.EndTag closed = (Token.EndTag)token;
            if (closed.Attributes.Any())
            {
                ParseError.AttributesPresentOnEndTagError(this);
            }
            break;
        }
    }
}
/// <summary>
/// Token handler for what appears to be the "in caption" state (the enclosing
/// class is outside this view). Closes the caption on its own end tag, closes
/// it implicitly for table-structure start tags and a table end tag, rejects
/// stray structural end tags, and hands everything else to the InBody rules.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False on the paths that report an error and drop the token; otherwise true
/// or the result of the delegated <c>Process</c> call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    // </caption>
    bool closesCaption = t.IsEndTag && t.AsEndTag().Name.Equals("caption");
    if (closesCaption)
    {
        Token.EndTag captionEnd = t.AsEndTag();
        if (!tb.InTableScope(captionEnd.Name))
        {
            tb.Error(this);
            return false;
        }

        tb.GenerateImpliedEndTags();
        if (!tb.CurrentElement.NodeName.Equals("caption"))
        {
            tb.Error(this);
        }
        tb.PopStackToClose("caption");
        tb.ClearFormattingElementsToLastMarker();
        tb.Transition(InTable);
        return true;
    }

    // Tokens that end the caption implicitly: act as if </caption> had been
    // seen, then reprocess the token if that synthetic end tag was accepted.
    bool impliesCaptionEnd =
        (t.IsStartTag && StringSet.Create("caption col colgroup tbody td tfoot th thead tr").Contains(t.AsStartTag().Name))
        || (t.IsEndTag && t.AsEndTag().Name.Equals("table"));
    if (impliesCaptionEnd)
    {
        tb.Error(this);
        if (tb.Process(new Token.EndTag("caption")))
        {
            return tb.Process(t);
        }
        return true;
    }

    // Structural end tags that are reported as errors and dropped here.
    bool strayEndTag =
        t.IsEndTag && StringSet.Create("body col colgroup html tbody td tfoot th thead tr").Contains(t.AsEndTag().Name);
    if (strayEndTag)
    {
        tb.Error(this);
        return false;
    }

    return tb.Process(t, InBody);
}
/// <summary>
/// Token handler for what appears to be the "in select" state (the enclosing
/// class is outside this view). Handles option/optgroup/select start and end
/// tags, closes the select for input-like start tags, and routes the rest to
/// <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False on the paths that report an error and drop the token; otherwise true
/// or the result of the delegated call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    switch (t.Type)
    {
        case TokenType.Character:
        {
            Token.Character text = t.AsCharacter();
            if (text.Data.Equals(NullString))
            {
                tb.Error(this);
                return false;
            }
            tb.Insert(text);
            return true;
        }

        case TokenType.Comment:
            tb.Insert(t.AsComment());
            return true;

        case TokenType.Doctype:
            tb.Error(this);
            return false;

        case TokenType.StartTag:
        {
            Token.StartTag opening = t.AsStartTag();
            string openName = opening.Name;

            if (openName.Equals("html"))
            {
                return tb.Process(opening, InBody);
            }
            if (openName.Equals("option"))
            {
                // Close any open option first, then open the new one.
                tb.Process(new Token.EndTag("option"));
                tb.Insert(opening);
                return true;
            }
            if (openName.Equals("optgroup"))
            {
                if (tb.CurrentElement.NodeName.Equals("option"))
                {
                    tb.Process(new Token.EndTag("option"));
                }
                else if (tb.CurrentElement.NodeName.Equals("optgroup"))
                {
                    tb.Process(new Token.EndTag("optgroup"));
                }
                tb.Insert(opening);
                return true;
            }
            if (openName.Equals("select"))
            {
                // A nested <select> is treated as the end of the current one.
                tb.Error(this);
                return tb.Process(new Token.EndTag("select"));
            }
            if (StringUtil.In(openName, "input", "keygen", "textarea"))
            {
                tb.Error(this);
                if (!tb.InSelectScope("select"))
                {
                    return false; // frag
                }
                tb.Process(new Token.EndTag("select"));
                return tb.Process(opening);
            }
            if (openName.Equals("script"))
            {
                return tb.Process(t, InHead);
            }
            return AnythingElse(t, tb);
        }

        case TokenType.EndTag:
        {
            Token.EndTag closing = t.AsEndTag();
            string closeName = closing.Name;

            if (closeName.Equals("optgroup"))
            {
                // An option sitting directly inside an optgroup is closed first.
                if (tb.CurrentElement.NodeName.Equals("option")
                    && tb.AboveOnStack(tb.CurrentElement) != null
                    && tb.AboveOnStack(tb.CurrentElement).NodeName.Equals("optgroup"))
                {
                    tb.Process(new Token.EndTag("option"));
                }

                if (tb.CurrentElement.NodeName.Equals("optgroup"))
                {
                    tb.Pop();
                }
                else
                {
                    tb.Error(this);
                }
                return true;
            }
            if (closeName.Equals("option"))
            {
                if (tb.CurrentElement.NodeName.Equals("option"))
                {
                    tb.Pop();
                }
                else
                {
                    tb.Error(this);
                }
                return true;
            }
            if (closeName.Equals("select"))
            {
                if (!tb.InSelectScope(closeName))
                {
                    tb.Error(this);
                    return false;
                }
                tb.PopStackToClose(closeName);
                tb.ResetInsertionMode();
                return true;
            }
            return AnythingElse(t, tb);
        }

        case TokenType.EOF:
            if (!tb.CurrentElement.NodeName.Equals("html"))
            {
                tb.Error(this);
            }
            return true;

        default:
            return AnythingElse(t, tb);
    }
}
/// <summary>
/// Token handler for what appears to be the "in table body" state (the
/// enclosing class is outside this view). Opens rows, synthesises a row for
/// stray cells, leaves the body section for sibling sections and the table end
/// tag, and routes the rest to <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False on the paths that report an error and drop the token; otherwise true
/// or the result of the delegated call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    switch (t.Type)
    {
        case TokenType.StartTag:
        {
            Token.StartTag opening = t.AsStartTag();
            string openName = opening.Name;

            if (openName.Equals("tr"))
            {
                tb.ClearStackToTableBodyContext();
                tb.Insert(opening);
                tb.Transition(InRow);
                return true;
            }
            if (StringUtil.In(openName, "th", "td"))
            {
                // A cell without a row: open a <tr> first, then retry the cell.
                tb.Error(this);
                tb.Process(new Token.StartTag("tr"));
                return tb.Process(opening);
            }
            if (StringSet.Create("caption col colgroup tbody tfoot thead").Contains(openName))
            {
                return ExitTableBody(t, tb);
            }
            return AnythingElse(t, tb);
        }

        case TokenType.EndTag:
        {
            Token.EndTag closing = t.AsEndTag();
            string closeName = closing.Name;

            if (StringUtil.In(closeName, "tbody", "tfoot", "thead"))
            {
                if (!tb.InTableScope(closeName))
                {
                    tb.Error(this);
                    return false;
                }
                tb.ClearStackToTableBodyContext();
                tb.Pop();
                tb.Transition(InTable);
                return true;
            }
            if (closeName.Equals("table"))
            {
                return ExitTableBody(t, tb);
            }
            if (StringSet.Create("body caption col colgroup html td th tr").Contains(closeName))
            {
                tb.Error(this);
                return false;
            }
            return AnythingElse(t, tb);
        }

        default:
            return AnythingElse(t, tb);
    }
}
/// <summary>
/// Token handler for what appears to be the "in column group" state (the
/// enclosing class is outside this view). Inserts whitespace, comments and
/// <c>col</c> elements, closes the column group on its end tag, and routes the
/// rest to <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False when a colgroup end tag arrives while the current element is
/// <c>html</c>; otherwise true or the result of the delegated call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    switch (t.Type)
    {
        case TokenType.Comment:
            tb.Insert(t.AsComment());
            return true;

        case TokenType.Doctype:
            // Reported as an error, but the token still counts as handled.
            tb.Error(this);
            return true;

        case TokenType.StartTag:
        {
            Token.StartTag opening = t.AsStartTag();
            string openName = opening.Name;

            if (openName.Equals("html"))
            {
                return tb.Process(t, InBody);
            }
            if (openName.Equals("col"))
            {
                tb.InsertEmpty(opening);
                return true;
            }
            return AnythingElse(t, tb);
        }

        case TokenType.EndTag:
        {
            Token.EndTag closing = t.AsEndTag();
            string closeName = closing.Name;

            if (!closeName.Equals("colgroup"))
            {
                return AnythingElse(t, tb);
            }
            if (tb.CurrentElement.NodeName.Equals("html")) // frag case
            {
                tb.Error(this);
                return false;
            }
            tb.Pop();
            tb.Transition(InTable);
            return true;
        }

        case TokenType.EOF:
            if (tb.CurrentElement.NodeName.Equals("html"))
            {
                return true; // stop parsing; frag case
            }
            return AnythingElse(t, tb);

        default:
            return AnythingElse(t, tb);
    }
}
/// <summary>
/// Token handler for what appears to be the "in table" state (the enclosing
/// class is outside this view). Dispatches table-structure start and end tags,
/// buffers character data via the table-text state, and routes unrecognised
/// tokens to <c>AnythingElse</c>.
/// </summary>
/// <remarks>
/// A start or end tag that is fully handled by one of the switch sections now
/// returns true instead of falling through to the trailing
/// <c>AnythingElse</c> call. Previously an already-handled token (for example
/// a hidden <c>input</c> that had just been inserted) was processed a second
/// time by <c>AnythingElse</c>.
/// </remarks>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False on the paths that report an error and drop the token; otherwise true
/// or the result of the delegated call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (t.IsCharacter)
    {
        // Character data is collected by the table-text state and replayed there.
        tb.NewPendingTableCharacters();
        tb.MarkInsertionMode();
        tb.Transition(InTableText);
        return tb.Process(t);
    }
    else if (t.IsComment)
    {
        tb.Insert(t.AsComment());
        return true;
    }
    else if (t.IsDoctype)
    {
        tb.Error(this);
        return false;
    }
    else if (t.IsStartTag)
    {
        Token.StartTag startTag = t.AsStartTag();
        string name = startTag.Name;
        switch (name)
        {
            case "caption":
                tb.ClearStackToTableContext();
                tb.InsertMarkerToFormattingElements();
                tb.Insert(startTag);
                tb.Transition(InCaption);
                break;

            case "colgroup":
                tb.ClearStackToTableContext();
                tb.Insert(startTag);
                tb.Transition(InColumnGroup);
                break;

            case "col":
                // A bare <col> implies an enclosing <colgroup>.
                tb.Process(new Token.StartTag("colgroup"));
                return tb.Process(t);

            case "table":
                tb.Error(this);
                bool processed = tb.Process(new Token.EndTag("table"));
                if (processed) // only ignored if in fragment
                {
                    return tb.Process(t);
                }
                // The synthetic </table> was rejected, so this token is dropped
                // like the other error-and-ignore paths in this method.
                return false;

            case "tbody":
            case "tfoot":
            case "thead":
                tb.ClearStackToTableContext();
                tb.Insert(startTag);
                tb.Transition(InTableBody);
                break;

            case "td":
            case "th":
            case "tr":
                // Cells and rows imply an enclosing <tbody>.
                tb.Process(new Token.StartTag("tbody"));
                return tb.Process(t);

            case "style":
            case "script":
                return tb.Process(t, InHead);

            case "input":
                // Static string.Equals tolerates a null attribute value, so an
                // input without a usable "type" is simply treated as non-hidden.
                if (!string.Equals(startTag.Attributes["type"], "hidden", StringComparison.OrdinalIgnoreCase))
                {
                    return AnythingElse(t, tb);
                }
                tb.InsertEmpty(startTag);
                break;

            case "form":
                tb.Error(this);
                if (tb.FormElement != null)
                {
                    return false;
                }
                HtmlElement form = tb.InsertEmpty(startTag);
                tb.FormElement = form;
                break;

            default:
                return AnythingElse(t, tb);
        }

        // The tag was handled by one of the sections above; do not fall through
        // to AnythingElse and process it again.
        return true;
    }
    else if (t.IsEndTag)
    {
        Token.EndTag endTag = t.AsEndTag();
        string name = endTag.Name;
        switch (name)
        {
            case "table":
                if (!tb.InTableScope(name))
                {
                    tb.Error(this);
                    return false;
                }
                tb.PopStackToClose("table");
                tb.ResetInsertionMode();
                break;

            case "body":
            case "caption":
            case "col":
            case "colgroup":
            case "html":
            case "tbody":
            case "td":
            case "tfoot":
            case "th":
            case "thead":
            case "tr":
                tb.Error(this);
                return false;

            default:
                return AnythingElse(t, tb);
        }

        // </table> was handled above; do not reprocess it via AnythingElse.
        return true;
    }
    else if (t.IsEOF)
    {
        if (tb.CurrentElement.NodeName.Equals("html"))
        {
            tb.Error(this);
        }
        return true; // stops parsing
    }

    return AnythingElse(t, tb);
}
/// <summary>
/// Token handler for what appears to be the "in cell" state (the enclosing
/// class is outside this view). Closes the cell on its own end tag, closes it
/// implicitly for table-structure tags and reprocesses them, and routes the
/// rest to <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False on the paths that report an error and drop the token; otherwise true
/// or the result of the delegated call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (t.IsEndTag)
    {
        Token.EndTag closing = t.AsEndTag();
        string closeName = closing.Name;

        if (closeName.In("td", "th"))
        {
            if (!tb.InTableScope(closeName))
            {
                tb.Error(this);
                tb.Transition(InRow); // might not be in scope if empty: <td /> and processing fake end tag
                return false;
            }

            tb.GenerateImpliedEndTags();
            if (!tb.CurrentElement.NodeName.Equals(closeName))
            {
                tb.Error(this);
            }
            tb.PopStackToClose(closeName);
            tb.ClearFormattingElementsToLastMarker();
            tb.Transition(InRow);
            return true;
        }

        if (StringUtil.In(closeName, "body", "caption", "col", "colgroup", "html"))
        {
            tb.Error(this);
            return false;
        }

        if (StringUtil.In(closeName, "table", "tbody", "tfoot", "thead", "tr"))
        {
            if (!tb.InTableScope(closeName))
            {
                tb.Error(this);
                return false;
            }
            // Close the open cell, then let the new state see the end tag.
            CloseCell(tb);
            return tb.Process(t);
        }

        return AnythingElse(t, tb);
    }

    bool closesCellImplicitly =
        t.IsStartTag && StringSet.Create("caption col colgroup tbody td tfoot th thead tr").Contains(t.AsStartTag().Name);
    if (!closesCellImplicitly)
    {
        return AnythingElse(t, tb);
    }

    // Neither kind of cell is open in table scope: nothing to close.
    if (!tb.InTableScope("td") && !tb.InTableScope("th"))
    {
        tb.Error(this);
        return false;
    }

    CloseCell(tb);
    return tb.Process(t);
}
/// <summary>
/// Handles an end tag token for what appears to be the "in body" state (the
/// enclosing class is outside this view). Dispatches on the tag name: block
/// and list elements are closed after generating implied end tags, formatting
/// elements go through the adoption agency loop, and unrecognised names are
/// delegated to <c>AnyOtherEndTag</c>.
/// </summary>
/// <remarks>
/// Fixes relative to the previous version:
/// <list type="bullet">
/// <item>The inner adoption loop used <c>goto</c> to a label at the top of the
/// loop body, which skipped the <c>j++</c> / <c>j &lt; 3</c> bookkeeping. Once
/// a non-formatting node had been removed from the stack, <c>node</c> no longer
/// changed and the loop never terminated. It now uses <c>continue</c> and
/// <c>break</c>, so the three-iteration cap applies.</item>
/// <item>The applet/marquee/object branch was wrapped in a scope check for the
/// literal string <c>"name"</c> rather than the tag name; that bogus guard has
/// been removed.</item>
/// </list>
/// </remarks>
/// <param name="t">Token to handle; read through <c>AsEndTag()</c>.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// True or false when a branch decides the outcome itself (false on the paths
/// that report an error and drop the token); null when the tag was handled
/// without an explicit result.
/// </returns>
private bool? HandleEndTag(Token t, HtmlTreeBuilder tb)
{
    Token.EndTag endTag = t.AsEndTag();
    string name = endTag.Name;

    if (name.Equals("body"))
    {
        if (!tb.InScope("body"))
        {
            tb.Error(this);
            return false;
        }
        else
        {
            // TODO: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
            tb.Transition(AfterBody);
        }
    }
    else if (name.Equals("html"))
    {
        // </html> implies </body>; reprocess only if that was accepted.
        bool notIgnored = tb.Process(new Token.EndTag("body"));
        if (notIgnored)
        {
            return tb.Process(endTag);
        }
    }
    else if (StringSet.Create(@"address article aside blockquote button center details dir div dl fieldset figcaption figure footer header hgroup listing menu nav ol pre section summary ul").Contains(name))
    {
        // TODO: refactor these lookups
        if (!tb.InScope(name))
        {
            // nothing to close
            tb.Error(this);
            return false;
        }
        else
        {
            tb.GenerateImpliedEndTags();
            if (!tb.CurrentElement.NodeName.Equals(name))
            {
                tb.Error(this);
            }
            tb.PopStackToClose(name);
        }
    }
    else if (name.Equals("form"))
    {
        HtmlElement currentForm = tb.FormElement;
        tb.FormElement = null;
        if (currentForm == null || !tb.InScope(name))
        {
            tb.Error(this);
            return false;
        }
        else
        {
            tb.GenerateImpliedEndTags();
            if (!tb.CurrentElement.NodeName.Equals(name))
            {
                tb.Error(this);
            }
            // remove currentForm from stack. will shift anything under up.
            tb.RemoveFromStack(currentForm);
        }
    }
    else if (name.Equals("p"))
    {
        if (!tb.InButtonScope(name))
        {
            tb.Error(this);
            tb.Process(new Token.StartTag(name)); // if no p to close, creates an empty <p></p>
            return tb.Process(endTag);
        }
        else
        {
            tb.GenerateImpliedEndTags(name);
            if (!tb.CurrentElement.NodeName.Equals(name))
            {
                tb.Error(this);
            }
            tb.PopStackToClose(name);
        }
    }
    else if (name.Equals("li"))
    {
        if (!tb.InListItemScope(name))
        {
            tb.Error(this);
            return false;
        }
        else
        {
            tb.GenerateImpliedEndTags(name);
            if (!tb.CurrentElement.NodeName.Equals(name))
            {
                tb.Error(this);
            }
            tb.PopStackToClose(name);
        }
    }
    else if (StringUtil.In(name, "dd", "dt"))
    {
        if (!tb.InScope(name))
        {
            tb.Error(this);
            return false;
        }
        else
        {
            tb.GenerateImpliedEndTags(name);
            if (!tb.CurrentElement.NodeName.Equals(name))
            {
                tb.Error(this);
            }
            tb.PopStackToClose(name);
        }
    }
    else if (HeadingTags.Contains(name))
    {
        if (!tb.InScope(HeadingTags))
        {
            tb.Error(this);
            return false;
        }
        else
        {
            tb.GenerateImpliedEndTags(name);
            if (!tb.CurrentElement.NodeName.Equals(name))
            {
                tb.Error(this);
            }
            // Any open heading is closed, not just one with the same level.
            tb.PopStackToClose(HeadingTags);
        }
    }
    else if (name.Equals("sarcasm"))
    {
        // *sigh*
        return AnyOtherEndTag(t, tb);
    }
    else if (StringSet.Create("a b big code em font i nobr s small strike strong tt u").Contains(name))
    {
        // Adoption Agency Algorithm.
        for (int i = 0; i < 8; i++)
        {
            var formatEl = tb.GetActiveFormattingElement(name);
            if (formatEl == null)
            {
                return AnyOtherEndTag(t, tb);
            }
            else if (!tb.OnStack(formatEl))
            {
                tb.Error(this);
                tb.RemoveFromActiveFormattingElements(formatEl);
                return true;
            }
            else if (!tb.InScope(formatEl.NodeName))
            {
                tb.Error(this);
                return false;
            }
            else if (tb.CurrentElement != formatEl)
            {
                tb.Error(this);
            }

            // Walk the stack: the entry just before the formatting element is
            // the common ancestor; the first special element after it is the
            // furthest block.
            DomContainer furthestBlock = null;
            DomContainer commonAncestor = null;
            bool seenFormattingElement = false;
            DescendableLinkedList<DomContainer> stack = tb.Stack;
            for (int si = 0; si < stack.Count; si++)
            {
                DomContainer el = stack.ElementAt(si);
                if (el == formatEl)
                {
                    commonAncestor = stack.ElementAt(si - 1);
                    seenFormattingElement = true;
                }
                else if (seenFormattingElement && tb.IsSpecial(el))
                {
                    furthestBlock = el;
                    break;
                }
            }

            if (furthestBlock == null)
            {
                // No block inside the formatting element: a plain close suffices.
                tb.PopStackToClose(formatEl.NodeName);
                tb.RemoveFromActiveFormattingElements(formatEl);
                return true;
            }

            // TODO: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
            // does that mean: int pos of format el in list?
            DomContainer node = furthestBlock;
            DomContainer lastNode = furthestBlock;
            for (int j = 0; j < 3; j++)
            {
                if (tb.OnStack(node))
                {
                    node = tb.AboveOnStack(node);
                }

                if (!tb.IsInActiveFormattingElements(node)) // note no bookmark check
                {
                    tb.RemoveFromStack(node);
                    // Must count as an iteration: jumping back to the top of the
                    // body without advancing j would spin forever once node is
                    // off the stack.
                    continue;
                }
                else if (node == formatEl)
                {
                    break;
                }

                HtmlElement replacement = new HtmlElement(node.NodeName);
                tb.ReplaceActiveFormattingElement(node, replacement);
                tb.ReplaceOnStack(node, replacement);
                node = replacement;

                if (lastNode == furthestBlock)
                {
                    // TODO: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
                    // not getting how this bookmark both straddles the element above, but is inbetween here...
                }

                if (lastNode.Parent != null)
                {
                    lastNode.Remove();
                }
                node.Append(lastNode);
                lastNode = node;
            }

            if (StringUtil.In(commonAncestor.NodeName, "table", "tbody", "tfoot", "thead", "tr"))
            {
                if (lastNode.Parent != null)
                {
                    lastNode.Remove();
                }
                tb.InsertInFosterParent(lastNode);
            }
            else
            {
                if (lastNode.Parent != null)
                {
                    lastNode.Remove();
                }
                commonAncestor.Append(lastNode);
            }

            HtmlElement adopter = new HtmlElement(name);
            var childNodes = furthestBlock.ChildNodes.ToArray();
            foreach (var childNode in childNodes)
            {
                adopter.Append(childNode); // append will reparent. thus the clone to avoid concurrent mod.
            }
            furthestBlock.Append(adopter);
            tb.RemoveFromActiveFormattingElements(formatEl);
            // TODO: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
            tb.RemoveFromStack(formatEl);
            tb.InsertOnStackAfter(furthestBlock, adopter);
        }
    }
    else if (StringUtil.In(name, "applet", "marquee", "object"))
    {
        if (!tb.InScope(name))
        {
            tb.Error(this);
            return false;
        }
        tb.GenerateImpliedEndTags();
        if (!tb.CurrentElement.NodeName.Equals(name))
        {
            tb.Error(this);
        }
        tb.PopStackToClose(name);
        tb.ClearFormattingElementsToLastMarker();
    }
    else if (name.Equals("br"))
    {
        // </br> is reported as an error and handled as if it were <br>.
        tb.Error(this);
        tb.Process(new Token.StartTag("br"));
        return false;
    }
    else
    {
        return AnyOtherEndTag(t, tb);
    }

    return null;
}
/// <summary>
/// Token handler for what appears to be the "in head" state (the enclosing
/// class is outside this view). Inserts metadata elements, switches to the
/// matching text-handling path for title/style/script-like elements, closes
/// the head on its end tag, and routes the rest to <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False on the paths that report an error and drop the token; otherwise true
/// or the result of the delegated call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    switch (t.Type)
    {
        case TokenType.Comment:
            tb.Insert(t.AsComment());
            return true;

        case TokenType.Doctype:
            tb.Error(this);
            return false;

        case TokenType.StartTag:
        {
            Token.StartTag opening = t.AsStartTag();
            string openName = opening.Name;

            if (openName.Equals("html"))
            {
                return InBody.Process(t, tb);
            }
            if (StringUtil.In(openName, "base", "basefont", "bgsound", "command", "link"))
            {
                HtmlElement inserted = tb.InsertEmpty(opening);
                // jsoup special: update base the first time it is seen
                if (openName.Equals("base") && inserted.HasAttribute("href"))
                {
                    tb.MaybeSetBaseUri(inserted);
                }
                return true;
            }
            if (openName.Equals("meta"))
            {
                tb.InsertEmpty(opening);
                // TODO: charset switches
                return true;
            }
            if (openName.Equals("title"))
            {
                HandleRcData(opening, tb);
                return true;
            }
            if (StringUtil.In(openName, "noframes", "style"))
            {
                HandleRawtext(opening, tb);
                return true;
            }
            if (openName.Equals("noscript"))
            {
                // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
                tb.Insert(opening);
                tb.Transition(InHeadNoscript);
                return true;
            }
            if (openName.Equals("script"))
            {
                // skips some script rules as won't execute them
                tb.Insert(opening);
                tb.tokeniser.Transition(TokeniserState.ScriptData);
                tb.MarkInsertionMode();
                tb.Transition(Text);
                return true;
            }
            if (openName.Equals("head"))
            {
                tb.Error(this);
                return false;
            }
            return AnythingElse(t, tb);
        }

        case TokenType.EndTag:
        {
            Token.EndTag closing = t.AsEndTag();
            string closeName = closing.Name;

            if (closeName.Equals("head"))
            {
                tb.Pop();
                tb.Transition(AfterHead);
                return true;
            }
            if (StringUtil.In(closeName, "body", "html", "br"))
            {
                return AnythingElse(t, tb);
            }
            tb.Error(this);
            return false;
        }

        default:
            return AnythingElse(t, tb);
    }
}
/// <summary>
/// Token handler for what appears to be the "in row" state (the enclosing
/// class is outside this view). Opens cells, closes the row on its end tag,
/// closes it implicitly for table-structure tags, and routes the rest to
/// <c>AnythingElse</c>.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the element stack.</param>
/// <returns>
/// False on the paths that report an error and drop the token; otherwise true
/// or the result of the delegated call.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (t.IsStartTag)
    {
        Token.StartTag opening = t.AsStartTag();
        string openName = opening.Name;

        if (StringUtil.In(openName, "th", "td"))
        {
            tb.ClearStackToTableRowContext();
            tb.Insert(opening);
            tb.Transition(InCell);
            tb.InsertMarkerToFormattingElements();
            return true;
        }
        if (StringSet.Create("caption col colgroup tbody tfoot thead tr").Contains(openName))
        {
            return HandleMissingTr(t, tb);
        }
        return AnythingElse(t, tb);
    }

    if (t.IsEndTag)
    {
        Token.EndTag closing = t.AsEndTag();
        string closeName = closing.Name;

        if (closeName.Equals("tr"))
        {
            if (!tb.InTableScope(closeName))
            {
                tb.Error(this); // frag
                return false;
            }
            tb.ClearStackToTableRowContext();
            tb.Pop(); // tr
            tb.Transition(InTableBody);
            return true;
        }
        if (closeName.Equals("table"))
        {
            return HandleMissingTr(t, tb);
        }
        if (StringUtil.In(closeName, "tbody", "tfoot", "thead"))
        {
            if (!tb.InTableScope(closeName))
            {
                tb.Error(this);
                return false;
            }
            // Close the row first, then let the new state see the end tag.
            tb.Process(new Token.EndTag("tr"));
            return tb.Process(t);
        }
        if (StringSet.Create("body caption col colgroup html td th").Contains(closeName))
        {
            tb.Error(this);
            return false;
        }
        return AnythingElse(t, tb);
    }

    return AnythingElse(t, tb);
}