/// <summary>
/// State handler: rejects doctypes, inserts comments, skips whitespace, and on an
/// "html" start tag inserts it and transitions to BeforeHead. Everything else is
/// routed through AnythingElse, except end tags other than head/body/html/br,
/// which are reported as errors.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the error paths; otherwise true or the result of AnythingElse.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (t.IsDoctype())
    {
        tb.Error(this);
        return false;
    }

    if (t.IsComment())
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (IsWhitespace(t))
    {
        // whitespace is skipped
        return true;
    }

    if (t.IsStartTag() && t.AsStartTag().Name().Equals("html"))
    {
        tb.Insert(t.AsStartTag());
        tb.Transition(BeforeHead);
        return true;
    }

    if (t.IsEndTag())
    {
        // Only these four end tags are handed to AnythingElse; any other end tag is an error.
        bool handledByFallback = StringUtil.In(t.AsEndTag().Name(), "head", "body", "html", "br");
        if (handledByFallback)
        {
            return AnythingElse(t, tb);
        }

        tb.Error(this);
        return false;
    }

    return AnythingElse(t, tb);
}
/// <summary>
/// State handler: skips whitespace, inserts comments, and turns a doctype token into
/// a DocumentType node appended to the document (switching the document to quirks
/// mode when the token forces it). Any other token transitions to BeforeHtml and is
/// re-processed there.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder that owns the document.</param>
/// <returns>true, or the result of re-processing the token after the transition.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        return true; // ignore whitespace
    }

    if (t.IsComment())
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (!t.IsDoctype())
    {
        // todo: check not iframe srcdoc
        tb.Transition(BeforeHtml);
        return tb.Process(t); // re-process token
    }

    // todo: parse error check on expected doctypes
    // todo: quirk state check on doctype ids
    Token.Doctype doctypeToken = t.AsDoctype();
    string doctypeName = doctypeToken.Name.ToString();
    string publicId = doctypeToken.PublicIdentifier.ToString();
    string systemId = doctypeToken.SystemIdentifier.ToString();
    string baseUri = tb.BaseUri.ToString();

    DocumentType doctypeNode = new DocumentType(doctypeName, publicId, systemId, baseUri);
    tb.Document.AppendChild(doctypeNode);

    if (doctypeToken.ForceQuirks)
    {
        tb.Document.QuirksMode(Document.QuirksModeEnum.Quirks);
    }

    tb.Transition(BeforeHtml);
    return true;
}
/// <summary>
/// Dispatches a token by its type: start tags, comments, characters and doctypes are
/// inserted, end tags close the matching stack entry, and EOF does nothing.
/// </summary>
/// <param name="token">Token to handle.</param>
/// <returns>Always true.</returns>
/// <exception cref="Exception">Thrown for a token type not listed above.</exception>
public override bool Process(Token token)
{
    Token.TokenType kind = token.Type;

    if (kind == Token.TokenType.StartTag)
    {
        Insert(token.AsStartTag());
    }
    else if (kind == Token.TokenType.EndTag)
    {
        PopStackToClose(token.AsEndTag());
    }
    else if (kind == Token.TokenType.Comment)
    {
        Insert(token.AsComment());
    }
    else if (kind == Token.TokenType.Character)
    {
        Insert(token.AsCharacter());
    }
    else if (kind == Token.TokenType.Doctype)
    {
        Insert(token.AsDoctype());
    }
    else if (kind == Token.TokenType.EOF)
    {
        // could put some normalisation here if desired
    }
    else
    {
        throw new Exception("Unexpected token type: " + token.Type);
    }

    return true;
}
/// <summary>
/// Stores a token as the pending emitted token. For start tags it also records the
/// tag as the last start tag and clears the self-closing acknowledgement when the
/// tag is self-closing; for end tags it reports an error if attributes are present.
/// </summary>
/// <param name="token">Token to emit.</param>
/// <exception cref="InvalidOperationException">A previously emitted token is still pending.</exception>
public void Emit(Token token)
{
    if (_isEmitPending)
    {
        throw new InvalidOperationException("There is an unread token pending!");
    }

    _emitPending = token;
    _isEmitPending = true;

    switch (token.Type)
    {
        case Token.TokenType.StartTag:
        {
            Token.StartTag opened = (Token.StartTag)token;
            _lastStartTag = opened;
            if (opened.IsSelfClosing)
            {
                _selfClosingFlagAcknowledged = false;
            }
            break;
        }
        case Token.TokenType.EndTag:
        {
            Token.EndTag closed = (Token.EndTag)token;
            if (closed.Attributes != null)
            {
                Error("Attributes incorrectly present on end tag");
            }
            break;
        }
    }
}
/// <summary>
/// Creates an element for the start tag and inserts it via InsertNode. A self-closing
/// tag acknowledges the tokeniser flag (and marks an unknown tag as self closing);
/// any other tag is appended to the stack.
/// </summary>
/// <param name="startTag">Start tag token to convert.</param>
/// <returns>The newly created element.</returns>
Element Insert(Token.StartTag startTag)
{
    // todo: wonder if for xml parsing, should treat all tags as unknown? because it's not html.
    Tag tag = Tag.ValueOf(startTag.Name());
    Element inserted = new Element(tag, _baseUri, startTag.Attributes);
    InsertNode(inserted);

    if (!startTag.IsSelfClosing)
    {
        _stack.AddLast(inserted);
        return inserted;
    }

    _tokeniser.AcknowledgeSelfClosingFlag();
    if (!tag.IsKnownTag())
    {
        // unknown tag: remember that it is self closing for output
        tag.SetSelfClosing();
    }

    return inserted;
}
/// <summary>
/// State handler: skips whitespace, inserts comments, rejects doctypes, delegates an
/// "html" start tag to InBody, and on a "head" start tag inserts it, records it as
/// the head element and transitions to InHead. End tags other than
/// head/body/html/br are errors; every remaining token first processes a synthetic
/// "head" start tag and is then re-processed.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the error paths; otherwise true or the delegated result.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        return true;
    }

    if (t.IsComment())
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (t.IsDoctype())
    {
        tb.Error(this);
        return false;
    }

    if (t.IsStartTag())
    {
        string tagName = t.AsStartTag().Name();
        if (tagName.Equals("html"))
        {
            return InBody.Process(t, tb); // does not transition
        }

        if (tagName.Equals("head"))
        {
            Element headElement = tb.Insert(t.AsStartTag());
            tb.HeadElement = headElement;
            tb.Transition(InHead);
            return true;
        }
    }

    if (t.IsEndTag() && !StringUtil.In(t.AsEndTag().Name(), "head", "body", "html", "br"))
    {
        tb.Error(this);
        return false;
    }

    // Remaining tokens: process a synthetic head start tag, then handle the token again.
    tb.Process(new Token.StartTag("head"));
    return tb.Process(t);
}
/// <summary>
/// Records the token as the current token and hands it to the current state.
/// </summary>
/// <param name="token">Token to handle.</param>
/// <returns>Whatever the current state's Process returns.</returns>
public override bool Process(Token token)
{
    _currentToken = token;

    bool handled = _state.Process(token, this);
    return handled;
}
/// <summary>
/// State handler for frameset content: inserts whitespace and comments, rejects
/// doctypes, handles html/frameset/frame/noframes start tags, pops on a "frameset"
/// end tag (moving to AfterFrameset when appropriate) and reports an error at EOF
/// if the current element is not "html". Any other token is an error.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the error paths (except EOF); otherwise true or the delegated result.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    if (t.IsComment())
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (t.IsDoctype())
    {
        tb.Error(this);
        return false;
    }

    if (t.IsStartTag())
    {
        Token.StartTag opened = t.AsStartTag();
        string tagName = opened.Name();

        if (tagName.Equals("html"))
        {
            return tb.Process(opened, InBody);
        }

        if (tagName.Equals("frameset"))
        {
            tb.Insert(opened);
            return true;
        }

        if (tagName.Equals("frame"))
        {
            tb.InsertEmpty(opened);
            return true;
        }

        if (tagName.Equals("noframes"))
        {
            return tb.Process(opened, InHead);
        }

        tb.Error(this);
        return false;
    }

    if (t.IsEndTag() && t.AsEndTag().Name().Equals("frameset"))
    {
        if (tb.CurrentElement.NodeName.Equals("html"))
        {
            // frag
            tb.Error(this);
            return false;
        }

        tb.Pop();
        bool leftFramesets = !tb.IsFragmentParsing() && !tb.CurrentElement.NodeName.Equals("frameset");
        if (leftFramesets)
        {
            tb.Transition(AfterFrameset);
        }

        return true;
    }

    if (t.IsEOF())
    {
        // EOF is accepted either way; it is only flagged when the current element is not html.
        if (!tb.CurrentElement.NodeName.Equals("html"))
        {
            tb.Error(this);
        }

        return true;
    }

    tb.Error(this);
    return false;
}
/// <summary>
/// Fallback: inserts an "html" element, transitions to BeforeHead and re-processes
/// the token.
/// </summary>
/// <param name="t">Token to re-process.</param>
/// <param name="tb">Tree builder to act on.</param>
/// <returns>The result of re-processing the token.</returns>
private bool AnythingElse(Token t, TreeBuilder tb)
{
    tb.Insert("html");
    tb.Transition(BeforeHead);

    bool reprocessed = tb.Process(t);
    return reprocessed;
}
/// <summary>
/// State handler: whitespace and "html" start tags are delegated to InBody, comments
/// are inserted, doctypes are errors, an "html" end tag transitions to
/// AfterAfterBody (or is an error when fragment parsing), and EOF is accepted. Any
/// other token reports an error, transitions back to InBody and is re-processed.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the error paths; otherwise true or the delegated result.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        return tb.Process(t, InBody);
    }

    if (t.IsComment())
    {
        tb.Insert(t.AsComment()); // original author's note: into html node
        return true;
    }

    if (t.IsDoctype())
    {
        tb.Error(this);
        return false;
    }

    if (t.IsStartTag() && t.AsStartTag().Name().Equals("html"))
    {
        return tb.Process(t, InBody);
    }

    if (t.IsEndTag() && t.AsEndTag().Name().Equals("html"))
    {
        if (tb.IsFragmentParsing())
        {
            tb.Error(this);
            return false;
        }

        tb.Transition(AfterAfterBody);
        return true;
    }

    if (t.IsEOF())
    {
        // nothing more to do
        return true;
    }

    tb.Error(this);
    tb.Transition(InBody);
    return tb.Process(t);
}
/// <summary>
/// Placeholder state handler: accepts every token without acting on it.
/// </summary>
/// <param name="t">Token (unused).</param>
/// <param name="tb">Tree builder (unused).</param>
/// <returns>Always true.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    // todo: implement. Also; how do we get here?
    return true;
}
/// <summary>
/// Fallback: reports an error, processes a synthetic "noscript" end tag and then
/// re-processes the token.
/// </summary>
/// <param name="t">Token to re-process.</param>
/// <param name="tb">Tree builder to act on.</param>
/// <returns>The result of re-processing the token.</returns>
private bool AnythingElse(Token t, TreeBuilder tb)
{
    tb.Error(this);

    Token.EndTag impliedClose = new Token.EndTag("noscript");
    tb.Process(impliedClose);

    return tb.Process(t);
}
/// <summary>
/// Creates an element for the start tag and inserts it via InsertNode. When the
/// token is self-closing, the tokeniser flag is acknowledged and an unknown tag is
/// marked as self closing.
/// </summary>
/// <param name="startTag">Start tag token to convert.</param>
/// <returns>The newly created element.</returns>
public Element InsertEmpty(Token.StartTag startTag)
{
    Tag tag = Tag.ValueOf(startTag.Name());
    Element inserted = new Element(tag, _baseUri, startTag.Attributes);
    InsertNode(inserted);

    if (!startTag.IsSelfClosing)
    {
        return inserted;
    }

    _tokeniser.AcknowledgeSelfClosingFlag();
    if (!tag.IsKnownTag())
    {
        // unknown tag, remember this is self closing for output
        tag.SetSelfClosing();
    }

    return inserted;
}
/// <summary>
/// Fallback: processes a synthetic "head" end tag and then re-processes the token.
/// </summary>
/// <param name="t">Token to re-process.</param>
/// <param name="tb">Tree builder to act on.</param>
/// <returns>The result of re-processing the token.</returns>
private bool AnythingElse(Token t, TreeBuilder tb)
{
    Token.EndTag impliedClose = new Token.EndTag("head");
    tb.Process(impliedClose);

    return tb.Process(t);
}
/// <summary>
/// State handler for content inside noscript: a doctype is reported but accepted,
/// an "html" start tag is delegated to InBody, a "noscript" end tag pops and returns
/// to InHead, and whitespace, comments and a fixed set of start tags are delegated
/// to InHead. A "br" end tag and all unlisted tokens go to AnythingElse; "head" or
/// "noscript" start tags and other end tags are errors.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the rejected-token paths; otherwise true or the delegated result.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (t.IsDoctype())
    {
        tb.Error(this);
        return true;
    }

    if (t.IsStartTag() && t.AsStartTag().Name().Equals("html"))
    {
        return tb.Process(t, InBody);
    }

    if (t.IsEndTag() && t.AsEndTag().Name().Equals("noscript"))
    {
        tb.Pop();
        tb.Transition(InHead);
        return true;
    }

    bool handledAsHeadContent =
        IsWhitespace(t)
        || t.IsComment()
        || (t.IsStartTag() && StringUtil.In(t.AsStartTag().Name(), "basefont", "bgsound", "link", "meta", "noframes", "style"));
    if (handledAsHeadContent)
    {
        return tb.Process(t, InHead);
    }

    if (t.IsEndTag() && t.AsEndTag().Name().Equals("br"))
    {
        return AnythingElse(t, tb);
    }

    bool nestedHeadOrNoscript = t.IsStartTag() && StringUtil.In(t.AsStartTag().Name(), "head", "noscript");
    if (nestedHeadOrNoscript || t.IsEndTag())
    {
        tb.Error(this);
        return false;
    }

    return AnythingElse(t, tb);
}
/// <summary>
/// Inserts the start tag, switches the tokeniser to the RawText state, marks the
/// current insertion mode on the builder and transitions the builder to Text.
/// </summary>
/// <param name="startTag">Start tag that opens the raw-text element.</param>
/// <param name="tb">Tree builder to act on.</param>
protected void HandleRawText(Token.StartTag startTag, TreeBuilder tb)
{
    tb.Insert(startTag);

    // Tokeniser first, then the builder's own mode bookkeeping.
    tb.Tokeniser.Transition(TokeniserState.RawText);

    tb.MarkInsertionMode();
    tb.Transition(Text);
}
/// <summary>
/// Tells whether the token is a character token whose data consists only of
/// characters accepted by <see cref="char.IsWhiteSpace(char)"/>. Empty data counts
/// as whitespace.
/// </summary>
/// <param name="t">Token to inspect.</param>
/// <returns>true for an all-whitespace character token; false for anything else.</returns>
protected bool IsWhitespace(Token t)
{
    if (!t.IsCharacter())
    {
        return false;
    }

    // todo: this checks more than spec - "\t", "\n", "\f", "\r", " "
    string text = t.AsCharacter().Data.ToString();
    foreach (char ch in text)
    {
        if (!char.IsWhiteSpace(ch))
        {
            return false;
        }
    }

    return true;
}
/// <summary>
/// Handles a single token on behalf of this state, acting on the supplied tree builder.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder the state operates on.</param>
/// <returns>
/// NOTE(review): the overrides in this file return false on paths where the token is
/// reported through tb.Error and not acted on, and true otherwise — confirm the
/// intended contract with the callers.
/// </returns>
public abstract bool Process(Token t, TreeBuilder tb);
/// <summary>
/// Records the token as the current token and has the given state handle it,
/// without changing the builder's own state in this method.
/// </summary>
/// <param name="token">Token to handle.</param>
/// <param name="state">State whose rules should be applied.</param>
/// <returns>Whatever the state's Process returns.</returns>
public bool Process(Token token, HtmlTreeBuilderState state)
{
    _currentToken = token;

    bool handled = state.Process(token, this);
    return handled;
}
/// <summary>
/// Builds a Comment node from the token's data and the base URI, then inserts it
/// via InsertNode.
/// </summary>
/// <param name="commentToken">Comment token to convert.</param>
public void Insert(Token.Comment commentToken)
{
    string text = commentToken.Data.ToString();
    InsertNode(new Comment(text, _baseUri));
}
/// <summary>
/// Inserts an element for the start tag. A self-closing tag with an unknown name is
/// inserted through InsertEmpty and immediately followed by a synthetic end tag;
/// every other tag becomes a regular element passed to Insert(Element).
/// </summary>
/// <param name="startTag">Start tag token to convert.</param>
/// <returns>The element that was inserted.</returns>
public Element Insert(Token.StartTag startTag)
{
    // When the spec expects an empty tag the caller hits InsertEmpty directly,
    // so no fake end tag is generated on that path.
    bool unknownSelfClosing = startTag.IsSelfClosing && !Tag.IsKnownTag(startTag.Name());
    if (!unknownSelfClosing)
    {
        Tag tag = Tag.ValueOf(startTag.Name());
        Element regular = new Element(tag, _baseUri, startTag.Attributes);
        Insert(regular);
        return regular;
    }

    Element empty = InsertEmpty(startTag);
    Process(new Token.EndTag(empty.TagName())); // ensure we get out of whatever state we are in
    return empty;
}
/// <summary>
/// Builds a DocumentType node from the doctype token's name, public identifier and
/// system identifier, then inserts it via InsertNode.
/// </summary>
/// <param name="d">Doctype token to convert.</param>
public void Insert(Token.Doctype d)
{
    string doctypeName = d.Name.ToString();
    string publicId = d.PublicIdentifier.ToString();
    string systemId = d.SystemIdentifier.ToString();

    InsertNode(new DocumentType(doctypeName, publicId, systemId, _baseUri));
}
/// <summary>
/// Appends the token's text to the current element: as a DataNode when the current
/// element is "script" or "style", otherwise as a TextNode.
/// </summary>
/// <param name="characterToken">Character token carrying the text.</param>
public void Insert(Token.Character characterToken)
{
    // characters in script and style go in as datanodes, not text nodes
    bool insideRawElement = StringUtil.In(CurrentElement.TagName(), "script", "style");
    string text = characterToken.Data.ToString();

    Node created = insideRawElement
        ? (Node)new DataNode(text, _baseUri)
        : new TextNode(text, _baseUri);

    // doesn't use insertNode, because we don't foster these; and will always have a stack.
    CurrentElement.AppendChild(created);
}
/// <summary>
/// State handler: inserts whitespace and comments, rejects doctypes, delegates an
/// "html" start tag to InBody and a "noframes" start tag to InHead, transitions to
/// AfterAfterFrameset on an "html" end tag and accepts EOF. Any other token is an
/// error.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the error paths; otherwise true or the delegated result.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    if (t.IsComment())
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (t.IsDoctype())
    {
        tb.Error(this);
        return false;
    }

    if (t.IsStartTag() && t.AsStartTag().Name().Equals("html"))
    {
        return tb.Process(t, InBody);
    }

    if (t.IsEndTag() && t.AsEndTag().Name().Equals("html"))
    {
        tb.Transition(AfterAfterFrameset);
        return true;
    }

    if (t.IsStartTag() && t.AsStartTag().Name().Equals("noframes"))
    {
        return tb.Process(t, InHead);
    }

    if (t.IsEOF())
    {
        // nothing more to do
        return true;
    }

    tb.Error(this);
    return false;
}
/// <summary>
/// State handler: inserts whitespace and comments, reports (but accepts) doctypes,
/// and handles start tags as follows — "html" is delegated to InBody, "body" and
/// "frameset" are inserted with a transition, head-level tags are processed under
/// InHead with the head element temporarily pushed, and "head" is an error. All
/// other tokens go through AnythingElse, except end tags other than body/html,
/// which are errors.
/// </summary>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the rejected-token paths; otherwise true or the InBody result.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    if (t.IsComment())
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (t.IsDoctype())
    {
        tb.Error(this);
        return true;
    }

    if (t.IsStartTag())
    {
        Token.StartTag opened = t.AsStartTag();
        string tagName = opened.Name();

        if (tagName.Equals("html"))
        {
            return tb.Process(t, InBody);
        }

        if (tagName.Equals("body"))
        {
            tb.Insert(opened);
            tb.FramesetOk(false);
            tb.Transition(InBody);
            return true;
        }

        if (tagName.Equals("frameset"))
        {
            tb.Insert(opened);
            tb.Transition(InFrameset);
            return true;
        }

        if (StringUtil.In(tagName, "base", "basefont", "bgsound", "link", "meta", "noframes", "script", "style", "title"))
        {
            // Head is pushed only for the duration of the InHead processing.
            tb.Error(this);
            Element headElement = tb.HeadElement;
            tb.Push(headElement);
            tb.Process(t, InHead);
            tb.RemoveFromStack(headElement);
            return true;
        }

        if (tagName.Equals("head"))
        {
            tb.Error(this);
            return false;
        }

        AnythingElse(t, tb);
        return true;
    }

    if (t.IsEndTag())
    {
        if (!StringUtil.In(t.AsEndTag().Name(), "body", "html"))
        {
            tb.Error(this);
            return false;
        }

        AnythingElse(t, tb);
        return true;
    }

    // The result of AnythingElse is intentionally not propagated on these paths.
    AnythingElse(t, tb);
    return true;
}
/// <summary>
/// State handler: inserts comments, delegates doctypes, whitespace and "html" start
/// tags to InBody, accepts EOF, and delegates a "noframes" start tag to InHead. Any
/// other token reports an error, transitions to InBody and is re-processed.
/// </summary>
/// <remarks>
/// The "noframes" branch previously compared against the misspelled literal
/// "nofrmes" and could therefore never match.
/// </remarks>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>true, or the result of the delegated processing.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    if (t.IsComment())
    {
        tb.Insert(t.AsComment());
    }
    else if (t.IsDoctype() || IsWhitespace(t) || (t.IsStartTag() && t.AsStartTag().Name().Equals("html")))
    {
        return tb.Process(t, InBody);
    }
    else if (t.IsEOF())
    {
        // nothing more to do
    }
    else if (t.IsStartTag() && t.AsStartTag().Name().Equals("noframes"))
    {
        return tb.Process(t, InHead);
    }
    else
    {
        // NOTE(review): the HTML specification says to ignore the token here rather
        // than switch back to InBody; behaviour is left unchanged — confirm.
        tb.Error(this);
        tb.Transition(InBody);
        return tb.Process(t);
    }

    return true;
}
/// <summary>
/// Builds a TextNode from the token's data and the base URI, then inserts it via
/// InsertNode.
/// </summary>
/// <param name="characterToken">Character token carrying the text.</param>
public void Insert(Token.Character characterToken)
{
    string text = characterToken.Data.ToString();
    Node textNode = new TextNode(text, _baseUri);

    InsertNode(textNode);
}
/// <summary>
/// Fallback: processes a synthetic "body" start tag, sets the frameset-ok flag to
/// true and re-processes the token.
/// </summary>
/// <param name="t">Token to re-process.</param>
/// <param name="tb">Tree builder to act on.</param>
/// <returns>The result of re-processing the token.</returns>
private bool AnythingElse(Token t, TreeBuilder tb)
{
    Token.StartTag impliedBody = new Token.StartTag("body");
    tb.Process(impliedBody);
    tb.FramesetOk(true);

    return tb.Process(t);
}
/// <summary>
/// Walks the stack with its descending enumerator looking for the first element
/// whose node name equals the end tag's name. If one is found, it and every element
/// visited before it are removed from the stack; if none is found, the stack is
/// left untouched.
/// </summary>
/// <param name="endTag">End tag naming the element to close.</param>
private void PopStackToClose(Token.EndTag endTag)
{
    string targetName = endTag.Name();

    // Collect candidates while searching, so a single walk is enough.
    List<Element> visited = new List<Element>();
    bool matched = false;

    IEnumerator<Element> walker = _stack.GetDescendingEnumerator();
    while (!matched && walker.MoveNext())
    {
        Element candidate = walker.Current;
        visited.Add(candidate);
        matched = candidate.NodeName.Equals(targetName);
    }

    if (!matched)
    {
        return; // not found, skip
    }

    foreach (Element closed in visited)
    {
        _stack.Remove(closed);
    }
}
/// <summary>
/// State handler for body content. Dispatches on the token type: character and
/// comment tokens are inserted, doctypes are errors, and start/end tags are handled
/// by tag name (block elements, headings, lists, forms, formatting elements,
/// tables, void elements, raw-text elements, select/option, ruby, math/svg, ...).
/// </summary>
/// <remarks>
/// Fixes relative to the previous revision:
/// <list type="bullet">
/// <item>The "optgroup"/"option" and "rp"/"rt" start tag branches now test
/// <c>name</c>; they previously compared two string literals and never matched.</item>
/// <item>"rp"/"rt" start tags are inserted whether or not a ruby element is in
/// scope, so making the branch reachable does not drop them.</item>
/// <item>The "applet"/"marquee"/"object" end tag branch no longer depends on an
/// element literally called "name" being out of scope.</item>
/// </list>
/// </remarks>
/// <param name="t">Token to handle.</param>
/// <param name="tb">Tree builder receiving the insertions and transitions.</param>
/// <returns>false on the paths where the token is rejected; otherwise true or the delegated result.</returns>
public override bool Process(Token t, TreeBuilder tb)
{
    switch (t.Type)
    {
        case Token.TokenType.Character:
            Token.Character c = t.AsCharacter();
            if (c.Data.Equals(_nullString))
            {
                // todo confirm that check
                tb.Error(this);
                return false;
            }
            else if (IsWhitespace(c))
            {
                tb.ReconstructFormattingElements();
                tb.Insert(c);
            }
            else
            {
                tb.ReconstructFormattingElements();
                tb.Insert(c);
                tb.FramesetOk(false);
            }
            break;

        case Token.TokenType.Comment:
            tb.Insert(t.AsComment());
            break;

        case Token.TokenType.Doctype:
            tb.Error(this);
            return false;

        case Token.TokenType.StartTag:
            Token.StartTag startTag = t.AsStartTag();
            string name = startTag.Name();
            if (name.Equals("html"))
            {
                tb.Error(this);
                // merge attributes onto real html
                Element html = tb.Stack.First.Value;
                foreach (NSoup.Nodes.Attribute attribute in startTag.Attributes)
                {
                    if (!html.HasAttr(attribute.Key))
                    {
                        html.Attributes.Add(attribute);
                    }
                }
            }
            else if (StringUtil.In(name, "base", "basefont", "bgsound", "command", "link", "meta", "noframes", "script", "style", "title"))
            {
                return tb.Process(t, InHead);
            }
            else if (name.Equals("body"))
            {
                tb.Error(this);
                LinkedList<Element> stack = tb.Stack;
                if (stack.Count == 1 || (stack.Count > 2 && !stack.ElementAt(1).NodeName.Equals("body")))
                {
                    // only in fragment case
                    return false; // ignore
                }
                else
                {
                    tb.FramesetOk(false);
                    Element body = stack.ElementAt(1);
                    foreach (NSoup.Nodes.Attribute attribute in startTag.Attributes)
                    {
                        if (!body.HasAttr(attribute.Key))
                        {
                            body.Attributes.Add(attribute);
                        }
                    }
                }
            }
            else if (name.Equals("frameset"))
            {
                tb.Error(this);
                LinkedList<Element> stack = tb.Stack;
                if (stack.Count == 1 || (stack.Count > 2 && !stack.ElementAt(1).NodeName.Equals("body")))
                {
                    // only in fragment case
                    return false; // ignore
                }
                else if (!tb.FramesetOk())
                {
                    return false; // ignore frameset
                }
                else
                {
                    Element second = stack.ElementAt(1);
                    if (second.Parent != null)
                    {
                        second.Remove();
                    }
                    // pop up to html element
                    while (stack.Count > 1)
                    {
                        stack.RemoveLast();
                    }
                    tb.Insert(startTag);
                    tb.Transition(InFrameset);
                }
            }
            else if (StringUtil.In(name, "address", "article", "aside", "blockquote", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "menu", "nav", "ol", "p", "section", "summary", "ul"))
            {
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.Insert(startTag);
            }
            else if (StringUtil.In(name, "h1", "h2", "h3", "h4", "h5", "h6"))
            {
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                if (StringUtil.In(tb.CurrentElement.NodeName, "h1", "h2", "h3", "h4", "h5", "h6"))
                {
                    tb.Error(this);
                    tb.Pop();
                }
                tb.Insert(startTag);
            }
            else if (StringUtil.In(name, "pre", "listing"))
            {
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.Insert(startTag);
                // todo: ignore LF if next token
                tb.FramesetOk(false);
            }
            else if (name.Equals("form"))
            {
                if (tb.FormElement != null)
                {
                    tb.Error(this);
                    return false;
                }
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                Element form = tb.Insert(startTag);
                tb.FormElement = form;
            }
            else if (name.Equals("li"))
            {
                tb.FramesetOk(false);
                LinkedList<Element> stack = tb.Stack;
                for (int i = stack.Count - 1; i > 0; i--)
                {
                    Element el = stack.ElementAt(i);
                    if (el.NodeName.Equals("li"))
                    {
                        tb.Process(new Token.EndTag("li"));
                        break;
                    }
                    if (tb.IsSpecial(el) && !StringUtil.In(el.NodeName, "address", "div", "p"))
                    {
                        break;
                    }
                }
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.Insert(startTag);
            }
            else if (StringUtil.In(name, "dd", "dt"))
            {
                tb.FramesetOk(false);
                LinkedList<Element> stack = tb.Stack;
                for (int i = stack.Count - 1; i > 0; i--)
                {
                    Element el = stack.ElementAt(i);
                    if (StringUtil.In(el.NodeName, "dd", "dt"))
                    {
                        tb.Process(new Token.EndTag(el.NodeName));
                        break;
                    }
                    if (tb.IsSpecial(el) && !StringUtil.In(el.NodeName, "address", "div", "p"))
                    {
                        break;
                    }
                }
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.Insert(startTag);
            }
            else if (name.Equals("plaintext"))
            {
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.Insert(startTag);
                tb.Tokeniser.Transition(TokeniserState.PlainText); // once in, never gets out
            }
            else if (name.Equals("button"))
            {
                if (tb.InButtonScope("button"))
                {
                    // close and reprocess
                    tb.Error(this);
                    tb.Process(new Token.EndTag("button"));
                    tb.Process(startTag);
                }
                else
                {
                    tb.ReconstructFormattingElements();
                    tb.Insert(startTag);
                    tb.FramesetOk(false);
                }
            }
            else if (name.Equals("a"))
            {
                if (tb.GetActiveFormattingElement("a") != null)
                {
                    tb.Error(this);
                    tb.Process(new Token.EndTag("a"));

                    // still on stack?
                    Element remainingA = tb.GetFromStack("a");
                    if (remainingA != null)
                    {
                        tb.RemoveFromActiveFormattingElements(remainingA);
                        tb.RemoveFromStack(remainingA);
                    }
                }
                tb.ReconstructFormattingElements();
                Element a = tb.Insert(startTag);
                tb.PushActiveFormattingElements(a);
            }
            else if (StringUtil.In(name, "b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u"))
            {
                tb.ReconstructFormattingElements();
                Element el = tb.Insert(startTag);
                tb.PushActiveFormattingElements(el);
            }
            else if (name.Equals("nobr"))
            {
                tb.ReconstructFormattingElements();
                if (tb.InScope("nobr"))
                {
                    tb.Error(this);
                    tb.Process(new Token.EndTag("nobr"));
                    tb.ReconstructFormattingElements();
                }
                Element el = tb.Insert(startTag);
                tb.PushActiveFormattingElements(el);
            }
            else if (StringUtil.In(name, "applet", "marquee", "object"))
            {
                tb.ReconstructFormattingElements();
                tb.Insert(startTag);
                tb.InsertMarkerToFormattingElements();
                tb.FramesetOk(false);
            }
            else if (name.Equals("table"))
            {
                if (tb.Document.QuirksMode() != Document.QuirksModeEnum.Quirks && tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.Insert(startTag);
                tb.FramesetOk(false);
                tb.Transition(InTable);
            }
            else if (StringUtil.In(name, "area", "br", "embed", "img", "keygen", "wbr"))
            {
                tb.ReconstructFormattingElements();
                tb.InsertEmpty(startTag);
                tb.FramesetOk(false);
            }
            else if (name.Equals("input"))
            {
                tb.ReconstructFormattingElements();
                Element el = tb.InsertEmpty(startTag);
                if (!el.Attr("type").Equals("hidden", StringComparison.InvariantCultureIgnoreCase))
                {
                    tb.FramesetOk(false);
                }
            }
            else if (StringUtil.In(name, "param", "source", "track"))
            {
                tb.InsertEmpty(startTag);
            }
            else if (name.Equals("hr"))
            {
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.InsertEmpty(startTag);
                tb.FramesetOk(false);
            }
            else if (name.Equals("image"))
            {
                // we're not supposed to ask.
                startTag.Name("img");
                return tb.Process(startTag);
            }
            else if (name.Equals("isindex"))
            {
                // how much do we care about the early 90s?
                tb.Error(this);
                if (tb.FormElement != null)
                {
                    return false;
                }

                tb.Tokeniser.AcknowledgeSelfClosingFlag();
                tb.Process(new Token.StartTag("form"));
                if (startTag.Attributes.ContainsKey("action"))
                {
                    Element form = tb.FormElement;
                    form.Attr("action", startTag.Attributes["action"]);
                }
                tb.Process(new Token.StartTag("hr"));
                tb.Process(new Token.StartTag("label"));
                // hope you like english.
                string prompt = startTag.Attributes.ContainsKey("prompt") ?
                    startTag.Attributes["prompt"] :
                    "This is a searchable index. Enter search keywords: ";

                tb.Process(new Token.Character(prompt));

                // input
                Attributes inputAttribs = new Attributes();
                foreach (NSoup.Nodes.Attribute attr in startTag.Attributes)
                {
                    if (!StringUtil.In(attr.Key, "name", "action", "prompt"))
                    {
                        inputAttribs.Add(attr);
                    }
                }
                inputAttribs["name"] = "isindex";
                tb.Process(new Token.StartTag("input", inputAttribs));
                tb.Process(new Token.EndTag("label"));
                tb.Process(new Token.StartTag("hr"));
                tb.Process(new Token.EndTag("form"));
            }
            else if (name.Equals("textarea"))
            {
                tb.Insert(startTag);
                // todo: If the next token is a U+000A LINE FEED (LF) character token, then ignore that token and move on to the next one. (Newlines at the start of textarea elements are ignored as an authoring convenience.)
                tb.Tokeniser.Transition(TokeniserState.RcData);
                tb.MarkInsertionMode();
                tb.FramesetOk(false);
                tb.Transition(Text);
            }
            else if (name.Equals("xmp"))
            {
                if (tb.InButtonScope("p"))
                {
                    tb.Process(new Token.EndTag("p"));
                }
                tb.ReconstructFormattingElements();
                tb.FramesetOk(false);
                HandleRawText(startTag, tb);
            }
            else if (name.Equals("iframe"))
            {
                tb.FramesetOk(false);
                HandleRawText(startTag, tb);
            }
            else if (name.Equals("noembed"))
            {
                // also handle noscript if script enabled
                HandleRawText(startTag, tb);
            }
            else if (name.Equals("select"))
            {
                tb.ReconstructFormattingElements();
                tb.Insert(startTag);
                tb.FramesetOk(false);

                TreeBuilderState state = tb.State;
                if (state.Equals(InTable) || state.Equals(InCaption) || state.Equals(InTableBody) || state.Equals(InRow) || state.Equals(InCell))
                {
                    tb.Transition(InSelectInTable);
                }
                else
                {
                    tb.Transition(InSelect);
                }
            }
            else if (StringUtil.In(name, "optgroup", "option"))
            {
                // Fixed: the tag name is now actually tested (the first argument was missing).
                if (tb.CurrentElement.NodeName.Equals("option"))
                {
                    tb.Process(new Token.EndTag("option"));
                }
                tb.ReconstructFormattingElements();
                tb.Insert(startTag);
            }
            else if (StringUtil.In(name, "rp", "rt"))
            {
                // Fixed: the tag name is now actually tested (the first argument was missing).
                if (tb.InScope("ruby"))
                {
                    tb.GenerateImpliedEndTags();
                    if (!tb.CurrentElement.NodeName.Equals("ruby"))
                    {
                        tb.Error(this);
                        tb.PopStackToBefore("ruby"); // i.e. close up to but not include name
                    }
                }
                // Inserted regardless of ruby scope, so rp/rt outside ruby are not dropped.
                tb.Insert(startTag);
            }
            else if (name.Equals("math"))
            {
                tb.ReconstructFormattingElements();
                // todo: handle A start tag whose tag name is "math" (i.e. foreign, mathml)
                tb.Insert(startTag);
                tb.Tokeniser.AcknowledgeSelfClosingFlag();
            }
            else if (name.Equals("svg"))
            {
                tb.ReconstructFormattingElements();
                // todo: handle A start tag whose tag name is "svg" (xlink, svg)
                tb.Insert(startTag);
                tb.Tokeniser.AcknowledgeSelfClosingFlag();
            }
            else if (StringUtil.In(name, "caption", "col", "colgroup", "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr"))
            {
                tb.Error(this);
                return false;
            }
            else
            {
                tb.ReconstructFormattingElements();
                tb.Insert(startTag);
            }
            break;

        case Token.TokenType.EndTag:
            Token.EndTag endTag = t.AsEndTag();
            name = endTag.Name();
            if (name.Equals("body"))
            {
                if (!tb.InScope("body"))
                {
                    tb.Error(this);
                    return false;
                }
                else
                {
                    // todo: error if stack contains something not dd, dt, li, optgroup, option, p, rp, rt, tbody, td, tfoot, th, thead, tr, body, html
                    tb.Transition(AfterBody);
                }
            }
            else if (name.Equals("html"))
            {
                bool notIgnored = tb.Process(new Token.EndTag("body"));
                if (notIgnored)
                {
                    return tb.Process(endTag);
                }
            }
            else if (StringUtil.In(name, "address", "article", "aside", "blockquote", "button", "center", "details", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "listing", "menu", "nav", "ol", "pre", "section", "summary", "ul"))
            {
                // todo: refactor these lookups
                if (!tb.InScope(name))
                {
                    // nothing to close
                    tb.Error(this);
                    return false;
                }
                else
                {
                    tb.GenerateImpliedEndTags();
                    if (!tb.CurrentElement.NodeName.Equals(name))
                    {
                        tb.Error(this);
                    }
                    tb.PopStackToClose(name);
                }
            }
            else if (name.Equals("form"))
            {
                Element currentForm = tb.FormElement;
                tb.FormElement = null;
                if (currentForm == null || !tb.InScope(name))
                {
                    tb.Error(this);
                    return false;
                }
                else
                {
                    tb.GenerateImpliedEndTags();
                    if (!tb.CurrentElement.NodeName.Equals(name))
                    {
                        tb.Error(this);
                    }
                    // remove currentForm from stack. will shift anything under up.
                    tb.RemoveFromStack(currentForm);
                }
            }
            else if (name.Equals("p"))
            {
                if (!tb.InButtonScope(name))
                {
                    tb.Error(this);
                    tb.Process(new Token.StartTag(name)); // if no p to close, creates an empty <p></p>
                    return tb.Process(endTag);
                }
                else
                {
                    tb.GenerateImpliedEndTags(name);
                    if (!tb.CurrentElement.NodeName.Equals(name))
                    {
                        tb.Error(this);
                    }
                    tb.PopStackToClose(name);
                }
            }
            else if (name.Equals("li"))
            {
                if (!tb.InListItemScope(name))
                {
                    tb.Error(this);
                    return false;
                }
                else
                {
                    tb.GenerateImpliedEndTags(name);
                    if (!tb.CurrentElement.NodeName.Equals(name))
                    {
                        tb.Error(this);
                    }
                    tb.PopStackToClose(name);
                }
            }
            else if (StringUtil.In(name, "dd", "dt"))
            {
                if (!tb.InScope(name))
                {
                    tb.Error(this);
                    return false;
                }
                else
                {
                    tb.GenerateImpliedEndTags(name);
                    if (!tb.CurrentElement.NodeName.Equals(name))
                    {
                        tb.Error(this);
                    }
                    tb.PopStackToClose(name);
                }
            }
            else if (StringUtil.In(name, "h1", "h2", "h3", "h4", "h5", "h6"))
            {
                if (!tb.InScope(new string[] { "h1", "h2", "h3", "h4", "h5", "h6" }))
                {
                    tb.Error(this);
                    return false;
                }
                else
                {
                    tb.GenerateImpliedEndTags(name);
                    if (!tb.CurrentElement.NodeName.Equals(name))
                    {
                        tb.Error(this);
                    }
                    tb.PopStackToClose("h1", "h2", "h3", "h4", "h5", "h6");
                }
            }
            else if (name.Equals("sarcasm"))
            {
                // *sigh*
                return AnyOtherEndTag(t, tb);
            }
            else if (StringUtil.In(name, "a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u"))
            {
                // Adoption Agency Algorithm.
                //OUTER:
                for (int i = 0; i < 8; i++)
                {
                    Element formatEl = tb.GetActiveFormattingElement(name);
                    if (formatEl == null)
                    {
                        return AnyOtherEndTag(t, tb);
                    }
                    else if (!tb.OnStack(formatEl))
                    {
                        tb.Error(this);
                        tb.RemoveFromActiveFormattingElements(formatEl);
                        return true;
                    }
                    else if (!tb.InScope(formatEl.NodeName))
                    {
                        tb.Error(this);
                        return false;
                    }
                    else if (tb.CurrentElement != formatEl)
                    {
                        tb.Error(this);
                    }

                    Element furthestBlock = null;
                    Element commonAncestor = null;
                    bool seenFormattingElement = false;
                    LinkedList<Element> stack = tb.Stack;
                    for (int si = 0; si < stack.Count; si++)
                    {
                        Element el = stack.ElementAt(si);
                        if (el == formatEl)
                        {
                            commonAncestor = stack.ElementAt(si - 1);
                            seenFormattingElement = true;
                        }
                        else if (seenFormattingElement && tb.IsSpecial(el))
                        {
                            furthestBlock = el;
                            break;
                        }
                    }

                    if (furthestBlock == null)
                    {
                        tb.PopStackToClose(formatEl.NodeName);
                        tb.RemoveFromActiveFormattingElements(formatEl);
                        return true;
                    }

                    // todo: Let a bookmark note the position of the formatting element in the list of active formatting elements relative to the elements on either side of it in the list.
                    // does that mean: int pos of format el in list?
                    Element node = furthestBlock;
                    Element lastNode = furthestBlock;
                    for (int j = 0; j < 3; j++)
                    {
                        if (tb.OnStack(node))
                        {
                            node = tb.AboveOnStack(node);
                        }
                        if (!tb.IsInActiveFormattingElements(node))
                        {
                            // note no bookmark check
                            tb.RemoveFromStack(node);
                            continue;
                        }
                        else if (node == formatEl)
                        {
                            break;
                        }

                        Element replacement = new Element(Tag.ValueOf(node.NodeName), tb.BaseUri);
                        tb.ReplaceActiveFormattingElement(node, replacement);
                        tb.ReplaceOnStack(node, replacement);
                        node = replacement;

                        if (lastNode == furthestBlock)
                        {
                            // todo: move the aforementioned bookmark to be immediately after the new node in the list of active formatting elements.
                            // not getting how this bookmark both straddles the element above, but is inbetween here...
                        }
                        if (lastNode.Parent != null)
                        {
                            lastNode.Remove();
                        }
                        node.AppendChild(lastNode);

                        lastNode = node;
                    }

                    if (StringUtil.In(commonAncestor.NodeName, "table", "tbody", "tfoot", "thead", "tr"))
                    {
                        if (lastNode.Parent != null)
                        {
                            lastNode.Remove();
                        }
                        tb.InsertInFosterParent(lastNode);
                    }
                    else
                    {
                        if (lastNode.Parent != null)
                        {
                            lastNode.Remove();
                        }
                        commonAncestor.AppendChild(lastNode);
                    }

                    Element adopter = new Element(Tag.ValueOf(name), tb.BaseUri);
                    // Snapshot the children first: AppendChild reparents, which would
                    // otherwise modify the collection being enumerated.
                    Node[] childNodes = furthestBlock.ChildNodes.ToArray();
                    foreach (Node childNode in childNodes)
                    {
                        adopter.AppendChild(childNode);
                    }
                    furthestBlock.AppendChild(adopter);
                    tb.RemoveFromActiveFormattingElements(formatEl);
                    // todo: insert the new element into the list of active formatting elements at the position of the aforementioned bookmark.
                    tb.RemoveFromStack(formatEl);
                    tb.InsertOnStackAfter(furthestBlock, adopter);
                }
            }
            else if (StringUtil.In(name, "applet", "marquee", "object"))
            {
                // Fixed: this used to be wrapped in a check against the literal string
                // "name", which skipped the whole branch whenever an element called
                // <name> happened to be in scope.
                if (!tb.InScope(name))
                {
                    tb.Error(this);
                    return false;
                }
                tb.GenerateImpliedEndTags();
                if (!tb.CurrentElement.NodeName.Equals(name))
                {
                    tb.Error(this);
                }
                tb.PopStackToClose(name);
                tb.ClearFormattingElementsToLastMarker();
            }
            else if (name.Equals("br"))
            {
                tb.Error(this);
                tb.Process(new Token.StartTag("br"));
                return false;
            }
            else
            {
                return AnyOtherEndTag(t, tb);
            }
            break;

        case Token.TokenType.EOF:
            // todo: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
            // stop parsing
            break;

        default:
            break;
    }

    return true;
}