/// <summary>
/// Handles a single token in this state by dispatching on the token type.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder that receives insertions and error reports.</param>
/// <returns>
/// <c>false</c> for a null-character token or a doctype (both are reported through
/// <c>tb.Error</c>); for start and end tags, the value produced by the tag handler, or
/// <c>true</c> when the handler produces no value; <c>true</c> in every other case.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    switch (t.Type)
    {
        case TokenType.Character:
        {
            Token.Character text = t.AsCharacter();
            if (text.Data.Equals(NullString))
            {
                // TODO: confirm that check
                tb.Error(this);
                return false;
            }

            // Classify before inserting; only non-whitespace text clears FramesetOK.
            bool whitespaceOnly = IsWhitespace(text);
            tb.ReconstructFormattingElements();
            tb.Insert(text);
            if (!whitespaceOnly)
            {
                tb.FramesetOK = false;
            }
            return true;
        }

        case TokenType.Comment:
            tb.Insert(t.AsComment());
            return true;

        case TokenType.Doctype:
            tb.Error(this);
            return false;

        case TokenType.StartTag:
        {
            // A handler result without a value means "handled, carry on".
            bool? startOutcome = HandleStartTag(t, tb);
            return startOutcome ?? true;
        }

        case TokenType.EndTag:
        {
            bool? endOutcome = HandleEndTag(t, tb);
            return endOutcome ?? true;
        }

        case TokenType.EOF:
            // TODO: error if stack contains something not dd, dt, li, p, tbody, td, tfoot, th, thead, tr, body, html
            // stop parsing
            break;
    }

    return true;
}
/// <summary>
/// Handles a single token by inserting it, or, for an end tag, by popping the stack
/// to close the matching element.
/// </summary>
/// <param name="token">The token to handle.</param>
/// <returns>Always <c>true</c>.</returns>
public override bool Process(Token token)
{
    // start tag, end tag, doctype, comment, character, eof
    var kind = token.Type;

    if (kind == TokenType.StartTag)
    {
        Insert(token.AsStartTag());
    }
    else if (kind == TokenType.EndTag)
    {
        PopStackToClose(token.AsEndTag());
    }
    else if (kind == TokenType.Comment)
    {
        Insert(token.AsComment());
    }
    else if (kind == TokenType.Character)
    {
        Insert(token.AsCharacter());
    }
    else if (kind == TokenType.Doctype)
    {
        Insert(token.AsDoctype());
    }
    else if (kind != TokenType.EOF)
    {
        // Anything that is not one of the known token types is reported.
        HtmlWarning.UnexpectedTokenType();
    }
    // EOF: could put some normalisation here if desired

    return true;
}
/// <summary>
/// Buffers character tokens in the builder's pending table characters; on any other
/// token, flushes the buffer, returns to the builder's original state and reprocesses
/// the token there.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder that owns the pending character buffer.</param>
/// <returns>
/// <c>false</c> for a null-character token (reported through <c>tb.Error</c>),
/// <c>true</c> for any other character token, and otherwise the result of
/// reprocessing <paramref name="t"/> after the state transition.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (t.Type == TokenType.Character)
    {
        Token.Character incoming = t.AsCharacter();
        if (incoming.Data.Equals(NullString))
        {
            tb.Error(this);
            return false;
        }

        tb.GetPendingTableCharacters().Add(incoming);
        return true;
    }

    if (tb.GetPendingTableCharacters().Count > 0)
    {
        foreach (Token.Character pending in tb.GetPendingTableCharacters())
        {
            if (IsWhitespace(pending))
            {
                tb.Insert(pending);
                continue;
            }

            // InTable anything else section:
            tb.Error(this);
            bool fosterParent = StringUtil.In(tb.CurrentElement.NodeName, "table", "tbody", "tfoot", "thead", "tr");
            if (fosterParent)
            {
                tb.SetFosterInserts(true);
            }
            tb.Process(pending, InBody);
            if (fosterParent)
            {
                tb.SetFosterInserts(false);
            }
        }

        tb.NewPendingTableCharacters();
    }

    tb.Transition(tb.OriginalState);
    return tb.Process(t);
}
/// <summary>
/// Tells whether a token is a character token whose data consists solely of
/// characters accepted by <c>StringUtil.IsWhitespace</c>.
/// </summary>
/// <param name="t">The token to inspect.</param>
/// <returns>
/// <c>false</c> when the token is not a character token or contains any
/// non-whitespace character; <c>true</c> otherwise (including for empty data).
/// </returns>
private static bool IsWhitespace(Token t)
{
    if (!t.IsCharacter)
    {
        return false;
    }

    string text = t.AsCharacter().Data;

    // TODO: this checks more than spec - "\t", "\n", "\f", "\r", " "
    for (int i = 0; i < text.Length; i++)
    {
        if (!StringUtil.IsWhitespace(text[i]))
        {
            return false;
        }
    }

    return true;
}
// in script, style etc. normally treated as data tags
/// <summary>
/// Inserts character tokens as-is; on end of file or an end tag, pops the current
/// element and returns to the builder's original state.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder being driven.</param>
/// <returns>
/// For end of file, the result of reprocessing <paramref name="t"/> in the restored
/// state (after reporting an error); <c>true</c> in every other case.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (t.IsCharacter)
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    if (t.IsEOF)
    {
        tb.Error(this);
        // if current node is script: already started
        tb.Pop();
        tb.Transition(tb.OriginalState);
        return tb.Process(t);
    }

    if (t.IsEndTag)
    {
        // if: An end tag whose tag name is "script" -- scripting nesting level, if evaluating scripts
        tb.Pop();
        tb.Transition(tb.OriginalState);
    }

    return true;
}
/// <summary>
/// Handles a single token while option/optgroup/select elements are being built.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder being driven.</param>
/// <returns>
/// <c>false</c> when the token is rejected (null character, doctype, a "select" end
/// tag with no select in select scope, or an input/keygen/textarea start tag with no
/// select in select scope); the delegated result where the token is forwarded to
/// another state, reprocessed, or passed to <c>AnythingElse</c>; <c>true</c> otherwise.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    switch (t.Type)
    {
        case TokenType.Character:
        {
            Token.Character text = t.AsCharacter();
            if (text.Data.Equals(NullString))
            {
                tb.Error(this);
                return false;
            }

            tb.Insert(text);
            return true;
        }

        case TokenType.Comment:
            tb.Insert(t.AsComment());
            return true;

        case TokenType.Doctype:
            tb.Error(this);
            return false;

        case TokenType.StartTag:
        {
            Token.StartTag startTag = t.AsStartTag();
            string startName = startTag.Name;

            if (startName.Equals("html"))
            {
                return tb.Process(startTag, InBody);
            }

            if (startName.Equals("option"))
            {
                // Close any open option before starting the new one.
                tb.Process(new Token.EndTag("option"));
                tb.Insert(startTag);
                return true;
            }

            if (startName.Equals("optgroup"))
            {
                // Close whichever of option / optgroup is currently open (at most one).
                string openName = tb.CurrentElement.NodeName;
                if (openName.Equals("option"))
                {
                    tb.Process(new Token.EndTag("option"));
                }
                else if (openName.Equals("optgroup"))
                {
                    tb.Process(new Token.EndTag("optgroup"));
                }

                tb.Insert(startTag);
                return true;
            }

            if (startName.Equals("select"))
            {
                // A nested select start tag is treated as an end tag for the open select.
                tb.Error(this);
                return tb.Process(new Token.EndTag("select"));
            }

            if (StringUtil.In(startName, "input", "keygen", "textarea"))
            {
                tb.Error(this);
                if (!tb.InSelectScope("select"))
                {
                    return false; // frag
                }

                tb.Process(new Token.EndTag("select"));
                return tb.Process(startTag);
            }

            if (startName.Equals("script"))
            {
                return tb.Process(t, InHead);
            }

            return AnythingElse(t, tb);
        }

        case TokenType.EndTag:
        {
            string endName = t.AsEndTag().Name;

            if (endName.Equals("optgroup"))
            {
                // An option sitting directly under an optgroup is closed first.
                if (tb.CurrentElement.NodeName.Equals("option"))
                {
                    var above = tb.AboveOnStack(tb.CurrentElement);
                    if (above != null && above.NodeName.Equals("optgroup"))
                    {
                        tb.Process(new Token.EndTag("option"));
                    }
                }

                // Re-read the current element: the step above may have popped the option.
                if (tb.CurrentElement.NodeName.Equals("optgroup"))
                {
                    tb.Pop();
                }
                else
                {
                    tb.Error(this);
                }
                return true;
            }

            if (endName.Equals("option"))
            {
                if (tb.CurrentElement.NodeName.Equals("option"))
                {
                    tb.Pop();
                }
                else
                {
                    tb.Error(this);
                }
                return true;
            }

            if (endName.Equals("select"))
            {
                if (!tb.InSelectScope(endName))
                {
                    tb.Error(this);
                    return false;
                }

                tb.PopStackToClose(endName);
                tb.ResetInsertionMode();
                return true;
            }

            return AnythingElse(t, tb);
        }

        case TokenType.EOF:
            if (!tb.CurrentElement.NodeName.Equals("html"))
            {
                tb.Error(this);
            }
            return true;

        default:
            return AnythingElse(t, tb);
    }
}
/// <summary>
/// Handles a single token while "col" elements are being collected: whitespace and
/// comments are inserted, "col" start tags are inserted as empty elements, and a
/// "colgroup" end tag pops the current element and transitions to <c>InTable</c>.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder being driven.</param>
/// <returns>
/// <c>false</c> for a "colgroup" end tag while the current element is "html"; the
/// delegated result where the token is forwarded to <c>InBody</c> or
/// <c>AnythingElse</c>; <c>true</c> otherwise.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    switch (t.Type)
    {
        case TokenType.Comment:
            tb.Insert(t.AsComment());
            return true;

        case TokenType.Doctype:
            // Reported, but processing still counts as successful.
            tb.Error(this);
            return true;

        case TokenType.StartTag:
        {
            Token.StartTag opening = t.AsStartTag();
            string openingName = opening.Name;

            if (openingName.Equals("html"))
            {
                return tb.Process(t, InBody);
            }

            if (openingName.Equals("col"))
            {
                tb.InsertEmpty(opening);
                return true;
            }

            return AnythingElse(t, tb);
        }

        case TokenType.EndTag:
        {
            string closingName = t.AsEndTag().Name;
            if (!closingName.Equals("colgroup"))
            {
                return AnythingElse(t, tb);
            }

            if (tb.CurrentElement.NodeName.Equals("html")) // frag case
            {
                tb.Error(this);
                return false;
            }

            tb.Pop();
            tb.Transition(InTable);
            return true;
        }

        case TokenType.EOF:
            if (tb.CurrentElement.NodeName.Equals("html"))
            {
                return true; // stop parsing; frag case
            }
            return AnythingElse(t, tb);

        default:
            return AnythingElse(t, tb);
    }
}
/// <summary>
/// Handles a single token while frameset content is being built: whitespace and
/// comments are inserted, "frameset" and "frame" start tags are inserted, and a
/// "frameset" end tag pops the current element.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder being driven.</param>
/// <returns>
/// <c>false</c> for a doctype, an unrecognised start tag, a "frameset" end tag while
/// the current element is "html", or any token not otherwise handled; the delegated
/// result for "html" and "noframes" start tags; <c>true</c> otherwise.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    if (t.IsComment)
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (t.IsDoctype)
    {
        tb.Error(this);
        return false;
    }

    if (t.IsStartTag)
    {
        Token.StartTag opening = t.AsStartTag();
        switch (opening.Name)
        {
            case "html":
                return tb.Process(opening, InBody);

            case "frameset":
                tb.Insert(opening);
                return true;

            case "frame":
                tb.InsertEmpty(opening);
                return true;

            case "noframes":
                return tb.Process(opening, InHead);

            default:
                tb.Error(this);
                return false;
        }
    }

    if (t.IsEndTag && t.AsEndTag().Name.Equals("frameset"))
    {
        if (tb.CurrentElement.NodeName.Equals("html")) // frag
        {
            tb.Error(this);
            return false;
        }

        tb.Pop();

        // Leave this state only once the outermost frameset has been closed
        // (and never while parsing a fragment).
        if (!tb.IsFragmentParsing() && !tb.CurrentElement.NodeName.Equals("frameset"))
        {
            tb.Transition(AfterFrameset);
        }
        return true;
    }

    if (t.IsEOF)
    {
        if (!tb.CurrentElement.NodeName.Equals("html"))
        {
            tb.Error(this);
        }
        return true;
    }

    tb.Error(this);
    return false;
}
/// <summary>
/// Handles a single token while head content is being built: metadata elements are
/// inserted, title/style/noframes/script content is handed to the matching text
/// handling, and a "head" end tag pops the current element and transitions to
/// <c>AfterHead</c>.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder being driven.</param>
/// <returns>
/// <c>false</c> for a doctype, a "head" start tag, or an end tag other than
/// head/body/html/br; the delegated result where the token is forwarded to
/// <c>InBody</c> or <c>AnythingElse</c>; <c>true</c> otherwise.
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    switch (t.Type)
    {
        case TokenType.Comment:
            tb.Insert(t.AsComment());
            return true;

        case TokenType.Doctype:
            tb.Error(this);
            return false;

        case TokenType.StartTag:
        {
            Token.StartTag opening = t.AsStartTag();
            string openingName = opening.Name;

            if (openingName.Equals("html"))
            {
                return InBody.Process(t, tb);
            }

            if (StringUtil.In(openingName, "base", "basefont", "bgsound", "command", "link"))
            {
                HtmlElement inserted = tb.InsertEmpty(opening);
                // jsoup special: update base the first time it is seen
                if (openingName.Equals("base") && inserted.HasAttribute("href"))
                {
                    tb.MaybeSetBaseUri(inserted);
                }
                return true;
            }

            if (openingName.Equals("meta"))
            {
                tb.InsertEmpty(opening);
                // TODO: charset switches
                return true;
            }

            if (openingName.Equals("title"))
            {
                HandleRcData(opening, tb);
                return true;
            }

            if (StringUtil.In(openingName, "noframes", "style"))
            {
                HandleRawtext(opening, tb);
                return true;
            }

            if (openingName.Equals("noscript"))
            {
                // else if noscript && scripting flag = true: rawtext (jsoup doesn't run script, to handle as noscript)
                tb.Insert(opening);
                tb.Transition(InHeadNoscript);
                return true;
            }

            if (openingName.Equals("script"))
            {
                // skips some script rules as won't execute them
                tb.Insert(opening);
                tb.tokeniser.Transition(TokeniserState.ScriptData);
                tb.MarkInsertionMode();
                tb.Transition(Text);
                return true;
            }

            if (openingName.Equals("head"))
            {
                tb.Error(this);
                return false;
            }

            return AnythingElse(t, tb);
        }

        case TokenType.EndTag:
        {
            string closingName = t.AsEndTag().Name;

            if (closingName.Equals("head"))
            {
                tb.Pop();
                tb.Transition(AfterHead);
                return true;
            }

            if (StringUtil.In(closingName, "body", "html", "br"))
            {
                return AnythingElse(t, tb);
            }

            tb.Error(this);
            return false;
        }

        default:
            return AnythingElse(t, tb);
    }
}
/// <summary>
/// Handles a single token after the head has been closed: "body" and "frameset"
/// start tags are inserted and switch state, head-only start tags are reprocessed
/// through <c>InHead</c> with the head element temporarily pushed, and most other
/// tokens go to <c>AnythingElse</c>.
/// </summary>
/// <param name="t">The token to handle.</param>
/// <param name="tb">The tree builder being driven.</param>
/// <returns>
/// <c>false</c> for a "head" start tag or an end tag other than body/html; the
/// delegated result for an "html" start tag; <c>true</c> otherwise (the result of
/// <c>AnythingElse</c> is not propagated).
/// </returns>
public override bool Process(Token t, HtmlTreeBuilder tb)
{
    if (IsWhitespace(t))
    {
        tb.Insert(t.AsCharacter());
        return true;
    }

    if (t.IsComment)
    {
        tb.Insert(t.AsComment());
        return true;
    }

    if (t.IsDoctype)
    {
        // Reported, but processing still counts as successful.
        tb.Error(this);
        return true;
    }

    if (t.IsStartTag)
    {
        Token.StartTag opening = t.AsStartTag();
        string openingName = opening.Name;
        switch (openingName)
        {
            case "html":
                return tb.Process(t, InBody);

            case "body":
                tb.Insert(opening);
                tb.FramesetOK = false;
                tb.Transition(InBody);
                return true;

            case "frameset":
                tb.Insert(opening);
                tb.Transition(InFrameset);
                return true;

            case "base":
            case "basefont":
            case "bgsound":
            case "link":
            case "meta":
            case "noframes":
            case "script":
            case "style":
            case "title":
                // Head-only content seen late: push the head element back on the
                // stack just long enough for InHead to handle the token.
                tb.Error(this);
                HtmlElement headElement = tb.HeadElement;
                tb.Push(headElement);
                tb.Process(t, InHead);
                tb.RemoveFromStack(headElement);
                return true;

            case "head":
                tb.Error(this);
                return false;

            default:
                AnythingElse(t, tb);
                return true;
        }
    }

    if (t.IsEndTag)
    {
        if (!StringUtil.In(t.AsEndTag().Name, "body", "html"))
        {
            tb.Error(this);
            return false;
        }

        AnythingElse(t, tb);
        return true;
    }

    AnythingElse(t, tb);
    return true;
}