/// <summary>
/// Processes a single token while the head element is the current node. The rules
/// implemented here correspond to the HTML5 "in head" insertion mode
/// (http://www.w3.org/TR/html5/syntax.html#parsing-main-inhead).
/// </summary>
/// <param name="tokenizer">Tokenizer whose next state is switched to RCDATA when a "title" start tag is seen.</param>
/// <param name="queue">Queue that receives the token again when it has to be reprocessed by the next insertion mode.</param>
/// <param name="token">The token to process.</param>
/// <param name="doc">Document handed to the character/comment/element insertion helpers.</param>
/// <returns>
/// This instance when the token was consumed or ignored, <c>TextInsertionModeState.Instance</c>
/// for "title" and "script" start tags, or <c>AfterHeadInsertionModeState.Instance</c> once the
/// head element has been popped.
/// </returns>
public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
{
    if (IsWhitespace(token))
    {
        InsertCharacter((CharacterToken)token, doc);
        return this;
    }

    CommentToken commentToken = token as CommentToken;
    if (commentToken != null)
    {
        InsertComment(commentToken, doc);
        return this;
    }

    if (token is DocTypeToken)
    {
        // A DOCTYPE at this point is a parse error; the token is ignored.
        ReportParseError();
        return this;
    }

    StartTagToken startTagToken = token as StartTagToken;
    if (startTagToken != null)
    {
        // NOTE: every case that ends in "break" is not implemented yet and therefore
        // falls through to the "anything else" handling at the bottom of the method.
        switch (startTagToken.TagName)
        {
            case "html":
                //TODO - Process the token using the rules for the "in body" insertion mode.
                break;

            case "base":
            case "basefont":
            case "bgsound":
            case "link":
                //TODO - Insert an HTML element for the token. Immediately pop the current node off the stack of open elements.
                //TODO - Acknowledge the token's self-closing flag, if it is set.
                break;

            case "meta":
                //TODO - Insert an HTML element for the token. Immediately pop the current node off the stack of open elements.
                //TODO - Acknowledge the token's self-closing flag, if it is set.
                //TODO - If the element has a charset attribute, and getting an encoding from its value results in a
                //TODO - supported ASCII-compatible character encoding or a UTF-16 encoding, and the confidence is
                //TODO - currently tentative, then change the encoding to the resulting encoding.
                //TODO - Otherwise, if the element has an http-equiv attribute whose value is an ASCII case-insensitive
                //TODO - match for the string "Content-Type", and the element has a content attribute, and applying the
                //TODO - algorithm for extracting a character encoding from a meta element to that attribute's value
                //TODO - returns a supported ASCII-compatible character encoding or a UTF-16 encoding, and the
                //TODO - confidence is currently tentative, then change the encoding to the extracted encoding.
                break;

            case "title":
                // Follow the generic RCDATA element parsing algorithm.
                // (http://www.w3.org/TR/html5/syntax.html#generic-rcdata-element-parsing-algorithm)
                // TODO - Follow insert HTML element algorithm (http://www.w3.org/TR/html5/syntax.html#insert-an-html-element)
                InsertHtmlElement(startTagToken, doc);
                tokenizer.SetNextState(RCDATAState.Instance);
                // Save the current insertion mode before handing over to the "text" mode.
                TreeConstruction.Instance.SaveCurrentInsertionModeState();
                return TextInsertionModeState.Instance;

            case "noframes":
            case "style":
                //TODO - Follow the generic raw text element parsing algorithm.
                break;

            case "noscript":
                //TODO - If scripting flag (http://www.w3.org/TR/html5/syntax.html#scripting-flag) is enabled:
                //TODO -     Follow the generic raw text element parsing algorithm.
                //TODO - Else, if the flag is disabled:
                //TODO -     Insert an HTML element for the token.
                //TODO -     return InHeadNoScriptInsertionModeState.Instance;
                break;

            case "script":
                //TODO - Run these steps:
                //TODO - 1. Let the adjusted insertion location be the appropriate place for inserting a node.
                //TODO - 2. Create an element for the token in the HTML namespace, with the intended parent being the
                //TODO -    element in which the adjusted insertion location finds itself.
                //TODO - 3. Mark the element as being "parser-inserted" and unset the element's "force-async" flag.
                //TODO -
                //TODO -    NOTE: This ensures that, if the script is external, any document.write() calls in the script
                //TODO -    will execute in-line, instead of blowing the document away, as would happen in most other
                //TODO -    cases. It also prevents the script from executing until the end tag is seen.
                //TODO -
                //TODO - 4. If the parser was originally created for the HTML fragment parsing algorithm, then mark the
                //TODO -    script element as "already started". (fragment case)
                //TODO - 5. Insert the newly created element at the adjusted insertion location.
                //TODO - 6. Push the element onto the stack of open elements so that it is the new current node.
                //TODO - 7. Switch the tokenizer to the script data state.
                //TODO - 8. Let the original insertion mode be the current insertion mode.
                // 9. Switch the insertion mode to "text".
                return TextInsertionModeState.Instance;

            case "template":
                //TODO - Insert an HTML element for the token.
                //TODO - Insert a marker at the end of the list of active formatting elements.
                //TODO - Set the frameset-ok flag to "not ok".
                //TODO - Switch the insertion mode to "in template".
                //TODO - Push "in template" onto the stack of template insertion modes so that it is the new
                //TODO - current template insertion mode.
                break;

            case "head":
                // A nested head start tag is a parse error; the token is ignored.
                ReportParseError();
                return this;
        }
    }

    EndTagToken endTagToken = token as EndTagToken;
    if (endTagToken != null)
    {
        switch (endTagToken.TagName)
        {
            case "head":
                // Pop the current node (which will be the head element) off the stack of open elements
                // and switch the insertion mode to "after head". The end tag is consumed here, so it
                // must NOT be queued for reprocessing.
                TreeConstruction.Instance.StackOfOpenElements.Pop();
                return AfterHeadInsertionModeState.Instance;

            case "body":
            case "html":
            case "br":
                // Act as described in the "anything else" entry below.
                break;

            case "template":
                // If there is no template element on the stack of open elements, then this is a parse
                // error; ignore the token. The "template" start tag above does not insert an element
                // yet, so only that branch is handled for now.
                // NOTE(review): assumes no other code path pushes a template element — confirm.
                //TODO - Otherwise, run these steps:
                //TODO - 1. Generate implied end tags.
                //TODO - 2. If the current node is not a template element, then this is a parse error.
                //TODO - 3. Pop elements from the stack of open elements until a template element has been popped from the stack.
                //TODO - 4. Clear the list of active formatting elements up to the last marker.
                //TODO - 5. Pop the current template insertion mode off the stack of template insertion modes.
                //TODO - 6. Reset the insertion mode appropriately.
                ReportParseError();
                return this;

            default:
                // Any other end tag: parse error, ignore the token.
                ReportParseError();
                return this;
        }
    }

    // Anything else:
    //   Pop the current node (which will be the head element) off the stack of open elements.
    //   Switch the insertion mode to "after head".
    //   Reprocess the token.
    TreeConstruction.Instance.StackOfOpenElements.Pop();
    queue.EnqueueTokenForReprocessing(token);
    return AfterHeadInsertionModeState.Instance;
}
/// <summary>
/// Processes a single token for the document body. The rules implemented here correspond to
/// the HTML5 "in body" insertion mode (http://www.w3.org/TR/html5/syntax.html#parsing-main-inbody);
/// most tag-specific rules are still placeholders that simply consume the token.
/// </summary>
/// <param name="tokenizer">Tokenizer; switched to the PLAINTEXT state for "plaintext" and queried for the scripting flag on "noscript".</param>
/// <param name="queue">Queue that receives tokens which must be processed again.</param>
/// <param name="token">The token to process.</param>
/// <param name="doc">Document handed to the character/comment/element insertion helpers.</param>
/// <returns>
/// This instance for consumed or ignored tokens, the state produced by the delegated head rules
/// when it differs from the head state itself, the result of <c>ProcessEndTagBodyOrHtml</c> for
/// "body"/"html" end tags, or <c>null</c> once the end-of-file token stopped parsing.
/// </returns>
public override BaseInsertionModeState ProcessToken(HtmlTokenizer tokenizer, ITokenQueue queue, BaseToken token, IDocument doc)
{
    //TODO - PERFORMANCE!

    // ---- Character tokens ------------------------------------------------------------------
    CharacterToken characterToken = token as CharacterToken;
    if (characterToken != null)
    {
        if (characterToken.Character == 0)
        {
            // U+0000 NULL: parse error, ignore the token.
            ReportParseError();
            return this;
        }

        bool whitespace = IsWhitespace(characterToken);

        //TODO - Reconstruct the active formatting elements, if any. (http://www.w3.org/TR/html5/syntax.html#reconstruct-the-active-formatting-elements)
        // Insert the token's character. (http://www.w3.org/TR/html5/syntax.html#insert-a-character)
        InsertCharacter(characterToken, doc);

        if (!whitespace)
        {
            //TODO - Set the frameset-ok flag to "not ok". (http://www.w3.org/TR/html5/syntax.html#frameset-ok-flag)
        }

        return this;
    }

    // ---- Comment and DOCTYPE tokens --------------------------------------------------------
    CommentToken commentToken = token as CommentToken;
    if (commentToken != null)
    {
        InsertComment(commentToken, doc);
        return this;
    }

    if (token is DocTypeToken)
    {
        ReportParseError();
        return this;
    }

    // NOTE(review): the dispatch below assumes a token is never both a StartTagToken and an
    // EndTagToken (i.e. neither type derives from the other) — confirm.
    StartTagToken startTag = token as StartTagToken;
    EndTagToken endTag = token as EndTagToken;

    // ---- Start tags ------------------------------------------------------------------------
    if (startTag != null)
    {
        switch (startTag.TagName)
        {
            case "html":
                ReportParseError();
                //TODO - If there is a template element on the stack of open elements, then ignore the token.
                //TODO - Otherwise, for each attribute on the token, check to see if the attribute is already
                //TODO - present on the top element of the stack of open elements. If it is not, add the attribute
                //TODO - and its corresponding value to that element.
                return this;

            case "base":
            case "basefont":
            case "bgsound":
            case "link":
            case "meta":
            case "noframes":
            case "script":
            case "style":
            case "template":
            case "title":
            {
                // Handled by the head rules; stay in this mode unless they ask for a different one.
                BaseInsertionModeState delegated = InHeadInsertionModeState.Instance.ProcessToken(tokenizer, queue, token, doc);
                return delegated != InHeadInsertionModeState.Instance ? delegated : this;
            }

            case "body":
                ReportParseError();
                //TODO - If the second element on the stack of open elements is not a body element, if the stack of open elements
                //TODO - has only one node on it, or if there is a template element on the stack of open elements, then ignore the
                //TODO - token. (fragment case)
                //TODO - Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute on the token, check to see if
                //TODO - the attribute is already present on the body element (the second element) on the stack of open elements,
                //TODO - and if it is not, add the attribute and its corresponding value to that element.
                return this;

            case "frameset":
                ReportParseError();
                //TODO - If the stack of open elements has only one node on it, or if the second element on the stack of open
                //TODO - elements is not a body element, then ignore the token. (fragment case)
                //TODO - If the frameset-ok flag is set to "not ok", ignore the token.
                //TODO - Otherwise, run the following steps:
                //TODO - 1. Remove the second element on the stack of open elements from its parent node, if it has one.
                //TODO - 2. Pop all the nodes from the bottom of the stack of open elements, from the current node up to,
                //TODO -    but not including, the root html element.
                //TODO - 3. Insert an HTML element for the token.
                //TODO - 4. Switch the insertion mode to "in frameset".
                return this;

            case "address":
            case "article":
            case "aside":
            case "blockquote":
            case "center":
            case "details":
            case "dialog":
            case "dir":
            case "div":
            case "dl":
            case "fieldset":
            case "figcaption":
            case "figure":
            case "footer":
            case "header":
            case "hgroup":
            case "main":
            case "nav":
            case "ol":
            case "p":
            case "section":
            case "summary":
            case "ul":
                //TODO - If the stack of open elements has a p element in button scope, then close a p element. (http://www.w3.org/TR/html5/syntax.html#close-a-p-element)
                // Insert an HTML element for the token.
                InsertHtmlElement(startTag, doc);
                return this;

            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
                //TODO - If the stack of open elements has a p element in button scope, then close a p element. (http://www.w3.org/TR/html5/syntax.html#close-a-p-element)
                //TODO - If the current node is an HTML element whose tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6",
                //TODO - then this is a parse error; pop the current node off the stack of open elements.
                InsertHtmlElement(startTag, doc);
                return this;

            case "pre":
            case "listing":
                //TODO
                return this;

            case "form":
                //TODO
                return this;

            case "li":
                //TODO
                return this;

            case "dd":
            case "dt":
                //TODO
                return this;

            case "plaintext":
                //TODO - If the stack of open elements has a p element in button scope, then close a p element. (http://www.w3.org/TR/html5/syntax.html#close-a-p-element)
                // Insert an HTML element for the token.
                InsertHtmlElement(startTag, doc);
                // Switch the tokenizer to the PLAINTEXT state.
                tokenizer.SetNextState(WebEngineSharp.Tokenizer.States.PLAINTEXTState.Instance);
                //TODO - NOTE: Once a start tag with the tag name "plaintext" has been seen, that will be the last
                //TODO - token ever seen other than character tokens (and the end-of-file token), because there is
                //TODO - no way to switch out of the PLAINTEXT state.
                return this;

            case "button":
                //TODO
                return this;

            case "a":
                //TODO
                return this;

            case "b":
            case "big":
            case "code":
            case "em":
            case "font":
            case "i":
            case "s":
            case "small":
            case "strike":
            case "strong":
            case "tt":
            case "u":
                //TODO
                return this;

            case "nobr":
                //TODO
                return this;

            case "applet":
            case "marquee":
            case "object":
                //TODO
                return this;

            case "table":
                //TODO
                return this;

            case "area":
            case "br":
            case "embed":
            case "img":
            case "keygen":
            case "wbr":
                //TODO
                return this;

            case "input":
                //TODO
                return this;

            case "param":
            case "source":
            case "track":
                //TODO
                return this;

            case "hr":
                //TODO
                return this;

            case "image":
                // Parse error. Change the token's tag name to "img" and reprocess it. (Don't ask.)
                ReportParseError();
                startTag.TagName = "img";
                queue.EnqueueTokenForReprocessing(startTag);
                return this;

            case "isindex":
                //TODO
                return this;

            case "textarea":
                //TODO
                return this;

            case "xmp":
                //TODO
                return this;

            case "iframe":
                //TODO - Set the frameset-ok flag (http://www.w3.org/TR/html5/syntax.html#frameset-ok-flag) to "not ok".
                //TODO - Follow the generic raw text element parsing algorithm. (http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm)
                return this;

            case "noembed":
                //TODO - Follow the generic raw text element parsing algorithm. (http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm)
                return this;

            case "noscript":
                if (tokenizer.ScriptingEnabled)
                {
                    //TODO - Follow the generic raw text element parsing algorithm. (http://www.w3.org/TR/html5/syntax.html#generic-raw-text-element-parsing-algorithm)
                    return this;
                }

                // With scripting disabled, "noscript" is treated like any other start tag.
                goto default;

            case "select":
                //TODO
                return this;

            case "optgroup":
            case "option":
                //TODO
                return this;

            case "rb":
            case "rp":
            case "rtc":
                //TODO
                return this;

            case "rt":
                //TODO
                return this;

            case "math":
                //TODO
                return this;

            case "svg":
                //TODO
                return this;

            case "caption":
            case "col":
            case "colgroup":
            case "frame":
            case "head":
            case "tbody":
            case "td":
            case "tfoot":
            case "th":
            case "thead":
            case "tr":
                // Parse error; the token is ignored.
                ReportParseError();
                return this;

            default:
                // Any other start tag.
                //TODO - Reconstruct the active formatting elements, if any.
                // Insert an HTML element for the token.
                InsertHtmlElement(startTag, doc);
                //NOTE: This element will be an ordinary element.
                return this;
        }
    }

    // ---- End tags --------------------------------------------------------------------------
    if (endTag != null)
    {
        switch (endTag.TagName)
        {
            case "template":
            {
                // Handled by the head rules; stay in this mode unless they ask for a different one.
                BaseInsertionModeState delegated = InHeadInsertionModeState.Instance.ProcessToken(tokenizer, queue, token, doc);
                return delegated != InHeadInsertionModeState.Instance ? delegated : this;
            }

            case "body":
                return ProcessEndTagBodyOrHtml();

            case "html":
            {
                // Same handling as the "body" end tag, after which the token is reprocessed.
                BaseInsertionModeState afterBody = ProcessEndTagBodyOrHtml();
                queue.EnqueueTokenForReprocessing(token);
                return afterBody;
            }

            case "address":
            case "article":
            case "aside":
            case "blockquote":
            case "button":
            case "center":
            case "details":
            case "dialog":
            case "dir":
            case "div":
            case "dl":
            case "fieldset":
            case "figcaption":
            case "figure":
            case "footer":
            case "header":
            case "hgroup":
            case "listing":
            case "main":
            case "nav":
            case "ol":
            case "pre":
            case "section":
            case "summary":
            case "ul":
                //TODO
                return this;

            case "form":
                //TODO
                return this;

            case "p":
                //TODO
                return this;

            case "li":
                //TODO
                return this;

            case "dd":
            case "dt":
                //TODO
                return this;

            case "h1":
            case "h2":
            case "h3":
            case "h4":
            case "h5":
            case "h6":
                //TODO
                return this;

            case "sarcasm":
                //TODO - Take a deep breath, then act as described in the "any other end tag" entry below.
                return this;

            case "a":
            case "b":
            case "big":
            case "code":
            case "em":
            case "font":
            case "i":
            case "nobr":
            case "s":
            case "small":
            case "strike":
            case "strong":
            case "tt":
            case "u":
                //TODO - Run the adoption agency algorithm (http://www.w3.org/TR/html5/syntax.html#adoption-agency-algorithm) for the token's tag name.
                return this;

            case "applet":
            case "marquee":
            case "object":
                //TODO
                return this;

            case "br":
                //TODO
                return this;

            default:
                // Any other end tag.
                //TODO
                return this;
        }
    }

    // ---- End of file -----------------------------------------------------------------------
    if (token is EndOfFileToken)
    {
        //TODO - If there is a node in the stack of open elements that is not either a dd element, a dt element,
        //TODO - an li element, a p element, a tbody element, a td element, a tfoot element, a th element,
        //TODO - a thead element, a tr element, the body element, or the html element, then this is a parse error.
        //TODO - If the stack of template insertion modes is not empty, then process the token using the rules for
        //TODO - the "in template" insertion mode.
        // Otherwise, stop parsing.
        base.StopParsing();
        return null;
    }

    // Any token kind not covered above is left untouched.
    return this;
}