/// <summary>
/// Gets the next available token.
/// </summary>
/// <remarks>
/// A token that was held back by an earlier call is handed out first.
/// Otherwise the state matching the current parse mode is run on the
/// current source character. When that run leaves text in the character
/// buffer, the text is returned as a character token and the token that
/// was just produced is held back for the following call.
/// </remarks>
/// <returns>The next available token.</returns>
public HtmlToken Get()
{
    var current = _buffered;

    // Hand out the held-back token before touching the source again.
    if (current != null)
    {
        _buffered = null;
        return current;
    }

    if (_src.IsEnded)
    {
        return HtmlToken.EOF;
    }

    // Dispatch on the parse mode; an unlisted mode leaves the token null.
    var mode = _model;

    if (mode == HtmlParseMode.PCData)
    {
        current = Data(_src.Current);
    }
    else if (mode == HtmlParseMode.RCData)
    {
        current = RCData(_src.Current);
    }
    else if (mode == HtmlParseMode.Plaintext)
    {
        current = Plaintext(_src.Current);
    }
    else if (mode == HtmlParseMode.Rawtext)
    {
        current = Rawtext(_src.Current);
    }
    else if (mode == HtmlParseMode.Script)
    {
        current = ScriptData(_src.Current);
    }

    // Pending text goes out first; the produced token waits for the next call.
    if (_buffer.Length > 0)
    {
        var text = _buffer.ToString();
        _buffered = current;
        current = HtmlToken.Character(text);
        _buffer.Clear();
    }

    _src.Advance();
    return current;
}
/// <summary>
/// See 8.2.5.4.6 The "after head" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterHead(HtmlToken token)
{
    // Ignorable character tokens are inserted as they are.
    if (token.IsIgnorable)
    {
        InsertCharacter(((HtmlCharacterToken)token).Data);
        return;
    }

    var type = token.Type;

    if (type == HtmlTokenType.Comment)
    {
        AddComment(CurrentNode, token);
        return;
    }

    if (type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }

    if (type == HtmlTokenType.StartTag)
    {
        var name = ((HtmlTagToken)token).Name;

        if (name == HTMLHtmlElement.Tag)
        {
            InBody(token);
            return;
        }

        if (name == HTMLBodyElement.Tag)
        {
            AfterHeadStartTagBody((HtmlTagToken)token);
            return;
        }

        if (name == HTMLFrameSetElement.Tag)
        {
            var frameSet = new HTMLFrameSetElement();
            AddElementToCurrentNode(frameSet, token);
            insert = HtmlTreeMode.InFrameset;
            return;
        }

        if (name.IsOneOf(HTMLBaseElement.Tag, HTMLBaseFontElement.Tag, HTMLBgsoundElement.Tag, HTMLLinkElement.Tag, HTMLMetaElement.Tag, HTMLNoElement.NoFramesTag, HTMLScriptElement.Tag, HTMLStyleElement.Tag, HTMLTitleElement.Tag))
        {
            // Head-only content: put the head element back on the stack
            // while the token is processed with the "in head" rules.
            RaiseErrorOccurred(ErrorCode.TagMustBeInHead);
            var headPosition = open.Count;
            open.Add(doc.Head);
            InHead(token);
            open.RemoveAt(headPosition);
            return;
        }

        if (name == HTMLHeadElement.Tag)
        {
            RaiseErrorOccurred(ErrorCode.HeadTagMisplaced);
            return;
        }
    }
    else if (type == HtmlTokenType.EndTag && !((HtmlTagToken)token).Name.IsHtmlBodyOrBreakRowElement())
    {
        RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
        return;
    }

    // Anything else: act as if a body start tag had been seen, then
    // hand the token over for processing again.
    AfterHeadStartTagBody(HtmlToken.OpenTag(HTMLBodyElement.Tag));
    frameset = true;
    Consume(token);
}
/// <summary>
/// Appends a token to the token buffer and flags that buffered
/// tokens are present.
/// </summary>
/// <param name="token">The token to append to the buffer.</param>
void EnqueueToken(HtmlToken token)
{
    // The flag is raised before the token is stored.
    buffered = true;
    tokenBuffer.Enqueue(token);
}
/// <summary>
/// See 8.2.5.4.2 The "before html" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void BeforeHtml(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }

    if (type == HtmlTokenType.Comment)
    {
        AddComment(doc, token);
        return;
    }

    if (type == HtmlTokenType.StartTag && ((HtmlTagToken)token).Name == HTMLHtmlElement.Tag)
    {
        AddRoot(token);

        //TODO
        //If the Document is being loaded as part of navigation of a browsing context:
        // - when the new element has a non-empty manifest attribute, resolve its value to an
        //   absolute URL relative to the new element and, on success, run the application cache
        //   selection algorithm with the URL-serialized result (exclude fragment flag set);
        // - when the attribute is missing, empty, or cannot be resolved, run the application
        //   cache selection algorithm with no manifest.
        // The algorithm must be passed the Document object.

        insert = HtmlTreeMode.BeforeHead;
        return;
    }

    if (token.IsEndTagInv(HTMLHtmlElement.Tag, HTMLBodyElement.Tag, HTMLBRElement.Tag, HTMLHeadElement.Tag))
    {
        RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
        return;
    }

    if (type == HtmlTokenType.Character)
    {
        // Strip the leading part of the text in place; nothing is left to
        // do when the token is empty afterwards.
        var text = (HtmlCharacterToken)token;
        text.TrimStart();

        if (text.IsEmpty)
        {
            return;
        }
    }

    // Anything else: create the root from a synthetic html start tag and
    // continue with the "before head" rules for the current token.
    BeforeHtml(HtmlToken.OpenTag(HTMLHtmlElement.Tag));

    //TODO
    //If the Document is being loaded as part of navigation of a browsing context:
    // run the application cache selection algorithm with no manifest, passing it the Document object.

    BeforeHead(token);
}
/// <summary>
/// See 8.2.5.4.6 The "after head" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterHead(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        // Insert whatever TrimStart split off the front of the text; the
        // remainder (if any) is handled by the "anything else" rules below.
        var text = (HtmlCharacterToken)token;
        var leading = text.TrimStart();
        InsertCharacters(leading);

        if (text.IsEmpty)
        {
            return;
        }
    }
    else
    {
        if (token.Type == HtmlTokenType.Comment)
        {
            AddComment(CurrentNode, token);
            return;
        }

        if (token.Type == HtmlTokenType.DOCTYPE)
        {
            RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
            return;
        }

        if (token.IsStartTag(HTMLHtmlElement.Tag))
        {
            InBody(token);
            return;
        }

        if (token.IsStartTag(HTMLBodyElement.Tag))
        {
            AfterHeadStartTagBody((HtmlTagToken)token);
            return;
        }

        if (token.IsStartTag(HTMLFrameSetElement.Tag))
        {
            var frameSet = new HTMLFrameSetElement();
            AddElementToCurrentNode(frameSet, token);
            insert = HtmlTreeMode.InFrameset;
            return;
        }

        if (token.IsStartTag(HTMLBaseElement.Tag, HTMLBaseFontElement.Tag, HTMLBgsoundElement.Tag, HTMLLinkElement.Tag, HTMLMetaElement.Tag, HTMLNoElement.NoFramesTag, HTMLScriptElement.Tag, HTMLStyleElement.Tag, HTMLTitleElement.Tag))
        {
            // Head-only content: put the head element back on the stack
            // while the token is processed with the "in head" rules.
            RaiseErrorOccurred(ErrorCode.TagMustBeInHead);
            var headPosition = open.Count;
            open.Add(doc.Head);
            InHead(token);
            open.RemoveAt(headPosition);
            return;
        }

        if (token.IsStartTag(HTMLHeadElement.Tag))
        {
            RaiseErrorOccurred(ErrorCode.HeadTagMisplaced);
            return;
        }

        if (token.IsEndTagInv(HTMLHtmlElement.Tag, HTMLBodyElement.Tag, HTMLBRElement.Tag))
        {
            RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
            return;
        }
    }

    // Anything else: act as if a body start tag had been seen, then
    // hand the token over for processing again.
    AfterHeadStartTagBody(HtmlToken.OpenTag(HTMLBodyElement.Tag));
    frameset = true;
    Consume(token);
}
/// <summary>
/// See 8.2.5.4.18 The "after body" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterBody(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        // Insert whatever TrimStart split off the front of the text; a
        // non-empty remainder is treated as an unexpected token below.
        var text = (HtmlCharacterToken)token;
        var leading = text.TrimStart();
        ReconstructFormatting();
        InsertCharacters(leading);

        if (text.IsEmpty)
        {
            return;
        }
    }
    else
    {
        if (token.Type == HtmlTokenType.Comment)
        {
            // Comments are attached to the first entry of the open stack.
            AddComment(open[0], token);
            return;
        }

        if (token.Type == HtmlTokenType.DOCTYPE)
        {
            RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
            return;
        }

        if (token.IsTag(HTMLHtmlElement.Tag))
        {
            if (token.Type == HtmlTokenType.StartTag)
            {
                InBody(token);
            }
            else if (IsFragmentCase)
            {
                RaiseErrorOccurred(ErrorCode.TagInvalidInFragmentMode);
            }
            else
            {
                insert = HtmlTreeMode.AfterAfterBody;
            }

            return;
        }

        if (token.Type == HtmlTokenType.EOF)
        {
            End();
            return;
        }
    }

    // Anything else: report it, fall back to "in body" and process it there.
    RaiseErrorOccurred(ErrorCode.TokenNotPossible);
    insert = HtmlTreeMode.InBody;
    InBody(token);
}
/// <summary>
/// Creates the root (html) element, appends it to the document, applies
/// the token's data to it and pushes it onto the stack of open elements.
/// </summary>
/// <param name="token">The token which started this process.</param>
void AddRoot(HtmlToken token)
{
    var root = new HTMLHtmlElement();

    // The element is attached to the document before it is set up.
    doc.AppendChild(root);
    SetupElement(root, token, acknowledgeSelfClosing: false);
    open.Add(root);

    // The tokenizer flag is the negation of the root's IsInHtml value.
    var acceptsCharacterData = !root.IsInHtml;
    tokenizer.AcceptsCharacterData = acceptsCharacterData;
}
/// <summary>
/// Copies the tag name and all attributes of a tag token onto an element
/// and reports an error for a self-closing flag that is not acknowledged.
/// </summary>
/// <param name="element">The node which will be added to the list.</param>
/// <param name="elementToken">The associated tag token.</param>
/// <param name="acknowledgeSelfClosing">Should the self-closing be acknowledged?</param>
void SetupElement(Element element, HtmlToken elementToken, bool acknowledgeSelfClosing)
{
    var source = (HtmlTagToken)elementToken;
    element.NodeName = source.Name;

    // A self-closing flag nobody acknowledges is an error.
    if (!acknowledgeSelfClosing && source.IsSelfClosing)
    {
        RaiseErrorOccurred(ErrorCode.TagCannotBeSelfClosed);
    }

    var attributes = source.Attributes;
    var position = 0;

    while (position < attributes.Count)
    {
        var attribute = attributes[position];
        element.SetAttribute(attribute.Key, attribute.Value);
        position++;
    }
}
/// <summary>
/// See 8.2.5.4.19 The "in frameset" insertion mode.
/// </summary>
/// <remarks>
/// Tokens that are not handled by one of the branches below are reported
/// with <c>ErrorCode.TokenNotPossible</c> and otherwise ignored.
/// </remarks>
/// <param name="token">The passed token.</param>
void InFrameset(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        // Insert what TrimStart split off the front of the text; any
        // remaining content falls through to the error at the end.
        var chrs = (HtmlCharacterToken)token;
        var str = chrs.TrimStart();
        InsertCharacters(str);

        if (chrs.IsEmpty)
            return;
    }
    else if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(CurrentNode, token);
        return;
    }
    else if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }
    else if (token.Type == HtmlTokenType.StartTag)
    {
        var tag = (HtmlTagToken)token;

        if (tag.Name == HTMLHtmlElement.Tag)
        {
            InBody(token);
            return;
        }
        else if (tag.Name == HTMLFrameSetElement.Tag)
        {
            var element = new HTMLFrameSetElement();
            AddElementToCurrentNode(element, token);
            return;
        }
        else if (tag.Name == HTMLFrameElement.Tag)
        {
            // frame elements are added and closed right away; the
            // self-closing flag is acknowledged.
            var element = new HTMLFrameElement();
            AddElementToCurrentNode(element, token, true);
            CloseCurrentNode();
            return;
        }
        else if (tag.Name == HTMLNoElement.NoFramesTag)
        {
            InHead(token);
            return;
        }

        // Any other start tag falls through to the error at the end.
    }
    else if (token.Type == HtmlTokenType.EndTag && ((HtmlTagToken)token).Name == HTMLFrameSetElement.Tag)
    {
        if (CurrentNode != doc.DocumentElement)
        {
            CloseCurrentNode();

            // Per the specification the switch to "after frameset" only
            // happens when the parser was NOT created for fragment parsing
            // and the new current node is no longer a frameset element.
            if (!IsFragmentCase && CurrentNode.NodeName != HTMLFrameSetElement.Tag)
                insert = HtmlTreeMode.AfterFrameset;
        }
        else
            RaiseErrorOccurred(ErrorCode.CurrentNodeIsRoot);

        return;
    }
    else if (token.Type == HtmlTokenType.EOF)
    {
        if (CurrentNode != doc.DocumentElement)
            RaiseErrorOccurred(ErrorCode.CurrentNodeIsNotRoot);

        End();
        return;
    }

    RaiseErrorOccurred(ErrorCode.TokenNotPossible);
}
/// <summary>
/// See 8.2.5.4.12 The "in column group" insertion mode.
/// </summary>
/// <remarks>
/// Every token that is not fully handled by one of the explicit branches
/// (including the non-whitespace remainder of a character token) is
/// treated as "anything else": the column group is closed via
/// <c>InColumnGroupEndTagColgroup</c> and, if that succeeded, the token
/// is processed with the "in table" rules.
/// </remarks>
/// <param name="token">The passed token.</param>
void InColumnGroup(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        // Insert what TrimStart split off the front of the text.
        var chars = (HtmlCharacterToken)token;
        var str = chars.TrimStart();
        InsertCharacters(str);

        // Only stop here when nothing is left; remaining content must not
        // be dropped but handled by the "anything else" rules below.
        if (chars.IsEmpty)
            return;
    }
    else if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(CurrentNode, token);
        return;
    }
    else if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }
    else if (token.IsStartTag(HTMLHtmlElement.Tag))
    {
        InBody(token);
        return;
    }
    else if (token.IsStartTag(HTMLTableColElement.ColTag))
    {
        // col elements are added and closed right away; the self-closing
        // flag is acknowledged.
        var element = new HTMLTableColElement();
        AddElementToCurrentNode(element, token, true);
        CloseCurrentNode();
        return;
    }
    else if (token.IsEndTag(HTMLTableColElement.ColgroupTag))
    {
        InColumnGroupEndTagColgroup();
        return;
    }
    else if (token.IsEndTag(HTMLTableColElement.ColTag))
    {
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        return;
    }
    else if (token.Type == HtmlTokenType.EOF && CurrentNode == doc.DocumentElement)
    {
        End();
        return;
    }

    // Anything else: close the column group and reprocess in the table.
    if (InColumnGroupEndTagColgroup())
        InTable(token);
}
/// <summary>
/// See 8.2.5.4.15 The "in cell" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InCell(HtmlToken token)
{
    if (token.Type == HtmlTokenType.EndTag)
    {
        var closing = ((HtmlTagToken)token).Name;

        if (closing.IsTableCellElement())
        {
            InCellEndTagCell(closing);
        }
        else if (closing.IsSpecialTableElement())
        {
            RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
        }
        else if (!closing.IsTableElement())
        {
            InBody(token);
        }
        else if (IsInTableScope(closing))
        {
            // Close the cell first, then let the token be processed again.
            CloseTheCell();
            Consume(token);
        }
        else
        {
            RaiseErrorOccurred(ErrorCode.TableNotInScope);
        }

        return;
    }

    if (token.Type == HtmlTokenType.StartTag)
    {
        var opening = ((HtmlTagToken)token).Name;

        if (opening.IsGeneralTableElement(true) || opening.IsTableCellElement())
        {
            var cellInScope = IsInTableScope(HTMLTableCellElement.NormalTag) || IsInTableScope(HTMLTableCellElement.HeadTag);

            if (cellInScope)
            {
                // Close the cell first, then let the token be processed again.
                CloseTheCell();
                Consume(token);
            }
            else
            {
                RaiseErrorOccurred(ErrorCode.TableCellNotInScope);
            }

            return;
        }
    }

    // Everything else is handled by the "in body" rules.
    InBody(token);
}
/// <summary>
/// See 8.2.5.4.11 The "in caption" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InCaption(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.EndTag)
    {
        switch (((HtmlTagToken)token).Name)
        {
            case HTMLTableCaptionElement.Tag:
                InCaptionEndTagCaption();
                return;

            case HTMLTableElement.Tag:
                // Closing the table implies closing the caption first; the
                // token is then handled by the "in table" rules.
                RaiseErrorOccurred(ErrorCode.TableNesting);

                if (InCaptionEndTagCaption())
                {
                    InTable(token);
                }

                return;

            case HTMLBodyElement.Tag:
            case HTMLHtmlElement.Tag:
            case HTMLTableColElement.ColTag:
            case HTMLTableColElement.ColgroupTag:
            case HTMLTableSectionElement.BodyTag:
            case HTMLTableSectionElement.FootTag:
            case HTMLTableSectionElement.HeadTag:
            case HTMLTableCellElement.NormalTag:
            case HTMLTableCellElement.HeadTag:
            case HTMLTableRowElement.Tag:
                RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
                return;
        }
    }
    else if (type == HtmlTokenType.StartTag)
    {
        switch (((HtmlTagToken)token).Name)
        {
            case HTMLTableCaptionElement.Tag:
            case HTMLTableColElement.ColTag:
            case HTMLTableColElement.ColgroupTag:
            case HTMLTableSectionElement.BodyTag:
            case HTMLTableSectionElement.FootTag:
            case HTMLTableSectionElement.HeadTag:
            case HTMLTableCellElement.NormalTag:
            case HTMLTableCellElement.HeadTag:
            case HTMLTableRowElement.Tag:
                // Table structure tags close the caption first; the token
                // is then handled by the "in table" rules.
                RaiseErrorOccurred(ErrorCode.TagCannotStartHere);

                if (InCaptionEndTagCaption())
                {
                    InTable(token);
                }

                return;
        }
    }

    // Every token not handled above is processed with the "in body" rules.
    InBody(token);
}
/// <summary>
/// Performs the InBody state with foster parenting: the foster flag is
/// set for the duration of the InBody call and cleared afterwards.
/// </summary>
/// <param name="token">The given token.</param>
void InBodyWithFoster(HtmlToken token)
{
    foster = true;
    InBody(token);
    // NOTE(review): the flag is cleared unconditionally (not restored to its
    // previous value) and stays set if InBody throws - confirm this is intended.
    foster = false;
}
/// <summary>
/// See 8.2.5.4.7 The "in body" insertion mode.
/// </summary>
/// <remarks>
/// Character, comment and DOCTYPE tokens are handled directly; start and
/// end tags are dispatched on the tag name; an EOF token validates the
/// stack of open elements and then ends parsing. Tokens of any other type
/// are ignored by this method.
/// </remarks>
/// <param name="token">The passed token.</param>
void InBody(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        var chrs = (HtmlCharacterToken)token;
        ReconstructFormatting();
        InsertCharacters(chrs.Data);

        if (chrs.HasContent)
            frameset = false;
    }
    else if (token.Type == HtmlTokenType.Comment)
        AddComment(CurrentNode, token);
    else if (token.Type == HtmlTokenType.DOCTYPE)
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
    else if (token.Type == HtmlTokenType.StartTag)
    {
        var tag = (HtmlTagToken)token;

        switch (tag.Name)
        {
            case HTMLHtmlElement.Tag:
            {
                // Misplaced html tag: report it and pass its attributes on
                // for the first entry of the open stack.
                RaiseErrorOccurred(ErrorCode.HtmlTagMisplaced);
                AppendAttributes(tag, open[0]);
                break;
            }
            case HTMLBaseElement.Tag:
            case HTMLBaseFontElement.Tag:
            case HTMLBgsoundElement.Tag:
            case HTMLLinkElement.Tag:
            case HTMLMenuItemElement.Tag:
            case HTMLMetaElement.Tag:
            case HTMLNoElement.NoFramesTag:
            case HTMLScriptElement.Tag:
            case HTMLStyleElement.Tag:
            case HTMLTitleElement.Tag:
            {
                // Head content is processed with the "in head" rules.
                InHead(token);
                break;
            }
            case HTMLBodyElement.Tag:
            {
                RaiseErrorOccurred(ErrorCode.BodyTagMisplaced);

                if (open.Count > 1 && open[1] is HTMLBodyElement)
                {
                    frameset = false;
                    AppendAttributes(tag, open[1]);
                }

                break;
            }
            case HTMLFrameSetElement.Tag:
            {
                RaiseErrorOccurred(ErrorCode.FramesetMisplaced);

                if (open.Count != 1 && open[1] is HTMLBodyElement && frameset)
                {
                    // Replace the body: detach it, unwind the stack down to
                    // the root and insert the frameset instead.
                    open[1].ParentNode.RemoveChild(open[1]);

                    while (open.Count > 1)
                        CloseCurrentNode();

                    var element = new HTMLFrameSetElement();
                    AddElementToCurrentNode(element, token);
                    insert = HtmlTreeMode.InFrameset;
                }

                break;
            }
            // NOTE(review): the end tag branch below handles
            // HTMLSemanticElement.MainTag, but it is not listed here, so a
            // "main" start tag takes the default branch - confirm intended.
            case HTMLSemanticElement.AddressTag:
            case HTMLSemanticElement.ArticleTag:
            case HTMLSemanticElement.AsideTag:
            case HTMLQuoteElement.BlockTag:
            case HTMLSemanticElement.CenterTag:
            case HTMLDetailsElement.Tag:
            case HTMLDialogElement.Tag:
            case HTMLDirectoryElement.Tag:
            case HTMLDivElement.Tag:
            case HTMLDListElement.Tag:
            case HTMLFieldSetElement.Tag:
            case HTMLSemanticElement.FigcaptionTag:
            case HTMLSemanticElement.FigureTag:
            case HTMLSemanticElement.FooterTag:
            case HTMLSemanticElement.HeaderTag:
            case HTMLSemanticElement.HgroupTag:
            case HTMLMenuElement.Tag:
            case HTMLSemanticElement.NavTag:
            case HTMLOListElement.Tag:
            case HTMLParagraphElement.Tag:
            case HTMLSemanticElement.SectionTag:
            case HTMLSemanticElement.SummaryTag:
            case HTMLUListElement.Tag:
            {
                // Block-level elements close an open paragraph first.
                if (IsInButtonScope(HTMLParagraphElement.Tag))
                    InBodyEndTagParagraph();

                var element = HTMLElement.Factory(tag.Name);
                AddElementToCurrentNode(element, token);
                break;
            }
            case HTMLHeadingElement.ChapterTag:
            case HTMLHeadingElement.SubSubSubSubSectionTag:
            case HTMLHeadingElement.SubSubSubSectionTag:
            case HTMLHeadingElement.SubSubSectionTag:
            case HTMLHeadingElement.SubSectionTag:
            case HTMLHeadingElement.SectionTag:
            {
                if (IsInButtonScope(HTMLParagraphElement.Tag))
                    InBodyEndTagParagraph();

                // Headings do not nest: a heading as current node is closed.
                if (CurrentNode is HTMLHeadingElement)
                {
                    RaiseErrorOccurred(ErrorCode.HeadingNested);
                    CloseCurrentNode();
                }

                var element = new HTMLHeadingElement();
                AddElementToCurrentNode(element, token);
                break;
            }
            case HTMLPreElement.Tag:
            case HTMLSemanticElement.ListingTag:
            {
                if (IsInButtonScope(HTMLParagraphElement.Tag))
                    InBodyEndTagParagraph();

                var element = new HTMLPreElement();
                AddElementToCurrentNode(element, token);
                frameset = false;
                PreventNewLine();
                break;
            }
            case HTMLFormElement.Tag:
            {
                // Only one form may be open at a time.
                if (form == null)
                {
                    if (IsInButtonScope(HTMLParagraphElement.Tag))
                        InBodyEndTagParagraph();

                    var element = new HTMLFormElement();
                    AddElementToCurrentNode(element, token);
                    form = element;
                }
                else
                    RaiseErrorOccurred(ErrorCode.FormAlreadyOpen);

                break;
            }
            case HTMLLIElement.ItemTag:
            {
                InBodyStartTagListItem(tag);
                break;
            }
            case HTMLLIElement.DefinitionTag:
            case HTMLLIElement.DescriptionTag:
            {
                InBodyStartTagDefinitionItem(tag);
                break;
            }
            case HTMLSemanticElement.PlaintextTag:
            {
                if (IsInButtonScope(HTMLParagraphElement.Tag))
                    InBodyEndTagParagraph();

                var plaintext = new HTMLElement();
                AddElementToCurrentNode(plaintext, token);
                tokenizer.Switch(HtmlParseMode.Plaintext);
                break;
            }
            case HTMLButtonElement.Tag:
            {
                if (IsInScope(tag.Name))
                {
                    // A button is already open: close it, then process the
                    // start tag again.
                    RaiseErrorOccurred(ErrorCode.ButtonInScope);
                    InBodyEndTagBlock(tag.Name);
                    InBody(token);
                }
                else
                {
                    ReconstructFormatting();
                    var element = new HTMLButtonElement();
                    AddElementToCurrentNode(element, token);
                    frameset = false;
                }

                break;
            }
            case HTMLAnchorElement.Tag:
            {
                // Look for an open anchor after the last scope marker in
                // the list of active formatting elements.
                for (var i = formatting.Count - 1; i >= 0; i--)
                {
                    if (formatting[i] is ScopeMarkerNode)
                        break;
                    else if (formatting[i].NodeName == HTMLAnchorElement.Tag)
                    {
                        var format = formatting[i];
                        RaiseErrorOccurred(ErrorCode.AnchorNested);
                        HeisenbergAlgorithm(HtmlToken.CloseTag(HTMLAnchorElement.Tag));

                        if (open.Contains(format))
                            open.Remove(format);

                        // The call above may have changed the list, so the
                        // index i can be stale: remove by reference.
                        if (formatting.Contains(format))
                            formatting.Remove(format);

                        break;
                    }
                }

                ReconstructFormatting();
                var element = new HTMLAnchorElement();
                AddElementToCurrentNode(element, token);
                AddFormattingElement(element);
                break;
            }
            case HTMLFormattingElement.BTag:
            case HTMLFormattingElement.BigTag:
            case HTMLFormattingElement.CodeTag:
            case HTMLFormattingElement.EmTag:
            case HTMLFontElement.Tag:
            case HTMLFormattingElement.ITag:
            case HTMLFormattingElement.STag:
            case HTMLFormattingElement.SmallTag:
            case HTMLFormattingElement.StrikeTag:
            case HTMLFormattingElement.StrongTag:
            case HTMLFormattingElement.TtTag:
            case HTMLFormattingElement.UTag:
            {
                ReconstructFormatting();
                var element = HTMLElement.Factory(tag.Name);
                AddElementToCurrentNode(element, token);
                AddFormattingElement(element);
                break;
            }
            case HTMLFormattingElement.NobrTag:
            {
                ReconstructFormatting();

                if (IsInScope(HTMLFormattingElement.NobrTag))
                {
                    RaiseErrorOccurred(ErrorCode.NobrInScope);
                    HeisenbergAlgorithm(tag);
                    ReconstructFormatting();
                }

                var element = new HTMLElement();
                AddElementToCurrentNode(element, token);
                AddFormattingElement(element);
                break;
            }
            case HTMLAppletElement.Tag:
            case HTMLMarqueeElement.Tag:
            case HTMLObjectElement.Tag:
            {
                ReconstructFormatting();
                var element = HTMLElement.Factory(tag.Name);
                AddElementToCurrentNode(element, token);
                InsertScopeMarker();
                frameset = false;
                break;
            }
            case HTMLTableElement.Tag:
            {
                // Outside of quirks mode a table closes an open paragraph.
                if (doc.QuirksMode == QuirksMode.Off && IsInButtonScope(HTMLParagraphElement.Tag))
                    InBodyEndTagParagraph();

                var element = new HTMLTableElement();
                AddElementToCurrentNode(element, token);
                frameset = false;
                insert = HtmlTreeMode.InTable;
                break;
            }
            case HTMLAreaElement.Tag:
            case HTMLBRElement.Tag:
            case HTMLEmbedElement.Tag:
            case HTMLImageElement.Tag:
            case HTMLKeygenElement.Tag:
            case HTMLWbrElement.Tag:
            {
                InBodyStartTagBreakrow(tag);
                break;
            }
            case HTMLInputElement.Tag:
            {
                ReconstructFormatting();
                var element = new HTMLInputElement();
                AddElementToCurrentNode(element, token, true);
                CloseCurrentNode();

                // Only inputs that are not of type "hidden" clear the flag.
                if (!tag.GetAttribute("type").Equals("hidden", StringComparison.OrdinalIgnoreCase))
                    frameset = false;

                break;
            }
            case HTMLParamElement.Tag:
            case HTMLSourceElement.Tag:
            case HTMLTrackElement.Tag:
            {
                var element = HTMLElement.Factory(tag.Name);
                AddElementToCurrentNode(element, token, true);
                CloseCurrentNode();
                break;
            }
            case HTMLHRElement.Tag:
            {
                if (IsInButtonScope(HTMLParagraphElement.Tag))
                    InBodyEndTagParagraph();

                var element = new HTMLHRElement();
                AddElementToCurrentNode(element, token, true);
                CloseCurrentNode();
                frameset = false;
                break;
            }
            case HTMLImageElement.FalseTag:
            {
                // Wrongly named image tag: rename the token and handle it
                // like a regular image tag.
                RaiseErrorOccurred(ErrorCode.ImageTagNamedWrong);
                tag.Name = HTMLImageElement.Tag;
                goto case HTMLImageElement.Tag;
            }
            case HTMLIsIndexElement.Tag:
            {
                RaiseErrorOccurred(ErrorCode.TagInappropriate);

                if (form == null)
                {
                    // Expand isindex into form > hr, label(prompt + input), hr
                    // by feeding synthetic tokens through this method.
                    InBody(HtmlToken.OpenTag(HTMLFormElement.Tag));

                    if (tag.GetAttribute("action") != String.Empty)
                        form.SetAttribute("action", tag.GetAttribute("action"));

                    InBody(HtmlToken.OpenTag(HTMLHRElement.Tag));
                    InBody(HtmlToken.OpenTag(HTMLLabelElement.Tag));

                    if (tag.GetAttribute("prompt") != String.Empty)
                        InsertCharacters(tag.GetAttribute("prompt"));
                    else
                        InsertCharacters("This is a searchable index. Enter search keywords:");

                    var input = HtmlToken.OpenTag(HTMLInputElement.Tag);
                    input.AddAttribute("name", HTMLIsIndexElement.Tag);

                    // Copy all attributes except name, action and prompt.
                    for (int i = 0; i < tag.Attributes.Count; i++)
                    {
                        if (tag.Attributes[i].Key == "name" || tag.Attributes[i].Key == "action" || tag.Attributes[i].Key == "prompt")
                            continue;

                        input.AddAttribute(tag.Attributes[i].Key, tag.Attributes[i].Value);
                    }

                    InBody(input);
                    InBody(HtmlToken.CloseTag(HTMLLabelElement.Tag));
                    InBody(HtmlToken.OpenTag(HTMLHRElement.Tag));
                    InBody(HtmlToken.CloseTag(HTMLFormElement.Tag));
                }

                break;
            }
            case HTMLTextAreaElement.Tag:
            {
                var element = new HTMLTextAreaElement();
                AddElementToCurrentNode(element, token);
                tokenizer.Switch(HtmlParseMode.RCData);
                // Remember the mode to return to once the text is read.
                originalInsert = insert;
                frameset = false;
                insert = HtmlTreeMode.Text;
                PreventNewLine();
                break;
            }
            case HTMLSemanticElement.XmpTag:
            {
                if (IsInButtonScope(HTMLParagraphElement.Tag))
                    InBodyEndTagParagraph();

                ReconstructFormatting();
                frameset = false;
                RawtextAlgorithm(tag);
                break;
            }
            case HTMLIFrameElement.Tag:
            {
                frameset = false;
                RawtextAlgorithm(tag);
                break;
            }
            case HTMLSelectElement.Tag:
            {
                ReconstructFormatting();
                var element = new HTMLSelectElement();
                AddElementToCurrentNode(element, token);
                frameset = false;

                // Inside table-related modes a dedicated select mode is used.
                switch (insert)
                {
                    case HtmlTreeMode.InTable:
                    case HtmlTreeMode.InCaption:
                    case HtmlTreeMode.InRow:
                    case HtmlTreeMode.InCell:
                        insert = HtmlTreeMode.InSelectInTable;
                        break;

                    default:
                        insert = HtmlTreeMode.InSelect;
                        break;
                }

                break;
            }
            case HTMLOptGroupElement.Tag:
            case HTMLOptionElement.Tag:
            {
                // An open option is closed before the new element is added.
                if (CurrentNode.NodeName == HTMLOptionElement.Tag)
                    InBodyEndTagAnythingElse(HtmlToken.CloseTag(HTMLOptionElement.Tag));

                ReconstructFormatting();
                var element = HTMLElement.Factory(tag.Name);
                AddElementToCurrentNode(element, token);
                break;
            }
            case "rp":
            case "rt":
            {
                if (IsInScope("ruby"))
                {
                    GenerateImpliedEndTags();

                    if (CurrentNode.NodeName != "ruby")
                        RaiseErrorOccurred(ErrorCode.TagDoesNotMatchCurrentNode);
                }

                var element = HTMLElement.Factory(tag.Name);
                AddElementToCurrentNode(element, token);
                break;
            }
            case HTMLNoElement.NoEmbedTag:
            {
                RawtextAlgorithm(tag);
                break;
            }
            case HTMLNoElement.NoScriptTag:
            {
                // Without scripting, noscript is treated like any other tag.
                if (!doc.IsScripting)
                    goto default;

                RawtextAlgorithm(tag);
                break;
            }
            case MathMLElement.RootTag:
            {
                var element = new MathMLElement();
                element.NodeName = tag.Name;
                ReconstructFormatting();

                for (int i = 0; i < tag.Attributes.Count; i++)
                {
                    var name = tag.Attributes[i].Key;
                    var value = tag.Attributes[i].Value;
                    element.SetAttribute(ForeignHelpers.AdjustAttributeName(MathMLHelpers.AdjustAttributeName(name)), value);
                }

                CurrentNode.AppendChild(element);

                // Self-closing roots are not kept on the open stack.
                if (!tag.IsSelfClosing)
                    open.Add(element);

                break;
            }
            case SVGElement.RootTag:
            {
                var element = new SVGElement();
                element.NodeName = tag.Name;
                ReconstructFormatting();

                for (int i = 0; i < tag.Attributes.Count; i++)
                {
                    var name = tag.Attributes[i].Key;
                    var value = tag.Attributes[i].Value;
                    // SVG attributes need the SVG adjustment (the same helper
                    // the foreign content rules use), not the MathML one.
                    element.SetAttribute(ForeignHelpers.AdjustAttributeName(SVGHelpers.AdjustAttributeName(name)), value);
                }

                CurrentNode.AppendChild(element);

                // Self-closing roots are not kept on the open stack.
                if (!tag.IsSelfClosing)
                {
                    open.Add(element);
                    tokenizer.AcceptsCharacterData = true;
                }

                break;
            }
            case HTMLTableCaptionElement.Tag:
            case HTMLTableColElement.ColTag:
            case HTMLTableColElement.ColgroupTag:
            case HTMLFrameElement.Tag:
            case HTMLHeadElement.Tag:
            case HTMLTableSectionElement.BodyTag:
            case HTMLTableCellElement.NormalTag:
            case HTMLTableSectionElement.FootTag:
            case HTMLTableCellElement.HeadTag:
            case HTMLTableSectionElement.HeadTag:
            case HTMLTableRowElement.Tag:
            {
                // These tags are reported and otherwise ignored here.
                RaiseErrorOccurred(ErrorCode.TagCannotStartHere);
                break;
            }
            default:
            {
                ReconstructFormatting();
                var element = new HTMLUnknownElement();
                AddElementToCurrentNode(element, token);
                break;
            }
        }
    }
    else if (token.Type == HtmlTokenType.EndTag)
    {
        var tag = (HtmlTagToken)token;

        switch (tag.Name)
        {
            case HTMLBodyElement.Tag:
            {
                InBodyEndTagBody();
                break;
            }
            case HTMLHtmlElement.Tag:
            {
                // Close the body first; on success the token is handled by
                // the "after body" rules.
                if (InBodyEndTagBody())
                    AfterBody(token);

                break;
            }
            case HTMLSemanticElement.AddressTag:
            case HTMLSemanticElement.ArticleTag:
            case HTMLSemanticElement.AsideTag:
            case HTMLQuoteElement.BlockTag:
            case HTMLButtonElement.Tag:
            case HTMLSemanticElement.CenterTag:
            case HTMLDetailsElement.Tag:
            case HTMLDialogElement.Tag:
            case HTMLDirectoryElement.Tag:
            case HTMLDivElement.Tag:
            case HTMLDListElement.Tag:
            case HTMLFieldSetElement.Tag:
            case HTMLSemanticElement.FigcaptionTag:
            case HTMLSemanticElement.FigureTag:
            case HTMLSemanticElement.FooterTag:
            case HTMLSemanticElement.HeaderTag:
            case HTMLSemanticElement.HgroupTag:
            case HTMLSemanticElement.ListingTag:
            case HTMLSemanticElement.MainTag:
            case HTMLMenuElement.Tag:
            case HTMLSemanticElement.NavTag:
            case HTMLOListElement.Tag:
            case HTMLPreElement.Tag:
            case HTMLSemanticElement.SectionTag:
            case HTMLSemanticElement.SummaryTag:
            case HTMLUListElement.Tag:
            {
                InBodyEndTagBlock(tag.Name);
                break;
            }
            case HTMLFormElement.Tag:
            {
                // The form pointer is cleared before the scope check.
                var node = form;
                form = null;

                if (node != null && IsInScope(node.NodeName))
                {
                    GenerateImpliedEndTags();

                    if (CurrentNode != node)
                        RaiseErrorOccurred(ErrorCode.FormClosedWrong);

                    open.Remove(node);
                }
                else
                    RaiseErrorOccurred(ErrorCode.FormNotInScope);

                break;
            }
            case HTMLParagraphElement.Tag:
            {
                InBodyEndTagParagraph();
                break;
            }
            case HTMLLIElement.ItemTag:
            {
                if (IsInListItemScope(tag.Name))
                {
                    GenerateImpliedEndTagsExceptFor(tag.Name);

                    if (CurrentNode.NodeName != tag.Name)
                        RaiseErrorOccurred(ErrorCode.TagDoesNotMatchCurrentNode);

                    ClearStackBackTo(tag.Name);
                    CloseCurrentNode();
                }
                else
                    RaiseErrorOccurred(ErrorCode.ListItemNotInScope);

                break;
            }
            case HTMLLIElement.DefinitionTag:
            case HTMLLIElement.DescriptionTag:
            {
                if (IsInScope(tag.Name))
                {
                    GenerateImpliedEndTagsExceptFor(tag.Name);

                    if (CurrentNode.NodeName != tag.Name)
                        RaiseErrorOccurred(ErrorCode.TagDoesNotMatchCurrentNode);

                    ClearStackBackTo(tag.Name);
                    CloseCurrentNode();
                }
                else
                    RaiseErrorOccurred(ErrorCode.ListItemNotInScope);

                break;
            }
            case HTMLHeadingElement.ChapterTag:
            case HTMLHeadingElement.SubSubSubSubSectionTag:
            case HTMLHeadingElement.SubSubSubSectionTag:
            case HTMLHeadingElement.SubSubSectionTag:
            case HTMLHeadingElement.SubSectionTag:
            case HTMLHeadingElement.SectionTag:
            {
                if (IsHeadingInScope())
                {
                    GenerateImpliedEndTags();

                    if (CurrentNode.NodeName != tag.Name)
                        RaiseErrorOccurred(ErrorCode.TagDoesNotMatchCurrentNode);

                    ClearStackBackToHeading();
                    CloseCurrentNode();
                }
                else
                    RaiseErrorOccurred(ErrorCode.HeadingNotInScope);

                break;
            }
            case HTMLAnchorElement.Tag:
            case HTMLFormattingElement.BTag:
            case HTMLFormattingElement.BigTag:
            case HTMLFormattingElement.CodeTag:
            case HTMLFormattingElement.EmTag:
            case HTMLFontElement.Tag:
            case HTMLFormattingElement.ITag:
            case HTMLFormattingElement.NobrTag:
            case HTMLFormattingElement.STag:
            case HTMLFormattingElement.SmallTag:
            case HTMLFormattingElement.StrikeTag:
            case HTMLFormattingElement.StrongTag:
            case HTMLFormattingElement.TtTag:
            case HTMLFormattingElement.UTag:
            {
                HeisenbergAlgorithm(tag);
                break;
            }
            case HTMLAppletElement.Tag:
            case HTMLMarqueeElement.Tag:
            case HTMLObjectElement.Tag:
            {
                if (IsInScope(tag.Name))
                {
                    GenerateImpliedEndTags();

                    if (CurrentNode.NodeName != tag.Name)
                        RaiseErrorOccurred(ErrorCode.TagDoesNotMatchCurrentNode);

                    ClearStackBackTo(tag.Name);
                    CloseCurrentNode();
                    ClearFormattingElements();
                }
                else
                    RaiseErrorOccurred(ErrorCode.ObjectNotInScope);

                break;
            }
            case HTMLBRElement.Tag:
            {
                // A closing br is reported and treated like an opening one.
                RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
                InBodyStartTagBreakrow(HtmlToken.OpenTag(HTMLBRElement.Tag));
                break;
            }
            default:
            {
                InBodyEndTagAnythingElse(tag);
                break;
            }
        }
    }
    else if (token.Type == HtmlTokenType.EOF)
    {
        // Report (once) if any open element is not in the list of elements
        // that may remain open at the end of the input.
        for (var i = 0; i < open.Count; i++)
        {
            switch (open[i].NodeName)
            {
                case HTMLLIElement.DescriptionTag:
                case HTMLLIElement.DefinitionTag:
                case HTMLLIElement.ItemTag:
                case HTMLParagraphElement.Tag:
                case HTMLTableSectionElement.BodyTag:
                case HTMLTableCellElement.HeadTag:
                case HTMLTableSectionElement.FootTag:
                case HTMLTableCellElement.NormalTag:
                case HTMLTableSectionElement.HeadTag:
                case HTMLTableRowElement.Tag:
                case HTMLBodyElement.Tag:
                case HTMLHtmlElement.Tag:
                    break;

                default:
                    RaiseErrorOccurred(ErrorCode.BodyClosedWrong);
                    // Leave the loop after the first offending element.
                    i = open.Count;
                    break;
            }
        }

        End();
    }
}
/// <summary>
/// 8.2.5.5 The rules for parsing tokens in foreign content
/// </summary>
/// <remarks>
/// Start tags from the HTML "breakout" list (and font tags carrying a
/// color, face or size attribute) unwind the foreign elements and are
/// processed again; all other start tags create a MathML or SVG element
/// depending on the adjusted current node.
/// </remarks>
/// <param name="token">The token to examine.</param>
void Foreign(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        var chrs = (HtmlCharacterToken)token;
        // NULL characters are swapped for the replacement character.
        InsertCharacters(chrs.Data.Replace(Specification.NULL, Specification.REPLACEMENT));

        if (chrs.HasContent)
            frameset = false;
    }
    else if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(CurrentNode, token);
    }
    else if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
    }
    else if (token.Type == HtmlTokenType.StartTag)
    {
        var tag = (HtmlTagToken)token;

        switch (tag.Name)
        {
            case HTMLFormattingElement.BTag:
            case HTMLFormattingElement.BigTag:
            case HTMLQuoteElement.BlockTag:
            case HTMLBodyElement.Tag:
            case HTMLBRElement.Tag:
            case HTMLSemanticElement.CenterTag:
            case HTMLFormattingElement.CodeTag:
            case HTMLLIElement.DefinitionTag:
            case HTMLDivElement.Tag:
            case HTMLDListElement.Tag:
            case HTMLLIElement.DescriptionTag:
            case HTMLFormattingElement.EmTag:
            case HTMLEmbedElement.Tag:
            case HTMLHeadingElement.ChapterTag:
            case HTMLHeadingElement.SubSubSubSubSectionTag:
            case HTMLHeadingElement.SubSubSubSectionTag:
            case HTMLHeadingElement.SubSubSectionTag:
            case HTMLHeadingElement.SubSectionTag:
            case HTMLHeadingElement.SectionTag:
            case HTMLHeadElement.Tag:
            case HTMLHRElement.Tag:
            case HTMLFormattingElement.ITag:
            case HTMLImageElement.Tag:
            case HTMLLIElement.ItemTag:
            case HTMLSemanticElement.ListingTag:
            case HTMLSemanticElement.MainTag:
            case HTMLMenuElement.Tag:
            case HTMLMetaElement.Tag:
            case HTMLFormattingElement.NobrTag:
            case HTMLOListElement.Tag:
            case HTMLParagraphElement.Tag:
            case HTMLPreElement.Tag:
            case "ruby":
            case HTMLFormattingElement.STag:
            case HTMLFormattingElement.SmallTag:
            case HTMLSpanElement.Tag:
            case HTMLFormattingElement.StrongTag:
            case HTMLFormattingElement.StrikeTag:
            case "sub":
            case "sup":
            case HTMLTableElement.Tag:
            case HTMLFormattingElement.TtTag:
            case HTMLFormattingElement.UTag:
            case HTMLUListElement.Tag:
            case "var":
            {
                // HTML content inside foreign content: close foreign nodes
                // until an integration point or an HTML element is the
                // current node, then process the token again.
                RaiseErrorOccurred(ErrorCode.TagCannotStartHere);
                CloseCurrentNode();

                while (!CurrentNode.IsHtmlTIP && !CurrentNode.IsMathMLTIP && !CurrentNode.IsInHtml)
                    CloseCurrentNode();

                Consume(token);
                break;
            }
            case HTMLFontElement.Tag:
            {
                // font only breaks out when it carries one of these attributes.
                for (var i = 0; i != tag.Attributes.Count; i++)
                {
                    if (tag.Attributes[i].Key == "color" || tag.Attributes[i].Key == "face" || tag.Attributes[i].Key == "size")
                        goto case "var";
                }

                goto default;
            }
            default:
            {
                Element node;

                if (AdjustedCurrentNode.IsInMathML)
                {
                    node = new MathMLElement();
                    node.NodeName = tag.Name;

                    for (int i = 0; i < tag.Attributes.Count; i++)
                    {
                        var name = tag.Attributes[i].Key;
                        var value = tag.Attributes[i].Value;
                        node.SetAttribute(ForeignHelpers.AdjustAttributeName(MathMLHelpers.AdjustAttributeName(name)), value);
                    }
                }
                else if (AdjustedCurrentNode.IsInSvg)
                {
                    node = new SVGElement();
                    node.NodeName = SVGHelpers.AdjustTagName(tag.Name);

                    for (int i = 0; i < tag.Attributes.Count; i++)
                    {
                        var name = tag.Attributes[i].Key;
                        var value = tag.Attributes[i].Value;
                        node.SetAttribute(ForeignHelpers.AdjustAttributeName(SVGHelpers.AdjustAttributeName(name)), value);
                    }
                }
                else
                    break;

                node.NamespaceURI = AdjustedCurrentNode.NamespaceURI;
                CurrentNode.AppendChild(node);
                open.Add(node);

                if (!tag.IsSelfClosing)
                    tokenizer.AcceptsCharacterData = true;
                else if (tag.Name == HTMLScriptElement.Tag)
                    Foreign(HtmlToken.CloseTag(HTMLScriptElement.Tag));
                else
                {
                    // A self-closing foreign element has no content: pop it
                    // again so following tokens do not become its children.
                    CloseCurrentNode();
                }

                break;
            }
        }
    }
    else if (token.Type == HtmlTokenType.EndTag)
    {
        var tag = (HtmlTagToken)token;

        if (CurrentNode != null && CurrentNode is HTMLScriptElement && tag.Name == HTMLScriptElement.Tag)
        {
            CloseCurrentNode();
            // The insertion point is saved and restored around the call;
            // pause stays set while nested invocations are still running.
            var oldInsert = tokenizer.Stream.InsertionPoint;
            nesting++;
            pause = true;
            InSvg(tag);
            nesting--;

            if (nesting == 0)
                pause = false;

            tokenizer.Stream.InsertionPoint = oldInsert;
        }
        else
        {
            var node = CurrentNode;

            if (node.NodeName != tag.Name)
                RaiseErrorOccurred(ErrorCode.TagClosingMismatch);

            // Pop elements until the matching one was removed or an HTML
            // element (or the end of the stack) is reached.
            while (open.Count > 0)
            {
                open.RemoveAt(open.Count - 1);

                // Tag names are compared culture-independently.
                if (node.NodeName.ToLowerInvariant() == tag.Name)
                    break;

                node = CurrentNode;

                if (node == null || node.IsInHtml)
                    break;
            }

            // NOTE(review): the token is processed again even when the
            // matching element was found and popped - confirm intended.
            Reset();
            Consume(token);
        }
    }
}
/// <summary>
/// Prepares an element from its tag token and then attaches it to
/// the current node. The preparation step (attributes and, if
/// requested, acknowledging the self-closing flag) is delegated to
/// SetupElement.
/// </summary>
/// <param name="element">The element that is set up and inserted.</param>
/// <param name="elementToken">The tag token the element originates from.</param>
/// <param name="acknowledgeSelfClosing">Forwarded to the setup step; states whether the self-closing flag should be acknowledged.</param>
void AddElementToCurrentNode(Element element, HtmlToken elementToken, Boolean acknowledgeSelfClosing = false)
{
    // The element is configured first and inserted afterwards.
    SetupElement(element, elementToken, acknowledgeSelfClosing);

    AddElementToCurrentNode(element);
}
/// <summary>
/// Processes a token according to the "in table body" insertion
/// mode (see 8.2.5.4.13).
/// </summary>
/// <param name="token">The token to process.</param>
void InTableBody(HtmlToken token)
{
    var isStartTag = token.Type == HtmlTokenType.StartTag;

    // Anything that is neither a start nor an end tag follows the "in table" rules.
    if (!isStartTag && token.Type != HtmlTokenType.EndTag)
    {
        InTable(token);
        return;
    }

    var tag = (HtmlTagToken)token;
    var name = tag.Name;

    if (isStartTag)
    {
        if (name == HTMLTableRowElement.Tag)
        {
            // <tr>: open a row inside the current table section.
            ClearStackBackToTableSection();
            AddElementToCurrentNode(new HTMLTableRowElement(), token);
            insert = HtmlTreeMode.InRow;
        }
        else if (name.IsTableCellElement())
        {
            // A cell without a row: act as if a <tr> had been seen, then
            // hand the cell token to the "in row" rules.
            InTableBody(HtmlToken.OpenTag(HTMLTableRowElement.Tag));
            InRow(token);
        }
        else if (name.IsGeneralTableElement())
        {
            InTableBodyCloseTable(tag);
        }
        else
        {
            InTable(token);
        }

        return;
    }

    // From here on the token is an end tag.
    if (name.IsTableSectionElement())
    {
        if (!IsInTableScope(name))
        {
            RaiseErrorOccurred(ErrorCode.TableSectionNotInScope);
            return;
        }

        ClearStackBackToTableSection();
        CloseCurrentNode();
        insert = HtmlTreeMode.InTable;
    }
    else if (name.IsSpecialTableElement(true))
    {
        RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
    }
    else if (name == HTMLTableElement.Tag)
    {
        InTableBodyCloseTable(tag);
    }
    else
    {
        InTable(token);
    }
}
/// <summary>
/// Processes a token according to the "in head" insertion mode
/// (see 8.2.5.4.4).
/// </summary>
/// <param name="token">The token to process.</param>
void InHead(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        // The part removed from the start of the character token is
        // inserted; whatever is left continues with the fallback below.
        var text = (HtmlCharacterToken)token;
        var leading = text.TrimStart();
        InsertCharacters(leading);

        if (text.IsEmpty)
            return;
    }
    else
    {
        if (token.Type == HtmlTokenType.Comment)
        {
            AddComment(CurrentNode, token);
            return;
        }

        if (token.Type == HtmlTokenType.DOCTYPE)
        {
            RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
            return;
        }

        if (token.IsStartTag(HTMLHtmlElement.Tag))
        {
            InBody(token);
            return;
        }

        if (token.IsStartTag(HTMLBaseElement.Tag, HTMLBaseFontElement.Tag, HTMLBgsoundElement.Tag, HTMLLinkElement.Tag))
        {
            // These elements are inserted and closed again right away.
            var tagName = ((HtmlTagToken)token).Name;
            var created = HTMLElement.Factory(tagName);
            AddElementToCurrentNode(created, token, true);
            CloseCurrentNode();
            return;
        }

        if (token.IsStartTag(HTMLMetaElement.Tag))
        {
            var meta = new HTMLMetaElement();
            AddElementToCurrentNode(meta, token, true);
            CloseCurrentNode();

            // First choice: an explicit charset attribute.
            var encoding = meta.GetAttribute(HtmlEncoding.CHARSET);

            if (encoding != null && HtmlEncoding.IsSupported(encoding))
            {
                SetCharset(encoding);
                return;
            }

            // Second choice: http-equiv="Content-Type" with the encoding
            // extracted from the content attribute.
            var httpEquiv = meta.GetAttribute("http-equiv");

            if (httpEquiv != null && httpEquiv.Equals("Content-Type", StringComparison.OrdinalIgnoreCase))
            {
                var content = meta.GetAttribute("content") ?? string.Empty;
                var extracted = HtmlEncoding.Extract(content);

                if (HtmlEncoding.IsSupported(extracted))
                    SetCharset(extracted);
            }

            return;
        }

        if (token.IsStartTag(HTMLTitleElement.Tag))
        {
            RCDataAlgorithm((HtmlTagToken)token);
            return;
        }

        // noscript only takes the raw text path when scripting is enabled.
        if (token.IsStartTag(HTMLNoElement.NoFramesTag, HTMLStyleElement.Tag) ||
            (doc.IsScripting && token.IsStartTag(HTMLNoElement.NoScriptTag)))
        {
            RawtextAlgorithm((HtmlTagToken)token);
            return;
        }

        if (token.IsStartTag(HTMLNoElement.NoScriptTag))
        {
            AddElementToCurrentNode(new HTMLElement(), token);
            insert = HtmlTreeMode.InHeadNoScript;
            return;
        }

        if (token.IsStartTag(HTMLScriptElement.Tag))
        {
            var script = new HTMLScriptElement();
            //script.IsParserInserted = true;
            //script.IsAlreadyStarted = fragment;
            AddElementToCurrentNode(script, token);
            tokenizer.Switch(HtmlParseMode.Script);

            // Remember the current mode so the "text" mode can return to it.
            originalInsert = insert;
            insert = HtmlTreeMode.Text;
            return;
        }

        if (token.IsEndTag(HTMLHeadElement.Tag))
        {
            CloseCurrentNode();
            insert = HtmlTreeMode.AfterHead;
            return;
        }

        if (token.IsStartTag(HTMLHeadElement.Tag))
        {
            RaiseErrorOccurred(ErrorCode.HeadTagMisplaced);
            return;
        }

        if (token.IsEndTagInv(HTMLHtmlElement.Tag, HTMLBodyElement.Tag, HTMLBRElement.Tag))
        {
            RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
            return;
        }
    }

    // Anything else: close the current node, switch to "after head"
    // and let that mode handle the token.
    CloseCurrentNode();
    insert = HtmlTreeMode.AfterHead;
    AfterHead(token);
}
/// <summary>
/// Processes a token according to the "text" insertion mode
/// (see 8.2.5.4.8).
/// </summary>
/// <param name="token">The token to process.</param>
void Text(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.Character)
    {
        InsertCharacters(((HtmlCharacterToken)token).Data);
        return;
    }

    if (type == HtmlTokenType.EOF)
    {
        // Premature end: report it, leave the text mode and reprocess the token.
        RaiseErrorOccurred(ErrorCode.EOF);
        CloseCurrentNode();
        insert = originalInsert;
        Consume(token);
        return;
    }

    // All remaining token types except end tags are not handled here.
    if (type != HtmlTokenType.EndTag)
        return;

    var endTag = (HtmlTagToken)token;

    if (endTag.Name != HTMLScriptElement.Tag)
    {
        CloseCurrentNode();
        insert = originalInsert;
        return;
    }

    PerformMicrotaskCheckpoint();
    ProvideStableState();

    var script = (HTMLScriptElement)CurrentNode;
    CloseCurrentNode();
    insert = originalInsert;

    // The insertion point is saved and restored around the (currently
    // disabled) script preparation.
    var savedInsertionPoint = tokenizer.Stream.InsertionPoint;
    nesting++;
    //script.Prepare();
    nesting--;

    if (nesting == 0)
        pause = false;

    tokenizer.Stream.InsertionPoint = savedInsertionPoint;

    if (pendingParsingBlock == null)
        return;

    if (nesting != 0)
    {
        pause = true;
        return;
    }

    do
    {
        script = pendingParsingBlock;
        pendingParsingBlock = null;
        //TODO Do not call Tokenizer HERE
        //TODO
        // 3. If the parser's Document has a style sheet that is blocking scripts or the script's "ready to be parser-executed"
        //    flag is not set: spin the event loop until the parser's Document has no style sheet that is blocking scripts and
        //    the script's "ready to be parser-executed" flag is set.
        //TODO From here on Tokenizer can be called again
        savedInsertionPoint = tokenizer.Stream.InsertionPoint;
        nesting++;
        //script.Execute();
        nesting--;

        if (nesting == 0)
            pause = false;

        tokenizer.Stream.ResetInsertionPoint();
    }
    while (pendingParsingBlock != null);
}
/// <summary>
/// Processes a token according to the "in head noscript" insertion
/// mode (see 8.2.5.4.5).
/// </summary>
/// <param name="token">The token to process.</param>
void InHeadNoScript(HtmlToken token)
{
    if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }
    else if (token.IsStartTag(HTMLHtmlElement.Tag))
    {
        InBody(token);
        return;
    }
    else if (token.IsEndTag(HTMLNoElement.NoScriptTag))
    {
        // </noscript>: leave the noscript element and go back to "in head".
        CloseCurrentNode();
        insert = HtmlTreeMode.InHead;
        return;
    }
    else if (token.Type == HtmlTokenType.Character)
    {
        // The part removed from the start of the token is inserted; a
        // non-empty remainder is handled by the fallback at the bottom.
        var text = (HtmlCharacterToken)token;
        var leading = text.TrimStart();
        InsertCharacters(leading);

        if (text.IsEmpty)
            return;
    }
    else if (token.Type == HtmlTokenType.Comment ||
             token.IsStartTag(HTMLBaseFontElement.Tag, HTMLBgsoundElement.Tag, HTMLLinkElement.Tag,
                              HTMLMetaElement.Tag, HTMLNoElement.NoFramesTag, HTMLStyleElement.Tag))
    {
        // Comments and these start tags are processed with the "in head" rules.
        InHead(token);
        return;
    }
    else if (token.IsStartTag(HTMLHeadElement.Tag, HTMLNoElement.NoScriptTag))
    {
        RaiseErrorOccurred(ErrorCode.TagInappropriate);
        return;
    }
    else if (token.IsEndTagInv(HTMLBRElement.Tag))
    {
        RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
        return;
    }

    // Anything else: report the error, close the current node and
    // reprocess the token with the "in head" rules.
    RaiseErrorOccurred(ErrorCode.TokenNotPossible);
    CloseCurrentNode();
    insert = HtmlTreeMode.InHead;
    InHead(token);
}
/// <summary>
/// Processes a token according to the "after after frameset"
/// insertion mode (see 8.2.5.4.22).
/// </summary>
/// <param name="token">The token to process.</param>
void AfterAfterFrameset(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Comment)
    {
        // Comments are appended to the document itself.
        AddComment(doc, token);
        return;
    }

    if (token.Type == HtmlTokenType.Character)
    {
        var text = (HtmlCharacterToken)token;
        var leading = text.TrimStart();
        ReconstructFormatting();
        InsertCharacters(leading);

        if (text.IsEmpty)
            return;
    }
    else if (token.Type == HtmlTokenType.DOCTYPE || token.IsStartTag(HTMLHtmlElement.Tag))
    {
        InBody(token);
        return;
    }
    else if (token.IsStartTag(HTMLNoElement.NoFramesTag))
    {
        InHead(token);
        return;
    }
    else if (token.Type == HtmlTokenType.EOF)
    {
        End();
        return;
    }

    // Every token not handled above (including a non-empty character
    // remainder) is reported as an error.
    RaiseErrorOccurred(ErrorCode.TokenNotPossible);
}
/// <summary>
/// Processes a token according to the "initial" insertion mode
/// (see 8.2.5.4.1).
/// </summary>
/// <param name="token">The token to process.</param>
void Initial(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(doc, token);
        return;
    }

    if (token.Type == HtmlTokenType.DOCTYPE)
    {
        var doctypeToken = (HtmlDoctypeToken)token;

        if (!doctypeToken.IsValid)
            RaiseErrorOccurred(ErrorCode.DoctypeInvalid);

        AddDoctype(doctypeToken);

        // The doctype decides the quirks mode of the document.
        if (doctypeToken.IsFullQuirks)
            doc.QuirksMode = QuirksMode.On;
        else if (doctypeToken.IsLimitedQuirks)
            doc.QuirksMode = QuirksMode.Limited;

        insert = HtmlTreeMode.BeforeHtml;
        return;
    }

    if (token.Type == HtmlTokenType.Character)
    {
        // The trimmed-off start is dropped; only a non-empty remainder is
        // processed further.
        var text = (HtmlCharacterToken)token;
        text.TrimStart();

        if (text.IsEmpty)
            return;
    }

    // No doctype was seen: unless the document is embedded this is an
    // error and switches the document to quirks mode.
    if (!doc.IsEmbedded)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeMissing);
        doc.QuirksMode = QuirksMode.On;
    }

    insert = HtmlTreeMode.BeforeHtml;
    BeforeHtml(token);
}
/// <summary>
/// Processes a token according to the "after frameset" insertion
/// mode (see 8.2.5.4.20).
/// </summary>
/// <param name="token">The token to process.</param>
void AfterFrameset(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Character)
    {
        var text = (HtmlCharacterToken)token;
        var leading = text.TrimStart();
        InsertCharacters(leading);

        if (text.IsEmpty)
            return;
    }
    else if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(CurrentNode, token);
        return;
    }
    else if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }
    else if (token.Type == HtmlTokenType.StartTag)
    {
        var name = ((HtmlTagToken)token).Name;

        if (name == HTMLHtmlElement.Tag)
        {
            InBody(token);
            return;
        }

        if (name == HTMLNoElement.NoFramesTag)
        {
            InHead(token);
            return;
        }

        // Any other start tag falls through to the error below.
    }
    else if (token.Type == HtmlTokenType.EndTag && ((HtmlTagToken)token).Name == HTMLHtmlElement.Tag)
    {
        insert = HtmlTreeMode.AfterAfterFrameset;
        return;
    }
    else if (token.Type == HtmlTokenType.EOF)
    {
        End();
        return;
    }

    RaiseErrorOccurred(ErrorCode.TokenNotPossible);
}
/// <summary>
/// Creates a comment node carrying the data of the given comment
/// token and appends it to the given parent.
/// </summary>
/// <param name="parent">The node that receives the new comment node.</param>
/// <param name="commentToken">The token to take the data from; it is cast to a comment token.</param>
void AddComment(Node parent, HtmlToken commentToken)
{
    var source = (HtmlCommentToken)commentToken;
    var node = new Comment { Data = source.Data };
    parent.AppendChild(node);
}
/// <summary>
/// Processes a token according to the "before head" insertion mode
/// (see 8.2.5.4.3).
/// </summary>
/// <param name="token">The token to process.</param>
void BeforeHead(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(CurrentNode, token);
        return;
    }

    if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }

    if (token.Type == HtmlTokenType.StartTag)
    {
        var name = ((HtmlTagToken)token).Name;

        if (name == HTMLHtmlElement.Tag)
        {
            InBody(token);
            return;
        }

        if (name == HTMLHeadElement.Tag)
        {
            AddElementToCurrentNode(new HTMLHeadElement(), token);
            insert = HtmlTreeMode.InHead;
            return;
        }
    }

    if (token.IsEndTagInv(HTMLHtmlElement.Tag, HTMLBodyElement.Tag, HTMLBRElement.Tag, HTMLHeadElement.Tag))
    {
        RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
        return;
    }

    if (token.Type == HtmlTokenType.Character)
    {
        // The trimmed-off start is dropped; an empty remainder ends processing.
        var text = (HtmlCharacterToken)token;
        text.TrimStart();

        if (text.IsEmpty)
            return;
    }

    // Anything else: act as if a <head> start tag had been seen and
    // reprocess the token with the "in head" rules.
    BeforeHead(HtmlToken.OpenTag(HTMLHeadElement.Tag));
    InHead(token);
}
/// <summary>
/// Processes a token according to the "in select" insertion mode
/// (see 8.2.5.4.16).
/// </summary>
/// <param name="token">The token to process.</param>
void InSelect(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
            InsertCharacters(((HtmlCharacterToken)token).Data);
            break;

        case HtmlTokenType.Comment:
            AddComment(CurrentNode, token);
            break;

        case HtmlTokenType.DOCTYPE:
            RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
            break;

        case HtmlTokenType.StartTag:
            switch (((HtmlTagToken)token).Name)
            {
                case HTMLHtmlElement.Tag:
                    InBody(token);
                    break;

                case HTMLOptionElement.Tag:
                    // A still open option is closed before the new one is added.
                    if (CurrentNode.NodeName == HTMLOptionElement.Tag)
                        InSelectEndTagOption();

                    AddElementToCurrentNode(new HTMLOptionElement(), token);
                    break;

                case HTMLOptGroupElement.Tag:
                    // Close an open option first, then an open optgroup.
                    if (CurrentNode.NodeName == HTMLOptionElement.Tag)
                        InSelectEndTagOption();

                    if (CurrentNode.NodeName == HTMLOptGroupElement.Tag)
                        InSelectEndTagOptgroup();

                    AddElementToCurrentNode(new HTMLOptGroupElement(), token);
                    break;

                case HTMLSelectElement.Tag:
                    // A nested <select> is an error and is handled like </select>.
                    RaiseErrorOccurred(ErrorCode.SelectNesting);
                    InSelectEndTagSelect();
                    break;

                case HTMLInputElement.Tag:
                case HTMLKeygenElement.Tag:
                case HTMLTextAreaElement.Tag:
                    RaiseErrorOccurred(ErrorCode.IllegalElementInSelectDetected);

                    // With a select in select scope: close it and reprocess the token.
                    if (IsInSelectScope(HTMLSelectElement.Tag))
                    {
                        InSelectEndTagSelect();
                        Consume(token);
                    }

                    break;

                case HTMLScriptElement.Tag:
                    InHead(token);
                    break;

                default:
                    RaiseErrorOccurred(ErrorCode.IllegalElementInSelectDetected);
                    break;
            }

            break;

        case HtmlTokenType.EndTag:
            switch (((HtmlTagToken)token).Name)
            {
                case HTMLOptGroupElement.Tag:
                    InSelectEndTagOptgroup();
                    break;

                case HTMLOptionElement.Tag:
                    InSelectEndTagOption();
                    break;

                case HTMLSelectElement.Tag:
                    if (IsInSelectScope(HTMLSelectElement.Tag))
                        InSelectEndTagSelect();
                    else
                        RaiseErrorOccurred(ErrorCode.SelectNotInScope);

                    break;

                default:
                    RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
                    break;
            }

            break;

        case HtmlTokenType.EOF:
            if (CurrentNode != doc.DocumentElement)
                RaiseErrorOccurred(ErrorCode.CurrentNodeIsNotRoot);

            End();
            break;

        default:
            RaiseErrorOccurred(ErrorCode.TokenNotPossible);
            break;
    }
}
/// <summary>
/// Dispatches a token either to the handler of the current insertion
/// mode or to the foreign content handler, depending on the adjusted
/// current node and the token.
/// </summary>
/// <param name="token">The token to consume.</param>
void Consume(HtmlToken token)
{
    var current = AdjustedCurrentNode;

    // The insertion mode handlers apply when there is no adjusted current
    // node, when it is in the HTML namespace, on EOF, or when node and
    // token match one of the integration point combinations below.
    var useInsertionMode =
        current == null ||
        current.IsInHtml ||
        token.IsEof ||
        (current.IsHtmlTIP && token.IsHtmlCompatible) ||
        (current.IsMathMLTIP && token.IsMathCompatible) ||
        (current.IsInMathMLSVGReady && token.IsSvg);

    if (!useInsertionMode)
    {
        Foreign(token);
        return;
    }

    switch (insert)
    {
        case HtmlTreeMode.Initial:
            Initial(token);
            break;

        case HtmlTreeMode.BeforeHtml:
            BeforeHtml(token);
            break;

        case HtmlTreeMode.BeforeHead:
            BeforeHead(token);
            break;

        case HtmlTreeMode.InHead:
            InHead(token);
            break;

        case HtmlTreeMode.InHeadNoScript:
            InHeadNoScript(token);
            break;

        case HtmlTreeMode.AfterHead:
            AfterHead(token);
            break;

        case HtmlTreeMode.InBody:
            InBody(token);
            break;

        case HtmlTreeMode.Text:
            Text(token);
            break;

        case HtmlTreeMode.InTable:
            InTable(token);
            break;

        case HtmlTreeMode.InCaption:
            InCaption(token);
            break;

        case HtmlTreeMode.InColumnGroup:
            InColumnGroup(token);
            break;

        case HtmlTreeMode.InTableBody:
            InTableBody(token);
            break;

        case HtmlTreeMode.InRow:
            InRow(token);
            break;

        case HtmlTreeMode.InCell:
            InCell(token);
            break;

        case HtmlTreeMode.InSelect:
            InSelect(token);
            break;

        case HtmlTreeMode.InSelectInTable:
            InSelectInTable(token);
            break;

        case HtmlTreeMode.AfterBody:
            AfterBody(token);
            break;

        case HtmlTreeMode.InFrameset:
            InFrameset(token);
            break;

        case HtmlTreeMode.AfterFrameset:
            AfterFrameset(token);
            break;

        case HtmlTreeMode.AfterAfterBody:
            AfterAfterBody(token);
            break;

        case HtmlTreeMode.AfterAfterFrameset:
            AfterAfterFrameset(token);
            break;
    }
}
/// <summary>
/// Processes a token according to the "in select in table" insertion
/// mode (see 8.2.5.4.17).
/// </summary>
/// <param name="token">The token to process.</param>
void InSelectInTable(HtmlToken token)
{
    var tag = token as HtmlTagToken;

    // Only tags naming a table cell, a table element or the caption get
    // special treatment; everything else uses the "in select" rules.
    var isTableRelated = tag != null &&
        (tag.Name.IsTableCellElement() || tag.Name.IsTableElement() || tag.Name == HTMLTableCaptionElement.Tag);

    if (!isTableRelated)
    {
        InSelect(token);
        return;
    }

    if (token.Type == HtmlTokenType.StartTag)
    {
        // Start tag: close the select and reprocess the token.
        RaiseErrorOccurred(ErrorCode.IllegalElementInSelectDetected);
        InSelectEndTagSelect();
        Consume(token);
        return;
    }

    RaiseErrorOccurred(ErrorCode.TagCannotEndHere);

    // Otherwise the select is only closed when the named element is in table scope.
    if (IsInTableScope(tag.Name))
    {
        InSelectEndTagSelect();
        Consume(token);
    }
}
/// <summary>
/// Processes a token according to the "in table" insertion mode
/// (see 8.2.5.4.9).
/// </summary>
/// <param name="token">The token to process.</param>
void InTable(HtmlToken token)
{
    if (token.Type == HtmlTokenType.Comment)
    {
        AddComment(CurrentNode, token);
    }
    else if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
    }
    else if (token.Type == HtmlTokenType.StartTag)
    {
        var tag = (HtmlTagToken)token;

        switch (tag.Name)
        {
            case HTMLTableCaptionElement.Tag:
            {
                ClearStackBackToTable();
                InsertScopeMarker();
                var element = new HTMLTableCaptionElement();
                AddElementToCurrentNode(element, token);
                insert = HtmlTreeMode.InCaption;
                break;
            }
            case HTMLTableColElement.ColgroupTag:
            {
                ClearStackBackToTable();
                var element = new HTMLTableColElement();
                AddElementToCurrentNode(element, token);
                insert = HtmlTreeMode.InColumnGroup;
                break;
            }
            case HTMLTableColElement.ColTag:
            {
                // A col without colgroup: act as if a colgroup start tag had
                // been seen, then process the token in "in column group".
                InTable(HtmlToken.OpenTag(HTMLTableColElement.ColgroupTag));
                InColumnGroup(token);
                break;
            }
            case HTMLTableSectionElement.BodyTag:
            case HTMLTableSectionElement.HeadTag:
            case HTMLTableSectionElement.FootTag:
            {
                ClearStackBackToTable();
                var element = new HTMLTableSectionElement();
                AddElementToCurrentNode(element, token);
                insert = HtmlTreeMode.InTableBody;
                break;
            }
            case HTMLTableCellElement.NormalTag:
            case HTMLTableCellElement.HeadTag:
            case HTMLTableRowElement.Tag:
            {
                // Cells and rows without a section: act as if a table body
                // start tag had been seen, then process the token there.
                InTable(HtmlToken.OpenTag(HTMLTableSectionElement.BodyTag));
                InTableBody(token);
                break;
            }
            case HTMLTableElement.Tag:
            {
                RaiseErrorOccurred(ErrorCode.TableNesting);

                // The token is only reprocessed if closing the table succeeded.
                if (InTableEndTagTable())
                    Consume(token);

                break;
            }
            case HTMLScriptElement.Tag:
            case HTMLStyleElement.Tag:
            {
                InHead(token);
                break;
            }
            case HTMLInputElement.Tag:
            {
                // The static comparison tolerates a null attribute value, so an
                // input without a type attribute cannot throw here and simply
                // takes the non-hidden path.
                var isHidden = string.Equals(tag.GetAttribute("type"), "hidden", StringComparison.OrdinalIgnoreCase);

                if (isHidden)
                {
                    RaiseErrorOccurred(ErrorCode.InputUnexpected);
                    var element = new HTMLInputElement();
                    AddElementToCurrentNode(element, token, true);
                    CloseCurrentNode();
                }
                else
                {
                    RaiseErrorOccurred(ErrorCode.TokenNotPossible);
                    InBodyWithFoster(token);
                }

                break;
            }
            case HTMLFormElement.Tag:
            {
                RaiseErrorOccurred(ErrorCode.FormInappropriate);

                // Only the first form is inserted; it is closed again right away.
                if (form == null)
                {
                    var element = new HTMLFormElement();
                    AddElementToCurrentNode(element, token);
                    form = element;
                    CloseCurrentNode();
                }

                break;
            }
            default:
            {
                RaiseErrorOccurred(ErrorCode.IllegalElementInTableDetected);
                InBodyWithFoster(token);
                break;
            }
        }
    }
    else if (token.Type == HtmlTokenType.EndTag)
    {
        var tag = (HtmlTagToken)token;

        switch (tag.Name)
        {
            case HTMLTableElement.Tag:
            {
                InTableEndTagTable();
                break;
            }
            case HTMLBodyElement.Tag:
            case HTMLTableColElement.ColgroupTag:
            case HTMLTableColElement.ColTag:
            case HTMLTableCaptionElement.Tag:
            case HTMLHtmlElement.Tag:
            case HTMLTableSectionElement.BodyTag:
            case HTMLTableRowElement.Tag:
            case HTMLTableSectionElement.HeadTag:
            case HTMLTableCellElement.HeadTag:
            case HTMLTableSectionElement.FootTag:
            case HTMLTableCellElement.NormalTag:
            {
                // These end tags are reported and otherwise ignored.
                RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
                break;
            }
            default:
            {
                RaiseErrorOccurred(ErrorCode.IllegalElementInTableDetected);
                InBodyWithFoster(token);
                break;
            }
        }
    }
    else if (token.Type == HtmlTokenType.Character && CurrentNode != null && CurrentNode.IsTableElement())
    {
        InTableText((HtmlCharacterToken)token);
    }
    else if (token.Type == HtmlTokenType.EOF)
    {
        if (CurrentNode != doc.DocumentElement)
            RaiseErrorOccurred(ErrorCode.CurrentNodeIsNotRoot);

        End();
    }
    else
    {
        // Remaining tokens (e.g. characters outside of a table element) are
        // reported and processed with the foster-parenting "in body" rules.
        RaiseErrorOccurred(ErrorCode.TokenNotPossible);
        InBodyWithFoster(token);
    }
}
/// <summary>
/// Processes a token according to the "in head noscript" insertion
/// mode (see 8.2.5.4.5).
/// </summary>
/// <param name="token">The token to process.</param>
void InHeadNoScript(HtmlToken token)
{
    if (token.Type == HtmlTokenType.DOCTYPE)
    {
        RaiseErrorOccurred(ErrorCode.DoctypeTagInappropriate);
        return;
    }

    if (token.Type == HtmlTokenType.StartTag && ((HtmlTagToken)token).Name == HTMLHtmlElement.Tag)
    {
        InBody(token);
        return;
    }

    if (token.Type == HtmlTokenType.EndTag && ((HtmlTagToken)token).Name == HTMLNoElement.NoScriptTag)
    {
        // </noscript>: leave the noscript element and go back to "in head".
        CloseCurrentNode();
        insert = HtmlTreeMode.InHead;
        return;
    }

    // Ignorable tokens, comments and the listed start tags are all
    // processed with the "in head" rules.
    var delegateToHead =
        token.IsIgnorable ||
        token.Type == HtmlTokenType.Comment ||
        (token.Type == HtmlTokenType.StartTag &&
         ((HtmlTagToken)token).Name.IsOneOf(HTMLBaseFontElement.Tag, HTMLBgsoundElement.Tag, HTMLLinkElement.Tag,
                                            HTMLMetaElement.Tag, HTMLNoElement.NoFramesTag, HTMLStyleElement.Tag));

    if (delegateToHead)
    {
        InHead(token);
        return;
    }

    if (token.Type == HtmlTokenType.StartTag &&
        ((HtmlTagToken)token).Name.IsOneOf(HTMLHeadElement.Tag, HTMLNoElement.NoScriptTag))
    {
        RaiseErrorOccurred(ErrorCode.TagInappropriate);
        return;
    }

    // Every end tag that reaches this point, except </br>, is an error.
    if (token.Type == HtmlTokenType.EndTag && ((HtmlTagToken)token).Name != HTMLBRElement.Tag)
    {
        RaiseErrorOccurred(ErrorCode.TagCannotEndHere);
        return;
    }

    // Anything else: report the error, close the current node and
    // reprocess the token with the "in head" rules.
    RaiseErrorOccurred(ErrorCode.TokenNotPossible);
    CloseCurrentNode();
    insert = HtmlTreeMode.InHead;
    InHead(token);
}