/// <summary>
/// Sets up a tree builder for the given document. The tokenizer is fed
/// from the document's source and receives the event sink found in the
/// document's options.
/// </summary>
/// <param name="document">
/// The document instance to be constructed.
/// </param>
internal HtmlDomBuilder(HtmlDocument document)
{
    var source = document.Source;
    var events = document.Options.Events;

    _tokenizer = new HtmlTokenizer(source, events);
    _document = document;

    // Fresh, empty bookkeeping structures for the tree construction.
    _formattingElements = new List<Element>();
    _templateModes = new Stack<HtmlTreeMode>();
    _openElements = new List<Element>();

    // Parsing begins in the initial mode with the _frameset flag set.
    _currentMode = HtmlTreeMode.Initial;
    _frameset = true;
}
/// <summary>
/// Sets up a tree builder for the given document. The entity resolver is
/// taken from the document's options (IEntityService) and falls back to
/// HtmlEntityService.Resolver when no such service is registered.
/// </summary>
/// <param name="document">
/// The document instance to be constructed.
/// </param>
internal HtmlDomBuilder(HtmlDocument document)
{
    var entityResolver = document.Options.GetService<IEntityService>();

    if (entityResolver == null)
    {
        entityResolver = HtmlEntityService.Resolver;
    }

    var source = document.Source;
    var events = document.Options.Events;

    _tokenizer = new HtmlTokenizer(source, events, entityResolver);
    _document = document;

    // Fresh, empty bookkeeping structures for the tree construction.
    _formattingElements = new List<Element>();
    _templateModes = new Stack<HtmlTreeMode>();
    _openElements = new List<Element>();

    // Parsing begins in the initial mode with the _frameset flag set.
    _currentMode = HtmlTreeMode.Initial;
    _frameset = true;
}
/// <summary>
/// Sets up a tree builder for the given document. Tokenizer errors are
/// forwarded to the document's context, and the element factories for
/// HTML, MathML and SVG are looked up from the document's options.
/// </summary>
/// <param name="document">
/// The document instance to be constructed.
/// </param>
public HtmlDomBuilder(HtmlDocument document)
{
    var options = document.Options;
    var context = document.Context;

    // Use the configured entity provider, or the built-in resolver when
    // none has been registered.
    var resolver = options.GetProvider<IEntityProvider>() ?? HtmlEntityService.Resolver;

    _tokenizer = new HtmlTokenizer(document.Source, resolver);
    _tokenizer.Error += (sender, error) => context.Fire(error);

    _htmlFactory = options.GetFactory<IElementFactory<HtmlElement>>();
    _mathFactory = options.GetFactory<IElementFactory<MathElement>>();
    _svgFactory = options.GetFactory<IElementFactory<SvgElement>>();

    _document = document;

    // Fresh, empty bookkeeping structures for the tree construction.
    _formattingElements = new List<Element>();
    _templateModes = new Stack<HtmlTreeMode>();
    _openElements = new List<Element>();

    // Parsing begins in the initial mode with the _frameset flag set.
    _currentMode = HtmlTreeMode.Initial;
    _frameset = true;
}
/// <summary>
/// Acts as if a body end tag has been found in the InBody state.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the token was not ignored, otherwise false.</returns>
Boolean InBodyEndTagBody(HtmlToken token)
{
    // Without a body element in scope the end tag is reported and ignored.
    if (!IsInScope(TagNames.Body))
    {
        RaiseErrorOccurred(HtmlParseError.BodyNotInScope, token);
        return false;
    }

    CheckBodyOnClosing(token);
    _currentMode = HtmlTreeMode.AfterBody;
    return true;
}
/// <summary>
/// Follows the generic RCData parsing algorithm: the element for the tag
/// is inserted, the tokenizer is switched to RCData and the builder goes
/// into the Text mode while remembering the mode it came from.
/// </summary>
/// <param name="tag">The given tag token.</param>
void RCDataAlgorithm(HtmlTagToken tag)
{
    AddElement(tag);
    _tokenizer.State = HtmlParseMode.RCData;

    // Remember where to return to once the text content has ended.
    _previousMode = _currentMode;
    _currentMode = HtmlTreeMode.Text;
}
/// <summary>
/// See 8.2.5.4.3 The "before head" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void BeforeHead(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.Comment)
    {
        CurrentNode.AddComment(token);
        return;
    }

    if (type == HtmlTokenType.Doctype)
    {
        RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
        return;
    }

    if (type == HtmlTokenType.Character)
    {
        // Leading content is trimmed off; if nothing is left the token is done.
        token.TrimStart();

        if (token.IsEmpty)
        {
            return;
        }
    }
    else if (type == HtmlTokenType.StartTag)
    {
        var name = token.Name;

        if (name.Is(TagNames.Html))
        {
            InBody(token);
            return;
        }

        if (name.Is(TagNames.Head))
        {
            AddElement(new HtmlHeadElement(_document), token.AsTag());
            _currentMode = HtmlTreeMode.InHead;
            return;
        }
    }
    else if (type == HtmlTokenType.EndTag)
    {
        // Only the end tags listed in AllBeforeHead continue below.
        if (!TagNames.AllBeforeHead.Contains(token.Name))
        {
            RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            return;
        }
    }

    // Anything else: act as if a head start tag was seen, then reprocess
    // the token with the InHead rules.
    BeforeHead(HtmlTagToken.Open(TagNames.Head));
    InHead(token);
}
/// <summary>
/// See 8.2.5.4.9 The "in table" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InTable(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            // Characters are only handed to the table text handling while
            // the current node is one of the AllTableMajor elements.
            if (!TagNames.AllTableMajor.Contains(CurrentNode.LocalName))
            {
                break;
            }

            InTableText(token);
            return;
        }
        case HtmlTokenType.EndOfFile:
        {
            InBody(token);
            return;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Caption))
            {
                ClearStackBackTo(TagNames.Table);
                _formattingElements.AddScopeMarker();
                AddElement(new HtmlTableCaptionElement(_document), token.AsTag());
                _currentMode = HtmlTreeMode.InCaption;
                return;
            }

            if (name.Is(TagNames.Colgroup))
            {
                ClearStackBackTo(TagNames.Table);
                AddElement(new HtmlTableColgroupElement(_document), token.AsTag());
                _currentMode = HtmlTreeMode.InColumnGroup;
                return;
            }

            if (name.Is(TagNames.Col))
            {
                // Act as if a colgroup start tag was seen, then reprocess.
                InTable(HtmlTagToken.Open(TagNames.Colgroup));
                InColumnGroup(token);
                return;
            }

            if (TagNames.AllTableSections.Contains(name))
            {
                ClearStackBackTo(TagNames.Table);
                AddElement(new HtmlTableSectionElement(_document, name), token.AsTag());
                _currentMode = HtmlTreeMode.InTableBody;
                return;
            }

            if (TagNames.AllTableCellsRows.Contains(name))
            {
                // Act as if a tbody start tag was seen, then reprocess.
                InTable(HtmlTagToken.Open(TagNames.Tbody));
                InTableBody(token);
                return;
            }

            if (name.Is(TagNames.Table))
            {
                RaiseErrorOccurred(HtmlParseError.TableNesting, token);

                // Close the open table first; reprocess only if that worked.
                if (InTableEndTagTable(token))
                {
                    Home(token);
                }

                return;
            }

            if (name.Is(TagNames.Input))
            {
                var tag = token.AsTag();
                var isHidden = tag.GetAttribute(AttributeNames.Type).Isi(AttributeNames.Hidden);

                if (isHidden)
                {
                    RaiseErrorOccurred(HtmlParseError.InputUnexpected, token);
                    AddElement(new HtmlInputElement(_document), tag, true);
                    CloseCurrentNode();
                }
                else
                {
                    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
                    InBodyWithFoster(token);
                }

                return;
            }

            if (name.Is(TagNames.Form))
            {
                RaiseErrorOccurred(HtmlParseError.FormInappropriate, token);

                // A form is only inserted when none is currently tracked.
                if (_currentFormElement == null)
                {
                    _currentFormElement = new HtmlFormElement(_document);
                    AddElement(_currentFormElement, token.AsTag());
                    CloseCurrentNode();
                }

                return;
            }

            if (TagNames.AllTableHead.Contains(name))
            {
                InHead(token);
                return;
            }

            RaiseErrorOccurred(HtmlParseError.IllegalElementInTableDetected, token);
            InBodyWithFoster(token);
            return;
        }
        case HtmlTokenType.EndTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Table))
            {
                InTableEndTagTable(token);
                return;
            }

            if (name.Is(TagNames.Template))
            {
                InHead(token);
                return;
            }

            if (TagNames.AllTableSpecial.Contains(name) || TagNames.AllTableInner.Contains(name))
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
                return;
            }

            RaiseErrorOccurred(HtmlParseError.IllegalElementInTableDetected, token);
            InBodyWithFoster(token);
            return;
        }
    }

    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
    InBodyWithFoster(token);
}
/// <summary>
/// Acts as if a td or th end tag has been found in the InCell state.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the token was not ignored, otherwise false.</returns>
Boolean InCellEndTagCell(HtmlToken token)
{
    // No table cell in table scope: report the problem and ignore the token.
    if (!IsInTableScope(TagNames.AllTableCells))
    {
        RaiseErrorOccurred(HtmlParseError.TableCellNotInScope, token);
        return false;
    }

    GenerateImpliedEndTags();

    var currentIsCell = TagNames.AllTableCells.Contains(CurrentNode.LocalName);

    if (!currentIsCell)
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
    }

    ClearStackBackTo(TagNames.AllTableCells);
    CloseCurrentNode();
    _formattingElements.ClearFormatting();
    _currentMode = HtmlTreeMode.InRow;
    return true;
}
/// <summary>
/// See 8.2.5.4.1 The "initial" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void Initial(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Comment:
        {
            _document.AddComment(token);
            return;
        }
        case HtmlTokenType.Character:
        {
            token.TrimStart();

            // Something is left after trimming: handle it below.
            if (!token.IsEmpty)
            {
                break;
            }

            return;
        }
        case HtmlTokenType.Doctype:
        {
            var doctype = (HtmlDoctypeToken)token;

            if (!doctype.IsValid)
            {
                RaiseErrorOccurred(HtmlParseError.DoctypeInvalid, token);
            }

            // A missing doctype name is stored as the empty string.
            var node = new DocumentType(_document, doctype.Name ?? String.Empty);
            node.SystemIdentifier = doctype.SystemIdentifier;
            node.PublicIdentifier = doctype.PublicIdentifier;
            _document.AddNode(node);

            if (doctype.IsFullQuirks)
            {
                _document.QuirksMode = QuirksMode.On;
            }
            else if (doctype.IsLimitedQuirks)
            {
                _document.QuirksMode = QuirksMode.Limited;
            }

            _currentMode = HtmlTreeMode.BeforeHtml;
            return;
        }
    }

    // No doctype was seen. Unless the document is embedded this is an
    // error and switches the document to quirks mode.
    if (_options.IsEmbedded == false)
    {
        RaiseErrorOccurred(HtmlParseError.DoctypeMissing, token);
        _document.QuirksMode = QuirksMode.On;
    }

    _currentMode = HtmlTreeMode.BeforeHtml;
    BeforeHtml(token);
}
/// <summary>
/// See 8.2.5.4.21 The "after frameset" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterFrameset(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.EndOfFile:
        {
            End();
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Character:
        {
            // The trimmed-off leading part is inserted; whatever remains
            // in the token is reported below.
            var leading = token.TrimStart();
            AddCharacters(leading);

            if (!token.IsEmpty)
            {
                break;
            }

            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.Is(TagNames.NoFrames))
            {
                InHead(token);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            if (token.Name.Is(TagNames.Html))
            {
                _currentMode = HtmlTreeMode.AfterAfterFrameset;
                return;
            }

            break;
        }
    }

    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
}
/// <summary>
/// Resets the current insertion mode according to the algorithm
/// specified in 8.2.3.1 The insertion mode.
/// http://www.w3.org/html/wg/drafts/html/master/syntax.html#the-insertion-mode
/// </summary>
/// <param name="context">
/// Optional element that stands in for the bottom-most entry of the
/// stack of open elements.
/// </param>
void Reset(Element context = null)
{
    // Walk the stack of open elements from the most recent entry down.
    for (var index = _openElements.Count - 1; index >= 0; index--)
    {
        var isLast = index == 0;
        var current = _openElements[index];

        if (isLast)
        {
            current = context ?? current;
        }

        var name = current.LocalName;

        if (name.Is(TagNames.Select))
        {
            _currentMode = HtmlTreeMode.InSelect;
            return;
        }

        if (TagNames.AllTableCells.Contains(name))
        {
            _currentMode = isLast ? HtmlTreeMode.InBody : HtmlTreeMode.InCell;
            return;
        }

        if (name.Is(TagNames.Tr))
        {
            _currentMode = HtmlTreeMode.InRow;
            return;
        }

        if (TagNames.AllTableSections.Contains(name))
        {
            _currentMode = HtmlTreeMode.InTableBody;
            return;
        }

        if (name.Is(TagNames.Body))
        {
            _currentMode = HtmlTreeMode.InBody;
            return;
        }

        if (name.Is(TagNames.Table))
        {
            _currentMode = HtmlTreeMode.InTable;
            return;
        }

        if (name.Is(TagNames.Caption))
        {
            _currentMode = HtmlTreeMode.InCaption;
            return;
        }

        if (name.Is(TagNames.Colgroup))
        {
            _currentMode = HtmlTreeMode.InColumnGroup;
            return;
        }

        if (name.Is(TagNames.Template))
        {
            _currentMode = _templateModes.Peek();
            return;
        }

        if (name.Is(TagNames.Html))
        {
            _currentMode = HtmlTreeMode.BeforeHead;
            return;
        }

        if (name.Is(TagNames.Head))
        {
            _currentMode = isLast ? HtmlTreeMode.InBody : HtmlTreeMode.InHead;
            return;
        }

        if (name.Is(TagNames.Frameset))
        {
            _currentMode = HtmlTreeMode.InFrameset;
            return;
        }

        // Nothing matched on the bottom-most entry: fall back to InBody.
        if (isLast)
        {
            _currentMode = HtmlTreeMode.InBody;
            return;
        }
    }
}
/// <summary>
/// Restarts the parser by resetting the internal state: tokenizer state,
/// insertion mode, document content, the _frameset flag and all stacks.
/// </summary>
void Restart()
{
    _tokenizer.State = HtmlParseMode.PCData;
    _currentMode = HtmlTreeMode.Initial;

    // Drop everything that has been put into the document so far.
    _document.ReplaceAll(null, true);

    _templateModes.Clear();
    _formattingElements.Clear();
    _openElements.Clear();
    _frameset = true;
}
/// <summary>
/// See 8.2.5.4.20 The "in frameset" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InFrameset(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.Character:
        {
            // The trimmed-off leading part is inserted; whatever remains
            // in the token is reported below.
            var leading = token.TrimStart();
            AddCharacters(leading);

            if (!token.IsEmpty)
            {
                break;
            }

            return;
        }
        case HtmlTokenType.EndOfFile:
        {
            if (CurrentNode != _document.DocumentElement)
            {
                RaiseErrorOccurred(HtmlParseError.CurrentNodeIsNotRoot, token);
            }

            End();
            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.Is(TagNames.Frameset))
            {
                AddElement(new HtmlFrameSetElement(_document), token.AsTag());
                return;
            }

            if (name.Is(TagNames.Frame))
            {
                // The frame element is inserted and closed right away.
                AddElement(new HtmlFrameElement(_document), token.AsTag(), true);
                CloseCurrentNode();
                return;
            }

            if (name.Is(TagNames.NoFrames))
            {
                InHead(token);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            if (!token.Name.Is(TagNames.Frameset))
            {
                break;
            }

            if (CurrentNode != _openElements[0])
            {
                CloseCurrentNode();

                // Leave the frameset mode once the new current node is no
                // frameset any more (never for fragments).
                if (!(IsFragmentCase || CurrentNode.LocalName.Is(TagNames.Frameset)))
                {
                    _currentMode = HtmlTreeMode.AfterFrameset;
                }

                return;
            }

            RaiseErrorOccurred(HtmlParseError.CurrentNodeIsRoot, token);
            return;
        }
    }

    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
}
/// <summary>
/// See 8.2.5.4.19 The "after body" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterBody(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.EndOfFile:
        {
            End();
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.Comment:
        {
            // Comments are attached to the first entry of the open elements.
            _openElements[0].AddComment(token);
            return;
        }
        case HtmlTokenType.Character:
        {
            var leading = token.TrimStart();
            ReconstructFormatting();
            AddCharacters(leading);

            if (!token.IsEmpty)
            {
                break;
            }

            return;
        }
        case HtmlTokenType.StartTag:
        {
            if (!token.Name.Is(TagNames.Html))
            {
                break;
            }

            InBody(token);
            return;
        }
        case HtmlTokenType.EndTag:
        {
            if (!token.Name.Is(TagNames.Html))
            {
                break;
            }

            if (IsFragmentCase)
            {
                RaiseErrorOccurred(HtmlParseError.TagInvalidInFragmentMode, token);
            }
            else
            {
                _currentMode = HtmlTreeMode.AfterAfterBody;
            }

            return;
        }
    }

    // Anything else is an error; go back to InBody and reprocess there.
    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
    _currentMode = HtmlTreeMode.InBody;
    InBody(token);
}
/// <summary>
/// See 8.2.5.4.14 The "in row" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InRow(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.StartTag)
    {
        var name = token.Name;

        if (TagNames.AllTableCells.Contains(name))
        {
            ClearStackBackTo(TagNames.Tr);
            AddElement(token.AsTag());
            _currentMode = HtmlTreeMode.InCell;
            _formattingElements.AddScopeMarker();
            return;
        }

        if (name.Is(TagNames.Tr) || TagNames.AllTableGeneral.Contains(name))
        {
            // Close the current row first; reprocess only if that worked.
            if (InRowEndTagTablerow(token))
            {
                InTableBody(token);
            }

            return;
        }
    }
    else if (type == HtmlTokenType.EndTag)
    {
        var name = token.Name;

        if (name.Is(TagNames.Tr))
        {
            InRowEndTagTablerow(token);
            return;
        }

        if (name.Is(TagNames.Table))
        {
            if (InRowEndTagTablerow(token))
            {
                InTableBody(token);
            }

            return;
        }

        if (TagNames.AllTableSections.Contains(name))
        {
            if (!IsInTableScope(name))
            {
                RaiseErrorOccurred(HtmlParseError.TableSectionNotInScope, token);
                return;
            }

            InRowEndTagTablerow(token);
            InTableBody(token);
            return;
        }

        if (TagNames.AllTableSpecial.Contains(name))
        {
            RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            return;
        }
    }

    // Everything not handled above follows the InTable rules.
    InTable(token);
}
/// <summary>
/// See 8.2.5.4.13 The "in table body" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InTableBody(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.StartTag)
    {
        var name = token.Name;

        if (name.Is(TagNames.Tr))
        {
            ClearStackBackTo(TagNames.AllTableSections);
            AddElement(new HtmlTableRowElement(_document), token.AsTag());
            _currentMode = HtmlTreeMode.InRow;
            return;
        }

        if (TagNames.AllTableCells.Contains(name))
        {
            // Act as if a tr start tag was seen, then reprocess in InRow.
            InTableBody(HtmlTagToken.Open(TagNames.Tr));
            InRow(token);
            return;
        }

        if (TagNames.AllTableGeneral.Contains(name))
        {
            InTableBodyCloseTable(token.AsTag());
            return;
        }
    }
    else if (type == HtmlTokenType.EndTag)
    {
        var name = token.Name;

        if (TagNames.AllTableSections.Contains(name))
        {
            if (!IsInTableScope(name))
            {
                RaiseErrorOccurred(HtmlParseError.TableSectionNotInScope, token);
                return;
            }

            ClearStackBackTo(TagNames.AllTableSections);
            CloseCurrentNode();
            _currentMode = HtmlTreeMode.InTable;
            return;
        }

        if (name.Is(TagNames.Tr) || TagNames.AllTableSpecial.Contains(name))
        {
            RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            return;
        }

        if (name.Is(TagNames.Table))
        {
            InTableBodyCloseTable(token.AsTag());
            return;
        }
    }

    // Everything not handled above follows the InTable rules.
    InTable(token);
}
/// <summary>
/// Acts as if a tr end tag has been found in the InRow state.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the token was not ignored, otherwise false.</returns>
Boolean InRowEndTagTablerow(HtmlToken token)
{
    // No row in table scope: report the problem and ignore the token.
    if (!IsInTableScope(TagNames.Tr))
    {
        RaiseErrorOccurred(HtmlParseError.TableRowNotInScope, token);
        return false;
    }

    ClearStackBackTo(TagNames.Tr);
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InTableBody;
    return true;
}
/// <summary>
/// See 8.2.5.4.22 The "after after body" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterAfterBody(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Comment:
        {
            _document.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            InBody(token);
            return;
        }
        case HtmlTokenType.EndOfFile:
        {
            End();
            return;
        }
        case HtmlTokenType.StartTag:
        {
            if (token.Name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            break;
        }
        case HtmlTokenType.Character:
        {
            var leading = token.TrimStart();
            ReconstructFormatting();
            AddCharacters(leading);

            if (!token.IsEmpty)
            {
                break;
            }

            return;
        }
    }

    // Anything else is an error; go back to InBody and reprocess there.
    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
    _currentMode = HtmlTreeMode.InBody;
    InBody(token);
}
/// <summary>
/// Acts as if a caption end tag has been found in the InCaption state.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the token was not ignored, otherwise false.</returns>
Boolean InCaptionEndTagCaption(HtmlToken token)
{
    // No caption in table scope: report the problem and ignore the token.
    if (!IsInTableScope(TagNames.Caption))
    {
        RaiseErrorOccurred(HtmlParseError.CaptionNotInScope, token);
        return false;
    }

    GenerateImpliedEndTags();

    var currentIsCaption = CurrentNode.LocalName.Is(TagNames.Caption);

    if (!currentIsCaption)
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
    }

    ClearStackBackTo(TagNames.Caption);
    CloseCurrentNode();
    _formattingElements.ClearFormatting();
    _currentMode = HtmlTreeMode.InTable;
    return true;
}
/// <summary>
/// Replaces the top-most template mode with the given one, makes it the
/// current mode and reprocesses the token via Home.
/// </summary>
/// <param name="token">The token to insert.</param>
/// <param name="mode">The mode to push.</param>
void TemplateStep(HtmlToken token, HtmlTreeMode mode)
{
    // Swap the top entry of the template mode stack.
    _templateModes.Pop();
    _templateModes.Push(mode);

    _currentMode = mode;

    Home(token);
}
/// <summary>
/// Finishes the given script element after its end tag has been seen.
/// The element is closed and the previous insertion mode restored; for
/// regular documents the script is additionally prepared and, if the
/// preparation succeeds, run.
/// </summary>
/// <param name="script">
/// The script element to handle. Nothing happens if it is null.
/// </param>
void HandleScript(HtmlScriptElement script)
{
    if (script == null)
    {
        return;
    }

    if (IsFragmentCase)
    {
        // Scripting is disabled for HTML fragments (security reasons), so
        // the script is never prepared or run. The element must still be
        // popped and the mode restored; otherwise the builder would stay
        // in the Text mode with the script element open.
        CloseCurrentNode();
        _currentMode = _previousMode;
        return;
    }

    _document.PerformMicrotaskCheckpoint();
    _document.ProvideStableState();
    CloseCurrentNode();
    _currentMode = _previousMode;

    if (script.Prepare(_document))
    {
        _waiting = RunScript(script);
    }
}
/// <summary>
/// See 8.2.5.4.4 The "in head" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InHead(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.Character:
        {
            // The trimmed-off leading part is inserted; whatever remains
            // in the token is handled below.
            var leading = token.TrimStart();
            AddCharacters(leading);

            if (!token.IsEmpty)
            {
                break;
            }

            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.Is(TagNames.Meta))
            {
                var meta = new HtmlMetaElement(_document);
                AddElement(meta, token.AsTag(), true);
                var encoding = meta.GetEncoding();
                CloseCurrentNode();

                if (encoding != null)
                {
                    try
                    {
                        _document.Source.CurrentEncoding = encoding;
                    }
                    catch (NotSupportedException)
                    {
                        // The source could not switch the encoding in
                        // place, hence parsing starts over.
                        Restart();
                    }
                }

                return;
            }

            if (TagNames.AllHeadBase.Contains(name))
            {
                AddElement(token.AsTag(), true);
                CloseCurrentNode();
                return;
            }

            if (name.Is(TagNames.Title))
            {
                RCDataAlgorithm(token.AsTag());
                return;
            }

            // noscript is only treated as raw text when scripting is on.
            var isRawText = name.IsOneOf(TagNames.Style, TagNames.NoFrames) ||
                (_options.IsScripting && name.Is(TagNames.NoScript));

            if (isRawText)
            {
                RawtextAlgorithm(token.AsTag());
                return;
            }

            if (name.Is(TagNames.NoScript))
            {
                AddElement(token.AsTag());
                _currentMode = HtmlTreeMode.InHeadNoScript;
                return;
            }

            if (name.Is(TagNames.Script))
            {
                var script = new HtmlScriptElement(_document, parserInserted: true, started: IsFragmentCase);
                AddElement(script, token.AsTag());
                _tokenizer.State = HtmlParseMode.Script;
                _previousMode = _currentMode;
                _currentMode = HtmlTreeMode.Text;
                return;
            }

            if (name.Is(TagNames.Head))
            {
                RaiseErrorOccurred(HtmlParseError.HeadTagMisplaced, token);
                return;
            }

            if (name.Is(TagNames.Template))
            {
                AddElement(new HtmlTemplateElement(_document), token.AsTag());
                _formattingElements.AddScopeMarker();
                _frameset = false;
                _currentMode = HtmlTreeMode.InTemplate;
                _templateModes.Push(HtmlTreeMode.InTemplate);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Head))
            {
                CloseCurrentNode();
                _currentMode = HtmlTreeMode.AfterHead;
                _waiting = _document.WaitForReady();
                return;
            }

            if (name.Is(TagNames.Template))
            {
                if (!TagCurrentlyOpen(TagNames.Template))
                {
                    RaiseErrorOccurred(HtmlParseError.TagInappropriate, token);
                    return;
                }

                GenerateImpliedEndTags();

                if (!CurrentNode.LocalName.Is(TagNames.Template))
                {
                    RaiseErrorOccurred(HtmlParseError.TagClosingMismatch, token);
                }

                CloseTemplate();
                return;
            }

            // Only html, body and br end tags continue below.
            if (!name.IsOneOf(TagNames.Html, TagNames.Body, TagNames.Br))
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
                return;
            }

            break;
        }
    }

    // Anything else: close the current node and reprocess in AfterHead.
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.AfterHead;
    AfterHead(token);
}
/// <summary>
/// See 8.2.5.4.2 The "before html" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void BeforeHtml(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.Comment)
    {
        _document.AddComment(token);
        return;
    }

    if (type == HtmlTokenType.Doctype)
    {
        RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
        return;
    }

    if (type == HtmlTokenType.Character)
    {
        // Leading content is trimmed off; if nothing is left the token is done.
        token.TrimStart();

        if (token.IsEmpty)
        {
            return;
        }
    }
    else if (type == HtmlTokenType.StartTag)
    {
        if (token.Name.Is(TagNames.Html))
        {
            AddRoot(token.AsTag());
            _currentMode = HtmlTreeMode.BeforeHead;
            return;
        }
    }
    else if (type == HtmlTokenType.EndTag)
    {
        // Only the end tags listed in AllBeforeHead continue below.
        if (!TagNames.AllBeforeHead.Contains(token.Name))
        {
            RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            return;
        }
    }

    // Anything else: act as if an html start tag was seen, then reprocess
    // the token with the BeforeHead rules.
    BeforeHtml(HtmlTagToken.Open(TagNames.Html));
    BeforeHead(token);
}
/// <summary>
/// See 8.2.5.4.6 The "after head" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterHead(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            // The trimmed-off leading part is inserted; whatever remains
            // in the token is handled below.
            var str = token.TrimStart();
            AddCharacters(str);

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            var tagName = token.Name;

            if (tagName.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }
            else if (tagName.Is(TagNames.Body))
            {
                AfterHeadStartTagBody(token.AsTag());
                return;
            }
            else if (tagName.Is(TagNames.Frameset))
            {
                AddElement(new HtmlFrameSetElement(_document), token.AsTag());
                _currentMode = HtmlTreeMode.InFrameset;
                return;
            }
            else if (TagNames.AllHeadNoTemplate.Contains(tagName))
            {
                RaiseErrorOccurred(HtmlParseError.TagMustBeInHead, token);

                // Temporarily put the head element back onto the stack of
                // open elements so that the token can be processed with
                // the InHead rules, then take it off again.
                var head = _document.Head as Element;
                _openElements.Add(head);
                InHead(token);
                _openElements.Remove(head);
                return;
            }
            else if (tagName.Is(TagNames.Head))
            {
                RaiseErrorOccurred(HtmlParseError.HeadTagMisplaced, token);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            // Only html, body and br end tags continue below.
            if (token.Name.IsOneOf(TagNames.Html, TagNames.Body, TagNames.Br))
            {
                break;
            }

            RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            return;
        }
    }

    // Anything else: act as if a body start tag was seen. That helper
    // clears the _frameset flag, so it is set again before the token is
    // reprocessed via Home.
    AfterHeadStartTagBody(HtmlTagToken.Open(TagNames.Body));
    _frameset = true;
    Home(token);
}
/// <summary>
/// Closes the current table section if one is in table scope and then
/// hands the tag over to the InTable rules.
/// </summary>
/// <param name="tag">The tag to insert (closes table).</param>
void InTableBodyCloseTable(HtmlTagToken tag)
{
    // No table section in table scope: report the problem and stop.
    if (!IsInTableScope(TagNames.AllTableSections))
    {
        RaiseErrorOccurred(HtmlParseError.TableSectionNotInScope, tag);
        return;
    }

    ClearStackBackTo(TagNames.AllTableSections);
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InTable;
    InTable(tag);
}
/// <summary>
/// See 8.2.5.4.8 The "text" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void Text(HtmlToken token)
{
    var type = token.Type;

    if (type == HtmlTokenType.Character)
    {
        AddCharacters(token.Data);
    }
    else if (type == HtmlTokenType.EndTag)
    {
        if (token.Name.Is(TagNames.Script))
        {
            // Script end tags get their own treatment.
            HandleScript(CurrentNode as HtmlScriptElement);
        }
        else
        {
            CloseCurrentNode();
            _currentMode = _previousMode;
        }
    }
    else if (type == HtmlTokenType.EndOfFile)
    {
        // Running out of input here is an error; the element is closed
        // and the token is consumed again in the restored mode.
        RaiseErrorOccurred(HtmlParseError.EOF, token);
        CloseCurrentNode();
        _currentMode = _previousMode;
        Consume(token);
    }
}
/// <summary>
/// See 8.2.5.4.5 The "in head noscript" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void InHeadNoScript(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Comment:
        {
            InHead(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.Character:
        {
            // The trimmed-off leading part is inserted; whatever remains
            // in the token is handled below.
            var leading = token.TrimStart();
            AddCharacters(leading);

            if (!token.IsEmpty)
            {
                break;
            }

            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (TagNames.AllNoScript.Contains(name))
            {
                InHead(token);
                return;
            }

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.IsOneOf(TagNames.Head, TagNames.NoScript))
            {
                RaiseErrorOccurred(HtmlParseError.TagInappropriate, token);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.NoScript))
            {
                CloseCurrentNode();
                _currentMode = HtmlTreeMode.InHead;
                return;
            }

            // Only a br end tag continues below.
            if (!name.Is(TagNames.Br))
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
                return;
            }

            break;
        }
    }

    // Anything else is an error: close the current node and reprocess
    // the token with the InHead rules.
    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InHead;
    InHead(token);
}
/// <summary>
/// Acts as if a colgroup end tag has been found in the InColumnGroup state.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the token was not ignored, otherwise false.</returns>
Boolean InColumnGroupEndTagColgroup(HtmlToken token)
{
    // The current node must be the colgroup, otherwise the token is
    // reported and ignored.
    if (!CurrentNode.LocalName.Is(TagNames.Colgroup))
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
        return false;
    }

    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InTable;
    return true;
}
/// <summary>
/// Handles a start tag while in the InBody state. The tag name is matched
/// against a long chain of cases; the first matching case wins, so the
/// order of the branches matters.
/// </summary>
/// <param name="tag">The start tag token to process.</param>
void InBodyStartTag(HtmlTagToken tag)
{
    var tagName = tag.Name;

    if (tagName.Is(TagNames.Div))
    {
        // A paragraph in button scope is ended before the element is added.
        if (IsInButtonScope())
            InBodyEndTagParagraph(tag);

        AddElement(tag);
    }
    else if (tagName.Is(TagNames.A))
    {
        // Look for an already open anchor among the formatting elements,
        // searching backwards up to the first null entry.
        for (var i = _formattingElements.Count - 1; i >= 0; i--)
        {
            if (_formattingElements[i] == null)
                break;

            if (_formattingElements[i].LocalName.Is(TagNames.A))
            {
                // Nested anchor: report it, run the Heisenberg algorithm
                // for a closing a tag and drop the old anchor from both lists.
                var format = _formattingElements[i];
                RaiseErrorOccurred(HtmlParseError.AnchorNested, tag);
                HeisenbergAlgorithm(HtmlTagToken.Close(TagNames.A));
                _openElements.Remove(format);
                _formattingElements.Remove(format);
                break;
            }
        }

        ReconstructFormatting();
        var element = new HtmlAnchorElement(_document);
        AddElement(element, tag);
        _formattingElements.AddFormatting(element);
    }
    else if (tagName.Is(TagNames.Span))
    {
        ReconstructFormatting();
        AddElement(tag);
    }
    else if (tagName.Is(TagNames.Li))
    {
        InBodyStartTagListItem(tag);
    }
    else if (tagName.Is(TagNames.Img))
    {
        InBodyStartTagBreakrow(tag);
    }
    else if (tagName.IsOneOf(TagNames.Ul, TagNames.P))
    {
        if (IsInButtonScope())
            InBodyEndTagParagraph(tag);

        AddElement(tag);
    }
    else if (TagNames.AllSemanticFormatting.Contains(tagName))
    {
        // The new element is also recorded as a formatting element.
        ReconstructFormatting();
        _formattingElements.AddFormatting(AddElement(tag));
    }
    else if (tagName.Is(TagNames.Script))
    {
        InHead(tag);
    }
    else if (TagNames.AllHeadings.Contains(tagName))
    {
        if (IsInButtonScope())
            InBodyEndTagParagraph(tag);

        // A heading directly inside a heading is an error; the outer
        // heading is closed first.
        if (TagNames.AllHeadings.Contains(CurrentNode.LocalName))
        {
            RaiseErrorOccurred(HtmlParseError.HeadingNested, tag);
            CloseCurrentNode();
        }

        AddElement(new HtmlHeadingElement(_document, tagName), tag);
    }
    else if (tagName.Is(TagNames.Input))
    {
        // The input element is added and closed right away.
        ReconstructFormatting();
        AddElement(new HtmlInputElement(_document), tag, true);
        CloseCurrentNode();

        // Only inputs that are not of type hidden clear the _frameset flag.
        if (!tag.GetAttribute(AttributeNames.Type).Isi(AttributeNames.Hidden))
            _frameset = false;
    }
    else if (tagName.Is(TagNames.Form))
    {
        // Only one form is tracked at a time; a second form start tag is
        // reported and otherwise ignored.
        if (_currentFormElement == null)
        {
            if (IsInButtonScope())
                InBodyEndTagParagraph(tag);

            _currentFormElement = new HtmlFormElement(_document);
            AddElement(_currentFormElement, tag);
        }
        else
            RaiseErrorOccurred(HtmlParseError.FormAlreadyOpen, tag);
    }
    else if (TagNames.AllBody.Contains(tagName))
    {
        if (IsInButtonScope())
            InBodyEndTagParagraph(tag);

        AddElement(tag);
    }
    else if (TagNames.AllClassicFormatting.Contains(tagName))
    {
        // The new element is also recorded as a formatting element.
        ReconstructFormatting();
        _formattingElements.AddFormatting(AddElement(tag));
    }
    else if (TagNames.AllHead.Contains(tagName))
    {
        InHead(tag);
    }
    else if (tagName.IsOneOf(TagNames.Pre, TagNames.Listing))
    {
        if (IsInButtonScope())
            InBodyEndTagParagraph(tag);

        AddElement(tag);
        _frameset = false;
        PreventNewLine();
    }
    else if (tagName.Is(TagNames.Button))
    {
        if (IsInScope(TagNames.Button))
        {
            // A button is already in scope: report it, end the open one
            // and process this tag again.
            RaiseErrorOccurred(HtmlParseError.ButtonInScope, tag);
            InBodyEndTagBlock(tag);
            InBody(tag);
        }
        else
        {
            ReconstructFormatting();
            AddElement(new HtmlButtonElement(_document), tag);
            _frameset = false;
        }
    }
    else if (tagName.Is(TagNames.Table))
    {
        // In full quirks mode an open paragraph is not ended by a table.
        if (_document.QuirksMode != QuirksMode.On && IsInButtonScope())
            InBodyEndTagParagraph(tag);

        AddElement(new HtmlTableElement(_document), tag);
        _frameset = false;
        _currentMode = HtmlTreeMode.InTable;
    }
    else if (TagNames.AllBodyBreakrow.Contains(tagName))
    {
        InBodyStartTagBreakrow(tag);
    }
    else if (TagNames.AllBodyClosed.Contains(tagName))
    {
        // These elements are added and closed right away.
        AddElement(tag, true);
        CloseCurrentNode();
    }
    else if (tagName.Is(TagNames.Hr))
    {
        if (IsInButtonScope())
            InBodyEndTagParagraph(tag);

        AddElement(new HtmlHrElement(_document), tag, true);
        CloseCurrentNode();
        _frameset = false;
    }
    else if (tagName.Is(TagNames.Textarea))
    {
        // The content is tokenized as RCData and collected in the Text
        // mode; the current mode is remembered for returning afterwards.
        AddElement(new HtmlTextAreaElement(_document), tag);
        _tokenizer.State = HtmlParseMode.RCData;
        _previousMode = _currentMode;
        _frameset = false;
        _currentMode = HtmlTreeMode.Text;
        PreventNewLine();
    }
    else if (tagName.Is(TagNames.Select))
    {
        ReconstructFormatting();
        AddElement(new HtmlSelectElement(_document), tag);
        _frameset = false;

        // Coming from one of the table related modes the select gets the
        // InSelectInTable mode, otherwise the plain InSelect mode.
        switch (_currentMode)
        {
            case HtmlTreeMode.InTable:
            case HtmlTreeMode.InTableBody:
            case HtmlTreeMode.InCaption:
            case HtmlTreeMode.InRow:
            case HtmlTreeMode.InCell:
                _currentMode = HtmlTreeMode.InSelectInTable;
                break;

            default:
                _currentMode = HtmlTreeMode.InSelect;
                break;
        }
    }
    else if (tagName.IsOneOf(TagNames.Optgroup, TagNames.Option))
    {
        // An option that is still the current node is ended first.
        if (CurrentNode.LocalName.Is(TagNames.Option))
            InBodyEndTagAnythingElse(HtmlTagToken.Close(TagNames.Option));

        ReconstructFormatting();
        AddElement(tag);
    }
    else if (tagName.IsOneOf(TagNames.Dd, TagNames.Dt))
    {
        InBodyStartTagDefinitionItem(tag);
    }
    else if (tagName.Is(TagNames.Iframe))
    {
        _frameset = false;
        RawtextAlgorithm(tag);
    }
    else if (TagNames.AllBodyObsolete.Contains(tagName))
    {
        ReconstructFormatting();
        AddElement(tag);
        _formattingElements.AddScopeMarker();
        _frameset = false;
    }
    else if (tagName.Is(TagNames.Image))
    {
        // "image" is reported, renamed to "img" on the token itself and
        // then handled like any other break-row element.
        RaiseErrorOccurred(HtmlParseError.ImageTagNamedWrong, tag);
        tag.Name = TagNames.Img;
        InBodyStartTagBreakrow(tag);
    }
    else if (tagName.Is(TagNames.NoBr))
    {
        ReconstructFormatting();

        if (IsInScope(TagNames.NoBr))
        {
            RaiseErrorOccurred(HtmlParseError.NobrInScope, tag);
            HeisenbergAlgorithm(tag);
            ReconstructFormatting();
        }

        _formattingElements.AddFormatting(AddElement(tag));
    }
    else if (tagName.Is(TagNames.Xmp))
    {
        if (IsInButtonScope())
            InBodyEndTagParagraph(tag);

        ReconstructFormatting();
        _frameset = false;
        RawtextAlgorithm(tag);
    }
    else if (tagName.IsOneOf(TagNames.Rb, TagNames.Rtc))
    {
        if (IsInScope(TagNames.Ruby))
        {
            GenerateImpliedEndTags();

            if (!CurrentNode.LocalName.Is(TagNames.Ruby))
                RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag);
        }

        AddElement(tag);
    }
    else if (tagName.IsOneOf(TagNames.Rp, TagNames.Rt))
    {
        if (IsInScope(TagNames.Ruby))
        {
            // Same as above, but an open rtc is kept and is also an
            // acceptable current node.
            GenerateImpliedEndTagsExceptFor(TagNames.Rtc);

            if (CurrentNode.LocalName.IsOneOf(TagNames.Ruby, TagNames.Rtc) == false)
                RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, tag);
        }

        AddElement(tag);
    }
    else if (tagName.Is(TagNames.NoEmbed))
    {
        RawtextAlgorithm(tag);
    }
    else if (tagName.Is(TagNames.NoScript))
    {
        // With scripting enabled the content is raw text; otherwise the
        // element is inserted like an ordinary one.
        if (_options.IsScripting)
        {
            RawtextAlgorithm(tag);
            return;
        }

        ReconstructFormatting();
        AddElement(tag);
    }
    else if (tagName.Is(TagNames.Math))
    {
        var element = new MathElement(_document, tagName);
        ReconstructFormatting();
        AddElement(element.Setup(tag));

        // A self-closing tag leaves nothing open on the stack.
        if (tag.IsSelfClosing)
        {
            _openElements.Remove(element);
        }
    }
    else if (tagName.Is(TagNames.Svg))
    {
        var element = new SvgElement(_document, tagName);
        ReconstructFormatting();
        AddElement(element.Setup(tag));

        // A self-closing tag leaves nothing open on the stack.
        if (tag.IsSelfClosing)
        {
            _openElements.Remove(element);
        }
    }
    else if (tagName.Is(TagNames.Plaintext))
    {
        if (IsInButtonScope())
        {
            InBodyEndTagParagraph(tag);
        }

        AddElement(tag);
        _tokenizer.State = HtmlParseMode.Plaintext;
    }
    else if (tagName.Is(TagNames.Frameset))
    {
        RaiseErrorOccurred(HtmlParseError.FramesetMisplaced, tag);

        // The body is only replaced by a frameset when the second open
        // element is the body and the _frameset flag is still set.
        if (_openElements.Count != 1 && _openElements[1].LocalName.Is(TagNames.Body) && _frameset)
        {
            _openElements[1].RemoveFromParent();

            // Pop everything except the first entry of the stack.
            while (_openElements.Count > 1)
            {
                CloseCurrentNode();
            }

            AddElement(new HtmlFrameSetElement(_document), tag);
            _currentMode = HtmlTreeMode.InFrameset;
        }
    }
    else if (tagName.Is(TagNames.Html))
    {
        RaiseErrorOccurred(HtmlParseError.HtmlTagMisplaced, tag);

        // Outside of templates the attributes of the stray tag are passed
        // to the first open element via SetUniqueAttributes.
        if (_templateModes.Count == 0)
        {
            _openElements[0].SetUniqueAttributes(tag.Attributes);
        }
    }
    else if (tagName.Is(TagNames.Body))
    {
        RaiseErrorOccurred(HtmlParseError.BodyTagMisplaced, tag);

        // Outside of templates, and only if the second open element is
        // the body, the attributes of the stray tag are passed to it.
        if (_templateModes.Count == 0 && _openElements.Count > 1 && _openElements[1].LocalName.Is(TagNames.Body))
        {
            _frameset = false;
            _openElements[1].SetUniqueAttributes(tag.Attributes);
        }
    }
    else if (tagName.Is(TagNames.IsIndex))
    {
        RaiseErrorOccurred(HtmlParseError.TagInappropriate, tag);

        // isindex is expanded into form > hr, label (prompt text + input),
        // hr by feeding synthetic tokens through InBody. Nothing is
        // generated while a form is already being tracked.
        if (_currentFormElement == null)
        {
            InBody(HtmlTagToken.Open(TagNames.Form));

            // NOTE(review): relies on GetAttribute never returning null
            // for a missing attribute - confirm against HtmlTagToken.
            if (tag.GetAttribute(AttributeNames.Action).Length > 0)
                _currentFormElement.SetAttribute(AttributeNames.Action, tag.GetAttribute(AttributeNames.Action));

            InBody(HtmlTagToken.Open(TagNames.Hr));
            InBody(HtmlTagToken.Open(TagNames.Label));

            // A custom prompt wins over the default label text.
            if (tag.GetAttribute(AttributeNames.Prompt).Length > 0)
                AddCharacters(tag.GetAttribute(AttributeNames.Prompt));
            else
                AddCharacters("This is a searchable index. Enter search keywords: ");

            var input = HtmlTagToken.Open(TagNames.Input);
            input.AddAttribute(AttributeNames.Name, TagNames.IsIndex);

            // All attributes except name, action and prompt are copied
            // over to the generated input.
            for (int i = 0; i < tag.Attributes.Count; i++)
            {
                if (tag.Attributes[i].Key.IsOneOf(AttributeNames.Name, AttributeNames.Action, AttributeNames.Prompt) == false)
                    input.AddAttribute(tag.Attributes[i].Key, tag.Attributes[i].Value);
            }

            InBody(input);
            InBody(HtmlTagToken.Close(TagNames.Label));
            InBody(HtmlTagToken.Open(TagNames.Hr));
            InBody(HtmlTagToken.Close(TagNames.Form));
        }
    }
    else if (TagNames.AllNested.Contains(tagName))
    {
        // These start tags are reported and otherwise ignored here.
        RaiseErrorOccurred(HtmlParseError.TagCannotStartHere, tag);
    }
    else
    {
        // Any other start tag: insert an ordinary element.
        ReconstructFormatting();
        AddElement(tag);
    }
}
/// <summary>
/// Acts as if a body start tag has been found in the AfterHead state:
/// a body element is inserted, the _frameset flag is cleared and the
/// builder switches to the InBody mode.
/// </summary>
/// <param name="token">The actual tag token.</param>
void AfterHeadStartTagBody(HtmlTagToken token)
{
    var body = new HtmlBodyElement(_document);
    AddElement(body, token);

    _currentMode = HtmlTreeMode.InBody;
    _frameset = false;
}