/// <summary>
/// Finalizes the given tag token and returns it for emission. The
/// tokenizer mode is reset to PCData first. For a start tag, every
/// attribute whose key repeats the key of an earlier attribute is
/// removed (raising AttributeDuplicateOmitted each time) and the tag
/// name is stored as the last start tag. For any other tag type an
/// error is raised if the tag is self-closing, and another one if it
/// carries attributes.
/// </summary>
/// <param name="tag">The tag token to finalize.</param>
/// <returns>The very same token instance that was passed in.</returns>
HtmlTagToken EmitTag(HtmlTagToken tag)
{
    _model = HtmlParseMode.PCData;

    if (tag.Type != HtmlTokenType.StartTag)
    {
        if (tag.IsSelfClosing)
            RaiseErrorOccurred(ErrorCode.EndTagCannotBeSelfClosed);

        if (tag.Attributes.Count != 0)
            RaiseErrorOccurred(ErrorCode.EndTagCannotHaveAttributes);

        return tag;
    }

    // Scan from the last attribute down to the second one; an attribute
    // is dropped as soon as any attribute in front of it has the same key.
    for (var later = tag.Attributes.Count - 1; later > 0; later--)
    {
        var duplicate = false;

        for (var earlier = later - 1; earlier >= 0 && !duplicate; earlier--)
            duplicate = tag.Attributes[earlier].Key == tag.Attributes[later].Key;

        if (duplicate)
        {
            tag.Attributes.RemoveAt(later);
            RaiseErrorOccurred(ErrorCode.AttributeDuplicateOmitted);
        }
    }

    _lastStartTag = tag.Name;
    return tag;
}
/// <summary>
/// See 8.2.4.19 Script data end tag name state
/// </summary>
/// <remarks>
/// The buffered name is compared with the last start tag. Only when both
/// match can whitespace, a solidus or a greater-than sign finish the tag
/// name. ASCII letters are appended (uppercase ones lowered) and the state
/// is re-entered. Anything else prefixes the buffer with the less-than
/// sign and the solidus, enqueues it as character data and continues in
/// the script data state with the current character.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken ScriptDataNameEndTag(Char c, HtmlTagToken tag)
{
    var name = stringBuffer.ToString();

    if (name == lastStartTag)
    {
        if (Specification.IsSpaceCharacter(c))
        {
            tag.Name = name;
            return AttributeBeforeName(src.Next, tag);
        }

        if (c == Specification.SOLIDUS)
        {
            tag.Name = name;
            return TagSelfClosing(src.Next, tag);
        }

        if (c == Specification.GT)
        {
            tag.Name = name;
            return EmitTag(tag);
        }
    }

    if (Specification.IsUppercaseAscii(c))
    {
        stringBuffer.Append(c.ToLower());
        return ScriptDataNameEndTag(src.Next, tag);
    }

    if (Specification.IsLowercaseAscii(c))
    {
        stringBuffer.Append(c);
        return ScriptDataNameEndTag(src.Next, tag);
    }

    // Not an end tag after all: hand the consumed text back as characters.
    stringBuffer.Insert(0, Specification.LT).Insert(1, Specification.SOLIDUS);
    EnqueueToken(HtmlToken.Characters(stringBuffer.ToString()));
    return ScriptData(c);
}
/// <summary>
/// See 8.2.4.10 Tag name state
/// </summary>
/// <remarks>
/// Characters are consumed in a loop until the tag name is terminated by
/// whitespace, a solidus, a greater-than sign or the end of the input.
/// Uppercase ASCII letters are stored lowered, NULL is stored as the
/// replacement character (after raising an error).
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken TagName(Char c, HtmlTagToken tag)
{
    while (true)
    {
        if (Specification.IsSpaceCharacter(c))
        {
            tag.Name = stringBuffer.ToString();
            return AttributeBeforeName(src.Next, tag);
        }

        if (c == Specification.SOLIDUS)
        {
            tag.Name = stringBuffer.ToString();
            return TagSelfClosing(src.Next, tag);
        }

        if (c == Specification.GT)
        {
            tag.Name = stringBuffer.ToString();
            return EmitTag(tag);
        }

        if (Specification.IsUppercaseAscii(c))
        {
            stringBuffer.Append(c.ToLower());
        }
        else if (c == Specification.NULL)
        {
            RaiseErrorOccurred(ErrorCode.NULL);
            stringBuffer.Append(Specification.REPLACEMENT);
        }
        else if (c == Specification.EOF)
        {
            RaiseErrorOccurred(ErrorCode.EOF);
            return HtmlToken.EOF;
        }
        else
        {
            stringBuffer.Append(c);
        }

        // Stay in this state and look at the following character.
        c = src.Next;
    }
}
/// <summary>
/// See 8.2.4.39 Attribute value (single-quoted) state
/// </summary>
/// <remarks>
/// Collects characters into the string buffer until the closing single
/// quote is found, at which point the buffer becomes the value of the
/// current attribute. An ampersand triggers character reference
/// resolution; if nothing is resolved the ampersand itself is kept.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken AttributeSingleQuotedValue(Char c, HtmlTagToken tag)
{
    for (; ; c = _src.Next)
    {
        if (c == Specification.SQ)
        {
            tag.SetAttributeValue(_stringBuffer.ToString());
            return AttributeAfterValue(_src.Next, tag);
        }

        if (c == Specification.AMPERSAND)
        {
            var value = CharacterReference(_src.Next, Specification.SQ);

            if (value != null)
                _stringBuffer.Append(value);
            else
                _stringBuffer.Append(Specification.AMPERSAND);

            continue;
        }

        if (c == Specification.NULL)
        {
            RaiseErrorOccurred(ErrorCode.NULL);
            _stringBuffer.Append(Specification.REPLACEMENT);
            continue;
        }

        if (c == Specification.EOF)
            return HtmlToken.EOF;

        _stringBuffer.Append(c);
    }
}
/// <summary>
/// Copies every attribute of the given tag token onto the given node by
/// calling SetAttribute with the attribute's key and value, in token order.
/// </summary>
/// <param name="elementToken">The tag token which carries the attributes.</param>
/// <param name="element">The node which receives the attributes.</param>
void AddAttributesToElement(HtmlTagToken elementToken, Element element)
{
    var position = 0;

    while (position < elementToken.Attributes.Count)
    {
        var attribute = elementToken.Attributes[position];
        element.SetAttribute(attribute.Key, attribute.Value);
        position++;
    }
}
/// <summary>
/// See 8.2.4.19 Script data end tag name state
/// </summary>
/// <remarks>
/// The buffered name is lowered and compared with the last start tag.
/// Only when both match can whitespace, a solidus or a greater-than sign
/// finish the tag name. Letters are appended to the string buffer and the
/// state is re-entered. Anything else writes the less-than sign, the
/// solidus and the buffered text (in its original casing) to the
/// character buffer and continues in the script data state.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken ScriptDataNameEndTag(Char c, HtmlTagToken tag)
{
    // Lower with the invariant culture: String.ToLower() follows the
    // current culture, so e.g. under Turkish casing rules "SCRIPT" would
    // become "scrıpt" (dotless i) and never match the last start tag.
    var name = _stringBuffer.ToString().ToLowerInvariant();
    var appropriateEndTag = name == _lastStartTag;

    if (appropriateEndTag && c.IsSpaceCharacter())
    {
        tag.Name = name;
        return AttributeBeforeName(_src.Next, tag);
    }
    else if (appropriateEndTag && c == Specification.SOLIDUS)
    {
        tag.Name = name;
        return TagSelfClosing(_src.Next, tag);
    }
    else if (appropriateEndTag && c == Specification.GT)
    {
        tag.Name = name;
        return EmitTag(tag);
    }
    else if (c.IsLetter())
    {
        _stringBuffer.Append(c);
        return ScriptDataNameEndTag(_src.Next, tag);
    }

    // Not an end tag after all: the consumed text is ordinary script data.
    _buffer.Append(Specification.LT).Append(Specification.SOLIDUS);
    _buffer.Append(_stringBuffer.ToString());
    return ScriptData(c);
}
/// <summary>
/// See 8.2.4.37 Before attribute value state
/// </summary>
/// <remarks>
/// Skips leading whitespace and then decides how the attribute value is
/// read: double-quoted, single-quoted or unquoted. Every branch that
/// starts a value resets the string buffer first, so the value never
/// begins with text left over from a previous state.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken AttributeBeforeValue(Char c, HtmlTagToken tag)
{
    while (c.IsSpaceCharacter())
        c = _src.Next;

    if (c == Specification.DQ)
    {
        _stringBuffer.Clear();
        return AttributeDoubleQuotedValue(_src.Next, tag);
    }
    else if (c == Specification.AMPERSAND)
    {
        // The ampersand is handed over unconsumed so that the unquoted
        // value state can resolve the character reference itself.
        _stringBuffer.Clear();
        return AttributeUnquotedValue(c, tag);
    }
    else if (c == Specification.SQ)
    {
        _stringBuffer.Clear();
        return AttributeSingleQuotedValue(_src.Next, tag);
    }
    else if (c == Specification.NULL)
    {
        RaiseErrorOccurred(ErrorCode.NULL);
        // Clear before appending, like all other value-starting branches;
        // without it the value would be prefixed with the stale buffer
        // content instead of starting with the replacement character.
        _stringBuffer.Clear().Append(Specification.REPLACEMENT);
        return AttributeUnquotedValue(_src.Next, tag);
    }
    else if (c == Specification.GT)
    {
        RaiseErrorOccurred(ErrorCode.TagClosedWrong);
        return EmitTag(tag);
    }
    else if (c == Specification.LT || c == Specification.EQ || c == Specification.CQ)
    {
        // Invalid first character: reported, but still taken as value text.
        RaiseErrorOccurred(ErrorCode.AttributeValueInvalid);
        _stringBuffer.Clear().Append(c);
        return AttributeUnquotedValue(_src.Next, tag);
    }
    else if (c == Specification.EOF)
    {
        return HtmlToken.EOF;
    }
    else
    {
        _stringBuffer.Clear().Append(c);
        return AttributeUnquotedValue(_src.Next, tag);
    }
}
/// <summary>
/// Closes the current table section and re-processes the tag in the
/// InTable mode, provided a section is in table scope. Otherwise only
/// a TableSectionNotInScope error is raised.
/// </summary>
/// <param name="tag">The tag which triggers the closing of the section.</param>
void InTableBodyCloseTable(HtmlTagToken tag)
{
    if (!IsSectionInTableScope())
    {
        RaiseErrorOccurred(ErrorCode.TableSectionNotInScope);
        return;
    }

    ClearStackBackToTableSection();
    CloseCurrentNode();
    insert = HtmlTreeMode.InTable;
    InTable(tag);
}
/// <summary>
/// Follows the generic RCData parsing algorithm: an element created from
/// the tag name is added to the current node, the current insertion mode
/// is remembered, the tree builder switches to the Text mode and the
/// tokenizer is switched to RCData.
/// </summary>
/// <param name="tag">The given tag token.</param>
void RCDataAlgorithm(HtmlTagToken tag)
{
    AddElementToCurrentNode(HTMLElement.Factory(tag.Name), tag);

    // Remember where to return to once the text content has been read.
    originalInsert = insert;
    insert = HtmlTreeMode.Text;

    tokenizer.Switch(HtmlParseMode.RCData);
}
/// <summary>
/// Acts if a li start tag in the InBody state has been found. The stack
/// of open elements is walked from the top: an open li element is closed
/// by processing a matching close tag, and the walk stops there or at the
/// first special element that is neither an address, a div nor a
/// paragraph. Afterwards a paragraph in button scope is closed and the
/// new element is added to the current node.
/// </summary>
/// <param name="tag">The actual tag given.</param>
void InBodyStartTagListItem(HtmlTagToken tag)
{
    frameset = false;

    for (var index = open.Count - 1; ; index--)
    {
        var node = open[index];

        if (node.NodeName == HTMLLIElement.ItemTag)
        {
            InBody(HtmlToken.CloseTag(node.NodeName));
            break;
        }

        var stopsSearch = node.IsSpecial
            && node.NodeName != HTMLSemanticElement.AddressTag
            && !(node is HTMLDivElement)
            && !(node is HTMLParagraphElement);

        if (stopsSearch)
            break;
    }

    if (IsInButtonScope(HTMLParagraphElement.Tag))
        InBodyEndTagParagraph();

    AddElementToCurrentNode(HTMLElement.Factory(tag.Name), tag);
}
/// <summary>
/// Intended to process the element according to the SVG rules.
/// The method is currently an empty placeholder: the given tag is
/// ignored and nothing is changed.
/// </summary>
/// <param name="tag">The tag to process (unused for now).</param>
void InSvg(HtmlTagToken tag)
{
    //TODO
    //Process the script element according to the SVG rules, if the user agent supports SVG. [SVG]
}
/// <summary>
/// Act as if a br start tag has been found in the InBody state: the
/// formatting elements are reconstructed, an element created from the
/// tag name is added to the current node and closed again right away,
/// and the frameset flag is cleared.
/// </summary>
/// <param name="tag">The actual tag found.</param>
void InBodyStartTagBreakrow(HtmlTagToken tag)
{
    ReconstructFormatting();

    // The element is added and immediately closed again.
    AddElementToCurrentNode(HTMLElement.Factory(tag.Name), tag);
    CloseCurrentNode();

    frameset = false;
}
/// <summary>
/// Act as if an anything else end tag has been found in the InBody state.
/// The stack of open elements is walked from the current node downwards.
/// The first element whose name equals the tag name is closed together
/// with everything above it; if a special element is met first, the end
/// tag is reported as an error and ignored.
/// </summary>
/// <param name="tag">The actual tag found.</param>
void InBodyEndTagAnythingElse(HtmlTagToken tag)
{
    var index = open.Count - 1;
    var node = CurrentNode;

    do
    {
        if (node.NodeName == tag.Name)
        {
            GenerateImpliedEndTagsExceptFor(tag.Name);

            // Only a mis-nested end tag is an error: after the implied end
            // tags have been generated the matched node has to be the
            // current node. (Comparing the names again, as done before,
            // was always true here and flagged every well-formed end tag.)
            if (node != CurrentNode)
                RaiseErrorOccurred(ErrorCode.TagClosedWrong);

            // Pop everything from the top of the stack down to and
            // including the matched node.
            for (int i = open.Count - 1; index <= i; i--)
                CloseCurrentNode();

            break;
        }
        else if (node.IsSpecial)
        {
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);
            break;
        }

        node = open[--index];
    }
    while (true);
}
/// <summary>
/// Handles an end tag whose name matches an entry in the list of active
/// formatting elements. In up to eight passes the matching formatting
/// element is located, and the elements between it and the first special
/// element above it on the stack of open elements are cloned and
/// re-parented. If no matching formatting element exists, the tag is
/// treated like any other end tag.
/// </summary>
/// <remarks>
/// NOTE(review): the steps look like the "adoption agency algorithm" of
/// the HTML tree construction rules - confirm against the callers.
/// </remarks>
/// <param name="tag">The actual tag given.</param>
void HeisenbergAlgorithm(HtmlTagToken tag)
{
    var outer = 0;
    var inner = 0;
    var bookmark = 0;
    var index = 0;
    Element formattingElement;
    Element furthestBlock;
    Element commonAncestor;
    Element node;
    Element lastNode;

    // Outer loop: at most 8 passes.
    while(outer < 8)
    {
        outer++;
        index = 0;
        formattingElement = null;

        // Search the formatting list backwards for an entry with the tag's
        // name, giving up at the first scope marker.
        for (var j = formatting.Count - 1; j >= 0; j--)
        {
            if (formatting[j] is ScopeMarkerNode)
                break;
            else if (formatting[j].NodeName == tag.Name)
            {
                index = j;
                formattingElement = formatting[j];
                break;
            }
        }

        // No such formatting element: handle the tag as an ordinary end tag.
        if (formattingElement == null)
        {
            InBodyEndTagAnythingElse(tag);
            break;
        }

        var openIndex = open.IndexOf(formattingElement);

        // The formatting element is not on the stack of open elements:
        // report it and drop it from the formatting list.
        if (openIndex == -1)
        {
            RaiseErrorOccurred(ErrorCode.FormattingElementNotFound);
            formatting.Remove(formattingElement);
            break;
        }

        if (!IsInScope(formattingElement.NodeName))
        {
            RaiseErrorOccurred(ErrorCode.ElementNotInScope);
            break;
        }

        // Reported, but processing continues, when the formatting element
        // is not the topmost open element.
        if (openIndex != open.Count - 1)
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);

        furthestBlock = null;
        // bookmark keeps the position inside the formatting list; from
        // here on index is reused as a position inside the open stack.
        bookmark = index;

        // The furthest block is the first special element above the
        // formatting element on the stack of open elements.
        for (var j = openIndex + 1; j < open.Count; j++)
        {
            if (open[j].IsSpecial)
            {
                index = j;
                furthestBlock = open[j];
                break;
            }
        }

        // Without a furthest block: close nodes up to and including the
        // formatting element and remove it from the formatting list.
        if (furthestBlock == null)
        {
            do
            {
                furthestBlock = CurrentNode;
                CloseCurrentNode();
            }
            while (furthestBlock != formattingElement);

            formatting.Remove(formattingElement);
            break;
        }

        // The element directly below the formatting element on the stack.
        commonAncestor = open[openIndex - 1];
        inner = 0;
        node = furthestBlock;
        lastNode = furthestBlock;

        // Inner loop: walk down the stack from the furthest block until the
        // formatting element is reached.
        while (true)
        {
            inner++;
            node = open[--index];

            if (node == formattingElement)
                break;

            // After three inner passes remaining formatting entries are
            // dropped from the formatting list ...
            if (inner > 3 && formatting.Contains(node))
                formatting.Remove(node);

            // ... and nodes that are not in the formatting list are simply
            // removed from the stack of open elements.
            if (!formatting.Contains(node))
            {
                open.Remove(node);
                continue;
            }

            // Replace the node by a copy in both lists.
            var newElement = CopyElement(node);
            commonAncestor.AppendChild(newElement);
            open[index] = newElement;

            for(var l = 0; l != formatting.Count; l++)
            {
                if(formatting[l] == node)
                {
                    formatting[l] = newElement;
                    break;
                }
            }

            node = newElement;

            if (lastNode == furthestBlock)
                bookmark++;

            // Re-parent the previously handled node below the copy.
            if(lastNode.ParentNode != null)
                lastNode.ParentNode.RemoveChild(lastNode);

            node.AppendChild(lastNode);
            lastNode = node;
        }

        // Attach the last handled node to the common ancestor; table
        // elements go through AddElementWithFoster instead.
        if (commonAncestor.IsTableElement())
            AddElementWithFoster(lastNode);
        else
        {
            if (lastNode.ParentNode != null)
                lastNode.ParentNode.RemoveChild(lastNode);

            commonAncestor.AppendChild(lastNode);
        }

        // A copy of the formatting element takes over all children of the
        // furthest block (in their original order) and becomes its only child.
        var element = CopyElement(formattingElement);

        while(furthestBlock.ChildNodes.Length > 0)
            element.AppendChild(furthestBlock.RemoveChild(furthestBlock.ChildNodes[0]));

        furthestBlock.AppendChild(element);

        // Swap the old formatting element for the copy in both lists.
        formatting.Remove(formattingElement);
        formatting.Insert(bookmark, element);
        open.Remove(formattingElement);
        open.Insert(open.IndexOf(furthestBlock) + 1, element);
    }
}
/// <summary>
/// See 8.2.4.42 After attribute value (quoted) state
/// </summary>
/// <remarks>
/// Whitespace leads to the next attribute name, a solidus to the
/// self-closing state and a greater-than sign emits the tag. Any other
/// character (except the end of the input) raises
/// AttributeNameExpected and is re-processed as the start of an
/// attribute name.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken AttributeAfterValue(Char c, HtmlTagToken tag)
{
    if (c.IsSpaceCharacter())
    {
        return AttributeBeforeName(_src.Next, tag);
    }

    if (c == Specification.SOLIDUS)
    {
        return TagSelfClosing(_src.Next, tag);
    }

    if (c == Specification.GT)
    {
        return EmitTag(tag);
    }

    if (c == Specification.EOF)
    {
        return HtmlTagToken.EOF;
    }

    // Missing separator: report it and reconsume the character.
    RaiseErrorOccurred(ErrorCode.AttributeNameExpected);
    return AttributeBeforeName(c, tag);
}
/// <summary>
/// Act as if a body start tag has been found in the AfterHead state: a
/// new body element is added to the current node, the frameset flag is
/// cleared and the insertion mode changes to InBody.
/// </summary>
/// <param name="token">The tag token that is passed on when adding the element.</param>
void AfterHeadStartTagBody(HtmlTagToken token)
{
    AddElementToCurrentNode(new HTMLBodyElement(), token);

    frameset = false;
    insert = HtmlTreeMode.InBody;
}
/// <summary>
/// See 8.2.4.26 Script data escaped end tag open state
/// </summary>
/// <remarks>
/// NOTE(review): the letter branch re-enters this very state, so the
/// string buffer is cleared again for every following letter and only
/// the last letter is kept. Confirm whether a transition to an end tag
/// name state was intended instead.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken ScriptDataEscapedEndTag(Char c, HtmlTagToken tag)
{
    if (!c.IsLetter())
    {
        // No tag name follows: the less-than sign and the solidus are
        // plain script data.
        _buffer.Append(Specification.LT).Append(Specification.SOLIDUS);
        return ScriptDataEscaped(c);
    }

    _stringBuffer.Clear();
    _stringBuffer.Append(c);
    return ScriptDataEscapedEndTag(_src.Next, tag);
}
/// <summary>
/// Adds those attributes of the token to the node which the node does
/// not have yet. Attributes that are already present on the node keep
/// their current value.
/// </summary>
/// <param name="elementToken">The token with the source attributes.</param>
/// <param name="element">The node with the target attributes.</param>
void AppendAttributes(HtmlTagToken elementToken, Element element)
{
    foreach (var attribute in elementToken.Attributes)
    {
        if (element.HasAttribute(attribute.Key))
            continue;

        element.SetAttribute(attribute.Key, attribute.Value);
    }
}
/// <summary>
/// See 8.2.4.43 Self-closing start tag state
/// </summary>
/// <remarks>
/// A greater-than sign marks the tag as self-closing and emits it. The
/// end of the input raises an EOF error. Any other character raises
/// ClosingSlashMisplaced and is re-processed as the start of an
/// attribute name.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken TagSelfClosing(Char c, HtmlTagToken tag)
{
    if (c == Specification.GT)
    {
        tag.IsSelfClosing = true;
        return EmitTag(tag);
    }

    if (c == Specification.EOF)
    {
        RaiseErrorOccurred(ErrorCode.EOF);
        return HtmlToken.EOF;
    }

    RaiseErrorOccurred(ErrorCode.ClosingSlashMisplaced);
    return AttributeBeforeName(c, tag);
}
/// <summary>
/// See 8.2.4.34 Before attribute name state
/// </summary>
/// <remarks>
/// Skips whitespace. A solidus or a greater-than sign ends the tag.
/// Everything else (except the end of the input) restarts the string
/// buffer with the first character of the attribute name - lowered for
/// uppercase ASCII letters, replaced for NULL - and continues in the
/// attribute name state.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken AttributeBeforeName(Char c, HtmlTagToken tag)
{
    while (Specification.IsSpaceCharacter(c))
        c = src.Next;

    if (c == Specification.SOLIDUS)
        return TagSelfClosing(src.Next, tag);

    if (c == Specification.GT)
        return EmitTag(tag);

    if (Specification.IsUppercaseAscii(c))
    {
        stringBuffer.Clear().Append(c.ToLower());
        return AttributeName(src.Next, tag);
    }

    if (c == Specification.NULL)
    {
        RaiseErrorOccurred(ErrorCode.NULL);
        stringBuffer.Clear().Append(Specification.REPLACEMENT);
        return AttributeName(src.Next, tag);
    }

    if (c == Specification.SQ || c == Specification.DQ || c == Specification.EQ || c == Specification.LT)
    {
        // Invalid first character: reported, but still used for the name.
        RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
        stringBuffer.Clear().Append(c);
        return AttributeName(src.Next, tag);
    }

    if (c == Specification.EOF)
        return HtmlToken.EOF;

    stringBuffer.Clear().Append(c);
    return AttributeName(src.Next, tag);
}
/// <summary>
/// See 8.2.4.35 Attribute name state
/// </summary>
/// <remarks>
/// Collects the attribute name in the string buffer. Whitespace, a
/// solidus, an equals sign or a greater-than sign finish the name, which
/// is then added to the tag as a new attribute. Uppercase ASCII letters
/// are stored lowered, NULL is stored as the replacement character, and
/// quotes or a less-than sign are reported but kept.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken AttributeName(Char c, HtmlTagToken tag)
{
    while (true)
    {
        if (c.IsSpaceCharacter())
        {
            tag.AddAttribute(_stringBuffer.ToString());
            return AttributeAfterName(_src.Next, tag);
        }
        else if (c == Specification.SOLIDUS)
        {
            tag.AddAttribute(_stringBuffer.ToString());
            return TagSelfClosing(_src.Next, tag);
        }
        else if (c == Specification.EQ)
        {
            tag.AddAttribute(_stringBuffer.ToString());
            return AttributeBeforeValue(_src.Next, tag);
        }
        else if (c == Specification.GT)
        {
            tag.AddAttribute(_stringBuffer.ToString());
            return EmitTag(tag);
        }
        else if (c == Specification.EOF)
        {
            return HtmlToken.EOF;
        }
        else if (c == Specification.NULL)
        {
            RaiseErrorOccurred(ErrorCode.NULL);
            _stringBuffer.Append(Specification.REPLACEMENT);
        }
        else if (c.IsUppercaseAscii())
        {
            // Invariant lowering: Char.ToLower(Char) follows the current
            // culture, which under Turkish casing rules would turn 'I'
            // into the dotless 'ı' and corrupt names such as "ID".
            _stringBuffer.Append(Char.ToLowerInvariant(c));
        }
        else if (c == Specification.DQ || c == Specification.SQ || c == Specification.LT)
        {
            // Invalid inside a name: reported, but still appended.
            RaiseErrorOccurred(ErrorCode.AttributeNameInvalid);
            _stringBuffer.Append(c);
        }
        else
        {
            _stringBuffer.Append(c);
        }

        c = _src.Next;
    }
}
/// <summary>
/// See 8.2.4.26 Script data escaped end tag open state
/// </summary>
/// <remarks>
/// NOTE(review): both letter branches re-enter this very state, so the
/// string buffer is cleared again for every following letter and only
/// the last letter is kept. Confirm whether a transition to an end tag
/// name state was intended instead.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken ScriptDataEscapedEndTag(Char c, HtmlTagToken tag)
{
    if (Specification.IsUppercaseAscii(c))
    {
        stringBuffer.Clear().Append(c.ToLower());
        return ScriptDataEscapedEndTag(src.Next, tag);
    }

    if (Specification.IsLowercaseAscii(c))
    {
        stringBuffer.Clear().Append(c);
        return ScriptDataEscapedEndTag(src.Next, tag);
    }

    // No tag name follows: the less-than sign and the solidus are
    // enqueued as two separate character tokens.
    EnqueueToken(HtmlToken.Character(Specification.LT));
    EnqueueToken(HtmlToken.Character(Specification.SOLIDUS));
    return ScriptDataEscaped(c);
}
/// <summary>
/// See 8.2.4.40 Attribute value (unquoted) state
/// </summary>
/// <remarks>
/// Collects characters into the string buffer until whitespace or a
/// greater-than sign ends the value, which is then stored on the current
/// attribute. An ampersand triggers character reference resolution; if
/// nothing is resolved the ampersand itself is kept. Quotes, the
/// less-than sign, the equals sign and Specification.CQ are reported but
/// still appended.
/// </remarks>
/// <param name="c">The next input character.</param>
/// <param name="tag">The current tag token.</param>
/// <returns>The emitted token.</returns>
HtmlToken AttributeUnquotedValue(Char c, HtmlTagToken tag)
{
    for (; ; c = _src.Next)
    {
        if (c.IsSpaceCharacter())
        {
            tag.SetAttributeValue(_stringBuffer.ToString());
            return AttributeBeforeName(_src.Next, tag);
        }

        if (c == Specification.AMPERSAND)
        {
            var value = CharacterReference(_src.Next, Specification.GT);

            if (value != null)
                _stringBuffer.Append(value);
            else
                _stringBuffer.Append(Specification.AMPERSAND);

            continue;
        }

        if (c == Specification.GT)
        {
            tag.SetAttributeValue(_stringBuffer.ToString());
            return EmitTag(tag);
        }

        if (c == Specification.NULL)
        {
            RaiseErrorOccurred(ErrorCode.NULL);
            _stringBuffer.Append(Specification.REPLACEMENT);
            continue;
        }

        if (c == Specification.DQ || c == Specification.SQ || c == Specification.LT || c == Specification.EQ || c == Specification.CQ)
        {
            RaiseErrorOccurred(ErrorCode.AttributeValueInvalid);
            _stringBuffer.Append(c);
            continue;
        }

        if (c == Specification.EOF)
            return HtmlToken.EOF;

        _stringBuffer.Append(c);
    }
}
/// <summary>
/// Handles an end tag whose name matches an entry in the list of active
/// formatting elements. In up to eight passes the matching formatting
/// element is located, and the elements between it and the first special
/// element above it on the stack of open elements are re-created and
/// re-parented. If no matching formatting element exists, the tag is
/// treated like any other end tag.
/// </summary>
/// <remarks>
/// NOTE(review): the steps look like the "adoption agency algorithm" of
/// the HTML tree construction rules - confirm against the callers.
/// </remarks>
/// <param name="tag">The actual tag given.</param>
void HeisenbergAlgorithm(HtmlTagToken tag)
{
    var outer = 0;

    // Outer loop: at most 8 passes.
    while (outer < 8)
    {
        outer++;
        var index = 0;
        Element formattingElement = null;

        // Search the formatting list backwards for an entry with the tag's
        // name, giving up at the first scope marker.
        for (var j = formatting.Count - 1; j >= 0; j--)
        {
            if (formatting[j] is ScopeMarkerNode)
                break;
            else if (formatting[j].NodeName == tag.Name)
            {
                index = j;
                formattingElement = formatting[j];
                break;
            }
        }

        // No such formatting element: handle the tag as an ordinary end tag.
        if (formattingElement == null)
        {
            InBodyEndTagAnythingElse(tag);
            break;
        }

        var openIndex = -1;

        for (var j = 0; j < open.Count; j++)
        {
            if (open[j] == formattingElement)
            {
                openIndex = j;
                break;
            }
        }

        // The formatting element is not on the stack of open elements:
        // report it and drop it from the formatting list.
        if (openIndex == -1)
        {
            RaiseErrorOccurred(ErrorCode.FormattingElementNotFound);
            formatting.Remove(formattingElement);
            break;
        }

        if (!IsInScope(formattingElement.NodeName))
        {
            RaiseErrorOccurred(ErrorCode.ElementNotInScope);
            break;
        }

        // Reported, but processing continues, when the formatting element
        // is not the topmost open element.
        if (openIndex != open.Count - 1)
            RaiseErrorOccurred(ErrorCode.TagClosedWrong);

        Element furthestBlock = null;
        // bookmark keeps the position inside the formatting list; from
        // here on index is reused as a position inside the open stack.
        var bookmark = index;

        // The furthest block is the first special element above the
        // formatting element on the stack of open elements.
        for (var j = openIndex + 1; j < open.Count; j++)
        {
            if (open[j].IsSpecial)
            {
                index = j;
                furthestBlock = open[j];
                break;
            }
        }

        // Without a furthest block: close nodes up to and including the
        // formatting element and remove it from the formatting list.
        if (furthestBlock == null)
        {
            do
            {
                furthestBlock = CurrentNode;
                CloseCurrentNode();
            }
            while (furthestBlock != formattingElement);

            formatting.Remove(formattingElement);
            break;
        }

        // The element directly below the formatting element on the stack.
        var commonAncestor = open[openIndex - 1];
        var inner = 0;
        var node = furthestBlock;
        var lastNode = furthestBlock;

        // Inner loop: at most 3 steps down the stack from the furthest block.
        while (inner < 3)
        {
            inner++;
            node = open[--index];

            if (!formatting.Contains(node))
            {
                // Not a formatting element: just take it off the stack.
                open.Remove(node);
                continue;
            }
            else if (node == formattingElement)
                break;

            // Re-create the node (same name and attributes) and let the
            // copy replace it in both lists.
            var newel = HTMLElement.Factory(node.NodeName);
            newel.NodeName = node.NodeName;

            for (int i = 0; i < node.Attributes.Length; i++)
            {
                var attr = node.Attributes[i];
                newel.SetAttribute(attr.NodeName, attr.NodeValue);
            }

            open[index] = newel;

            for (var l = 0; l != formatting.Count; l++)
            {
                if (formatting[l] == node)
                {
                    formatting[l] = newel;
                    break;
                }
            }

            node = newel;

            if (lastNode == furthestBlock)
                bookmark++;

            node.AppendChild(lastNode);
            lastNode = node;
        }

        // Attach the last handled node to the common ancestor; table
        // elements go through AddElementWithFoster instead.
        if (commonAncestor.IsTableElement())
            AddElementWithFoster(lastNode);
        else
        {
            if (lastNode.ParentNode != null)
                lastNode.ParentNode.RemoveChild(lastNode);

            commonAncestor.AppendChild(lastNode);
        }

        // Re-create the formatting element (same name and attributes).
        var element = HTMLElement.Factory(formattingElement.NodeName);
        element.NodeName = formattingElement.NodeName;

        for (int i = 0; i < formattingElement.Attributes.Length; i++)
        {
            var attr = formattingElement.Attributes[i];
            element.SetAttribute(attr.NodeName, attr.NodeValue);
        }

        // Move the children of the furthest block into the copy. Always
        // taking the first remaining child keeps the document order; the
        // former back-to-front loop appended them reversed.
        while (furthestBlock.ChildNodes.Length > 0)
            element.AppendChild(furthestBlock.RemoveChild(furthestBlock.ChildNodes[0]));

        furthestBlock.AppendChild(element);

        // The copy - not the furthest block - takes the place of the old
        // formatting element in the list of active formatting elements.
        formatting.Remove(formattingElement);
        formatting.Insert(bookmark, element);
        open.Remove(formattingElement);
        open.Insert(index + 1, element);
    }
}