// Checks that Shift moves Start and End by the same offset, forwards and backwards.
public void ShiftTest()
{
    var token = new HtmlToken(17, 5);

    // Initial position.
    Assert.Equal(17, token.Start);
    Assert.Equal(22, token.End);

    // Positive offset.
    token.Shift(8);
    Assert.Equal(25, token.Start);
    Assert.Equal(30, token.End);

    // Negative offset.
    token.Shift(-5);
    Assert.Equal(20, token.Start);
    Assert.Equal(25, token.End);
}
/// <summary>
/// Walks the runs of <paramref name="token"/> and forwards each text run to the
/// matching Add* method of the base class. Runs that are not text runs are skipped.
/// </summary>
/// <param name="token">The token whose runs are emitted.</param>
private void OutputText(HtmlToken token)
{
    foreach (TokenRun run in token.Runs)
    {
        if (!run.IsTextRun)
        {
            continue;
        }

        if (run.IsAnyWhitespace)
        {
            switch (run.TextType)
            {
                case RunTextType.NewLine:
                    base.AddLineBreak(1);
                    break;

                case RunTextType.Tabulation:
                    base.AddTabulation(run.Length);
                    break;

                default:
                    // Plain spaces and every other whitespace kind are emitted as spaces.
                    base.AddSpace(run.Length);
                    break;
            }

            continue;
        }

        if (run.TextType == RunTextType.Nbsp)
        {
            if (this.treatNbspAsBreakable)
            {
                base.AddSpace(run.Length);
            }
            else
            {
                base.AddNbsp(run.Length);
            }

            continue;
        }

        if (run.IsLiteral)
        {
            // ReadLiteral fills literalBuffer and returns the count passed as the length.
            base.AddNonSpaceText(this.literalBuffer, 0, run.ReadLiteral(this.literalBuffer));
            continue;
        }

        base.AddNonSpaceText(run.RawBuffer, run.RawOffset, run.RawLength);
    }
}
// "<html/>" must produce a Tag token followed by a CloseTag token, both named "html",
// and then no further tokens.
public void ParseSimpleClosedTag()
{
    HtmlParser htmlParser = new HtmlParser("<html/>");

    // First token: the opening tag.
    HtmlToken current = htmlParser.NextToken();
    Assert.IsNotNull(current);
    Assert.AreEqual(HtmlTokenType.Tag, current.TokenType);
    Assert.AreEqual("html", current.Name);

    // Second token: the close tag.
    current = htmlParser.NextToken();
    Assert.IsNotNull(current);
    Assert.AreEqual(HtmlTokenType.CloseTag, current.TokenType);
    Assert.AreEqual("html", current.Name);

    // Input is exhausted.
    Assert.IsNull(htmlParser.NextToken());
}
// Token: 0x0600139C RID: 5020 RVA: 0x0008A038 File Offset: 0x00088238
/// <summary>
/// Walks the runs of <paramref name="token"/> and writes each text run to
/// <c>this.output</c>. Runs that are not text runs are skipped.
/// </summary>
/// <param name="token">The token whose runs are written.</param>
private void OutputText(HtmlToken token)
{
    foreach (TokenRun run in token.Runs)
    {
        if (!run.IsTextRun)
        {
            continue;
        }

        if (run.IsAnyWhitespace)
        {
            switch (run.TextType)
            {
                case RunTextType.NewLine:
                    this.output.OutputNewLine();
                    break;

                case RunTextType.Tabulation:
                    this.output.OutputTabulation(run.Length);
                    break;

                default:
                    // Plain spaces and every other whitespace kind are written as spaces.
                    this.output.OutputSpace(run.Length);
                    break;
            }

            continue;
        }

        if (run.TextType == RunTextType.Nbsp)
        {
            if (this.treatNbspAsBreakable)
            {
                this.output.OutputSpace(run.Length);
            }
            else
            {
                this.output.OutputNbsp(run.Length);
            }

            continue;
        }

        if (run.IsLiteral)
        {
            this.output.OutputNonspace(run.Literal, TextMapping.Unicode);
            continue;
        }

        this.output.OutputNonspace(run.RawBuffer, run.RawOffset, run.RawLength, TextMapping.Unicode);
    }
}
// "<html attr>" must produce a Tag token "html", then an Attribute token "attr"
// with a null value, and then no further tokens.
public void ParseSimpleTagWithSimpleAttribute()
{
    HtmlParser htmlParser = new HtmlParser("<html attr>");

    // First token: the tag itself.
    HtmlToken current = htmlParser.NextToken();
    Assert.IsNotNull(current);
    Assert.AreEqual(HtmlTokenType.Tag, current.TokenType);
    Assert.AreEqual("html", current.Name);

    // Second token: the value-less attribute.
    current = htmlParser.NextToken();
    Assert.IsNotNull(current);
    Assert.AreEqual(HtmlTokenType.Attribute, current.TokenType);
    Assert.AreEqual("attr", current.Name);
    Assert.IsNull(current.Value);

    // Input is exhausted.
    Assert.IsNull(htmlParser.NextToken());
}
/// <summary>
/// Disposes the parser, the output (only when not converting a fragment) and the
/// current token if it is disposable, then clears those references and suppresses
/// finalization of this instance.
/// </summary>
void IDisposable.Dispose()
{
    if (parser != null)
    {
        ((IDisposable)parser).Dispose();
    }

    // The original condition tested "output != null" twice; a single test is equivalent.
    if (!convertFragment && output != null)
    {
        ((IDisposable)output).Dispose();
    }

    // "is" already evaluates to false for null, so no separate null check is needed.
    if (token is IDisposable)
    {
        ((IDisposable)token).Dispose();
    }

    parser = null;
    output = null;
    token = null;

    GC.SuppressFinalize(this);
}
/// <summary>
/// Creates a builder around a fresh <see cref="HtmlToken"/> and, when attributes are
/// allowed (<paramref name="maxAttrs"/> is non-zero), allocates the initial attribute list.
/// </summary>
/// <param name="buffer">Forwarded to the base constructor.</param>
/// <param name="maxRuns">Forwarded to the base constructor.</param>
/// <param name="maxAttrs">Attribute limit; 0 means no attribute list is allocated.</param>
/// <param name="testBoundaryConditions">
/// When true, the attribute list starts with one entry and the limit is fixed at 5
/// instead of using <paramref name="maxAttrs"/>.
/// </param>
public HtmlTokenBuilder(char[] buffer, int maxRuns, int maxAttrs, bool testBoundaryConditions)
    : base(new HtmlToken(), buffer, maxRuns, testBoundaryConditions)
{
    htmlToken = (HtmlToken)base.Token;

    if (maxAttrs != 0)
    {
        int initialCapacity;

        if (testBoundaryConditions)
        {
            initialCapacity = 1;
            this.maxAttrs = 5;
        }
        else
        {
            initialCapacity = 8;
            this.maxAttrs = maxAttrs;
        }

        htmlToken.attributeList = new HtmlToken.AttributeEntry[initialCapacity];
    }

    htmlToken.nameIndex = HtmlNameIndex._NOTANAME;
}
/// <summary>
/// Completes the tag name of the token being built: adds a null name run when the
/// local name has no runs yet, marks the name as ended, resolves the name and tag
/// indices when the token is at the beginning of a tag, and moves to state 23.
/// </summary>
/// <param name="nameLength">Name length passed on to <c>LookupName</c>.</param>
public void EndTagName(int nameLength)
{
    if (htmlToken.localName.head == htmlToken.whole.tail)
    {
        // No run was recorded for the local name; add a placeholder run.
        AddNullRun(HtmlRunKind.Name);

        if (htmlToken.localName.head == htmlToken.name.head)
        {
            htmlToken.flags |= HtmlToken.TagFlags.EmptyTagName;
        }
    }

    htmlToken.partMinor |= HtmlToken.TagPartMinor.EndName;

    if (htmlToken.IsTagBegin)
    {
        AddSentinelRun();
        htmlToken.nameIndex = LookupName(nameLength, htmlToken.name);

        // Both tag indices start out as the index registered for the resolved name.
        htmlToken.originalTagIndex = HtmlNameData.names[(int)htmlToken.nameIndex].tagIndex;
        htmlToken.tagIndex = htmlToken.originalTagIndex;
    }

    state = 23;
}
/// <summary>
/// Rebuilds the style entries from <paramref name="tokens"/>. Zero-length tokens are
/// skipped; a token gets an entry only when its exact runtime type is CppToken,
/// DoxygenToken or HtmlToken and its kind has a style in the matching dictionary.
/// </summary>
/// <param name="tokens">The tokens to create style entries for.</param>
public void Refresh(IEnumerable<IBaseToken> tokens)
{
    _entries.Clear();

    foreach (var token in tokens)
    {
        if (token.Length == 0)
        {
            continue;
        }

        // Exact type match (not "is"), so subclasses are deliberately not matched.
        var tokenType = token.GetType();
        int style;
        bool hasStyle;

        // TryGetValue replaces the ContainsKey + indexer pair (one lookup instead of two).
        if (tokenType == typeof(CppToken))
        {
            hasStyle = cppTokenTypeToStyleDict.TryGetValue(((CppToken)token).Kind, out style);
        }
        else if (tokenType == typeof(DoxygenToken))
        {
            hasStyle = doxygenTokenTypeToStyleDict.TryGetValue(((DoxygenToken)token).Kind, out style);
        }
        else if (tokenType == typeof(HtmlToken))
        {
            hasStyle = htmlTokenTypeToStyleDict.TryGetValue(((HtmlToken)token).Kind, out style);
        }
        else
        {
            style = 0;
            hasStyle = false;
        }

        if (hasStyle)
        {
            _entries.Add(new StyleEntry(token, style));
        }
    }
}
/// <summary>
/// See 8.2.5.4.6 The "after head" insertion mode.
/// </summary>
/// <param name="token">The passed token.</param>
void AfterHead(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            // Whatever TrimStart strips off is inserted as characters; if nothing
            // is left in the token afterwards there is no more work to do.
            var str = token.TrimStart();
            AddCharacters(str);

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            var tagName = token.Name;

            if (tagName.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }
            else if (tagName.Is(TagNames.Body))
            {
                AfterHeadStartTagBody(token.AsTag());
                return;
            }
            else if (tagName.Is(TagNames.Frameset))
            {
                AddElement(new HtmlFrameSetElement(_document), token.AsTag());
                _currentMode = HtmlTreeMode.InFrameset;
                return;
            }
            else if (TagNames.AllHeadNoTemplate.Contains(tagName))
            {
                RaiseErrorOccurred(HtmlParseError.TagMustBeInHead, token);

                // Temporarily put the head element back on the stack of open
                // elements so the token can be handled by the "in head" rules.
                var head = _document.Head as Element;
                _openElements.Add(head);
                InHead(token);
                _openElements.Remove(head);
                return;
            }
            else if (tagName.Is(TagNames.Head))
            {
                RaiseErrorOccurred(HtmlParseError.HeadTagMisplaced, token);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            if (token.Name.IsOneOf(TagNames.Html, TagNames.Body, TagNames.Br))
            {
                break;
            }

            RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            return;
        }
    }

    // Anything else: act as if a body start tag had been seen, then reprocess the token.
    AfterHeadStartTagBody(HtmlTagToken.Open(TagNames.Body));
    _frameset = true;
    Home(token);
}
/// <summary>
/// Sets the <c>EmptyScope</c> flag on the token being built.
/// </summary>
public void SetEmptyScope()
{
    htmlToken.flags |= HtmlToken.TagFlags.EmptyScope;
}
/// <summary>
/// Tree construction step for the "in head" insertion mode (section 8.2.5.4.4).
/// Tokens that are not handled here close the current node, switch to the
/// "after head" mode and are reprocessed there.
/// </summary>
/// <param name="token">The token to process.</param>
void InHead(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            var trimmed = token.TrimStart();
            AddCharacters(trimmed);

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.Is(TagNames.Meta))
            {
                var meta = new HtmlMetaElement(_document);
                AddElement(meta, token.AsTag(), true);
                var encoding = meta.GetEncoding();
                CloseCurrentNode();

                if (encoding != null)
                {
                    try
                    {
                        _document.Source.CurrentEncoding = encoding;
                    }
                    catch (NotSupportedException)
                    {
                        // The source could not switch encodings in place.
                        Restart();
                    }
                }

                return;
            }

            if (TagNames.AllHeadBase.Contains(name))
            {
                AddElement(token.AsTag(), true);
                CloseCurrentNode();
                return;
            }

            if (name.Is(TagNames.Title))
            {
                RCDataAlgorithm(token.AsTag());
                return;
            }

            if (name.IsOneOf(TagNames.Style, TagNames.NoFrames) || (_options.IsScripting && name.Is(TagNames.NoScript)))
            {
                RawtextAlgorithm(token.AsTag());
                return;
            }

            if (name.Is(TagNames.NoScript))
            {
                AddElement(token.AsTag());
                _currentMode = HtmlTreeMode.InHeadNoScript;
                return;
            }

            if (name.Is(TagNames.Script))
            {
                var scriptElement = new HtmlScriptElement(_document, parserInserted: true, started: IsFragmentCase);
                AddElement(scriptElement, token.AsTag());
                _tokenizer.State = HtmlParseMode.Script;
                _previousMode = _currentMode;
                _currentMode = HtmlTreeMode.Text;
                return;
            }

            if (name.Is(TagNames.Head))
            {
                RaiseErrorOccurred(HtmlParseError.HeadTagMisplaced, token);
                return;
            }

            if (name.Is(TagNames.Template))
            {
                AddElement(new HtmlTemplateElement(_document), token.AsTag());
                _formattingElements.AddScopeMarker();
                _frameset = false;
                _currentMode = HtmlTreeMode.InTemplate;
                _templateModes.Push(HtmlTreeMode.InTemplate);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Head))
            {
                CloseCurrentNode();
                _currentMode = HtmlTreeMode.AfterHead;
                _waiting = _document.WaitForReady();
                return;
            }

            if (name.Is(TagNames.Template))
            {
                if (TagCurrentlyOpen(TagNames.Template))
                {
                    GenerateImpliedEndTags();

                    if (!CurrentNode.LocalName.Is(TagNames.Template))
                    {
                        RaiseErrorOccurred(HtmlParseError.TagClosingMismatch, token);
                    }

                    CloseTemplate();
                }
                else
                {
                    RaiseErrorOccurred(HtmlParseError.TagInappropriate, token);
                }

                return;
            }

            if (!name.IsOneOf(TagNames.Html, TagNames.Body, TagNames.Br))
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
                return;
            }

            break;
        }
    }

    // Anything else: leave the head and reprocess the token in "after head".
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.AfterHead;
    AfterHead(token);
}
/// <summary>
/// Handles a colgroup end tag in the InColumnGroup state: closes the current node
/// and returns to the InTable mode if the current node is a colgroup.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the colgroup was closed; false if an error was raised and the token ignored.</returns>
Boolean InColumnGroupEndTagColgroup(HtmlToken token)
{
    if (!CurrentNode.LocalName.Is(TagNames.Colgroup))
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
        return false;
    }

    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InTable;
    return true;
}
/// <summary>
/// Handles a body end tag in the InBody state: when a body element is in scope,
/// checks the open elements and switches to the AfterBody mode.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the mode was switched; false if an error was raised and the token ignored.</returns>
Boolean InBodyEndTagBody(HtmlToken token)
{
    if (!IsInScope(TagNames.Body))
    {
        RaiseErrorOccurred(HtmlParseError.BodyNotInScope, token);
        return false;
    }

    CheckBodyOnClosing(token);
    _currentMode = HtmlTreeMode.AfterBody;
    return true;
}
/// <summary>
/// Tree construction step for the "after after frameset" insertion mode
/// (section 8.2.5.4.23). Unhandled tokens raise a TokenNotPossible error.
/// </summary>
/// <param name="token">The token to process.</param>
void AfterAfterFrameset(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Comment:
        {
            _document.AddComment(token);
            return;
        }
        case HtmlTokenType.Character:
        {
            var trimmed = token.TrimStart();
            ReconstructFormatting();
            AddCharacters(trimmed);

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.Doctype:
        {
            InBody(token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.Is(TagNames.NoFrames))
            {
                InHead(token);
                return;
            }

            break;
        }
        case HtmlTokenType.EndOfFile:
        {
            End();
            return;
        }
    }

    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
}
/// <summary>
/// Handles an option end tag in the InSelect state: closes the current node when
/// it is an option element, otherwise raises a parse error.
/// </summary>
/// <param name="token">The actual tag token.</param>
void InSelectEndTagOption(HtmlToken token)
{
    if (!CurrentNode.LocalName.Is(TagNames.Option))
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
        return;
    }

    CloseCurrentNode();
}
/// <summary>
/// Appends a node built from the token's data to the document: a processing
/// instruction when the token is flagged as one, otherwise a comment.
/// </summary>
/// <param name="parent">The document receiving the node.</param>
/// <param name="token">The token supplying the data.</param>
public static void AddComment(this Document parent, HtmlToken token)
{
    Node node;

    if (token.IsProcessingInstruction)
    {
        node = ProcessingInstruction.Create(parent, token.Data);
    }
    else
    {
        node = new Comment(parent, token.Data);
    }

    parent.AddNode(node);
}
/// <summary>
/// Handles a p end tag in the InBody state. When the button-scope check fails, an
/// error is raised, a p start tag is processed and the end tag is handled again.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the button-scope check passed on this call; otherwise false.</returns>
Boolean InBodyEndTagParagraph(HtmlToken token)
{
    if (!IsInButtonScope())
    {
        RaiseErrorOccurred(HtmlParseError.ParagraphNotInScope, token);

        // Open a paragraph and handle the end tag again so that it gets closed.
        InBody(HtmlTagToken.Open(TagNames.P));
        InBodyEndTagParagraph(token);
        return false;
    }

    GenerateImpliedEndTagsExceptFor(TagNames.P);

    if (!CurrentNode.LocalName.Is(TagNames.P))
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
    }

    ClearStackBackTo(TagNames.P);
    CloseCurrentNode();
    return true;
}
// A token created with (17, 5) must report 22 as its End.
public void EndTest()
{
    var token = new HtmlToken(17, 5);

    Assert.Equal(22, token.End);
}
// A token created with (17, 22) must report 17 as its Start.
public void StartTest()
{
    var token = new HtmlToken(17, 22);

    Assert.Equal(17, token.Start);
}
/// <summary>
/// Appends a comment node carrying the token's data to the document.
/// </summary>
/// <param name="parent">The document receiving the comment.</param>
/// <param name="token">The token supplying the comment data.</param>
public static void AddComment(this Document parent, HtmlToken token)
{
    var comment = new Comment(parent, token.Data);
    parent.AddNode(comment);
}
// A token created with (-1, 0) must report a Length of 0.
public void TokenConstructorTest()
{
    var token = new HtmlToken(-1, 0);

    Assert.Equal(0, token.Length);
}
/// <summary>
/// Sets the <c>EndTag</c> flag on the token being built.
/// </summary>
public void SetEndTag()
{
    htmlToken.flags |= HtmlToken.TagFlags.EndTag;
}
/// <summary>
/// Appends a comment node carrying the token's data to the element. The comment
/// is created for the element's owner.
/// </summary>
/// <param name="parent">The element receiving the comment.</param>
/// <param name="token">The token supplying the comment data.</param>
public static void AddComment(this Element parent, HtmlToken token)
{
    var comment = new Comment(parent.Owner, token.Data);
    parent.AddNode(comment);
}
/// <summary>
/// Handles a tr end tag in the InRow state: when a tr is in table scope, clears
/// the stack back to it, closes it and switches to the InTableBody mode.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the row was closed; false if an error was raised and the token ignored.</returns>
Boolean InRowEndTagTablerow(HtmlToken token)
{
    if (!IsInTableScope(TagNames.Tr))
    {
        RaiseErrorOccurred(HtmlParseError.TableRowNotInScope, token);
        return false;
    }

    ClearStackBackTo(TagNames.Tr);
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InTableBody;
    return true;
}
/// <summary>
/// Tree construction step for the "after after body" insertion mode
/// (section 8.2.5.4.22). Unhandled tokens raise an error, switch back to the
/// InBody mode and are reprocessed there.
/// </summary>
/// <param name="token">The token to process.</param>
void AfterAfterBody(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            var trimmed = token.TrimStart();
            ReconstructFormatting();
            AddCharacters(trimmed);

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.EndOfFile:
        {
            End();
            return;
        }
        case HtmlTokenType.Comment:
        {
            _document.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            InBody(token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            if (token.Name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            break;
        }
    }

    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
    _currentMode = HtmlTreeMode.InBody;
    InBody(token);
}
/// <summary>
/// Handles a td or th end tag in the InCell state: when a table cell is in table
/// scope, closes it, clears the formatting elements and switches to the InRow mode.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the cell was closed; false if an error was raised and the token ignored.</returns>
Boolean InCellEndTagCell(HtmlToken token)
{
    if (!IsInTableScope(TagNames.AllTableCells))
    {
        RaiseErrorOccurred(HtmlParseError.TableCellNotInScope, token);
        return false;
    }

    GenerateImpliedEndTags();

    if (!TagNames.AllTableCells.Contains(CurrentNode.LocalName))
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
    }

    ClearStackBackTo(TagNames.AllTableCells);
    CloseCurrentNode();
    _formattingElements.ClearFormatting();
    _currentMode = HtmlTreeMode.InRow;
    return true;
}
/// <summary>
/// Replaces the top entry of the template mode stack with <paramref name="mode"/>,
/// makes it the current insertion mode and reprocesses the token in that mode.
/// </summary>
/// <param name="token">The token to reprocess.</param>
/// <param name="mode">The mode that replaces the current template mode.</param>
void TemplateStep(HtmlToken token, HtmlTreeMode mode)
{
    // Swap the top of the template mode stack.
    _templateModes.Pop();
    _templateModes.Push(mode);

    _currentMode = mode;
    Home(token);
}
/// <summary>
/// Dispatches the token to the handler of the current insertion mode. Modes
/// without a case below are ignored.
/// </summary>
/// <param name="token">The token to insert / use.</param>
void Home(HtmlToken token)
{
    switch (_currentMode)
    {
        case HtmlTreeMode.Initial:
            Initial(token);
            break;

        case HtmlTreeMode.BeforeHtml:
            BeforeHtml(token);
            break;

        case HtmlTreeMode.BeforeHead:
            BeforeHead(token);
            break;

        case HtmlTreeMode.InHead:
            InHead(token);
            break;

        case HtmlTreeMode.InHeadNoScript:
            InHeadNoScript(token);
            break;

        case HtmlTreeMode.AfterHead:
            AfterHead(token);
            break;

        case HtmlTreeMode.InBody:
            InBody(token);
            break;

        case HtmlTreeMode.Text:
            Text(token);
            break;

        case HtmlTreeMode.InTable:
            InTable(token);
            break;

        case HtmlTreeMode.InCaption:
            InCaption(token);
            break;

        case HtmlTreeMode.InColumnGroup:
            InColumnGroup(token);
            break;

        case HtmlTreeMode.InTableBody:
            InTableBody(token);
            break;

        case HtmlTreeMode.InRow:
            InRow(token);
            break;

        case HtmlTreeMode.InCell:
            InCell(token);
            break;

        case HtmlTreeMode.InSelect:
            InSelect(token);
            break;

        case HtmlTreeMode.InSelectInTable:
            InSelectInTable(token);
            break;

        case HtmlTreeMode.InTemplate:
            InTemplate(token);
            break;

        case HtmlTreeMode.AfterBody:
            AfterBody(token);
            break;

        case HtmlTreeMode.InFrameset:
            InFrameset(token);
            break;

        case HtmlTreeMode.AfterFrameset:
            AfterFrameset(token);
            break;

        case HtmlTreeMode.AfterAfterBody:
            AfterAfterBody(token);
            break;

        case HtmlTreeMode.AfterAfterFrameset:
            AfterAfterFrameset(token);
            break;
    }
}
/// <summary>
/// Handles an optgroup end tag in the InSelect state. An option that sits directly
/// on top of an optgroup is closed first; then the optgroup itself is closed, or an
/// error is raised if the current node is not an optgroup.
/// </summary>
/// <param name="token">The actual tag token.</param>
void InSelectEndTagOptgroup(HtmlToken token)
{
    var count = _openElements.Count;

    if (count > 1 &&
        _openElements[count - 1].LocalName.Is(TagNames.Option) &&
        _openElements[count - 2].LocalName.Is(TagNames.Optgroup))
    {
        CloseCurrentNode();
    }

    if (!CurrentNode.LocalName.Is(TagNames.Optgroup))
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
        return;
    }

    CloseCurrentNode();
}
/// <summary>
/// Reports a parse error through the tokenizer, using the position of the
/// given token.
/// </summary>
/// <param name="code">The associated error code.</param>
/// <param name="token">The token whose position is reported.</param>
void RaiseErrorOccurred(HtmlParseError code, HtmlToken token)
{
    var position = token.Position;
    _tokenizer.RaiseErrorOccurred(code, position);
}
/// <summary>
/// Performs the InBody state with foster parenting: the <c>_foster</c> flag is set
/// for the duration of the InBody call and always cleared afterwards.
/// </summary>
/// <param name="token">The given token.</param>
void InBodyWithFoster(HtmlToken token)
{
    _foster = true;

    try
    {
        InBody(token);
    }
    finally
    {
        // Reset the flag even if InBody throws, so that a failure here cannot
        // leave later insertions in foster-parenting mode.
        _foster = false;
    }
}
/// <summary>
/// Tree construction step for the "before html" insertion mode (section 8.2.5.4.2).
/// Unhandled tokens cause an html start tag to be processed here first; the token
/// is then handed to the "before head" step.
/// </summary>
/// <param name="token">The token to process.</param>
void BeforeHtml(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            token.TrimStart();

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.Comment:
        {
            _document.AddComment(token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            if (token.Name.Is(TagNames.Html))
            {
                AddRoot(token.AsTag());
                _currentMode = HtmlTreeMode.BeforeHead;
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            if (!TagNames.AllBeforeHead.Contains(token.Name))
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
                return;
            }

            break;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
    }

    // Create the root via a synthetic html start tag, then continue with the token.
    BeforeHtml(HtmlTagToken.Open(TagNames.Html));
    BeforeHead(token);
}
/// <summary>
/// Routes a token either to the handler of the current insertion mode or to the
/// foreign-content rules, depending on the adjusted current node and the token.
/// </summary>
/// <param name="token">The token to consume.</param>
void Consume(HtmlToken token)
{
    var node = AdjustedCurrentNode;

    // Same conditions, in the same short-circuit order, as a single expression.
    var useInsertionMode =
        node == null ||
        token.Type == HtmlTokenType.EndOfFile ||
        node.Flags.HasFlag(NodeFlags.HtmlMember) ||
        (node.Flags.HasFlag(NodeFlags.HtmlTip) && token.IsHtmlCompatible) ||
        (node.Flags.HasFlag(NodeFlags.MathTip) && token.IsMathCompatible) ||
        (node.Flags.HasFlag(NodeFlags.MathMember) && token.IsSvg && node.LocalName.Is(TagNames.AnnotationXml));

    if (useInsertionMode)
    {
        Home(token);
    }
    else
    {
        Foreign(token);
    }
}
/// <summary>
/// Finishes the current tag token (or the current chunk of it) and marks the token as valid.
/// </summary>
/// <remarks>
/// When <paramref name="complete"/> is true, the part that is still open is ended according
/// to <c>state</c> (21: tag text, 22: tag name, 24: attribute name, 26: attribute value,
/// followed by <c>EndAttribute</c> if the state is then 25 or 27; state 23 needs no such step).
/// A sentinel run is added, the state becomes 6 and <c>TagPartMajor.End</c> is set.
/// Note that <c>state</c> is re-read after <c>EndAttributeName</c>/<c>EndValue</c>, so the
/// order of the checks matters. NOTE(review): the meaning of the numeric states is not
/// visible here; the names above are taken from the helper methods that are called.
///
/// When <paramref name="complete"/> is false and <c>state</c> is 24 or higher, the method
/// either records carry-over run information (<c>numCarryOverRuns</c>,
/// <c>carryOverRunsHeadOffset</c>, <c>carryOverRunsLength</c>) and shortens
/// <c>whole.tail</c> by the carried-over runs, or pads an empty attribute name/value with a
/// null run and advances <c>attributeTail</c>. For lower states an empty name or tag text is
/// padded with a null run. A sentinel run is added in every case.
/// </remarks>
/// <param name="complete">True when the tag is fully finished; false for a partial tag.</param>
public void EndTag(bool complete) { if (complete) { if (state != 23) { if (state == 21) { EndTagText(); } else if (state == 22) { EndTagName(0); } else { if (state == 24) { EndAttributeName(0); } else if (state == 26) { EndValue(); } if (state == 25 || state == 27) { EndAttribute(); } } } AddSentinelRun(); state = 6; HtmlToken expr_85 = htmlToken; expr_85.partMajor |= HtmlToken.TagPartMajor.End; } else if (state >= 24) { if (htmlToken.attributeTail != 0 || htmlToken.name.head != -1 || htmlToken.attributeList[htmlToken.attributeTail].name.head > 0) { AddSentinelRun(); numCarryOverRuns = htmlToken.whole.tail - htmlToken.attributeList[htmlToken.attributeTail].name.head; carryOverRunsHeadOffset = htmlToken.attributeList[htmlToken.attributeTail].name.headOffset; carryOverRunsLength = tailOffset - carryOverRunsHeadOffset; HtmlToken expr_17B_cp_0 = htmlToken; expr_17B_cp_0.whole.tail = expr_17B_cp_0.whole.tail - numCarryOverRuns; } else { if (state == 24) { if (htmlToken.attributeList[htmlToken.attributeTail].name.head == htmlToken.whole.tail) { AddNullRun(HtmlRunKind.Name); } } else if (state == 26 && htmlToken.attributeList[htmlToken.attributeTail].value.head == htmlToken.whole.tail) { AddNullRun(HtmlRunKind.AttrValue); } AddSentinelRun(); htmlToken.attributeTail++; } } else { if (state == 22) { if (htmlToken.name.head == htmlToken.whole.tail) { AddNullRun(HtmlRunKind.Name); } } else if (state == 21 && htmlToken.unstructured.head == htmlToken.whole.tail) { AddNullRun(HtmlRunKind.TagText); } AddSentinelRun(); } tokenValid = true; }
/// <summary>
/// Handles a table end tag in the InTable state: when a table is in table scope,
/// clears the stack back to it, closes it and resets the insertion mode.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the table was closed; false if an error was raised and the token ignored.</returns>
Boolean InTableEndTagTable(HtmlToken token)
{
    if (!IsInTableScope(TagNames.Table))
    {
        RaiseErrorOccurred(HtmlParseError.TableNotInScope, token);
        return false;
    }

    ClearStackBackTo(TagNames.Table);
    CloseCurrentNode();
    Reset();
    return true;
}
/// <summary>
/// Processes the parser's current token according to <paramref name="tokenId"/>:
/// finishes the output at end of file, routes text to the matching text handler,
/// applies an encoding change, and pushes/pops elements for tags.
/// </summary>
/// <param name="tokenId">The kind of token the parser has just produced.</param>
private void Process(HtmlTokenId tokenId)
{
    token = parser.Token;

    switch (tokenId)
    {
        case HtmlTokenId.EndOfFile:
        {
            if (lineStarted)
            {
                output.OutputNewLine();
                lineStarted = false;
            }

            if (!convertFragment)
            {
                if (injection != null && injection.HaveHead)
                {
                    if (wideGap)
                    {
                        output.OutputNewLine();
                        wideGap = false;
                    }

                    injection.Inject(false, output);
                }

                output.CloseDocument();
                output.Flush();
            }

            endOfFile = true;
            break;
        }
        case HtmlTokenId.Text:
        {
            // Text inside a comment is dropped.
            if (insideComment)
            {
                break;
            }

            if (insideAnchor && urlCompareSink.IsActive)
            {
                token.Text.WriteTo(urlCompareSink);
            }

            if (insidePre)
            {
                ProcessPreformatedText();
            }
            else if (normalizedInput)
            {
                ProcessText();
            }
            else
            {
                NormalizeProcessText();
            }

            break;
        }
        case HtmlTokenId.EncodingChange:
        {
            if (output.OutputCodePageSameAsInput)
            {
                int encodingArgument = token.Argument;
                output.OutputEncoding = Charset.GetEncoding(encodingArgument);
            }

            break;
        }
        case HtmlTokenId.Tag:
        {
            if (token.TagIndex <= HtmlTagIndex.Unknown)
            {
                break;
            }

            HtmlDtd.TagDefinition definition = GetTagDefinition(token.TagIndex);

            if (normalizedInput)
            {
                if (!token.IsEndTag)
                {
                    if (token.IsTagBegin)
                    {
                        PushElement(definition);
                    }

                    ProcessStartTagAttributes(definition);
                }
                else if (token.IsTagBegin)
                {
                    PopElement(definition);
                }
            }
            else
            {
                // Not normalized: the push/pop is surrounded by the fill helpers.
                if (!token.IsEndTag)
                {
                    if (token.IsTagBegin)
                    {
                        LFillTagB(definition);
                        PushElement(definition);
                        RFillTagB(definition);
                    }

                    ProcessStartTagAttributes(definition);
                }
                else if (token.IsTagBegin)
                {
                    LFillTagE(definition);
                    PopElement(definition);
                    RFillTagE(definition);
                }
            }

            break;
        }
        case HtmlTokenId.Restart:
        case HtmlTokenId.OverlappedClose:
        case HtmlTokenId.OverlappedReopen:
            // Nothing to do for these token kinds.
            break;

        default:
            break;
    }
}
/// <summary>
/// Handles a caption end tag in the InCaption state: when a caption is in table
/// scope, closes it, clears the formatting elements and switches to the InTable mode.
/// </summary>
/// <param name="token">The actual tag token.</param>
/// <returns>True if the caption was closed; false if an error was raised and the token ignored.</returns>
Boolean InCaptionEndTagCaption(HtmlToken token)
{
    if (!IsInTableScope(TagNames.Caption))
    {
        RaiseErrorOccurred(HtmlParseError.CaptionNotInScope, token);
        return false;
    }

    GenerateImpliedEndTags();

    if (!CurrentNode.LocalName.Is(TagNames.Caption))
    {
        RaiseErrorOccurred(HtmlParseError.TagDoesNotMatchCurrentNode, token);
    }

    ClearStackBackTo(TagNames.Caption);
    CloseCurrentNode();
    _formattingElements.ClearFormatting();
    _currentMode = HtmlTreeMode.InTable;
    return true;
}
/// <summary>
/// Tree construction step for the "in select" insertion mode (section 8.2.5.4.16).
/// </summary>
/// <param name="token">The token to process.</param>
void InSelect(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            AddCharacters(token.Data);
            return;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.Is(TagNames.Option))
            {
                // A new option implicitly ends an open option.
                if (CurrentNode.LocalName.Is(TagNames.Option))
                {
                    InSelectEndTagOption(token);
                }

                AddElement(new HtmlOptionElement(_document), token.AsTag());
                return;
            }

            if (name.Is(TagNames.Optgroup))
            {
                // A new optgroup implicitly ends an open option and an open optgroup.
                if (CurrentNode.LocalName.Is(TagNames.Option))
                {
                    InSelectEndTagOption(token);
                }

                if (CurrentNode.LocalName.Is(TagNames.Optgroup))
                {
                    InSelectEndTagOptgroup(token);
                }

                AddElement(new HtmlOptionsGroupElement(_document), token.AsTag());
                return;
            }

            if (name.Is(TagNames.Select))
            {
                RaiseErrorOccurred(HtmlParseError.SelectNesting, token);
                InSelectEndTagSelect();
                return;
            }

            if (TagNames.AllInput.Contains(name))
            {
                RaiseErrorOccurred(HtmlParseError.IllegalElementInSelectDetected, token);

                if (IsInSelectScope(TagNames.Select))
                {
                    InSelectEndTagSelect();
                    Home(token);
                }

                return;
            }

            if (name.IsOneOf(TagNames.Template, TagNames.Script))
            {
                InHead(token);
                return;
            }

            RaiseErrorOccurred(HtmlParseError.IllegalElementInSelectDetected, token);
            return;
        }
        case HtmlTokenType.EndTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Template))
            {
                InHead(token);
            }
            else if (name.Is(TagNames.Optgroup))
            {
                InSelectEndTagOptgroup(token);
            }
            else if (name.Is(TagNames.Option))
            {
                InSelectEndTagOption(token);
            }
            else if (name.Is(TagNames.Select))
            {
                if (IsInSelectScope(TagNames.Select))
                {
                    InSelectEndTagSelect();
                }
                else
                {
                    RaiseErrorOccurred(HtmlParseError.SelectNotInScope, token);
                }
            }
            else
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            }

            return;
        }
        case HtmlTokenType.EndOfFile:
        {
            InBody(token);
            return;
        }
        default:
        {
            RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
            return;
        }
    }
}
/// <summary>
/// Applies the rules for parsing tokens in foreign content (section 8.2.5.5).
/// Token types without a case below are ignored.
/// </summary>
/// <param name="token">The token to examine.</param>
void Foreign(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            var text = token.Data.Replace(Symbols.Null, Symbols.Replacement);
            AddCharacters(text);

            if (token.HasContent)
            {
                _frameset = false;
            }

            return;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;
            var tag = token.AsTag();
            bool handleAsNormalTag;

            if (name.Is(TagNames.Font))
            {
                // A font tag only counts as a normal tag when it carries a
                // color, face or size attribute.
                handleAsNormalTag = false;

                for (var i = 0; i != tag.Attributes.Count; i++)
                {
                    if (tag.Attributes[i].Key.IsOneOf(AttributeNames.Color, AttributeNames.Face, AttributeNames.Size))
                    {
                        handleAsNormalTag = true;
                        break;
                    }
                }
            }
            else
            {
                handleAsNormalTag = TagNames.AllForeignExceptions.Contains(name);
            }

            if (handleAsNormalTag)
            {
                ForeignNormalTag(tag);
            }
            else
            {
                ForeignSpecialTag(tag);
            }

            return;
        }
        case HtmlTokenType.EndTag:
        {
            var name = token.Name;
            var node = CurrentNode;
            var script = node as HtmlScriptElement;

            if (script != null)
            {
                HandleScript(script);
                return;
            }

            if (!node.LocalName.Is(name))
            {
                RaiseErrorOccurred(HtmlParseError.TagClosingMismatch, token);
            }

            // Walk down the stack of open elements looking for a match
            // (compared with Isi); stop at the first node flagged HtmlMember.
            for (int i = _openElements.Count - 1; i > 0; i--)
            {
                if (node.LocalName.Isi(name))
                {
                    _openElements.RemoveRange(i + 1, _openElements.Count - i - 1);
                    CloseCurrentNode();
                    break;
                }

                node = _openElements[i - 1];

                if (node.Flags.HasFlag(NodeFlags.HtmlMember))
                {
                    Home(token);
                    break;
                }
            }

            return;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
    }
}
/// <summary>
/// Tree construction step for the "in select in table" insertion mode
/// (section 8.2.5.4.17). Table-related start and end tags are handled here;
/// every other token is handled by the "in select" rules.
/// </summary>
/// <param name="token">The token to process.</param>
void InSelectInTable(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.StartTag:
        {
            if (TagNames.AllTableSelects.Contains(token.Name))
            {
                RaiseErrorOccurred(HtmlParseError.IllegalElementInSelectDetected, token);
                InSelectEndTagSelect();
                Home(token);
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            var name = token.Name;

            if (TagNames.AllTableSelects.Contains(name))
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);

                if (IsInTableScope(name))
                {
                    InSelectEndTagSelect();
                    Home(token);
                }

                return;
            }

            break;
        }
    }

    InSelect(token);
}
/// <summary>
/// Raises a BodyClosedWrong parse error, at most once, if any node on the stack
/// of open elements does not carry the ImplicitelyClosed flag.
/// </summary>
/// <param name="token">The token reported with the error.</param>
void CheckBodyOnClosing(HtmlToken token)
{
    foreach (var element in _openElements)
    {
        if (element.Flags.HasFlag(NodeFlags.ImplicitelyClosed))
        {
            continue;
        }

        // One offending element is enough; report it and stop looking.
        RaiseErrorOccurred(HtmlParseError.BodyClosedWrong, token);
        break;
    }
}
/// <summary>
/// Tree construction step for the "in template" insertion mode (section 8.2.5.4.18).
/// </summary>
/// <param name="token">The token to process.</param>
void InTemplate(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Script) || TagNames.AllHead.Contains(name))
            {
                InHead(token);
                return;
            }

            // Pick the insertion mode the template content continues in.
            HtmlTreeMode nextMode;

            if (TagNames.AllTableRoot.Contains(name))
            {
                nextMode = HtmlTreeMode.InTable;
            }
            else if (name.Is(TagNames.Col))
            {
                nextMode = HtmlTreeMode.InColumnGroup;
            }
            else if (name.Is(TagNames.Tr))
            {
                nextMode = HtmlTreeMode.InTableBody;
            }
            else if (TagNames.AllTableCells.Contains(name))
            {
                nextMode = HtmlTreeMode.InRow;
            }
            else
            {
                nextMode = HtmlTreeMode.InBody;
            }

            TemplateStep(token, nextMode);
            return;
        }
        case HtmlTokenType.EndTag:
        {
            if (token.Name.Is(TagNames.Template))
            {
                InHead(token);
            }
            else
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            }

            return;
        }
        case HtmlTokenType.EndOfFile:
        {
            if (!TagCurrentlyOpen(TagNames.Template))
            {
                End();
                return;
            }

            RaiseErrorOccurred(HtmlParseError.EOF, token);
            CloseTemplate();
            Home(token);
            return;
        }
        default:
        {
            InBody(token);
            return;
        }
    }
}
/// <summary>
/// Tree construction step for the "initial" insertion mode (section 8.2.5.4.1).
/// A doctype token creates the document type node and sets the quirks mode; other
/// unhandled tokens switch to the "before html" mode and are reprocessed there.
/// </summary>
/// <param name="token">The token to process.</param>
void Initial(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Doctype:
        {
            var doctype = (HtmlDoctypeToken)token;

            if (!doctype.IsValid)
            {
                RaiseErrorOccurred(HtmlParseError.DoctypeInvalid, token);
            }

            var documentType = new DocumentType(_document, doctype.Name ?? String.Empty);
            documentType.SystemIdentifier = doctype.SystemIdentifier;
            documentType.PublicIdentifier = doctype.PublicIdentifier;
            _document.AddNode(documentType);

            if (doctype.IsFullQuirks)
            {
                _document.QuirksMode = QuirksMode.On;
            }
            else if (doctype.IsLimitedQuirks)
            {
                _document.QuirksMode = QuirksMode.Limited;
            }

            _currentMode = HtmlTreeMode.BeforeHtml;
            return;
        }
        case HtmlTokenType.Character:
        {
            token.TrimStart();

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.Comment:
        {
            _document.AddComment(token);
            return;
        }
    }

    // No doctype was seen: unless the document is embedded, this is an error
    // and the document is put into quirks mode.
    if (_options.IsEmbedded == false)
    {
        RaiseErrorOccurred(HtmlParseError.DoctypeMissing, token);
        _document.QuirksMode = QuirksMode.On;
    }

    _currentMode = HtmlTreeMode.BeforeHtml;
    BeforeHtml(token);
}
/// <summary>
/// Tree construction step for the "after body" insertion mode (section 8.2.5.4.19).
/// Unhandled tokens raise an error, switch back to the InBody mode and are
/// reprocessed there.
/// </summary>
/// <param name="token">The token to process.</param>
void AfterBody(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            var trimmed = token.TrimStart();
            ReconstructFormatting();
            AddCharacters(trimmed);

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.Comment:
        {
            // Comments are attached to the first entry of the stack of open elements.
            _openElements[0].AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
        case HtmlTokenType.StartTag:
        {
            if (!token.Name.Is(TagNames.Html))
            {
                break;
            }

            InBody(token);
            return;
        }
        case HtmlTokenType.EndTag:
        {
            if (!token.Name.Is(TagNames.Html))
            {
                break;
            }

            if (IsFragmentCase)
            {
                RaiseErrorOccurred(HtmlParseError.TagInvalidInFragmentMode, token);
            }
            else
            {
                _currentMode = HtmlTreeMode.AfterAfterBody;
            }

            return;
        }
        case HtmlTokenType.EndOfFile:
        {
            End();
            return;
        }
    }

    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
    _currentMode = HtmlTreeMode.InBody;
    InBody(token);
}
/// <summary>
/// Tree construction step for the "before head" insertion mode (section 8.2.5.4.3).
/// Unhandled tokens cause a head start tag to be processed here first; the token
/// is then handed to the "in head" step.
/// </summary>
/// <param name="token">The token to process.</param>
void BeforeHead(HtmlToken token)
{
    switch (token.Type)
    {
        case HtmlTokenType.Character:
        {
            token.TrimStart();

            if (token.IsEmpty)
            {
                return;
            }

            break;
        }
        case HtmlTokenType.StartTag:
        {
            var name = token.Name;

            if (name.Is(TagNames.Html))
            {
                InBody(token);
                return;
            }

            if (name.Is(TagNames.Head))
            {
                AddElement(new HtmlHeadElement(_document), token.AsTag());
                _currentMode = HtmlTreeMode.InHead;
                return;
            }

            break;
        }
        case HtmlTokenType.EndTag:
        {
            if (!TagNames.AllBeforeHead.Contains(token.Name))
            {
                RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
                return;
            }

            break;
        }
        case HtmlTokenType.Comment:
        {
            CurrentNode.AddComment(token);
            return;
        }
        case HtmlTokenType.Doctype:
        {
            RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
            return;
        }
    }

    // Create the head via a synthetic head start tag, then continue with the token.
    BeforeHead(HtmlTagToken.Open(TagNames.Head));
    InHead(token);
}
/// <summary>
/// Handles a token while the tree builder is in the "in frameset" insertion
/// mode (see 8.2.5.4.20).
/// </summary>
/// <param name="token">The passed token.</param>
void InFrameset(HtmlToken token)
{
    var kind = token.Type;

    if (kind == HtmlTokenType.Character)
    {
        // The value returned by TrimStart is inserted as character data.
        var leading = token.TrimStart();
        AddCharacters(leading);

        if (token.IsEmpty)
        {
            return;
        }
    }
    else if (kind == HtmlTokenType.Comment)
    {
        CurrentNode.AddComment(token);
        return;
    }
    else if (kind == HtmlTokenType.Doctype)
    {
        RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
        return;
    }
    else if (kind == HtmlTokenType.StartTag)
    {
        var name = token.Name;

        if (name.Is(TagNames.Html))
        {
            InBody(token);
            return;
        }

        if (name.Is(TagNames.Frameset))
        {
            AddElement(new HtmlFrameSetElement(_document), token.AsTag());
            return;
        }

        if (name.Is(TagNames.Frame))
        {
            // The frame element is added and immediately closed again.
            AddElement(new HtmlFrameElement(_document), token.AsTag(), true);
            CloseCurrentNode();
            return;
        }

        if (name.Is(TagNames.NoFrames))
        {
            InHead(token);
            return;
        }
    }
    else if (kind == HtmlTokenType.EndTag)
    {
        if (token.Name.Is(TagNames.Frameset))
        {
            if (CurrentNode != _openElements[0])
            {
                CloseCurrentNode();

                // Leave this mode only when not parsing a fragment and the
                // new current node is no longer a frameset.
                if (!(IsFragmentCase || CurrentNode.LocalName.Is(TagNames.Frameset)))
                {
                    _currentMode = HtmlTreeMode.AfterFrameset;
                }
            }
            else
            {
                RaiseErrorOccurred(HtmlParseError.CurrentNodeIsRoot, token);
            }

            return;
        }
    }
    else if (kind == HtmlTokenType.EndOfFile)
    {
        if (CurrentNode != _document.DocumentElement)
        {
            RaiseErrorOccurred(HtmlParseError.CurrentNodeIsNotRoot, token);
        }

        End();
        return;
    }

    // Every token not handled above is reported and otherwise ignored.
    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
}
/// <summary>
/// Handles a token while the tree builder is in the "in head noscript"
/// insertion mode (see 8.2.5.4.5).
/// </summary>
/// <param name="token">The passed token.</param>
void InHeadNoScript(HtmlToken token)
{
    var kind = token.Type;

    if (kind == HtmlTokenType.Comment)
    {
        InHead(token);
        return;
    }

    if (kind == HtmlTokenType.Doctype)
    {
        RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
        return;
    }

    if (kind == HtmlTokenType.Character)
    {
        // The value returned by TrimStart is inserted as character data.
        var leading = token.TrimStart();
        AddCharacters(leading);

        if (token.IsEmpty)
        {
            return;
        }
    }
    else if (kind == HtmlTokenType.StartTag)
    {
        var name = token.Name;

        if (TagNames.AllNoScript.Contains(name))
        {
            InHead(token);
            return;
        }

        if (name.Is(TagNames.Html))
        {
            InBody(token);
            return;
        }

        if (name.IsOneOf(TagNames.Head, TagNames.NoScript))
        {
            RaiseErrorOccurred(HtmlParseError.TagInappropriate, token);
            return;
        }
    }
    else if (kind == HtmlTokenType.EndTag)
    {
        var name = token.Name;

        if (name.Is(TagNames.NoScript))
        {
            CloseCurrentNode();
            _currentMode = HtmlTreeMode.InHead;
            return;
        }

        // Only </br> continues to the generic handling below; any other
        // end tag is reported and dropped.
        if (!name.Is(TagNames.Br))
        {
            RaiseErrorOccurred(HtmlParseError.TagCannotEndHere, token);
            return;
        }
    }

    // Anything else: report it, close the current node, and reprocess the
    // token in the "in head" mode.
    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
    CloseCurrentNode();
    _currentMode = HtmlTreeMode.InHead;
    InHead(token);
}
/// <summary>
/// Handles a token while the tree builder is in the "after frameset"
/// insertion mode (see 8.2.5.4.21).
/// </summary>
/// <param name="token">The passed token.</param>
void AfterFrameset(HtmlToken token)
{
    var kind = token.Type;

    if (kind == HtmlTokenType.Comment)
    {
        CurrentNode.AddComment(token);
        return;
    }

    if (kind == HtmlTokenType.Doctype)
    {
        RaiseErrorOccurred(HtmlParseError.DoctypeTagInappropriate, token);
        return;
    }

    if (kind == HtmlTokenType.EndOfFile)
    {
        End();
        return;
    }

    if (kind == HtmlTokenType.Character)
    {
        // The value returned by TrimStart is inserted as character data.
        var leading = token.TrimStart();
        AddCharacters(leading);

        if (token.IsEmpty)
        {
            return;
        }
    }
    else if (kind == HtmlTokenType.StartTag)
    {
        var name = token.Name;

        if (name.Is(TagNames.Html))
        {
            InBody(token);
            return;
        }

        if (name.Is(TagNames.NoFrames))
        {
            InHead(token);
            return;
        }
    }
    else if (kind == HtmlTokenType.EndTag)
    {
        if (token.Name.Is(TagNames.Html))
        {
            _currentMode = HtmlTreeMode.AfterAfterFrameset;
            return;
        }
    }

    // Every token not handled above is reported and otherwise ignored.
    RaiseErrorOccurred(HtmlParseError.TokenNotPossible, token);
}
/// <summary>
/// Downloads the Yahoo Taiwan quote page for a stock, saves it to a scratch
/// file under "HttpTmp", and extracts the quote fields found between the
/// page's start and end marker links for that stock.
/// </summary>
/// <param name="StockID">
/// Stock identifier; its <c>ToString()</c> value is used in the request URL,
/// the scratch file name and the section markers.
/// </param>
/// <returns>
/// A dictionary with the keys "UpdateTie" (last update time), "NowPrice",
/// "Result" (raw change text), "Number" and "ResultNumber". "ResultNumber"
/// is the integer 0 when the change text contains "-" or no arrow marker,
/// otherwise a string holding the signed change.
/// </returns>
/// <exception cref="System.InvalidOperationException">
/// The quote section was not found in the downloaded page, or it contained
/// fewer than the six fields this method reads.
/// </exception>
public static Dictionary<string, object> GetStockPriceFromHttp(object StockID)
{
    string strStockID = StockID.ToString();

    // Make sure the scratch directory exists.
    Utility.MakeDir("HttpTmp");

    // Build the scratch file name and reset the file.
    string strLogName = "HttpTmp/" + strStockID + ".tmp";
    Utility.ResetFile(strLogName);

    // Wait before hitting the network (presumably to throttle requests - confirm).
    System.Threading.Thread.Sleep(300);

    // Fetch the page and keep a copy on disk.
    string strTargetURL = "http://tw.stock.yahoo.com/q/q?s=" + strStockID;
    string strData = Utility.HttpGet(strTargetURL);
    Utility.WriteFile(strLogName, strData);

    // The quote fields sit between these two links in the page.
    string strStartToken = "/pf/pfsel?stocklist=" + strStockID;
    string strEndToken = "/q/ts?s=" + strStockID;

    List<string> listToken = null;
    System.Text.Encoding encode = System.Text.Encoding.GetEncoding("utf-8");

    // 'using' guarantees the reader is closed even if parsing throws.
    using (System.IO.StreamReader reader = new System.IO.StreamReader(strLogName, encode))
    {
        string strLine;
        while ((strLine = reader.ReadLine()) != null)
        {
            strLine = Utility.Strip(strLine);
            if (strLine.Length < 3)
            {
                continue;
            }

            if (strLine.Contains(strEndToken))
            {
                break;
            }

            if (strLine.Contains(strStartToken))
            {
                // Start (or restart) collecting from this point.
                listToken = new List<string>();
                continue;
            }

            // Lines before the start marker are not needed.
            if (listToken == null)
            {
                continue;
            }

            // Split the line into HTML tokens and keep the interesting ones.
            List<string> listHtmlToken = ChangeHtmlToken(strLine);
            foreach (string strHtmlToken in listHtmlToken)
            {
                // An empty token has no first character to inspect.
                if (string.IsNullOrEmpty(strHtmlToken))
                {
                    continue;
                }

                bool keep =
                    Utility.IsDigit(strHtmlToken[0])
                    || strHtmlToken.Contains("▽")
                    || strHtmlToken.Contains("△")
                    // Limit-up / limit-down markers.
                    || strHtmlToken.Contains("▲")
                    || strHtmlToken.Contains("▼")
                    // No trade.
                    || strHtmlToken.Contains("-");

                if (keep)
                {
                    listToken.Add(strHtmlToken);
                }
            }
        }
    }

    // Indices 0, 1, 4 and 5 are read below, so at least six tokens are required.
    if (listToken == null || listToken.Count < 6)
    {
        throw new System.InvalidOperationException(
            "Could not parse the quote section for stock '" + strStockID + "' from " + strTargetURL + ".");
    }

    // Publish the raw fields. The "UpdateTie" key spelling is kept as-is
    // because callers look it up by this exact name.
    Dictionary<string, object> dictResult = new Dictionary<string, object>();
    dictResult["UpdateTie"] = listToken[0]; // last update time
    dictResult["NowPrice"] = listToken[1];  // may be "-"
    dictResult["Result"] = listToken[4];    // still carries the -▼▲▽△ markers
    dictResult["Number"] = listToken[5];    // may contain separators, e.g. 1,200

    // Reduce "Result" to a signed number. Ordinal Contains is used so the
    // marker checks do not depend on the current culture.
    string strToken = listToken[4];
    if (strToken.Contains("-"))
    {
        dictResult["ResultNumber"] = 0;
    }
    else if (strToken.Contains("△") || strToken.Contains("▲"))
    {
        dictResult["ResultNumber"] = Utility.SubString(strToken, 1);
    }
    else if (strToken.Contains("▽") || strToken.Contains("▼"))
    {
        dictResult["ResultNumber"] = "-" + Utility.SubString(strToken, 1);
    }
    else
    {
        dictResult["ResultNumber"] = 0;
    }

    return dictResult;
}
/// <summary>
/// Returns a short label for which part of a tag the token represents:
/// an empty string for a complete tag, otherwise "(begin)", "(end)" or
/// "(middle)".
/// </summary>
/// <param name="token">The token whose tag-part flags are inspected.</param>
/// <returns>The label for the token's tag part.</returns>
private static string SfromTagPart(HtmlToken token)
{
    // Checked in priority order: complete, then begin, then end.
    if (token.IsTagComplete)
    {
        return "";
    }

    if (token.IsTagBegin)
    {
        return "(begin)";
    }

    if (token.IsTagEnd)
    {
        return "(end)";
    }

    return "(middle)";
}