/// <summary>
/// Resets the parser fields and then walks <c>Text</c> one character at a time,
/// dispatching on <c>_state</c> to open and close nodes, node names, attribute names
/// and attribute values. When the loop finishes, the node that is still open is ended
/// at the final index and <c>Lastnodes</c> is cleared.
/// </summary>
/// <remarks>
/// Each iteration loads <c>Text[_index]</c> into <c>_c</c> and then calls
/// <c>IncrementPosition()</c>; the state handlers address the character just read as
/// <c>_index - 1</c>, so that call presumably advances <c>_index</c> by one
/// (NOTE(review): confirm against IncrementPosition).
/// </remarks>
private void Parse()
{
    // Quote character (' or ") that opened the quoted attribute value being read.
    int quoteChar = 0;

    if (OptionComputeChecksum)
    {
        _crc32 = new Crc32();
    }

    Lastnodes = new Dictionary<string, HtmlNode>();
    _c = 0;
    _fullcomment = false;
    _parseerrors = new List<HtmlParseError>();
    _line = 1;
    _lineposition = 1;
    _maxlineposition = 1;
    _state = HtmlDocument.ParseState.Text;
    _oldstate = _state;
    _documentnode._innerlength = Text.Length;
    _documentnode._outerlength = Text.Length;
    _remainderOffset = Text.Length;
    _lastparentnode = _documentnode;
    _currentnode = CreateNode(HtmlNodeType.Text, 0);
    _currentattribute = null;
    _index = 0;
    PushNodeStart(HtmlNodeType.Text, 0);

    while (_index < Text.Length)
    {
        _c = Text[_index];
        IncrementPosition();

        // Nothing follows the switch inside the loop, so leaving the switch with
        // 'break' simply moves on to the next character.
        switch (_state)
        {
            case HtmlDocument.ParseState.Text:
                // Only a '<' matters here; NewCheck does all the work for it.
                NewCheck();
                break;

            case HtmlDocument.ParseState.WhichTag:
                if (NewCheck())
                {
                    break;
                }

                if (_c == '/')
                {
                    // Slash right after '<': the name starts after the slash.
                    PushNodeNameStart(false, _index);
                }
                else
                {
                    // The character just read is the first character of the name;
                    // step back so the Tag state sees it again.
                    PushNodeNameStart(true, _index - 1);
                    DecrementPosition();
                }

                _state = HtmlDocument.ParseState.Tag;
                break;

            case HtmlDocument.ParseState.Tag:
                if (NewCheck())
                {
                    break;
                }

                if (HtmlDocument.IsWhiteSpace(_c))
                {
                    PushNodeNameEnd(_index - 1);

                    // The state is re-checked because the call above may have changed it.
                    if (_state == HtmlDocument.ParseState.Tag)
                    {
                        _state = HtmlDocument.ParseState.BetweenAttributes;
                    }
                }
                else if (_c == '/')
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state == HtmlDocument.ParseState.Tag)
                    {
                        _state = HtmlDocument.ParseState.EmptyTag;
                    }
                }
                else if (_c == '>')
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state == HtmlDocument.ParseState.Tag)
                    {
                        EndNodeAndResumeText(false, HtmlDocument.ParseState.Tag);
                    }
                }

                break;

            case HtmlDocument.ParseState.BetweenAttributes:
                if (NewCheck() || HtmlDocument.IsWhiteSpace(_c))
                {
                    break;
                }

                if (_c == '/' || _c == '?')
                {
                    _state = HtmlDocument.ParseState.EmptyTag;
                }
                else if (_c == '>')
                {
                    EndNodeAndResumeText(false, HtmlDocument.ParseState.BetweenAttributes);
                }
                else
                {
                    // Any other character starts an attribute name.
                    PushAttributeNameStart(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeName;
                }

                break;

            case HtmlDocument.ParseState.EmptyTag:
                if (NewCheck())
                {
                    break;
                }

                if (_c == '>')
                {
                    // The node is ended with close = true.
                    EndNodeAndResumeText(true, HtmlDocument.ParseState.EmptyTag);
                }
                else
                {
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                }

                break;

            case HtmlDocument.ParseState.AttributeName:
                if (NewCheck())
                {
                    break;
                }

                if (HtmlDocument.IsWhiteSpace(_c))
                {
                    PushAttributeNameEnd(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeBeforeEquals;
                }
                else if (_c == '=')
                {
                    PushAttributeNameEnd(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeAfterEquals;
                }
                else if (_c == '>')
                {
                    PushAttributeNameEnd(_index - 1);
                    EndNodeAndResumeText(false, HtmlDocument.ParseState.AttributeName);
                }

                break;

            case HtmlDocument.ParseState.AttributeBeforeEquals:
                if (NewCheck() || HtmlDocument.IsWhiteSpace(_c))
                {
                    break;
                }

                if (_c == '>')
                {
                    EndNodeAndResumeText(false, HtmlDocument.ParseState.AttributeBeforeEquals);
                }
                else if (_c == '=')
                {
                    _state = HtmlDocument.ParseState.AttributeAfterEquals;
                }
                else
                {
                    // No '=' followed the name: step back and handle this character
                    // in the BetweenAttributes state.
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                    DecrementPosition();
                }

                break;

            case HtmlDocument.ParseState.AttributeAfterEquals:
                if (NewCheck() || HtmlDocument.IsWhiteSpace(_c))
                {
                    break;
                }

                if (_c == '\'' || _c == '"')
                {
                    _state = HtmlDocument.ParseState.QuotedAttributeValue;
                    PushAttributeValueStart(_index, _c);
                    quoteChar = _c;
                }
                else if (_c == '>')
                {
                    EndNodeAndResumeText(false, HtmlDocument.ParseState.AttributeAfterEquals);
                }
                else
                {
                    // Unquoted value starting at the character just read.
                    PushAttributeValueStart(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeValue;
                }

                break;

            case HtmlDocument.ParseState.AttributeValue:
                if (NewCheck())
                {
                    break;
                }

                if (HtmlDocument.IsWhiteSpace(_c))
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                }
                else if (_c == '>')
                {
                    PushAttributeValueEnd(_index - 1);
                    EndNodeAndResumeText(false, HtmlDocument.ParseState.AttributeValue);
                }

                break;

            case HtmlDocument.ParseState.Comment:
                // A '>' ends the comment; when _fullcomment is set it must also be
                // directly preceded by "--".
                bool endsComment = _c == '>'
                    && (!_fullcomment || (Text[_index - 2] == '-' && Text[_index - 3] == '-'));
                if (!endsComment)
                {
                    break;
                }

                if (!PushNodeEnd(_index, false))
                {
                    // PushNodeEnd asked to stop: jump to the end of the text.
                    _index = Text.Length;
                    break;
                }

                _state = HtmlDocument.ParseState.Text;
                PushNodeStart(HtmlNodeType.Text, _index);
                break;

            case HtmlDocument.ParseState.QuotedAttributeValue:
                if (_c == quoteChar)
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                }
                else if (_c == '<' && _index < Text.Length && Text[_index] == '%')
                {
                    // "<%" inside a quoted value: remember where to come back to.
                    _oldstate = _state;
                    _state = HtmlDocument.ParseState.ServerSideCode;
                }

                break;

            case HtmlDocument.ParseState.ServerSideCode:
                // Wait for the closing "%>".
                if (_c != '%' || _index >= Text.Length || Text[_index] != '>')
                {
                    break;
                }

                if (_oldstate == HtmlDocument.ParseState.BetweenAttributes)
                {
                    PushAttributeNameEnd(_index + 1);
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                }
                else if (_oldstate == HtmlDocument.ParseState.AttributeAfterEquals)
                {
                    _state = HtmlDocument.ParseState.AttributeValue;
                }
                else
                {
                    _state = _oldstate;
                }

                // Called once more for the '>' of "%>".
                IncrementPosition();
                break;

            case HtmlDocument.ParseState.PcData:
                // Look for "</" + current node name (case-insensitive) starting at the
                // character just read, followed by '>' or whitespace.
                int remaining = Text.Length - (_index - 1);
                if (_currentnode._namelength + 3 > remaining)
                {
                    break;
                }

                string candidate = Text.Substring(_index - 1, _currentnode._namelength + 2);
                if (string.Compare(candidate, "</" + _currentnode.Name, StringComparison.OrdinalIgnoreCase) != 0)
                {
                    break;
                }

                int following = Text[_index + 1 + _currentnode.Name.Length];
                if (following != '>' && !HtmlDocument.IsWhiteSpace(following))
                {
                    break;
                }

                // Everything from the end of the current node up to the closing tag
                // becomes one text child of the current node.
                HtmlNode rawText = CreateNode(
                    HtmlNodeType.Text,
                    _currentnode._outerstartindex + _currentnode._outerlength);
                rawText._outerlength = _index - 1 - rawText._outerstartindex;
                _currentnode.AppendChild(rawText);

                PushNodeStart(HtmlNodeType.Element, _index - 1);
                PushNodeNameStart(false, _index + 1);
                _state = HtmlDocument.ParseState.Tag;
                IncrementPosition();
                break;

            default:
                break;
        }
    }

    // End whatever node is still open when the text runs out.
    if (_currentnode._namestartindex > 0)
    {
        PushNodeNameEnd(_index);
    }

    PushNodeEnd(_index, false);
    Lastnodes.Clear();
}

/// <summary>
/// Ends the current node at <c>_index</c>. If <c>PushNodeEnd</c> returns false, moves
/// <c>_index</c> to the end of <c>Text</c> so the parse loop terminates; otherwise, when
/// the state is still <paramref name="expectedState"/>, switches to the Text state and
/// starts a new text node at <c>_index</c>.
/// </summary>
/// <param name="close">Forwarded to <c>PushNodeEnd</c> as its <c>close</c> argument.</param>
/// <param name="expectedState">State the caller was in; if <c>PushNodeEnd</c> changed
/// the state, that new state is left untouched.</param>
private void EndNodeAndResumeText(bool close, HtmlDocument.ParseState expectedState)
{
    if (!PushNodeEnd(_index, close))
    {
        _index = Text.Length;
        return;
    }

    if (_state == expectedState)
    {
        _state = HtmlDocument.ParseState.Text;
        PushNodeStart(HtmlNodeType.Text, _index);
    }
}
/// <summary>
/// Ends the current node at <paramref name="index"/>: records its outer length,
/// attaches it to the last parent node where applicable, and closes it when requested
/// or when it is not a start tag.
/// </summary>
/// <param name="index">Position in <c>Text</c> where the current node ends; the outer
/// length is computed as <c>index - _outerstartindex</c>.</param>
/// <param name="close">True to close the current node after ending it. Forced to true
/// for names reported by <c>HtmlNode.IsClosedElement</c> or
/// <c>HtmlNode.IsEmptyElement</c>.</param>
/// <returns>
/// False only when the node being closed matches <c>OptionStopperNodeName</c> and no
/// remainder has been captured yet; in that case the text from
/// <paramref name="index"/> onward is stored in <c>_remainder</c>. True otherwise.
/// </returns>
private bool PushNodeEnd(int index, bool close)
{
    _currentnode._outerlength = index - _currentnode._outerstartindex;

    bool isTextOrComment =
        _currentnode._nodetype == HtmlNodeType.Text ||
        _currentnode._nodetype == HtmlNodeType.Comment;

    if (isTextOrComment)
    {
        // Empty text/comment nodes are never attached to the tree.
        if (_currentnode._outerlength > 0)
        {
            _currentnode._innerlength = _currentnode._outerlength;
            _currentnode._innerstartindex = _currentnode._outerstartindex;
            if (_lastparentnode != null)
            {
                _lastparentnode.AppendChild(_currentnode);
            }
        }
    }
    else if (_currentnode._starttag && _lastparentnode != _currentnode)
    {
        if (_lastparentnode != null)
        {
            _lastparentnode.AppendChild(_currentnode);
        }

        ReadDocumentEncoding(_currentnode);

        // Link to the previous node with the same name, then record this one as the latest.
        _currentnode._prevwithsamename =
            Utilities.GetDictionaryValueOrNull<string, HtmlNode>(Lastnodes, _currentnode.Name);
        Lastnodes[_currentnode.Name] = _currentnode;

        bool canBeParent =
            _currentnode.NodeType == HtmlNodeType.Document ||
            _currentnode.NodeType == HtmlNodeType.Element;
        if (canBeParent)
        {
            _lastparentnode = _currentnode;
        }

        if (HtmlNode.IsCDataElement(CurrentNodeName()))
        {
            // Switch to the PcData state and leave the node open.
            _state = HtmlDocument.ParseState.PcData;
            return true;
        }

        if (HtmlNode.IsClosedElement(_currentnode.Name) || HtmlNode.IsEmptyElement(_currentnode.Name))
        {
            close = true;
        }
    }

    // Nothing to close: an open start tag that was not asked to be closed.
    if (!close && _currentnode._starttag)
    {
        return true;
    }

    bool reachedStopper =
        OptionStopperNodeName != null &&
        _remainder == null &&
        string.Compare(_currentnode.Name, OptionStopperNodeName, StringComparison.OrdinalIgnoreCase) == 0;

    if (reachedStopper)
    {
        // Capture everything from index onward and report that parsing should stop.
        _remainderOffset = index;
        _remainder = Text.Substring(_remainderOffset);
        CloseCurrentNode();
        return false;
    }

    CloseCurrentNode();
    return true;
}
/// <summary>
/// Handles a '&lt;' in the current character <c>_c</c>: enters server-side code on
/// "&lt;%", otherwise ends the current node and starts either a comment node
/// ("&lt;!") or an element node.
/// </summary>
/// <returns>
/// False when <c>_c</c> is not '&lt;' (nothing was done). True when the '&lt;' was
/// handled, including the case where <c>PushNodeEnd</c> returned false and
/// <c>_index</c> was moved to the end of <c>Text</c> to stop parsing.
/// </returns>
private bool NewCheck()
{
    if (this._c != '<')
    {
        return false;
    }

    // "<%": server-side code block. _index addresses the character after the '<'.
    if (this._index < this.Text.Length && this.Text[this._index] == '%')
    {
        // Depending on where the block appears it doubles as a node name,
        // an attribute name or an attribute value.
        switch (this._state)
        {
            case HtmlDocument.ParseState.WhichTag:
                this.PushNodeNameStart(true, this._index - 1);
                this._state = HtmlDocument.ParseState.Tag;
                break;
            case HtmlDocument.ParseState.BetweenAttributes:
                this.PushAttributeNameStart(this._index - 1);
                break;
            case HtmlDocument.ParseState.AttributeAfterEquals:
                this.PushAttributeValueStart(this._index - 1);
                break;
        }

        // Remember the state to return to once the block ends.
        this._oldstate = this._state;
        this._state = HtmlDocument.ParseState.ServerSideCode;
        return true;
    }

    // End the node that was open up to (not including) the '<'.
    if (!this.PushNodeEnd(this._index - 1, true))
    {
        this._index = this.Text.Length;
        return true;
    }

    this._state = HtmlDocument.ParseState.WhichTag;

    // "<!": comment / declaration.
    if (this._index - 1 <= this.Text.Length - 2 && this.Text[this._index] == '!')
    {
        this.PushNodeStart(HtmlNodeType.Comment, this._index - 1);
        this.PushNodeNameStart(true, this._index);
        this.PushNodeNameEnd(this._index + 1);
        this._state = HtmlDocument.ParseState.Comment;

        // Always recompute the flag. The original assigned it only when two more
        // characters were available, so a short "<!>" at the very end of the text
        // inherited the value left behind by an earlier "<!--" comment. With fewer
        // than two characters remaining the construct cannot start with "<!--".
        this._fullcomment =
            this._index < this.Text.Length - 2 &&
            this.Text[this._index + 1] == '-' &&
            this.Text[this._index + 2] == '-';
        return true;
    }

    this.PushNodeStart(HtmlNodeType.Element, this._index - 1);
    return true;
}