/// <summary>
/// Resets the parser bookkeeping fields and then walks <c>Text</c> one character at a
/// time, dispatching each character on the current value of <c>_state</c>.
/// </summary>
/// <remarks>
/// Every iteration stores the current character in <c>_c</c> and calls
/// <c>IncrementPosition()</c> before the state is examined. Whenever <c>PushNodeEnd</c>
/// returns false, <c>_index</c> is set to <c>Text.Length</c>, which makes the loop
/// condition fail and ends the scan. The Push* helpers, <c>NewCheck</c> and
/// <c>DecrementPosition</c> are defined outside this method; they may change
/// <c>_state</c>, which is why the state is re-checked after several of those calls.
/// </remarks>
private void Parse()
{
    // Quote character (' or ") that opened the quoted attribute value being read.
    int openingQuote = 0;

    if (OptionComputeChecksum)
    {
        _crc32 = new Crc32();
    }

    Lastnodes = new Dictionary<string, HtmlNode>();
    _c = 0;
    _fullcomment = false;
    _parseerrors = new List<HtmlParseError>();
    _line = 1;
    _lineposition = 1;
    _maxlineposition = 1;
    _state = HtmlDocument.ParseState.Text;
    _oldstate = _state;

    // The document node initially spans the whole input.
    int totalLength = Text.Length;
    _documentnode._innerlength = totalLength;
    _documentnode._outerlength = totalLength;
    _remainderOffset = totalLength;

    _lastparentnode = _documentnode;
    _currentnode = CreateNode(HtmlNodeType.Text, 0);
    _currentattribute = null;
    _index = 0;
    PushNodeStart(HtmlNodeType.Text, 0);

    while (_index < Text.Length)
    {
        _c = Text[_index];
        IncrementPosition();

        switch (_state)
        {
            case HtmlDocument.ParseState.Text:
                // In plain text only NewCheck() reacts to the character.
                NewCheck();
                break;

            case HtmlDocument.ParseState.WhichTag:
                if (NewCheck())
                {
                    break;
                }

                if (_c == '/')
                {
                    PushNodeNameStart(false, _index);
                }
                else
                {
                    PushNodeNameStart(true, _index - 1);
                    DecrementPosition();
                }

                _state = HtmlDocument.ParseState.Tag;
                break;

            case HtmlDocument.ParseState.Tag:
                if (NewCheck())
                {
                    break;
                }

                if (HtmlDocument.IsWhiteSpace(_c))
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state == HtmlDocument.ParseState.Tag)
                    {
                        _state = HtmlDocument.ParseState.BetweenAttributes;
                    }
                }
                else if (_c == '/')
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state == HtmlDocument.ParseState.Tag)
                    {
                        _state = HtmlDocument.ParseState.EmptyTag;
                    }
                }
                else if (_c == '>')
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state == HtmlDocument.ParseState.Tag)
                    {
                        if (!PushNodeEnd(_index, false))
                        {
                            // Stop parsing.
                            _index = Text.Length;
                        }
                        else if (_state == HtmlDocument.ParseState.Tag)
                        {
                            _state = HtmlDocument.ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }
                    }
                }

                break;

            case HtmlDocument.ParseState.BetweenAttributes:
                if (NewCheck() || HtmlDocument.IsWhiteSpace(_c))
                {
                    break;
                }

                if (_c == '/' || _c == '?')
                {
                    _state = HtmlDocument.ParseState.EmptyTag;
                }
                else if (_c == '>')
                {
                    if (!PushNodeEnd(_index, false))
                    {
                        // Stop parsing.
                        _index = Text.Length;
                    }
                    else if (_state == HtmlDocument.ParseState.BetweenAttributes)
                    {
                        _state = HtmlDocument.ParseState.Text;
                        PushNodeStart(HtmlNodeType.Text, _index);
                    }
                }
                else
                {
                    // Any other character starts an attribute name.
                    PushAttributeNameStart(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeName;
                }

                break;

            case HtmlDocument.ParseState.EmptyTag:
                if (NewCheck())
                {
                    break;
                }

                if (_c != '>')
                {
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                    break;
                }

                if (!PushNodeEnd(_index, true))
                {
                    // Stop parsing.
                    _index = Text.Length;
                }
                else if (_state == HtmlDocument.ParseState.EmptyTag)
                {
                    _state = HtmlDocument.ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                }

                break;

            case HtmlDocument.ParseState.AttributeName:
                if (NewCheck())
                {
                    break;
                }

                if (HtmlDocument.IsWhiteSpace(_c))
                {
                    PushAttributeNameEnd(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeBeforeEquals;
                }
                else if (_c == '=')
                {
                    PushAttributeNameEnd(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeAfterEquals;
                }
                else if (_c == '>')
                {
                    PushAttributeNameEnd(_index - 1);
                    if (!PushNodeEnd(_index, false))
                    {
                        // Stop parsing.
                        _index = Text.Length;
                    }
                    else if (_state == HtmlDocument.ParseState.AttributeName)
                    {
                        _state = HtmlDocument.ParseState.Text;
                        PushNodeStart(HtmlNodeType.Text, _index);
                    }
                }

                break;

            case HtmlDocument.ParseState.AttributeBeforeEquals:
                if (NewCheck() || HtmlDocument.IsWhiteSpace(_c))
                {
                    break;
                }

                if (_c == '>')
                {
                    if (!PushNodeEnd(_index, false))
                    {
                        // Stop parsing.
                        _index = Text.Length;
                    }
                    else if (_state == HtmlDocument.ParseState.AttributeBeforeEquals)
                    {
                        _state = HtmlDocument.ParseState.Text;
                        PushNodeStart(HtmlNodeType.Text, _index);
                    }
                }
                else if (_c == '=')
                {
                    _state = HtmlDocument.ParseState.AttributeAfterEquals;
                }
                else
                {
                    // Neither '=' nor whitespace: go back to the between-attributes
                    // state and step the position back one character.
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                    DecrementPosition();
                }

                break;

            case HtmlDocument.ParseState.AttributeAfterEquals:
                if (NewCheck() || HtmlDocument.IsWhiteSpace(_c))
                {
                    break;
                }

                if (_c == '\'' || _c == '"')
                {
                    _state = HtmlDocument.ParseState.QuotedAttributeValue;
                    PushAttributeValueStart(_index, _c);
                    openingQuote = _c;
                }
                else if (_c == '>')
                {
                    if (!PushNodeEnd(_index, false))
                    {
                        // Stop parsing.
                        _index = Text.Length;
                    }
                    else if (_state == HtmlDocument.ParseState.AttributeAfterEquals)
                    {
                        _state = HtmlDocument.ParseState.Text;
                        PushNodeStart(HtmlNodeType.Text, _index);
                    }
                }
                else
                {
                    // Unquoted value begins at the current character.
                    PushAttributeValueStart(_index - 1);
                    _state = HtmlDocument.ParseState.AttributeValue;
                }

                break;

            case HtmlDocument.ParseState.AttributeValue:
                if (NewCheck())
                {
                    break;
                }

                if (HtmlDocument.IsWhiteSpace(_c))
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                }
                else if (_c == '>')
                {
                    PushAttributeValueEnd(_index - 1);
                    if (!PushNodeEnd(_index, false))
                    {
                        // Stop parsing.
                        _index = Text.Length;
                    }
                    else if (_state == HtmlDocument.ParseState.AttributeValue)
                    {
                        _state = HtmlDocument.ParseState.Text;
                        PushNodeStart(HtmlNodeType.Text, _index);
                    }
                }

                break;

            case HtmlDocument.ParseState.Comment:
                if (_c != '>')
                {
                    break;
                }

                // When _fullcomment is set, '>' only ends the comment if the two
                // characters before it are both '-'.
                if (_fullcomment && (Text[_index - 2] != '-' || Text[_index - 3] != '-'))
                {
                    break;
                }

                if (!PushNodeEnd(_index, false))
                {
                    // Stop parsing.
                    _index = Text.Length;
                    break;
                }

                _state = HtmlDocument.ParseState.Text;
                PushNodeStart(HtmlNodeType.Text, _index);
                break;

            case HtmlDocument.ParseState.QuotedAttributeValue:
                if (_c == openingQuote)
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                }
                else if (_c == '<' && _index < Text.Length && Text[_index] == '%')
                {
                    // "<%" inside a quoted value: remember where we were and switch
                    // to the server-side-code state.
                    _oldstate = _state;
                    _state = HtmlDocument.ParseState.ServerSideCode;
                }

                break;

            case HtmlDocument.ParseState.ServerSideCode:
                // Only the two-character sequence "%>" leaves this state.
                if (_c != '%' || _index >= Text.Length || Text[_index] != '>')
                {
                    break;
                }

                if (_oldstate == HtmlDocument.ParseState.BetweenAttributes)
                {
                    PushAttributeNameEnd(_index + 1);
                    _state = HtmlDocument.ParseState.BetweenAttributes;
                }
                else if (_oldstate == HtmlDocument.ParseState.AttributeAfterEquals)
                {
                    _state = HtmlDocument.ParseState.AttributeValue;
                }
                else
                {
                    _state = _oldstate;
                }

                IncrementPosition();
                break;

            case HtmlDocument.ParseState.PcData:
                // Look for "</" + current node name followed by one more character.
                int closeTagLength = _currentnode._namelength + 2;

                // Not enough input left for the closing tag plus one character.
                if (closeTagLength + 1 > Text.Length - (_index - 1))
                {
                    break;
                }

                string candidate = Text.Substring(_index - 1, closeTagLength);
                if (string.Compare(candidate, "</" + _currentnode.Name, StringComparison.OrdinalIgnoreCase) != 0)
                {
                    break;
                }

                int following = Text[_index - 1 + 2 + _currentnode.Name.Length];
                if (following != '>' && !HtmlDocument.IsWhiteSpace(following))
                {
                    break;
                }

                // Everything collected so far becomes a text child of the current node.
                HtmlNode pendingText = CreateNode(HtmlNodeType.Text, _currentnode._outerstartindex + _currentnode._outerlength);
                pendingText._outerlength = _index - 1 - pendingText._outerstartindex;
                _currentnode.AppendChild(pendingText);

                PushNodeStart(HtmlNodeType.Element, _index - 1);
                PushNodeNameStart(false, _index - 1 + 2);
                _state = HtmlDocument.ParseState.Tag;
                IncrementPosition();
                break;

            default:
                break;
        }
    }

    // Finish whatever node was still open when the input ran out.
    if (_currentnode._namestartindex > 0)
    {
        PushNodeNameEnd(_index);
    }

    PushNodeEnd(_index, false);

    // The lookup table is not needed once parsing is complete.
    Lastnodes.Clear();
}
/// <summary>
/// Re-initialises the parser fields and scans <c>Text</c> character by character,
/// letting the current <c>_state</c> decide how each character is handled.
/// </summary>
/// <remarks>
/// Each pass of the loop loads the character into <c>_c</c> and calls
/// <c>IncrementPosition()</c> first. Nothing follows the outer switch inside the loop,
/// so every state handler ends with <c>continue</c> to move on to the next character.
/// A false result from <c>PushNodeEnd</c> sets <c>_index</c> to <c>Text.Length</c>,
/// which stops parsing. Helper methods may change <c>_state</c>, hence the repeated
/// state checks after they are called.
/// </remarks>
private void Parse()
{
    // Quote character that opened the attribute value currently being read.
    int openQuote = 0;

    if (OptionComputeChecksum)
    {
        _crc32 = new Crc32();
    }

    Lastnodes = new Dictionary<string, HtmlNode>();
    _c = 0;
    _fullcomment = false;
    _parseerrors = new List<HtmlParseError>();

    _line = 1;
    _lineposition = 1;
    _maxlineposition = 1;

    _state = ParseState.Text;
    _oldstate = _state;

    _documentnode._innerlength = Text.Length;
    _documentnode._outerlength = Text.Length;
    _remainderOffset = Text.Length;

    _lastparentnode = _documentnode;
    _currentnode = CreateNode(HtmlNodeType.Text, 0);
    _currentattribute = null;

    _index = 0;
    PushNodeStart(HtmlNodeType.Text, 0);

    while (_index < Text.Length)
    {
        _c = Text[_index];
        IncrementPosition();

        switch (_state)
        {
            case ParseState.Text:
                NewCheck();
                continue;

            case ParseState.WhichTag:
                if (NewCheck())
                {
                    continue;
                }

                if (_c != '/')
                {
                    PushNodeNameStart(true, _index - 1);
                    DecrementPosition();
                }
                else
                {
                    PushNodeNameStart(false, _index);
                }

                _state = ParseState.Tag;
                continue;

            case ParseState.Tag:
                if (NewCheck())
                {
                    continue;
                }

                if (IsWhiteSpace(_c))
                {
                    PushNodeNameEnd(_index - 1);
                    if (_state == ParseState.Tag)
                    {
                        _state = ParseState.BetweenAttributes;
                    }

                    continue;
                }

                // Inner switch: "break" leaves only this switch, then we continue the loop.
                switch (_c)
                {
                    case '/':
                        PushNodeNameEnd(_index - 1);
                        if (_state == ParseState.Tag)
                        {
                            _state = ParseState.EmptyTag;
                        }

                        break;

                    case '>':
                        PushNodeNameEnd(_index - 1);
                        if (_state != ParseState.Tag)
                        {
                            break;
                        }

                        if (!PushNodeEnd(_index, false))
                        {
                            // stop parsing
                            _index = Text.Length;
                        }
                        else if (_state == ParseState.Tag)
                        {
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }

                        break;
                }

                continue;

            case ParseState.BetweenAttributes:
                if (NewCheck() || IsWhiteSpace(_c))
                {
                    continue;
                }

                switch (_c)
                {
                    case '/':
                    case '?':
                        _state = ParseState.EmptyTag;
                        break;

                    case '>':
                        if (!PushNodeEnd(_index, false))
                        {
                            // stop parsing
                            _index = Text.Length;
                        }
                        else if (_state == ParseState.BetweenAttributes)
                        {
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }

                        break;

                    default:
                        PushAttributeNameStart(_index - 1);
                        _state = ParseState.AttributeName;
                        break;
                }

                continue;

            case ParseState.EmptyTag:
                if (NewCheck())
                {
                    continue;
                }

                if (_c != '>')
                {
                    _state = ParseState.BetweenAttributes;
                    continue;
                }

                if (!PushNodeEnd(_index, true))
                {
                    // stop parsing
                    _index = Text.Length;
                    continue;
                }

                if (_state == ParseState.EmptyTag)
                {
                    _state = ParseState.Text;
                    PushNodeStart(HtmlNodeType.Text, _index);
                }

                continue;

            case ParseState.AttributeName:
                if (NewCheck())
                {
                    continue;
                }

                if (IsWhiteSpace(_c))
                {
                    PushAttributeNameEnd(_index - 1);
                    _state = ParseState.AttributeBeforeEquals;
                    continue;
                }

                switch (_c)
                {
                    case '=':
                        PushAttributeNameEnd(_index - 1);
                        _state = ParseState.AttributeAfterEquals;
                        break;

                    case '>':
                        PushAttributeNameEnd(_index - 1);
                        if (!PushNodeEnd(_index, false))
                        {
                            // stop parsing
                            _index = Text.Length;
                        }
                        else if (_state == ParseState.AttributeName)
                        {
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }

                        break;
                }

                continue;

            case ParseState.AttributeBeforeEquals:
                if (NewCheck() || IsWhiteSpace(_c))
                {
                    continue;
                }

                switch (_c)
                {
                    case '>':
                        if (!PushNodeEnd(_index, false))
                        {
                            // stop parsing
                            _index = Text.Length;
                        }
                        else if (_state == ParseState.AttributeBeforeEquals)
                        {
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }

                        break;

                    case '=':
                        _state = ParseState.AttributeAfterEquals;
                        break;

                    default:
                        // no equals sign and no whitespace: a new attribute is starting
                        _state = ParseState.BetweenAttributes;
                        DecrementPosition();
                        break;
                }

                continue;

            case ParseState.AttributeAfterEquals:
                if (NewCheck() || IsWhiteSpace(_c))
                {
                    continue;
                }

                switch (_c)
                {
                    case '\'':
                    case '"':
                        _state = ParseState.QuotedAttributeValue;
                        PushAttributeValueStart(_index, _c);
                        openQuote = _c;
                        break;

                    case '>':
                        if (!PushNodeEnd(_index, false))
                        {
                            // stop parsing
                            _index = Text.Length;
                        }
                        else if (_state == ParseState.AttributeAfterEquals)
                        {
                            _state = ParseState.Text;
                            PushNodeStart(HtmlNodeType.Text, _index);
                        }

                        break;

                    default:
                        PushAttributeValueStart(_index - 1);
                        _state = ParseState.AttributeValue;
                        break;
                }

                continue;

            case ParseState.AttributeValue:
                if (NewCheck())
                {
                    continue;
                }

                if (IsWhiteSpace(_c))
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = ParseState.BetweenAttributes;
                }
                else if (_c == '>')
                {
                    PushAttributeValueEnd(_index - 1);
                    if (!PushNodeEnd(_index, false))
                    {
                        // stop parsing
                        _index = Text.Length;
                    }
                    else if (_state == ParseState.AttributeValue)
                    {
                        _state = ParseState.Text;
                        PushNodeStart(HtmlNodeType.Text, _index);
                    }
                }

                continue;

            case ParseState.QuotedAttributeValue:
                if (_c == openQuote)
                {
                    PushAttributeValueEnd(_index - 1);
                    _state = ParseState.BetweenAttributes;
                }
                else if (_c == '<' && _index < Text.Length && Text[_index] == '%')
                {
                    // "<%" inside the quoted value: remember the state to come back to
                    _oldstate = _state;
                    _state = ParseState.ServerSideCode;
                }

                continue;

            case ParseState.Comment:
                if (_c != '>')
                {
                    continue;
                }

                // with _fullcomment set, '>' must be directly preceded by "--"
                bool endsComment = !_fullcomment
                    || (Text[_index - 2] == '-' && Text[_index - 3] == '-');
                if (!endsComment)
                {
                    continue;
                }

                if (!PushNodeEnd(_index, false))
                {
                    // stop parsing
                    _index = Text.Length;
                    continue;
                }

                _state = ParseState.Text;
                PushNodeStart(HtmlNodeType.Text, _index);
                continue;

            case ParseState.ServerSideCode:
                // only "%>" ends the server-side block
                if (_c != '%' || _index >= Text.Length || Text[_index] != '>')
                {
                    continue;
                }

                if (_oldstate == ParseState.AttributeAfterEquals)
                {
                    _state = ParseState.AttributeValue;
                }
                else if (_oldstate == ParseState.BetweenAttributes)
                {
                    PushAttributeNameEnd(_index + 1);
                    _state = ParseState.BetweenAttributes;
                }
                else
                {
                    _state = _oldstate;
                }

                IncrementPosition();
                continue;

            case ParseState.PcData:
                // look for "</" + tag name + one more character; first make sure
                // that many characters remain in the buffer
                if (_currentnode._namelength + 3 > Text.Length - (_index - 1))
                {
                    continue;
                }

                string upcoming = Text.Substring(_index - 1, _currentnode._namelength + 2);
                string closingTag = "</" + _currentnode.Name;
                if (!string.Equals(upcoming, closingTag, StringComparison.OrdinalIgnoreCase))
                {
                    continue;
                }

                int afterName = Text[_index - 1 + 2 + _currentnode.Name.Length];
                if (afterName != '>' && !IsWhiteSpace(afterName))
                {
                    continue;
                }

                // store the content read so far as a text child of the current node
                HtmlNode content = CreateNode(HtmlNodeType.Text, _currentnode._outerstartindex + _currentnode._outerlength);
                content._outerlength = _index - 1 - content._outerstartindex;
                _currentnode.AppendChild(content);

                PushNodeStart(HtmlNodeType.Element, _index - 1);
                PushNodeNameStart(false, _index - 1 + 2);
                _state = ParseState.Tag;
                IncrementPosition();
                continue;
        }
    }

    // close whatever was still in progress at the end of the input
    if (_currentnode._namestartindex > 0)
    {
        PushNodeNameEnd(_index);
    }

    PushNodeEnd(_index, false);

    // the lookup table is no longer required
    Lastnodes.Clear();
}
/// <summary>
/// Resets the parser fields to their starting values, opens the initial text node at
/// offset 0 and then hands over to <c>DoParse</c>.
/// </summary>
/// <remarks>
/// <c>DoParse</c> is defined outside this method; it receives the initial quote value
/// (0) as its only argument.
/// </remarks>
private void Parse()
{
    // Starting value for the quote tracker passed to DoParse.
    int initialQuote = 0;

    if (OptionComputeChecksum)
    {
        _crc32 = new Crc32();
    }

    _lastnodes = new Hashtable();
    _c = 0;
    _fullcomment = false;
    _parseerrors = new ArrayList();

    // Position tracking starts at line 1, column 1.
    _line = 1;
    _lineposition = 1;
    _maxlineposition = 1;

    _state = ParseState.Text;
    _oldstate = ParseState.Text;

    // The document node initially covers the full length of the text.
    int fullLength = _text.FullLength;
    _documentnode._innerlength = fullLength;
    _documentnode._outerlength = fullLength;

    _lastparentnode = _documentnode;
    _currentnode = CreateNode(HtmlNodeType.Text, 0);
    _currentattribute = null;

    _index = 0;
    PushNodeStart(HtmlNodeType.Text, 0);

    DoParse(initialQuote);
}