Ejemplo n.º 1
0
 /// <summary>
 /// Resets all parser state and then walks <c>Text</c> one character at a time,
 /// dispatching on <c>_state</c> to build nodes and attributes through the
 /// Push* helpers. When the loop ends, the node still in progress is finalized
 /// and <c>Lastnodes</c> is cleared.
 /// </summary>
 /// <remarks>
 /// Character codes used below: 33 '!', 34 '"', 37 '%', 39 '\'', 45 '-', 47 '/',
 /// 60 '&lt;', 61 '=', 62 '&gt;', 63 '?'.
 /// NOTE(review): the loop never changes <c>_index</c> directly except to force
 /// termination, so IncrementPosition() presumably advances it (and
 /// DecrementPosition() presumably steps it back) - confirm against those helpers.
 /// </remarks>
 private void Parse()
 {
   // Quote character (39 or 34) that opened the quoted attribute value being read.
   int num = 0;
   if (this.OptionComputeChecksum)
     this._crc32 = new Crc32();
   this.Lastnodes = new Dictionary<string, HtmlNode>();
   this._c = 0;
   this._fullcomment = false;
   this._parseerrors = new List<HtmlParseError>();
   this._line = 1;
   this._lineposition = 1;
   this._maxlineposition = 1;
   this._state = HtmlDocument.ParseState.Text;
   this._oldstate = this._state;
   // The document node spans the whole input.
   this._documentnode._innerlength = this.Text.Length;
   this._documentnode._outerlength = this.Text.Length;
   this._remainderOffset = this.Text.Length;
   this._lastparentnode = this._documentnode;
   this._currentnode = this.CreateNode(HtmlNodeType.Text, 0);
   this._currentattribute = (HtmlAttribute) null;
   this._index = 0;
   // Parsing starts inside an (initially empty) text node at offset 0.
   this.PushNodeStart(HtmlNodeType.Text, 0);
   while (this._index < this.Text.Length)
   {
     // Read the current character, then move the position forward.
     this._c = (int) this.Text[this._index];
     this.IncrementPosition();
     switch (this._state)
     {
       // Plain text: only NewCheck()'s side effects matter (it reacts to '<');
       // both branches simply continue with the next character.
       case HtmlDocument.ParseState.Text:
         if (!this.NewCheck())
           continue;
         continue;
       // Just after '<': decide what kind of tag this is.
       case HtmlDocument.ParseState.WhichTag:
         if (!this.NewCheck())
         {
           // '/' introduces the tag name right after the slash; the first argument is false here
           // (presumably "not a start tag" - TODO confirm against PushNodeNameStart).
           if (this._c == 47)
           {
             this.PushNodeNameStart(false, this._index);
           }
           else
           {
             // The name begins at the character just read; step back so that
             // character is (presumably) re-read in the Tag state.
             this.PushNodeNameStart(true, this._index - 1);
             this.DecrementPosition();
           }
           this._state = HtmlDocument.ParseState.Tag;
           continue;
         }
         continue;
       // Inside the tag name: whitespace, '/' or '>' ends the name.
       case HtmlDocument.ParseState.Tag:
         if (!this.NewCheck())
         {
           if (HtmlDocument.IsWhiteSpace(this._c))
           {
             this.PushNodeNameEnd(this._index - 1);
             // The state is re-checked after the call; it is only advanced if
             // PushNodeNameEnd left it unchanged.
             if (this._state == HtmlDocument.ParseState.Tag)
             {
               this._state = HtmlDocument.ParseState.BetweenAttributes;
               continue;
             }
             continue;
           }
           if (this._c == 47)
           {
             this.PushNodeNameEnd(this._index - 1);
             if (this._state == HtmlDocument.ParseState.Tag)
             {
               this._state = HtmlDocument.ParseState.EmptyTag;
               continue;
             }
             continue;
           }
           if (this._c == 62)
           {
             this.PushNodeNameEnd(this._index - 1);
             if (this._state == HtmlDocument.ParseState.Tag)
             {
               // PushNodeEnd returns false when parsing must stop; jumping to the
               // end of Text terminates the loop.
               if (!this.PushNodeEnd(this._index, false))
               {
                 this._index = this.Text.Length;
                 continue;
               }
               // PushNodeEnd may itself switch the state (e.g. to PcData); only
               // start a new text node if it did not.
               if (this._state == HtmlDocument.ParseState.Tag)
               {
                 this._state = HtmlDocument.ParseState.Text;
                 this.PushNodeStart(HtmlNodeType.Text, this._index);
                 continue;
               }
               continue;
             }
             continue;
           }
           continue;
         }
         continue;
       // Between attributes: skip whitespace, then look for '/', '?', '>' or an attribute name.
       case HtmlDocument.ParseState.BetweenAttributes:
         if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
         {
           if (this._c == 47 || this._c == 63)
           {
             this._state = HtmlDocument.ParseState.EmptyTag;
             continue;
           }
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.BetweenAttributes)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           // Any other character starts an attribute name at the character just read.
           this.PushAttributeNameStart(this._index - 1);
           this._state = HtmlDocument.ParseState.AttributeName;
           continue;
         }
         continue;
       // After '/' or '?' inside a tag: '>' ends the node with close = true;
       // anything else falls back to attribute scanning.
       case HtmlDocument.ParseState.EmptyTag:
         if (!this.NewCheck())
         {
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, true))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.EmptyTag)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           this._state = HtmlDocument.ParseState.BetweenAttributes;
           continue;
         }
         continue;
       // Inside an attribute name: whitespace, '=' or '>' ends the name.
       case HtmlDocument.ParseState.AttributeName:
         if (!this.NewCheck())
         {
           if (HtmlDocument.IsWhiteSpace(this._c))
           {
             this.PushAttributeNameEnd(this._index - 1);
             this._state = HtmlDocument.ParseState.AttributeBeforeEquals;
             continue;
           }
           if (this._c == 61)
           {
             this.PushAttributeNameEnd(this._index - 1);
             this._state = HtmlDocument.ParseState.AttributeAfterEquals;
             continue;
           }
           if (this._c == 62)
           {
             this.PushAttributeNameEnd(this._index - 1);
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeName)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           continue;
         }
         continue;
       // Whitespace seen after an attribute name: waiting for '=', '>' or the next attribute.
       case HtmlDocument.ParseState.AttributeBeforeEquals:
         if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
         {
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeBeforeEquals)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           if (this._c == 61)
           {
             this._state = HtmlDocument.ParseState.AttributeAfterEquals;
             continue;
           }
           // No '=' followed: the previous attribute has no value. Step back so this
           // character is (presumably) re-read in the BetweenAttributes state.
           this._state = HtmlDocument.ParseState.BetweenAttributes;
           this.DecrementPosition();
           continue;
         }
         continue;
       // After '=': skip whitespace, then a quote starts a quoted value, '>' ends
       // the tag, and anything else starts an unquoted value.
       case HtmlDocument.ParseState.AttributeAfterEquals:
         if (!this.NewCheck() && !HtmlDocument.IsWhiteSpace(this._c))
         {
           if (this._c == 39 || this._c == 34)
           {
             this._state = HtmlDocument.ParseState.QuotedAttributeValue;
             this.PushAttributeValueStart(this._index, this._c);
             // Remember which quote must close the value.
             num = this._c;
             continue;
           }
           if (this._c == 62)
           {
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeAfterEquals)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           this.PushAttributeValueStart(this._index - 1);
           this._state = HtmlDocument.ParseState.AttributeValue;
           continue;
         }
         continue;
       // Unquoted attribute value: whitespace or '>' ends it.
       case HtmlDocument.ParseState.AttributeValue:
         if (!this.NewCheck())
         {
           if (HtmlDocument.IsWhiteSpace(this._c))
           {
             this.PushAttributeValueEnd(this._index - 1);
             this._state = HtmlDocument.ParseState.BetweenAttributes;
             continue;
           }
           if (this._c == 62)
           {
             this.PushAttributeValueEnd(this._index - 1);
             if (!this.PushNodeEnd(this._index, false))
             {
               this._index = this.Text.Length;
               continue;
             }
             if (this._state == HtmlDocument.ParseState.AttributeValue)
             {
               this._state = HtmlDocument.ParseState.Text;
               this.PushNodeStart(HtmlNodeType.Text, this._index);
               continue;
             }
             continue;
           }
           continue;
         }
         continue;
       // Inside "<!...": '>' ends the node. When _fullcomment is set, the two
       // characters at _index - 2 and _index - 3 must both be '-' as well
       // (presumably the "--" right before '>', given _index is already past it).
       case HtmlDocument.ParseState.Comment:
         if (this._c == 62 && (!this._fullcomment || (int) this.Text[this._index - 2] == 45 && (int) this.Text[this._index - 3] == 45))
         {
           if (!this.PushNodeEnd(this._index, false))
           {
             this._index = this.Text.Length;
             continue;
           }
           this._state = HtmlDocument.ParseState.Text;
           this.PushNodeStart(HtmlNodeType.Text, this._index);
           continue;
         }
         continue;
       // Quoted attribute value: ends at the matching quote stored in num.
       // NewCheck() is not consulted here, so '<' does not start a tag.
       case HtmlDocument.ParseState.QuotedAttributeValue:
         if (this._c == num)
         {
           this.PushAttributeValueEnd(this._index - 1);
           this._state = HtmlDocument.ParseState.BetweenAttributes;
           continue;
         }
         // '<' followed by '%' opens a server-side block inside the value;
         // remember the state to come back to.
         if (this._c == 60 && this._index < this.Text.Length && (int) this.Text[this._index] == 37)
         {
           this._oldstate = this._state;
           this._state = HtmlDocument.ParseState.ServerSideCode;
           continue;
         }
         continue;
       // Inside a "<% ... %>" block: everything is skipped until '%' followed by '>'.
       case HtmlDocument.ParseState.ServerSideCode:
         if (this._c == 37 && this._index < this.Text.Length && (int) this.Text[this._index] == 62)
         {
           // Resume according to the state that was active when the block started.
           switch (this._oldstate)
           {
             case HtmlDocument.ParseState.BetweenAttributes:
               this.PushAttributeNameEnd(this._index + 1);
               this._state = HtmlDocument.ParseState.BetweenAttributes;
               break;
             case HtmlDocument.ParseState.AttributeAfterEquals:
               this._state = HtmlDocument.ParseState.AttributeValue;
               break;
             default:
               this._state = this._oldstate;
               break;
           }
           // Presumably skips the '>' that closes the block - TODO confirm.
           this.IncrementPosition();
           continue;
         }
         continue;
       // Raw content of the current element: scan for its closing tag "</name"
       // (case-insensitive) followed by '>' or whitespace. The length guard keeps
       // the Substring and the look-ahead read inside Text.
       case HtmlDocument.ParseState.PcData:
         if (this._currentnode._namelength + 3 <= this.Text.Length - (this._index - 1) && string.Compare(this.Text.Substring(this._index - 1, this._currentnode._namelength + 2), "</" + this._currentnode.Name, StringComparison.OrdinalIgnoreCase) == 0)
         {
           // Character immediately after "</name".
           int c = (int) this.Text[this._index - 1 + 2 + this._currentnode.Name.Length];
           if (c == 62 || HtmlDocument.IsWhiteSpace(c))
           {
             // Wrap everything between the end of the opening tag and the closing
             // tag in a single text child node.
             HtmlNode node = this.CreateNode(HtmlNodeType.Text, this._currentnode._outerstartindex + this._currentnode._outerlength);
             node._outerlength = this._index - 1 - node._outerstartindex;
             this._currentnode.AppendChild(node);
             // Begin the closing tag: the node starts at '<', its name after "</".
             this.PushNodeStart(HtmlNodeType.Element, this._index - 1);
             this.PushNodeNameStart(false, this._index - 1 + 2);
             this._state = HtmlDocument.ParseState.Tag;
             this.IncrementPosition();
             continue;
           }
           continue;
         }
         continue;
       default:
         continue;
     }
   }
   // End of input: close a tag name that is still open, then the pending node.
   if (this._currentnode._namestartindex > 0)
     this.PushNodeNameEnd(this._index);
   this.PushNodeEnd(this._index, false);
   this.Lastnodes.Clear();
 }
Ejemplo n.º 2
0
 /// <summary>
 /// Finishes the node currently being built: fixes its outer length, attaches it
 /// to the last parent node where appropriate, and closes it when requested or
 /// when it is not a start tag.
 /// </summary>
 /// <param name="index">Offset in <c>Text</c> at which the node's outer span ends.</param>
 /// <param name="close">True to close the node once it has been attached.</param>
 /// <returns>
 /// False when the closed node's name matches <c>OptionStopperNodeName</c> and no
 /// remainder has been captured yet (the rest of <c>Text</c> is stored in
 /// <c>_remainder</c>); true otherwise.
 /// </returns>
 private bool PushNodeEnd(int index, bool close)
 {
   this._currentnode._outerlength = index - this._currentnode._outerstartindex;

   bool isTextOrComment =
     this._currentnode._nodetype == HtmlNodeType.Text ||
     this._currentnode._nodetype == HtmlNodeType.Comment;

   if (isTextOrComment)
   {
     // Text and comment nodes have identical inner and outer spans; zero-length ones are not attached.
     if (this._currentnode._outerlength > 0)
     {
       this._currentnode._innerlength = this._currentnode._outerlength;
       this._currentnode._innerstartindex = this._currentnode._outerstartindex;
       if (this._lastparentnode != null)
       {
         this._lastparentnode.AppendChild(this._currentnode);
       }
     }
   }
   else if (this._currentnode._starttag && this._lastparentnode != this._currentnode)
   {
     if (this._lastparentnode != null)
     {
       this._lastparentnode.AppendChild(this._currentnode);
     }
     this.ReadDocumentEncoding(this._currentnode);

     // Link this node to the previous one with the same name and record it as the latest.
     string nodeName = this._currentnode.Name;
     this._currentnode._prevwithsamename = Utilities.GetDictionaryValueOrNull<string, HtmlNode>(this.Lastnodes, nodeName);
     this.Lastnodes[nodeName] = this._currentnode;

     HtmlNodeType kind = this._currentnode.NodeType;
     if (kind == HtmlNodeType.Document || kind == HtmlNodeType.Element)
     {
       this._lastparentnode = this._currentnode;
     }

     // CDATA-style elements switch the parser to raw-content mode and stay open.
     if (HtmlNode.IsCDataElement(this.CurrentNodeName()))
     {
       this._state = HtmlDocument.ParseState.PcData;
       return true;
     }

     if (HtmlNode.IsClosedElement(this._currentnode.Name) || HtmlNode.IsEmptyElement(this._currentnode.Name))
     {
       close = true;
     }
   }

   // An open start tag that was not asked to close stays open.
   if (!close && this._currentnode._starttag)
   {
     return true;
   }

   bool reachedStopper =
     this.OptionStopperNodeName != null &&
     this._remainder == null &&
     string.Compare(this._currentnode.Name, this.OptionStopperNodeName, StringComparison.OrdinalIgnoreCase) == 0;

   if (reachedStopper)
   {
     // Keep everything from this point on as the unparsed remainder.
     this._remainderOffset = index;
     this._remainder = this.Text.Substring(this._remainderOffset);
   }

   this.CloseCurrentNode();
   return !reachedStopper;
 }
Ejemplo n.º 3
0
 /// <summary>
 /// Handles a '&lt;' (code 60) at the current character: starts a server-side
 /// block for "&lt;%", a comment/declaration node for "&lt;!", or a new element
 /// node otherwise, after ending the node that was in progress.
 /// </summary>
 /// <returns>
 /// False when the current character is not '&lt;' (nothing was done); true when
 /// the character was consumed here, including the case where PushNodeEnd asked
 /// for parsing to stop and <c>_index</c> was moved to the end of <c>Text</c>.
 /// </returns>
 private bool NewCheck()
 {
   // 60 is '<'; any other character is left to the caller.
   if (this._c != 60)
     return false;
   // "<%" (37 is '%'): server-side code. Start the name/value the block belongs to,
   // remember the state to come back to and switch to ServerSideCode.
   if (this._index < this.Text.Length && (int) this.Text[this._index] == 37)
   {
     switch (this._state)
     {
       case HtmlDocument.ParseState.WhichTag:
         this.PushNodeNameStart(true, this._index - 1);
         this._state = HtmlDocument.ParseState.Tag;
         break;
       case HtmlDocument.ParseState.BetweenAttributes:
         this.PushAttributeNameStart(this._index - 1);
         break;
       case HtmlDocument.ParseState.AttributeAfterEquals:
         this.PushAttributeValueStart(this._index - 1);
         break;
     }
     this._oldstate = this._state;
     this._state = HtmlDocument.ParseState.ServerSideCode;
     return true;
   }
   // End the node in progress just before the '<'. A false result means parsing
   // must stop, so jump to the end of the text.
   if (!this.PushNodeEnd(this._index - 1, true))
   {
     this._index = this.Text.Length;
     return true;
   }
   this._state = HtmlDocument.ParseState.WhichTag;
   // "<!" (33 is '!'): comment or declaration node.
   if (this._index < this.Text.Length && (int) this.Text[this._index] == 33)
   {
     this.PushNodeStart(HtmlNodeType.Comment, this._index - 1);
     this.PushNodeNameStart(true, this._index);
     this.PushNodeNameEnd(this._index + 1);
     this._state = HtmlDocument.ParseState.Comment;
     // Recompute the flag for every "<!" so that a declaration too close to the
     // end of the text cannot inherit "full comment" from an earlier "<!--".
     // The bounds test guards the two look-ahead reads (45 is '-').
     this._fullcomment = this._index < this.Text.Length - 2
       && (int) this.Text[this._index + 1] == 45
       && (int) this.Text[this._index + 2] == 45;
     return true;
   }
   this.PushNodeStart(HtmlNodeType.Element, this._index - 1);
   return true;
 }