Beispiel #1
0
		/// <summary>
		/// Parses a string containing HTML and produces a tree of nodes.
		/// </summary>
		/// <remarks>
		/// The input is first passed through PreprocessScript (for "script" and "style"),
		/// RemoveComments and RemoveSGMLComments, and is then split into tokens by GetTokens.
		/// The token list is walked once: open tags and text are appended to a flat
		/// collection, and each close tag that has a matching open element causes the
		/// nodes after that element to be moved underneath it via MoveNodesDown.
		/// </remarks>
		/// <param name="html">The HTML to be parsed</param>
		/// <returns>
		/// The collection of nodes that remain at the top level once matched
		/// open/close tag pairs have been nested.
		/// </returns>
		public HtmlNodeCollection Parse(string html)
		{
			// Top-level result collection; it is created without a parent element.
			HtmlNodeCollection nodes = new HtmlNodeCollection(null);

			// Pre-processing passes run before tokenising. Presumably PreprocessScript
			// protects the contents of script/style blocks from the tokenizer and
			// DecodeScript (below) reverses that - confirm against those helpers.
			html = PreprocessScript( html ,"script" );
			html = PreprocessScript( html ,"style" );

			html = RemoveComments( html );
			html = RemoveSGMLComments( html );
			StringCollection tokens = GetTokens( html );

			int index = 0;
			// The most recently opened element that has been neither self-closed nor
			// followed by a close tag; null otherwise. Only consulted when reading text.
			HtmlElement element = null;
			while( index < tokens.Count )
			{
				if( "<".Equals( tokens[index] ) )
				{
					// Read open tag: the token after "<" is taken as the tag name.

					index++;
					if( index >= tokens.Count ) break;
					string tag_name = tokens[index];
					index++;
					element = new HtmlElement( tag_name );
					// Read the attributes and values until the tag is closed by ">" or "/>"
					// (or the tokens run out).

					while( index < tokens.Count && ! ">".Equals( tokens[index] ) && ! "/>".Equals( tokens[index] ) )
					{
						string attribute_name = tokens[ index ];
						index++;
						if( index < tokens.Count && "=".Equals( tokens[ index ] ) )
						{
							// name = value form; the value is the token after "=".
							index++;
							string attribute_value;
							if( index < tokens.Count )
							{
								attribute_value = tokens[ index ];
							}
							else
							{
								// Input ended right after "=".
								attribute_value = null;
							}
							index++;
							// NOTE(review): DecodeValue receives null when the input ends after "=" -
							// confirm that it tolerates a null argument.
							HtmlAttribute attribute = new HtmlAttribute( attribute_name , HtmlEncoder.DecodeValue( attribute_value ) );
							element.Attributes.Add( attribute );
						}
						else if( index < tokens.Count )
						{
							// Null-value attribute: the name is followed by a token other than "=".
							// NOTE(review): if the name is the very last token, neither branch runs
							// and the attribute is dropped - confirm this is intended.
							HtmlAttribute attribute = new HtmlAttribute( attribute_name , null );
							element.Attributes.Add( attribute );
						}
					}
					// The element is appended to the flat list; nesting happens later,
					// when (and if) its close tag is encountered.
					nodes.Add( element );
					if( index < tokens.Count && "/>".Equals( tokens[ index ] ) )
					{
						// Self-closing tag: mark it terminated and stop tracking it.
						element.IsTerminated = true;
						index++;
						element = null;
					}
					else if( index < tokens.Count && ">".Equals( tokens[ index ] ) )
					{
						index++;
					}
				}
				else if( ">".Equals( tokens[index] ) )
				{
					// A ">" that was not consumed as part of a tag is skipped.
					index++;
				}
				else if( "</".Equals( tokens[index] ) )
				{
					// Read close tag: the token after "</" is taken as the tag name.
					index++;
					if( index >= tokens.Count ) break;
					string tag_name = tokens[index];
					index++;

					// Locate the open element for this tag in the flat list; everything
					// after it is moved underneath it.
					int open_index = FindTagOpenNodeIndex( nodes , tag_name );
					if( open_index != -1 )
					{
						MoveNodesDown( ref nodes , open_index + 1 , (HtmlElement)nodes[open_index] );
					}
					else
					{
						// There is a close tag without an opening tag; it is ignored.
					}

					// Skip to the end of this tag, consuming the closing ">" if present.
					while( index < tokens.Count && ! ">".Equals( tokens[ index ] ) )
					{
						index++;
					}
					if( index < tokens.Count && ">".Equals( tokens[ index ] ) )
					{
						index++;
					}

					element = null;
				}
				else
				{
					// Read text: any other token is treated as text content.
					string value = tokens[ index ];
					if( mRemoveEmptyElementText )
					{
						value = RemoveWhitespace( value );
					}
					value = DecodeScript( value );

					if( mRemoveEmptyElementText && value.Length == 0 )
					{
						// Empty text is dropped when mRemoveEmptyElementText is set.
					}
					else
					{
						// Text is entity-decoded unless the currently tracked open element
						// reports NoEscaping.
						if( ! ( element != null && element.NoEscaping ) )
						{
							value = HtmlEncoder.DecodeValue( value );
						}
						HtmlText node = new HtmlText( value );
						nodes.Add( node );
					}
					index++;
				}
			}
			return nodes;
		}
Beispiel #2
0
		/// <summary>
		/// Creates an attribute collection and stores the supplied element in mElement.
		/// </summary>
		/// <param name="element">The element to associate with this collection</param>
		internal HtmlAttributeCollection(HtmlElement element)
		{
			this.mElement = element;
		}
Beispiel #3
0
		/// <summary>
		/// Re-parents every node from the given index to the end of the collection:
		/// each one is appended to the new parent's child nodes and then removed
		/// from the original collection.
		/// </summary>
		/// <param name="nodes">The collection the nodes are taken from</param>
		/// <param name="node_index">Index in <paramref name="nodes"/> of the first node to move</param>
		/// <param name="new_parent">The element that receives the moved nodes as children</param>
		private void MoveNodesDown(ref HtmlNodeCollection nodes,int node_index,HtmlElement new_parent)
		{
			// Pass 1: attach each trailing node to the new parent.
			int position = node_index;
			while( position < nodes.Count )
			{
				new_parent.Nodes.Add( nodes[position] );
				nodes[position].SetParent( new_parent );
				position++;
			}

			// Pass 2: drop the moved nodes from the source collection. Removing at
			// the same index repeatedly shifts the remaining tail into place.
			int pending = nodes.Count - node_index;
			while( pending > 0 )
			{
				nodes.RemoveAt( node_index );
				pending--;
			}

			// The parent was closed by an explicit end tag.
			new_parent.IsExplicitlyTerminated = true;
		}
Beispiel #4
0
        /// <summary>
        /// Returns the inner text of the first direct child element that has no
        /// child elements of its own and whose text is not blank.
        /// </summary>
        /// <param name="elem">The element whose direct children are inspected</param>
        /// <returns>The matching child's inner text, or null when no child qualifies</returns>
        static string GetRouteName(HtmlElement elem)
        {
            var leaf = elem.Nodes.OfType<HtmlElement>()
                .FirstOrDefault(child =>
                    !child.Nodes.OfType<HtmlElement>().Any()
                    && !string.IsNullOrWhiteSpace(child.InnerText));

            return leaf == null ? null : leaf.InnerText;
        }
Beispiel #5
0
		/// <summary>
		/// Creates an attribute collection with no associated element (mElement is null).
		/// </summary>
		public HtmlAttributeCollection()
		{
			this.mElement = null;
		}
Beispiel #6
0
        /// <summary>
        /// Extracts the traffic colour name from a route element.
        /// </summary>
        /// <remarks>
        /// Looks for a descendant whose class contains "altroute-aux", then inside it
        /// for a descendant whose class contains "dir-traffic-", and returns the word
        /// that follows that prefix in the class attribute.
        /// </remarks>
        /// <param name="elem">The route element to search</param>
        /// <returns>
        /// The suffix following "dir-traffic-" (in its original casing), or null when
        /// either element is missing or the class value does not contain the pattern.
        /// </returns>
        static string GetRouteColor(HtmlElement elem)
        {
            var aux = GetElementByClass(elem, "altroute-aux");
            if (aux == null)
                return null;

            var info = GetElementByClass(aux, "dir-traffic-");
            if (info == null)
                return null;

            // Defensive: do not dereference a missing class attribute or value.
            var classAttr = info.Attributes["class"];
            if (classAttr == null || classAttr.Value == null)
                return null;

            // GetElementByClass matches class names case-insensitively, so the
            // extraction must ignore case too; otherwise an element found via
            // e.g. "Dir-Traffic-Red" would yield null here.
            var match = Regex.Match(classAttr.Value, @"dir-traffic-(\w+)", RegexOptions.IgnoreCase);
            return match.Success ? match.Groups[1].Value : null;
        }
Beispiel #7
0
        /// <summary>
        /// Returns the inner text of the first element found by
        /// GetElementByClass for the class fragment "altroute-info".
        /// </summary>
        /// <param name="elem">The route element to search</param>
        /// <returns>The element's inner text, or null when no such element is found</returns>
        static string GetRouteInfo(HtmlElement elem)
        {
            var infoElement = GetElementByClass(elem, "altroute-info");
            return infoElement == null ? null : infoElement.InnerText;
        }
Beispiel #8
0
 /// <summary>
 /// Finds the first element, among those returned by GetAllElements for the
 /// children of <paramref name="elem"/>, whose class attribute contains the
 /// given text.
 /// </summary>
 /// <remarks>
 /// The comparison is ordinal and case-insensitive, so the result does not
 /// depend on the current culture (a culture-sensitive ToLower() breaks on
 /// letters such as "I" under Turkish cultures). Elements without a class
 /// attribute, or with a null class value, are skipped.
 /// </remarks>
 /// <param name="elem">The element whose descendants are searched</param>
 /// <param name="search">The text to look for inside the class attribute value</param>
 /// <returns>The first matching element, or null when none matches</returns>
 static HtmlElement GetElementByClass(HtmlElement elem, string search)
 {
     return GetAllElements(elem.Nodes)
     .FirstOrDefault(e =>
     {
         var attr = e.Attributes["class"];
         return attr != null
             && attr.Value != null
             && attr.Value.IndexOf(search, System.StringComparison.OrdinalIgnoreCase) >= 0;
     });
 }
Beispiel #9
0
		/// <summary>
		/// Creates a node collection and records the element that owns it.
		/// Passing null produces a collection that is not attached to any element.
		/// </summary>
		/// <param name="parent">The owning element, or null when there is none</param>
		internal HtmlNodeCollection(HtmlElement parent)
		{
			this.mParent = parent;
		}
Beispiel #10
0
		/// <summary>
		/// Creates a node collection with no parent element (mParent is null).
		/// </summary>
		public HtmlNodeCollection()
		{
			this.mParent = null;
		}
Beispiel #11
0
		/// <summary>
		/// Internal hook that records which element is this node's parent.
		/// </summary>
		/// <param name="parentNode">The element to store as the parent</param>
		internal void SetParent(HtmlElement parentNode)
		{
			this.mParent = parentNode;
		}
Beispiel #12
0
		/// <summary>
		/// Base constructor for subclasses; the node starts without a parent.
		/// </summary>
		protected HtmlNode()
		{
			this.mParent = null;
		}