/// <summary>
/// Parses the contents of an HTML tag. The current position should
/// be at the first character following the tag's opening less-than
/// character.
///
/// Note: The tag is always parsed through its closing '&gt;', even if
/// it is not a tag requested by the caller, so that subsequent parsing
/// resumes after this tag.
///
/// Side effect: <c>_scriptBegin</c> is reset on entry and is set when
/// the tag name is "script" (compared case-insensitively); it is
/// cleared again if a '/' is encountered inside the tag.
/// </summary>
/// <param name="name">Name of the tag the caller is requesting,
/// or "*" if caller is requesting all tags</param>
/// <param name="tag">Receives a new <c>HtmlTag</c> describing this tag
/// if it is one the caller is requesting; otherwise left untouched</param>
/// <returns>True if data is being returned for a tag requested by
/// the caller or false otherwise</returns>
protected bool ParseTag(string name, ref HtmlTag tag)
{
    // Get name of this tag
    string tagName = ParseTagName();

    // Special handling. Tag names are markup identifiers, so they are
    // compared ordinally; a culture-sensitive comparison can fail to match
    // e.g. "SCRIPT" against "script" under cultures with special casing
    // rules for 'i'.
    _scriptBegin = false;
    bool doctype = false;
    if (String.Equals(tagName, "!DOCTYPE", StringComparison.OrdinalIgnoreCase))
    {
        doctype = true;
    }
    else if (String.Equals(tagName, "script", StringComparison.OrdinalIgnoreCase))
    {
        _scriptBegin = true;
    }

    // Is this a tag requested by caller?
    bool requested = false;
    if (name == "*" || String.Equals(tagName, name, StringComparison.OrdinalIgnoreCase))
    {
        // Yes, create new tag object
        tag = new HtmlTag();
        tag.Name = tagName;
        tag.Attributes = new Dictionary<string, string>();
        requested = true;
    }

    // Parse attributes
    // NOTE(review): this loop only exits once Peek() returns '>'. Confirm
    // that the helper methods guarantee termination when the document ends
    // in the middle of a tag.
    SkipWhitespace();
    while (Peek() != '>')
    {
        if (Peek() == '/')
        {
            // Handle trailing forward slash
            if (requested)
            {
                tag.TrailingSlash = true;
            }
            Move();
            SkipWhitespace();

            // If this is a script tag, it was closed
            _scriptBegin = false;
        }
        else
        {
            // Parse attribute name. Inside a !DOCTYPE declaration each
            // token is read with the attribute-value parser instead.
            string attributeName = doctype ? ParseAttributeValue() : ParseAttributeName();
            SkipWhitespace();

            // Parse attribute value (empty when no '=' follows the name)
            string value = String.Empty;
            if (Peek() == '=')
            {
                Move();
                SkipWhitespace();
                value = ParseAttributeValue();
                SkipWhitespace();
            }

            // Add attribute to collection if requested tag. The indexer
            // overwrites an earlier attribute with the same name, so the
            // last occurrence wins.
            if (requested)
            {
                tag.Attributes[attributeName] = value;
            }
        }
    }

    // Skip over closing '>'
    Move();

    return requested;
}
/// <summary>
/// Parses the next tag that matches the specified tag name.
/// Comments and closing tags are skipped, and the contents of a
/// script element are skipped up to its closing tag.
/// </summary>
/// <param name="name">Name of the tags to parse ("*" = parse all
/// tags)</param>
/// <param name="tag">Returns information on the next occurrence
/// of the specified tag or null if none found</param>
/// <returns>True if a tag was parsed or false if the end of the
/// document was reached (also false when <paramref name="name"/> is
/// null or empty)</returns>
public bool ParseNext(string name, out HtmlTag tag)
{
    tag = null;

    // Nothing to do if no tag specified
    if (String.IsNullOrEmpty(name))
    {
        return false;
    }

    // Loop until match is found or there are no more tags
    while (MoveToNextTag())
    {
        // Skip opening '<'
        Move();

        // Examine first tag character
        char c = Peek();
        if (c == '!' && Peek(1) == '-' && Peek(2) == '-')
        {
            // Skip over comments. The terminator is a markup delimiter,
            // so search for it ordinally rather than with the
            // culture-sensitive default of IndexOf(string, int).
            const string endComment = "-->";
            _pos = _html.IndexOf(endComment, _pos, StringComparison.Ordinal);
            NormalizePosition();
            Move(endComment.Length);
        }
        else if (c == '/')
        {
            // Skip over closing tags
            _pos = _html.IndexOf('>', _pos);
            NormalizePosition();
            Move();
        }
        else
        {
            // Parse tag
            bool result = ParseTag(name, ref tag);

            // Because scripts may contain tag characters,
            // we need special handling to skip over
            // script contents
            if (_scriptBegin)
            {
                const string endScript = "</script";
                _pos = _html.IndexOf(endScript, _pos, StringComparison.OrdinalIgnoreCase);
                NormalizePosition();
                Move(endScript.Length);
                SkipWhitespace();
                if (Peek() == '>')
                {
                    Move();
                }
            }

            // Return true if requested tag was found
            if (result)
            {
                return true;
            }
        }
    }

    return false;
}