/// <summary>
        /// Tests whether an element is a begin tag that satisfies this
        /// matcher's tag-name and required-attribute constraints.
        /// </summary>
        /// <param name="e">The element to test; anything that is not a <c>BeginTag</c> never matches.</param>
        /// <returns>
        /// <c>true</c> when the element is a begin tag, its name matches <c>tagName</c>
        /// (when one is set), and every required attribute is present with the
        /// required value (when one is set); otherwise <c>false</c>.
        /// </returns>
        public bool IsMatch(Element e)
        {
            BeginTag candidate = e as BeginTag;
            if (candidate == null)
                return false;

            // A null tagName places no constraint on the tag name.
            bool nameMismatch = tagName != null && !candidate.NameEquals(tagName);
            if (nameMismatch)
                return false;

            foreach (RequiredAttribute required in attrs)
            {
                int position;
                Attr actual = candidate.GetAttribute(required.Name, true, 0, out position);

                // The attribute must exist at all...
                if (actual == null)
                    return false;

                // ...and, when a value is required, it must be equal to it.
                if (required.Value != null && required.Value != actual.Value)
                    return false;
            }

            return true;
        }
        /// <summary>
        /// Remembers the tag in <c>bodyBeginTag</c> when it is a body tag, then
        /// forwards the tag to the base implementation.
        /// </summary>
        /// <param name="tag">The begin tag that was encountered.</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            bool isBodyTag = tag.NameEquals(HTMLTokens.Body);
            if (isBodyTag)
                bodyBeginTag = tag;

            base.OnBeginTag(tag);
        }
        /// <summary>
        /// Tracks list nesting: increments the unordered/ordered list level for
        /// ul/ol tags, and sets <c>hasIncompleteList</c> when an li tag is seen
        /// while both levels are below one. Always forwards to the base implementation.
        /// </summary>
        /// <param name="tag">The begin tag that was encountered.</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            if (tag.NameEquals(HTMLTokens.Ul))
            {
                unorderedListLevel++;
            }
            else if (tag.NameEquals(HTMLTokens.Ol))
            {
                orderedListLevel++;
            }
            else
            {
                // Only an <li> outside of every list is of interest here.
                bool outsideAnyList = unorderedListLevel < 1 && orderedListLevel < 1;
                if (outsideAnyList && tag.NameEquals(HTMLTokens.Li))
                    hasIncompleteList = true;
            }

            base.OnBeginTag(tag);
        }
        /// <summary>
        /// Notes when an incomplete title tag is opened, then emits the tag and its
        /// attributes if its upper-cased name is in <c>TagsToPreserve</c>, or emits
        /// it under the mapped name if its upper-cased name is a key of
        /// <c>ReplaceTags</c>. Other tags produce no output here.
        /// </summary>
        /// <param name="tag">The begin tag that was encountered.</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            if (tag.NameEquals(HTMLTokens.Title) && !tag.Complete)
            {
                _inTitle = true;
            }

            // Both lookup tables are probed with the invariant upper-case tag name.
            string lookupName = tag.Name.ToUpper(CultureInfo.InvariantCulture);

            if (TagsToPreserve.Contains(lookupName))
            {
                EmitTagAndAttributes(tag.Name, tag);
                return;
            }

            if (ReplaceTags.ContainsKey(lookupName))
            {
                string replacementName = (string)ReplaceTags[lookupName];
                EmitTagAndAttributes(replacementName, tag);
            }
        }
        /// <summary>
        /// Rewrites relative URLs found on a begin tag before forwarding the tag
        /// to the base implementation.
        /// </summary>
        /// <remarks>
        /// Two cases are handled:
        /// 1. Tags whose upper-cased name is a key of
        ///    <c>LightWeightHTMLDocument.AllUrlElements</c>: the attribute named by the
        ///    table entry is escaped when it is not already a URL and
        ///    <c>ShouldEscapeRelativeUrl</c> approves.
        /// 2. param tags whose name attribute (upper-cased) equals an entry of
        ///    <c>LightWeightHTMLDocument.ParamsUrlElements</c>: the value attribute
        ///    is escaped when it is not already a URL.
        /// </remarks>
        /// <param name="tag">The begin tag that was encountered; may be null.</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            if (tag != null)
            {
                string upperTagName = tag.Name.ToUpper(CultureInfo.InvariantCulture);
                if (LightWeightHTMLDocument.AllUrlElements.ContainsKey(upperTagName))
                {
                    Attr attr = tag.GetAttribute((string)LightWeightHTMLDocument.AllUrlElements[upperTagName]);
                    if (attr != null)
                    {
                        string url = attr.Value;
                        if (!UrlHelper.IsUrl(url) && ShouldEscapeRelativeUrl(url))
                            attr.Value = UrlHelper.EscapeRelativeURL(BaseUrl, url);
                    }
                }

                // Special case params
                if (tag.NameEquals(HTMLTokens.Param))
                {
                    // The name attribute does not change while we scan the table, so
                    // look it up (and upper-case it) once. A name attribute without a
                    // value cannot match any entry and previously caused a
                    // NullReferenceException on ToUpper.
                    Attr nameAttr = tag.GetAttribute(HTMLTokens.Name);
                    if (nameAttr != null && nameAttr.Value != null)
                    {
                        string upperParamName = nameAttr.Value.ToUpper(CultureInfo.InvariantCulture);
                        foreach (string paramValue in LightWeightHTMLDocument.ParamsUrlElements)
                        {
                            if (upperParamName == paramValue)
                            {
                                Attr valueAttr = tag.GetAttribute(HTMLTokens.Value);
                                if (valueAttr != null)
                                {
                                    string url = valueAttr.Value;
                                    if (!UrlHelper.IsUrl(url))
                                        valueAttr.Value = UrlHelper.EscapeRelativeURL(BaseUrl, url);
                                }
                            }
                        }
                    }
                }
            }

            base.OnBeginTag(tag);
        }
        /// <summary>
        /// Detects a meaningless tag such as an empty attribute-less
        /// <c>&lt;p&gt;&lt;/p&gt;</c>, or an <c>&lt;a href="..."&gt;</c> that contains
        /// nothing (or only whitespace, including escaped whitespace entities) before
        /// its end tag. When such a tag is detected, the elements that complete it
        /// (optional whitespace text and the end tag) are consumed from the parser.
        /// </summary>
        /// <param name="htmlParser">The parser positioned just after <paramref name="bt"/>; it is advanced only when this method returns true.</param>
        /// <param name="bt">The begin tag to examine.</param>
        /// <returns><c>true</c> if the tag was meaningless and its remainder was consumed; otherwise <c>false</c>.</returns>
        private static bool RemoveMeaninglessTags(SimpleHtmlParser htmlParser, BeginTag bt)
        {
            // Case 1: <p> with no attributes and no residue, immediately closed.
            bool isBareParagraph = bt.NameEquals("p") && bt.Attributes.Length == 0 && !bt.HasResidue;
            if (isBareParagraph)
            {
                EndTag paragraphEnd = htmlParser.Peek(0) as EndTag;
                if (paragraphEnd != null && paragraphEnd.NameEquals("p"))
                {
                    // Swallow the matching end tag.
                    htmlParser.Next();
                    return true;
                }
            }

            // Case 2: <a> that has an href but no name/style/id, i.e. a link
            // that serves no purpose once it has no content.
            bool isPlainLink = bt.NameEquals("a")
                && bt.GetAttribute("name") == null
                && bt.GetAttributeValue("style") == null
                && bt.GetAttributeValue("id") == null
                && bt.GetAttributeValue("href") != null;
            if (!isPlainLink)
                return false;

            Element following = htmlParser.Peek(0);

            // A text node that is empty after unescaping and trimming is skipped over.
            bool skippedWhitespace = following is Text
                && HtmlUtils.UnEscapeEntities(following.RawText, HtmlUtils.UnEscapeMode.NonMarkupText).Trim().Length == 0;
            if (skippedWhitespace)
                following = htmlParser.Peek(1);

            EndTag anchorEnd = following as EndTag;
            if (anchorEnd == null || !anchorEnd.NameEquals("a"))
                return false;

            // Swallow the whitespace text (if any) and then the end tag.
            if (skippedWhitespace)
                htmlParser.Next();
            htmlParser.Next();

            return true;
        }
        /// <summary>
        /// Records every begin tag in the tag table (keyed by upper-cased tag name),
        /// registers URL substitutions for frames whose src differs from the URL of
        /// the same-named frame document, and starts title / anchor text collection.
        /// Always forwards to the base implementation.
        /// </summary>
        /// <param name="tag">The begin tag that was encountered; may be null.</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            if (tag == null)
            {
                base.OnBeginTag(tag);
                return;
            }

            // The HTML in this document may carry incorrect frame urls, whereas the
            // frames enumeration is accurate. When a frame in the enumeration has the
            // same name as this frame tag, schedule a substitution to the known url.
            if (tag.NameEquals(HTMLTokens.Frame))
            {
                Attr frameName = tag.GetAttribute(HTMLTokens.Name);
                if (frameName != null && this._frames != null)
                {
                    LightWeightHTMLDocument frameDocument = GetFrameDocumentByName(frameName.Value);
                    if (frameDocument != null)
                    {
                        Attr frameSrc = tag.GetAttribute(HTMLTokens.Src);
                        if (frameSrc != null && frameSrc.Value != frameDocument.Url)
                        {
                            Generator.AddSubstitionUrl(new UrlToReplace(frameSrc.Value, frameDocument.Url));
                        }
                    }
                }
            }

            LightWeightTag recordedTag = new LightWeightTag(tag);

            // Append the tag to the array stored under its upper-cased name,
            // starting from an empty array when the name has not been seen yet.
            string tableKey = tag.Name.ToUpper(CultureInfo.InvariantCulture);
            LightWeightTag[] existing = _tagTable.ContainsKey(tableKey)
                ? (LightWeightTag[])_tagTable[tableKey]
                : new LightWeightTag[0];

            LightWeightTag[] extended = new LightWeightTag[existing.Length + 1];
            existing.CopyTo(extended, 0);
            extended[existing.Length] = recordedTag;
            _tagTable[tableKey] = extended;

            if (tag.NameEquals(HTMLTokens.Title) && !tag.Complete)
            {
                // The text that follows belongs to the title.
                _nextTextIsTitleText = true;
            }
            else if (tag.NameEquals(HTMLTokens.A) && !tag.Complete && tag.GetAttribute(HTMLTokens.Href) != null)
            {
                if (_collectingForTag == null)
                {
                    _collectingForTag = recordedTag;
                }
                else
                {
                    // Already collecting for an outer tag: just count the nested anchor.
                    _collectingForTagDepth++;
                }
            }

            base.OnBeginTag(tag);
        }
        /// <summary>
        /// Retrieves the next element from the stream, or null
        /// if the end of the stream has been reached.
        /// </summary>
        /// <remarks>
        /// Sources are consulted in this order: previously peeked elements (only when
        /// <paramref name="allowPeekElement"/> is true), elements queued on
        /// <c>elementStack</c> by an earlier call, and finally the raw
        /// <c>data</c> starting at <c>pos</c>.
        /// </remarks>
        /// <param name="allowPeekElement">
        /// When true and <c>peekElements</c> is non-empty, the first peeked element is
        /// removed from that list and returned instead of reading further.
        /// </param>
        /// <returns>The next element, or null when no input remains.</returns>
        private Element Next(bool allowPeekElement)
        {
            // 1. Serve an already-peeked element, oldest first.
            if (allowPeekElement && peekElements.Count > 0)
            {
                Element peekElement = peekElements[0];
                peekElements.RemoveAt(0);
                return(peekElement);
            }

            // 2. Serve elements queued by a previous call (see the pushes below).
            if (elementStack.Count != 0)
            {
                return(elementStack.Pop());
            }

            int dataLen = data.Length;

            if (dataLen == pos)
            {
                // If we're at EOF, return

                return(null);
            }

            // None of the special cases are true.  Start consuming characters

            // Start of any plain text that precedes the next piece of markup.
            int tokenStart = pos;

            while (true)
            {
                // Consume everything until a tag-looking thing
                while (pos < dataLen && data[pos] != '<')
                {
                    pos++;
                }

                if (pos >= dataLen)
                {
                    // EOF has been reached.
                    // Whatever was scanned since tokenStart (if anything) is trailing text.
                    if (tokenStart != pos)
                    {
                        return(new Text(data, tokenStart, pos - tokenStart));
                    }
                    else
                    {
                        return(null);
                    }
                }

                // We started parsing right on a tag-looking thing.  Try
                // parsing it as such.  If it doesn't turn out to be a tag,
                // we'll return it as text

                // Position of the '<'; text in [tokenStart, oldPos) precedes the markup.
                int oldPos = pos;

                Element element;
                EndTag  trailingEnd;
                // A negative length signals that no markup was recognized at pos.
                int     len = ParseMarkup(out element, out trailingEnd);
                if (len >= 0)
                {
                    pos += len;

                    if (trailingEnd != null)
                    {
                        // empty-element tag detected, add implicit end tag
                        elementStack.Push(trailingEnd);
                    }
                    else if (element is BeginTag)
                    {
                        // look for <script> or <style> body

                        Regex consumeTextUntil = null;

                        BeginTag tag = (BeginTag)element;
                        if (tag.NameEquals("script"))
                        {
                            consumeTextUntil = endScript;
                        }
                        else if (tag.NameEquals("style"))
                        {
                            consumeTextUntil = endStyle;
                        }

                        if (consumeTextUntil != null)
                        {
                            // Advance pos by the length reported for the script/style body.
                            // NOTE(review): this method creates no element for the skipped
                            // span; confirm whether ConsumeStructuredText is expected to.
                            int structuredTextLen = ConsumeStructuredText(data, pos, consumeTextUntil);
                            pos += structuredTextLen;
                        }
                    }

                    // Push order matters: the preceding text is pushed last, so it is
                    // popped (returned) first, then the element, then any implicit end
                    // tag on later calls. NOTE(review): assumes elementStack is LIFO.
                    elementStack.Push(element);
                    if (oldPos != tokenStart)
                    {
                        elementStack.Push(new Text(data, tokenStart, oldPos - tokenStart));
                    }

                    return(elementStack.Pop());
                }
                else
                {
                    // '<' didn't begin a tag after all;
                    // consume it and continue
                    // (tokenStart is unchanged, so the '<' becomes part of the text run)
                    pos++;
                    continue;
                }
            }
        }
 /// <summary>
 /// Decides whether a begin tag is illegal: either its name matches the
 /// <c>IllegalTagName</c> pattern, or style removal is enabled and the tag
 /// is a link element whose rel attribute is "stylesheet" (compared after
 /// invariant upper-casing and trimming).
 /// </summary>
 /// <param name="tag">The begin tag to examine.</param>
 /// <returns><c>true</c> if the tag is illegal; otherwise <c>false</c>.</returns>
 private bool IsIllegalTag(BeginTag tag)
 {
     if (IsRegexMatch(IllegalTagName, tag.Name))
         return true;

     // Only link elements are examined further, and only when styles are being removed.
     if (!FlagIsSet(Flag.RemoveStyles) || !tag.NameEquals("link"))
         return false;

     Attr rel = tag.GetAttribute("rel");
     if (rel == null || rel.Value == null)
         return false;

     // A link element that is a stylesheet is illegal.
     return rel.Value.ToUpperInvariant().Trim() == "STYLESHEET";
 }
        /// <summary>
        /// Filters a begin tag: unterminated tags are dropped when
        /// <c>Flag.RemovePartialTags</c> is set, illegal attributes are blanked,
        /// tags matching <c>IllegalTagTreeName</c> increase the suspend depth, and
        /// only legal tags seen while the suspend depth is zero are pushed and
        /// forwarded to the base implementation.
        /// </summary>
        /// <param name="tag">The begin tag that was encountered.</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            bool isUnwantedPartial = FlagIsSet(Flag.RemovePartialTags) && tag.Unterminated;
            if (isUnwantedPartial)
                return;

            // Illegal attributes are not removed; their values are emptied.
            foreach (Attr candidate in tag.Attributes)
            {
                if (!IsIllegalAttribute(candidate))
                    continue;

                candidate.Value = string.Empty;
            }

            if (tag.NameEquals("script"))
            {
                Debug.WriteLine("Script tag");
            }

            // Entering an illegal subtree: count the nesting and emit nothing.
            if (IsRegexMatch(IllegalTagTreeName, tag.Name))
            {
                suspendTagDepth++;
                return;
            }

            // Skip tags that are illegal themselves or that sit inside a suspended subtree.
            if (IsIllegalTag(tag) || suspendTagDepth != 0)
                return;

            PushStartTag(tag.Name);
            base.OnBeginTag(tag);
        }
 /// <summary>
 /// Checks whether the tag's name equals any entry of <c>_permittedBeforeBody</c>.
 /// </summary>
 /// <param name="tag">The begin tag to examine.</param>
 /// <returns><c>true</c> if the tag name is in the list; otherwise <c>false</c>.</returns>
 private bool TagPermittedAboveBody(BeginTag tag)
 {
     bool permitted = false;
     foreach (string allowedName in _permittedBeforeBody)
     {
         if (tag.NameEquals(allowedName))
         {
             permitted = true;
             break;
         }
     }
     return permitted;
 }
        /// <summary>
        /// Emits a begin tag while making sure the output has html, head and body
        /// structure: a missing html tag is emitted before the first tag, a head
        /// section with the additional metadata is synthesized before the first tag
        /// that is not permitted above the body, and a body tag is emitted before the
        /// first such tag when no body has been seen. Script tags are never emitted,
        /// base/meta tags are adjusted from the metadata, script attributes are
        /// removed and all other attribute values are passed through <c>ReplaceValue</c>.
        /// </summary>
        /// <param name="tag">The begin tag that was encountered; null is ignored.</param>
        protected override void OnBeginTag(BeginTag tag)
        {
            if (tag == null)
            {
                return;
            }

            // The very first tag must be <html>; supply one if the source did not.
            if (_firstTag)
            {
                if (!tag.NameEquals(HTMLTokens.Html))
                {
                    EmitTag(HTMLTokens.Html);
                }
                _firstTag = false;
            }

            // No head seen yet and this tag cannot precede the body: synthesize a head.
            bool needsSyntheticHead = !_seenHead && !TagPermittedAboveBody(tag);
            if (needsSyntheticHead)
            {
                Emit("<head>");
                EmitAdditionalMetaData();
                Emit("</head>");
                _seenHead = true;
            }

            // Script tags are dropped; an incomplete one opens a script region.
            if (tag.NameEquals(HTMLTokens.Script))
            {
                if (!tag.Complete)
                {
                    _scriptDepth++;
                }
                return;
            }

            if (tag.NameEquals(HTMLTokens.Head))
            {
                _seenHead = true;
            }
            else if (!_seenBody)
            {
                if (tag.NameEquals(HTMLTokens.Body))
                {
                    _seenBody = true;
                }
                else if (!TagPermittedAboveBody(tag))
                {
                    // Body content started without a body tag: open one.
                    EmitTag(HTMLTokens.Body);
                    _seenBody = true;
                }
            }

            if (tag.NameEquals(HTMLTokens.Base))
            {
                // Without a replacement base the original base tag is dropped entirely.
                if (_metaData == null || _metaData.Base == null)
                {
                    return;
                }

                Attr baseHref = tag.GetAttribute(HTMLTokens.Href);
                if (baseHref != null)
                {
                    baseHref.Value = _metaData.Base;
                }
                _emittedMetaData.Add(HTMLTokens.Base);
            }

            if (tag.NameEquals(HTMLTokens.Meta))
            {
                ModifyMetaDataAsNecessary(tag);
            }

            foreach (Attr current in tag.Attributes)
            {
                if (current == null)
                {
                    continue;
                }

                if (IsScriptAttribute(current))
                {
                    tag.RemoveAttribute(current.Name);
                }
                else
                {
                    current.Value = ReplaceValue(current.Value);
                }
            }

            Emit(tag.ToString());
            base.OnBeginTag(tag);
        }