Пример #1
0
        /// <summary>
        /// Finds the end tag that closes <paramref name="tag"/>, accounting for nested
        /// tags that share the same name.
        /// </summary>
        /// <param name="tag">The start tag to match. A null value yields null.</param>
        /// <param name="stopBeforeTag">
        /// Optional boundary: when not null, the scan ends at the position just before this
        /// tag's index; when null, the scan runs through the last entry of the tag list.
        /// </param>
        /// <returns>
        /// <paramref name="tag"/> itself when it is self-closing; otherwise the end tag that
        /// brings the nesting depth back to zero, or null when no such tag lies in the scanned range.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="tag"/> is an end tag.</exception>
        public HTMLTag FindCorrespondingEndTag(HTMLTag tag, HTMLTag stopBeforeTag)
        {
            if (tag == null)
            {
                return null;
            }

            if (tag.IsEnd)
            {
                throw new ArgumentException("Tag must be a start tag.");
            }

            // A self-closing tag acts as its own end tag.
            if (tag.IsSelfClosing)
            {
                return tag;
            }

            // Scanning begins with the tag that immediately follows the start tag.
            int position = GetTagIndex(tag) + 1;

            int lastPosition;
            if (stopBeforeTag != null)
            {
                lastPosition = GetTagIndex(stopBeforeTag) - 1;
            }
            else
            {
                lastPosition = _tags.Count - 1;
            }

            // Number of same-named start tags still waiting for their end tag,
            // counting the one passed in.
            int openCount = 1;

            while (position <= lastPosition)
            {
                HTMLTag candidate = _tags[position];
                position++;

                // Self-closing tags and tags with a different name never affect nesting.
                if (candidate.IsSelfClosing || !candidate.NameEquals(tag.Name))
                {
                    continue;
                }

                if (candidate.IsEnd)
                {
                    openCount--;
                }
                else
                {
                    openCount++;
                }

                if (openCount == 0)
                {
                    return candidate;
                }
            }

            return null;
        }
Пример #2
0
        /// <summary>
        /// Looks inside the content for a declared character set: the <c>encoding</c> attribute
        /// of an XML declaration for XML MIME types, otherwise <c>meta</c> tags.
        /// </summary>
        /// <param name="bytes">Raw content, converted to text through <c>UnknownEncodingToString</c>.</param>
        /// <param name="httpContentType">
        /// Content-Type value from which the MIME type is extracted; when no MIME type is
        /// obtained, an empty string is used for the comparisons.
        /// </param>
        /// <returns>
        /// For <c>application/xhtml+xml</c>, <c>application/xml</c> and <c>text/xml</c>: the declared
        /// encoding, or <c>"UTF-8"</c> when none is declared. For any other MIME type: the first
        /// non-empty character set found in a <c>meta</c> tag, or null when there is none.
        /// </returns>
        private static string DetectCharacterSetFromContent(byte[] bytes, string httpContentType)
        {
            // NOTE(review): 4096 is presumably a cap on how much of the content is decoded - confirm.
            string content = UnknownEncodingToString(bytes, 4096);

            // Built up front, although only the non-XML path below reads it.
            HTMLParser contentParser = new HTMLParser(content);

            string mimeType = GetMIMETypeFromContentType(httpContentType) ?? String.Empty;

            bool isXmlMimeType = false;
            foreach (string xmlMimeType in new[] { "application/xhtml+xml", "application/xml", "text/xml" })
            {
                if (String.Equals(mimeType, xmlMimeType, StringComparison.OrdinalIgnoreCase))
                {
                    isXmlMimeType = true;
                    break;
                }
            }

            if (isXmlMimeType)
            {
                string xmlEncoding = null;

                if (content.StartsWith("<?xml", StringComparison.OrdinalIgnoreCase))
                {
                    // Replace the leading "<?" with "<" so the declaration can be read
                    // as a tag named "xml" (verified below).
                    HTMLParser declarationParser = new HTMLParser("<" + content.Substring(2));
                    if (declarationParser.Tags.Count >= 1)
                    {
                        HTMLTag declaration = declarationParser.Tags[0];
                        if (declaration != null && declaration.NameEquals("xml") && declaration.Offset == 0)
                        {
                            xmlEncoding = declaration.GetAttributeValue("encoding");
                        }
                    }
                }

                // Without a usable encoding attribute, fall back to UTF-8.
                return String.IsNullOrEmpty(xmlEncoding) ? "UTF-8" : xmlEncoding;
            }

            foreach (HTMLTag meta in contentParser.FindStartTags("meta"))
            {
                // A charset attribute takes precedence within a tag.
                string metaCharSet = meta.GetAttributeValue("charset");

                if (String.IsNullOrEmpty(metaCharSet))
                {
                    // Otherwise only http-equiv="Content-Type" tags are of interest;
                    // their content attribute carries the character set.
                    string httpEquiv = meta.GetAttributeValueOrEmpty("http-equiv").Trim();
                    if (!httpEquiv.Equals("Content-Type", StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    metaCharSet = GetCharSetFromContentType(meta.GetAttributeValue("content"));
                }

                if (!String.IsNullOrEmpty(metaCharSet))
                {
                    return metaCharSet;
                }
            }

            return null;
        }