/// <summary>
/// Locates the end tag that closes the given start tag, keeping count of
/// nested tags with the same name so that inner pairs are skipped.
/// </summary>
/// <param name="tag">The start tag to match. When null, null is returned.</param>
/// <param name="stopBeforeTag">
/// Optional boundary. When non-null, the scan ends at the tag immediately
/// preceding this one; when null, the scan runs to the last tag in the list.
/// </param>
/// <returns>
/// The matching end tag; <paramref name="tag"/> itself when it is self-closing;
/// or null when no matching end tag exists inside the scanned range.
/// </returns>
/// <exception cref="ArgumentException"><paramref name="tag"/> is an end tag.</exception>
public HTMLTag FindCorrespondingEndTag(HTMLTag tag, HTMLTag stopBeforeTag)
{
    if (tag == null)
    {
        return null;
    }

    if (tag.IsEnd)
    {
        throw new ArgumentException("Tag must be a start tag.");
    }

    // A self-closing tag is its own terminator.
    if (tag.IsSelfClosing)
    {
        return tag;
    }

    // Scan window: from the tag right after the start tag up to (and including)
    // either the tag just before the boundary or the final tag in the list.
    int firstIndex = GetTagIndex(tag) + 1;
    int lastIndex;
    if (stopBeforeTag != null)
    {
        lastIndex = GetTagIndex(stopBeforeTag) - 1;
    }
    else
    {
        lastIndex = _tags.Count - 1;
    }

    // Number of same-named start tags still waiting for their end tag;
    // the tag we were given accounts for the initial 1.
    int openCount = 1;

    for (int index = firstIndex; index <= lastIndex; index++)
    {
        HTMLTag candidate = _tags[index];

        // Only non-self-closing tags with the same name affect nesting.
        if (candidate.IsSelfClosing || !candidate.NameEquals(tag.Name))
        {
            continue;
        }

        if (candidate.IsEnd)
        {
            openCount--;
            if (openCount == 0)
            {
                return candidate;
            }
        }
        else
        {
            openCount++;
        }
    }

    // Ran out of tags before the nesting count returned to zero.
    return null;
}
/// <summary>
/// Tries to determine the character set declared inside the document content.
/// For XML MIME types the encoding of a leading XML declaration is used, with
/// "UTF-8" as the fallback. For every other MIME type, <c>meta</c> start tags
/// are inspected for a <c>charset</c> attribute or for an
/// <c>http-equiv="Content-Type"</c> / <c>content</c> pair.
/// </summary>
/// <param name="bytes">
/// Raw content bytes. They are decoded via <c>UnknownEncodingToString(bytes, 4096)</c>;
/// NOTE(review): the 4096 argument is presumably a limit on how much is decoded - confirm.
/// </param>
/// <param name="httpContentType">
/// The HTTP Content-Type header value from which the MIME type is extracted.
/// A null MIME type result is treated as an empty string.
/// </param>
/// <returns>
/// The declared character set name; "UTF-8" for XML MIME types that carry no
/// usable encoding declaration; or null when nothing is declared in non-XML content.
/// </returns>
private static string DetectCharacterSetFromContent(byte[] bytes, string httpContentType)
{
    string text = UnknownEncodingToString(bytes, 4096);
    string mimeType = GetMIMETypeFromContentType(httpContentType) ?? String.Empty;
    string charSet;

    bool isXmlMimeType =
        mimeType.Equals("application/xhtml+xml", StringComparison.OrdinalIgnoreCase) ||
        mimeType.Equals("application/xml", StringComparison.OrdinalIgnoreCase) ||
        mimeType.Equals("text/xml", StringComparison.OrdinalIgnoreCase);

    if (isXmlMimeType)
    {
        if (text.StartsWith("<?xml", StringComparison.OrdinalIgnoreCase))
        {
            // XML declaration: drop the leading "<?" and re-prepend "<" so the
            // declaration can be parsed like an ordinary tag named "xml".
            HTMLParser xmlParser = new HTMLParser("<" + text.Substring(2));
            HTMLTag xmlTag = xmlParser.Tags.Count >= 1 ? xmlParser.Tags[0] : null;
            if (xmlTag != null && xmlTag.NameEquals("xml") && xmlTag.Offset == 0)
            {
                charSet = xmlTag.GetAttributeValue("encoding");
                if (!String.IsNullOrEmpty(charSet))
                {
                    return charSet;
                }
            }
        }

        // Default
        return "UTF-8";
    }

    // The HTML parse is only needed on this path, so it is created here rather
    // than up front; XML content returns above without paying for it.
    HTMLParser htmlParser = new HTMLParser(text);
    foreach (HTMLTag tag in htmlParser.FindStartTags("meta"))
    {
        // charset attribute
        charSet = tag.GetAttributeValue("charset");
        if (!String.IsNullOrEmpty(charSet))
        {
            return charSet;
        }

        // http-equiv and content attributes
        if (tag.GetAttributeValueOrEmpty("http-equiv").Trim().Equals("Content-Type", StringComparison.OrdinalIgnoreCase))
        {
            charSet = GetCharSetFromContentType(tag.GetAttributeValue("content"));
            if (!String.IsNullOrEmpty(charSet))
            {
                return charSet;
            }
        }
    }

    return null;
}