/// <summary>
/// Looks for a character set declared inside the document content itself.
/// </summary>
/// <remarks>
/// The content is first converted to text with <c>UnknownEncodingToString(bytes, 4096)</c>
/// (presumably only a leading portion of the content is inspected; confirm against that helper).
/// For the MIME types application/xhtml+xml, application/xml and text/xml, only a leading
/// XML declaration is consulted, and "UTF-8" is returned when it supplies no encoding.
/// For every other MIME type, the meta start tags are scanned in document order.
/// </remarks>
/// <param name="bytes">Raw content whose declared character set is wanted.</param>
/// <param name="httpContentType">
/// Content-Type value used to pick the XML or HTML detection rules. When no MIME type
/// can be extracted from it, the HTML rules are used.
/// </param>
/// <returns>
/// The declared character set name; "UTF-8" for XML content without a usable encoding
/// declaration; or null when HTML content declares nothing.
/// </returns>
private static string DetectCharacterSetFromContent(byte[] bytes, string httpContentType)
{
    string text = UnknownEncodingToString(bytes, 4096);
    string mimeType = GetMIMETypeFromContentType(httpContentType) ?? String.Empty;
    string charSet;

    bool isXml =
        mimeType.Equals("application/xhtml+xml", StringComparison.OrdinalIgnoreCase) ||
        mimeType.Equals("application/xml", StringComparison.OrdinalIgnoreCase) ||
        mimeType.Equals("text/xml", StringComparison.OrdinalIgnoreCase);

    if (isXml)
    {
        if (text.StartsWith("<?xml", StringComparison.OrdinalIgnoreCase))
        {
            // XML declaration: drop the leading "<?" and prepend "<" so the HTML parser
            // sees it as an ordinary tag named "xml" whose attributes can be read.
            HTMLParser xmlParser = new HTMLParser("<" + text.Substring(2));
            HTMLTag xmlTag = xmlParser.Tags.Count >= 1 ? xmlParser.Tags[0] : null;

            // Only accept the tag if it really is the declaration at the very start.
            if (xmlTag != null && xmlTag.NameEquals("xml") && xmlTag.Offset == 0)
            {
                charSet = xmlTag.GetAttributeValue("encoding");
                if (!String.IsNullOrEmpty(charSet))
                {
                    return(charSet);
                }
            }
        }

        // Default
        return("UTF-8");
    }

    // The full HTML parse is only needed on this path, so it is created here rather
    // than up front; the XML branch above always returns without using it.
    HTMLParser htmlParser = new HTMLParser(text);

    foreach (HTMLTag tag in htmlParser.FindStartTags("meta"))
    {
        // charset attribute
        charSet = tag.GetAttributeValue("charset");
        if (!String.IsNullOrEmpty(charSet))
        {
            return(charSet);
        }

        // http-equiv and content attributes
        if (tag.GetAttributeValueOrEmpty("http-equiv").Trim().Equals("Content-Type", StringComparison.OrdinalIgnoreCase))
        {
            charSet = GetCharSetFromContentType(tag.GetAttributeValue("content"));
            if (!String.IsNullOrEmpty(charSet))
            {
                return(charSet);
            }
        }
    }

    return(null);
}
/// <summary>
/// Tests whether the class attribute of <paramref name="tag"/> contains the given class name.
/// </summary>
/// <param name="tag">Tag whose "class" attribute is read.</param>
/// <param name="targetClassName">Class name to look for.</param>
/// <returns>
/// false when the tag yields no "class" attribute value (null); otherwise the result of the
/// string-based <c>ClassAttributeValueHas</c> overload applied to that value.
/// </returns>
public static bool ClassAttributeValueHas(HTMLTag tag, string targetClassName)
{
    string classValue = tag.GetAttributeValue("class");

    // No class attribute value: nothing can match.
    if (classValue == null)
    {
        return(false);
    }

    // Delegate the actual matching to the (string, string) overload.
    return(ClassAttributeValueHas(classValue, targetClassName));
}
/// <summary>
/// Reports whether <paramref name="tag"/> carries <paramref name="targetClassName"/> in its
/// "class" attribute.
/// </summary>
/// <param name="tag">Tag to inspect; its "class" attribute value is fetched.</param>
/// <param name="targetClassName">Class name being searched for.</param>
/// <returns>
/// The result of the string-based <c>ClassAttributeValueHas</c> overload for the attribute
/// value, or false when that value is null.
/// </returns>
public static bool ClassAttributeValueHas(HTMLTag tag, string targetClassName)
{
    string classes = tag.GetAttributeValue("class");

    // A null value short-circuits to false; the overload is only called for a real value.
    return classes == null
        ? false
        : ClassAttributeValueHas(classes, targetClassName);
}