Ejemplo n.º 1
0
        /// <summary>
        /// Works out the name of the character set used by the supplied bytes.
        /// </summary>
        /// <param name="bytes">Buffer holding the raw text data to inspect.</param>
        /// <param name="count">Number of bytes handed to the detection helpers.</param>
        /// <param name="language">
        /// Optional language hint. When it is null/blank and <paramref name="enableLanguageDetection"/>
        /// is true, the language is detected from <paramref name="bytes"/> instead.
        /// </param>
        /// <param name="enableLanguageDetection">
        /// Whether the language may be detected from the data when no hint is supplied.
        /// </param>
        /// <returns>
        /// <c>"utf-8"</c> when either the initial encoding check or the charset detector reports UTF-8;
        /// the detected charset name when it is anything other than windows-1252; otherwise the
        /// character set looked up from the (supplied or detected) language; or <c>null</c> when
        /// no language is available either.
        /// </returns>
        public string GetDetectedEncodingName(byte[] bytes, int count, string language, bool enableLanguageDetection)
        {
            // Detection always starts at the beginning of the buffer.
            const int index = 0;

            var encoding = GetInitialEncoding(bytes, count);

            if (encoding != null && encoding.Equals(Encoding.UTF8))
            {
                return "utf-8";
            }

            if (string.IsNullOrWhiteSpace(language) && enableLanguageDetection)
            {
                if (!_langDetectInitialized)
                {
                    // Mark the detector as initialized only after Initialize has returned.
                    // Setting the flag first would leave it stuck at true if Initialize threw,
                    // so every later call would skip initialization and use an unprepared detector.
                    LanguageDetector.Initialize(_json);
                    _langDetectInitialized = true;
                }

                language = DetectLanguage(bytes, index, count);

                if (!string.IsNullOrWhiteSpace(language))
                {
                    _logger.LogDebug("Text language detected as {0}", language);
                }
            }

            var charset = DetectCharset(bytes, index, count, language);

            if (!string.IsNullOrWhiteSpace(charset))
            {
                // Normalize any casing of UTF-8 to the canonical lower-case name.
                if (string.Equals(charset, "utf-8", StringComparison.OrdinalIgnoreCase))
                {
                    return "utf-8";
                }

                // A windows-1252 result is not returned directly; it falls through to the
                // language-based lookup below.
                if (!string.Equals(charset, "windows-1252", StringComparison.OrdinalIgnoreCase))
                {
                    return charset;
                }
            }

            if (!string.IsNullOrWhiteSpace(language))
            {
                return GetFileCharacterSetFromLanguage(language);
            }

            // Neither a usable charset nor a language could be determined.
            return null;
        }