コード例 #1
0
        // Type initializer: creates the strict (exception-throwing) Latin1 and UTF8
        // encodings and populates the charset alias table.
        static CharsetUtils()
        {
#if NETSTANDARD
            // Register the code-pages provider before any of the lookups below.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
#endif

            try {
                Latin1 = Encoding.GetEncoding(
                    28591,
                    new EncoderExceptionFallback(),
                    new DecoderExceptionFallback());
            } catch (NotSupportedException) {
                // Some ASP.NET web hosts (GoDaddy's Windows environment, for example) ship
                // only the built-in text encodings and have no iso-8859-1 support, so an
                // alternative encoding has to stand in for it.
                //
                // Windows-1252 is used as the substitute, if it is available.
                Latin1 = Encoding.GetEncoding(
                    1252,
                    new EncoderExceptionFallback(),
                    new DecoderExceptionFallback());
            }

            // Encoding.UTF8.GetString() replaces invalid bytes with a unicode '?' character.
            // This dedicated UTF8 instance uses exception fallbacks instead, for the cases
            // where GetString() must not do that.
            UTF8 = Encoding.GetEncoding(
                65001,
                new EncoderExceptionFallback(),
                new DecoderExceptionFallback());

            aliases = new Dictionary<string, int>(MimeUtils.OrdinalIgnoreCase);

            AddAliases(aliases, 65001, -1, "utf-8", "utf8");

            // ANSI_X3.4-1968 shows up on some systems and is to be
            // handled exactly like US-ASCII.
            AddAliases(aliases, 20127, -1, "ansi_x3.4-1968");

            // ANSI_X3.110-1983 is another rarely-seen charset; ISO-8859-1
            // appears to be the closest match for it.
            AddAliases(aliases, 28591, -1, "ansi_x3.110-1983", "latin1");

            // Macintosh
            AddAliases(aliases, 10000, -1, "macintosh");
            AddAliases(aliases, 10079, -1, "x-mac-icelandic");

            // Korean (everything maps to euc-kr).
            // ks_c_5601-1987 is 'upgraded' to euc-kr because euc-kr is a superset of it.
            AddAliases(aliases, 51949, -1,
                       "ks_c_5601-1987", "ksc-5601-1987", "ksc-5601_1987", "ksc-5601", "5601",
                       "ks_c_5861-1992", "ksc-5861-1992", "ksc-5861_1992",
                       "euckr-0", "euc-kr");

            // Chinese (big5)
            AddAliases(aliases, 950, -1,
                       "big5", "big5-0", "big5-hkscs", "big5.eten-0", "big5hkscs-0");

            // Chinese (gb2312). The value returned here is passed as the third argument
            // of the euc-cn/gbk and hz-gb-2312 registrations below.
            int gb2312Fallback = AddAliases(aliases, 936, -1,
                                            "gb2312", "gb-2312", "gb2312-0", "gb2312-80", "gb2312.1980-0");

            // Chinese (euc-cn and gbk are not supported on Mono).
            // https://bugzilla.mozilla.org/show_bug.cgi?id=844082 seems to suggest falling back to gb2312.
            AddAliases(aliases, 51936, gb2312Fallback, "euc-cn", "gbk-0", "x-gbk", "gbk");

            // Chinese (hz-gb-2312 is not supported on Mono)
            AddAliases(aliases, 52936, gb2312Fallback, "hz-gb-2312", "hz-gb2312");

            // Chinese (gb18030)
            AddAliases(aliases, 54936, -1, "gb18030-0", "gb18030");

            // Japanese (euc-jp)
            AddAliases(aliases, 51932, -1, "eucjp-0", "euc-jp", "ujis-0", "ujis");

            // Japanese (Shift_JIS)
            AddAliases(aliases, 932, -1, "shift_jis", "jisx0208.1983-0", "jisx0212.1990-0", "pck");

            // From http://msdn.microsoft.com/en-us/library/system.text.encoding.getencodings.aspx:
            //
            // Encodings 50220 and 50222 are both associated with the name "iso-2022-jp", but
            // they are not identical. 50220 converts half-width Katakana characters to
            // full-width Katakana characters, while 50222 encodes half-width Katakana using a
            // shift-in/shift-out sequence. To tell them apart, 50222 carries the display name
            // "Japanese (JIS-Allow 1 byte Kana - SO/SI)" and 50220 the display name
            // "Japanese (JIS)".
            //
            // A request for the encoding name "iso-2022-jp" makes the .NET Framework return
            // encoding 50220, although which one suits an application depends on how it wants
            // half-width Katakana characters to be treated.
            AddAliases(aliases, 50220, -1, "iso-2022-jp");
        }