/// <summary>
/// Initializes the shared strict <c>Latin1</c> and <c>UTF8</c> encodings and
/// fills the charset alias table that maps charset names to code pages.
/// </summary>
static CharsetUtils()
{
#if NETSTANDARD
    Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
#endif

    try
    {
        Latin1 = Encoding.GetEncoding(28591, new EncoderExceptionFallback(), new DecoderExceptionFallback());
    }
    catch (NotSupportedException)
    {
        // Some ASP.NET web hosts (GoDaddy's Windows environment, for example) ship
        // only the built-in text encodings and lack iso-8859-1 support, so an
        // alternative encoding has to stand in for it.
        //
        // Windows-1252 is used as that stand-in, if it is available...
        Latin1 = Encoding.GetEncoding(1252, new EncoderExceptionFallback(), new DecoderExceptionFallback());
    }

    // Encoding.UTF8.GetString() substitutes a unicode '?' character for invalid
    // bytes; a private UTF-8 instance with exception fallbacks is kept for the
    // GetString() calls where that substitution is not wanted.
    UTF8 = Encoding.GetEncoding(65001, new EncoderExceptionFallback(), new DecoderExceptionFallback());

    aliases = new Dictionary<string, int>(MimeUtils.OrdinalIgnoreCase);

    AddAliases(aliases, 65001, -1, "utf-8", "utf8");

    // Some systems use ANSI_X3.4-1968; it should be treated
    // exactly like US-ASCII.
    AddAliases(aliases, 20127, -1, "ansi_x3.4-1968");

    // ANSI_X3.110-1983 is another odd-ball charset that shows up every
    // once in a while; ISO-8859-1 seems to be the closest match.
    AddAliases(aliases, 28591, -1, "ansi_x3.110-1983", "latin1");

    // Macintosh aliases
    AddAliases(aliases, 10000, -1, "macintosh");
    AddAliases(aliases, 10079, -1, "x-mac-icelandic");

    // Korean charsets (aliases for euc-kr).
    // ks_c_5601-1987 is 'upgraded' to euc-kr because euc-kr is a superset.
    AddAliases(aliases, 51949, -1,
        "ks_c_5601-1987",
        "ksc-5601-1987",
        "ksc-5601_1987",
        "ksc-5601",
        "5601",
        "ks_c_5861-1992",
        "ksc-5861-1992",
        "ksc-5861_1992",
        "euckr-0",
        "euc-kr");

    // Chinese charsets (aliases for big5)
    AddAliases(aliases, 950, -1,
        "big5",
        "big5-0",
        "big5-hkscs",
        "big5.eten-0",
        "big5hkscs-0");

    // Chinese charsets (aliases for gb2312). The value AddAliases returns here
    // is handed to the next two registrations as their third argument
    // (presumably the fallback code page - confirm against AddAliases).
    int gb2312CodePage = AddAliases(aliases, 936, -1,
        "gb2312",
        "gb-2312",
        "gb2312-0",
        "gb2312-80",
        "gb2312.1980-0");

    // Chinese charsets (euc-cn and gbk are not supported on Mono).
    // https://bugzilla.mozilla.org/show_bug.cgi?id=844082 seems to suggest
    // falling back to gb2312.
    AddAliases(aliases, 51936, gb2312CodePage, "euc-cn", "gbk-0", "x-gbk", "gbk");

    // Chinese charsets (hz-gb-2312 is not supported on Mono)
    AddAliases(aliases, 52936, gb2312CodePage, "hz-gb-2312", "hz-gb2312");

    // Chinese charsets (aliases for gb18030)
    AddAliases(aliases, 54936, -1, "gb18030-0", "gb18030");

    // Japanese charsets (aliases for euc-jp)
    AddAliases(aliases, 51932, -1, "eucjp-0", "euc-jp", "ujis-0", "ujis");

    // Japanese charsets (aliases for Shift_JIS)
    AddAliases(aliases, 932, -1, "shift_jis", "jisx0208.1983-0", "jisx0212.1990-0", "pck");

    // From http://msdn.microsoft.com/en-us/library/system.text.encoding.getencodings.aspx:
    //
    // Encodings 50220 and 50222 are both associated with the name "iso-2022-jp",
    // but they are not identical. Encoding 50220 converts half-width Katakana
    // characters to full-width Katakana characters, whereas encoding 50222 uses
    // a shift-in/shift-out sequence to encode half-width Katakana characters.
    // The display name for encoding 50222 is
    // "Japanese (JIS-Allow 1 byte Kana - SO/SI)" to distinguish it from encoding
    // 50220, which has the display name "Japanese (JIS)".
    //
    // If your application requests the encoding name "iso-2022-jp", the .NET
    // Framework returns encoding 50220. However, the encoding that is appropriate
    // for your application will depend on the preferred treatment of the
    // half-width Katakana characters.
    AddAliases(aliases, 50220, -1, "iso-2022-jp");
}