Esempio n. 1
0
        // Verifies that reading the ANSI.txt sample reports the encoding returned by
        // CrossPlatform.GetDefaultEncoding() as the detected encoding.
        public void ShouldFallBackToDefaultCodePage()
        {
            // Arrange
            var ansiSample = GetFixtureResouce("Samples", "ANSI.txt");

            // Act: only the reported encoding is of interest; the decoded text is discarded.
            Encoding detected;
            FileSystem.ReadFile(ansiSample, out detected);

            // Assert
            Assert.AreEqual(CrossPlatform.GetDefaultEncoding(), detected);
        }
Esempio n. 2
0
        // Runs PerformTest against the ANSI.txt sample and verifies that both the sample file
        // and the test result report the encoding returned by CrossPlatform.GetDefaultEncoding().
        public void RetainANSI()
        {
            // Arrange
            var ansiSample    = GetFixtureResouce("Samples", "ANSI.txt");
            var substitutions = new VariableDictionary();
            substitutions["LocalCacheFolderName"] = "SpongeBob";

            // Act: PerformTest runs first, then the sample is read back to detect its encoding.
            var outcome = PerformTest(ansiSample, substitutions);

            Encoding sampleEncoding;
            FileSystem.ReadFile(ansiSample, out sampleEncoding);

            // Assert
            Assert.AreEqual(CrossPlatform.GetDefaultEncoding(), sampleEncoding);
            Assert.AreEqual(CrossPlatform.GetDefaultEncoding(), outcome.Encoding);
        }
Esempio n. 3
0
 // Returns the Base64 representation of the bytes of `value`, as produced by the
 // encoding that CrossPlatform.GetDefaultEncoding() returns.
 public static string ConvertServiceMessageValue(string value)
 {
     var rawBytes = CrossPlatform.GetDefaultEncoding().GetBytes(value);

     return Convert.ToBase64String(rawBytes);
 }
        /// <summary>
        /// Reads a file into a string and reports the encoding that was used to decode it.
        /// </summary>
        /// <remarks>
        /// Detection order: a Unicode BOM/signature wins; otherwise the bytes are scanned and classified as
        /// pure ASCII, BOM-less UTF-8, or (as a last resort) the encoding returned by
        /// <c>CrossPlatform.GetDefaultEncoding()</c>. UTF-16 without a BOM and non-default ANSI code pages
        /// are deliberately not detected.
        /// Based on http://stackoverflow.com/questions/1025332/determine-a-strings-encoding-in-c-sharp
        /// </remarks>
        /// <param name="filename">Path of the file to read.</param>
        /// <param name="encoding">Receives the encoding that was detected and used for decoding.</param>
        /// <returns>The decoded file content; when a BOM was found it is not part of the returned text.</returns>
        public string ReadFile(string filename, out Encoding encoding)
        {
            var b = File.ReadAllBytes(filename);

            // BOM/signature exists (sourced from http://www.unicode.org/faq/utf_bom.html#bom4).
            // The UTF-32 checks must come before the UTF-16 ones: the UTF-32 LE signature (FF FE 00 00)
            // starts with the UTF-16 LE signature (FF FE).
            if (b.Length >= 4 && b[0] == 0x00 && b[1] == 0x00 && b[2] == 0xFE && b[3] == 0xFF)
            {
                // UTF-32, big-endian
                encoding = Encoding.GetEncoding("utf-32BE");
                return encoding.GetString(b, 4, b.Length - 4);
            }
            if (b.Length >= 4 && b[0] == 0xFF && b[1] == 0xFE && b[2] == 0x00 && b[3] == 0x00)
            {
                // UTF-32, little-endian
                encoding = Encoding.UTF32;
                return encoding.GetString(b, 4, b.Length - 4);
            }
            if (b.Length >= 2 && b[0] == 0xFE && b[1] == 0xFF)
            {
                // UTF-16, big-endian
                encoding = Encoding.BigEndianUnicode;
                return encoding.GetString(b, 2, b.Length - 2);
            }
            if (b.Length >= 2 && b[0] == 0xFF && b[1] == 0xFE)
            {
                // UTF-16, little-endian
                encoding = Encoding.Unicode;
                return encoding.GetString(b, 2, b.Length - 2);
            }
            if (b.Length >= 3 && b[0] == 0xEF && b[1] == 0xBB && b[2] == 0xBF)
            {
                // UTF-8
                encoding = Encoding.UTF8;
                return encoding.GetString(b, 3, b.Length - 3);
            }
            if (b.Length >= 3 && b[0] == 0x2b && b[1] == 0x2f && b[2] == 0x76)
            {
                // UTF-7
                encoding = Encoding.UTF7;
                return encoding.GetString(b, 3, b.Length - 3);
            }

            // Some text files are encoded in UTF8, but have no BOM/signature. Hence
            // the below manually checks for a UTF8 pattern. This code is based off
            // the top answer at: http://stackoverflow.com/questions/6555015/check-for-invalid-utf8
            var i     = 0;
            var utf8  = false;
            var ascii = true;

            // Scan the WHOLE buffer. Stopping short of the end would leave the tail unchecked, so short
            // files and files whose only non-ASCII bytes sit at the end would be misreported as ASCII.
            while (i < b.Length)
            {
                var lead = b[i];

                if (lead <= 0x7F)
                {
                    // If all characters are below 0x80, then it is valid UTF8, but UTF8 is not 'required'
                    i += 1;
                    continue;
                }

                // Total length of the sequence implied by the lead byte; 0 means "not a valid lead byte".
                // 0xE0-0xEF start 3-byte sequences and 0xF0-0xF4 start 4-byte sequences; the ranges must
                // not overlap, otherwise 0xF0 sequences are consumed one byte short and then rejected.
                int sequenceLength;
                if (lead >= 0xC2 && lead <= 0xDF)
                {
                    sequenceLength = 2;
                }
                else if (lead >= 0xE0 && lead <= 0xEF)
                {
                    sequenceLength = 3;
                }
                else if (lead >= 0xF0 && lead <= 0xF4)
                {
                    sequenceLength = 4;
                }
                else
                {
                    sequenceLength = 0;
                }

                // An invalid lead byte, a bad continuation byte, or a sequence truncated by the end of
                // the file all mean the content is neither ASCII nor UTF-8.
                if (sequenceLength == 0 || !HasContinuationBytes(b, i + 1, sequenceLength - 1))
                {
                    ascii = false;
                    utf8  = false;
                    break;
                }

                i    += sequenceLength;
                utf8  = true;
                ascii = false;
            }

            if (ascii)
            {
                encoding = Encoding.ASCII;
                return Encoding.ASCII.GetString(b);
            }
            if (utf8)
            {
                encoding = new UTF8Encoding(encoderShouldEmitUTF8Identifier: false); //UTF8 with no BOM
                return Encoding.UTF8.GetString(b);
            }

            // If all else fails, the encoding is probably (though certainly not definitely) the user's local codepage!
            // This is probably something like Windows 1252 on Windows, but Encoding.Default is UTF8 on Linux,
            // so this probably isn't right on Linux.
            encoding = CrossPlatform.GetDefaultEncoding();

            return encoding.GetString(b);
        }

        // Returns true when `count` bytes starting at `start` all exist within `bytes` and each one is a
        // UTF-8 continuation byte (0x80-0xBF).
        private static bool HasContinuationBytes(byte[] bytes, int start, int count)
        {
            if (start + count > bytes.Length)
            {
                return false;
            }

            for (var offset = start; offset < start + count; offset++)
            {
                if (bytes[offset] < 0x80 || bytes[offset] >= 0xC0)
                {
                    return false;
                }
            }

            return true;
        }