Esempio n. 1
0
 /// <summary>
 /// Reads a file from disk, detects its code page and line-ending convention, and returns it
 /// as a <c>TextFile</c> whose content is split into lines.
 /// </summary>
 /// <remarks>
 /// Code page detection, in order:
 /// 1. An empty file is treated as UTF-8.
 /// 2. Every entry of <c>TextFile.SpecialCodePageInfo</c> is tested against the start of the file;
 ///    the matching entry with the longest BOM wins. An entry whose BOM is empty always matches.
 /// 3. If no entry matched, the bytes are scanned for sequences that are not well-formed UTF-8.
 ///    With none found the file is treated as UTF-8; otherwise an ANSI code page is selected,
 ///    either via <paramref name="ansiCodePageGetter"/> or from the system default.
 /// The line ending is taken from the first line break found in the decoded text and falls back
 /// to <see cref="Environment.NewLine"/> when the text contains no line break.
 /// </remarks>
 /// <param name="fileName">Path of the file to load. Must not be null or empty.</param>
 /// <param name="ansiCodePageGetter">
 /// Optional callback invoked when the file contains bytes that are not valid UTF-8. It receives the
 /// known ANSI code pages, the proposed default code page, the raw file bytes and the offsets of the
 /// offending bytes, and returns the code page to use, or null to abort. When this parameter is null
 /// the proposed default code page is used.
 /// </param>
 /// <returns>The loaded file with its detected code page, lines and line-ending kind.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null or empty.</exception>
 /// <exception cref="FileNotFoundException">The file does not exist.</exception>
 /// <exception cref="OperationCanceledException"><paramref name="ansiCodePageGetter"/> returned null.</exception>
 public static TextFile Load(string fileName, AnsiCodePageGetter ansiCodePageGetter)
 {
     if (string.IsNullOrEmpty(fileName))
         throw new ArgumentNullException("fileName");
     if (!File.Exists(fileName))
         throw new FileNotFoundException("File not found.", fileName);

     // ReadAllBytes keeps reading until the end of the file; a single Stream.Read call is allowed
     // to return fewer bytes than requested. An empty file yields an empty array.
     byte[] allBytes = File.ReadAllBytes(fileName);

     int codePage;
     int textStart;
     if (allBytes.Length == 0)
     {
         codePage = (int)SpecialCodePage.Utf8;
         textStart = 0;
     }
     else
     {
         KeyValuePair<SpecialCodePage, CodePageInfoAttribute>? codePageGot = null;
         foreach (KeyValuePair<SpecialCodePage, CodePageInfoAttribute> kv in TextFile.SpecialCodePageInfo)
         {
             // Compare the BOM byte by byte: Array.Equals(a, b) is Object.Equals(object, object),
             // i.e. reference equality, and would never match two distinct arrays.
             byte[] bom = kv.Value.Bom;
             bool maybe = allBytes.Length >= bom.Length;
             for (int b = 0; maybe && (b < bom.Length); b++)
                 maybe = allBytes[b] == bom[b];

             // Prefer the longest matching BOM so that a BOM which is a prefix of another one
             // does not shadow the longer match.
             if (maybe && ((codePageGot == null) || (codePageGot.Value.Value.Bom.Length < bom.Length)))
                 codePageGot = kv;
         }
         if (codePageGot != null)
         {
             codePage = (int)codePageGot.Value.Key;
             textStart = codePageGot.Value.Value.Bom.Length;
         }
         else
         {
             textStart = 0;

             // Collect the offsets of all bytes that cannot be part of a well-formed UTF-8 sequence.
             List<int> ansiPositions = new List<int>();
             for (int i = 0; i < allBytes.Length; i++)
             {
                 byte cur = allBytes[i];

                 // Only 0x00-0x7F are single-byte characters in UTF-8; 0x80 is not ASCII.
                 if (cur < 0x80)
                     continue;

                 // Number of continuation bytes announced by the lead byte (0 = not a lead byte).
                 int continuationCount;
                 if ((cur >= 0xC0) && (cur <= 0xDF))
                     continuationCount = 1;
                 else if ((cur >= 0xE0) && (cur <= 0xEF))
                     continuationCount = 2;
                 else if ((cur >= 0xF0) && (cur <= 0xF7))
                     continuationCount = 3;
                 else
                     continuationCount = 0;

                 // The sequence is valid only if it fits in the buffer and every trailing byte
                 // is a continuation byte (0x80-0xBF).
                 bool validSequence = (continuationCount > 0) && ((i + continuationCount) < allBytes.Length);
                 for (int k = 1; validSequence && (k <= continuationCount); k++)
                 {
                     byte nxt = allBytes[i + k];
                     validSequence = (nxt >= 0x80) && (nxt <= 0xBF);
                 }

                 if (validSequence)
                     i += continuationCount; // skip the continuation bytes just validated
                 else
                     ansiPositions.Add(i);
             }

             if (ansiPositions.Count == 0)
             {
                 codePage = (int)SpecialCodePage.Utf8;
             }
             else
             {
                 // Propose the system default code page, or the first known ANSI code page
                 // when the system default is not in the list.
                 int defaultCodePage = System.Text.Encoding.Default.CodePage;
                 if (!TextFile.AnsiCodePages.ContainsKey(defaultCodePage))
                     foreach (KeyValuePair<int, string> kv in TextFile.AnsiCodePages)
                     {
                         defaultCodePage = kv.Key;
                         break;
                     }
                 if (ansiCodePageGetter == null)
                 {
                     codePage = defaultCodePage;
                 }
                 else
                 {
                     int? cp = ansiCodePageGetter(TextFile.AnsiCodePages, defaultCodePage, allBytes, ansiPositions.ToArray());
                     if (cp.HasValue)
                         codePage = cp.Value;
                     else
                         throw new OperationCanceledException();
                 }
             }
         }
     }

     // Decode everything after the BOM (if any).
     string text;
     if ((allBytes.Length - textStart) == 0)
         text = "";
     else
         text = Encoding.GetEncoding((int)codePage).GetString(allBytes, textStart, allBytes.Length - textStart);

     // The first line break found in the text decides the line-ending convention of the file.
     string newLineSequence = null;
     for (int i = 0; (newLineSequence == null) && (i < text.Length); i++)
     {
         switch (text[i])
         {
             case '\r':
                 if ((i < text.Length - 1) && (text[i + 1] == '\n'))
                     newLineSequence = "\r\n";
                 else
                     newLineSequence = "\r";
                 break;
             case '\n':
                 newLineSequence = "\n";
                 break;
         }
     }
     if (newLineSequence == null)
         newLineSequence = Environment.NewLine;

     IO.LineEndings lineEnding;
     switch (newLineSequence)
     {
         case "\r\n":
             lineEnding = IO.LineEndings.Windows;
             break;
         case "\n":
             lineEnding = IO.LineEndings.Linux;
             break;
         case "\r":
             lineEnding = IO.LineEndings.Mac;
             break;
         default:
             throw new Exception("Unknown line separator.");
     }

     string[] lines = (newLineSequence.Length == 1)
         ? text.Split(newLineSequence[0])
         : text.Split(new string[] { newLineSequence }, StringSplitOptions.None);

     // The third argument is true when a non-empty BOM was skipped at the start of the file.
     return new TextFile(fileName, codePage, textStart > 0, lines, lineEnding);
 }
Esempio n. 2
0
        /// <summary>
        /// Reads a file from disk, detects its code page and line-ending convention, and returns it
        /// as a <c>TextFile</c> whose content is split into lines.
        /// </summary>
        /// <remarks>
        /// Code page detection, in order:
        /// 1. An empty file is treated as UTF-8.
        /// 2. Every entry of <c>TextFile.SpecialCodePageInfo</c> is tested against the start of the file;
        ///    the matching entry with the longest BOM wins. An entry whose BOM is empty always matches.
        /// 3. If no entry matched, the bytes are scanned for sequences that are not well-formed UTF-8.
        ///    With none found the file is treated as UTF-8; otherwise an ANSI code page is selected,
        ///    either via <paramref name="ansiCodePageGetter"/> or from the system default.
        /// The line ending is taken from the first line break found in the decoded text and falls back
        /// to <see cref="Environment.NewLine"/> when the text contains no line break.
        /// </remarks>
        /// <param name="fileName">Path of the file to load. Must not be null or empty.</param>
        /// <param name="ansiCodePageGetter">
        /// Optional callback invoked when the file contains bytes that are not valid UTF-8. It receives the
        /// known ANSI code pages, the proposed default code page, the raw file bytes and the offsets of the
        /// offending bytes, and returns the code page to use, or null to abort. When this parameter is null
        /// the proposed default code page is used.
        /// </param>
        /// <returns>The loaded file with its detected code page, lines and line-ending kind.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="fileName"/> is null or empty.</exception>
        /// <exception cref="FileNotFoundException">The file does not exist.</exception>
        /// <exception cref="OperationCanceledException"><paramref name="ansiCodePageGetter"/> returned null.</exception>
        public static TextFile Load(string fileName, AnsiCodePageGetter ansiCodePageGetter)
        {
            if (string.IsNullOrEmpty(fileName))
            {
                throw new ArgumentNullException("fileName");
            }
            if (!File.Exists(fileName))
            {
                throw new FileNotFoundException("File not found.", fileName);
            }

            // ReadAllBytes keeps reading until the end of the file; a single Stream.Read call is allowed
            // to return fewer bytes than requested. An empty file yields an empty array.
            byte[] allBytes = File.ReadAllBytes(fileName);

            int codePage;
            int textStart;

            if (allBytes.Length == 0)
            {
                codePage = (int)SpecialCodePage.Utf8;
                textStart = 0;
            }
            else
            {
                KeyValuePair<SpecialCodePage, CodePageInfoAttribute>? codePageGot = null;
                foreach (KeyValuePair<SpecialCodePage, CodePageInfoAttribute> kv in TextFile.SpecialCodePageInfo)
                {
                    // Compare the BOM byte by byte: Array.Equals(a, b) is Object.Equals(object, object),
                    // i.e. reference equality, and would never match two distinct arrays.
                    byte[] bom = kv.Value.Bom;
                    bool maybe = allBytes.Length >= bom.Length;
                    for (int b = 0; maybe && (b < bom.Length); b++)
                    {
                        maybe = allBytes[b] == bom[b];
                    }

                    // Prefer the longest matching BOM so that a BOM which is a prefix of another one
                    // does not shadow the longer match.
                    if (maybe && ((codePageGot == null) || (codePageGot.Value.Value.Bom.Length < bom.Length)))
                    {
                        codePageGot = kv;
                    }
                }
                if (codePageGot != null)
                {
                    codePage = (int)codePageGot.Value.Key;
                    textStart = codePageGot.Value.Value.Bom.Length;
                }
                else
                {
                    textStart = 0;

                    // Collect the offsets of all bytes that cannot be part of a well-formed UTF-8 sequence.
                    List<int> ansiPositions = new List<int>();
                    for (int i = 0; i < allBytes.Length; i++)
                    {
                        byte cur = allBytes[i];

                        // Only 0x00-0x7F are single-byte characters in UTF-8; 0x80 is not ASCII.
                        if (cur < 0x80)
                        {
                            continue;
                        }

                        // Number of continuation bytes announced by the lead byte (0 = not a lead byte).
                        int continuationCount;
                        if ((cur >= 0xC0) && (cur <= 0xDF))
                        {
                            continuationCount = 1;
                        }
                        else if ((cur >= 0xE0) && (cur <= 0xEF))
                        {
                            continuationCount = 2;
                        }
                        else if ((cur >= 0xF0) && (cur <= 0xF7))
                        {
                            continuationCount = 3;
                        }
                        else
                        {
                            continuationCount = 0;
                        }

                        // The sequence is valid only if it fits in the buffer and every trailing byte
                        // is a continuation byte (0x80-0xBF).
                        bool validSequence = (continuationCount > 0) && ((i + continuationCount) < allBytes.Length);
                        for (int k = 1; validSequence && (k <= continuationCount); k++)
                        {
                            byte nxt = allBytes[i + k];
                            validSequence = (nxt >= 0x80) && (nxt <= 0xBF);
                        }

                        if (validSequence)
                        {
                            i += continuationCount; // skip the continuation bytes just validated
                        }
                        else
                        {
                            ansiPositions.Add(i);
                        }
                    }

                    if (ansiPositions.Count == 0)
                    {
                        codePage = (int)SpecialCodePage.Utf8;
                    }
                    else
                    {
                        // Propose the system default code page, or the first known ANSI code page
                        // when the system default is not in the list.
                        int defaultCodePage = System.Text.Encoding.Default.CodePage;
                        if (!TextFile.AnsiCodePages.ContainsKey(defaultCodePage))
                        {
                            foreach (KeyValuePair<int, string> kv in TextFile.AnsiCodePages)
                            {
                                defaultCodePage = kv.Key;
                                break;
                            }
                        }
                        if (ansiCodePageGetter == null)
                        {
                            codePage = defaultCodePage;
                        }
                        else
                        {
                            int? cp = ansiCodePageGetter(TextFile.AnsiCodePages, defaultCodePage, allBytes, ansiPositions.ToArray());
                            if (cp.HasValue)
                            {
                                codePage = cp.Value;
                            }
                            else
                            {
                                throw new OperationCanceledException();
                            }
                        }
                    }
                }
            }

            // Decode everything after the BOM (if any).
            string text;

            if ((allBytes.Length - textStart) == 0)
            {
                text = "";
            }
            else
            {
                text = Encoding.GetEncoding((int)codePage).GetString(allBytes, textStart, allBytes.Length - textStart);
            }

            // The first line break found in the text decides the line-ending convention of the file.
            string newLineSequence = null;

            for (int i = 0; (newLineSequence == null) && (i < text.Length); i++)
            {
                switch (text[i])
                {
                case '\r':
                    if ((i < text.Length - 1) && (text[i + 1] == '\n'))
                    {
                        newLineSequence = "\r\n";
                    }
                    else
                    {
                        newLineSequence = "\r";
                    }
                    break;

                case '\n':
                    newLineSequence = "\n";
                    break;
                }
            }
            if (newLineSequence == null)
            {
                newLineSequence = Environment.NewLine;
            }

            IO.LineEndings lineEnding;
            switch (newLineSequence)
            {
            case "\r\n":
                lineEnding = IO.LineEndings.Windows;
                break;

            case "\n":
                lineEnding = IO.LineEndings.Linux;
                break;

            case "\r":
                lineEnding = IO.LineEndings.Mac;
                break;

            default:
                throw new Exception("Unknown line separator.");
            }

            string[] lines = (newLineSequence.Length == 1)
                ? text.Split(newLineSequence[0])
                : text.Split(new string[] { newLineSequence }, StringSplitOptions.None);

            // The third argument is true when a non-empty BOM was skipped at the start of the file.
            return new TextFile(fileName, codePage, textStart > 0, lines, lineEnding);
        }