Exemple #1
0
        public void FileExtension_Path_ReturnsFileExtension()
        {
            var extension = ".txt";
            var result    = TextFile.FileExtension(path);

            Assert.AreEqual(result, extension);
        }
Exemple #2
0
        public static void RemoveDiacriticsInFile(string path)
        {
            string originalText          = File.OpenText(path).ReadToEnd();
            string textWithoutDiacritics = StringRoutines.MyDiacriticsRemover(originalText);

            File.WriteAllText($"{TextFile.FileName(path)}_WITHOUT-DIACRITICS{TextFile.FileExtension(path)}", textWithoutDiacritics);
        }
Exemple #3
0
        internal static void Test(string path, DiacriticsReconstructor dr, bool writeStatistics = true)
        {
            long bytes = GC.GetTotalMemory(true);

            Console.WriteLine($"Memory (bytes): {bytes}");
            if (writeStatistics)
            {
                statisticsPath = $"{TextFile.FileName(path)}_STATISTICS{TextFile.FileExtension(path)}";
                File.WriteAllText(statisticsPath, $"Memory (bytes): {bytes}\n");
            }

            Console.WriteLine($"Reading {path}");
            string originalText = File.OpenText(path).ReadToEnd();

            Console.WriteLine("Removing diacritics...");
            string textWithoutDiacritics = StringRoutines.MyDiacriticsRemover(originalText);

            File.WriteAllText($"{TextFile.FileName(path)}_WITHOUT-DIACRITICS{TextFile.FileExtension(path)}", textWithoutDiacritics);

            Console.WriteLine("Reconstructing...");
            var    sw = Stopwatch.StartNew();
            string reconstructedText = dr.Reconstruct(textWithoutDiacritics);

            sw.Stop();
            string ngramsStat = dr.GetStatistic();

            Console.Write(ngramsStat);
            if (writeStatistics)
            {
                File.AppendAllText(statisticsPath, ngramsStat);
            }
            dr.EraseStatistic();
            Console.WriteLine($"Elapsed (milliseconds): {sw.Elapsed.TotalMilliseconds}");
            if (writeStatistics)
            {
                File.AppendAllText(statisticsPath, $"Elapsed (milliseconds): {sw.Elapsed.TotalMilliseconds}\n");
            }
            Console.WriteLine("Done.");

            File.WriteAllText($"{TextFile.FileName(path)}_RENCOSTRUCTED{TextFile.FileExtension(path)}", reconstructedText);

            Console.WriteLine("Testing...");
            FindMistakes(originalText, reconstructedText, path, writeStatistics);
            Console.WriteLine("Done.\n");
        }
        private List <string> GetAllPartialBinFiles(string[] binFiles)
        {
            var ret = new List <string>();

            int size = 4;

            foreach (var path in binFiles)
            {
                int stepSize = GetDivisionCountByNumber(size--);
                int i        = 0;

                string finalPath;
                while (File.Exists(finalPath = TextFile.FileName(path) + i + TextFile.FileExtension(path)))
                {
                    ret.Add(finalPath);
                    Console.WriteLine(finalPath);
                    i += stepSize;
                }
            }

            return(ret);
        }
Exemple #5
0
        public static void FindMistakes(string originalText, string reconstructedText, string path, bool writeStatistics)
        {
            string[] originalWords      = originalText.Split(' ', '\n', '\t', '\r').Where(x => x != "").ToArray();
            string[] reconstructedWords = reconstructedText.Split(' ', '\n', '\t', '\r').Where(x => x != "").ToArray();

            Console.WriteLine($"originalWords.Length = {originalWords.Length}");
            Console.WriteLine($"reconstructedWords.Length = {reconstructedWords.Length}");

            if (originalWords.Length != reconstructedWords.Length)
            {
                //throw new Exception("Length of original and reconstructed text are not equal!");
                Console.WriteLine("Length of original and reconstructed text are not equal!");
            }

            int count = 0;

            using (var sw = new StreamWriter($"{TextFile.FileName(path)}_MISTAKES-RECONST-ORIG{TextFile.FileExtension(path)}"))
            {
                //int len = originalWords.Length;
                int len = Math.Min(originalWords.Length, reconstructedWords.Length);
                for (int i = 0; i < len; i++)
                {
                    var originalW    = originalWords[i];
                    var reconstructW = reconstructedWords[i];
                    if (originalW != reconstructW)
                    {
                        sw.WriteLine("{0} {1} {2} {3} {4} {5} {6} - {7} {8} {9} {10} {11} {12} {13}",
                                     i - 3 >= 0 ? reconstructedWords[i - 3] : "",
                                     i - 2 >= 0 ? reconstructedWords[i - 2] : "",
                                     i - 1 >= 0 ? reconstructedWords[i - 1] : "",
                                     reconstructW,
                                     i + 1 < len ? reconstructedWords[i + 1] : "",
                                     i + 2 < len ? reconstructedWords[i + 2] : "",
                                     i + 3 < len ? reconstructedWords[i + 3] : "",

                                     i - 3 >= 0 ? originalWords[i - 3] : "",
                                     i - 2 >= 0 ? originalWords[i - 2] : "",
                                     i - 1 >= 0 ? originalWords[i - 1] : "",
                                     originalW,
                                     i + 1 < len ? originalWords[i + 1] : "",
                                     i + 2 < len ? originalWords[i + 2] : "",
                                     i + 3 < len ? originalWords[i + 3] : "");

                        count++;
                    }
                }
            }

            Console.WriteLine($"Number of mistakes: {count}");
            if (writeStatistics)
            {
                statisticsPath = $"{TextFile.FileName(path)}_STATISTICS{TextFile.FileExtension(path)}";

                File.AppendAllText(statisticsPath, $"originalWords.Length = {originalWords.Length}\n");
                File.AppendAllText(statisticsPath, $"reconstructedWords.Length = {reconstructedWords.Length}\n");
                File.AppendAllText(statisticsPath, $"Number of mistakes: {count}\n");
            }

            countOfAllOrigWords    += originalWords.Length;
            countOfAllReconstWords += reconstructedWords.Length;
            countOfAllMistakes     += count;
        }
Exemple #6
0
 public static void CleanFileFromHiddenChars(string path)
 {
     using (var strmWriter = new StreamWriter($"{TextFile.FileName(path)}_CLEANED{TextFile.FileExtension(path)}"))
         using (var binReader = new BinaryReader(File.Open(path, FileMode.Open)))
         {
             var rgxLatin  = new Regex($"[{latinChars}]");
             var rgxCommon = new Regex("[-+*/=_—–]");
             while (binReader.BaseStream.Position != binReader.BaseStream.Length)
             {
                 byte b = binReader.ReadByte();
                 char c = Convert.ToChar(b);
                 if (rgxLatin.IsMatch(c.ToString().ToLower()) || char.IsDigit(c) ||
                     char.IsPunctuation(c) || char.IsSeparator(c) ||
                     char.IsWhiteSpace(c) /*|| rgxCommon.IsMatch(c.ToString())*/)
                 {
                     strmWriter.Write(c);
                 }
                 else
                 {
                     Console.WriteLine($"<{c}>");
                 }
             }
         }
 }