/// <summary>
        /// Builds the Burrows-Wheeler transform data for the text behind <paramref name="suffixArray"/>.
        /// Each suffix is extended to a full rotation of the input text, the rotations are sorted,
        /// and the first (F) and last (L) characters of every sorted rotation are collected.
        /// L is stored in <c>waveletTree</c>; <c>C</c> maps each distinct character of F to the
        /// value returned by <c>Select(c, 1)</c> on a wavelet tree built over F.
        /// </summary>
        /// <param name="suffixArray">Suffix array supplying <c>InputText</c> and <c>Suffixes</c>.</param>
        public BurrowsWheelerTransformAsc(SuffixArrayAsc suffixArray)
        {
            string text = suffixArray.InputText;
            textLength = text.Length;

            // A rotation is the suffix followed by the prefix of the text that the suffix omits.
            List<string> rotations = suffixArray.Suffixes
                .Select(suffix => suffix + text.Substring(0, textLength - suffix.Length))
                .ToList();

            // Sort by raw character value. The parameterless List<string>.Sort() uses the
            // culture-sensitive default comparer, which applies linguistic rules (case weighting,
            // ignorable characters) instead of comparing character by character, so rotations
            // sharing a first character would not be guaranteed to end up adjacent in F.
            rotations.Sort(string.CompareOrdinal);

            char[] firstColumn = new char[rotations.Count];
            char[] lastColumn = new char[rotations.Count];
            for (int row = 0; row < rotations.Count; row++) {
                string rotation = rotations[row];
                firstColumn[row] = rotation[0];
                lastColumn[row] = rotation[rotation.Length - 1];
            }

            waveletTree = new WaveletTreeAsc(new string(lastColumn));

            // NOTE(review): Select(c, 1) presumably yields the position of the first occurrence
            // of c in F - confirm against WaveletTreeAsc.
            WaveletTreeAsc waveletTreeF = new WaveletTreeAsc(new string(firstColumn));
            char[] distinctChars = firstColumn.Distinct().ToArray();
            C = new Dictionary<char, int>(distinctChars.Length);
            foreach (char c in distinctChars) {
                C.Add(c, waveletTreeF.Select(c, 1));
            }
        }
Example #2
0
        /// <summary>
        /// For every file under GenFiles whose path does not contain <c>GenFiles\B_</c>, draws a
        /// random pattern from the file, runs the BWT count and the brute-force count 1000 times
        /// each, prints a warning when the two counts differ, and prints a per-file tally of which
        /// approach had the lower (or equal, counted for BWT) elapsed time.
        /// </summary>
        public void TestAsciiFiles()
        {
            const int times = 1000;
            var gen = new Random();
            var files = Directory.EnumerateFiles(DirectoryString + @"\GenFiles\", "*").Where(u => !u.Contains(@"GenFiles\B_"));
            foreach (var file in files) {
                var ob = new StatObj { FileName = file };
                var commandLineInterpreter = new CommandInterpreter() { Encoding = Encoding.ASCII, Path = ob.FileName };
                var fileReader = new AsciiFileReader(commandLineInterpreter);
                var sa = new SuffixArrayAsc(fileReader.Data);
                var bwt = new BurrowsWheelerTransformAsc(sa);
                int bwtWins = 0;
                int bfWins = 0;
                for (var i = 1; i < Math.Min(2, ob.FileSize / 2); i++) {
                    ob.PatternSize = i;

                    // Pick a random window of PatternSize characters from the file contents.
                    int start = (int)((ob.FileSize - ob.PatternSize) * gen.NextDouble());
                    commandLineInterpreter.QueryString = fileReader.Data.Substring(start, ob.PatternSize);
                    string pattern = commandLineInterpreter.QueryString;
                    Console.WriteLine(pattern);

                    // Results and timings are kept in plain locals. The previous code copied `ob`
                    // into two StatObj variables; if StatObj is a reference type both variables
                    // point at the same object, so the comparisons below could never differ.

                    // BWT
                    int bwtResults = 0;
                    Stopwatch sw = Stopwatch.StartNew();
                    for (int t = 0; t < times; t++) {
                        bwtResults = bwt.CountOccurrences(pattern);
                    }
                    sw.Stop();
                    long bwtMilliseconds = sw.ElapsedMilliseconds;

                    // Brute force
                    int bfResults = 0;
                    sw = Stopwatch.StartNew();
                    for (int t = 0; t < times; t++) {
                        bfResults = BruteForce.Run(fileReader.Data, pattern);
                    }
                    sw.Stop();
                    long bfMilliseconds = sw.ElapsedMilliseconds;

                    if (bwtResults != bfResults) {
                        Console.WriteLine("\nBWT FAILED US!\n");
                    }

                    // Ties are counted as a BWT win.
                    if (bfMilliseconds >= bwtMilliseconds) {
                        bwtWins++;
                    } else {
                        bfWins++;
                    }
                }
                Console.WriteLine(ob.FileName + "\nBWT Wins: " + bwtWins + "\nBF Wins: " + bfWins + "\n");
            }
        }
Example #3
0
        /// <summary>
        /// For every file under GenFiles matching <c>B_*</c>, draws random patterns of length
        /// 1 up to (but excluding) min(20, FileSize / 2) and, for each pattern, prints one CSV line
        /// for the BWT count and one for the brute-force count. Start and end times are recorded on
        /// the StatObj as <c>DateTime.Now</c> ticks around each call.
        /// </summary>
        public void TestBinaryFiles()
        {
            var rng = new Random();
            var binaryFiles = Directory.EnumerateFiles(DirectoryString + @"\GenFiles\", "B_*");
            foreach (var path in binaryFiles) {
                var stats = new StatObj { FileName = path };
                var interpreter = new CommandInterpreter() { Encoding = Encoding.UTF8, Path = stats.FileName };
                var reader = new AsciiFileReader(interpreter);
                var suffixes = new SuffixArrayAsc(reader.Data);
                var transform = new BurrowsWheelerTransformAsc(suffixes);

                int length = 1;
                while (length < Math.Min(20, stats.FileSize / 2)) {
                    stats.PatternSize = length;

                    // Random window of PatternSize characters taken from the file contents.
                    int offset = (int)((stats.FileSize - stats.PatternSize) * rng.NextDouble());
                    interpreter.QueryString = reader.Data.Substring(offset, stats.PatternSize);

                    // BWT run
                    stats.StartTimeTicks = (ulong)DateTime.Now.Ticks;
                    stats.Results = transform.CountOccurrences(interpreter.QueryString);
                    stats.EndTimeTicks = (ulong)DateTime.Now.Ticks;
                    Console.WriteLine(
                        "BWT,{0},{1},{2},{3},{4},{5}",
                        stats.RuntimeTicks,
                        stats.PatternSize,
                        stats.FileSize,
                        stats.AlphabetSize,
                        stats.Results,
                        Encoding.UTF8);

                    // Brute-force run
                    stats.StartTimeTicks = (ulong)DateTime.Now.Ticks;
                    stats.Results = BruteForce.Run(reader.Data, interpreter.QueryString);
                    stats.EndTimeTicks = (ulong)DateTime.Now.Ticks;
                    Console.WriteLine(
                        "BruteForce,{0},{1},{2},{3},{4},{5}",
                        stats.RuntimeTicks,
                        stats.PatternSize,
                        stats.FileSize,
                        stats.AlphabetSize,
                        stats.Results,
                        Encoding.UTF8);

                    length++;
                }
            }
        }
Example #4
0
        /// <summary>
        /// Benchmarks the BWT count against the brute-force count on every file under
        /// <c>..\..\GenFiles\</c> whose path does not contain <c>GenFiles\B_</c>. For each file,
        /// random patterns of length 1 up to (but excluding) min(20, FileSize / 2) are each searched
        /// 1000 times with both approaches. One CSV row per approach and pattern is collected and
        /// finally written to <c>..\..\GenFiles\AsciiResults.csv</c>; progress is printed as a
        /// percentage of files processed.
        /// </summary>
        public static void RunAsciiTestFiles()
        {
            const string DirectoryString = @"..\..";
            const int times = 1000;
            var gen = new Random();

            // Materialize the listing once: the lazy query was previously re-run against the
            // file system for every file just to obtain the total count for the progress line.
            // NOTE(review): the "*" filter also matches AsciiResults.csv left by an earlier run,
            // so that file gets benchmarked as input - confirm whether it should be excluded.
            string[] files = Directory.EnumerateFiles(DirectoryString + @"\GenFiles\", "*")
                .Where(u => !u.Contains(@"GenFiles\B_"))
                .ToArray();

            StringBuilder output = new StringBuilder();
            int fileNum = 0;
            foreach (var file in files) {
                var ob = new StatObj { FileName = file };
                var commandLineInterpreter = new CommandInterpreter() { Encoding = Encoding.ASCII, Path = ob.FileName };
                var fileReader = new AsciiFileReader(commandLineInterpreter);
                var sa = new SuffixArrayAsc(fileReader.Data);
                var bwt = new BurrowsWheelerTransformAsc(sa);
                for (var i = 1; i < Math.Min(20, ob.FileSize / 2); i++) {
                    ob.PatternSize = i;

                    // Pick a random window of PatternSize characters from the file contents.
                    int start = (int)((ob.FileSize - ob.PatternSize) * gen.NextDouble());
                    commandLineInterpreter.QueryString = fileReader.Data.Substring(start, ob.PatternSize);
                    string pattern = commandLineInterpreter.QueryString;

                    // Results and timings are kept in plain locals. The previous code copied `ob`
                    // into two StatObj variables; if StatObj is a reference type both variables
                    // point at the same object and the BWT row would report the brute-force numbers.

                    // BWT
                    int bwtResults = 0;
                    Stopwatch sw = Stopwatch.StartNew();
                    for (int t = 0; t < times; t++) {
                        bwtResults = bwt.CountOccurrences(pattern);
                    }
                    sw.Stop();
                    long bwtMilliseconds = sw.ElapsedMilliseconds;

                    // Brute force
                    int bfResults = 0;
                    sw = Stopwatch.StartNew();
                    for (int t = 0; t < times; t++) {
                        bfResults = BruteForce.Run(fileReader.Data, pattern);
                    }
                    sw.Stop();
                    long bfMilliseconds = sw.ElapsedMilliseconds;

                    // CSV columns: approach, elapsed ms for all runs, pattern size, file size,
                    // alphabet size, match count, encoding.
                    int m = ob.PatternSize;
                    int n = ob.FileSize;
                    int ep = ob.AlphabetSize;

                    output.AppendLine(String.Format("BWT,{0},{1},{2},{3},{4},{5}", bwtMilliseconds, m, n, ep, bwtResults, Encoding.ASCII));
                    output.AppendLine(String.Format("BruteForce,{0},{1},{2},{3},{4},{5}", bfMilliseconds, m, n, ep, bfResults, Encoding.ASCII));
                }
                fileNum++;
                Console.WriteLine(((((double)fileNum) / files.Length) * 100).ToString() + " %");
            }
            using (StreamWriter writer = new StreamWriter(DirectoryString + @"\GenFiles\AsciiResults.csv")) {
                writer.Write(output.ToString());
            }
        }
Example #5
0
        /// <summary>
        /// Counts the occurrences of <paramref name="pattern"/> in <paramref name="text"/> with the
        /// BWT-based search and with brute force, repeating each <paramref name="times"/> times,
        /// and prints the average time per run and the match count for both approaches.
        /// </summary>
        /// <param name="text">Text to search in; also used to build the suffix array and BWT.</param>
        /// <param name="pattern">Pattern whose occurrences are counted.</param>
        /// <param name="times">Number of repetitions each approach is timed over.</param>
        public static void RunAsciiTestOnce(string text, string pattern, int times = 10000)
        {
            Console.WriteLine("T: " + text);
            Console.WriteLine("P: " + pattern);
            Console.WriteLine();

            SuffixArrayAsc sa = new SuffixArrayAsc(text);
            BurrowsWheelerTransformAsc bwt = new BurrowsWheelerTransformAsc(sa);
            int answer = 0;

            // BWT
            Stopwatch sw = Stopwatch.StartNew();
            for (int i = 0; i < times; i++) {
                answer = bwt.CountOccurrences(pattern);
            }
            sw.Stop();
            // Elapsed.TotalMilliseconds keeps the fractional part; ElapsedMilliseconds truncates to
            // whole milliseconds before the division, which reports 0 for short runs.
            double time = sw.Elapsed.TotalMilliseconds / times;
            Console.WriteLine("BWT:\n====");
            Console.WriteLine("     Time: " + time + " ms");
            Console.WriteLine("  Matches: " + answer);
            Console.WriteLine();

            // Brute Force
            Stopwatch sw2 = Stopwatch.StartNew();
            for (int i = 0; i < times; i++) {
                answer = BruteForce.Run(text, pattern);
            }
            sw2.Stop();
            double time2 = sw2.Elapsed.TotalMilliseconds / times;
            Console.WriteLine("BF:\n====");
            Console.WriteLine("     Time: " + time2 + " ms");
            Console.WriteLine("  Matches: " + answer);
            Console.WriteLine();
        }