/// <summary>
/// Compresses the increasing sequences of the Psi function with delta (gap) encoding.
/// The sequences are obtained from <c>PsiFunction.GetIncreasingSequences</c>; the Psi function
/// itself is produced by <see cref="PsiFunction.GeneratePsiFunction(int[], string)"/>.
/// </summary>
/// <remarks>
/// For every character the resulting list starts with the first value of that character's
/// sequence stored as-is, followed by the differences between consecutive values.
/// </remarks>
/// <param name="input">String which will be compressed.</param>
/// <returns>Dictionary of compressed values divided by characters from the original input.</returns>
/// <exception cref="OverflowException">
/// A first value or a difference does not fit into <see cref="Int16"/>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// A sequence returned for some character is empty.
/// </exception>
public static Dictionary<char, List<Int16>> DeltaEncodingCompression(string input)
{
    Dictionary<char, List<int>> incSequences = PsiFunction.GetIncreasingSequences(input);
    var deltaEncoding = new Dictionary<char, List<Int16>>();

    // Single pass: each sequence is fetched once and indexed directly instead of
    // being looked up in the dictionary (and walked through LINQ) on every step.
    foreach (KeyValuePair<char, List<int>> entry in incSequences)
    {
        List<int> sequence = entry.Value;

        // The head of the sequence is kept verbatim so the decoder has a starting point.
        var deltaSequence = new List<Int16> { Convert.ToInt16(sequence[0]) };

        // Every following element is stored as the gap to its predecessor.
        for (int i = 1; i < sequence.Count; i++)
        {
            deltaSequence.Add(Convert.ToInt16(sequence[i] - sequence[i - 1]));
        }

        deltaEncoding.Add(entry.Key, deltaSequence);
    }

    return deltaEncoding;
}
/// <summary>
/// Rebuilds the Psi function from its delta-encoded form and converts it to a suffix array
/// through <see cref="PsiFunction.PsiToSA(int[])"/>.
/// </summary>
/// <remarks>
/// Index 0 of the reconstructed Psi array is fixed to -1; decoded values are written from
/// index 1 onwards, one sequence after another, in the enumeration order of
/// <paramref name="delta"/>.
/// </remarks>
/// <param name="delta">Dictionary of compressed values divided by characters from the original input.</param>
/// <param name="length">Length of the original input.</param>
/// <returns>Suffix array.</returns>
public static int[] DecompressDeltaEncoding(Dictionary<char, List<Int16>> delta, int length)
{
    int[] psi = new int[length];
    psi[0] = -1;

    int writePos = 1;
    foreach (List<Int16> gaps in delta.Values)
    {
        // The first entry of each sequence is an absolute value, not a gap.
        int current = gaps[0];
        psi[writePos++] = current;

        // Remaining entries are gaps; a running sum restores the absolute values.
        for (int g = 1; g < gaps.Count; g++)
        {
            current += gaps[g];
            psi[writePos++] = current;
        }
    }

    return PsiFunction.PsiToSA(psi);
}
/// <summary>
/// Decompresses a list of Elias-Fano encoded byte arrays back into the Psi function and
/// converts the result to a suffix array.
/// </summary>
/// <remarks>
/// The three lists are read in parallel: entry <c>i</c> of each describes the same sequence.
/// The reconstructed Psi list starts with -1, followed by the decoded sequences in order.
/// </remarks>
/// <param name="eliasFano">List of byte arrays. Result of Elias-Fano compression.</param>
/// <param name="bitLengthOfSeq">List of values that represent length of resulting bit sequences by increasing sequences.</param>
/// <param name="bitLengthOfOrigParts">
/// List of two-element arrays: element 0 is the length of the leading bit part,
/// element 1 the length of the lower bit part.
/// </param>
/// <returns>Suffix array.</returns>
public static int[] DecompressEliasFano(List<byte[]> eliasFano, List<int> bitLengthOfSeq, List<int[]> bitLengthOfOrigParts)
{
    var psi = new List<int> { -1 };

    for (int block = 0; block < eliasFano.Count; block++)
    {
        bool[] bits = ByteToBit(eliasFano[block], bitLengthOfSeq[block]);
        int[] partLengths = bitLengthOfOrigParts[block];

        int seam = GetHigherLowerBitsSeam(bits, partLengths[0]);
        List<bool[]> high = GenerateLeadingBits(bits, seam, partLengths[0]);
        List<bool[]> low = GenerateLowerBits(bits, seam, partLengths[1]);

        List<int> decoded;
        if (partLengths[1] != 0)
        {
            decoded = GenerateSequence(high, low);
        }
        else
        {
            // No lower bits were stored: every value is given by its leading bits alone.
            decoded = new List<int>();
            foreach (bool[] highBits in high)
            {
                decoded.Add(GetNumberFromBits(highBits));
            }
        }

        foreach (int value in decoded)
        {
            psi.Add(value);
        }
    }

    return PsiFunction.PsiToSA(psi.ToArray());
}
/// <summary>
/// Method which is automatically called when starting this application.
/// Reads a file path from the console, builds the suffix array of the file's text and then
/// runs the Re-Pair, Delta encoding and Elias-Fano compression and decompression steps,
/// printing the results, the elapsed time of each step and the values reported by
/// <c>GetMemorySizeOfObject</c>.
/// </summary>
/// <remarks>
/// The stopwatch is paused while intermediate results are written to the console, so the
/// reported build times cover computation only (as the suffix array and Re-Pair sections
/// already did).
/// </remarks>
/// <param name="args">Command-line arguments; not used.</param>
public static void Main(string[] args)
{
    Console.Write("Unesite apsolutnu putanju do datoteke: ");
    string path = Console.ReadLine();
    string input = "";
    try
    {
        input = File.ReadAllText(path);
    }
    catch (Exception)
    {
        // Any failure to read the file is reported to the user and ends the process.
        Console.WriteLine("\nDatoteka koju želite čitati ne postoji ili ju nije moguće otvoriti.");
        Console.ReadKey();
        Environment.Exit(-1);
    }

    // Build the suffix array.
    int[] suffixArray = new int[input.Length];
    var watch = Stopwatch.StartNew();
    SuffixArray.SAIS.sufsort(input, suffixArray, input.Length);
    watch.Stop();
    Console.Write("SA: ");
    WriteArray(suffixArray);
    Console.WriteLine("Izgradnja SA se izvodila " + watch.ElapsedMilliseconds + " ms.");
    Console.WriteLine("SA zauzima " + GetMemorySizeOfObject(suffixArray) + " bajtova.");
    Console.WriteLine();

    // Compress SA with Re-Pair.
    watch.Restart();
    int[] csa = RePair.RePairMethod(suffixArray).ToArray();
    watch.Stop();
    Console.Write("CSA: ");
    WriteArray(csa);
    Console.WriteLine("Izgradnja CSA preko Re-Pair se izvodila " + watch.ElapsedMilliseconds + " ms.");
    Console.WriteLine("CSA zauzima " + GetMemorySizeOfObject(csa) + " bajtova.");
    Console.WriteLine();

    // Decompress Re-Pair to SA (the result is only timed, not printed).
    watch.Restart();
    RePair.DecompressRePair(csa.ToList(), input.Length);
    watch.Stop();
    Console.WriteLine("Dekompresija Re-Pair CSA se izvodila " + watch.ElapsedMilliseconds + " ms.");
    Console.WriteLine();

    // Compress Psi with Delta encoding.
    watch.Restart();
    PsiFunction.GeneratePsiFunction(suffixArray, input);
    watch.Stop(); // pause: console output must not count towards the build time
    Console.Write("Psi: ");
    WriteArray(PsiFunction.psi);
    watch.Start(); // resume timing for the compression itself
    Dictionary<char, List<Int16>> delta = DeltaEncoding.DeltaEncodingCompression(input);
    watch.Stop();
    Console.WriteLine("Delta:");
    WriteDictionary(delta);
    Console.WriteLine("Izgradnja CSA preko Delta encoding se izvodila " + watch.ElapsedMilliseconds + " ms.");
    Console.WriteLine("CSA zauzima " + GetMemorySizeOfObject(delta) + " bajtova.");
    Console.WriteLine();

    // Decompress Delta encoding to SA.
    watch.Restart();
    int[] SA = DeltaEncoding.DecompressDeltaEncoding(delta, input.Length);
    watch.Stop();
    Console.WriteLine("Dekompresija Delta encoding CSA se izvodila " + watch.ElapsedMilliseconds + " ms.");
    Console.WriteLine();
    WriteArray(SA);

    // Compress Psi with Elias-Fano.
    watch.Restart();
    PsiFunction.GeneratePsiFunction(suffixArray, input);
    watch.Stop(); // pause: console output must not count towards the build time
    Console.Write("Psi: ");
    WriteArray(PsiFunction.psi);
    List<int> bitLengthOfSeq = new List<int>();
    List<int[]> bitLengthOfOrigParts = new List<int[]>();
    watch.Start(); // resume timing for the compression itself
    List<byte[]> eliasFano = EliasFano.EliasFanoCompression(input, bitLengthOfSeq, bitLengthOfOrigParts);
    watch.Stop();
    Console.WriteLine("Elias-Fano:");
    WriteListOfArrays(eliasFano);
    Console.WriteLine("Duljine bitova:");
    WriteListOfArrays(bitLengthOfOrigParts);
    Console.WriteLine("Izgradnja CSA preko Elias-Fano se izvodila " + watch.ElapsedMilliseconds + " ms.");
    Console.WriteLine("CSA zauzima " + GetMemorySizeOfObject(eliasFano) + " bajtova.");
    Console.WriteLine();

    // Decompress Elias-Fano to SA.
    watch.Restart();
    int[] SAElias = EliasFano.DecompressEliasFano(eliasFano, bitLengthOfSeq, bitLengthOfOrigParts);
    watch.Stop();
    Console.WriteLine("Dekompresija Elias-Fano CSA se izvodila " + watch.ElapsedMilliseconds + " ms.");
    Console.WriteLine();
    WriteArray(SAElias);
}
/// <summary>
/// Compresses the increasing sequences of the Psi function with the Elias-Fano scheme.
/// </summary>
/// <remarks>
/// Each sequence returned by <c>PsiFunction.GetIncreasingSequences</c> is encoded on its own:
/// every value is split into a leading part and a lower part, the leading parts are counted
/// per bucket and encoded by <c>GenerateLeadingSequence</c>, and the lower parts are stored
/// back to back. Both parts are merged by <c>GenerateResultVectorOfBits</c>, and the final
/// bit vectors are packed by <c>EncodeToBytes</c>.
/// </remarks>
/// <param name="input">Original input.</param>
/// <param name="bitLengthOfSeq">
/// Output list: receives, starting at index 0, the bit length of each encoded sequence.
/// </param>
/// <param name="bitLengthOfOrigParts">
/// Output list: receives, starting at index 0, a two-element array per sequence holding the
/// number of leading bits (element 0) and the number of lower bits (element 1).
/// </param>
/// <returns>List of bytes which represent compressed values.</returns>
public static List<byte[]> EliasFanoCompression(string input, List<int> bitLengthOfSeq, List<int[]> bitLengthOfOrigParts)
{
    Dictionary<char, List<int>> sequencesByChar = PsiFunction.GetIncreasingSequences(input);
    var encodedSequences = new List<bool[]>();
    int sequenceIndex = 0;

    foreach (List<int> sequence in sequencesByChar.Values)
    {
        // The bit width is derived from the last element of the sequence.
        int totalBits = BitsToExpressNumber(sequence[sequence.Count - 1]);
        int highBitCount = GetNumberOfLeadingBits(sequence.Count);
        int[] bucketCounts = new int[(int)Math.Pow(2, highBitCount)];
        int lowBitCount = totalBits - highBitCount;

        bitLengthOfOrigParts.Insert(sequenceIndex, new int[] { highBitCount, lowBitCount });

        bool[] lowBits = new bool[sequence.Count * lowBitCount];
        int lowCursor = 0;

        foreach (int value in sequence)
        {
            bool[] highBits = new bool[highBitCount];
            bool[] valueBits = NumberToBits(value, totalBits);

            // The first highBitCount bits form the leading part...
            for (int b = 0; b < highBitCount; b++)
            {
                highBits[b] = valueBits[b];
            }

            // ...and the remaining bits are appended to the shared lower-bit vector.
            for (int b = highBitCount; b < totalBits; b++)
            {
                lowBits[lowCursor++] = valueBits[b];
            }

            // Count how many values fall into the bucket selected by the leading part.
            bucketCounts[GetNumberFromBits(highBits)]++;
        }

        // The leading sequence gets one slot per counted value plus one extra per bucket.
        int highLength = 0;
        foreach (int count in bucketCounts)
        {
            highLength += count + 1;
        }

        bool[] highBitsEncoded = new bool[highLength];
        GenerateLeadingSequence(bucketCounts, highBitsEncoded);

        bool[] combined = GenerateResultVectorOfBits(highLength + sequence.Count * lowBitCount, highBitsEncoded, lowBits);
        encodedSequences.Add(combined);
        sequenceIndex++;
    }

    // Report the bit length of every encoded sequence to the caller.
    for (int position = 0; position < encodedSequences.Count; position++)
    {
        bitLengthOfSeq.Insert(position, encodedSequences[position].Length);
    }

    return EncodeToBytes(encodedSequences);
}