/// <summary>
/// Method used for compression of a suffix array based on Delta encoding algorithm.
/// The increasing sequences are obtained from <c>PsiFunction.GetIncreasingSequences</c>;
/// see also <see cref="PsiFunction.GeneratePsiFunction(int[], string)"/>.
/// For every character key the first value of its sequence is stored as-is, followed by the
/// differences between each pair of consecutive values.
/// </summary>
/// <param name="input">String which will be compressed.</param>
/// <returns>Dictionary of compressed values divided by characters from the original input.</returns>
/// <exception cref="OverflowException">
/// A first value or a difference does not fit into <see cref="Int16"/>.
/// </exception>
/// <exception cref="ArgumentOutOfRangeException">
/// One of the increasing sequences is empty, so it has no first value to store.
/// </exception>
public static Dictionary<char, List<Int16>> DeltaEncodingCompression(string input)
{
    Dictionary<char, List<int>> incSequences = PsiFunction.GetIncreasingSequences(input);
    Dictionary<char, List<Int16>> deltaEncoding = new Dictionary<char, List<Int16>>(incSequences.Count);

    foreach (KeyValuePair<char, List<int>> entry in incSequences)
    {
        // Cache the list once instead of looking it up in the dictionary for every element.
        List<int> sequence = entry.Value;
        List<Int16> deltaSequence = new List<Int16>(sequence.Count);

        // The first value is kept verbatim; it is the base the deltas are applied to.
        deltaSequence.Add(Convert.ToInt16(sequence[0]));

        // Every following entry is the gap to its predecessor.
        for (int i = 1; i < sequence.Count; i++)
        {
            deltaSequence.Add(Convert.ToInt16(sequence[i] - sequence[i - 1]));
        }

        deltaEncoding.Add(entry.Key, deltaSequence);
    }

    return deltaEncoding;
}
/// <summary>
/// Method used for compression of a suffix array based on Elias-Fano algorithm.
/// Every increasing sequence is encoded separately: the bits of each value are split into a
/// leading part and a lower part, the lower parts are concatenated unchanged, and the leading
/// parts are counted per bucket and handed to <c>GenerateLeadingSequence</c>.
/// </summary>
/// <param name="input">Original input.</param>
/// <param name="bitLengthOfSeq">
/// List of values that represent length of resulting bit sequences by increasing sequences.
/// One entry per sequence is inserted, starting at index 0.
/// </param>
/// <param name="bitLengthOfOrigParts">
/// List of integer arrays containing information about length of leading and lower bit sequences.
/// One two-element array (<c>[leading, lower]</c>) per sequence is inserted, starting at index 0.
/// </param>
/// <returns>List of bytes which represent compressed values.</returns>
public static List<byte[]> EliasFanoCompression(string input, List<int> bitLengthOfSeq, List<int[]> bitLengthOfOrigParts)
{
    Dictionary<char, List<int>> incSequences = PsiFunction.GetIncreasingSequences(input);
    List<bool[]> eliasFano = new List<bool[]>();
    int eliasIndex = 0;

    foreach (List<int> val in incSequences.Values)
    {
        // The last element is used as the upper bound that fixes the bit width of the whole
        // sequence (presumably the maximum, given the sequences are increasing - TODO confirm).
        int noOfBits = BitsToExpressNumber(val[val.Count - 1]);
        int noOfLeadingBits = GetNumberOfLeadingBits(val.Count);
        int noOfLowerBits = noOfBits - noOfLeadingBits;

        // One counter per possible leading-bit pattern (2^noOfLeadingBits of them).
        int[] bucket = new int[1 << noOfLeadingBits];

        // Inserted at eliasIndex (not appended) to keep the original placement when the
        // caller passes in a list that already contains entries.
        bitLengthOfOrigParts.Insert(eliasIndex, new int[] { noOfLeadingBits, noOfLowerBits });

        bool[] lowerEliasFano = new bool[val.Count * noOfLowerBits];
        int lowerPointer = 0;

        foreach (int item in val)
        {
            bool[] number = NumberToBits(item, noOfBits);

            // The first noOfLeadingBits entries of the bit array form the leading part.
            bool[] leadingBits = new bool[noOfLeadingBits];
            for (int i = 0; i < noOfLeadingBits; i++)
            {
                leadingBits[i] = number[i];
            }

            // The remaining entries are the lower part; append them straight to the output.
            for (int i = noOfLeadingBits; i < noOfBits; i++)
            {
                lowerEliasFano[lowerPointer++] = number[i];
            }

            bucket[GetNumberFromBits(leadingBits)]++;
        }

        // Each bucket contributes its count plus one extra bit
        // (presumably the terminator of a unary code - see GenerateLeadingSequence).
        int sizeOfLeading = 0;
        foreach (int counter in bucket)
        {
            sizeOfLeading += counter + 1;
        }

        bool[] leadingEliasFano = new bool[sizeOfLeading];
        GenerateLeadingSequence(bucket, leadingEliasFano);

        bool[] resultVector = GenerateResultVectorOfBits(
            sizeOfLeading + val.Count * noOfLowerBits,
            leadingEliasFano,
            lowerEliasFano);

        eliasFano.Add(resultVector);
        eliasIndex++;
    }

    // Report the bit length of every encoded sequence, again inserting from index 0.
    for (int f = 0; f < eliasFano.Count; f++)
    {
        bitLengthOfSeq.Insert(f, eliasFano[f].Length);
    }

    return EncodeToBytes(eliasFano);
}