Ejemplo n.º 1
0
        /// <summary>
        /// Method used for compression of a suffix array based on Delta encoding algorithm.
        /// The increasing sequences are obtained from <c>PsiFunction.GetIncreasingSequences</c>;
        /// for the Psi function itself see <see cref="PsiFunction.GeneratePsiFunction(int[], string)"/>.
        /// For every character key the encoded list starts with the first element of its sequence,
        /// followed by the differences between each pair of consecutive elements.
        /// </summary>
        /// <param name="input">String which will be compressed.</param>
        /// <returns>Dictionary of compressed values divided by characters from the original input.</returns>
        /// <exception cref="OverflowException">
        /// A first element or a difference between neighbours does not fit into <see cref="Int16"/>.
        /// </exception>
        public static Dictionary <char, List <Int16> > DeltaEncodingCompression(string input)
        {
            Dictionary <char, List <int> >   incSequences  = PsiFunction.GetIncreasingSequences(input);
            Dictionary <char, List <Int16> > deltaEncoding = new Dictionary <char, List <Int16> >();

            // Single pass over key/value pairs: avoids a second loop and repeated dictionary lookups.
            foreach (KeyValuePair <char, List <int> > entry in incSequences)
            {
                List <int>   sequence      = entry.Value;
                List <Int16> deltaSequence = new List <Int16>(sequence.Count);

                // An empty sequence yields an empty delta list instead of an out-of-range failure.
                if (sequence.Count > 0)
                {
                    // The first value is stored verbatim so that the sequence can be rebuilt from the deltas.
                    deltaSequence.Add(Convert.ToInt16(sequence[0]));

                    for (int i = 1; i < sequence.Count; i++)
                    {
                        deltaSequence.Add(Convert.ToInt16(sequence[i] - sequence[i - 1]));
                    }
                }

                deltaEncoding.Add(entry.Key, deltaSequence);
            }

            return deltaEncoding;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Method used for compression of a suffix array based on Elias-Fano algorithm.
        /// Every increasing sequence returned by <c>PsiFunction.GetIncreasingSequences</c> is split into
        /// leading and lower bits: the lower bits of all items are concatenated, the leading bits are
        /// counted per bucket, and both parts are combined into one bit vector per sequence.
        /// </summary>
        /// <param name="input">Original input.</param>
        /// <param name="bitLengthOfSeq">
        /// Output list. The length of each resulting bit vector is inserted at index 0, 1, 2, ... in
        /// sequence order, so any entries already present end up behind the new ones.
        /// </param>
        /// <param name="bitLengthOfOrigParts">
        /// Output list. For each sequence a two-element array { number of leading bits, number of lower bits }
        /// is inserted at index 0, 1, 2, ... in sequence order.
        /// </param>
        /// <returns>List of bytes which represent compressed values.</returns>
        public static List <byte[]> EliasFanoCompression(string input, List <int> bitLengthOfSeq, List <int[]> bitLengthOfOrigParts)
        {
            Dictionary <char, List <int> > incSequences = PsiFunction.GetIncreasingSequences(input);

            List <bool[]> eliasFano  = new List <bool[]>(incSequences.Count);
            int           eliasIndex = 0;

            foreach (List <int> val in incSequences.Values)
            {
                // The bit width is derived from the last element of the sequence.
                int noOfBits        = BitsToExpressNumber(val[val.Count - 1]);
                int noOfLeadingBits = GetNumberOfLeadingBits(val.Count);

                // One counter per possible value of the leading bits.
                int[] bucket        = new int[(int)Math.Pow(2, noOfLeadingBits)];
                int   noOfLowerBits = noOfBits - noOfLeadingBits;

                bitLengthOfOrigParts.Insert(eliasIndex, new int[] { noOfLeadingBits, noOfLowerBits });

                bool[] lowerEliasFano = new bool[val.Count * noOfLowerBits];
                int    lowerPointer   = 0;

                foreach (int item in val)
                {
                    // NOTE(review): assumes NumberToBits returns at least noOfBits entries - confirm against the helper.
                    bool[] number      = NumberToBits(item, noOfBits);
                    bool[] leadingBits = new bool[noOfLeadingBits];

                    // The first noOfLeadingBits entries select the bucket ...
                    Array.Copy(number, 0, leadingBits, 0, noOfLeadingBits);
                    bucket[GetNumberFromBits(leadingBits)]++;

                    // ... and the remaining entries are appended directly to the lower-bit stream.
                    Array.Copy(number, noOfLeadingBits, lowerEliasFano, lowerPointer, noOfLowerBits);
                    lowerPointer += noOfLowerBits;
                }

                // Sum of (counter + 1) over all buckets: every item incremented exactly one bucket,
                // so the counters add up to val.Count, plus one extra slot per bucket.
                int    sizeOfLeading    = bucket.Length + val.Count;
                bool[] leadingEliasFano = new bool[sizeOfLeading];

                GenerateLeadingSequence(bucket, leadingEliasFano);

                bool[] resultVector = GenerateResultVectorOfBits(sizeOfLeading + val.Count * noOfLowerBits,
                                                                 leadingEliasFano, lowerEliasFano);

                eliasFano.Add(resultVector);
                eliasIndex++;
            }

            // Lengths are reported only after every sequence has been encoded successfully.
            for (int i = 0; i < eliasFano.Count; i++)
            {
                bitLengthOfSeq.Insert(i, eliasFano[i].Length);
            }

            return EncodeToBytes(eliasFano);
        }