/// <summary>
        /// Compresses the per-character sequences returned by
        /// <see cref="PsiFunction.GetIncreasingSequences(string)"/> using delta encoding:
        /// the first value of every sequence is stored unchanged and each following value is
        /// stored as the difference to its predecessor.
        /// See also <see cref="PsiFunction.GeneratePsiFunction(int[], string)"/>.
        /// </summary>
        /// <param name="input">String which will be compressed.</param>
        /// <returns>
        /// Dictionary keyed by the same characters as the source sequences; each value holds the
        /// first element followed by the successive differences.
        /// </returns>
        /// <exception cref="OverflowException">
        /// A first element or a difference does not fit into <see cref="Int16"/>
        /// (thrown by <see cref="Convert.ToInt16(int)"/>).
        /// </exception>
        public static Dictionary <char, List <Int16> > DeltaEncodingCompression(string input)
        {
            Dictionary<char, List<int>> incSequences = PsiFunction.GetIncreasingSequences(input);
            var deltaEncoding = new Dictionary<char, List<Int16>>();

            // Single pass: build every delta list completely before storing it.
            foreach (KeyValuePair<char, List<int>> entry in incSequences)
            {
                List<int> sequence = entry.Value;

                // The encoded list has exactly as many entries as the source sequence.
                var deltaSequence = new List<Int16>(sequence.Count);

                // The first value is kept as-is so decoding has a starting point.
                deltaSequence.Add(Convert.ToInt16(sequence[0]));

                for (int i = 1; i < sequence.Count; i++)
                {
                    deltaSequence.Add(Convert.ToInt16(sequence[i] - sequence[i - 1]));
                }

                deltaEncoding.Add(entry.Key, deltaSequence);
            }

            return deltaEncoding;
        }
        /// <summary>
        /// Rebuilds the Psi array from its delta-encoded form and converts it to a suffix array
        /// through <see cref="PsiFunction.PsiToSA(int[])"/>.
        /// </summary>
        /// <param name="delta">
        /// Dictionary of compressed values divided by characters from the original input.
        /// Each list holds an absolute first value followed by differences.
        /// </param>
        /// <param name="length">Length of the original input; used as the size of the Psi array.</param>
        /// <returns>Suffix array.</returns>
        public static int[] DecompressDeltaEncoding(Dictionary <char, List <Int16> > delta, int length)
        {
            int[] psi = new int[length];

            // Slot 0 is set to -1; the decoded values start at index 1.
            psi[0] = -1;
            int next = 1;

            foreach (List<Int16> encoded in delta.Values)
            {
                // The first entry of each list is an absolute value, the rest are
                // differences that accumulate onto it.
                int running = encoded[0];
                psi[next++] = running;

                for (int i = 1; i < encoded.Count; i++)
                {
                    running += encoded[i];
                    psi[next++] = running;
                }
            }

            return PsiFunction.PsiToSA(psi);
        }
Beispiel #3
0
        /// <summary>
        /// Decompresses a list of byte arrays produced by Elias-Fano compression, rebuilds the
        /// Psi array from the decoded sequences and converts it to a suffix array.
        /// </summary>
        /// <param name="eliasFano">List of byte arrays. Result of Elias-Fano compression.</param>
        /// <param name="bitLengthOfSeq">Bit length of each compressed sequence, by position in <paramref name="eliasFano"/>.</param>
        /// <param name="bitLengthOfOrigParts">
        /// For each sequence a two-element array: index 0 is the number of leading bits,
        /// index 1 the number of lower bits.
        /// </param>
        /// <returns>Suffix array.</returns>
        public static int[] DecompressEliasFano(List <byte[]> eliasFano, List <int> bitLengthOfSeq, List <int[]> bitLengthOfOrigParts)
        {
            // The Psi list starts with -1 at index 0; decoded values are appended after it.
            var psi = new List<int> { -1 };

            for (int seqIndex = 0; seqIndex < eliasFano.Count; seqIndex++)
            {
                bool[] bits        = ByteToBit(eliasFano[seqIndex], bitLengthOfSeq[seqIndex]);
                int[]  partLengths = bitLengthOfOrigParts[seqIndex];

                int seam = GetHigherLowerBitsSeam(bits, partLengths[0]);
                List<bool[]> upperParts = GenerateLeadingBits(bits, seam, partLengths[0]);
                List<bool[]> lowerParts = GenerateLowerBits(bits, seam, partLengths[1]);

                List<int> decoded;
                if (partLengths[1] != 0)
                {
                    decoded = GenerateSequence(upperParts, lowerParts);
                }
                else
                {
                    // No lower bits were stored: each value is given by its leading bits alone.
                    decoded = new List<int>();
                    foreach (bool[] upper in upperParts)
                    {
                        decoded.Add(GetNumberFromBits(upper));
                    }
                }

                foreach (int value in decoded)
                {
                    psi.Add(value);
                }
            }

            return PsiFunction.PsiToSA(psi.ToArray());
        }
Beispiel #4
0
        /// <summary>
        /// Application entry point. Reads a file path from the console, loads the file, builds
        /// its suffix array and then runs Re-Pair, Delta encoding and Elias-Fano compression and
        /// decompression, printing the results, the elapsed time and the memory size of each.
        /// </summary>
        /// <remarks>
        /// The stopwatch is paused while intermediate results are written to the console so the
        /// reported times cover the computation only, not console output.
        /// </remarks>
        /// <param name="args">Command-line arguments; not used.</param>
        public static void Main(string[] args)
        {
            // Ask for the absolute path of the input file.
            Console.Write("Unesite apsolutnu putanju do datoteke: ");
            string path  = Console.ReadLine();
            string input = "";

            try
            {
                input = File.ReadAllText(path);
            }
            catch (Exception)
            {
                // Any failure to read the file ends the program after a key press.
                Console.WriteLine("\nDatoteka koju želite čitati ne postoji ili ju nije moguće otvoriti.");
                Console.ReadKey();
                Environment.Exit(-1);
            }

            // build the suffix array
            int[] suffixArray = new int[input.Length];
            var   watch       = Stopwatch.StartNew();

            SuffixArray.SAIS.sufsort(input, suffixArray, input.Length);
            watch.Stop();
            Console.Write("SA: ");
            WriteArray(suffixArray);
            Console.WriteLine("Izgradnja SA se izvodila " + watch.ElapsedMilliseconds + " ms.");
            Console.WriteLine("SA zauzima " + GetMemorySizeOfObject(suffixArray) + " bajtova.");
            Console.WriteLine();

            // compress SA with Re-Pair
            watch.Restart();
            int[] csa = RePair.RePairMethod(suffixArray).ToArray();
            watch.Stop();
            Console.Write("CSA: ");
            WriteArray(csa);
            Console.WriteLine("Izgradnja CSA preko Re-Pair se izvodila " + watch.ElapsedMilliseconds + " ms.");
            Console.WriteLine("CSA zauzima " + GetMemorySizeOfObject(csa) + " bajtova.");
            Console.WriteLine();

            // decompress Re-Pair to SA (only timed; the result is not used)
            watch.Restart();
            RePair.DecompressRePair(csa.ToList(), input.Length);
            watch.Stop();
            Console.WriteLine("Dekompresija Re-Pair CSA se izvodila " + watch.ElapsedMilliseconds + " ms.");
            Console.WriteLine();

            // compress Psi with Delta encoding
            watch.Restart();
            PsiFunction.GeneratePsiFunction(suffixArray, input);
            watch.Stop(); // pause: console output must not count towards the compression time
            Console.Write("Psi: ");
            WriteArray(PsiFunction.psi);
            watch.Start(); // resume without resetting the elapsed time
            Dictionary <char, List <Int16> > delta = DeltaEncoding.DeltaEncodingCompression(input);
            watch.Stop();

            Console.WriteLine("Delta:");
            WriteDictionary(delta);

            Console.WriteLine("Izgradnja CSA preko Delta encoding se izvodila " + watch.ElapsedMilliseconds + " ms.");
            Console.WriteLine("CSA zauzima " + GetMemorySizeOfObject(delta) + " bajtova.");
            Console.WriteLine();

            // decompress Delta encoding to SA
            watch.Restart();
            int[] SA = DeltaEncoding.DecompressDeltaEncoding(delta, input.Length);
            watch.Stop();
            Console.WriteLine("Dekompresija Delta encoding CSA se izvodila " + watch.ElapsedMilliseconds + " ms.");
            Console.WriteLine();
            WriteArray(SA);

            // compress Psi with Elias-Fano
            watch.Restart();
            PsiFunction.GeneratePsiFunction(suffixArray, input);
            watch.Stop(); // pause: console output must not count towards the compression time
            Console.Write("Psi: ");
            WriteArray(PsiFunction.psi);
            watch.Start(); // resume without resetting the elapsed time
            List <int>    bitLengthOfSeq       = new List <int>();
            List <int[]>  bitLengthOfOrigParts = new List <int[]>();
            List <byte[]> eliasFano            = EliasFano.EliasFanoCompression(input, bitLengthOfSeq, bitLengthOfOrigParts);
            watch.Stop();

            Console.WriteLine("Elias-Fano:");
            WriteListOfArrays(eliasFano);
            Console.WriteLine("Duljine bitova:");
            WriteListOfArrays(bitLengthOfOrigParts);

            Console.WriteLine("Izgradnja CSA preko Elias-Fano se izvodila " + watch.ElapsedMilliseconds + " ms.");
            Console.WriteLine("CSA zauzima " + GetMemorySizeOfObject(eliasFano) + " bajtova.");
            Console.WriteLine();

            // decompress Elias-Fano to SA
            watch.Restart();
            int[] SAElias = EliasFano.DecompressEliasFano(eliasFano, bitLengthOfSeq, bitLengthOfOrigParts);
            watch.Stop();
            Console.WriteLine("Dekompresija Elias-Fano CSA se izvodila " + watch.ElapsedMilliseconds + " ms.");
            Console.WriteLine();
            WriteArray(SAElias);
        }
Beispiel #5
0
        /// <summary>
        /// Compresses the per-character sequences returned by
        /// <see cref="PsiFunction.GetIncreasingSequences(string)"/> with the Elias-Fano scheme.
        /// Every value is split into a leading part and a lower part; the lower parts are stored
        /// back to back, while the leading parts are counted per bucket and encoded by
        /// <c>GenerateLeadingSequence</c>.
        /// </summary>
        /// <param name="input">Original input.</param>
        /// <param name="bitLengthOfSeq">
        /// Receives the bit length of every resulting bit sequence. Entries are inserted starting
        /// at index 0, so an empty list should be passed.
        /// </param>
        /// <param name="bitLengthOfOrigParts">
        /// Receives, per sequence, a two-element array { number of leading bits, number of lower bits }.
        /// Entries are inserted starting at index 0, so an empty list should be passed.
        /// </param>
        /// <returns>List of bytes which represent compressed values.</returns>
        public static List <byte[]> EliasFanoCompression(string input, List <int> bitLengthOfSeq, List <int[]> bitLengthOfOrigParts)
        {
            Dictionary <char, List <int> > incSequences = PsiFunction.GetIncreasingSequences(input);

            List <bool[]> eliasFano  = new List <bool[]>();
            int           eliasIndex = 0;

            foreach (var val in incSequences.Values)
            {
                // The bit width is derived from the last element of the sequence.
                int noOfBits        = BitsToExpressNumber(val[val.Count - 1]);
                int noOfLeadingBits = GetNumberOfLeadingBits(val.Count);

                // One counter per possible value of the leading bits.
                int[] bucket        = new int[(int)Math.Pow(2, noOfLeadingBits)];
                int   noOfLowerBits = noOfBits - noOfLeadingBits;

                bitLengthOfOrigParts.Insert(eliasIndex, new int[] { noOfLeadingBits, noOfLowerBits });

                bool[] lowerEliasFano = new bool[val.Count * noOfLowerBits];
                int    lowerPointer   = 0;

                // Scratch buffer reused for every element; it is fully overwritten each time.
                bool[] leadingBits = new bool[noOfLeadingBits];

                foreach (var item in val)
                {
                    bool[] number = NumberToBits(item, noOfBits);

                    // First noOfLeadingBits bits form the leading part ...
                    for (int i = 0; i < noOfLeadingBits; i++)
                    {
                        leadingBits[i] = number[i];
                    }

                    // ... the remaining bits are appended directly to the lower-bit stream.
                    for (int i = noOfLeadingBits; i < noOfBits; i++)
                    {
                        lowerEliasFano[lowerPointer++] = number[i];
                    }

                    bucket[GetNumberFromBits(leadingBits)]++;
                }

                // Each bucket contributes its count plus one extra bit to the leading sequence.
                int sizeOfLeading = 0;
                foreach (var counter in bucket)
                {
                    sizeOfLeading += counter + 1;
                }

                bool[] leadingEliasFano = new bool[sizeOfLeading];
                GenerateLeadingSequence(bucket, leadingEliasFano);

                bool[] resultVector = GenerateResultVectorOfBits(sizeOfLeading + val.Count * noOfLowerBits,
                                                                 leadingEliasFano, lowerEliasFano);

                eliasFano.Add(resultVector);
                eliasIndex++;
            }

            // Report the bit length of every sequence so the decoder can undo the byte padding.
            int f = 0;
            foreach (var seq in eliasFano)
            {
                bitLengthOfSeq.Insert(f, seq.Length);
                f++;
            }

            return EncodeToBytes(eliasFano);
        }