Example #1
0
        /**
         * Round-trips an unsorted integer array through a FastPFOR + VariableByte
         * composition, prints the size before and after compression, and throws
         * if the recovered array differs from the input.
         */
        public void unsortedExample()
        {
            const int ElementCount = 1333333;

            // Fill the input in one pass: most values are small (3), every fifth
            // slot holds 100, and every 533rd slot holds 10000. The largest
            // stride wins where strides overlap.
            int[] data = new int[ElementCount];
            for (int idx = 0; idx < data.Length; ++idx)
            {
                data[idx] = (idx % 533 == 0) ? 10000
                          : (idx % 5 == 0)   ? 100
                          : 3;
            }

            // Output buffer with some slack; it could need more.
            int[]        compressed = new int[ElementCount + 1024];
            IntegerCODEC codec      = new Composition(new FastPFOR(), new VariableByte());

            // Compress the whole input starting at offset 0 of both arrays.
            IntWrapper readPos  = new IntWrapper(0);
            IntWrapper writePos = new IntWrapper(0);
            codec.compress(data, readPos, data.Length, compressed, writePos);

            int originalKb   = data.Length * 4 / 1024;
            int compressedKb = writePos.intValue() * 4 / 1024;
            Console.WriteLine("compressed unsorted integers from " + originalKb + "KB to "
                              + compressedKb + "KB");

            // Optional repacking: trim the buffer to the ints actually written.
            compressed = Arrays.copyOf(compressed, writePos.intValue());

            // Decompress into a fresh array of the original size.
            int[]      recovered    = new int[ElementCount];
            IntWrapper recoveredPos = new IntWrapper(0);
            codec.uncompress(compressed, new IntWrapper(0), compressed.Length, recovered, recoveredPos);

            if (!Arrays.equals(data, recovered))
            {
                throw new Exception("bug"); // could use assert
            }
            Console.WriteLine("data is recovered without loss");
            Console.WriteLine();
        }
        /**
         * Uncompresses an array and returns the result as a new array.
         *
         * The first element of the input is used as the number of integers to
         * allocate for the output; decoding of the payload starts at index 1.
         *
         * @param compressed compressed array (element 0 is the output length)
         * @return uncompressed array
         */
        public int[] uncompress(int[] compressed)
        {
            // Slot 0 carries the output length, so reading begins at slot 1.
            int        expectedCount = compressed[0];
            int[]      result        = new int[expectedCount];
            IntWrapper readPos       = new IntWrapper(1);
            IntWrapper writePos      = new IntWrapper(0);
            IntWrapper initValue     = new IntWrapper(0);

            // Only the ints after the length header are offered to the codec.
            int payloadLength = compressed.Length - readPos.intValue();

            codec.headlessUncompress(compressed, readPos, payloadLength,
                                     result, writePos, result.Length, initValue);
            return result;
        }
        /**
         * Compresses an array and returns the compressed result as a new array.
         *
         * Element 0 of the result holds the input length; the encoded payload
         * follows from index 1, and the result is trimmed to the ints written.
         *
         * @param input array to be compressed
         * @return compressed array
         */
        public int[] compress(int[] input)
        {
            // Working buffer: the input size plus 1024 ints of slack.
            int[] buffer = new int[input.Length + 1024];

            // Length header goes in slot 0; the codec writes from slot 1 onward.
            buffer[0] = input.Length;
            IntWrapper writePos  = new IntWrapper(1);
            IntWrapper initValue = new IntWrapper(0);
            IntWrapper readPos   = new IntWrapper(0);

            codec.headlessCompress(input, readPos, input.Length, buffer, writePos, initValue);

            // Return only the portion of the buffer that was actually used.
            return Arrays.copyOf(buffer, writePos.intValue());
        }
Example #4
0
        /**
         * Checks that compressing 0 to 31 integers into a zero-length output
         * array leaves the output position at 0 every time.
         *
         * @param c codec under test
         */
        private static void testSpurious(IntegerCODEC c)
        {
            int[]      source    = new int[1024];
            int[]      emptySink = new int[0];
            IntWrapper sourcePos = new IntWrapper(0);
            IntWrapper sinkPos   = new IntWrapper(0);

            // The same position wrappers are reused across all iterations.
            int inlength = 0;
            while (inlength < 32)
            {
                c.compress(source, sourcePos, inlength, emptySink, sinkPos);
                Assert2.assertEquals(0, sinkPos.intValue());
                inlength++;
            }
        }
Example #5
0
        // Compresses a sorted int array. Note: the input array must already be sorted.
        // Despite the method name, the codec composed here is IntegratedBinaryPacking
        // followed by IntegratedVariableByte.
        // Element 0 of the result holds the input length; the payload starts at index 1.
        public static int[] fastPforEncoder(int[] uncompressed)
        {
            SkippableIntegratedComposition packer =
                new SkippableIntegratedComposition(new IntegratedBinaryPacking(), new IntegratedVariableByte());

            // Input size plus 1024 ints of slack for the output buffer.
            int[]      buffer   = new int[uncompressed.Length + 1024];
            IntWrapper readPos  = new IntWrapper(0);
            IntWrapper writePos = new IntWrapper(1); // slot 0 is reserved for the length header

            packer.headlessCompress(uncompressed, readPos, uncompressed.Length, buffer, writePos, new IntWrapper(0));

            // Store the element count in the reserved slot, then trim to the used size.
            buffer[0] = uncompressed.Length;
            return Arrays.copyOf(buffer, writePos.intValue());
        }
Example #6
0
        /**
         * Checks that compressing zero integers writes nothing, and that
         * uncompressing zero integers likewise produces no output.
         *
         * @param c codec under test
         */
        private static void testZeroInZeroOut(IntegerCODEC c)
        {
            int[]      emptyInput      = new int[0];
            int[]      emptyCompressed = new int[0];
            IntWrapper inputPos        = new IntWrapper(0);
            IntWrapper compressedPos   = new IntWrapper(0);

            // Compressing nothing must not advance the output position.
            c.compress(emptyInput, inputPos, 0, emptyCompressed, compressedPos);
            Assert2.assertEquals(0, compressedPos.intValue());

            int[]      decoded    = new int[0];
            IntWrapper decodedPos = new IntWrapper(0);

            // The compressed-side position is reused as the read position here.
            c.uncompress(emptyCompressed, compressedPos, 0, decoded, decodedPos);
            Assert2.assertEquals(0, decodedPos.intValue());
        }
Example #7
0
        /**
         * Round-trips 128 integers (all zero except a final -1) through the
         * given codec and asserts the recovered array equals the input.
         *
         * @param codec codec under test
         */
        private void testUnsorted3(IntegerCODEC codec)
        {
            // 127 zeros followed by a single negative value.
            int[] data = new int[128];
            data[data.Length - 1] = -1;

            int[]      buffer   = new int[1024];
            IntWrapper readPos  = new IntWrapper(0);
            IntWrapper writePos = new IntWrapper(0);
            codec.compress(data, readPos, data.Length, buffer, writePos);

            // Optional repacking: keep only the ints that were written.
            int[] compressed = Arrays.copyOf(buffer, writePos.intValue());

            int[]      recovered    = new int[128];
            IntWrapper recoveredPos = new IntWrapper(0);
            codec.uncompress(compressed, new IntWrapper(0), compressed.Length,
                             recovered, recoveredPos);

            Assert2.assertArrayEquals(data, recovered);
        }
Example #8
0
        /**
         * Round-trips unsorted arrays of several lengths through the given codec
         * and asserts that each recovered array equals its input.
         *
         * @param codec codec under test
         */
        public void testUnsorted(IntegerCODEC codec)
        {
            foreach (int N in new int[] { 133, 1026, 1333333 })
            {
                // Mostly small values (3), with 100 at every fifth slot and 10000
                // at every 533rd slot; the largest stride wins on overlap.
                int[] data = new int[N];
                for (int idx = 0; idx < data.Length; ++idx)
                {
                    data[idx] = (idx % 533 == 0) ? 10000
                              : (idx % 5 == 0)   ? 100
                              : 3;
                }
                // One negative value, written after the fill so it overrides slot 5.
                data[5] = -311;

                // Output buffer: about 1% larger than the input plus 1024 ints
                // of slack (it could need more).
                int        capacity = (int)Math.Ceiling(N * 1.01) + 1024;
                int[]      buffer   = new int[capacity];
                IntWrapper readPos  = new IntWrapper(0);
                IntWrapper writePos = new IntWrapper(0);
                codec.compress(data, readPos, data.Length, buffer, writePos);

                // Optional repacking: keep only the ints that were written.
                int[] compressed = Arrays.copyOf(buffer, writePos.intValue());

                int[]      recovered    = new int[N];
                IntWrapper recoveredPos = new IntWrapper(0);
                codec.uncompress(compressed, new IntWrapper(0), compressed.Length,
                                 recovered, recoveredPos);

                Assert2.assertArrayEquals(data, recovered);
            }
        }
Example #9
0
        /**
         * Compresses a sorted integer array in one call using the headless API,
         * storing the element count manually in slot 0 of the output, then
         * uncompresses it and throws if the recovered data differs from the input.
         */
        public void basicExampleHeadless()
        {
            int[] data = new int[2342351];
            Console.WriteLine("Compressing " + data.Length + " integers in one go using the headless approach");
            // data should be sorted for best results
            for (int k = 0; k < data.Length; ++k)
            {
                data[k] = k;
            }
            // Very important: the data is in sorted order!!! If not, you
            // will get very poor compression with IntegratedBinaryPacking,
            // you should use another CODEC.

            // Next we compose a CODEC. Most of the processing
            // will be done with binary packing, and leftovers will
            // be processed using variable byte.
            SkippableIntegratedComposition codec = new SkippableIntegratedComposition(new IntegratedBinaryPacking(),
                                                                                      new IntegratedVariableByte());

            // The output vector should be large enough...
            int[] compressed = new int[data.Length + 1024];
            // compressed might not be large enough in some cases;
            // if you get an IndexOutOfRangeException, try
            // allocating more memory.

            //
            // compressing
            //
            IntWrapper inputoffset  = new IntWrapper(0);
            IntWrapper outputoffset = new IntWrapper(1); // slot 0 is reserved for the count

            // The headless API stores no length, so we manually record how many
            // integers we are compressing.
            compressed[0] = data.Length;
            codec.headlessCompress(data, inputoffset, data.Length, compressed, outputoffset, new IntWrapper(0));
            // got it!
            // inputoffset should be at data.Length but outputoffset tells
            // us where we are...
            Console.WriteLine(
                "compressed from " + data.Length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB");
            // we can repack the data: (optional)
            compressed = Arrays.copyOf(compressed, outputoffset.intValue());

            //
            // now uncompressing
            //
            int howmany = compressed[0]; // we manually stored the number of compressed integers

            int[]      recovered  = new int[howmany];
            IntWrapper recoffset  = new IntWrapper(0);
            IntWrapper readoffset = new IntWrapper(1); // skip the count stored in slot 0

            // Offer the codec only the ints that follow the read position;
            // passing compressed.Length here would overstate the input by one.
            codec.headlessUncompress(compressed, readoffset, compressed.Length - readoffset.intValue(),
                                     recovered, recoffset, howmany, new IntWrapper(0));
            if (Arrays.equals(data, recovered))
            {
                Console.WriteLine("data is recovered without loss");
            }
            else
            {
                throw new Exception("bug"); // could use assert
            }
            Console.WriteLine();
        }
Example #10
0
        /**
         * Compresses a sorted array chunk by chunk, then uncompresses it one
         * chunk at a time and throws if any recovered value differs from the input.
         */
        public void advancedExample()
        {
            const int TotalSize = 2342351; // some arbitrary number
            const int ChunkSize = 16384;   // size of each chunk, choose a multiple of 128

            Console.WriteLine("Compressing " + TotalSize + " integers using chunks of " + ChunkSize + " integers ("
                              + ChunkSize * 4 / 1024 + "KB)");
            Console.WriteLine("(It is often better for applications to work in chunks fitting in CPU cache.)");

            // Data should be sorted for best results.
            int[] data = new int[TotalSize];
            for (int idx = 0; idx < data.Length; ++idx)
            {
                data[idx] = idx;
            }

            // Compose the codecs: full chunks use binary packing alone; only the
            // last, partial chunk additionally uses variable byte for leftovers.
            IntegratedIntegerCODEC regularcodec = new IntegratedBinaryPacking();
            IntegratedVariableByte ivb          = new IntegratedVariableByte();
            IntegratedIntegerCODEC lastcodec    = new IntegratedComposition(regularcodec, ivb);

            // The output vector should be large enough...
            int[] compressed = new int[TotalSize + 1024];

            // ---- compressing ----
            IntWrapper readPos  = new IntWrapper(0);
            IntWrapper writePos = new IntWrapper(0);

            int fullChunks = TotalSize / ChunkSize;
            int tailLength = TotalSize % ChunkSize;
            for (int chunk = 0; chunk < fullChunks; ++chunk)
            {
                regularcodec.compress(data, readPos, ChunkSize, compressed, writePos);
            }
            lastcodec.compress(data, readPos, tailLength, compressed, writePos);

            // readPos should now be at data.Length; writePos tells us how many
            // ints of the output buffer were used.
            Console.WriteLine(
                "compressed from " + data.Length * 4 / 1024 + "KB to " + writePos.intValue() * 4 / 1024 + "KB");

            // Repack: trim the buffer to the ints actually written.
            compressed = Arrays.copyOf(compressed, writePos.intValue());

            // ---- uncompressing ----
            // We are *not* assuming that the original array length is known,
            // however we assume that the chunk size (ChunkSize) is known.
            int[]      chunkBuffer = new int[ChunkSize];
            IntWrapper compoff     = new IntWrapper(0);
            int        verified    = 0;

            while (compoff.get() < compressed.Length)
            {
                IntWrapper chunkPos = new IntWrapper(0);
                regularcodec.uncompress(compressed, compoff, compressed.Length - compoff.get(), chunkBuffer, chunkPos);

                if (chunkPos.get() < ChunkSize)
                {
                    // Last chunk detected: decode the variable-byte leftovers too.
                    ivb.uncompress(compressed, compoff, compressed.Length - compoff.get(), chunkBuffer, chunkPos);
                }

                // Compare this chunk against the matching slice of the input.
                int decodedCount = chunkPos.get();
                for (int i = 0; i < decodedCount; ++i)
                {
                    if (data[verified + i] != chunkBuffer[i])
                    {
                        throw new Exception("bug"); // could use assert
                    }
                }
                verified += decodedCount;
            }
            Console.WriteLine("data is recovered without loss");
            Console.WriteLine();
        }