public void unsortedExample() {
    const int N = 1333333;

    // Build a test array: mostly small values (3), with some medium (100)
    // and large (10000) outliers sprinkled in at regular intervals.
    int[] data = new int[N];
    for (int i = 0; i < N; i++) {
        data[i] = 3;
    }
    for (int i = 0; i < N; i += 5) {
        data[i] = 100;
    }
    for (int i = 0; i < N; i += 533) {
        data[i] = 10000;
    }

    // FastPFOR handles the bulk of the data; VariableByte mops up leftovers.
    IntegerCODEC codec = new Composition(new FastPFOR(), new VariableByte());

    // Padded output buffer — compression could need more room than the input.
    int[] compressed = new int[N + 1024];
    IntWrapper inPos = new IntWrapper(0);
    IntWrapper outPos = new IntWrapper(0);
    codec.compress(data, inPos, data.Length, compressed, outPos);
    Console.WriteLine("compressed unsorted integers from "
            + data.Length * 4 / 1024 + "KB to "
            + outPos.intValue() * 4 / 1024 + "KB");

    // Repack the buffer down to the words actually written (optional).
    compressed = Arrays.copyOf(compressed, outPos.intValue());

    // Round-trip: decompress and verify against the original input.
    int[] recovered = new int[N];
    IntWrapper recPos = new IntWrapper(0);
    codec.uncompress(compressed, new IntWrapper(0), compressed.Length, recovered, recPos);
    if (!Arrays.equals(data, recovered)) {
        throw new Exception("bug"); // could use assert
    }
    Console.WriteLine("data is recovered without loss");
    Console.WriteLine();
}
/**
 * Uncompress an array and returns the uncompressed result as a new array.
 *
 * @param compressed compressed array
 * @return uncompressed array
 */
public int[] uncompress(int[] compressed) {
    // Slot 0 stores the original length; the compressed payload begins at 1.
    int[] output = new int[compressed[0]];
    IntWrapper readPos = new IntWrapper(1);
    codec.headlessUncompress(compressed, readPos,
            compressed.Length - readPos.intValue(),
            output, new IntWrapper(0), output.Length,
            new IntWrapper(0));
    return output;
}
/**
 * Compress an array and returns the compressed result as a new array.
 *
 * @param input array to be compressed
 * @return compressed array
 */
public int[] compress(int[] input) {
    // Reserve headroom: compressed output can exceed the input size.
    int[] buffer = new int[input.Length + 1024];
    buffer[0] = input.Length; // slot 0 records the uncompressed length
    IntWrapper writePos = new IntWrapper(1);
    codec.headlessCompress(input, new IntWrapper(0), input.Length,
            buffer, writePos, new IntWrapper(0));
    // Shrink to the portion actually used before returning.
    return Arrays.copyOf(buffer, writePos.intValue());
}
// Compressing fewer than 32 values must not emit any output words
// (offsets are deliberately reused across iterations without resetting).
private static void testSpurious(IntegerCODEC c) {
    int[] source = new int[1024];
    int[] sink = new int[0];
    IntWrapper readPos = new IntWrapper(0);
    IntWrapper writePos = new IntWrapper(0);
    for (int len = 0; len < 32; ++len) {
        c.compress(source, readPos, len, sink, writePos);
        Assert2.assertEquals(0, writePos.intValue());
    }
}
// Compress a sorted int array using the FastPfor scheme.
// NOTE: the input array must already be sorted — the integrated
// (differential) codecs used here assume non-decreasing values.
public static int[] fastPforEncoder(int[] uncompressed) {
    var codec = new SkippableIntegratedComposition(
            new IntegratedBinaryPacking(), new IntegratedVariableByte());
    var compressed = new int[uncompressed.Length + 1024]; // padded buffer
    var readPos = new IntWrapper(0);
    var writePos = new IntWrapper(1); // slot 0 is reserved for the length
    codec.headlessCompress(uncompressed, readPos, uncompressed.Length,
            compressed, writePos, new IntWrapper(0));
    compressed[0] = uncompressed.Length;
    // Trim to the words actually produced.
    return Arrays.copyOf(compressed, writePos.intValue());
}
// An empty input must compress to an empty stream, and decompressing
// that empty stream must produce nothing as well.
private static void testZeroInZeroOut(IntegerCODEC c) {
    int[] empty = new int[0];
    int[] packed = new int[0];
    IntWrapper inPos = new IntWrapper(0);
    IntWrapper outPos = new IntWrapper(0);
    c.compress(empty, inPos, 0, packed, outPos);
    Assert2.assertEquals(0, outPos.intValue());

    int[] @out = new int[0];
    IntWrapper unpackPos = new IntWrapper(0);
    c.uncompress(packed, outPos, 0, @out, unpackPos);
    Assert2.assertEquals(0, unpackPos.intValue());
}
// Round-trip a 128-entry block whose final value is -1 (all bits set),
// forcing the codec to cope with the maximum bit width.
private void testUnsorted3(IntegerCODEC codec) {
    int[] data = new int[128];
    data[127] = -1;
    int[] compressed = new int[1024];
    IntWrapper inPos = new IntWrapper(0);
    IntWrapper outPos = new IntWrapper(0);
    codec.compress(data, inPos, data.Length, compressed, outPos);
    // Repack to the exact compressed size (optional).
    compressed = Arrays.copyOf(compressed, outPos.intValue());
    int[] recovered = new int[128];
    IntWrapper recPos = new IntWrapper(0);
    codec.uncompress(compressed, new IntWrapper(0), compressed.Length, recovered, recPos);
    Assert2.assertArrayEquals(data, recovered);
}
// Round-trip arrays of several sizes filled with mostly-small values,
// larger outliers at fixed strides, and one negative entry.
public void testUnsorted(IntegerCODEC codec) {
    int[] lengths = { 133, 1026, 1333333 };
    foreach (int N in lengths) {
        int[] data = new int[N];
        // mostly small values...
        for (int i = 0; i < N; i++) {
            data[i] = 3;
        }
        // ...with some larger ones thrown in
        for (int i = 0; i < N; i += 5) {
            data[i] = 100;
        }
        for (int i = 0; i < N; i += 533) {
            data[i] = 10000;
        }
        data[5] = -311;
        // output may need slightly more room than the input itself
        int[] compressed = new int[(int)Math.Ceiling(N * 1.01) + 1024];
        IntWrapper inPos = new IntWrapper(0);
        IntWrapper outPos = new IntWrapper(0);
        codec.compress(data, inPos, data.Length, compressed, outPos);
        // repack to the exact compressed size (optional)
        compressed = Arrays.copyOf(compressed, outPos.intValue());
        int[] recovered = new int[N];
        IntWrapper recPos = new IntWrapper(0);
        codec.uncompress(compressed, new IntWrapper(0), compressed.Length, recovered, recPos);
        Assert2.assertArrayEquals(data, recovered);
    }
}
public void basicExampleHeadless() {
    int[] data = new int[2342351];
    Console.WriteLine("Compressing " + data.Length + " integers in one go using the headless approach");

    // Fill with sorted values. Sorted input is essential here: the
    // integrated (differential) codecs like IntegratedBinaryPacking give
    // very poor compression on unsorted data — pick another CODEC then.
    for (int i = 0; i < data.Length; ++i) {
        data[i] = i;
    }

    // Compose a codec: binary packing does the bulk of the work and
    // variable byte handles the leftover values.
    SkippableIntegratedComposition codec = new SkippableIntegratedComposition(
            new IntegratedBinaryPacking(), new IntegratedVariableByte());

    // Output buffer needs headroom; if you see an index-out-of-bounds
    // exception, allocate more.
    int[] compressed = new int[data.Length + 1024];

    // --- compressing ---
    // The headless format does not record the input length, so we store
    // it ourselves in slot 0 and start writing the payload at slot 1.
    IntWrapper inPos = new IntWrapper(0);
    IntWrapper outPos = new IntWrapper(1);
    compressed[0] = data.Length;
    codec.headlessCompress(data, inPos, data.Length, compressed, outPos, new IntWrapper(0));
    // inPos is now at data.Length; outPos marks the end of the output.
    Console.WriteLine("compressed from " + data.Length * 4 / 1024 + "KB to "
            + outPos.intValue() * 4 / 1024 + "KB");
    // Repack the buffer (optional).
    compressed = Arrays.copyOf(compressed, outPos.intValue());

    // --- now uncompressing ---
    int howmany = compressed[0]; // the count we stored manually
    int[] recovered = new int[howmany];
    IntWrapper recPos = new IntWrapper(0);
    codec.headlessUncompress(compressed, new IntWrapper(1), compressed.Length,
            recovered, recPos, howmany, new IntWrapper(0));
    if (!Arrays.equals(data, recovered)) {
        throw new Exception("bug"); // could use assert
    }
    Console.WriteLine("data is recovered without loss");
    Console.WriteLine();
}
public void advancedExample() {
    const int TotalSize = 2342351; // some arbitrary number
    const int ChunkSize = 16384;   // chunk size; choose a multiple of 128
    Console.WriteLine("Compressing " + TotalSize + " integers using chunks of " + ChunkSize + " integers (" + ChunkSize * 4 / 1024 + "KB)");
    Console.WriteLine("(It is often better for applications to work in chunks fitting in CPU cache.)");

    // Sorted data gives the best results with the integrated codecs.
    int[] data = new int[TotalSize];
    for (int i = 0; i < data.Length; ++i) {
        data[i] = i;
    }

    // Binary packing compresses the full chunks; the final partial chunk
    // is finished off with variable byte via a composition.
    IntegratedIntegerCODEC regularcodec = new IntegratedBinaryPacking();
    IntegratedVariableByte ivb = new IntegratedVariableByte();
    IntegratedIntegerCODEC lastcodec = new IntegratedComposition(regularcodec, ivb);

    int[] compressed = new int[TotalSize + 1024]; // padded output buffer

    // --- compressing, chunk by chunk ---
    IntWrapper inPos = new IntWrapper(0);
    IntWrapper outPos = new IntWrapper(0);
    int fullChunks = TotalSize / ChunkSize;
    for (int chunk = 0; chunk < fullChunks; ++chunk) {
        regularcodec.compress(data, inPos, ChunkSize, compressed, outPos);
    }
    lastcodec.compress(data, inPos, TotalSize % ChunkSize, compressed, outPos);
    // inPos has reached data.Length; outPos marks the end of the output.
    Console.WriteLine("compressed from " + data.Length * 4 / 1024 + "KB to "
            + outPos.intValue() * 4 / 1024 + "KB");
    // Repack the buffer:
    compressed = Arrays.copyOf(compressed, outPos.intValue());

    // --- now uncompressing ---
    // We do *not* assume the original array length is known; we only
    // assume the chunk size (ChunkSize) is known.
    int[] recovered = new int[ChunkSize];
    IntWrapper readPos = new IntWrapper(0);
    int verified = 0;
    while (readPos.get() < compressed.Length) {
        IntWrapper recPos = new IntWrapper(0);
        regularcodec.uncompress(compressed, readPos, compressed.Length - readPos.get(), recovered, recPos);
        if (recPos.get() < ChunkSize) {
            // short output means this was the last (partial) chunk
            ivb.uncompress(compressed, readPos, compressed.Length - readPos.get(), recovered, recPos);
        }
        for (int i = 0; i < recPos.get(); ++i) {
            if (data[verified + i] != recovered[i]) {
                throw new Exception("bug"); // could use assert
            }
        }
        verified += recPos.get();
    }
    Console.WriteLine("data is recovered without loss");
    Console.WriteLine();
}