/**
 * Decodes num integers by chaining the two wrapped codecs: F1 is run
 * first and F2 is then asked to decode whatever F1 left over.
 *
 * @param in        compressed input array
 * @param inpos     read position in the input; handed to both codecs
 * @param inlength  number of input words available; nothing is done when 0
 * @param out       output array receiving the decoded integers
 * @param outpos    write position in the output; handed to both codecs
 * @param num       total number of integers to decode
 * @param initvalue initial-value wrapper passed through to both codecs
 */
public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num, IntWrapper initvalue)
{
    if (inlength == 0)
    {
        return;
    }

    int inStart = inpos.get();
    int outStart = outpos.get();
    F1.headlessUncompress(@in, inpos, inlength, @out, outpos, num, initvalue);

    // Give the second codec only what is left. Both amounts are measured
    // relative to the starting positions so that a non-zero outpos on entry
    // does not corrupt the remaining count.
    inlength -= inpos.get() - inStart;
    num -= outpos.get() - outStart;
    F2.headlessUncompress(@in, inpos, inlength, @out, outpos, num, initvalue);
}
/**
 * Decodes a stream whose first word stores the number of integers to
 * produce; the remainder is delegated to headlessUncompress.
 *
 * @param in       compressed input array
 * @param inpos    read position; moved past the header word before delegating
 * @param inlength number of input words; nothing is done when 0
 * @param out      output array
 * @param outpos   write position in the output
 */
/*@Override*/
public void uncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
{
    if (inlength != 0)
    {
        // Header word: how many integers the headless decoder must emit.
        int count = @in[inpos.get()];
        inpos.increment();
        headlessUncompress(@in, inpos, inlength, @out, outpos, count);
    }
}
/**
 * Decodes variable-byte encoded deltas from a byte array.
 *
 * Each value is stored as up to five bytes carrying 7 payload bits each,
 * least significant group first. A byte with its sign bit set ends the
 * value; a fifth byte always ends it. Every decoded value is added to a
 * running sum (starting at 0) and the running sum is what gets written.
 *
 * @param in       encoded bytes
 * @param inpos    read position; set to the first unread byte on return
 * @param inlength number of bytes to decode, starting at inpos
 * @param out      output array receiving the running sums
 * @param outpos   write position; set past the last written value on return
 */
public void uncompress(sbyte[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
{
    int p = inpos.get();
    int finalp = p + inlength;
    int tmpoutpos = outpos.get();
    int initoffset = 0;

    while (p < finalp)
    {
        int v = 0;
        int used = 0;
        bool terminated;
        do
        {
            sbyte b = @in[p + used];
            v |= (b & 0x7F) << (7 * used);
            ++used;
            terminated = b < 0; // sign bit marks the last byte of a value
        } while (!terminated && used < 5);

        p += used;
        initoffset += v;
        @out[tmpoutpos++] = initoffset;
    }

    outpos.set(tmpoutpos);
    // p is already an absolute index (it started at inpos), so it has to be
    // stored, not added; adding it double-counted any non-zero start offset.
    inpos.set(p);
}
/**
 * Unpacks bit-packed blocks without reading a length header.
 *
 * The number of values produced is num rounded by Util.greatestMultiple
 * to BLOCK_SIZE. Blocks are read four at a time while four still fit: one
 * header word carries four 8-bit widths (highest byte first). Remaining
 * blocks each have their own header word holding a single width. After
 * every block the read position moves forward by that block's width.
 *
 * @param in       packed input
 * @param inpos    read position; set to the first unread word on return
 * @param inlength not used by this method
 * @param out      output array
 * @param outpos   write position; advanced by the number of values produced
 * @param num      requested number of values
 */
public void headlessUncompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, int num)
{
    int total = Util.greatestMultiple(num, BLOCK_SIZE);
    int readPos = inpos.get();
    int writePos = outpos.get();
    int writeEnd = writePos + total;

    // Four blocks per header word.
    while (writePos + BLOCK_SIZE * 4 - 1 < writeEnd)
    {
        int header = @in[readPos];
        ++readPos;
        int mbits1 = (int)((uint)header >> 24);
        int mbits2 = (int)((uint)header >> 16) & 0xFF;
        int mbits3 = (int)((uint)header >> 8) & 0xFF;
        int mbits4 = (int)((uint)header) & 0xFF;

        BitPacking.fastunpack(@in, readPos, @out, writePos, mbits1);
        readPos += mbits1;
        BitPacking.fastunpack(@in, readPos, @out, writePos + BLOCK_SIZE, mbits2);
        readPos += mbits2;
        BitPacking.fastunpack(@in, readPos, @out, writePos + 2 * BLOCK_SIZE, mbits3);
        readPos += mbits3;
        BitPacking.fastunpack(@in, readPos, @out, writePos + 3 * BLOCK_SIZE, mbits4);
        readPos += mbits4;

        writePos += BLOCK_SIZE * 4;
    }

    // Leftover blocks: one header word per block.
    while (writePos < writeEnd)
    {
        int mbits = @in[readPos];
        ++readPos;
        BitPacking.fastunpack(@in, readPos, @out, writePos, mbits);
        readPos += mbits;
        writePos += BLOCK_SIZE;
    }

    outpos.add(total);
    inpos.set(readPos);
}
/**
 * Compresses src into dst with the given codec while timing the call and
 * recording the original and compressed sizes on the logger.
 *
 * @param logger receives the timing and the two sizes
 * @param codec  codec used for compression
 * @param src    input integers (the whole array is compressed)
 * @param dst    output buffer
 * @return the output position reported by the codec after compression
 */
private static int compress(PerformanceLogger logger, IntegerCODEC codec, int[] src, int[] dst)
{
    IntWrapper srcPos = new IntWrapper();
    IntWrapper dstPos = new IntWrapper();

    logger.compressionTimer.start();
    codec.compress(src, srcPos, src.Length, dst, dstPos);
    logger.compressionTimer.end();

    int written = dstPos.get();
    logger.addOriginalSize(src.Length);
    logger.addCompressedSize(written);
    return written;
}
/**
 * Round-trips every array in data: compresses with c, decompresses with
 * co, and asserts that the original array is reproduced.
 *
 * The first element of each array is not compressed; it is copied across
 * directly and compression starts at index 1. When c is not an
 * IntegratedIntegerCODEC the data is delta-coded before compression and
 * inverse-delta-coded after decompression.
 *
 * @param c    codec used to compress
 * @param co   codec used to uncompress
 * @param data arrays to round-trip
 * @param max  not used by this method
 */
private static void testCodec(IntegerCODEC c, IntegerCODEC co, int[][] data, int max)
{
    int longest = 0;
    foreach (int[] row in data)
    {
        if (row.Length > longest)
        {
            longest = row.Length;
        }
    }

    int[] decoded = new int[longest + 1024];
    // 4x + 1024 to account for the possibility of some negative
    // compression.
    int[] encoded = new int[4 * longest + 1024];

    IntWrapper inpos = new IntWrapper();
    IntWrapper outpos = new IntWrapper();
    bool needsDelta = !(c is IntegratedIntegerCODEC);

    for (int k = 0; k < data.Length; ++k)
    {
        int[] backupdata = Arrays.copyOf(data[k], data[k].Length);

        // Compress everything after the first element.
        inpos.set(1);
        outpos.set(0);
        if (needsDelta)
        {
            Delta.delta(backupdata);
        }
        c.compress(backupdata, inpos, backupdata.Length - inpos.get(), encoded, outpos);
        int compressedWords = outpos.get();

        // Decompress behind the first element, which is carried over as-is.
        inpos.set(0);
        outpos.set(1);
        decoded[0] = backupdata[0];
        co.uncompress(encoded, inpos, compressedWords, decoded, outpos);
        if (needsDelta)
        {
            Delta.fastinverseDelta(decoded);
        }

        // Check assertions.
        Assert2.assertEquals(outpos.get(), data[k].Length); //"length is not match"
        int[] bufferCutout = Arrays.copyOf(decoded, outpos.get());
        Assert2.assertArrayEquals(data[k], bufferCutout); //"failed to reconstruct original data"
    }
}
/**
 * Compresses whole blocks of BLOCK_LENGTH integers.
 *
 * The input length is first truncated to a multiple of BLOCK_LENGTH and
 * written as a header word. Each block is run through a
 * DeltaZigzagEncoding.Encoder into a scratch array, then packed as four
 * groups of 32 values. A per-block header word holds the four bit widths,
 * one per byte, first group in the highest byte.
 *
 * @param inBuf  input integers
 * @param inPos  read position; advanced by the truncated length
 * @param inLen  number of integers offered; a trailing partial block is ignored
 * @param outBuf output buffer
 * @param outPos write position; set past the last written word
 */
public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos)
{
    inLen -= inLen % BLOCK_LENGTH;
    if (inLen == 0)
    {
        return;
    }

    outBuf[outPos.get()] = inLen;
    outPos.increment();

    DeltaZigzagEncoding.Encoder encoder = new DeltaZigzagEncoding.Encoder(0);
    int[] scratch = new int[BLOCK_LENGTH];
    int writePos = outPos.get();
    int readPos = inPos.get();
    int readEnd = readPos + inLen;

    while (readPos < readEnd)
    {
        encoder.encodeArray(inBuf, readPos, BLOCK_LENGTH, scratch);

        int bits1 = Util.maxbits32(scratch, 0);
        int bits2 = Util.maxbits32(scratch, 32);
        int bits3 = Util.maxbits32(scratch, 64);
        int bits4 = Util.maxbits32(scratch, 96);
        int header = (bits1 << 24) | (bits2 << 16) | (bits3 << 8) | (bits4 << 0);
        outBuf[writePos++] = header;

        writePos += pack(scratch, 0, outBuf, writePos, bits1);
        writePos += pack(scratch, 32, outBuf, writePos, bits2);
        writePos += pack(scratch, 64, outBuf, writePos, bits3);
        writePos += pack(scratch, 96, outBuf, writePos, bits4);

        readPos += BLOCK_LENGTH;
    }

    inPos.add(inLen);
    outPos.set(writePos);
}
/**
 * Compresses data in chunks of at most blocksize integers and records a
 * skip table in metadata.
 *
 * metadata[0] receives data.Length. For every chunk two entries follow:
 * the output position at which the chunk starts and the initial value in
 * effect for the chunk. For a SkippableIntegerCODEC the chunk is
 * delta-coded in place (data is modified) before compression; for a
 * SkippableIntegratedIntegerCODEC the initial value is passed to the codec.
 *
 * @param c         a SkippableIntegerCODEC or SkippableIntegratedIntegerCODEC
 * @param data      integers to compress
 * @param output    compressed output buffer
 * @param outpos    write position in output, advanced by the codec
 * @param metadata  receives the skip table
 * @param blocksize maximum number of integers per chunk
 * @return number of metadata entries written
 * @throws Exception if c is neither supported codec type, or if the codec
 *         does not consume any input for a chunk
 */
private static int compressWithSkipTable(object c, int[] data, int[] output, IntWrapper outpos, int[] metadata, int blocksize)
{
    int metapos = 0;
    metadata[metapos++] = data.Length;
    IntWrapper inpos = new IntWrapper();
    int initvalue = 0;
    IntWrapper ival = new IntWrapper(initvalue);

    while (inpos.get() < data.Length)
    {
        metadata[metapos++] = outpos.get();
        metadata[metapos++] = initvalue;

        int chunkStart = inpos.get();
        int remaining = data.Length - chunkStart;
        // The last chunk may be shorter than blocksize; never ask the codec
        // to read beyond the end of data.
        int size = blocksize > remaining ? remaining : blocksize;

        if (c is SkippableIntegerCODEC)
        {
            initvalue = Delta.delta(data, chunkStart, size, initvalue);
            ((SkippableIntegerCODEC)c).headlessCompress(data, inpos, size, output, outpos);
        }
        else if (c is SkippableIntegratedIntegerCODEC)
        {
            ival.set(initvalue);
            ((SkippableIntegratedIntegerCODEC)c).headlessCompress(data, inpos, size, output, outpos, ival);
            initvalue = ival.get();
        }
        else
        {
            throw new Exception("Unrecognized codec " + c);
        }

        // A codec that cannot consume the chunk would make this loop spin
        // forever; fail loudly instead.
        if (inpos.get() == chunkStart)
        {
            throw new Exception("Codec " + c + " made no progress at position " + chunkStart);
        }
    }

    return metapos;
}
/**
 * Demonstrates header-less compression: two small arrays are compressed
 * back to back into one buffer and then decoded one after the other,
 * supplying the expected element count to each decode call.
 *
 * @throws Exception if a decoded array differs from its original
 */
public void headlessDemo()
{
    _testOutputHelper.WriteLine("Compressing arrays with minimal header...");
    int[] uncompressed1 = { 1, 2, 1, 3, 1 };
    int[] uncompressed2 = { 3, 2, 4, 6, 1 };
    int[] compressed = new int[uncompressed1.Length + uncompressed2.Length + 1024];
    SkippableIntegerCODEC codec = new SkippableComposition(new BinaryPacking(), new VariableByte());

    // compressing
    IntWrapper outPos = new IntWrapper();
    int start1 = outPos.get();
    codec.headlessCompress(uncompressed1, new IntWrapper(), uncompressed1.Length, compressed, outPos);
    int length1 = outPos.get() - start1;

    int start2 = outPos.get();
    codec.headlessCompress(uncompressed2, new IntWrapper(), uncompressed2.Length, compressed, outPos);
    int length2 = outPos.get() - start2;

    compressed = Arrays.copyOf(compressed, length1 + length2);
    _testOutputHelper.WriteLine("compressed unsorted integers from " + uncompressed1.Length * 4 + "B to " + length1 * 4 + "B");
    _testOutputHelper.WriteLine("compressed unsorted integers from " + uncompressed2.Length * 4 + "B to " + length2 * 4 + "B");
    _testOutputHelper.WriteLine("Total compressed output " + compressed.Length);

    // Each recovery buffer is sized from its own source array.
    int[] recovered1 = new int[uncompressed1.Length];
    int[] recovered2 = new int[uncompressed2.Length];

    IntWrapper inPos = new IntWrapper();
    _testOutputHelper.WriteLine("Decoding first array starting at pos = " + inPos);
    codec.headlessUncompress(compressed, inPos, compressed.Length, recovered1, new IntWrapper(0), uncompressed1.Length);
    _testOutputHelper.WriteLine("Decoding second array starting at pos = " + inPos);
    codec.headlessUncompress(compressed, inPos, compressed.Length, recovered2, new IntWrapper(0), uncompressed2.Length);

    if (!Arrays.equals(uncompressed1, recovered1))
    {
        throw new Exception("First array does not match.");
    }
    if (!Arrays.equals(uncompressed2, recovered2))
    {
        throw new Exception("Second array does not match.");
    }
    _testOutputHelper.WriteLine("The arrays match, your code is probably ok.");
}
/**
 * Encodes the differences between consecutive input integers as
 * variable-length byte sequences.
 *
 * Each difference (first one taken against 0) is treated as an unsigned
 * 32-bit number and written with one to five bytes depending on its
 * magnitude; the last byte of each value has bit 7 set. The helpers
 * extract7bits and extract7bitsmaskless are defined elsewhere
 * (presumably they return the i-th 7-bit group - confirm).
 *
 * @param in       input integers
 * @param inpos    read position; advanced by inlength
 * @param inlength number of integers to encode; nothing is done when 0
 * @param out      output byte array
 * @param outpos   write position; set past the last written byte
 */
public void compress(int[] @in, IntWrapper inpos, int inlength, sbyte[] @out, IntWrapper outpos)
{
    if (inlength == 0)
    {
        return;
    }

    int previous = 0;
    int writePos = outpos.get();
    int first = inpos.get();
    int end = first + inlength;

    for (int k = first; k < end; ++k)
    {
        long val = (@in[k] - previous) & 0xFFFFFFFFL; // To be consistent with unsigned integers in C/C++
        previous = @in[k];

        if (val < (1 << 7))
        {
            // Single byte: payload and terminator flag together.
            @out[writePos++] = (sbyte)(val | (1 << 7));
            continue;
        }

        // Index of the final (flagged) 7-bit group.
        int last;
        if (val < (1 << 14))
        {
            last = 1;
        }
        else if (val < (1 << 21))
        {
            last = 2;
        }
        else if (val < (1 << 28))
        {
            last = 3;
        }
        else
        {
            last = 4;
        }

        for (int g = 0; g < last; ++g)
        {
            @out[writePos++] = (sbyte)extract7bits(g, val);
        }
        @out[writePos++] = (sbyte)(extract7bitsmaskless(last, (val)) | (1 << 7));
    }

    outpos.set(writePos);
    inpos.add(inlength);
}
/**
 * Compresses with F1 first and then lets F2 compress whatever input F1
 * did not consume.
 *
 * If the output position is still 0 after F1 has run, a single zero word
 * is written at index 0 and the output position is moved to 1 before F2
 * runs.
 *
 * @param in        input integers
 * @param inpos     read position, handed to both codecs
 * @param inlength  number of integers to compress; nothing is done when 0
 * @param out       output buffer
 * @param outpos    write position, handed to both codecs
 * @param initvalue initial-value wrapper passed through to both codecs
 */
public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos, IntWrapper initvalue)
{
    if (inlength == 0)
    {
        return;
    }

    int start = inpos.get();
    F1.headlessCompress(@in, inpos, inlength, @out, outpos, initvalue);

    if (outpos.get() == 0)
    {
        @out[0] = 0;
        outpos.increment();
    }

    int consumed = inpos.get() - start;
    F2.headlessCompress(@in, inpos, inlength - consumed, @out, outpos, initvalue);
}
/**
 * Encodes as many complete blocks of BLOCK_SIZE integers as fit in
 * thissize.
 *
 * For each block getBestBFromData selects an index into the bits table
 * and reports an exception count. Values with bits set above the chosen
 * width are exceptions: their high parts fill the first half of
 * exceptbuffer and their positions the second half, and that buffer is
 * compressed with S9. A header word (written after the exception area is
 * known) stores the table index, the exception count shifted by 8 and
 * the compressed exception size shifted by 16. The block values are then
 * bit-packed 32 at a time.
 *
 * @param in       input integers
 * @param inpos    read position; set past the last encoded block
 * @param thissize number of integers available from inpos
 * @param out      output buffer
 * @param outpos   write position; set past the last written word
 */
private void encodePage(int[] @in, IntWrapper inpos, int thissize, int[] @out, IntWrapper outpos)
{
    int writePos = outpos.get();
    int readPos = inpos.get();
    int readEnd = readPos + thissize;
    IntWrapper bestb = new IntWrapper();
    IntWrapper bestexcept = new IntWrapper();

    while (readPos + BLOCK_SIZE <= readEnd)
    {
        getBestBFromData(@in, readPos, bestb, bestexcept);
        int bIndex = bestb.get();
        int nbrexcept = bestexcept.get();
        int width = bits[bIndex];

        // Reserve the header slot; it is filled in once exceptsize is known.
        int headerPos = writePos;
        writePos++;

        int exceptsize = 0;
        if (nbrexcept > 0)
        {
            int found = 0;
            for (int i = 0; i < BLOCK_SIZE; ++i)
            {
                int high = (int)((uint)@in[readPos + i] >> width);
                if (high != 0)
                {
                    exceptbuffer[found + nbrexcept] = i;
                    exceptbuffer[found] = high;
                    ++found;
                }
            }
            exceptsize = S9.compress(exceptbuffer, 0, 2 * nbrexcept, @out, writePos);
            writePos += exceptsize;
        }

        @out[headerPos] = bIndex | (nbrexcept << 8) | (exceptsize << 16);

        for (int k = 0; k < BLOCK_SIZE; k += 32)
        {
            BitPacking.fastpack(@in, readPos + k, @out, writePos, width);
            writePos += width;
        }

        readPos += BLOCK_SIZE;
    }

    inpos.set(readPos);
    outpos.set(writePos);
}
/**
 * Compresses whole blocks of BLOCK_LENGTH integers using xorMaxBits and
 * xorPack on four groups of 32 values per block.
 *
 * The input length is truncated to a multiple of BLOCK_LENGTH and written
 * as a header word. For each group the value handed to the helpers as
 * context is the input element just before the group; for the first group
 * of a block it is the last element of the previous block (0 initially).
 * A per-block header word stores the four bit widths, one per byte.
 *
 * @param inBuf  input integers
 * @param inPos  read position; advanced by the truncated length
 * @param inLen  number of integers offered; a trailing partial block is ignored
 * @param outBuf output buffer
 * @param outPos write position; set past the last written word
 */
public void compress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos)
{
    inLen -= inLen % BLOCK_LENGTH;
    if (inLen == 0)
    {
        return;
    }

    outBuf[outPos.get()] = inLen;
    outPos.increment();

    int[] scratch = new int[32];
    int writePos = outPos.get();
    int readPos = inPos.get();
    int readEnd = readPos + inLen;
    int context = 0;

    while (readPos < readEnd)
    {
        int bits1 = xorMaxBits(inBuf, readPos + 0, 32, context);
        int bits2 = xorMaxBits(inBuf, readPos + 32, 32, inBuf[readPos + 31]);
        int bits3 = xorMaxBits(inBuf, readPos + 64, 32, inBuf[readPos + 63]);
        int bits4 = xorMaxBits(inBuf, readPos + 96, 32, inBuf[readPos + 95]);

        int header = (bits1 << 24) | (bits2 << 16) | (bits3 << 8) | (bits4 << 0);
        outBuf[writePos++] = header;

        writePos += xorPack(inBuf, readPos + 0, outBuf, writePos, bits1, context, scratch);
        writePos += xorPack(inBuf, readPos + 32, outBuf, writePos, bits2, inBuf[readPos + 31], scratch);
        writePos += xorPack(inBuf, readPos + 64, outBuf, writePos, bits3, inBuf[readPos + 63], scratch);
        writePos += xorPack(inBuf, readPos + 96, outBuf, writePos, bits4, inBuf[readPos + 95], scratch);

        // The last value of this block seeds the next block.
        context = inBuf[readPos + 127];
        readPos += BLOCK_LENGTH;
    }

    inPos.add(inLen);
    outPos.set(writePos);
}
/**
 * For every codec in codecs and every prefix length n from 0 to 4096,
 * compresses the first n values of a repeating 0..127 pattern and
 * decompresses them again, checking that the reported output count, the
 * consumed input count and the decoded values all match.
 *
 * @throws Exception on the first mismatch
 */
public void consistentTest()
{
    const int N = 4096;
    int[] data = new int[N];
    int[] rev = new int[N];
    for (int i = 0; i < N; ++i)
    {
        data[i] = i % 128;
    }

    foreach (SkippableIntegerCODEC c in codecs)
    {
        _testOutputHelper.WriteLine("[SkippeableBasicTest.consistentTest] codec = " + c);
        int[] outBuf = new int[N + 1024];

        for (int n = 0; n <= N; ++n)
        {
            IntWrapper inPos = new IntWrapper();
            IntWrapper outPos = new IntWrapper();
            c.headlessCompress(data, inPos, n, outBuf, outPos);

            IntWrapper readBack = new IntWrapper();
            IntWrapper written = new IntWrapper();
            c.headlessUncompress(outBuf, readBack, outPos.get(), rev, written, n);

            if (written.get() != n)
            {
                throw new Exception("bug " + n);
            }
            if (readBack.get() != outPos.get())
            {
                throw new Exception("bug " + n + " " + readBack.get() + " " + outPos.get());
            }

            for (int j = 0; j < n; ++j)
            {
                if (data[j] != rev[j])
                {
                    throw new Exception("bug");
                }
            }
        }
    }
}
/**
 * Bit-packs blocks of BLOCK_SIZE integers without writing a length header.
 *
 * The input length is first rounded by Util.greatestMultiple to
 * BLOCK_SIZE. While four blocks still fit they share one header word
 * carrying the four bit widths (first block in the highest byte); any
 * remaining blocks each get their own header word holding one width.
 * After each block the write position moves forward by that block's width.
 *
 * @param in       input integers
 * @param inpos    read position; advanced by the rounded length
 * @param inlength number of integers offered
 * @param out      output buffer
 * @param outpos   write position; set past the last written word
 */
public void headlessCompress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
{
    inlength = Util.greatestMultiple(inlength, BLOCK_SIZE);
    int writePos = outpos.get();
    int readPos = inpos.get();
    int readEnd = readPos + inlength;

    // Four blocks per header word.
    while (readPos + BLOCK_SIZE * 4 - 1 < readEnd)
    {
        int mbits1 = Util.maxbits(@in, readPos, BLOCK_SIZE);
        int mbits2 = Util.maxbits(@in, readPos + BLOCK_SIZE, BLOCK_SIZE);
        int mbits3 = Util.maxbits(@in, readPos + 2 * BLOCK_SIZE, BLOCK_SIZE);
        int mbits4 = Util.maxbits(@in, readPos + 3 * BLOCK_SIZE, BLOCK_SIZE);

        int header = (mbits1 << 24) | (mbits2 << 16) | (mbits3 << 8) | (mbits4);
        @out[writePos++] = header;

        BitPacking.fastpackwithoutmask(@in, readPos, @out, writePos, mbits1);
        writePos += mbits1;
        BitPacking.fastpackwithoutmask(@in, readPos + BLOCK_SIZE, @out, writePos, mbits2);
        writePos += mbits2;
        BitPacking.fastpackwithoutmask(@in, readPos + 2 * BLOCK_SIZE, @out, writePos, mbits3);
        writePos += mbits3;
        BitPacking.fastpackwithoutmask(@in, readPos + 3 * BLOCK_SIZE, @out, writePos, mbits4);
        writePos += mbits4;

        readPos += BLOCK_SIZE * 4;
    }

    // Leftover blocks: one header word per block.
    while (readPos < readEnd)
    {
        int mbits = Util.maxbits(@in, readPos, BLOCK_SIZE);
        @out[writePos++] = mbits;
        BitPacking.fastpackwithoutmask(@in, readPos, @out, writePos, mbits);
        writePos += mbits;
        readPos += BLOCK_SIZE;
    }

    inpos.add(inlength);
    outpos.set(writePos);
}
/**
 * Decodes blocks of BLOCK_LENGTH integers using xorUnpack on four groups
 * of 32 values per block.
 *
 * The first input word is the number of integers to produce. Each block
 * starts with a header word holding four bit widths, one per byte. The
 * context value passed for a group is the output element just before it
 * (already decoded); for the first group of a block it is the last
 * element of the previous block (0 initially).
 *
 * @param inBuf  compressed input
 * @param inPos  read position; set to the first unread word on return
 * @param inLen  number of input words; nothing is done when 0
 * @param outBuf output array
 * @param outPos write position; advanced by the decoded count
 */
public void uncompress(int[] inBuf, IntWrapper inPos, int inLen, int[] outBuf, IntWrapper outPos)
{
    if (inLen == 0)
    {
        return;
    }

    int outLen = inBuf[inPos.get()];
    inPos.increment();

    int[] scratch = new int[32];
    int readPos = inPos.get();
    int writePos = outPos.get();
    int writeEnd = writePos + outLen;
    int context = 0;

    while (writePos < writeEnd)
    {
        int header = inBuf[readPos];
        ++readPos;
        int bits1 = (int)((uint)header >> 24);
        int bits2 = (int)((uint)header >> 16) & 0xFF;
        int bits3 = (int)((uint)header >> 8) & 0xFF;
        int bits4 = (int)((uint)header >> 0) & 0xFF;

        // Each group is seeded with the value decoded just before it, so
        // the seed must be read only after the preceding group is unpacked.
        readPos += xorUnpack(inBuf, readPos, outBuf, writePos + 0, bits1, context, scratch);
        readPos += xorUnpack(inBuf, readPos, outBuf, writePos + 32, bits2, outBuf[writePos + 31], scratch);
        readPos += xorUnpack(inBuf, readPos, outBuf, writePos + 64, bits3, outBuf[writePos + 63], scratch);
        readPos += xorUnpack(inBuf, readPos, outBuf, writePos + 96, bits4, outBuf[writePos + 95], scratch);

        context = outBuf[writePos + 127];
        writePos += BLOCK_LENGTH;
    }

    outPos.add(outLen);
    inPos.set(readPos);
}
/**
 * Standard benchmark for skippable codecs.
 *
 * Every array is compressed block by block with a skip table, decoded
 * once as a checked dry run, then decoded several more times for timing.
 * Results are printed and appended to the CSV log only when verbose is set.
 *
 * @param csvLog
 *            Writer for CSV log.
 * @param sparsity
 *            value written unchanged into the CSV row
 * @param c
 *            the codec
 * @param data
 *            arrays of input data
 * @param repeat
 *            How many times to repeat the test
 * @param verbose
 *            whether to output result on screen
 */
private static void testCodec(StreamWriter csvLog, int sparsity, object c, int[][] data, int repeat, bool verbose)
{
    if (verbose)
    {
        Console.WriteLine("# " + c);
        Console.WriteLine("# bits per int, compress speed (mis), decompression speed (mis) ");
    }

    int N = data.Length;
    int totalSize = 0;
    int maxLength = 0;
    for (int k = 0; k < N; ++k)
    {
        totalSize += data[k].Length;
        if (data[k].Length > maxLength)
        {
            maxLength = data[k].Length;
        }
    }

    // 4x + 1024 to account for the possibility of some negative
    // compression.
    int[] compressBuffer = new int[4 * maxLength + 1024];
    int[] decompressBuffer = new int[maxLength + 1024];
    int[] metadataBuffer = new int[maxLength];
    const int blocksize = 1024;

    // These variables hold time in microseconds (10^-6).
    double compressTime = 0;
    double decompressTime = 0;
    const int times = 5;

    int size = 0;

    for (int r = 0; r < repeat; ++r)
    {
        size = 0;
        for (int k = 0; k < N; ++k)
        {
            int[] backupdata = Arrays.copyOf(data[k], data[k].Length);

            // compress data.
            long beforeCompress = Port.System.nanoTime() / 1000;
            IntWrapper outpos = new IntWrapper();
            compressWithSkipTable(c, backupdata, compressBuffer, outpos, metadataBuffer, blocksize);
            long afterCompress = Port.System.nanoTime() / 1000;

            // measure time of compression.
            compressTime += afterCompress - beforeCompress;
            int thiscompsize = outpos.get();
            size += thiscompsize;

            // dry run
            int volume = 0;
            {
                IntWrapper compressedpos = new IntWrapper(0);
                volume = decompressFromSkipTable(c, compressBuffer, compressedpos, metadataBuffer, blocksize, decompressBuffer);

                // let us check the answer
                if (volume != backupdata.Length)
                {
                    throw new Exception("Bad output size with codec " + c);
                }
                for (int j = 0; j < volume; ++j)
                {
                    if (data[k][j] != decompressBuffer[j])
                    {
                        throw new Exception("bug in codec " + c);
                    }
                }
            }

            // extract (uncompress) data
            long beforeDecompress = Port.System.nanoTime() / 1000;
            for (int t = 0; t < times; ++t)
            {
                IntWrapper compressedpos = new IntWrapper(0);
                volume = decompressFromSkipTable(c, compressBuffer, compressedpos, metadataBuffer, blocksize, decompressBuffer);
            }
            long afterDecompress = Port.System.nanoTime() / 1000;

            // measure time of extraction (uncompression).
            decompressTime += (afterDecompress - beforeDecompress) / (double)times;
            if (volume != data[k].Length)
            {
                throw new Exception("we have a bug (diff length) " + c + " expected " + data[k].Length + " got " + volume);
            }

            // verify: compare original array with
            // compressed and
            // uncompressed.
            // The bound is the number of decoded integers. The compressed
            // word count (outpos) is unrelated to the array length and would
            // check too few elements or run past the end of data[k].
            for (int m = 0; m < volume; ++m)
            {
                if (decompressBuffer[m] != data[k][m])
                {
                    throw new Exception("we have a bug (actual difference), expected " + data[k][m] + " found " + decompressBuffer[m] + " at " + m);
                }
            }
        }
    }

    if (verbose)
    {
        double bitsPerInt = size * 32.0 / totalSize;
        // Multiply in double so that totalSize * repeat cannot overflow int.
        double processed = (double)totalSize * repeat;
        double compressSpeed = Math.Round(processed / (compressTime));
        double decompressSpeed = Math.Round(processed / (decompressTime));
        Console.WriteLine("\t{0:0.00}\t{1}\t{2}", bitsPerInt, compressSpeed, decompressSpeed);
        csvLog.WriteLine("\"{0}\",{1},{2:0.00},{3},{4}", c, sparsity, bitsPerInt, compressSpeed, decompressSpeed);
    }
}
/**
 * Demonstrates chunked compression: the data is compressed in chunks of
 * ChunkSize integers with binary packing, the leftover tail is compressed
 * with a composition of binary packing and variable byte, and everything
 * is then decoded chunk by chunk and compared with the original.
 *
 * @throws Exception if any decoded value differs from the original
 */
public void advancedExample()
{
    const int TotalSize = 2342351; // some arbitrary number
    const int ChunkSize = 16384; // size of each chunk, choose a multiple of 128
    Console.WriteLine("Compressing " + TotalSize + " integers using chunks of " + ChunkSize + " integers (" + ChunkSize * 4 / 1024 + "KB)");
    Console.WriteLine("(It is often better for applications to work in chunks fitting in CPU cache.)");

    // data should be sorted for best results
    int[] data = new int[TotalSize];
    for (int i = 0; i < data.Length; ++i)
    {
        data[i] = i;
    }

    // Compose the codecs: binary packing does the bulk of the work and
    // variable byte only handles the leftovers of the last chunk.
    IntegratedIntegerCODEC regularcodec = new IntegratedBinaryPacking();
    IntegratedVariableByte ivb = new IntegratedVariableByte();
    IntegratedIntegerCODEC lastcodec = new IntegratedComposition(regularcodec, ivb);

    // output vector should be large enough...
    int[] compressed = new int[TotalSize + 1024];

    // ---- compressing ----
    IntWrapper inputoffset = new IntWrapper(0);
    IntWrapper outputoffset = new IntWrapper(0);
    int fullChunks = TotalSize / ChunkSize;
    for (int chunk = 0; chunk < fullChunks; ++chunk)
    {
        regularcodec.compress(data, inputoffset, ChunkSize, compressed, outputoffset);
    }
    lastcodec.compress(data, inputoffset, TotalSize % ChunkSize, compressed, outputoffset);

    // inputoffset should now be at data.Length; outputoffset tells us how
    // much output was produced.
    Console.WriteLine("compressed from " + data.Length * 4 / 1024 + "KB to " + outputoffset.intValue() * 4 / 1024 + "KB");

    // we can repack the data:
    compressed = Arrays.copyOf(compressed, outputoffset.intValue());

    // ---- uncompressing ----
    // We are *not* assuming that the original array length is known,
    // however we assume that the chunk size (ChunkSize) is known.
    int[] recovered = new int[ChunkSize];
    IntWrapper compoff = new IntWrapper(0);
    int currentpos = 0;

    while (compoff.get() < compressed.Length)
    {
        IntWrapper recoffset = new IntWrapper(0);
        regularcodec.uncompress(compressed, compoff, compressed.Length - compoff.get(), recovered, recoffset);

        if (recoffset.get() < ChunkSize)
        {
            // last chunk detected
            ivb.uncompress(compressed, compoff, compressed.Length - compoff.get(), recovered, recoffset);
        }

        int produced = recoffset.get();
        for (int i = 0; i < produced; ++i)
        {
            if (data[currentpos + i] != recovered[i])
            {
                throw new Exception("bug"); // could use assert
            }
        }
        currentpos += produced;
    }

    Console.WriteLine("data is recovered without loss");
    Console.WriteLine();
}
/**
 * Standard benchmark.
 *
 * The first element of each array is carried over uncompressed and the
 * codec is applied from index 1. For codecs that are not an
 * IntegratedIntegerCODEC, delta coding and its inverse are applied here
 * and are included in the measured times. Results are printed and
 * appended to the CSV log only when verbose is set.
 *
 * @param csvLog
 *            Writer for CSV log.
 * @param sparsity
 *            value written unchanged into the CSV row
 * @param c
 *            the codec
 * @param data
 *            arrays of input data
 * @param repeat
 *            How many times to repeat the test
 * @param verbose
 *            whether to output result on screen
 */
private static void testCodec(StreamWriter csvLog, int sparsity, IntegerCODEC c, int[][] data, int repeat, bool verbose)
{
    if (verbose)
    {
        Console.WriteLine("# " + c);
        Console.WriteLine("# bits per int, compress speed (mis), decompression speed (mis) ");
    }

    int N = data.Length;
    int totalSize = 0;
    int maxLength = 0;
    for (int k = 0; k < N; ++k)
    {
        totalSize += data[k].Length;
        if (data[k].Length > maxLength)
        {
            maxLength = data[k].Length;
        }
    }

    // 4x + 1024 to account for the possibility of some negative
    // compression.
    int[] compressBuffer = new int[4 * maxLength + 1024];
    int[] decompressBuffer = new int[maxLength + 1024];

    // These variables hold time in microseconds (10^-6).
    long compressTime = 0;
    long decompressTime = 0;

    int size = 0;
    IntWrapper inpos = new IntWrapper();
    IntWrapper outpos = new IntWrapper();

    for (int r = 0; r < repeat; ++r)
    {
        size = 0;
        for (int k = 0; k < N; ++k)
        {
            int[] backupdata = Arrays.copyOf(data[k], data[k].Length);

            // compress data.
            long beforeCompress = Port.System.nanoTime() / 1000;
            inpos.set(1);
            outpos.set(0);
            if (!(c is IntegratedIntegerCODEC))
            {
                Delta.delta(backupdata);
            }
            c.compress(backupdata, inpos, backupdata.Length - inpos.get(), compressBuffer, outpos);
            long afterCompress = Port.System.nanoTime() / 1000;

            // measure time of compression.
            compressTime += afterCompress - beforeCompress;
            int thiscompsize = outpos.get() + 1;
            size += thiscompsize;

            // extract (uncompress) data
            long beforeDecompress = Port.System.nanoTime() / 1000;
            inpos.set(0);
            outpos.set(1);
            decompressBuffer[0] = backupdata[0];
            c.uncompress(compressBuffer, inpos, thiscompsize - 1, decompressBuffer, outpos);
            if (!(c is IntegratedIntegerCODEC))
            {
                Delta.fastinverseDelta(decompressBuffer);
            }
            long afterDecompress = Port.System.nanoTime() / 1000;

            // measure time of extraction (uncompression).
            decompressTime += afterDecompress - beforeDecompress;
            if (outpos.get() != data[k].Length)
            {
                throw new Exception("we have a bug (diff length) " + c + " expected " + data[k].Length + " got " + outpos.get());
            }

            // verify: compare original array with
            // compressed and
            // uncompressed.
            for (int m = 0; m < outpos.get(); ++m)
            {
                if (decompressBuffer[m] != data[k][m])
                {
                    throw new Exception("we have a bug (actual difference), expected " + data[k][m] + " found " + decompressBuffer[m] + " at " + m);
                }
            }
        }
    }

    if (verbose)
    {
        double bitsPerInt = size * 32.0 / totalSize;
        // Widen before multiplying so totalSize * repeat cannot overflow int.
        long processed = (long)totalSize * repeat;
        // Times have microsecond resolution and can legitimately add up to 0
        // on small inputs; clamp to 1 so the integer division cannot throw.
        long compressSpeed = processed / Math.Max(1L, compressTime);
        long decompressSpeed = processed / Math.Max(1L, decompressTime);
        Console.WriteLine("\t{0:0.00}\t{1}\t{2}", bitsPerInt, compressSpeed, decompressSpeed);
        csvLog.WriteLine("\"{0}\",{1},{2:0.00},{3},{4}", c, sparsity, bitsPerInt, compressSpeed, decompressSpeed);
    }
}
/**
 * Encodes the differences between consecutive input integers as
 * variable-length byte sequences and stores the bytes in an int array.
 *
 * Each difference (first one taken against 0) is treated as an unsigned
 * 32-bit number and written with one to five bytes; the last byte of each
 * value has bit 7 set. The bytes are collected in a little-endian
 * ByteBuffer, padded with zero bytes to a multiple of four, and copied
 * out as ints. The helpers extract7bits and extract7bitsmaskless are
 * defined elsewhere (presumably they return the i-th 7-bit group -
 * confirm).
 *
 * @param in       input integers
 * @param inpos    read position; advanced by inlength
 * @param inlength number of integers to encode; nothing is done when 0
 * @param out      output int array
 * @param outpos   write position; advanced by the number of ints written
 */
public void compress(int[] @in, IntWrapper inpos, int inlength, int[] @out, IntWrapper outpos)
{
    if (inlength == 0)
    {
        return;
    }

    int previous = 0;
    ByteBuffer buf = ByteBuffer.allocateDirect(inlength * 8);
    buf.order(ByteOrder.LITTLE_ENDIAN);

    int first = inpos.get();
    int end = first + inlength;
    for (int k = first; k < end; ++k)
    {
        long val = (@in[k] - previous) & 0xFFFFFFFFL; // To be consistent with unsigned integers in C/C++
        previous = @in[k];

        if (val < (1 << 7))
        {
            // Single byte: payload and terminator flag together.
            buf.put((sbyte)(val | (1 << 7)));
            continue;
        }

        // Index of the final (flagged) 7-bit group.
        int last;
        if (val < (1 << 14))
        {
            last = 1;
        }
        else if (val < (1 << 21))
        {
            last = 2;
        }
        else if (val < (1 << 28))
        {
            last = 3;
        }
        else
        {
            last = 4;
        }

        for (int g = 0; g < last; ++g)
        {
            buf.put((sbyte)extract7bits(g, val));
        }
        buf.put((sbyte)(extract7bitsmaskless(last, (val)) | (1 << 7)));
    }

    // Pad to a whole number of 32-bit words.
    while (buf.position() % 4 != 0)
    {
        buf.put((sbyte)0);
    }

    int length = buf.position();
    buf.flip();
    IntBuffer ibuf = buf.asIntBuffer();
    int words = length / 4;
    ibuf.get(@out, outpos.get(), words);
    outpos.add(words);
    inpos.add(inlength);
}
/**
 * Benchmarks the byte-oriented codecs on a list of posting arrays.
 *
 * For every codec (taken from bcodecs in DELTA mode, otherwise from
 * regbcodecs) each array is compressed, decompressed and verified, and a
 * trimmed copy of the compressed bytes is kept. The kept copies are then
 * decompressed again inside a timed loop. Bits per integer and a speed
 * figure derived from the elapsed nanoseconds are printed when verbose.
 *
 * @param postings arrays to compress
 * @param cm       selects which codec list is benchmarked
 * @param verbose  whether to print results
 * @throws Exception if a round trip fails or a decoder does not consume
 *         all of its input
 */
public static void bytebench(List<int[]> postings, CompressionMode cm, bool verbose)
{
    int maxlength = 0;
    foreach (int[] posting in postings)
    {
        maxlength = Math.Max(maxlength, posting.Length);
    }
    if (verbose)
    {
        Console.WriteLine("Max array length: " + maxlength);
    }

    sbyte[] compbuffer = new sbyte[6 * (maxlength + 1024)];
    int[] decompbuffer = new int[maxlength];
    if (verbose)
    {
        Console.WriteLine("Scheme -- bits/int -- speed (mis)");
    }

    foreach (ByteIntegerCODEC c in (cm == CompressionMode.DELTA ? bcodecs : regbcodecs))
    {
        long volumein = 0;
        long volumeout = 0;
        sbyte[][] compdata = new sbyte[postings.Count][];

        // Pass 1: compress, verify the round trip, keep the compressed bytes.
        for (int k = 0; k < postings.Count; ++k)
        {
            int[] source = postings[k];

            IntWrapper readPos = new IntWrapper(0);
            IntWrapper writePos = new IntWrapper(0);
            c.compress(source, readPos, source.Length, compbuffer, writePos);
            int clength = writePos.get();

            readPos = new IntWrapper(0);
            writePos = new IntWrapper(0);
            c.uncompress(compbuffer, readPos, clength, decompbuffer, writePos);

            volumein += source.Length;
            volumeout += clength;

            if (writePos.get() != source.Length)
            {
                throw new Exception("bug");
            }
            for (int z = 0; z < source.Length; ++z)
            {
                if (source[z] != decompbuffer[z])
                {
                    throw new Exception("bug");
                }
            }

            compdata[k] = Arrays.copyOf(compbuffer, clength);
        }

        // Pass 2: timed decompression of the stored byte arrays.
        long bef = Port.System.nanoTime();
        foreach (sbyte[] cin in compdata)
        {
            IntWrapper readPos = new IntWrapper(0);
            IntWrapper writePos = new IntWrapper(0);
            c.uncompress(cin, readPos, cin.Length, decompbuffer, writePos);
            if (readPos.get() != cin.Length)
            {
                throw new Exception("bug");
            }
        }
        long aft = Port.System.nanoTime();
        long decomptime = aft - bef;

        double bitsPerInt = volumeout * 8.0 / volumein;
        double decompressSpeed = volumein * 1000.0 / (decomptime);
        if (verbose)
        {
            Console.WriteLine(c + "\t" + string.Format("\t{0:0.00}\t{1:0.00}", bitsPerInt, decompressSpeed));
        }
    }
}