/// <summary>
/// Try the available compression codecs on the data, pick the one whose
/// output is shortest, and compress the data with it.
/// Also records statistics on how many bits the winner saved compared to
/// the longest candidate and (when there is one) to the runner-up.
/// </summary>
/// <param name="label">Name of the stream being compressed; used in statistics and diagnostic output.</param>
/// <param name="data">Values to compress.</param>
/// <returns>The type of the winning codec paired with the bitstream it produced.</returns>
/// <exception cref="NotImplementedException">
/// The winning compressor type is not one this method knows how to construct.
/// </exception>
private Tuple<Type, Bitstream> GetBestCompressor(string label, List<uint> data)
{
    // use this to check each stream
    var cc = new CompressionChecker { Options = CompressionOptions };
    var stream = new Datastream(data);

    // Order candidates from shortest to longest output; the first entry wins.
    var results = cc.TestAll(label, stream, internalFlags);
    results.Sort((a, b) => a.CompressedBitLength.CompareTo(b.CompressedBitLength));
    var best = results[0];

    // Build a fresh instance of the winning codec.
    CodecBase codec;
    if (best.CompressorType == typeof(FixedSizeCodec))
    {
        codec = new FixedSizeCodec();
    }
    else if (best.CompressorType == typeof(ArithmeticCodec))
    {
        codec = new ArithmeticCodec();
    }
    else if (best.CompressorType == typeof(HuffmanCodec))
    {
        codec = new HuffmanCodec();
    }
    else if (best.CompressorType == typeof(GolombCodec))
    {
        // Reuse the parameter reported alongside the winning test result.
        codec = new GolombCodec { Parameter = best.Parameters[0] };
    }
    else
    {
        throw new NotImplementedException("Unknown codec type");
    }

    var bitstream = codec.CompressToStream(stream, internalFlags);
    var codecName = codec.GetType().Name;

    StatRecorder.AddStat("codec win: " + label + " " + codecName, 1);

    // "saved high": gap between the longest candidate and the winner.
    StatRecorder.AddStat(
        $"codec win {codecName} saved high ",
        results.Last().CompressedBitLength - best.CompressedBitLength);

    // "saved low": gap between the runner-up and the winner.
    if (results.Count > 1)
    {
        StatRecorder.AddStat(
            $"codec win {codecName} saved low ",
            results[1].CompressedBitLength - best.CompressedBitLength);
    }

    if (Options.HasFlag(OptionFlags.DumpCompressorSelections))
    {
        WriteLine($"{label} using {codecName}");
    }

    return new Tuple<Type, Bitstream>(codec.GetType(), bitstream);
}
/// <summary>
/// Search for a Golomb parameter that gives a short encoding of
/// <paramref name="data"/> by repeatedly compressing it with trial parameters.
/// </summary>
/// <param name="data">Values to encode; the largest value must be below 0x80000000.</param>
/// <param name="headerFlags">Header flags passed through to each trial compression.</param>
/// <returns>The selected Golomb parameter.</returns>
uint Optimize(Datastream data, Header.HeaderFlags headerFlags)
{
    // Golomb bitlength seems to be a convex down function of the parameter:
    // Let non-negative integers a1,a2,a3,..aN, parameter M. b = #bits to encode M = Floor[log_2 M]+1.
    // Golomb code then qi = Floor[ai/M], ri = ai - M*qi; unary encode qi in qi+1 bits, encode ri in
    // b or b-1 bits using adaptive code. Then each of these is convex (true?) in M, so the sum is
    // convex, so the length is convex.
    // Want best parameter between M=1 (total unary) and M=max{ai} = total fixed encoding.
    // For large ai, unary uses lots of bits, so start at the high end, 2^k = M >= max{ai},
    // divide by 2 until the length stops decreasing, then search the final range.
    // todo - writeup optimal selection as blog post
    var g = new GolombCodec();
    g.Options &= ~OptionFlags.Optimize; // disable auto optimizer

    // function to compute length given the parameter
    Func<uint, uint> f = m1 =>
    {
        g.Parameter = m1;
        var bs = g.CompressToStream(data, headerFlags);
        return bs.Length;
    };

    var max = data.Max();
    Trace.Assert(max < 0x80000000); // needs to be true to work

    // start parameters
    var m = 1U << (int)BitsRequired(max);
    var length = f(m);

    // Halve m while the encoded length keeps shrinking. The m > 1 test comes
    // first so that a starting m of 1 is never halved to 0 (the scheme above
    // divides by M, so 0 is presumably not a usable parameter).
    while (m > 1)
    {
        var oldLength = length;
        m /= 2;
        length = f(m);
        if (length >= oldLength)
        {
            break;
        }
    }

    // now best between m and 2m, binary search
    // todo - search
    // NOTE(review): every probe is compared against the length at the initial
    // left edge, so this narrows toward where the length climbs back to that
    // level rather than toward the minimum itself; the neighbour check below
    // only looks one step to either side. Left as-is to keep the selected
    // parameter unchanged.
    Trace.Assert(m > 0);
    var left = m;
    var right = 2 * m;
    var mid = 0U;
    var a = length; // length always holds f(m) here, and left == m
    while (left <= right)
    {
        mid = (left + right) / 2;
        var c = f(mid);
        if (c < a)
        {
            left = mid + 1;
        }
        else
        {
            right = mid - 1;
        }
    }

    // Keep mid - 1 at 1 or above for the neighbour check.
    if (mid == 1)
    {
        mid = 2;
    }

    // check mid, mid+1, mid-1
    var atMid = f(mid);
    uint best;
    if (atMid < f(mid + 1))
    {
        best = f(mid - 1) < atMid ? mid - 1 : mid;
    }
    else
    {
        best = mid + 1;
    }

    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        // mid can be 2 here, making mid - 2 equal 0; skip evaluating that one.
        Func<uint, string> show = p => p == 0 ? "n/a" : f(p).ToString();
        WriteLine($"Golomb opt {mid} {show(mid - 2)} {show(mid - 1)} {atMid} {show(mid + 1)} {show(mid + 2)} {show(mid + 3)}");
    }

    return best;
}