/// <summary>
/// Write the Huffman header: the universal header followed by the canonical symbol table.
/// Nothing beyond the universal header is written when <paramref name="data"/> is empty.
/// </summary>
/// <param name="bitstream">Stream receiving the header bits.</param>
/// <param name="data">Symbols that are about to be compressed.</param>
/// <param name="headerFlags">Flags forwarded to the universal header writer.</param>
public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    Header.WriteUniversalHeader(bitstream, data, headerFlags);

    // without symbols there is no code table to build or emit
    if (data.Count != 0)
    {
        // build the code tree, then label its nodes with codewords
        var codeTree = MakeTree(data);
        AssignBitStrings(codeTree);

        // collect the leaves (the symbols); the rest of the tree is not needed afterwards
        leaves.Clear();
        GetLeaves(codeTree);

        // relabel codewords into canonical ordering, which the symbol table requires
        MakeCanonical(leaves);
        bitstream.WriteStream(MakeTable(leaves));

        // the symbol data follows the header
        if (Options.HasFlag(OptionFlags.DumpEncoding))
        {
            WriteLine("Huff encode: [");
        }
    }
}
/// <summary>
/// Read the Golomb header: the universal header followed by the Lomont1-coded parameter.
/// </summary>
/// <param name="bitstream">Stream to read the header from.</param>
/// <param name="headerFlags">Flags forwarded to the universal header reader.</param>
/// <returns>The first item of the universal header (total items).</returns>
public override uint ReadHeader(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    var universal = Header.ReadUniversalHeader(bitstream, headerFlags);

    // the parameter is stored right after the universal header
    Parameter = UniversalCodec.Lomont.DecodeLomont1(bitstream, Lomont1Parameter, 0);

    return universal.Item1;
}
// end encoding notes http://www3.sympatico.ca/mt0000/biacode/biacode.html
// and http://bijective.dogma.net/compres10.htm

/// <summary>
/// Write the header for the compression algorithm: symbol counts are gathered,
/// the universal header is written (always including the symbol count), and the
/// frequency tables follow.
/// </summary>
/// <param name="bitstream">Stream receiving the header bits.</param>
/// <param name="data">Symbols that are about to be compressed.</param>
/// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    // count occurrences to get probabilities
    // an empty stream gets a single zero count instead of letting Max() throw
    var maxSymbol = data.Any() ? data.Max() : 0;
    counts = new uint[maxSymbol + 1];
    foreach (var b in data)
    {
        counts[b]++;
    }
    total = (uint)data.Count; // total frequency

    MakeSums();
    ResetCoder();

    // arithmetic gets total probability from the header, so ensure it gets saved
    headerFlags |= Header.HeaderFlags.SymbolCount;
    Header.WriteUniversalHeader(bitstream, data, headerFlags);

    // we'll insert the bitlength of what follows at this spot during the footer
    whereToInsertBitlength = bitstream.Position;

    // write freq tables
    var tables = MakeFrequencyTable();
    bitstream.WriteStream(tables);

    if (Options.HasFlag(OptionFlags.DumpState))
    {
        Write("Enc: ");
    }
}
/// <summary>
/// Read the arithmetic coder header and prime the decoder.
/// The symbol count is always read because it is used as the total probability.
/// </summary>
/// <param name="bitstream">Stream to read the header from.</param>
/// <param name="headerFlags">Flags telling what the header contains. Useful when embedding in other streams.</param>
/// <returns>The symbol count read from the universal header.</returns>
public override uint ReadHeader(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    ResetCoder();

    // arithmetic gets total probability from the header, so the count is always requested
    var universal = Header.ReadUniversalHeader(bitstream, headerFlags | Header.HeaderFlags.SymbolCount);
    total = universal.Item1;

    bitLength = UniversalCodec.Lomont.DecodeLomont1(bitstream, 8, -1);

    // decode the table, recording how many bits it occupied
    bitsRead = 0;
    var tableStart = bitstream.Position;
    DecodeTable(bitstream);
    bitsRead = bitstream.Position - tableStart;

    // start buffer with 31 bits
    buffer = 0;
    var bitsToLoad = 31;
    while (bitsToLoad > 0)
    {
        buffer = (buffer << 1) | ReadBit(bitstream);
        --bitsToLoad;
    }

    if (Options.HasFlag(OptionFlags.DumpState))
    {
        Write("Dec: ");
    }

    return total;
}
/// <summary>
/// Read the header for the compression algorithm.
/// Bits per symbol is always requested from the universal header and stored in BitsPerSymbol.
/// </summary>
/// <param name="bitstream">Stream to read the header from.</param>
/// <param name="headerFlags">Flags telling what the header contains. Useful when embedding in other streams.</param>
/// <returns>The first item of the universal header (the symbol count).</returns>
public override uint ReadHeader(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    var flagsWithBitsPerSymbol = headerFlags | Header.HeaderFlags.BitsPerSymbol;
    var universal = Header.ReadUniversalHeader(bitstream, flagsWithBitsPerSymbol);

    BitsPerSymbol = universal.Item2;
    return universal.Item1;
}
/// <summary>
/// Write the header for the compression algorithm.
/// Bits per symbol is always added to the universal header flags.
/// </summary>
/// <param name="bitstream">Stream receiving the header bits.</param>
/// <param name="data">Symbols that are about to be compressed.</param>
/// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    var flagsWithBitsPerSymbol = headerFlags | Header.HeaderFlags.BitsPerSymbol;
    Header.WriteUniversalHeader(bitstream, data, flagsWithBitsPerSymbol);

    // an empty stream is sized as if its largest value were 0
    var largestSymbol = data.Any() ? data.Max() : 0;
    BitsPerSymbol = BitsRequired(largestSymbol);
}
/// <summary>
/// Compress <paramref name="data"/> into a new bitstream: header, then each symbol, then footer.
/// Stream version to keep from 0 padding last byte.
/// </summary>
/// <param name="data">Symbols to compress.</param>
/// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
/// <returns>The bitstream holding the compressed output.</returns>
public Bitstream CompressToStream(Datastream data, Header.HeaderFlags headerFlags)
{
    var output = new Bitstream();

    WriteHeader(output, data, headerFlags);
    foreach (var item in data)
    {
        CompressSymbol(output, item);
    }
    WriteFooter(output);

    return output;
}
/// <summary>
/// Decompress a bitstream: read the header, decode as many symbols as the header reports,
/// then read the footer.
/// </summary>
/// <param name="bitstream">Stream holding the compressed data.</param>
/// <param name="headerFlags">Flags telling what the header contains.</param>
/// <returns>The decoded symbols.</returns>
public Datastream DecompressFromStream(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    uint remaining = ReadHeader(bitstream, headerFlags);
    var decoded = new Datastream();

    while (remaining > 0)
    {
        decoded.Add(DecompressSymbol(bitstream));
        --remaining;
    }

    ReadFooter(bitstream);
    return decoded;
}
/// <summary>
/// Compute the header values from the collected streams (distances, lengths, literals)
/// and write the universal header followed by those values.
/// </summary>
/// <param name="bitstream">Stream receiving the header bits.</param>
/// <param name="data">Original data, forwarded to the universal header writer.</param>
/// <param name="headerFlags">Flags forwarded to the universal header writer.</param>
private void PackHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    // actual value extremes occurring in data streams
    encoderState.ActualMaxDistance = distances.Any() ? distances.Max() : 0;
    encoderState.ActualMinLength = lengths.Any() ? lengths.Min() : 0;

    // token (distance,length) is encoded as
    // (length - actualMinLength) * (actualMaxDistance+1) + distance
    // compute largest occurring token value
    uint actualMaxToken = 0;
    for (var i = 0; i < distances.Count; ++i)
    {
        uint length = lengths[i];
        uint distance = distances[i];
        uint token = (length - encoderState.ActualMinLength) * (encoderState.ActualMaxDistance + 1) + distance;
        actualMaxToken = Math.Max(actualMaxToken, token);
    }

    // bit sizes
    // literals is guarded like distances and lengths so an empty stream does not make Max() throw
    encoderState.ActualBitsPerSymbol = BitsRequired(literals.Any() ? literals.Max() : 0);
    encoderState.ActualBitsPerToken = BitsRequired(actualMaxToken);

    // checks: the symbol size is stored minus one below, so it must be positive
    Trace.Assert(0 < encoderState.ActualBitsPerSymbol);

    // header values
    Header.WriteUniversalHeader(bitstream, data, headerFlags);
    UniversalCodec.Lomont.EncodeLomont1(bitstream, encoderState.ActualBitsPerSymbol - 1, 3, 0); // usually 8, -1 => 7, fits in 3 bits
    UniversalCodec.Lomont.EncodeLomont1(bitstream, encoderState.ActualBitsPerToken - 1, 5, 0);  // around 20 or so, depends on parameters
    UniversalCodec.Lomont.EncodeLomont1(bitstream, encoderState.ActualMinLength, 2, 0);         // usually 2
    UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMaxToken, 25, -10);
    UniversalCodec.Lomont.EncodeLomont1(bitstream, encoderState.ActualMaxDistance, 14, -7);

    if (Options.HasFlag(OptionFlags.DumpHeader))
    {
        WriteLine("LZ77 header:");
        WriteLine($" datalen {data.Count} bits/symbol {encoderState.ActualBitsPerSymbol}, bits/token {encoderState.ActualBitsPerToken}, minLen {encoderState.ActualMinLength}, max token {actualMaxToken}, max dist {encoderState.ActualMaxDistance}");
    }
}
/// <summary>
/// Write the Golomb header: the universal header followed by the Lomont1-coded parameter.
/// When the Optimize option is set, the parameter is first replaced by the result of Optimize.
/// </summary>
/// <param name="bitstream">Stream receiving the header bits.</param>
/// <param name="data">Symbols that are about to be compressed.</param>
/// <param name="headerFlags">Flags forwarded to the universal header writer.</param>
public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    Header.WriteUniversalHeader(bitstream, data, headerFlags);

    var optimizeRequested = Options.HasFlag(OptionFlags.Optimize);
    if (optimizeRequested)
    {
        Parameter = Optimize(data, headerFlags);
    }

    var dumpRequested = Options.HasFlag(OptionFlags.DumpDebug);
    if (dumpRequested)
    {
        WriteLine($"Golomb encode parameter {Parameter}");
    }

    // store the parameter that the symbols will be encoded with
    UniversalCodec.Lomont.EncodeLomont1(bitstream, Parameter, Lomont1Parameter, 0);
}
/// <summary>
/// Read the header for the compression algorithm into a fresh decoder state:
/// symbol count, max distance, min length, and the item decoders for each stream.
/// </summary>
/// <param name="bitstream">Stream to read the header from.</param>
/// <param name="headerFlags">Flags telling what the header contains. Useful when embedding in other streams.</param>
/// <returns>The symbol count stored in the decoder state.</returns>
public override uint ReadHeader(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    decoderState = new DecoderState();

    // universal header values
    var universal = Header.ReadUniversalHeader(bitstream, headerFlags);
    decoderState.SymbolCount = universal.Item1;

    // max distance occurring, used to encode tokens, very useful to users to know window needed size
    decoderState.ActualMaxDistance = UniversalCodec.Lomont.DecodeLomont1(bitstream, 10, 0);
    decoderState.ActualMinLength = UniversalCodec.Lomont.DecodeLomont1(bitstream, 2, 0);

    // one bit selects plain decisions (0) or decision runs (anything else)
    var hasDecisionRuns = bitstream.Read(1) != 0;
    if (hasDecisionRuns)
    {
        // runs carry an initial value ahead of the item
        decoderState.InitialValue = bitstream.Read(1);
        decoderState.DecisionRunDecoder = ReadItem(bitstream);
    }
    else
    {
        decoderState.DecisionDecoder = ReadItem(bitstream);
    }

    // literals
    decoderState.LiteralDecoder = ReadItem(bitstream);

    // one bit selects combined tokens (0) or separate distance, length items
    var hasSeparatePairs = bitstream.Read(1) != 0;
    if (hasSeparatePairs)
    {
        decoderState.DistanceDecoder = ReadItem(bitstream);
        decoderState.LengthDecoder = ReadItem(bitstream);
    }
    else
    {
        decoderState.TokenDecoder = ReadItem(bitstream);
    }

    return decoderState.SymbolCount;
}
/// <summary>
/// Read the Huffman header into a fresh decompressor state: the universal header,
/// then the decode table.
/// </summary>
/// <param name="bitstream">Stream to read the header from; also saved in the state for internal use.</param>
/// <param name="headerFlags">Flags forwarded to the universal header reader.</param>
/// <returns>The symbol length read from the universal header.</returns>
public override uint ReadHeader(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    state = new DecompresorState();
    bool lowMemory = Options.HasFlag(OptionFlags.UseLowMemoryDecoding);

    // save stream for internal use
    state.Bitstream = bitstream;

    var universal = Header.ReadUniversalHeader(bitstream, headerFlags);
    state.SymbolLength = universal.Item1;

    ParseTable(state, lowMemory);

    // dump table for debugging
    // NOTE(review): this dump is not gated by any Dump* option flag - confirm that is intended
    if (!lowMemory)
    {
        WriteLine("Decode tree: ");
        var rowNumber = 1;
        foreach (var tableRow in state.Table)
        {
            Write($" {rowNumber,3}: {tableRow.Item1,3} -> ");
            foreach (var symbol in tableRow.Item2)
            {
                Write($"{symbol,3}, ");
            }
            WriteLine();
            ++rowNumber;
        }
    }

    if (Options.HasFlag(OptionFlags.DumpDecoding))
    {
        WriteLine("Huff decode [");
    }

    return state.SymbolLength;
}
/// <summary>
/// Read the LZ77 header into the decoder state: the universal header, then bits per symbol,
/// bits per token, min length, max token and max distance.
/// </summary>
/// <param name="bitstream">Stream to read the header from.</param>
/// <param name="headerFlags">Flags telling what the header contains. Useful when embedding in other streams.</param>
/// <returns>The byte length held by the decoder state after it has been reset.</returns>
public override uint ReadHeader(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    // universal header values
    var universal = Header.ReadUniversalHeader(bitstream, headerFlags);
    decoderState.ByteLength = universal.Item1;

    // the two bit sizes are stored minus one
    decoderState.ActualBitsPerSymbol = UniversalCodec.Lomont.DecodeLomont1(bitstream, 3, 0) + 1; // usually 8, -1 => 7, fits in 3 bits
    decoderState.ActualBitsPerToken = UniversalCodec.Lomont.DecodeLomont1(bitstream, 5, 0) + 1;  // around 20 or so, depends on parameters
    decoderState.ActualMinLength = UniversalCodec.Lomont.DecodeLomont1(bitstream, 2, 0);         // usually 2
    uint maxToken = UniversalCodec.Lomont.DecodeLomont1(bitstream, 25, -10);
    decoderState.ActualMaxDistance = UniversalCodec.Lomont.DecodeLomont1(bitstream, 14, -7);

    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        // some info to help analyze
        WriteLine("LZ77 decompress:");
        WriteLine($" Data length {decoderState.ByteLength} ");
        WriteLine($" Bits per symbol {decoderState.ActualBitsPerSymbol}");
        WriteLine($" Bits per token {decoderState.ActualBitsPerToken}");
        WriteLine($" Max token {maxToken}");
        WriteLine($" Max distance {decoderState.ActualMaxDistance}");
        WriteLine($" Min length {decoderState.ActualMinLength}");

        Write("LZ77 decode stream: ");
    }

    decoderState.Reset();
    return decoderState.ByteLength;
}
/// <summary>
/// Write the header for the compression algorithm: the internal streams are cleared,
/// refilled by scanning the data, and the header is packed into the bitstream.
/// </summary>
/// <param name="bitstream">Stream receiving the header bits.</param>
/// <param name="data">Symbols that are about to be compressed.</param>
/// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    // start from empty streams
    decisions.Clear();
    literals.Clear();
    distances.Clear();
    lengths.Clear();

    // fill the streams from the data, then emit the header
    ScanData(data);
    PackHeader(bitstream, data, headerFlags);

    // start indices here
    encoderState.Reset();

    var dumpStream = Options.HasFlag(OptionFlags.DumpEncode);
    if (dumpStream)
    {
        Write("LZ77 stream: ");
    }
}
/// <summary>
/// Default header reader: reads nothing and reports 0 symbols.
/// </summary>
/// <param name="bitstream">Stream to read the header from (unused here).</param>
/// <param name="headerFlags">Flags telling what the header contains (unused here).</param>
/// <returns>Always 0.</returns>
public virtual uint ReadHeader(Bitstream bitstream, Header.HeaderFlags headerFlags) => 0;
/// <summary>
/// Try the compression methods enabled in Options on the data and
/// return the list of compression results (bit length, type, optional parameters).
/// Each result is also recorded with StatRecorder under the given prefix.
/// </summary>
/// <param name="statPrefix">Prefix for the statistic names, joined to the compressor name with "_".</param>
/// <param name="data">Symbols to compress.</param>
/// <param name="headerFlags">Header flags passed to each codec.</param>
/// <returns>One result per compression method that was tried.</returns>
public List<Result> TestAll(string statPrefix, Datastream data, Header.HeaderFlags headerFlags)
{
    var results = new List<Result>();

    // perform compression algorithm
    Action<string, CodecBase, Type> tryCodec = (label, codec, codecType) =>
    {
        var bitstream = codec.CompressToStream(data, headerFlags);
        var result = new Result(label, bitstream.Length, codecType);
        // Golomb results also carry the parameter that was used
        if (codec.GetType() == typeof(GolombCodec))
        {
            var golomb = (GolombCodec)codec;
            result.Parameters.Add(golomb.Parameter);
            result.CompressorName += $"({golomb.Parameter})";
        }
        results.Add(result);
    };

    // try compression algorithms
    if (Options.HasFlag(OptionFlags.UseFixed))
    {
        tryCodec("Fixed size", new FixedSizeCodec(), typeof(FixedSizeCodec));
    }
    if (Options.HasFlag(OptionFlags.UseArithmetic))
    {
        tryCodec("Arithmetic", new ArithmeticCodec(), typeof(ArithmeticCodec));
    }
    if (Options.HasFlag(OptionFlags.UseHuffman))
    {
        tryCodec("Huffman", new HuffmanCodec(), typeof(HuffmanCodec));
    }
    // Max() throws on an empty sequence, so Golomb is only considered for non-empty data
    if (Options.HasFlag(OptionFlags.UseGolomb) && data.Any() && data.Max() < GolombCodec.GolombThreshold)
    {
        tryCodec("Golomb", new GolombCodec(), typeof(GolombCodec));
    }

    // the universal encoders are fed each value plus one
    Action<string, UniversalCodec.UniversalCodeDelegate, Type> tryEncoder = (label, codec, codecType) =>
    {
        var bitstream = UniversalCodec.CompressStream(codec, data.Select(v => v + 1).ToList());
        results.Add(new Result(label, bitstream.Length, codecType));
    };

    // try Elias codes - all perform poorly - todo - need way to pass this back as type?
    if (Options.HasFlag(OptionFlags.UseEliasDelta))
    {
        tryEncoder("EliasDelta", UniversalCodec.Elias.EncodeDelta, typeof(UniversalCodec.Elias));
    }
    if (Options.HasFlag(OptionFlags.UseEliasGamma))
    {
        tryEncoder("EliasGamma", UniversalCodec.Elias.EncodeGamma, typeof(UniversalCodec.Elias));
    }
    if (Options.HasFlag(OptionFlags.UseEliasOmega))
    {
        tryEncoder("EliasOmega", UniversalCodec.Elias.EncodeOmega, typeof(UniversalCodec.Elias));
    }

    // Stout
    if (Options.HasFlag(OptionFlags.UseStout))
    {
        tryEncoder("Stout", (b, v) => UniversalCodec.Stout.Encode(b, v, 3), typeof(UniversalCodec.Stout));
    }

    // BinaryAdaptiveSequentialEncode
    if (Options.HasFlag(OptionFlags.UseBasc))
    {
        var bitstream = new Bitstream();
        UniversalCodec.BinaryAdaptiveSequentialEncode(bitstream, data, UniversalCodec.Elias.EncodeDelta);
        var label = "BASC";
        results.Add(new Result(label, bitstream.Length, typeof(UniversalCodec)));
    }

    // save stats
    foreach (var result in results)
    {
        StatRecorder.AddStat(statPrefix + "_" + result.CompressorName, result.CompressedBitLength);
    }

    return results;
}
/// <summary>
/// Default header writer: writes nothing.
/// </summary>
/// <param name="bitstream">Stream that would receive the header bits (unused here).</param>
/// <param name="data">Symbols that are about to be compressed (unused here).</param>
/// <param name="headerFlags">Flags telling what to put in the header (unused here).</param>
public virtual void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    // intentionally empty
}
/// <summary>
/// Search for the Golomb parameter giving the shortest encoding of <paramref name="data"/>.
/// Each candidate is measured by compressing the data with a scratch GolombCodec
/// whose own optimizer is disabled.
/// </summary>
/// <param name="data">Symbols to be encoded.</param>
/// <param name="headerFlags">Header flags used for every trial compression.</param>
/// <returns>The chosen parameter; the current Parameter when <paramref name="data"/> is empty.</returns>
uint Optimize(Datastream data, Header.HeaderFlags headerFlags)
{
    // Golomb bitlength seems to be a convex down function of the parameter:
    // Let non-negative integers a1,a2,a3,..aN, parameter M. b=#bits to encode M, = Floor[log_2 M]+1
    // golomb code then qi=Floor[ai/M],ri=ai-Mqi, unary encode qi in qi+1 bits, encode ri in b or b-1 bits using
    // adaptive code. Then each of these is convex (true?) in M, so sum is convex, so length is convex.
    // Want best parameter between M=1 (total unary) and M=max{ai} = total fixed encoding
    // for large ai, unary uses lots of bits, so start at high end, 2^k=M >= max{ai}, divide by 2 until stops decreasing,
    // then binary search on final range.
    // todo - writeup optimal selection as blog post

    // nothing to measure on an empty stream, and Max() below would throw: keep the current parameter
    if (!data.Any())
    {
        return Parameter;
    }

    var g = new GolombCodec();
    g.Options &= ~OptionFlags.Optimize; // disable auto optimizer

    // function to compute length given the parameter
    Func<uint, uint> f = m1 =>
    {
        g.Parameter = m1;
        return g.CompressToStream(data, headerFlags).Length;
    };

    Trace.Assert(data.Max() < 0x80000000); // needs to be true to work

    // start parameters
    var m = 1U << (int)BitsRequired(data.Max());
    var length = f(m);

    // halve while the length keeps decreasing; never step below 1, since the
    // code divides by the parameter and 0 is therefore not a usable value
    while (m > 1)
    {
        var oldLength = length;
        m /= 2;
        length = f(m);
        if (length >= oldLength)
        {
            break;
        }
    }

    // now best between length and oldLength, binary search
    // todo - search
    Trace.Assert(m > 0);
    var left = m;
    var right = 2 * m;
    var mid = 0U;
    var a = f(left);
    while (left <= right)
    {
        mid = (left + right) / 2;
        var c = f(mid);
        if (c < a)
        {
            left = mid + 1;
        }
        else
        {
            right = mid - 1;
        }
    }

    // keep mid - 1 a usable (non-zero) parameter
    if (mid == 1)
    {
        mid = 2;
    }

    // check mid, mid+1, mid-1; the length at mid is measured once and reused
    var lengthAtMid = f(mid);
    uint best;
    if (lengthAtMid < f(mid + 1))
    {
        best = f(mid - 1) < lengthAtMid ? mid - 1 : mid;
    }
    else
    {
        best = mid + 1;
    }

    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        // mid can be 2 here, which makes mid - 2 the unusable parameter 0;
        // that slot is reported as n/a instead of being encoded
        Func<uint, string> show = p => p == 0 ? "n/a" : f(p).ToString();
        WriteLine($"Golomb opt {mid} {show(mid - 2)} {f(mid - 1)} {lengthAtMid} {f(mid + 1)} {f(mid + 2)} {f(mid + 3)}");
    }

    return best;
}
/// <summary>
/// Write the header for the compression algorithm.
/// Due to the vagaries of this format, the entire file is written in this call:
/// the universal header, max distance, min length, and then the smaller encoding
/// of each stream (decisions or decision runs, literals, tokens or distance/length pairs).
/// </summary>
/// <param name="bitstream">Stream receiving the output.</param>
/// <param name="data">Symbols to compress.</param>
/// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    // erase data streams
    decisions.Clear();
    decisionRuns.Clear();
    literals.Clear();
    distances.Clear();
    lengths.Clear();
    tokens.Clear();

    // fill in all the data streams
    uint actualMinLength, actualMaxDistance;
    ComputeStreams(data, out actualMinLength, out actualMaxDistance);

    // due to the vagaries of this format, we write the entire file in the header call,
    // and unfortunately ignore the encode symbol and footer sections

    // dump info to help analyze
    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        WriteLine("LZCL compress:");
        WriteLine($" Data length {data.Count} ");
    }

    if (Options.HasFlag(OptionFlags.ShowTallies))
    {
        // some info to help analyze and make decisions
        Write("Length tally: ");
        Tally(lengths);
        WriteLine();

        Write("Distance tally: ");
        Tally(distances);
        WriteLine();
    }

    // get compressed streams so we can decide what to output
    var decisionChoice = GetBestCompressor("decisions", decisions);
    var decisionRunsChoice = GetBestCompressor("decision runs", decisionRuns);
    var literalsChoice = GetBestCompressor("literals", literals);
    var tokensChoice = GetBestCompressor("tokens", tokens);
    var distancesChoice = GetBestCompressor("distances", distances);
    var lengthsChoice = GetBestCompressor("lengths", lengths);

    // write header values
    Header.WriteUniversalHeader(bitstream, data, headerFlags);

    // save max distance occurring, used to encode tokens, very useful to users to know window needed size
    UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMaxDistance, 10, 0);
    UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMinLength, 2, 0);
    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        WriteLine($"actual min length {actualMinLength}");
        WriteLine($"Max distance {actualMaxDistance}");
    }

    if (decisionChoice.Item2.Length < decisionRunsChoice.Item2.Length)
    {
        // denote choice
        bitstream.Write(0);
        // save item
        WriteItem(bitstream, decisionChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Decisions smaller than decision runs");
        }
        StatRecorder.AddStat($"codec used: decisions {decisionChoice.Item1.Name}", 1);
    }
    else
    {
        // denote choice
        bitstream.Write(1);
        // save initial value; with no decisions at all there is no first entry to index, so 0 is written
        bitstream.Write(decisions.Count > 0 ? decisions[0] : 0);
        // save item
        WriteItem(bitstream, decisionRunsChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Decisions runs smaller than decisions");
        }
        StatRecorder.AddStat($"codec used: decision runs {decisionRunsChoice.Item1.Name}", 1);
    }

    // literals
    WriteItem(bitstream, literalsChoice);
    StatRecorder.AddStat($"codec used: literals {literalsChoice.Item1.Name}", 1);

    // tokens or separate distance, length pairs
    if (tokensChoice.Item2.Length < distancesChoice.Item2.Length + lengthsChoice.Item2.Length)
    {
        // denote choice
        bitstream.Write(0);
        // save item
        WriteItem(bitstream, tokensChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Tokens smaller than distance,length pairs");
        }
        StatRecorder.AddStat($"codec used: tokens {tokensChoice.Item1.Name}", 1);
    }
    else
    {
        // denote choice
        bitstream.Write(1);
        // save items
        WriteItem(bitstream, distancesChoice);
        WriteItem(bitstream, lengthsChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Distance,length pairs smaller than tokens");
        }
        StatRecorder.AddStat($"codec used: distances {distancesChoice.Item1.Name}", 1);
        StatRecorder.AddStat($"codec used: lengths {lengthsChoice.Item1.Name}", 1);
    }
}