/// <summary>
/// Decompress a single symbol by walking the canonical-codeword table held in
/// <c>state</c>: accumulate bits until the accumulator falls inside a table
/// row's codeword range, then emit that row's symbol.
/// </summary>
/// <param name="bitstream">NOTE(review): this parameter appears unused; all
/// bits are read from state.Bitstream instead — confirm this is intentional.</param>
/// <returns>The decoded symbol.</returns>
public override uint DecompressSymbol(Bitstream bitstream)
{
    // items for walking the table
    uint accumulator = 0; // store bits read in until matches a codeword
    uint firstCodewordOnRow = 0; // first codeword on the current table entry
    // read min number of bits - no codeword is shorter than this
    for (uint i = 0; i < state.MinCodewordLength; ++i)
    {
        Trace.Assert(accumulator < 0x80000000); // else will overflow!
        accumulator = 2 * accumulator + state.Bitstream.Read(1); // accumulate bits
        firstCodewordOnRow <<= 1;
    }
    uint symbol = 0;
    bool symbolFound = false;
    if (Options.HasFlag(OptionFlags.UseLowMemoryDecoding))
    {
        // low-memory path: walk the decoding table directly out of the bitstream
        // rather than from a pre-parsed in-memory table
        uint tableIndex = state.TablePosition; // decoding table starts here
        while (!symbolFound)
        {
            // count of codewords on this table row
            uint numberOfCodes = state.Bitstream.ReadFrom(ref tableIndex, state.BitsPerCodelengthCount);
            if (numberOfCodes > 0 && accumulator - firstCodewordOnRow < numberOfCodes)
            {
                // accumulator lands in this row: index into the row's symbol list
                uint itemIndex = accumulator - firstCodewordOnRow;
                tableIndex += itemIndex * state.BitsPerSymbol;
                symbol = state.Bitstream.ReadFrom(ref tableIndex, state.BitsPerSymbol);
                symbolFound = true;
            }
            else
            {
                // not on this row: extend the codeword by one more bit
                firstCodewordOnRow += numberOfCodes;
                Trace.Assert(accumulator < 0x80000000); // else will overflow!
                accumulator = 2 * accumulator + state.Bitstream.Read(1); // accumulate bits
                firstCodewordOnRow <<= 1;
                // next entry
                tableIndex += numberOfCodes * state.BitsPerSymbol;
            }
        }
    }
    else
    {
        // normal path: same walk as above, over the pre-parsed in-memory table
        int tblRow = 0; // current row in the in-memory decoding table
        while (!symbolFound)
        {
            uint numberOfCodes = state.Table[tblRow].Item1; // codewords on this row
            if (numberOfCodes > 0 && accumulator - firstCodewordOnRow < numberOfCodes)
            {
                uint itemIndex = accumulator - firstCodewordOnRow;
                symbol = state.Table[tblRow].Item2[(int)itemIndex];
                symbolFound = true;
            }
            else
            {
                firstCodewordOnRow += numberOfCodes;
                Trace.Assert(accumulator < 0x80000000); // else will overflow!
                accumulator = 2 * accumulator + state.Bitstream.Read(1); // accumulate bits
                firstCodewordOnRow <<= 1;
                // next entry
                ++tblRow;
            }
        }
    }
    if (Options.HasFlag(OptionFlags.DumpDecoding))
    {
        Write($"{symbol:X2},");
    }
    return(symbol);
}
/// <summary>
/// Decompress a symbol in the compression algorithm. Symbols are stored as
/// fixed-width codes, so decoding is a single fixed-size read.
/// </summary>
/// <param name="bitstream">Stream to read the symbol bits from.</param>
/// <returns>The decoded symbol value.</returns>
public override uint DecompressSymbol(Bitstream bitstream)
{
    var symbol = bitstream.Read(BitsPerSymbol);
    return symbol;
}
/// <summary>
/// Finish reading the stream. This format writes no footer, so there is
/// nothing to consume here.
/// </summary>
/// <param name="bitstream">Stream being decoded (unused).</param>
public override void ReadFooter(Bitstream bitstream)
{
    // intentionally empty - this format has no footer
}
/// <summary>
/// Compress a symbol in the compression algorithm. Symbols are stored as
/// fixed-width codes, so the value is written in exactly BitsPerSymbol bits.
/// </summary>
/// <param name="bitstream">Destination stream for the symbol bits.</param>
/// <param name="symbol">Symbol value to encode.</param>
public override void CompressSymbol(Bitstream bitstream, uint symbol)
{
    // fixed-size write; no entropy coding in this codec
    bitstream.Write(symbol, BitsPerSymbol);
}
/// <summary>
/// Finish writing the stream. This format writes no footer, so there is
/// nothing to emit here.
/// </summary>
/// <param name="bitstream">Stream being encoded (unused).</param>
public override void WriteFooter(Bitstream bitstream)
{
    // intentionally empty - this format has no footer
}
/// <summary>
/// Insert all bits of another bitstream into this one at the given bit index,
/// shifting subsequent bits toward the end.
/// </summary>
/// <param name="insertPosition">Bit index at which the other stream's bits are inserted.</param>
/// <param name="bitstream">Stream whose bits are copied in; it is not modified.</param>
public void InsertStream(uint insertPosition, Bitstream bitstream)
{
    bits.InsertRange((int)insertPosition, bitstream.bits);
    // todo - position update?
    // NOTE(review): the read/write Position of this stream is not adjusted when
    // bits are inserted before it - confirm whether any caller depends on that.
}
/// <summary>
/// Create the frequency table as a bitstream for ease of use/testing.
/// Active layout: min and max used symbol index (Lomont1 universal coded),
/// the bit-length of the table (Lomont1 coded, allows a decoder to skip it),
/// then the BASC-encoded counts for symbols min..max inclusive.
/// </summary>
/// <returns>A bitstream holding the encoded frequency table.</returns>
Bitstream MakeFrequencyTable()
{
    var bs = new Bitstream();
    // write freq tables
    uint maxCount = counts.Max();
    // NOTE(review): Min() below throws if every count is zero - presumably the
    // caller guarantees at least one nonzero count; confirm.
    uint minCount = counts.Where(c => c > 0).Min();
#if true
    // have determined the following:
    // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
    // Also using BASC for counts present is good.
    // Also determined the sparse table type is much bigger in every file we tested!
    // so, check two types:
    // 1) BASC on all counts, versus
    // 2) BASC on those present for both count and symbol
    // Table thus only full type. Format is
    //    - symbol min index used, max index used, Lomont1 universal coded.
    //    - Number of bits in table, Lomont1 universal coded (allows jumping past)
    //    - Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries

    // locate first and last symbol indices with nonzero counts
    uint minSymbolIndex = UInt32.MaxValue;
    uint maxSymbolIndex = 0;
    for (var i = 0U; i < counts.Length; ++i)
    {
        if (counts[i] != 0)
        {
            maxSymbolIndex = i;
            if (minSymbolIndex == UInt32.MaxValue)
            {
                minSymbolIndex = i;
            }
        }
    }

    UniversalCodec.Lomont.EncodeLomont1(bs, minSymbolIndex, 6, 0);
    UniversalCodec.Lomont.EncodeLomont1(bs, maxSymbolIndex, 6, 0);

    // encode the used slice of counts into a side stream first so its
    // bit-length can be written ahead of it
    var fullTableBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs,
        new Datastream(counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()),
        (b, v) => UniversalCodec.Lomont.EncodeLomont1(b, v, 6, 0)
    );
    UniversalCodec.Lomont.EncodeLomont1(bs, fullTableBs.Length, 6, 0);
    bs.WriteStream(fullTableBs);
    if (Options.HasFlag(OptionFlags.DumpHeader))
    {
        WriteLine($"Arith encode: min symb index {minSymbolIndex} max symb index {maxSymbolIndex} tbl bits {fullTableBs.Length}");
    }
#else
    // have determined the following:
    // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
    // Also using BASC for counts present is good.
    // Also determined the sparse table type is much bigger in every file we tested!
    // so, check two types:
    // 1) BASC on all counts, versus
    // 2) BASC on those present for both count and symbol
    // Table thus
    //    - symbol min index used + 1, max index used + 1, EliasDelta coded.
    //    - bit denoting table type 0 (full) or 1 (sparse)
    //    - Number of bits in table + 1, elias delta coded (allows jumping past)
    //      0 = Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries
    //      1 = sparse table.
    //          Elias delta for number of counts in table + 1 (same as number of symbols)
    //          Elias delta for bitlength of counts + 1,
    //          BASC counts,
    //          BASC symbols present
    //    - table

    // compute two table lengths:
    uint minSymbolIndex = UInt32.MaxValue;
    uint maxSymbolIndex = 0;
    for (var i = 0U; i < counts.Length; ++i)
    {
        if (counts[i] != 0)
        {
            maxSymbolIndex = i;
            if (minSymbolIndex == UInt32.MaxValue)
            {
                minSymbolIndex = i;
            }
        }
    }

    // common header
    UniversalCodec.Elias.EncodeDelta(bs, minSymbolIndex + 1);
    UniversalCodec.Elias.EncodeDelta(bs, maxSymbolIndex + 1);

    var fullTableBs = new Bitstream();
    var sparseTableBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
        counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()
        ));
    var nonzeroCountIndices = counts.Select((c, n) => new { val = c, pos = n })
        .Where(p => p.val > 0)
        .Select(p => (uint)p.pos)
        .ToArray();
    var nonzeroCounts = counts.Where(c => c > 0).ToArray();
    UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));
    UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));
    var tempBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(tempBs, new Datastream(nonzeroCounts));
    uint sparseMidPos = tempBs.Position;
    UniversalCodec.Elias.EncodeDelta(sparseTableBs, sparseMidPos + 1);
    sparseTableBs.WriteStream(tempBs);
    UniversalCodec.BinaryAdaptiveSequentialEncode(sparseTableBs, new Datastream(nonzeroCountIndices));

    Console.WriteLine($"Arith full table {fullTableBs.Length} sparse table {sparseTableBs.Length}");

    // now finish table
    if (fullTableBs.Length < sparseTableBs.Length)
    {
        bs.Write(0); // full table
        UniversalCodec.Elias.EncodeDelta(bs, fullTableBs.Length + 1);
        bs.WriteStream(fullTableBs);
    }
    else
    {
        bs.Write(1); // sparse table
        UniversalCodec.Elias.EncodeDelta(bs, sparseTableBs.Length + 1);
        bs.WriteStream(sparseTableBs);
    }

    // var cc = new CompressionChecker();
    // cc.TestAll("arith",new Datastream(counts)); // all
    // cc.TestAll("arith",new Datastream(counts.Where(c=>c>0).ToArray())); // nonzero
    // BASC wins these tests
    //
#if false
    var allDs = new Datastream();
    var nonzeroDs = new Datastream();
    for (var i = 0U; i < counts.Length; ++i)
    {
        var index = i;//(uint)(counts.Length - 1 - i);
        allDs.Add(index);
        if (counts[i] != 0)
        {
            nonzeroDs.Add(index);
        }
    }
    var allBs = new Bitstream();
    var nonzeroBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(allBs, allDs);
    UniversalCodec.BinaryAdaptiveSequentialEncode(nonzeroBs, nonzeroDs);
    Console.WriteLine($"Arith all {allBs.Length} in ");
    Console.WriteLine($"Arith nonzero {nonzeroBs.Length} in ");

    //foreach (var c in counts)
    //    UniversalCodec.OneParameterCodeDelegate(
    //var ans = UniversalCodec.Optimize(UniversalCodec.Golomb.Encode,counts.ToList(),1,256);
    //bs = ans.Item1;
    // 912 gamma
    // 918 elias delta
    // 988 Omega
    // 1152 bits
    UniversalCodec.BinaryAdaptiveSequentialEncode(bs,new Datastream(counts)); // 1265 best Golomb
#endif
#endif
    if (Options.HasFlag(OptionFlags.DumpTable))
    {
        // NOTE(review): the "min symbol ? max symbol ?" placeholders in this dump
        // were never filled in with the computed indices
        WriteLine($"Arith table bitsize {bs.Length}, min symbol ? max symbol ? min count {minCount} max count {maxCount}");
        for (var i = 0; i < counts.Length; ++i)
        {
            if (counts[i] != 0)
            {
                Write($"[{i},{counts[i]}] ");
            }
        }
        WriteLine();
    }
    return(bs);
}
// lookup symbol and probability range using table decoding
/// <summary>
/// Find the symbol whose cumulative-count range contains <paramref name="cumCount"/>
/// by decoding the BASC-encoded count table in place from the bitstream
/// (low-memory path - no expanded table is kept).
/// </summary>
/// <param name="bitstream">Stream holding the encoded table; its Position is saved and restored.</param>
/// <param name="cumCount">Cumulative count value to locate.</param>
/// <param name="lowCount">Receives the low end of the matching symbol's count range.</param>
/// <param name="highCount">Receives the high end of the matching symbol's count range.</param>
/// <returns>The symbol whose range contains cumCount.</returns>
uint LookupLowMemoryCount(Bitstream bitstream, uint cumCount, out uint lowCount, out uint highCount)
{
    // BASC encoded, decode with same process
    // todo - merge with BASC Codec version, make cleaner

    // swap bit positions to access table
    uint tempPosition = bitstream.Position; // save this
    bitstream.Position = tableStartBitPosition;

    lowCount = highCount = 0;
    uint symbol = 0;
    uint length = UniversalCodec.Lomont.DecodeLomont1(bitstream, 6, 0);
    if (length != 0)
    {
        // first entry: explicit bit width, then the first count
        uint b1 = UniversalCodec.Lomont.DecodeLomont1(bitstream, 6, 0);
        uint xi = bitstream.Read(b1);
        lowCount = 0;
        highCount = xi;
        symbol = symbolMin;
        uint i = symbolMin;
        // walk cumulative counts until the range containing cumCount is reached
        while (highCount <= cumCount)
        {
            var decision = bitstream.Read(1);
            if (decision == 0)
            {
                // bi is <= b(i-1), so enough bits
                xi = bitstream.Read(b1);
            }
            else
            {
                // bi is bigger than b(i-1), must increase it
                // unary run of 1-bits encodes how many extra bits are needed
                uint delta = 0;
                do
                {
                    decision = bitstream.Read(1);
                    delta++;
                } while (decision != 0);
                b1 += delta;
                xi = bitstream.Read(b1 - 1);
                // xi has implied leading 1
                xi |= 1U << (int)(b1 - 1);
            }
            b1 = BitsRequired(xi); // next entry's baseline width adapts to this value
            lowCount = highCount;
            highCount += xi;
            ++i;
            // zero counts are gaps: only advance the reported symbol on nonzero counts
            if (xi != 0)
            {
                symbol = i;
            }
        }
    }
    // restore bit position
    bitstream.Position = tempPosition;
    return(symbol);
}
/// <summary>
/// Compress a symbol in the compression algorithm. Intentionally a no-op for
/// this codec.
/// </summary>
/// <param name="bitstream">Destination stream (unused).</param>
/// <param name="symbol">Symbol to compress (unused).</param>
public override void CompressSymbol(Bitstream bitstream, uint symbol)
{
    // due to how this format is stored, all work was done in the header call
}
/// <summary>
/// Write the header for the compression algorithm. Due to this format's
/// structure the ENTIRE compressed output is written here: the data is split
/// into decision/literal/token/distance/length streams, each stream is
/// compressed with the best available codec, and one-bit flags record which
/// representation (decisions vs runs, tokens vs distance+length pairs) won.
/// </summary>
/// <param name="bitstream">Destination stream for the full compressed output.</param>
/// <param name="data">Data to compress.</param>
/// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
/// <returns></returns>
public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
{
    // erase data streams
    decisions.Clear();
    decisionRuns.Clear();
    literals.Clear();
    distances.Clear();
    lengths.Clear();
    tokens.Clear();

    // fill in all the data streams
    uint actualMinLength, actualMaxDistance;
    ComputeStreams(data, out actualMinLength, out actualMaxDistance);

    // due to the vagaries of this format, we write the entire file in the header call,
    // and unfortunately ignore the encode symbol and footer sections

    // dump info to help analyze
    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        WriteLine("LZCL compress:");
        WriteLine($" Data length {data.Count} ");
    }
    if (Options.HasFlag(OptionFlags.ShowTallies))
    {
        // some info to help make analyze and make decisions
        Write("Length tally: ");
        Tally(lengths);
        WriteLine();
        Write("Distance tally: ");
        Tally(distances);
        WriteLine();
    }

    // get compressed streams so we can decide what to output
    var decisionChoice = GetBestCompressor("decisions", decisions);
    var decisionRunsChoice = GetBestCompressor("decision runs", decisionRuns);
    var literalsChoice = GetBestCompressor("literals", literals);
    var tokensChoice = GetBestCompressor("tokens", tokens);
    var distancesChoice = GetBestCompressor("distances", distances);
    var lengthsChoice = GetBestCompressor("lengths", lengths);

    // write header values
    Header.WriteUniversalHeader(bitstream, data, headerFlags);

    // save max distance occurring, used to encode tokens, very useful to users to know window needed size
    UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMaxDistance, 10, 0);
    UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMinLength, 2, 0);
    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        WriteLine($"actual min length {actualMinLength}");
    }
    if (Options.HasFlag(OptionFlags.DumpDebug))
    {
        WriteLine($"Max distance {actualMaxDistance}");
    }

    // decisions: raw stream vs run-length encoded, whichever compressed smaller
    if (decisionChoice.Item2.Length < decisionRunsChoice.Item2.Length)
    {
        // denote choice
        bitstream.Write(0);
        // save item
        WriteItem(bitstream, decisionChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Decisions smaller than decision runs");
        }
        StatRecorder.AddStat($"codec used: decisions {decisionChoice.Item1.Name}", 1);
    }
    else
    {
        // denote choice
        bitstream.Write(1);
        // save initial value - runs encode lengths only, so the decoder needs the first decision
        bitstream.Write(decisions[0]);
        // save item
        WriteItem(bitstream, decisionRunsChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Decisions runs smaller than decisions");
        }
        StatRecorder.AddStat($"codec used: decision runs {decisionRunsChoice.Item1.Name}", 1);
    }

    // literals
    WriteItem(bitstream, literalsChoice);
    StatRecorder.AddStat($"codec used: literals {literalsChoice.Item1.Name}", 1);

    // tokens or separate distance, length pairs
    if (tokensChoice.Item2.Length < distancesChoice.Item2.Length + lengthsChoice.Item2.Length)
    {
        // denote choice
        bitstream.Write(0);
        // save item
        WriteItem(bitstream, tokensChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Tokens smaller than distance,length pairs");
        }
        StatRecorder.AddStat($"codec used: tokens {tokensChoice.Item1.Name}", 1);
    }
    else
    {
        // denote choice
        bitstream.Write(1);
        // save items
        WriteItem(bitstream, distancesChoice);
        WriteItem(bitstream, lengthsChoice);
        if (Options.HasFlag(OptionFlags.DumpDebug))
        {
            WriteLine("Distance,length pairs smaller than tokens");
        }
        StatRecorder.AddStat($"codec used: distances {distancesChoice.Item1.Name}", 1);
        StatRecorder.AddStat($"codec used: lengths {lengthsChoice.Item1.Name}", 1);
    }
}