// todo
/// <summary>
/// Read the frequency table header from <paramref name="bs"/>, then either skip
/// over the table (low memory decoding) or decode it into <c>counts</c>.
/// </summary>
/// <remarks>
/// Only the full table type exists. The layout read here is:
///   - symbol min index used, max index used, each Lomont1 universal coded (6, 0);
///   - number of bits in the table, Lomont1 universal coded (6, 0), which allows jumping past it;
///   - the full table: counts are BASC encoded, maxIndex - minIndex + 1 entries.
/// </remarks>
/// <param name="bs">Bitstream to read from; its position is advanced past the header and table.</param>
void DecodeTable(Bitstream bs)
{
    symbolMin = UniversalCodec.Lomont.DecodeLomont1(bs, 6, 0);
    symbolMax = UniversalCodec.Lomont.DecodeLomont1(bs, 6, 0);
    uint tableBitLength = UniversalCodec.Lomont.DecodeLomont1(bs, 6, 0);

    if (Options.HasFlag(OptionFlags.DumpHeader))
    {
        WriteLine($"Arith decode: min symb index {symbolMin} max symb index {symbolMax} tbl bits {tableBitLength}");
    }

    if (Options.HasFlag(OptionFlags.UseLowMemoryDecoding))
    {
        // Remember where the table starts and step over it; counts is not filled in on this path.
        tableStartBitPosition = bs.Position;
        bs.Position += tableBitLength; // skip table
        return;
    }

    // Otherwise decode the table now and use memory based decoding.
    // The value decoder reads from the stream it is handed instead of capturing bs,
    // the same way the (b, v) encoder lambda is written on the encoding side.
    // NOTE(review): assumes BinaryAdaptiveSequentialDecode passes the same Bitstream to the callback - confirm.
    var counts1 = UniversalCodec.BinaryAdaptiveSequentialDecode(bs, b => UniversalCodec.Lomont.DecodeLomont1(b, 6, 0));

    // Indices below symbolMin are left at the array default of zero.
    counts = new uint[symbolMax + 1];
    for (var i = symbolMin; i <= symbolMax; ++i)
    {
        counts[i] = counts1[(int)(i - symbolMin)];
    }
    MakeSums();
}
/// <summary>
/// Compress <paramref name="data"/> with every compressor enabled in <c>Options</c>
/// and collect one result (label, bit length, type, optional parameters) per attempt.
/// </summary>
/// <param name="statPrefix">Prefix of the stat name; each result is recorded as prefix + "_" + compressor name.</param>
/// <param name="data">The values to compress.</param>
/// <param name="headerFlags">Header flags handed to each codec's CompressToStream.</param>
/// <returns>The results, in the order the compressors were tried.</returns>
public List<Result> TestAll(string statPrefix, Datastream data, Header.HeaderFlags headerFlags)
{
    var outcomes = new List<Result>();

    // Shorthand for testing one option flag.
    Func<OptionFlags, bool> enabled = flag => Options.HasFlag(flag);

    // Run a codec object over the data and record the size it produced.
    Action<string, CodecBase, Type> runCodec = (name, compressor, compressorType) =>
    {
        var packed = compressor.CompressToStream(data, headerFlags);
        var outcome = new Result(name, packed.Length, compressorType);

        // Exactly GolombCodec (not a subclass): also report its Parameter, in the list and in the name.
        if (compressor.GetType() == typeof(GolombCodec))
        {
            var golomb = (GolombCodec)compressor;
            outcome.Parameters.Add(golomb.Parameter);
            outcome.CompressorName += $"({golomb.Parameter})";
        }

        outcomes.Add(outcome);
    };

    // Codec based compressors.
    if (enabled(OptionFlags.UseFixed))
    {
        runCodec("Fixed size", new FixedSizeCodec(), typeof(FixedSizeCodec));
    }
    if (enabled(OptionFlags.UseArithmetic))
    {
        runCodec("Arithmetic", new ArithmeticCodec(), typeof(ArithmeticCodec));
    }
    if (enabled(OptionFlags.UseHuffman))
    {
        runCodec("Huffman", new HuffmanCodec(), typeof(HuffmanCodec));
    }
    if (enabled(OptionFlags.UseGolomb))
    {
        // Golomb is only attempted when the largest value is under the codec's threshold.
        // NOTE(review): if Max() is LINQ's Enumerable.Max it throws on empty data - confirm callers never pass none.
        if (data.Max() < GolombCodec.GolombThreshold)
        {
            runCodec("Golomb", new GolombCodec(), typeof(GolombCodec));
        }
    }

    /* // try Golomb encoding
     * if (Options.HasFlag(OptionFlags.UseGolomb))
     * {
     *     var bestg = UniversalCodec.Optimize(UniversalCodec.Golomb.Encode, data, 1, Math.Min(data.Max(), 256));
     *     var bitstream = bestg.Item1;
     *     var gname = $"Golomb({bestg.Item2,2})";
     *     results.Add(new Result(gname, bitstream.Length, typeof(GolombCodec), bestg.Item2));
     *     //results.Add(new Result(gname,bitstream.Length, typeof(UniversalCodec.Golomb), bestg.Item2));
     * }
     */

    // Run a universal code over the data, with every value raised by one first.
    // NOTE(review): the +1 is presumably because these codes cannot represent zero - confirm.
    Action<string, UniversalCodec.UniversalCodeDelegate, Type> runEncoder = (name, encode, encoderType) =>
    {
        var shifted = data.Select(value => value + 1).ToList();
        var packed = UniversalCodec.CompressStream(encode, shifted);
        outcomes.Add(new Result(name, packed.Length, encoderType));
    };

    // Elias codes - all perform poorly - todo - need way to pass this back as type?
    if (enabled(OptionFlags.UseEliasDelta))
    {
        runEncoder("EliasDelta", UniversalCodec.Elias.EncodeDelta, typeof(UniversalCodec.Elias));
    }
    if (enabled(OptionFlags.UseEliasGamma))
    {
        runEncoder("EliasGamma", UniversalCodec.Elias.EncodeGamma, typeof(UniversalCodec.Elias));
    }
    if (enabled(OptionFlags.UseEliasOmega))
    {
        runEncoder("EliasOmega", UniversalCodec.Elias.EncodeOmega, typeof(UniversalCodec.Elias));
    }

    // Stout, with its extra argument fixed at 3.
    if (enabled(OptionFlags.UseStout))
    {
        runEncoder("Stout", (stream, value) => UniversalCodec.Stout.Encode(stream, value, 3), typeof(UniversalCodec.Stout));
    }

    // Binary adaptive sequential coding works on the unshifted data, using Elias delta as its value encoder.
    if (enabled(OptionFlags.UseBasc))
    {
        var bascBits = new Bitstream();
        UniversalCodec.BinaryAdaptiveSequentialEncode(bascBits, data, UniversalCodec.Elias.EncodeDelta);
        outcomes.Add(new Result("BASC", bascBits.Length, typeof(UniversalCodec)));
    }

    // Record one stat per result.
    foreach (var entry in outcomes)
    {
        var statName = statPrefix + "_" + entry.CompressorName;
        StatRecorder.AddStat(statName, entry.CompressedBitLength);
    }

    return outcomes;
}
/// <summary>
/// Create the frequency table as a bitstream for ease of use/testing.
/// </summary>
/// <remarks>
/// Only the full table type is written. Format is:
///   - symbol min index used, max index used, each Lomont1 universal coded (6, 0);
///   - number of bits in the table, Lomont1 universal coded (6, 0), which allows jumping past it;
///   - the full table: counts for indices min..max inclusive, BASC encoded with Lomont1 (6, 0)
///     as the value encoder, maxIndex - minIndex + 1 entries.
/// </remarks>
/// <returns>A new bitstream holding the header followed by the table.</returns>
/// <exception cref="InvalidOperationException">
/// No entry of <c>counts</c> is nonzero, so there is no symbol range to write.
/// </exception>
Bitstream MakeFrequencyTable()
{
    var bs = new Bitstream();

    // write freq tables

#if true
    // have determined the following:
    // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
    // Also using BASC for counts present is good.
    // Also determined the sparse table type is much bigger in every file we tested!
    // The two candidates compared were:
    //    1) BASC on all counts, versus
    //    2) BASC on those present for both count and symbol
    // so only the full table type (1) is written here.

    // Find the first and last symbol index with a nonzero count.
    // UInt32.MaxValue marks "no nonzero count seen yet".
    uint minSymbolIndex = UInt32.MaxValue;
    uint maxSymbolIndex = 0;
    for (var i = 0U; i < counts.Length; ++i)
    {
        if (counts[i] != 0)
        {
            maxSymbolIndex = i;
            if (minSymbolIndex == UInt32.MaxValue)
            {
                minSymbolIndex = i;
            }
        }
    }

    // Without any nonzero count there is no valid symbol range; fail clearly before writing anything.
    if (minSymbolIndex == UInt32.MaxValue)
    {
        throw new InvalidOperationException("Cannot build a frequency table: no symbol has a nonzero count.");
    }

    UniversalCodec.Lomont.EncodeLomont1(bs, minSymbolIndex, 6, 0);
    UniversalCodec.Lomont.EncodeLomont1(bs, maxSymbolIndex, 6, 0);

    // Encode the table into its own stream first so its bit length can be written ahead of it.
    var fullTableBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
        counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()),
        (b, v) => UniversalCodec.Lomont.EncodeLomont1(b, v, 6, 0)
        );
    UniversalCodec.Lomont.EncodeLomont1(bs, fullTableBs.Length, 6, 0);
    bs.WriteStream(fullTableBs);

    if (Options.HasFlag(OptionFlags.DumpHeader))
    {
        WriteLine($"Arith encode: min symb index {minSymbolIndex} max symb index {maxSymbolIndex} tbl bits {fullTableBs.Length}");
    }

#else
    // have determined the following:
    // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
    // Also using BASC for counts present is good.
    // Also determined the sparse table type is much bigger in every file we tested!
    // so, check two types:
    //    1) BASC on all counts, versus
    //    2) BASC on those present for both count and symbol
    // Table thus
    //   - symbol min index used + 1, max index used + 1, EliasDelta coded.
    //   - bit denoting table type 0 (full) or 1 (sparse)
    //   - Number of bits in table + 1, elias delta coded (allows jumping past)
    //     0 = Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries
    //     1 = sparse table.
    //         Elias delta for number of counts in table + 1 (same as number of symbols)
    //         Elias delta for bitlength of counts + 1,
    //         BASC counts,
    //         BASC symbols present
    //   - table

    // compute two table lengths:
    uint minSymbolIndex = UInt32.MaxValue;
    uint maxSymbolIndex = 0;
    for (var i = 0U; i < counts.Length; ++i)
    {
        if (counts[i] != 0)
        {
            maxSymbolIndex = i;
            if (minSymbolIndex == UInt32.MaxValue)
            {
                minSymbolIndex = i;
            }
        }
    }

    // common header
    UniversalCodec.Elias.EncodeDelta(bs, minSymbolIndex + 1);
    UniversalCodec.Elias.EncodeDelta(bs, maxSymbolIndex + 1);

    var fullTableBs = new Bitstream();
    var sparseTableBs = new Bitstream();

    UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
        counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()
        ));

    var nonzeroCountIndices = counts.Select((c, n) => new { val = c, pos = n })
        .Where(p => p.val > 0)
        .Select(p => (uint)p.pos)
        .ToArray();
    var nonzeroCounts = counts.Where(c => c > 0).ToArray();

    UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));
    UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));

    var tempBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(tempBs, new Datastream(nonzeroCounts));
    uint sparseMidPos = tempBs.Position;

    UniversalCodec.Elias.EncodeDelta(sparseTableBs, sparseMidPos + 1);
    sparseTableBs.WriteStream(tempBs);
    UniversalCodec.BinaryAdaptiveSequentialEncode(sparseTableBs, new Datastream(nonzeroCountIndices));

    Console.WriteLine($"Arith full table {fullTableBs.Length} sparse table {sparseTableBs.Length}");

    // now finish table
    if (fullTableBs.Length < sparseTableBs.Length)
    {
        bs.Write(0); // full table
        UniversalCodec.Elias.EncodeDelta(bs, fullTableBs.Length + 1);
        bs.WriteStream(fullTableBs);
    }
    else
    {
        bs.Write(1); // sparse table
        UniversalCodec.Elias.EncodeDelta(bs, sparseTableBs.Length + 1);
        bs.WriteStream(sparseTableBs);
    }

    // var cc = new CompressionChecker();
    // cc.TestAll("arith",new Datastream(counts)); // all
    // cc.TestAll("arith",new Datastream(counts.Where(c=>c>0).ToArray())); // nonzero
    // BASC wins these tests
    //
#if false
    var allDs = new Datastream();
    var nonzeroDs = new Datastream();
    for (var i = 0U; i < counts.Length; ++i)
    {
        var index = i;//(uint)(counts.Length - 1 - i);
        allDs.Add(index);
        if (counts[i] != 0)
        {
            nonzeroDs.Add(index);
        }
    }
    var allBs = new Bitstream();
    var nonzeroBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(allBs, allDs);
    UniversalCodec.BinaryAdaptiveSequentialEncode(nonzeroBs, nonzeroDs);
    Console.WriteLine($"Arith all {allBs.Length} in ");
    Console.WriteLine($"Arith nonzero {nonzeroBs.Length} in ");

    //foreach (var c in counts)
    //    UniversalCodec.OneParameterCodeDelegate(
    //var ans = UniversalCodec.Optimize(UniversalCodec.Golomb.Encode,counts.ToList(),1,256);
    //bs = ans.Item1;
    // 912 gamma
    // 918 elias delta
    // 988 Omega
    // 1152 bits UniversalCodec.BinaryAdaptiveSequentialEncode(bs,new Datastream(counts));
    // 1265 best Golomb
#endif
#endif

    if (Options.HasFlag(OptionFlags.DumpTable))
    {
        // The count extremes are only reported here, so they are computed only when dumping.
        uint maxCount = counts.Max();
        uint minCount = counts.Where(c => c > 0).Min();

        WriteLine($"Arith table bitsize {bs.Length}, min symbol {minSymbolIndex} max symbol {maxSymbolIndex} min count {minCount} max count {maxCount}");
        for (var i = 0; i < counts.Length; ++i)
        {
            if (counts[i] != 0)
            {
                Write($"[{i},{counts[i]}] ");
            }
        }
        WriteLine();
    }

    return bs;
}