Пример #1
0
        // todo (carried over from the original, which does not say what is left to do)
        /// <summary>
        /// Read the frequency table header from the bitstream, then either skip
        /// over the table (low memory decoding) or decode it into <c>counts</c>.
        /// </summary>
        /// <remarks>
        /// Only the full table type exists. Layout, in read order:
        ///   1. lowest symbol index used, Lomont1 universal coded
        ///   2. highest symbol index used, Lomont1 universal coded
        ///   3. number of bits in the table, Lomont1 universal coded (allows jumping past it)
        ///   4. the table itself: BASC encoded counts, maxIndex - minIndex + 1 entries
        /// </remarks>
        /// <param name="bs">Bitstream to read from; its position is advanced past the header and the table.</param>
        void DecodeTable(Bitstream bs)
        {
            // The three header fields must be read in exactly this order.
            symbolMin = UniversalCodec.Lomont.DecodeLomont1(bs, 6, 0);
            symbolMax = UniversalCodec.Lomont.DecodeLomont1(bs, 6, 0);
            uint tableBits = UniversalCodec.Lomont.DecodeLomont1(bs, 6, 0);

            if (Options.HasFlag(OptionFlags.DumpHeader))
            {
                WriteLine($"Arith decode: min symb index {symbolMin} max symb index {symbolMax} tbl bits {tableBits}");
            }

            if (Options.HasFlag(OptionFlags.UseLowMemoryDecoding))
            {
                // Low memory mode: remember where the table starts and jump over it;
                // counts is not populated on this path.
                tableStartBitPosition = bs.Position;
                bs.Position += tableBits;
            }
            else
            {
                // Memory based decoding: pull the whole table in now.
                // Note the item decoder ignores its own argument and reads from the captured bs.
                var decodedCounts = UniversalCodec.BinaryAdaptiveSequentialDecode(
                    bs,
                    ignored => UniversalCodec.Lomont.DecodeLomont1(bs, 6, 0));

                // counts is indexed by absolute symbol index, decodedCounts by offset from symbolMin;
                // entries below symbolMin keep their default value of 0.
                counts = new uint[symbolMax + 1];
                var symbol = symbolMin;
                while (symbol <= symbolMax)
                {
                    counts[symbol] = decodedCounts[(int)(symbol - symbolMin)];
                    ++symbol;
                }

                MakeSums();
            }
        }
Пример #2
0
        /// <summary>
        /// Run every compression scheme enabled in <c>Options</c> over the data and
        /// return one result per scheme tried (label, compressed bit length,
        /// codec type, optional parameters).
        /// </summary>
        /// <param name="statPrefix">Prefix of the stat names recorded through <c>StatRecorder</c>; each stat is named prefix + "_" + compressor name.</param>
        /// <param name="data">The values to compress.</param>
        /// <param name="headerFlags">Handed unchanged to <c>CompressToStream</c> of each codec class.</param>
        /// <returns>The results, in the order the schemes were tried.</returns>
        public List<Result> TestAll(string statPrefix, Datastream data, Header.HeaderFlags headerFlags)
        {
            var outcomes = new List<Result>();

            // Codec classes: compress, record the size, and for a Golomb codec
            // also record the parameter it used and append it to the displayed name.
            Action<string, CodecBase> runCodec = (name, codec) =>
            {
                var compressed = codec.CompressToStream(data, headerFlags);
                var outcome = new Result(name, compressed.Length, codec.GetType());
                if (codec.GetType() == typeof(GolombCodec))
                {
                    var golomb = (GolombCodec)codec;
                    outcome.Parameters.Add(golomb.Parameter);
                    outcome.CompressorName += $"({golomb.Parameter})";
                }
                outcomes.Add(outcome);
            };

            // Universal codes: each value is shifted up by one before encoding
            // (presumably because these codes cannot represent 0 - TODO confirm).
            Action<string, UniversalCodec.UniversalCodeDelegate, Type> runUniversalCode = (name, encode, family) =>
            {
                var shifted = data.Select(value => value + 1).ToList();
                var compressed = UniversalCodec.CompressStream(encode, shifted);
                outcomes.Add(new Result(name, compressed.Length, family));
            };

            // --- codec classes ---
            if (Options.HasFlag(OptionFlags.UseFixed))
            {
                runCodec("Fixed size", new FixedSizeCodec());
            }
            if (Options.HasFlag(OptionFlags.UseArithmetic))
            {
                runCodec("Arithmetic", new ArithmeticCodec());
            }
            if (Options.HasFlag(OptionFlags.UseHuffman))
            {
                runCodec("Huffman", new HuffmanCodec());
            }
            // Golomb is only attempted when the largest value is below the codec's threshold.
            // (An older variant, now removed, searched Golomb parameters 1..min(max value, 256)
            // with UniversalCodec.Optimize instead of using GolombCodec.)
            if (Options.HasFlag(OptionFlags.UseGolomb) && data.Max() < GolombCodec.GolombThreshold)
            {
                runCodec("Golomb", new GolombCodec());
            }

            // --- Elias codes - all perform poorly - todo - need way to pass this back as type? ---
            if (Options.HasFlag(OptionFlags.UseEliasDelta))
            {
                runUniversalCode("EliasDelta", UniversalCodec.Elias.EncodeDelta, typeof(UniversalCodec.Elias));
            }
            if (Options.HasFlag(OptionFlags.UseEliasGamma))
            {
                runUniversalCode("EliasGamma", UniversalCodec.Elias.EncodeGamma, typeof(UniversalCodec.Elias));
            }
            if (Options.HasFlag(OptionFlags.UseEliasOmega))
            {
                runUniversalCode("EliasOmega", UniversalCodec.Elias.EncodeOmega, typeof(UniversalCodec.Elias));
            }

            // --- Stout, with its extra argument fixed at 3 ---
            if (Options.HasFlag(OptionFlags.UseStout))
            {
                runUniversalCode("Stout", (b, v) => UniversalCodec.Stout.Encode(b, v, 3), typeof(UniversalCodec.Stout));
            }

            // --- BASC (BinaryAdaptiveSequentialEncode) on the unshifted data, Elias delta for items ---
            if (Options.HasFlag(OptionFlags.UseBasc))
            {
                var bascStream = new Bitstream();
                UniversalCodec.BinaryAdaptiveSequentialEncode(bascStream, data, UniversalCodec.Elias.EncodeDelta);
                outcomes.Add(new Result("BASC", bascStream.Length, typeof(UniversalCodec)));
            }

            // record one stat per scheme tried
            foreach (var outcome in outcomes)
            {
                StatRecorder.AddStat($"{statPrefix}_{outcome.CompressorName}", outcome.CompressedBitLength);
            }

            return outcomes;
        }
Пример #3
0
        /// <summary>
        /// Create the frequency table as a bitstream for ease of use/testing.
        /// </summary>
        /// <remarks>
        /// Findings recorded by the original author: of the three Elias codes,
        /// optimized Golomb and BASC, BASC was slightly best for storing counts,
        /// and a sparse table (counts plus symbol indices of the symbols present)
        /// was much bigger than the full table in every file tested. The table is
        /// therefore always the full type. Format:
        ///   - symbol min index used, max index used, Lomont1 universal coded.
        ///   - Number of bits in table, Lomont1 universal coded (allows jumping past)
        ///   - Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries
        /// </remarks>
        /// <returns>A new bitstream holding the header followed by the table.</returns>
        /// <exception cref="InvalidOperationException">No symbol has a nonzero count, so there is no symbol range to encode.</exception>
        Bitstream MakeFrequencyTable()
        {
            // Find the lowest and highest symbol indices that actually occur.
            var anySymbolUsed = false;
            uint minSymbolIndex = 0;
            uint maxSymbolIndex = 0;
            for (var i = 0U; i < counts.Length; ++i)
            {
                if (counts[i] == 0)
                {
                    continue;
                }
                if (!anySymbolUsed)
                {
                    minSymbolIndex = i;
                    anySymbolUsed = true;
                }
                maxSymbolIndex = i;
            }

            // An empty/all-zero table has no symbol range. Fail with a clear message
            // rather than relying on a LINQ Min() over an empty sequence to throw.
            if (!anySymbolUsed)
            {
                throw new InvalidOperationException("Cannot build a frequency table: no symbol has a nonzero count.");
            }

            var bs = new Bitstream();

            // Header: symbol range. The Lomont1 arguments (6, 0) have to be the same
            // ones the reader of this table uses.
            UniversalCodec.Lomont.EncodeLomont1(bs, minSymbolIndex, 6, 0);
            UniversalCodec.Lomont.EncodeLomont1(bs, maxSymbolIndex, 6, 0);

            // Encode the counts for [minSymbolIndex, maxSymbolIndex] into a scratch stream
            // first, because the table's bit length must be written ahead of the table.
            var usedCounts = counts
                .Skip((int)minSymbolIndex)
                .Take((int)(maxSymbolIndex - minSymbolIndex + 1))
                .ToArray();
            var fullTableBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(
                fullTableBs,
                new Datastream(usedCounts),
                (b, v) => UniversalCodec.Lomont.EncodeLomont1(b, v, 6, 0));

            UniversalCodec.Lomont.EncodeLomont1(bs, fullTableBs.Length, 6, 0);
            bs.WriteStream(fullTableBs);

            if (Options.HasFlag(OptionFlags.DumpHeader))
            {
                WriteLine($"Arith encode: min symb index {minSymbolIndex} max symb index {maxSymbolIndex} tbl bits {fullTableBs.Length}");
            }

            if (Options.HasFlag(OptionFlags.DumpTable))
            {
                // Only the dump needs these, so the scans are done only here.
                // The guard above guarantees at least one nonzero count for Min().
                uint maxCount = counts.Max();
                uint minCount = counts.Where(c => c > 0).Min();

                WriteLine($"Arith table bitsize {bs.Length}, min symbol {minSymbolIndex} max symbol {maxSymbolIndex} min count {minCount} max count {maxCount}");
                for (var i = 0; i < counts.Length; ++i)
                {
                    if (counts[i] != 0)
                    {
                        Write($"[{i},{counts[i]}] ");
                    }
                }
                WriteLine();
            }

            return bs;
        }