Beispiel #1
0
        /// <summary>
        /// Write the universal header followed by the canonically labeled symbol table.
        /// When <paramref name="data"/> is empty only the universal header is written.
        /// </summary>
        /// <param name="bitstream">Stream receiving the header and the symbol table</param>
        /// <param name="data">Symbols from which the code tree is built</param>
        /// <param name="headerFlags">Flags handed through to the universal header writer</param>
        public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
        {
            Header.WriteUniversalHeader(bitstream, data, headerFlags);
            if (data.Count != 0)
            {
                // build the code tree, then walk it assigning codewords
                var root = MakeTree(data);
                AssignBitStrings(root);

                // the leaves are the symbols; once collected the rest of the tree is not needed
                leaves.Clear();
                GetLeaves(root);

                // relabel codewords into canonical ordering - the symbol table
                // must be made from canonically labeled leaves
                MakeCanonical(leaves);
                bitstream.WriteStream(MakeTable(leaves));

                // prepare to add rest of data
                if (Options.HasFlag(OptionFlags.DumpEncoding))
                {
                    WriteLine("Huff encode: [");
                }
            }
        }
        // end encoding  notes http://www3.sympatico.ca/mt0000/biacode/biacode.html
        // and http://bijective.dogma.net/compres10.htm

        /// <summary>
        /// Write the header for the compression algorithm: the universal header
        /// (with the symbol count always included) followed by the frequency table.
        /// </summary>
        /// <param name="bitstream">Stream receiving the header</param>
        /// <param name="data">Symbols whose occurrences are counted to build the frequency table</param>
        /// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
        public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
        {
            // tally how often each symbol occurs; the counts give the probabilities
            // NOTE(review): data.Max() presumably throws on an empty stream - confirm callers never pass one
            counts = new uint[data.Max() + 1];
            foreach (var symbol in data)
            {
                counts[symbol] += 1;
            }
            total = (uint)data.Count;  // total frequency
            MakeSums();

            ResetCoder();

            // arithmetic gets total probability from the header, so the symbol count
            // flag is always added to whatever the caller asked for
            Header.WriteUniversalHeader(bitstream, data, headerFlags | Header.HeaderFlags.SymbolCount);

            // remember this spot: the bitlength of what follows is inserted here during the footer
            whereToInsertBitlength = bitstream.Position;

            // frequency tables come right after the universal header
            bitstream.WriteStream(MakeFrequencyTable());

            if (Options.HasFlag(OptionFlags.DumpState))
            {
                Write("Enc: ");
            }
        }
Beispiel #3
0
        // Header flags field, initialized to None.
        // NOTE(review): the original "todo - make None to save bits" looks already done - confirm and drop the todo.
        Header.HeaderFlags internalFlags = Header.HeaderFlags.None;

        /// <summary>
        /// Write one item: a 2-bit codec type tag, the Lomont1 coded length of the
        /// item's stream, then the stream itself.
        /// </summary>
        /// <param name="bitstream">Destination stream</param>
        /// <param name="item">Codec type (Item1) and the stream to store for it (Item2)</param>
        /// <exception cref="NotImplementedException">The codec type has no tag assigned</exception>
        void WriteItem(Bitstream bitstream, Tuple <Type, Bitstream> item)
        {
            var codecType = item.Item1;
            var payload = item.Item2;

            // tag the codec with a 2-bit value; nothing is written for an unknown type
            if (codecType == typeof(FixedSizeCodec))
            {
                bitstream.Write(0, 2);
            }
            else if (codecType == typeof(ArithmeticCodec))
            {
                bitstream.Write(1, 2);
            }
            else if (codecType == typeof(HuffmanCodec))
            {
                bitstream.Write(2, 2);
            }
            else if (codecType == typeof(GolombCodec))
            {
                bitstream.Write(3, 2);
            }
            else
            {
                throw new NotImplementedException("Unknown compressor type");
            }

            // length of the payload goes in front of the payload
            var payloadLength = payload.Length;
            UniversalCodec.Lomont.EncodeLomont1(bitstream, payloadLength, 6, 0);
            if (Options.HasFlag(OptionFlags.DumpDebug))
            {
                WriteLine($"Compressor type {codecType.Name}, length {payloadLength}");
            }

            // finally the payload itself
            bitstream.WriteStream(payload);
        }
        /// <summary>
        /// Create the frequency table as a bitstream for ease of use/testing.
        /// </summary>
        /// <remarks>
        /// Format written:
        ///   - symbol min index used, max index used, each Lomont1 universal coded.
        ///   - number of bits in the table, Lomont1 universal coded (allows jumping past).
        ///   - full table: counts for indices min..max inclusive, BASC encoded.
        /// </remarks>
        /// <returns>A new bitstream holding the encoded table</returns>
        /// <exception cref="InvalidOperationException">No entry of counts is nonzero</exception>
        Bitstream MakeFrequencyTable()
        {
            var bs = new Bitstream();

            // These two values are only reported by the DumpTable diagnostics, but they are
            // evaluated up front on purpose: Min() throws when no count is nonzero, which
            // stops us before the index scan below could produce a meaningless range.
            uint maxCount = counts.Max();
            uint minCount = counts.Where(c => c > 0).Min();

            // Findings from earlier experiments (the disabled code that ran them has been removed):
            //  - Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts.
            //  - A sparse table (BASC on the counts present plus the symbols present) was much
            //    bigger than the full table in every file tested, so only the full table is written.
            //  - Sizes noted alongside the removed experiment code (bits): 912 gamma,
            //    918 Elias delta, 988 Omega, 1152 BASC, 1265 best Golomb.

            // find the first and last symbol index having a nonzero count
            uint minSymbolIndex = UInt32.MaxValue;
            uint maxSymbolIndex = 0;
            for (var i = 0U; i < counts.Length; ++i)
            {
                if (counts[i] != 0)
                {
                    maxSymbolIndex = i;
                    if (minSymbolIndex == UInt32.MaxValue)
                    {
                        minSymbolIndex = i;
                    }
                }
            }

            UniversalCodec.Lomont.EncodeLomont1(bs, minSymbolIndex, 6, 0);
            UniversalCodec.Lomont.EncodeLomont1(bs, maxSymbolIndex, 6, 0);

            // encode the counts into a scratch stream first so its bit length can precede it
            var fullTableBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
                                                              counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()),
                                                          (b, v) => UniversalCodec.Lomont.EncodeLomont1(b, v, 6, 0)
                                                          );
            UniversalCodec.Lomont.EncodeLomont1(bs, fullTableBs.Length, 6, 0);
            bs.WriteStream(fullTableBs);

            if (Options.HasFlag(OptionFlags.DumpHeader))
            {
                WriteLine($"Arith encode: min symb index {minSymbolIndex} max symb index {maxSymbolIndex} tbl bits {fullTableBs.Length}");
            }

            if (Options.HasFlag(OptionFlags.DumpTable))
            {
                // report the actual symbol range rather than placeholder text
                WriteLine($"Arith table bitsize {bs.Length}, min symbol {minSymbolIndex} max symbol {maxSymbolIndex} min count {minCount} max count {maxCount}");
                for (var i = 0; i < counts.Length; ++i)
                {
                    if (counts[i] != 0)
                    {
                        Write($"[{i},{counts[i]}] ");
                    }
                }
                WriteLine();
            }
            return bs;
        }