Beispiel #1
0
        /// <summary>
        /// Decode one symbol: read codeword bits from state.Bitstream and walk the decoding
        /// table row by row until the bits read so far select an entry on the current row.
        /// Each time a row does not match, one more bit is read and the walk moves to the next row.
        /// </summary>
        /// <param name="bitstream">Not referenced by this method; all bits are read from state.Bitstream.</param>
        /// <returns>The symbol stored in the matching table entry.</returns>
        public override uint DecompressSymbol(Bitstream bitstream)
        {
            uint codeBits = 0;     // bits read so far, compared against the codewords of the current row
            uint rowFirstCode = 0; // first codeword on the current table row

            // start with the minimum codeword length worth of bits
            for (uint bitsRead = 0; bitsRead < state.MinCodewordLength; ++bitsRead)
            {
                Trace.Assert(codeBits < 0x80000000); // doubling below would overflow 32 bits otherwise
                codeBits = 2 * codeBits + state.Bitstream.Read(1);
                rowFirstCode <<= 1;
            }

            uint symbol = 0;

            if (Options.HasFlag(OptionFlags.UseLowMemoryDecoding))
            {
                // low memory path: the table is read directly out of the bitstream, starting here
                uint tableBit = state.TablePosition;
                for (;;)
                {
                    // each row begins with the number of codewords it holds
                    uint rowCount = state.Bitstream.ReadFrom(ref tableBit, state.BitsPerCodelengthCount);

                    if (rowCount > 0 && codeBits - rowFirstCode < rowCount)
                    {
                        // the codeword is on this row: step over the preceding symbols and read ours
                        uint offsetInRow = codeBits - rowFirstCode;
                        tableBit += offsetInRow * state.BitsPerSymbol;
                        symbol = state.Bitstream.ReadFrom(ref tableBit, state.BitsPerSymbol);
                        break;
                    }

                    // not on this row: advance past its codewords and lengthen the code by one bit
                    rowFirstCode += rowCount;

                    Trace.Assert(codeBits < 0x80000000); // doubling below would overflow 32 bits otherwise
                    codeBits = 2 * codeBits + state.Bitstream.Read(1);
                    rowFirstCode <<= 1;

                    // step over this row's symbols to reach the next row
                    tableBit += rowCount * state.BitsPerSymbol;
                }
            }
            else
            {
                // full table path: rows were already parsed into state.Table
                int row = 0;
                for (;;)
                {
                    uint rowCount = state.Table[row].Item1;

                    if (rowCount > 0 && codeBits - rowFirstCode < rowCount)
                    {
                        uint offsetInRow = codeBits - rowFirstCode;
                        symbol = state.Table[row].Item2[(int)offsetInRow];
                        break;
                    }

                    // not on this row: advance past its codewords and lengthen the code by one bit
                    rowFirstCode += rowCount;

                    Trace.Assert(codeBits < 0x80000000); // doubling below would overflow 32 bits otherwise
                    codeBits = 2 * codeBits + state.Bitstream.Read(1);
                    rowFirstCode <<= 1;

                    ++row;
                }
            }

            if (Options.HasFlag(OptionFlags.DumpDecoding))
            {
                Write($"{symbol:X2},");
            }

            return symbol;
        }
 /// <summary>
 /// Decode one symbol by reading BitsPerSymbol bits from the stream.
 /// </summary>
 /// <param name="bitstream">Stream the symbol is read from.</param>
 /// <returns>The value produced by the read.</returns>
 public override uint DecompressSymbol(Bitstream bitstream) => bitstream.Read(BitsPerSymbol);
 /// <summary>
 /// Read the stream footer. The body is empty, so this implementation reads nothing.
 /// </summary>
 /// <param name="bitstream">Not used by this implementation.</param>
 public override void ReadFooter(Bitstream bitstream)
 {
 }
 /// <summary>
 /// Encode one symbol by writing it to the stream, passing BitsPerSymbol as the width argument.
 /// </summary>
 /// <param name="bitstream">Stream that receives the symbol.</param>
 /// <param name="symbol">Value to write.</param>
 public override void CompressSymbol(Bitstream bitstream, uint symbol) => bitstream.Write(symbol, BitsPerSymbol);
 /// <summary>
 /// Write the stream footer. The body is empty, so this implementation writes nothing.
 /// </summary>
 /// <param name="bitstream">Not used by this implementation.</param>
 public override void WriteFooter(Bitstream bitstream)
 {
 }
Beispiel #6
0
 /// <summary>
 /// Insert every bit held by another stream into this stream's bit collection at the given index.
 /// No position member is adjusted by this method.
 /// </summary>
 /// <param name="insertPosition">Index in this stream's bits at which the insertion starts.</param>
 /// <param name="bitstream">Stream whose bits are inserted.</param>
 public void InsertStream(uint insertPosition, Bitstream bitstream)
 {
     // todo - position update?
     var incomingBits = bitstream.bits;
     bits.InsertRange((int)insertPosition, incomingBits);
 }
        /// <summary>
        /// Create the frequency table as a bitstream for ease of use/testing.
        /// The active (#if true) layout is: lowest and highest symbol index with a nonzero count,
        /// the bit length of the encoded table, then the counts for that index range.
        /// </summary>
        /// <returns>A new bitstream holding the encoded table.</returns>
        Bitstream MakeFrequencyTable()
        {
            var bs = new Bitstream();
            // write freq tables
            // maxCount/minCount are only used by the DumpTable output at the end of this method.
            // NOTE(review): Enumerable.Min throws InvalidOperationException on an empty sequence,
            // so this line fails when no count is positive.
            uint maxCount = counts.Max();
            uint minCount = counts.Where(c => c > 0).Min();

#if true
            // have determined the following:
            // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
            // Also using BASC for counts present is good.
            // Also determined the sparse table type is much bigger in every file we tested!
            // so, check two types:
            //    1) BASC on all counts, versus
            //    2) BASC on those present for both count and symbol
            // Table thus only full type. Format is
            //   - symbol min index used, max index used, Lomont1 universal coded.
            //   - Number of bits in table, Lomont1 universal coded (allows jumping past)
            //   - Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries

            // find the first and last symbol index whose count is nonzero;
            // UInt32.MaxValue marks "no nonzero count seen yet" for the minimum
            uint minSymbolIndex = UInt32.MaxValue;
            uint maxSymbolIndex = 0;
            for (var i = 0U; i < counts.Length; ++i)
            {
                if (counts[i] != 0)
                {
                    maxSymbolIndex = i;
                    if (minSymbolIndex == UInt32.MaxValue)
                    {
                        minSymbolIndex = i;
                    }
                }
            }

            // header: used symbol index range
            UniversalCodec.Lomont.EncodeLomont1(bs, minSymbolIndex, 6, 0);
            UniversalCodec.Lomont.EncodeLomont1(bs, maxSymbolIndex, 6, 0);

            // encode counts[minSymbolIndex..maxSymbolIndex] (inclusive) into a scratch stream first,
            // so its bit length can be written ahead of the table itself
            var fullTableBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
                                                              counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()),
                                                          (b, v) => UniversalCodec.Lomont.EncodeLomont1(b, v, 6, 0)
                                                          );
            UniversalCodec.Lomont.EncodeLomont1(bs, fullTableBs.Length, 6, 0);
            bs.WriteStream(fullTableBs);

            if (Options.HasFlag(OptionFlags.DumpHeader))
            {
                WriteLine($"Arith encode: min symb index {minSymbolIndex} max symb index {maxSymbolIndex} tbl bits {fullTableBs.Length}");
            }
#else
            // have determined the following:
            // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
            // Also using BASC for counts present is good.
            // Also determined the sparse table type is much bigger in every file we tested!
            // so, check two types:
            //    1) BASC on all counts, versus
            //    2) BASC on those present for both count and symbol
            // Table thus
            //   - symbol min index used + 1, max index used + 1, EliasDelta coded.
            //   - bit denoting table type 0 (full) or 1 (sparse)
            //   - Number of bits in table + 1, elias delta coded (allows jumping past)
            //     0 = Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries
            //     1 = sparse table.
            //         Elias delta for number of counts in table + 1 (same as number of symbols)
            //         Elias delta for bitlength of counts + 1,
            //         BASC counts,
            //         BASC symbols present
            //   - table



            // compute two table lengths:
            uint minSymbolIndex = UInt32.MaxValue;
            uint maxSymbolIndex = 0;
            for (var i = 0U; i < counts.Length; ++i)
            {
                if (counts[i] != 0)
                {
                    maxSymbolIndex = i;
                    if (minSymbolIndex == UInt32.MaxValue)
                    {
                        minSymbolIndex = i;
                    }
                }
            }
            // common header
            UniversalCodec.Elias.EncodeDelta(bs, minSymbolIndex + 1);
            UniversalCodec.Elias.EncodeDelta(bs, maxSymbolIndex + 1);


            var fullTableBs   = new Bitstream();
            var sparseTableBs = new Bitstream();

            UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
                                                              counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()
                                                              ));

            var nonzeroCountIndices =
                counts.Select((c, n) => new { val = c, pos = n })
                .Where(p => p.val > 0)
                .Select(p => (uint)p.pos)
                .ToArray();
            var nonzeroCounts = counts.Where(c => c > 0).ToArray();

            UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));
            UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));

            var tempBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(tempBs, new Datastream(nonzeroCounts));
            uint sparseMidPos = tempBs.Position;

            UniversalCodec.Elias.EncodeDelta(sparseTableBs, sparseMidPos + 1);
            sparseTableBs.WriteStream(tempBs);

            UniversalCodec.BinaryAdaptiveSequentialEncode(sparseTableBs, new Datastream(nonzeroCountIndices));

            Console.WriteLine($"Arith full table {fullTableBs.Length} sparse table {sparseTableBs.Length}");


            // now finish table
            if (fullTableBs.Length < sparseTableBs.Length)
            {
                bs.Write(0); // full table
                UniversalCodec.Elias.EncodeDelta(bs, fullTableBs.Length + 1);

                bs.WriteStream(fullTableBs);
            }
            else
            {
                bs.Write(1); // sparse table
                UniversalCodec.Elias.EncodeDelta(bs, sparseTableBs.Length + 1);

                bs.WriteStream(sparseTableBs);
            }



            // var cc = new CompressionChecker();
            // cc.TestAll("arith",new Datastream(counts)); // all
            // cc.TestAll("arith",new Datastream(counts.Where(c=>c>0).ToArray())); // nonzero
            // BASC wins these tests
            //

#if false
            var allDs     = new Datastream();
            var nonzeroDs = new Datastream();
            for (var i = 0U; i < counts.Length; ++i)
            {
                var index = i;//(uint)(counts.Length - 1 - i);
                allDs.Add(index);
                if (counts[i] != 0)
                {
                    nonzeroDs.Add(index);
                }
            }

            var allBs     = new Bitstream();
            var nonzeroBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(allBs, allDs);
            UniversalCodec.BinaryAdaptiveSequentialEncode(nonzeroBs, nonzeroDs);
            Console.WriteLine($"Arith all {allBs.Length} in ");
            Console.WriteLine($"Arith nonzero {nonzeroBs.Length} in ");

            //foreach (var c in counts)
            //    UniversalCodec.OneParameterCodeDelegate(
            //var ans = UniversalCodec.Optimize(UniversalCodec.Golomb.Encode,counts.ToList(),1,256);
            //bs = ans.Item1;
            // 912 gamma
            // 918 elias delta
            // 988 Omega
            // 1152 bits UniversalCodec.BinaryAdaptiveSequentialEncode(bs,new Datastream(counts));
            // 1265 best Golomb
#endif
#endif
            if (Options.HasFlag(OptionFlags.DumpTable))
            {
                // NOTE(review): the "min symbol" / "max symbol" fields in this message are literal
                // '?' placeholders; only the table size and the min/max counts are filled in.
                WriteLine($"Arith table bitsize {bs.Length}, min symbol ? max symbol ? min count {minCount} max count {maxCount}");
                // list every [symbol index, count] pair with a nonzero count
                for (var i = 0; i < counts.Length; ++i)
                {
                    if (counts[i] != 0)
                    {
                        Write($"[{i},{counts[i]}] ");
                    }
                }
                WriteLine();
            }
            return(bs);
        }
        /// <summary>
        /// Look up a symbol and its cumulative count range by decoding the count table directly
        /// from the bitstream (low memory mode, no table kept in memory).
        /// The stream position is moved to tableStartBitPosition for the walk and is always
        /// restored afterwards, including when a read throws part-way through the table.
        /// </summary>
        /// <param name="bitstream">Stream containing the encoded count table; its Position is preserved.</param>
        /// <param name="cumCount">Cumulative count to locate; counts are accumulated until the running total exceeds it.</param>
        /// <param name="lowCount">Receives the running total before the last count added (0 if the table length is 0).</param>
        /// <param name="highCount">Receives the running total including the last count added (0 if the table length is 0).</param>
        /// <returns>
        /// The index of the last entry with a nonzero count seen during the walk, starting from symbolMin;
        /// 0 if the table length is 0.
        /// </returns>
        uint LookupLowMemoryCount(Bitstream bitstream, uint cumCount, out uint lowCount, out uint highCount)
        {
            // BASC encoded, decode with same process
            // todo - merge with BASC Codec version, make cleaner

            lowCount = highCount = 0;
            uint symbol = 0;

            // swap bit positions to access table
            uint savedPosition = bitstream.Position;
            try
            {
                bitstream.Position = tableStartBitPosition;

                uint length = UniversalCodec.Lomont.DecodeLomont1(bitstream, 6, 0);

                if (length != 0)
                {
                    // first entry: its bit width, then the value itself
                    uint b1 = UniversalCodec.Lomont.DecodeLomont1(bitstream, 6, 0);
                    uint xi = bitstream.Read(b1);

                    highCount = xi;
                    symbol = symbolMin;
                    uint i = symbolMin;

                    while (highCount <= cumCount)
                    {
                        var decision = bitstream.Read(1);
                        if (decision == 0)
                        {
                            // bi is <= b(i-1), so enough bits
                            xi = bitstream.Read(b1);
                        }
                        else
                        {
                            // bi is bigger than b(i-1), must increase it;
                            // the increase is the number of bits read up to and including the first 0 bit
                            uint delta = 0;
                            do
                            {
                                decision = bitstream.Read(1);
                                delta++;
                            } while (decision != 0);
                            b1 += delta;
                            xi = bitstream.Read(b1 - 1); // xi has implied leading 1
                            xi |= 1U << (int)(b1 - 1);
                        }
                        // the next entry is coded relative to the width of this value
                        b1 = BitsRequired(xi);

                        lowCount = highCount;
                        highCount += xi;
                        ++i;
                        if (xi != 0)
                        {
                            symbol = i;
                        }
                    }
                }
            }
            finally
            {
                // restore bit position even if decoding the table failed
                bitstream.Position = savedPosition;
            }

            return symbol;
        }
Beispiel #9
0
 /// <summary>
 /// Compress a symbol in the compression algorithm. The body is empty: nothing is written here.
 /// </summary>
 /// <param name="bitstream">Not used by this implementation.</param>
 /// <param name="symbol">Not used by this implementation.</param>
 public override void CompressSymbol(Bitstream bitstream, uint symbol)
 { // due to how this format is stored, all work was done in the header call
 }
Beispiel #10
0
        /// <summary>
        /// Write the header for the compression algorithm. Due to the vagaries of this format the
        /// entire compressed output is written by this call: the universal header, the largest
        /// distance and smallest length that occur, then the decisions (or decision runs),
        /// the literals, and either the tokens or the separate distance and length streams.
        /// </summary>
        /// <param name="bitstream">Stream that receives the output.</param>
        /// <param name="data">Data to compress.</param>
        /// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
        public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
        {
            // start from empty working streams
            decisions.Clear();
            decisionRuns.Clear();
            literals.Clear();
            distances.Clear();
            lengths.Clear();
            tokens.Clear();

            // populate the working streams from the input
            uint actualMinLength, actualMaxDistance;
            ComputeStreams(data, out actualMinLength, out actualMaxDistance);

            // the whole file is written here; the encode symbol and footer sections are ignored

            // diagnostics to help analyze
            if (Options.HasFlag(OptionFlags.DumpDebug))
            {
                WriteLine("LZCL compress:");
                WriteLine($"  Data length {data.Count} ");
            }

            if (Options.HasFlag(OptionFlags.ShowTallies))
            {
                // tallies to help analyze and make decisions
                Write("Length tally: ");
                Tally(lengths);
                WriteLine();

                Write("Distance tally: ");
                Tally(distances);
                WriteLine();
            }

            // pick a compressor for every stream so the alternatives can be compared by size
            var bestDecisions = GetBestCompressor("decisions", decisions);
            var bestDecisionRuns = GetBestCompressor("decision runs", decisionRuns);
            var bestLiterals = GetBestCompressor("literals", literals);
            var bestTokens = GetBestCompressor("tokens", tokens);
            var bestDistances = GetBestCompressor("distances", distances);
            var bestLengths = GetBestCompressor("lengths", lengths);

            // header values
            Header.WriteUniversalHeader(bitstream, data, headerFlags);

            // max distance occurring is used to encode tokens, and tells users the window size needed
            UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMaxDistance, 10, 0);
            UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMinLength, 2, 0);

            if (Options.HasFlag(OptionFlags.DumpDebug))
            {
                WriteLine($"actual min length {actualMinLength}");
                WriteLine($"Max distance {actualMaxDistance}");
            }

            // decisions: raw decisions versus decision runs, whichever compressed smaller
            bool rawDecisionsSmaller = bestDecisions.Item2.Length < bestDecisionRuns.Item2.Length;
            if (rawDecisionsSmaller)
            {
                bitstream.Write(0); // choice marker: raw decisions
                WriteItem(bitstream, bestDecisions);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Decisions smaller than decision runs");
                }
                StatRecorder.AddStat($"codec used: decisions {bestDecisions.Item1.Name}", 1);
            }
            else
            {
                bitstream.Write(1);            // choice marker: decision runs
                bitstream.Write(decisions[0]); // initial decision value
                WriteItem(bitstream, bestDecisionRuns);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Decisions runs smaller than decisions");
                }
                StatRecorder.AddStat($"codec used: decision runs {bestDecisionRuns.Item1.Name}", 1);
            }

            // literals
            WriteItem(bitstream, bestLiterals);
            StatRecorder.AddStat($"codec used: literals {bestLiterals.Item1.Name}", 1);

            // tokens versus separate distance and length streams, whichever compressed smaller
            bool tokensSmaller = bestTokens.Item2.Length < bestDistances.Item2.Length + bestLengths.Item2.Length;
            if (tokensSmaller)
            {
                bitstream.Write(0); // choice marker: tokens
                WriteItem(bitstream, bestTokens);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Tokens smaller than distance,length pairs");
                }
                StatRecorder.AddStat($"codec used: tokens {bestTokens.Item1.Name}", 1);
            }
            else
            {
                bitstream.Write(1); // choice marker: distance stream followed by length stream
                WriteItem(bitstream, bestDistances);
                WriteItem(bitstream, bestLengths);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Distance,length pairs smaller than tokens");
                }
                StatRecorder.AddStat($"codec used: distances {bestDistances.Item1.Name}", 1);
                StatRecorder.AddStat($"codec used: lengths {bestLengths.Item1.Name}", 1);
            }
        }