Esempio n. 1
0
        // Header flags field, initialised to None.
        // NOTE(review): the earlier note here read "todo - make None to save bits", but the
        // value already is None - confirm whether that TODO is stale.
        Header.HeaderFlags internalFlags = Header.HeaderFlags.None;

        /// <summary>
        /// Append one codec's output to the bitstream as: a 2-bit codec id, the payload
        /// bit length (Lomont1 coded, chunk size 6, chunk delta 0), then the payload bits.
        /// </summary>
        /// <param name="bitstream">Stream being written</param>
        /// <param name="item">Codec type paired with the bits that codec produced</param>
        /// <exception cref="NotImplementedException">The codec type is not one of the four handled here</exception>
        void WriteItem(Bitstream bitstream, Tuple <Type, Bitstream> item)
        {
            Type      codec   = item.Item1;
            Bitstream payload = item.Item2;

            // choose the 2-bit id for the codec; nothing is written for an unknown type
            uint codecId;
            if (codec == typeof(FixedSizeCodec))
            {
                codecId = 0;
            }
            else if (codec == typeof(ArithmeticCodec))
            {
                codecId = 1;
            }
            else if (codec == typeof(HuffmanCodec))
            {
                codecId = 2;
            }
            else if (codec == typeof(GolombCodec))
            {
                codecId = 3;
            }
            else
            {
                throw new NotImplementedException("Unknown compressor type");
            }
            bitstream.Write(codecId, 2);

            // payload size in bits
            UniversalCodec.Lomont.EncodeLomont1(bitstream, payload.Length, 6, 0);
            if (Options.HasFlag(OptionFlags.DumpDebug))
            {
                WriteLine($"Compressor type {codec.Name}, length {payload.Length}");
            }

            // the payload itself
            bitstream.WriteStream(payload);
        }
Esempio n. 2
0
            /// <summary>
            /// Even-Rodeh code for a non-negative integer.
            ///
            ///  - Values below 4 are written in exactly 3 bits and nothing else.
            ///  - Otherwise a chain is built: the value, then the bit length of the value,
            ///    then the bit length of that, and so on until an entry below 8 is reached.
            ///    The chain is written innermost entry first (the entry below 8 in 3 bits,
            ///    every other entry via the single-argument Write), followed by one 0 bit.
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value">Value to encode</param>
            public static void Encode(Bitstream bitstream, uint value)
            {
                // short form: 3 bits, no terminator
                if (value < 4)
                {
                    bitstream.Write(value, 3);
                    return;
                }

                // collect the value and its successive bit lengths
                var  groups  = new Stack <uint>();
                uint current = value;

                for (; current >= 8; current = CodecBase.BitsRequired(current))
                {
                    groups.Push(current);
                }
                groups.Push(current); // first entry below 8 ends the chain

                // emit in reverse order of collection
                while (groups.Count > 0)
                {
                    uint group = groups.Pop();
                    if (group >= 8)
                    {
                        bitstream.Write(group);
                    }
                    else
                    {
                        bitstream.Write(group, 3);
                    }
                }

                // terminator
                bitstream.Write(0, 1);
            }
Esempio n. 3
0
        /// <summary>
        /// Compress a symbol in the compression algorithm.
        /// Each call advances the symbol call counter, then emits queued decisions
        /// (a 1-bit flag followed by either a literal or a packed distance/length token)
        /// until the datum index has caught up with the number of calls.
        /// </summary>
        /// <param name="bitstream">Stream receiving the output</param>
        /// <param name="symbol">Not read by this method; output comes from the decision, literal, distance and length lists</param>
        public override void CompressSymbol(Bitstream bitstream, uint symbol)
        {
            encoderState.SymbolCallIndex++;

            // drain decisions until the output covers every symbol seen so far
            while (encoderState.DatumIndex < encoderState.SymbolCallIndex)
            {
                uint decision = decisions[encoderState.DecisionIndex++];
                bitstream.Write(decision, 1);

                if (decision != 0)
                {
                    // match: pack (length, distance) into a single token
                    uint distance = distances[encoderState.TokenIndex];
                    uint length   = lengths[encoderState.TokenIndex];
                    if (Options.HasFlag(OptionFlags.DumpEncode))
                    {
                        Write($"[{distance},{length}] ");
                    }
                    uint lengthOffset = length - encoderState.ActualMinLength;
                    uint token        = lengthOffset * (encoderState.ActualMaxDistance + 1) + distance;
                    bitstream.Write(token, encoderState.ActualBitsPerToken);
                    ++encoderState.TokenIndex;

                    // a match covers 'length' data items at once
                    encoderState.DatumIndex += (int)length;
                }
                else
                {
                    // literal: one data item, written at the fixed symbol width
                    if (Options.HasFlag(OptionFlags.DumpEncode))
                    {
                        Write($"[{literals[encoderState.LiteralIndex]}] ");
                    }
                    bitstream.Write(literals[encoderState.LiteralIndex++], encoderState.ActualBitsPerSymbol);
                    ++encoderState.DatumIndex;
                }
            }
        }
Esempio n. 4
0
        /// <summary>
        /// Emit the closing bits of the stream. Which of the two endings is written
        /// depends on whether lowValue lies below Range25Percent.
        /// </summary>
        /// <param name="bitstream">Stream receiving the closing bits</param>
        void Finish(Bitstream bitstream)
        {
            // todo - need enough to decode last item, else bitstream must return 0 when empty
            if (lowValue >= Range25Percent)
            {
                // ending "1 0": the trailing scaling zeros are not written because
                // the decoder returns all 0s after end of stream
                bitstream.Write(1);
                bitstream.Write(0);
                return;
            }

            // ending "0 1" followed by scaling + 1 one-bits (final e3 scaling)
            bitstream.Write(0);
            bitstream.Write(1);

            var trailingOnes = scaling + 1;
            for (var written = 0; written < trailingOnes; ++written)
            {
                bitstream.Write(1);
            }
        }
Esempio n. 5
0
            /// <summary>
            /// Encode a 32 bit value into the bitstream using Lomont method 1.
            /// The value is broken into 'chunkSize' bit chunks, least significant first.
            /// Each chunk is preceded by one flag bit: 1 means more chunks follow,
            /// 0 means this is the last chunk. After every non-final chunk the chunk size
            /// is adjusted by 'deltaChunk' (and kept at 1 or more).
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value32">Value to encode</param>
            /// <param name="chunkSize">Bits per chunk, at least 1. A good value is 6</param>
            /// <param name="deltaChunk">Amount added to the chunk size after each chunk. A good starting value is 0</param>
            /// <exception cref="ArgumentOutOfRangeException">chunkSize is zero or negative</exception>
            public static void EncodeLomont1(Bitstream bitstream, uint value32, int chunkSize, int deltaChunk)
            {
                // A non-positive chunk size can never make progress through the value.
                if (chunkSize <= 0)
                {
                    throw new ArgumentOutOfRangeException(nameof(chunkSize), "chunkSize must be at least 1");
                }

                while (true)
                {
                    // 2^chunkSize held in 64 bits. A 32-bit shift would wrap its count at 32,
                    // and a signed (1 << 31) is negative, which made the old loop test always true.
                    ulong chunkLimit = chunkSize >= 32 ? (1UL << 32) : (1UL << chunkSize);
                    uint  mask       = (uint)(chunkLimit - 1);

                    if (value32 < chunkLimit)
                    {
                        bitstream.Write(0, 1); // last chunk
                        bitstream.Write(value32 & mask, (uint)chunkSize);
                        return;
                    }

                    bitstream.Write(1, 1);                            // another chunk
                    bitstream.Write(value32 & mask, (uint)chunkSize); // write chunkSize bits
                    value32 >>= chunkSize;                            // chunkSize < 32 here, else the value would have fit

                    if (deltaChunk != 0)
                    {
                        chunkSize += deltaChunk;
                        if (chunkSize <= 0)
                        {
                            chunkSize = 1;
                        }
                    }
                }
            }
Esempio n. 6
0
            /// <summary>
            /// Encode an integer as: a recursively coded prefix built from the bit length
            /// of the value (see Recurse, parameter k), a single 0 bit, then the value
            /// itself via the single-argument Write.
            /// The 0 bit in front of the final block lets the decoder know it is the last block.
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value">Value to encode</param>
            /// <param name="k">Block size parameter passed to Recurse, which asserts k > 1</param>
            public static void Encode(Bitstream bitstream, uint value, uint k)
            {
                // prefix describing how wide the value is
                Recurse(bitstream, CodecBase.BitsRequired(value), k);

                // last-block marker, then the value
                bitstream.Write(0);
                bitstream.Write(value);
            }
Esempio n. 7
0
            /// <summary>
            /// Gamma style code: with L = bits needed to store the value, write L-1 zero
            /// bits and then the value in L bits (which necessarily starts with a one).
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value32">Value to encode, asserted to be at least 1</param>
            public static void EncodeGamma(Bitstream bitstream, uint value32)
            {
                Trace.Assert(value32 >= 1);
                uint width = CodecBase.BitsRequired(value32);

                // unary length prefix
                for (uint padding = width - 1; padding > 0; --padding)
                {
                    bitstream.Write(0);
                }

                // the value at its natural width
                bitstream.Write(value32, width);
            }
Esempio n. 8
0
        /// <summary>
        /// Compress a symbol in the compression algorithm.
        /// Narrows the [lowValue, highValue] interval using the counts returned for the
        /// symbol, then rescales the interval, writing bits to the stream as it does so.
        /// </summary>
        /// <param name="bitstream">Stream receiving the output bits</param>
        /// <param name="symbol">Symbol to encode; passed to GetCounts</param>
        public override void CompressSymbol(Bitstream bitstream, uint symbol)
        {
            // optional state dump: symbol and current interval bounds in hex
            if (Options.HasFlag(OptionFlags.DumpState))
            {
                Write($"[{symbol:X2},{lowValue:X8},{highValue:X8}] ");
            }

            uint lowCount, highCount;

            // NOTE(review): presumably the cumulative count range for 'symbol' - confirm against GetCounts
            GetCounts(symbol, out lowCount, out highCount);
            // the running total must stay below 2^29
            Trace.Assert(total < (1 << 29));

            // update bounds
            uint step = (highValue - lowValue + 1) / total; // interval open at top gives + 1

            // highValue is computed first because it needs the old lowValue
            highValue = lowValue + step * highCount - 1;    // interval open at top gives -1
            lowValue  = lowValue + step * lowCount;

            // apply e1/e2 scaling to keep ranges in bounds
            // loop runs while the whole interval sits in one half of the range
            while ((highValue < Range50Percent) || (lowValue >= Range50Percent))
            {
                if (highValue < Range50Percent)
                {
                    // lower half: emit 0 and double both bounds
                    bitstream.Write(0);
                    lowValue  = 2 * lowValue;
                    highValue = 2 * highValue + 1;
                    // e3 scaling
                    // flush the pending e3 count as 1 bits, resetting 'scaling' to 0
                    for (; scaling > 0; scaling--)
                    {
                        bitstream.Write(1);
                    }
                }
                else if (lowValue >= Range50Percent)
                {
                    // upper half: emit 1, remove the half offset, then double
                    bitstream.Write(1);
                    lowValue  = 2 * (lowValue - Range50Percent);
                    highValue = 2 * (highValue - Range50Percent) + 1;

                    // e3 scaling
                    // flush the pending e3 count as 0 bits, resetting 'scaling' to 0
                    for (; scaling > 0; scaling--)
                    {
                        bitstream.Write(0);
                    }
                }
            }

            // get e3 scaling value
            // interval inside [Range25Percent, Range75Percent): no bit is written here;
            // the expansion is only counted in 'scaling' and flushed by a later e1/e2 step
            while ((Range25Percent <= lowValue) && (highValue < Range75Percent))
            {
                scaling++;
                lowValue  = 2 * (lowValue - Range25Percent);
                highValue = 2 * (highValue - Range25Percent) + 1;
            }
        }
Esempio n. 9
0
            /// <summary>
            /// Encode a 32 bit value into the bitstream using Lomont method 3.
            /// With n = bits needed for the value: n-1 zero bits, a single 1 bit, then the
            /// value written with a width of n-1 bits (the leading 1 is dropped).
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value32">Value to encode, asserted to be greater than 0</param>
            public static void EncodeLomont3(Bitstream bitstream, uint value32)
            {
                Trace.Assert(value32 > 0);
                uint tailBits = CodecBase.BitsRequired(value32) - 1;

                // unary count of the tail width
                for (uint padding = tailBits; padding > 0; --padding)
                {
                    bitstream.Write(0, 1);
                }
                bitstream.Write(1, 1); // terminates the count

                // value without its leading 1
                bitstream.Write(value32, tailBits);
            }
Esempio n. 10
0
 /// <summary>
 /// Encode a 32 bit value into the bitstream using Lomont method 2.
 /// The value is written a byte at a time, least significant byte first; each byte is
 /// preceded by a flag bit that is 1 when more bytes follow and 0 on the last byte.
 /// </summary>
 /// <param name="bitstream">Stream receiving the code</param>
 /// <param name="value32">Value to encode, asserted to be greater than 0</param>
 public static void EncodeLomont2(Bitstream bitstream, uint value32)
 {
     // A non-zero input still has a non-zero top byte once the low bytes are shifted out,
     // so checking the input up front is the same test as checking the final byte.
     Trace.Assert(value32 > 0);

     for (; value32 > 255; value32 >>= 8)
     {
         bitstream.Write(1, 1);             // more bytes follow
         bitstream.Write(value32 & 255, 8); // low byte
     }

     bitstream.Write(0, 1); // final byte
     bitstream.Write(value32 & 255, 8);
 }
Esempio n. 11
0
            /// <summary>
            /// Encode a 32 bit value into the bitstream using Elias Delta coding.
            /// For a value X greater than 0:
            ///   N = number of bits needed to store X, L = number of bits needed to store N.
            ///   Output is L-1 zero bits, then N in L bits (which starts with a 1),
            ///   then X written with a width of N-1 bits (all but its leading 1).
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value32">Value to encode, asserted to be at least 1</param>
            public static void EncodeDelta(Bitstream bitstream, uint value32)
            {
                Trace.Assert(value32 >= 1);
                uint valueBits  = CodecBase.BitsRequired(value32);
                uint lengthBits = CodecBase.BitsRequired(valueBits);

                // unary prefix giving the width of the length field
                for (uint padding = lengthBits - 1; padding > 0; --padding)
                {
                    bitstream.Write(0, 1);
                }

                // length field, then the value minus its leading 1
                bitstream.Write(valueBits, lengthBits);
                bitstream.Write(value32, valueBits - 1);
            }
Esempio n. 12
0
            /// <summary>
            /// Golomb code, useful for geometric distributions.
            ///
            /// With parameter m, the value is split into q = value / m and r = value % m.
            /// Output is q one-bits, a single zero bit, then r coded by Truncated.Encode
            /// over the range [0, m).
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value">Value to encode</param>
            /// <param name="m">Golomb parameter, asserted to be greater than 0</param>
            public static void Encode(Bitstream bitstream, uint value, uint m)
            {
                Trace.Assert(m > 0);
                uint quotient  = value / m;
                uint remainder = value % m;

                // unary quotient
                var onesWritten = 0;
                while (onesWritten < quotient)
                {
                    bitstream.Write(1);
                    ++onesWritten;
                }
                bitstream.Write(0); // end of quotient

                // remainder in truncated binary
                Truncated.Encode(bitstream, remainder, m);
            }
Esempio n. 13
0
 /// <summary>
 /// Recursive prefix writer. A value below 2^k is written directly in k bits.
 /// Otherwise, with m = FloorLog2(n), the prefix for (m - k) is written first and
 /// then n itself in m + 1 bits.
 /// </summary>
 /// <param name="bitstream">Stream receiving the bits</param>
 /// <param name="n">Value to describe</param>
 /// <param name="k">Base block width, asserted to be greater than 1</param>
 static void Recurse(Bitstream bitstream, uint n, uint k)
 {
     Trace.Assert(k > 1);

     uint directLimit = 1U << (int)k;
     if (n >= directLimit)
     {
         // too large for a k-bit block: describe its width first, then the value
         uint topBit = CodecBase.FloorLog2(n);
         Recurse(bitstream, topBit - k, k);
         bitstream.Write(n, topBit + 1);
         return;
     }

     // base case: fits in a single k-bit block
     bitstream.Write(n, k);
 }
Esempio n. 14
0
 /// <summary>
 /// Exponential Golomb code for x >= 0.
 /// Order 0: with n = bits needed for (x+1), write n-1 zero bits, then (x+1) in n bits.
 /// Order k > 0: code (x >> k) at order 0, then append the low k bits of x.
 /// NOTE(review): x + 1 is computed in uint arithmetic, so x == uint.MaxValue wraps
 /// to 0 unless the build uses checked arithmetic - confirm callers never pass it.
 /// </summary>
 /// <param name="bitstream">Stream receiving the code</param>
 /// <param name="value">Value to encode</param>
 /// <param name="k">Order of the code</param>
 public static void EncodeExp(Bitstream bitstream, uint value, uint k)
 {
     if (k == 0)
     {
         // order 0: zero prefix one shorter than the payload, then the payload
         uint payload = value + 1;
         uint width   = CodecBase.BitsRequired(payload);
         bitstream.Write(0, width - 1);
         bitstream.Write(payload, width);
         return;
     }

     // higher order: high part at order 0, low k bits verbatim
     EncodeExp(bitstream, value >> (int)k, 0);
     uint lowBits = value & ((1U << (int)k) - 1);
     bitstream.Write(lowBits, k);
 }
Esempio n. 15
0
            /// <summary>
            /// Truncated binary code, useful for encoding a value in [0,N).
            /// With k = BitsRequired(N) and u = 2^k - N unused codewords:
            /// values below u are written in k-1 bits, all others as (value + u) in k bits.
            /// When every value is below u (for example N a power of two, assuming
            /// BitsRequired returns the bit length of N) all values take k-1 bits.
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value">Value to encode, asserted to be less than n</param>
            /// <param name="n">Exclusive upper bound of the value range</param>
            public static void Encode(Bitstream bitstream, uint value, uint n)
            {
                Trace.Assert(value < n);
                uint k = CodecBase.BitsRequired(n);

                // u = number of unused codewords.
                // 2^k is formed in 64 bits: a 32-bit shift wraps its count at 32, so for
                // k == 32 (n >= 2^31) "1U << k" would be 1 rather than 2^32 and u would be
                // off by one, making codewords collide. For k < 32 the result is unchanged.
                uint u = (uint)((1UL << (int)k) - n);

                if (value < u)
                {
                    bitstream.Write(value, k - 1);
                }
                else
                {
                    bitstream.Write(value + u, k);
                }
            }
Esempio n. 16
0
            /// <summary>
            /// Omega style recursive length code. Starting from the value, each entry other
            /// than 1 is recorded and replaced by (bits needed for it) - 1 until 1 is reached.
            /// The recorded entries are then written last-recorded first via the
            /// single-argument Write, followed by a single 0 bit.
            /// </summary>
            /// <param name="bitstream">Stream receiving the code</param>
            /// <param name="value32">Value to encode, asserted to be at least 1</param>
            public static void EncodeOmega(Bitstream bitstream, uint value32)
            {
                Trace.Assert(value32 >= 1);
                var groups = new Stack <uint>();

                // collect the value and its successive (bit length - 1) entries
                for (uint current = value32; current != 1; current = CodecBase.BitsRequired(current) - 1)
                {
                    groups.Push(current);
                }

                // shortest group first
                while (groups.Count > 0)
                {
                    bitstream.Write(groups.Pop());
                }

                // terminator
                bitstream.Write(0);
            }
Esempio n. 17
0
        /// <summary>
        /// Binary adaptive sequential coding of a stream of numbers.
        /// Layout:
        ///   - item count + 1, written with the supplied universal coder (the +1 avoids 0);
        ///   - for the first item x0 needing b0 bits: b0 via the universal coder, then x0
        ///     via the single-argument Write;
        ///   - for each later item xi needing bi bits, with p = bits needed by the previous item:
        ///       bi <= p : a 0 bit, then xi in p bits;
        ///       bi >  p : (bi - p) one-bits, a 0 bit, then xi written with a width of bi - 1
        ///                 bits (the leading 1 is implied, xi > 0).
        /// TODO - alternatives - allow the bi to change slowly, removes some hiccups for odd data points, set b to avg of some prev values
        /// </summary>
        /// <param name="bitstream">Stream receiving the code</param>
        /// <param name="data">Numbers to encode</param>
        /// <param name="universalCoder">How to encode the data length and the first bit length. Elias.EncodeDelta is useful</param>
        public static void BinaryAdaptiveSequentialEncode(Bitstream bitstream, Datastream data, Action <Bitstream, uint> universalCoder)
        {
            universalCoder(bitstream, (uint)data.Count + 1);
            if (data.Count == 0)
            {
                return;
            }

            // first item: explicit width, then the item
            uint previousBits = CodecBase.BitsRequired(data[0]);
            universalCoder(bitstream, previousBits);
            bitstream.Write(data[0]);

            for (var index = 1; index < data.Count; ++index)
            {
                uint datum       = data[index];
                uint currentBits = CodecBase.BitsRequired(datum);

                if (currentBits > previousBits)
                {
                    // wider than the previous item: say by how many bits, in unary
                    Trace.Assert(datum > 0);
                    uint extraBits = currentBits - previousBits;
                    for (var ones = 0; ones < extraBits; ++ones)
                    {
                        bitstream.Write(1);
                    }
                    bitstream.Write(0, 1);                   // end of bit count
                    bitstream.Write(datum, currentBits - 1); // leading '1' is implied
                }
                else
                {
                    // previous width is enough
                    bitstream.Write(0);
                    bitstream.Write(datum, previousBits);
                }

                previousBits = currentBits; // width reference for the next item
            }
        }
Esempio n. 18
0
        /// <summary>
        /// Compress a symbol: look up its leaf and write the leaf's codeword to the
        /// stream one bit at a time, highest bit index first.
        /// </summary>
        /// <param name="bitstream">Stream receiving the codeword</param>
        /// <param name="symbol">Symbol to encode; matched against the Symbol of each leaf</param>
        public override void CompressSymbol(Bitstream bitstream, uint symbol)
        {
            var leaf = leaves.Find(candidate => candidate.Symbol == symbol);

            // most significant bit first
            int bitIndex = (int)leaf.Codeword.BitLength;
            while (--bitIndex >= 0)
            {
                bitstream.Write(leaf.Codeword.GetBit((uint)bitIndex), 1);
            }

            if (Options.HasFlag(OptionFlags.DumpEncoding))
            {
                Write($"{symbol:X2},");
            }
        }
Esempio n. 19
0
        /// <summary>
        /// Write the header for the compression algorithm.
        /// For this format the header call emits the complete compressed output: the
        /// universal header, the max distance and min length actually used, and then the
        /// decision, literal and token (or distance + length) streams, each stored with
        /// whichever codec GetBestCompressor picked for it.
        /// </summary>
        /// <param name="bitstream">Stream receiving the output</param>
        /// <param name="data">Data to compress</param>
        /// <param name="headerFlags">Flags telling what to put in the header. Useful when embedding in other streams.</param>
        public override void WriteHeader(Bitstream bitstream, Datastream data, Header.HeaderFlags headerFlags)
        {
            // start from empty working streams
            decisions.Clear();
            decisionRuns.Clear();
            literals.Clear();
            distances.Clear();
            lengths.Clear();
            tokens.Clear();

            // populate the working streams from the input
            uint actualMinLength, actualMaxDistance;
            ComputeStreams(data, out actualMinLength, out actualMaxDistance);

            // due to the vagaries of this format, the entire file is written in the header
            // call, and the encode symbol and footer sections are unfortunately ignored

            // diagnostics
            if (Options.HasFlag(OptionFlags.DumpDebug))
            {
                WriteLine("LZCL compress:");
                WriteLine($"  Data length {data.Count} ");
            }

            if (Options.HasFlag(OptionFlags.ShowTallies))
            {
                // tallies to help analysis and tuning
                Write("Length tally: ");
                Tally(lengths);
                WriteLine();

                Write("Distance tally: ");
                Tally(distances);
                WriteLine();
            }

            // compress every stream up front so the layouts can be compared
            var bestDecisions    = GetBestCompressor("decisions", decisions);
            var bestDecisionRuns = GetBestCompressor("decision runs", decisionRuns);
            var bestLiterals     = GetBestCompressor("literals", literals);
            var bestTokens       = GetBestCompressor("tokens", tokens);
            var bestDistances    = GetBestCompressor("distances", distances);
            var bestLengths      = GetBestCompressor("lengths", lengths);

            // universal header first
            Header.WriteUniversalHeader(bitstream, data, headerFlags);

            // max distance occurring is used to encode tokens, and tells users the window size needed
            UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMaxDistance, 10, 0);
            UniversalCodec.Lomont.EncodeLomont1(bitstream, actualMinLength, 2, 0);

            if (Options.HasFlag(OptionFlags.DumpDebug))
            {
                WriteLine($"actual min length {actualMinLength}");
                WriteLine($"Max distance {actualMaxDistance}");
            }

            // decisions: raw decision bits, or first decision + run lengths
            if (bestDecisions.Item2.Length >= bestDecisionRuns.Item2.Length)
            {
                // selector bit 1 = run form, which also needs the initial decision value
                bitstream.Write(1);
                bitstream.Write(decisions[0]);
                WriteItem(bitstream, bestDecisionRuns);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Decisions runs smaller than decisions");
                }
                StatRecorder.AddStat($"codec used: decision runs {bestDecisionRuns.Item1.Name}", 1);
            }
            else
            {
                // selector bit 0 = plain decisions
                bitstream.Write(0);
                WriteItem(bitstream, bestDecisions);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Decisions smaller than decision runs");
                }
                StatRecorder.AddStat($"codec used: decisions {bestDecisions.Item1.Name}", 1);
            }

            // literals are always stored
            WriteItem(bitstream, bestLiterals);
            StatRecorder.AddStat($"codec used: literals {bestLiterals.Item1.Name}", 1);

            // matches: combined tokens, or separate distance and length streams
            if (bestTokens.Item2.Length >= bestDistances.Item2.Length + bestLengths.Item2.Length)
            {
                // selector bit 1 = separate streams, distances before lengths
                bitstream.Write(1);
                WriteItem(bitstream, bestDistances);
                WriteItem(bitstream, bestLengths);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Distance,length pairs smaller than tokens");
                }
                StatRecorder.AddStat($"codec used: distances {bestDistances.Item1.Name}", 1);
                StatRecorder.AddStat($"codec used: lengths {bestLengths.Item1.Name}", 1);
            }
            else
            {
                // selector bit 0 = combined tokens
                bitstream.Write(0);
                WriteItem(bitstream, bestTokens);
                if (Options.HasFlag(OptionFlags.DumpDebug))
                {
                    WriteLine("Tokens smaller than distance,length pairs");
                }
                StatRecorder.AddStat($"codec used: tokens {bestTokens.Item1.Name}", 1);
            }
        }
Esempio n. 20
0
        /// <summary>
        /// Create the frequency table as a bitstream for ease of use/testing.
        /// Active layout (the "#if true" arm): min and max used symbol index (Lomont1),
        /// the bit length of the table (Lomont1), then the counts from min to max index
        /// inclusive, BASC encoded with Lomont1 as the universal coder.
        /// </summary>
        /// <returns>A new bitstream holding the encoded table</returns>
        Bitstream MakeFrequencyTable()
        {
            var bs = new Bitstream();
            // write freq tables
            // maxCount/minCount are only read by the DumpTable output at the end.
            // NOTE(review): Min() over an empty sequence throws, so this line fails when
            // no count is positive - confirm counts always has a non-zero entry.
            uint maxCount = counts.Max();
            uint minCount = counts.Where(c => c > 0).Min();

#if true
            // have determined the following:
            // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
            // Also using BASC for counts present is good.
            // Also determined the sparse table type is much bigger in every file we tested!
            // so, check two types:
            //    1) BASC on all counts, versus
            //    2) BASC on those present for both count and symbol
            // Table thus only full type. Format is
            //   - symbol min index used, max index used, Lomont1 universal coded.
            //   - Number of bits in table, Lomont1 universal coded (allows jumping past)
            //   - Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries

            // find the first and last index with a non-zero count;
            // UInt32.MaxValue marks "no minimum found yet"
            uint minSymbolIndex = UInt32.MaxValue;
            uint maxSymbolIndex = 0;
            for (var i = 0U; i < counts.Length; ++i)
            {
                if (counts[i] != 0)
                {
                    maxSymbolIndex = i;
                    if (minSymbolIndex == UInt32.MaxValue)
                    {
                        minSymbolIndex = i;
                    }
                }
            }

            UniversalCodec.Lomont.EncodeLomont1(bs, minSymbolIndex, 6, 0);
            UniversalCodec.Lomont.EncodeLomont1(bs, maxSymbolIndex, 6, 0);

            // the table goes into its own stream first so its bit length can be written ahead of it
            var fullTableBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
                                                              counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()),
                                                          (b, v) => UniversalCodec.Lomont.EncodeLomont1(b, v, 6, 0)
                                                          );
            UniversalCodec.Lomont.EncodeLomont1(bs, fullTableBs.Length, 6, 0);
            bs.WriteStream(fullTableBs);

            if (Options.HasFlag(OptionFlags.DumpHeader))
            {
                WriteLine($"Arith encode: min symb index {minSymbolIndex} max symb index {maxSymbolIndex} tbl bits {fullTableBs.Length}");
            }
#else
            // Inactive arm: not compiled while the directive above is "#if true".
            // NOTE(review): the BinaryAdaptiveSequentialEncode calls below pass two arguments,
            // unlike the three-argument call in the active arm - this arm may be stale.
            // have determined the following:
            // Of all three Elias, Golomb optimized, BASC, that BASC is slightly best for storing counts
            // Also using BASC for counts present is good.
            // Also determined the sparse table type is much bigger in every file we tested!
            // so, check two types:
            //    1) BASC on all counts, versus
            //    2) BASC on those present for both count and symbol
            // Table thus
            //   - symbol min index used + 1, max index used + 1, EliasDelta coded.
            //   - bit denoting table type 0 (full) or 1 (sparse)
            //   - Number of bits in table + 1, elias delta coded (allows jumping past)
            //     0 = Full table. Counts are BASC encoded, maxIndex - minIndex+1 entries
            //     1 = sparse table.
            //         Elias delta for number of counts in table + 1 (same as number of symbols)
            //         Elias delta for bitlength of counts + 1,
            //         BASC counts,
            //         BASC symbols present
            //   - table



            // compute two table lengths:
            uint minSymbolIndex = UInt32.MaxValue;
            uint maxSymbolIndex = 0;
            for (var i = 0U; i < counts.Length; ++i)
            {
                if (counts[i] != 0)
                {
                    maxSymbolIndex = i;
                    if (minSymbolIndex == UInt32.MaxValue)
                    {
                        minSymbolIndex = i;
                    }
                }
            }
            // common header
            UniversalCodec.Elias.EncodeDelta(bs, minSymbolIndex + 1);
            UniversalCodec.Elias.EncodeDelta(bs, maxSymbolIndex + 1);


            var fullTableBs   = new Bitstream();
            var sparseTableBs = new Bitstream();

            UniversalCodec.BinaryAdaptiveSequentialEncode(fullTableBs, new Datastream(
                                                              counts.Skip((int)minSymbolIndex).Take((int)(maxSymbolIndex - minSymbolIndex + 1)).ToArray()
                                                              ));

            var nonzeroCountIndices =
                counts.Select((c, n) => new { val = c, pos = n })
                .Where(p => p.val > 0)
                .Select(p => (uint)p.pos)
                .ToArray();
            var nonzeroCounts = counts.Where(c => c > 0).ToArray();

            UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));
            UniversalCodec.Elias.EncodeDelta(sparseTableBs, (uint)(nonzeroCounts.Length + 1));

            var tempBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(tempBs, new Datastream(nonzeroCounts));
            uint sparseMidPos = tempBs.Position;

            UniversalCodec.Elias.EncodeDelta(sparseTableBs, sparseMidPos + 1);
            sparseTableBs.WriteStream(tempBs);

            UniversalCodec.BinaryAdaptiveSequentialEncode(sparseTableBs, new Datastream(nonzeroCountIndices));

            Console.WriteLine($"Arith full table {fullTableBs.Length} sparse table {sparseTableBs.Length}");


            // now finish table
            if (fullTableBs.Length < sparseTableBs.Length)
            {
                bs.Write(0); // full table
                UniversalCodec.Elias.EncodeDelta(bs, fullTableBs.Length + 1);

                bs.WriteStream(fullTableBs);
            }
            else
            {
                bs.Write(1); // sparse table
                UniversalCodec.Elias.EncodeDelta(bs, sparseTableBs.Length + 1);

                bs.WriteStream(sparseTableBs);
            }



            // var cc = new CompressionChecker();
            // cc.TestAll("arith",new Datastream(counts)); // all
            // cc.TestAll("arith",new Datastream(counts.Where(c=>c>0).ToArray())); // nonzero
            // BASC wins these tests
            //

#if false
            var allDs     = new Datastream();
            var nonzeroDs = new Datastream();
            for (var i = 0U; i < counts.Length; ++i)
            {
                var index = i;//(uint)(counts.Length - 1 - i);
                allDs.Add(index);
                if (counts[i] != 0)
                {
                    nonzeroDs.Add(index);
                }
            }

            var allBs     = new Bitstream();
            var nonzeroBs = new Bitstream();
            UniversalCodec.BinaryAdaptiveSequentialEncode(allBs, allDs);
            UniversalCodec.BinaryAdaptiveSequentialEncode(nonzeroBs, nonzeroDs);
            Console.WriteLine($"Arith all {allBs.Length} in ");
            Console.WriteLine($"Arith nonzero {nonzeroBs.Length} in ");

            //foreach (var c in counts)
            //    UniversalCodec.OneParameterCodeDelegate(
            //var ans = UniversalCodec.Optimize(UniversalCodec.Golomb.Encode,counts.ToList(),1,256);
            //bs = ans.Item1;
            // 912 gamma
            // 918 elias delta
            // 988 Omega
            // 1152 bits UniversalCodec.BinaryAdaptiveSequentialEncode(bs,new Datastream(counts));
            // 1265 best Golomb
#endif
#endif
            // optional dump of the table size and every non-zero (index, count) pair;
            // the "?" after "min symbol" and "max symbol" are literal characters in the output
            if (Options.HasFlag(OptionFlags.DumpTable))
            {
                WriteLine($"Arith table bitsize {bs.Length}, min symbol ? max symbol ? min count {minCount} max count {maxCount}");
                for (var i = 0; i < counts.Length; ++i)
                {
                    if (counts[i] != 0)
                    {
                        Write($"[{i},{counts[i]}] ");
                    }
                }
                WriteLine();
            }
            return(bs);
        }
Esempio n. 21
0
 /// <summary>
 /// Emit one symbol into the output: the symbol value is written
 /// directly to the bitstream using BitsPerSymbol as the bit width.
 /// </summary>
 /// <param name="bitstream">Destination bitstream that receives the symbol</param>
 /// <param name="symbol">Symbol value to write</param>
 public override void CompressSymbol(Bitstream bitstream, uint symbol)
     => bitstream.Write(symbol, BitsPerSymbol);
Esempio n. 22
0
        /// <summary>
        /// Given the leaf nodes, create a canonical Huffman compression table.
        /// Layout of the returned bitstream, in order:
        ///    Lomont1 code (parameters 3, 0) of bitsPerSymbol - 1
        ///    Lomont1 code (parameters 3, 0) of bitsPerCodelengthCount - 1
        ///    Lomont1 code (parameters 2, 0) of minCodewordLength - 1
        ///    Lomont1 code (parameters 4, -1) of (maxCodewordLength - minCodewordLength) - 1
        /// Then, for each codeword length from the minimum to the maximum:
        ///    the number of symbols of that length, in bitsPerCodelengthCount bits,
        ///    followed by that many symbols, each in bitsPerSymbol bits.
        /// </summary>
        /// <param name="leaves1">Non-empty list of leaf nodes. Symbols are emitted in list order,
        /// so the list is presumably already sorted by increasing codeword length - TODO confirm
        /// against the caller.</param>
        /// <returns>A new bitstream holding the table header followed by the table entries</returns>
        Bitstream MakeTable(List <Node> leaves1)
        {
            Trace.Assert(leaves1.Count > 0);

            // longest and shortest codeword
            uint maxCodewordLength = leaves1.Max(n => n.Codeword.BitLength);
            uint minCodewordLength = leaves1.Min(n => n.Codeword.BitLength);

            WriteLine($"Min, max codeword lengths {minCodewordLength} {maxCodewordLength}");

            // get counts of each codeword length in a single pass over the leaves;
            // slot k holds the number of codewords of length minCodewordLength + k
            var codewordLengthCounts = new int[(int)(maxCodewordLength - minCodewordLength) + 1];

            foreach (var leaf in leaves1)
            {
                ++codewordLengthCounts[(int)(leaf.Codeword.BitLength - minCodewordLength)];
            }

            if (Options.HasFlag(OptionFlags.LogCodewordLengths))
            {
                for (var codewordLength = minCodewordLength; codewordLength <= maxCodewordLength; ++codewordLength)
                {
                    var count = codewordLengthCounts[(int)(codewordLength - minCodewordLength)];
                    StatRecorder.AddStat($"Huffman_Codeword_{codewordLength}", (uint)count);
                }
            }

            Trace.Assert(codewordLengthCounts.Sum() == leaves1.Count);

            // bits for each symbol to store, sized by the largest symbol value
            uint bitsPerSymbol = BitsRequired(leaves1.Max(n => n.Symbol));

            // bits for each per-length count to store, sized by the largest count
            var bitsPerCodelengthCount = BitsRequired((uint)codewordLengthCounts.Max());

            if (Options.HasFlag(OptionFlags.DumpDictionary))
            {
                // write table for debugging
                WriteLine("Make huffman tree:");
                for (var length = minCodewordLength; length <= maxCodewordLength; ++length)
                {
                    Write($"  {length,3}: {codewordLengthCounts[(int)(length - minCodewordLength)],3} -> ");
                    var length1 = length; // avoid modified closure
                    foreach (var s in leaves1.Where(n => n.Codeword.BitLength == length1))
                    {
                        Write($"x{s.Symbol:X2}, ");
                    }
                    WriteLine();
                }
            }

            // now write the bit sizes of each entry type, then counts of distinct lengths, then the symbols
            var bs = new Bitstream();

            // save the minimum codeword length and the delta to the max codeword length
            uint deltaCodewordLength = maxCodewordLength - minCodewordLength;

            // all header values
            UniversalCodec.Lomont.EncodeLomont1(bs, bitsPerSymbol - 1, 3, 0);          // 1-32, usually 8, subtracting 1 gives 7, fits in 3 bits
            UniversalCodec.Lomont.EncodeLomont1(bs, bitsPerCodelengthCount - 1, 3, 0); // usually 4,5,6
            UniversalCodec.Lomont.EncodeLomont1(bs, minCodewordLength - 1, 2, 0);      // quite often 1,2,3,4, usually small
            // NOTE(review): when every codeword has the same length deltaCodewordLength is 0 and this
            // unsigned subtraction wraps to uint.MaxValue in an unchecked context. Left as is because
            // the decoder presumably adds the 1 back - confirm before changing the stored value.
            UniversalCodec.Lomont.EncodeLomont1(bs, deltaCodewordLength - 1, 4, -1);   // 9-12, up to 16,17

            if (Options.HasFlag(OptionFlags.DumpHeader))
            {
                WriteLine("Huffman encode header:");
                WriteLine($"   bits per symbol {bitsPerSymbol} bits per code length count {bitsPerCodelengthCount}");
                WriteLine($"   min len code {minCodewordLength} delta code len {deltaCodewordLength}");
            }

            // write table - one entry for each codeword length in [min, max], entry is count then symbols
            // (lengths with no codewords still get an entry with a count of 0)
            int symbolIndex = 0;

            for (uint length = minCodewordLength; length <= maxCodewordLength; ++length)
            {
                int count = codewordLengthCounts[(int)(length - minCodewordLength)];
                bs.Write((uint)count, bitsPerCodelengthCount);
                // write 'count' symbols, consumed sequentially from the leaf list
                for (int j = 0; j < count; ++j)
                {
                    bs.Write(leaves1[symbolIndex++].Symbol, bitsPerSymbol);
                }
            }
            return(bs);
        }