/// <summary>
/// Stream version: decodes a whole symbol sequence from a bitstream.
/// Reads the header (which yields the number of symbols), decodes that many
/// symbols one after another, then reads the footer.
/// </summary>
/// <param name="bitstream">Bitstream the header, symbols and footer are read from.</param>
/// <param name="headerFlags">Flags forwarded unchanged to <c>ReadHeader</c>.</param>
/// <returns>A new datastream holding the decoded symbols in decode order.</returns>
public Datastream DecompressFromStream(Bitstream bitstream, Header.HeaderFlags headerFlags)
{
    // The header read reports how many symbols follow.
    uint remaining = ReadHeader(bitstream, headerFlags);

    var decoded = new Datastream();
    while (remaining > 0)
    {
        decoded.Add(DecompressSymbol(bitstream));
        --remaining;
    }

    // The footer is read only after every symbol has been decoded.
    ReadFooter(bitstream);
    return decoded;
}
/// <summary>
/// Create the frequency table as a bitstream for ease of use/testing.
/// </summary>
/// <remarks>
/// Layout written, in order:
/// <list type="number">
/// <item>lowest symbol index with a nonzero count, Lomont1 universal coded;</item>
/// <item>highest symbol index with a nonzero count, Lomont1 universal coded;</item>
/// <item>number of bits in the count table, Lomont1 universal coded (allows jumping past the table);</item>
/// <item>the full table: the counts of every index from min to max inclusive, BASC encoded.</item>
/// </list>
/// Findings recorded by the original author that led to this format: of Elias,
/// optimized Golomb and BASC, BASC was slightly best for storing counts, and a
/// sparse table (BASC on present counts plus BASC on present symbol indices) was
/// much bigger than the full table in every file tested, so only the full table
/// type is emitted.
/// </remarks>
/// <returns>A new bitstream holding the encoded frequency table.</returns>
/// <exception cref="InvalidOperationException">
/// There are no counts, or no symbol has a nonzero count.
/// </exception>
Bitstream MakeFrequencyTable()
{
    // A single pass gathers the used symbol range (needed for the header) and
    // the count extremes (only reported by the DumpTable diagnostic).
    var anySymbolUsed = false;
    uint minSymbolIndex = 0;
    uint maxSymbolIndex = 0;
    uint minCount = UInt32.MaxValue;
    uint maxCount = 0;
    for (var i = 0U; i < counts.Length; ++i)
    {
        uint count = counts[i];
        if (count == 0)
        {
            continue;
        }

        if (!anySymbolUsed)
        {
            anySymbolUsed = true;
            minSymbolIndex = i;
        }

        maxSymbolIndex = i;
        minCount = Math.Min(minCount, count);
        maxCount = Math.Max(maxCount, count);
    }

    // Without a used symbol there is no [min, max] range to describe. Fail with
    // the same exception type the previous LINQ Min()/Max() calls raised, but
    // with a message that names the actual problem.
    if (!anySymbolUsed)
    {
        throw new InvalidOperationException(
            "Cannot build a frequency table: no symbol has a nonzero count.");
    }

    var bs = new Bitstream();

    // Header: used symbol index range.
    UniversalCodec.Lomont.EncodeLomont1(bs, minSymbolIndex, 6, 0);
    UniversalCodec.Lomont.EncodeLomont1(bs, maxSymbolIndex, 6, 0);

    // The table is built in its own bitstream first so that its bit length can
    // be written ahead of it.
    var tableEntryCount = (int)(maxSymbolIndex - minSymbolIndex + 1);
    var usedCounts = counts.Skip((int)minSymbolIndex).Take(tableEntryCount).ToArray();
    var fullTableBs = new Bitstream();
    UniversalCodec.BinaryAdaptiveSequentialEncode(
        fullTableBs,
        new Datastream(usedCounts),
        (b, v) => UniversalCodec.Lomont.EncodeLomont1(b, v, 6, 0));

    UniversalCodec.Lomont.EncodeLomont1(bs, fullTableBs.Length, 6, 0);
    bs.WriteStream(fullTableBs);

    if (Options.HasFlag(OptionFlags.DumpHeader))
    {
        WriteLine($"Arith encode: min symb index {minSymbolIndex} max symb index {maxSymbolIndex} tbl bits {fullTableBs.Length}");
    }

    if (Options.HasFlag(OptionFlags.DumpTable))
    {
        WriteLine($"Arith table bitsize {bs.Length}, min symbol {minSymbolIndex} max symbol {maxSymbolIndex} min count {minCount} max count {maxCount}");
        for (var i = 0; i < counts.Length; ++i)
        {
            if (counts[i] != 0)
            {
                Write($"[{i},{counts[i]}] ");
            }
        }
        WriteLine();
    }

    return bs;
}