public override ushort Read(int count)
{
    this.CheckBuffer();

    if (this.remainingBits < count)
    {
        // Not enough bits buffered: use what is left, refill the buffer from
        // the stream, and take the remainder from the top of the fresh word.
        // (E.g. with 3 bits buffered and count == 8, the 3 leftover bits form
        // the high bits of the result and 5 bits come from the new word.)
        int delta = count - this.remainingBits;
        ushort lowBits = (ushort)(this.byteBuffer << delta);
        this.byteBuffer = BigEndian.Read2(this.stream);
        this.remainingBits = 16 - delta;
        ushort highBits = (ushort)(this.byteBuffer >> this.remainingBits);
        this.byteBuffer ^= (ushort)(highBits << this.remainingBits);
        return (ushort)(lowBits | highBits);
    }

    // Enough bits are buffered: peel them off the top of the buffer.
    this.remainingBits -= count;
    ushort bits = (ushort)(this.byteBuffer >> this.remainingBits);
    this.byteBuffer ^= (ushort)(bits << this.remainingBits);
    return bits;
}
public override bool Push(bool bit)
{
    bool flushed = false;

    if (this.waitingBits >= 16)
    {
        // The buffer is full: write the complete word out and start over.
        BigEndian.Write2(this.stream, this.byteBuffer);
        this.waitingBits = 0;
        this.byteBuffer = 0;
        flushed = true;
    }

    if (bit)
    {
        // Bits are pushed from the most significant end downwards.
        this.byteBuffer |= (ushort)(0x8000 >> this.waitingBits);
    }

    ++this.waitingBits;
    return flushed;
}
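// A minimal, self-contained sketch of the buffering scheme the two methods
// above implement (illustrative only; BitBufferSketch and Demo are not part
// of this library). Bits are pushed MSB-first into a 16-bit word, and reads
// peel bits off the top, exactly as Push and Read do.
internal static class BitBufferSketch
{
    public static void Demo()
    {
        ushort buffer = 0;
        int waitingBits = 0;

        // Push the bit pattern 1011, most significant bit first.
        foreach (bool bit in new[] { true, false, true, true })
        {
            if (bit)
            {
                buffer |= (ushort)(0x8000 >> waitingBits);
            }

            ++waitingBits;
        }

        // Read the 4 bits back off the top of the word.
        int remainingBits = 16 - 4;
        ushort bits = (ushort)(buffer >> remainingBits);
        System.Console.WriteLine(bits); // 11 (0b1011)
    }
}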
internal static void EncodeModuled(Stream source, Stream destination)
{
    int size = (int)(source.Length - source.Position);
    byte[] buffer = new byte[size];
    source.Read(buffer, 0, size);
    int pos = 0;

    if (size > 0xffff)
    {
        throw new CompressionException(Properties.Resources.KosinskiPlusTotalSizeTooLarge);
    }

    // The moduled format compresses the input as independent modules of at
    // most 0x1000 bytes each, preceded by a single 16-bit header giving the
    // total uncompressed size.
    int remainingSize = size;
    int compBytes = 0;

    if (remainingSize > 0x1000)
    {
        remainingSize = 0x1000;
    }

    BigEndian.Write2(destination, (ushort)size);

    for (;;)
    {
        EncodeInternal(destination, buffer, pos, remainingSize);

        compBytes += remainingSize;
        pos += remainingSize;

        if (compBytes >= size)
        {
            break;
        }

        remainingSize = Math.Min(0x1000, size - compBytes);
    }
}
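// A quick worked example of the module split above (a sketch; ModuleSplitSketch
// is not part of this library): a 0x2800-byte input is written as modules of
// 0x1000, 0x1000, and 0x800 bytes, each compressed independently.
internal static class ModuleSplitSketch
{
    public static void Demo()
    {
        int size = 0x2800;
        for (int pos = 0; pos < size; pos += 0x1000)
        {
            int moduleSize = System.Math.Min(0x1000, size - pos);
            System.Console.WriteLine($"module at 0x{pos:X}: 0x{moduleSize:X} bytes");
        }
    }
}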
internal static void Decode(Stream source, Stream destination, Endianness headerEndianness)
{
    long decompressedBytes = 0;
    long fullSize;

    if (headerEndianness == Endianness.BigEndian)
    {
        fullSize = BigEndian.Read2(source);
    }
    else
    {
        fullSize = LittleEndian.Read2(source);
    }

    for (;;)
    {
        DecodeInternal(source, destination, ref decompressedBytes);

        if (decompressedBytes >= fullSize)
        {
            break;
        }

        // Skip the padding between modules: each module is padded so that the
        // next one starts on a 16-byte boundary, counted from just after the
        // 2-byte size header.
        long paddingEnd = (((source.Position - 2) + 0xF) & ~0xF) + 2;
        while (source.Position < paddingEnd)
        {
            int b = source.ReadByte();
            if (b == -1)
            {
                throw new EndOfStreamException();
            }
        }
    }
}
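// The padding arithmetic above rounds the position (minus the 2-byte size
// header) up to the next multiple of 16, then re-adds the header offset.
// A quick check of that expression (illustrative only; PaddingSketch is not
// part of this library):
internal static class PaddingSketch
{
    public static void Demo()
    {
        foreach (long position in new long[] { 0x12, 0x21, 0x32 })
        {
            long paddingEnd = (((position - 2) + 0xF) & ~0xF) + 2;

            // Prints: 0x12 -> 0x12, 0x21 -> 0x22, 0x32 -> 0x32
            System.Console.WriteLine($"0x{position:X} -> 0x{paddingEnd:X}");
        }
    }
}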
private static void EncodeInternal(Stream destination, byte[] buffer, long pos, long slidingWindow, long recLength, long size)
{
    UInt16LEOutputBitStream bitStream = new UInt16LEOutputBitStream(destination);
    MemoryStream data = new MemoryStream();

    if (size > 0)
    {
        long bPointer = 1, iOffset = 0;
        bitStream.Push(true);
        NeutralEndian.Write1(data, buffer[pos]);

        while (bPointer < size)
        {
            // Search the sliding window for the longest match ending at the
            // current position.
            long iCount = Math.Min(recLength, size - bPointer);
            long iMax = Math.Max(bPointer - slidingWindow, 0);
            long k = 1;
            long i = bPointer - 1;

            do
            {
                long j = 0;
                while (buffer[pos + i + j] == buffer[pos + bPointer + j])
                {
                    if (++j >= iCount)
                    {
                        break;
                    }
                }

                if (j > k)
                {
                    k = j;
                    iOffset = i;
                }
            } while (i-- > iMax);

            iCount = k;

            if (iCount == 1)
            {
                // Uncompressed byte: a set descriptor bit followed by the literal.
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, buffer[pos + bPointer]);
            }
            else if (iCount == 2 && bPointer - iOffset > 256)
            {
                // A two-byte match beyond the inline range gains nothing over
                // literals, so emit the byte uncompressed instead.
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, buffer[pos + bPointer]);
                --iCount;
            }
            else if (iCount < 6 && bPointer - iOffset <= 256)
            {
                // Inline match: descriptor bits 00, two bits of count - 2, then
                // an 8-bit complemented offset.
                Push(bitStream, false, destination, data);
                Push(bitStream, false, destination, data);
                Push(bitStream, (((iCount - 2) >> 1) & 1) != 0, destination, data);
                Push(bitStream, ((iCount - 2) & 1) != 0, destination, data);
                NeutralEndian.Write1(data, (byte)(~(bPointer - iOffset - 1)));
            }
            else
            {
                // Full match: descriptor bits 01, then a 16-bit word packing a
                // complemented 13-bit offset and either a 3-bit inline count or
                // a zero count field followed by an extra count byte.
                Push(bitStream, false, destination, data);
                Push(bitStream, true, destination, data);

                long off = bPointer - iOffset - 1;
                ushort info = (ushort)(~((off << 8) | (off >> 5)) & 0xFFF8);
                if (iCount < 10) // iCount - 2 < 8
                {
                    info |= (ushort)(iCount - 2);
                    BigEndian.Write2(data, info);
                }
                else
                {
                    BigEndian.Write2(data, info);
                    NeutralEndian.Write1(data, (byte)(iCount - 1));
                }
            }

            bPointer += iCount;
        }
    }

    Push(bitStream, false, destination, data);
    Push(bitStream, true, destination, data);

    // If the bit stream was just flushed, write an empty bit stream that will
    // be read just before the end-of-data sequence below.
    if (!bitStream.HasWaitingBits)
    {
        NeutralEndian.Write1(data, 0);
        NeutralEndian.Write1(data, 0);
    }

    NeutralEndian.Write1(data, 0);
    NeutralEndian.Write1(data, 0xF0);
    NeutralEndian.Write1(data, 0);
    bitStream.Flush(true);

    byte[] bytes = data.ToArray();
    destination.Write(bytes, 0, bytes.Length);
}
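// For reference, the 16-bit "info" word built in the final branch above packs
// a complemented 13-bit offset plus a 3-bit count. A decoder would unpack it
// roughly like this (a sketch; UnpackFullMatch is not part of this library):
private static void UnpackFullMatch(ushort info, out long offset, out int count)
{
    // The high byte holds the complement of offset bits 0-7; bits 3-7 of the
    // low byte hold the complement of offset bits 8-12; bits 0-2 hold
    // count - 2, with 0 meaning "an extra count byte follows".
    offset = ((~info >> 8) & 0xFF) | ((~info & 0xF8) << 5);
    count = info & 7;
}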
private static void Decode(Stream input, Stream output, Endianness endianness)
{
    using (PaddedStream paddedInput = new PaddedStream(input, 2, PaddedStreamMode.Read))
    {
        byte packetLength = NeutralEndian.Read1(paddedInput);
        var readBitfield = GetBitfieldReader(NeutralEndian.Read1(paddedInput));
        ushort incrementingValue;
        ushort commonValue;
        InputBitStream<ushort> bitStream;
        Action<Stream, ushort> write2;

        if (endianness == Endianness.BigEndian)
        {
            incrementingValue = BigEndian.Read2(paddedInput);
            commonValue = BigEndian.Read2(paddedInput);
            bitStream = new UInt16BE_E_L_InputBitStream(paddedInput);
            write2 = Write2BE;
        }
        else
        {
            incrementingValue = LittleEndian.Read2(paddedInput);
            commonValue = LittleEndian.Read2(paddedInput);
            bitStream = new UInt16LE_E_L_InputBitStream(paddedInput);
            write2 = Write2LE;
        }

        // Loop until the end-of-data marker is found (if it is not found
        // before the end of the stream, the input bit stream will throw an
        // exception).
        for (;;)
        {
            if (bitStream.Get())
            {
                int mode = bitStream.Read(2);
                int count = bitStream.Read(4);
                switch (mode)
                {
                    case 0:
                    case 1:
                        {
                            // Copy a bitfield-prefixed value, incrementing it by
                            // 0 or 1 after each copy depending on the mode.
                            ushort flags = readBitfield(bitStream);
                            ushort outv = (ushort)(bitStream.Read(packetLength) | flags);

                            do
                            {
                                write2(output, outv);
                                outv += (ushort)mode;
                            } while (--count >= 0);
                        }
                        break;

                    case 2:
                        // Mode 2 decrements the value after each copy.
                        mode = -1;
                        goto case 0;

                    case 3:
                        {
                            // A count of 0xF marks the end of the compressed data.
                            if (count == 0xf)
                            {
                                return;
                            }

                            // Otherwise, copy a run of explicit values.
                            do
                            {
                                ushort flags = readBitfield(bitStream);
                                ushort outv = bitStream.Read(packetLength);
                                write2(output, (ushort)(outv | flags));
                            } while (--count >= 0);
                        }
                        break;
                }
            }
            else
            {
                bool mode = bitStream.Get();
                int count = bitStream.Read(4);
                if (mode)
                {
                    // Repeat the "common" value.
                    do
                    {
                        write2(output, commonValue);
                    } while (--count >= 0);
                }
                else
                {
                    // Emit the incrementing value, advancing it each time.
                    do
                    {
                        write2(output, incrementingValue++);
                    } while (--count >= 0);
                }
            }
        }
    }
}
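// The write2 delegate assigned above resolves to one of two small helpers
// that emit a 16-bit value in the chosen byte order. A minimal sketch of
// their likely shape (an assumption; the real Write2BE/Write2LE live
// elsewhere in this class):
private static void Write2BESketch(Stream stream, ushort value)
{
    stream.WriteByte((byte)(value >> 8));
    stream.WriteByte((byte)(value & 0xFF));
}

private static void Write2LESketch(Stream stream, ushort value)
{
    stream.WriteByte((byte)(value & 0xFF));
    stream.WriteByte((byte)(value >> 8));
}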
private static void EncodeInternal(Stream input, Stream output, bool xor, long inputLength)
{
    var rleSource = new List<NibbleRun>();
    var counts = new SortedList<NibbleRun, long>();

    using (IEnumerator<byte> unpacked = Unpacked(input))
    {
        // Build RLE nibble runs, RLE-encoding the nibble runs as we go along.
        // Maximum run length is 8, meaning 7 repetitions.
        if (unpacked.MoveNext())
        {
            NibbleRun current = new NibbleRun(unpacked.Current, 0);
            while (unpacked.MoveNext())
            {
                NibbleRun next = new NibbleRun(unpacked.Current, 0);
                if (next.Nibble != current.Nibble || current.Count >= 7)
                {
                    rleSource.Add(current);
                    long count;
                    counts.TryGetValue(current, out count);
                    counts[current] = count + 1;
                    current = next;
                }
                else
                {
                    ++current.Count;
                }
            }

            // Flush the final pending run, which the loop above leaves behind.
            rleSource.Add(current);
            long finalCount;
            counts.TryGetValue(current, out finalCount);
            counts[current] = finalCount + 1;
        }
    }

    // We will use the Package-merge algorithm to build the optimal
    // length-limited Huffman code for the current file. To do this, we must
    // map the current problem onto the Coin Collector's problem.

    // Build the basic coin collection.
    var qt = new List<EncodingCodeTreeNode>();
    foreach (var kvp in counts)
    {
        // No point in including anything with weight less than 2, as they
        // would actually increase compressed file size if used.
        if (kvp.Value > 1)
        {
            qt.Add(new EncodingCodeTreeNode(kvp.Key, kvp.Value));
        }
    }

    qt.Sort();

    // The base coin collection for the length-limited Huffman coding has
    // one coin list per bit in the length limit (8 here). Each coin list
    // has a constant "face value", and each coin in a list has its own
    // "numismatic value". The "face value" is unimportant in the way the code
    // is structured below; the "numismatic value" of each coin is the number
    // of times the underlying nibble run appears in the source file.

    // This will hold the Huffman code map.
    // NOTE: while the codes that will be written in the header will not be
    // longer than 8 bits, it is possible that a supplementary code map will
    // add "fake" codes that are longer than 8 bits.
    var codeMap = new SortedList<NibbleRun, KeyValuePair<long, byte>>();

    // Size estimate. This is used to build the optimal compressed file.
    long sizeEstimate = long.MaxValue;

    // We will solve the Coin Collector's problem several times, each time
    // ignoring more of the least frequent nibble runs. This allows us to find
    // *the* lowest file size.
    while (qt.Count > 1)
    {
        // Make a copy of the basic coin collection.
        var q0 = new List<EncodingCodeTreeNode>(qt);

        // Ignore the lowest weighted item. Will only affect the next iteration
        // of the loop. If it can be proven that there is a single global
        // minimum (and no local minima for file size), then this could be
        // simplified to a binary search.
        qt.RemoveAt(qt.Count - 1);

        // We now solve the Coin Collector's problem using the Package-merge
        // algorithm. The solution goes here.
        var solution = new List<EncodingCodeTreeNode>();

        // This holds the packages from the last iteration.
        var q = new List<EncodingCodeTreeNode>(q0);

        int target = (q0.Count - 1) << 8, idx = 0;
        while (target != 0)
        {
            // Gets the lowest set bit in its proper place:
            int val = (target & -target), r = 1 << idx;

            // Is the current denomination equal to the least denomination?
            if (r == val)
            {
                // If yes, take the least valuable node and put it into the solution.
                solution.Add(q[q.Count - 1]);
                q.RemoveAt(q.Count - 1);
                target -= r;
            }

            // The coin collection has coins of values 1 to 8; copy from the
            // original in those cases for the next step.
            var q1 = new List<EncodingCodeTreeNode>();
            if (idx < 7)
            {
                q1.AddRange(q0);
            }

            // Split the current list into pairs and insert the packages into
            // the next list.
            while (q.Count > 1)
            {
                EncodingCodeTreeNode child1 = q[q.Count - 1];
                q.RemoveAt(q.Count - 1);
                EncodingCodeTreeNode child0 = q[q.Count - 1];
                q.RemoveAt(q.Count - 1);
                q1.Add(new EncodingCodeTreeNode(child0, child1));
            }

            idx++;
            q.Clear();
            q.AddRange(q1);
            q.Sort();
        }

        // The Coin Collector's problem has been solved. Now it is time to
        // map the solution back into the length-limited Huffman coding problem.

        // To do that, we iterate through the solution and count how many times
        // each nibble run has been used (remember that the coin collection had
        // multiple coins associated with each nibble run) -- this number is
        // the optimal bit length for the nibble run.
        var baseSizeMap = new SortedList<NibbleRun, long>();
        foreach (var item in solution)
        {
            item.Traverse(baseSizeMap);
        }

        // With the length-limited Huffman coding problem solved, it is now time
        // to build the code table. As input, we have a map associating a nibble
        // run to its optimal encoded bit length. We will build the codes using
        // the canonical Huffman code.

        // To do that, we must invert the size map so we can sort it by code size.
        var sizeOnlyMap = new MultiSet<long>();

        // This map contains lots more information, and is used to associate
        // the nibble run with its optimal code. It is sorted by code size,
        // then by frequency of the nibble run, then by the nibble run.
        var sizeMap = new MultiSet<SizeMapItem>();

        foreach (var item in baseSizeMap)
        {
            long size = item.Value;
            sizeOnlyMap.Add(size);
            sizeMap.Add(new SizeMapItem(size, counts[item.Key], item.Key));
        }

        // We now build the canonical Huffman code table.
        // "baseCode" is the code for the first nibble run with a given bit length.
        // "carry" is how many nibble runs were demoted to a higher bit length
        // at an earlier step.
        // "cnt" is how many nibble runs have a given bit length.
        long baseCode = 0;
        long carry = 0, cnt;

        // This list contains the codes sorted by size.
        var codes = new List<KeyValuePair<long, byte>>();
        for (byte j = 1; j <= 8; j++)
        {
            // How many nibble runs have the desired bit length.
            cnt = sizeOnlyMap.Count(j) + carry;
            carry = 0;

            for (int k = 0; k < cnt; k++)
            {
                // Sequential binary numbers for codes.
                long code = baseCode + k;
                long mask = (1L << j) - 1;

                // We do not want any codes composed solely of 1's or which
                // start with 111111, as that sequence is reserved.
                if ((j <= 6 && code == mask) || (j > 6 && code == (mask & ~((1L << (j - 6)) - 1))))
                {
                    // We must demote this many nibble runs to a longer code.
                    carry = cnt - k;
                    cnt = k;
                    break;
                }

                codes.Add(new KeyValuePair<long, byte>(code, j));
            }

            // This is the beginning bit pattern for the next bit length.
            baseCode = (baseCode + cnt) << 1;
        }

        // With the canonical table built, the codemap can finally be built.
        var tempCodemap = new SortedList<NibbleRun, KeyValuePair<long, byte>>();
        using (IEnumerator<SizeMapItem> enumerator = sizeMap.GetEnumerator())
        {
            int pos = 0;
            while (enumerator.MoveNext() && pos < codes.Count)
            {
                tempCodemap[enumerator.Current.NibbleRun] = codes[pos];
                ++pos;
            }
        }

        // We now compute the final file size for this code table.
        // 2 bytes at the start of the file, plus 1 byte at the end of the
        // code table.
        long tempsize_est = 3 * 8;
        byte last = 0xff;

        // Start with any nibble runs with their own code.
        foreach (var item in tempCodemap)
        {
            // Each new nibble needs an extra byte.
            if (item.Key.Nibble != last)
            {
                tempsize_est += 8;
                last = item.Key.Nibble;
            }

            // 2 bytes per nibble run in the table.
            tempsize_est += 2 * 8;

            // How many bits this nibble run uses in the file.
            tempsize_est += counts[item.Key] * item.Value.Value;
        }

        // Supplementary code map for the nibble runs that can be broken up into
        // shorter nibble runs with a smaller bit length than inlining.
        var supCodemap = new Dictionary<NibbleRun, KeyValuePair<long, byte>>();

        // Now we will compute the size requirements for inline nibble runs.
        foreach (var item in counts)
        {
            if (!tempCodemap.ContainsKey(item.Key))
            {
                // The nibble run does not have its own code. We need to find
                // out if we can break it up into smaller nibble runs with total
                // code size less than 13 bits or if we need to inline it (13 bits).
                if (item.Key.Count == 0)
                {
                    // If this is a nibble run with zero repeats, we can't break
                    // it up into smaller runs, so we inline it.
                    tempsize_est += (6 + 7) * item.Value;
                }
                else if (item.Key.Count == 1)
                {
                    // We stand a chance of breaking the nibble run.

                    // This case is rather trivial, so we hard-code it.
                    // We can break this up only as 2 consecutive runs of a
                    // nibble run with count == 0.
                    KeyValuePair<long, byte> value;
                    if (!tempCodemap.TryGetValue(new NibbleRun(item.Key.Nibble, 0), out value) || value.Value > 6)
                    {
                        // The smaller nibble run either does not have its own code
                        // or it results in a longer bit code when doubled up than
                        // would result from inlining the run. In either case, we
                        // inline the nibble run.
                        tempsize_est += (6 + 7) * item.Value;
                    }
                    else
                    {
                        // The smaller nibble run has a small enough code that it is
                        // more efficient to use it twice than to inline our nibble
                        // run. So we do exactly that, by adding a (temporary) entry
                        // in the supplementary codemap, which will later be merged
                        // into the main codemap.
                        long code = value.Key;
                        byte len = value.Value;
                        code = (code << len) | code;
                        len <<= 1;
                        tempsize_est += len * item.Value;
                        supCodemap[item.Key] = new KeyValuePair<long, byte>(code, (byte)(0x80 | len));
                    }
                }
                else
                {
                    // We stand a chance of breaking the nibble run.
                    byte n = item.Key.Count;

                    // This is a linear optimization problem subject to 2
                    // constraints. If the number of repeats of the current
                    // nibble run is N, then we have N dimensions.

                    // Reference to the table of linear coefficients. This table
                    // has N columns for each line.
                    byte[,] myLinearCoeffs = linearCoeffs[n - 2];
                    int rows = myLinearCoeffs.GetLength(0);

                    byte nibble = item.Key.Nibble;

                    // List containing the code length of each nibble run, or 13
                    // if the nibble run is not in the codemap.
                    var runlen = new List<long>();

                    // Initialize the list.
                    for (byte i = 0; i < n; i++)
                    {
                        // Is this run in the codemap?
                        KeyValuePair<long, byte> value;
                        if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                        {
                            // It is. Put the code length in the vector.
                            runlen.Add(value.Value);
                        }
                        else
                        {
                            // It is not. Put the inline length in the vector.
                            runlen.Add(6 + 7);
                        }
                    }

                    // Now go through the linear coefficient table and tally up
                    // the total code size, looking for the best case.
                    // The best size is initialized to be the inlined case.
                    long bestSize = 6 + 7;
                    int bestLine = -1;

                    for (int i = 0; i < rows; i++)
                    {
                        // Tally up the code length for this coefficient line.
                        long len = 0;
                        for (byte j = 0; j < n; j++)
                        {
                            byte c = myLinearCoeffs[i, j];
                            if (c == 0)
                            {
                                continue;
                            }

                            len += c * runlen[j];
                        }

                        // Is the length better than the best yet?
                        if (len < bestSize)
                        {
                            // If yes, store it as the best.
                            bestSize = len;
                            bestLine = i;
                        }
                    }

                    // Have we found a better code than inlining?
                    if (bestLine >= 0)
                    {
                        // We have; use it. To do so, we have to build the code
                        // and add it to the supplementary code table.
                        long code = 0, len = 0;
                        for (byte i = 0; i < n; i++)
                        {
                            byte c = myLinearCoeffs[bestLine, i];
                            if (c == 0)
                            {
                                continue;
                            }

                            // Is this run in the codemap?
                            KeyValuePair<long, byte> value;
                            if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                            {
                                // It is; it MUST be, as the other case is
                                // impossible by construction.
                                for (int j = 0; j < c; j++)
                                {
                                    len += value.Value;
                                    code <<= value.Value;
                                    code |= value.Key;
                                }
                            }
                        }

                        if (len != bestSize)
                        {
                            // ERROR! DANGER! THIS IS IMPOSSIBLE!
                            // But just in case...
                            tempsize_est += (6 + 7) * item.Value;
                        }
                        else
                        {
                            // By construction, bestSize is at most 12.
                            byte c = (byte)bestSize;

                            // Add it to the supplementary code map.
                            supCodemap[item.Key] = new KeyValuePair<long, byte>(code, (byte)(0x80 | c));
                            tempsize_est += bestSize * item.Value;
                        }
                    }
                    else
                    {
                        // No, we will have to inline it.
                        tempsize_est += (6 + 7) * item.Value;
                    }
                }
            }
        }

        // Merge the supplementary code map into the temporary code map.
        foreach (var item in supCodemap)
        {
            tempCodemap[item.Key] = item.Value;
        }

        // Round up to a full byte.
        tempsize_est = (tempsize_est + 7) & ~7;

        // Is this iteration better than the best?
        if (tempsize_est < sizeEstimate)
        {
            // If yes, save the codemap and file size.
            codeMap = tempCodemap;
            sizeEstimate = tempsize_est;
        }
    }

    // We now have a prefix-free code map associating the RLE-encoded nibble
    // runs with their code. Now we write the file.

    // Write the header.
    BigEndian.Write2(output, (ushort)((Convert.ToInt32(xor) << 15) | ((int)inputLength >> 5)));

    byte lastNibble = 0xff;
    foreach (var item in codeMap)
    {
        byte length = item.Value.Value;

        // A length with bit 7 set is a special device for further reducing the
        // file size, and should NOT be in the table.
        if ((length & 0x80) != 0)
        {
            continue;
        }

        NibbleRun nibbleRun = item.Key;
        if (nibbleRun.Nibble != lastNibble)
        {
            // 0x80 marks the byte as setting a new nibble.
            NeutralEndian.Write1(output, (byte)(0x80 | nibbleRun.Nibble));
            lastNibble = nibbleRun.Nibble;
        }

        long code = item.Value.Key;
        NeutralEndian.Write1(output, (byte)((nibbleRun.Count << 4) | length));
        NeutralEndian.Write1(output, (byte)code);
    }

    // Mark the end of the header.
    NeutralEndian.Write1(output, 0xff);

    // Write the encoded bitstream.
    UInt8_E_L_OutputBitStream bitStream = new UInt8_E_L_OutputBitStream(output);

    // The RLE-encoded source makes for a far faster encode as we simply
    // use the nibble runs as an index into the map, meaning a quick binary
    // search gives us the code to use (if in the map) or tells us that we
    // need to use inline RLE.
    foreach (var nibbleRun in rleSource)
    {
        KeyValuePair<long, byte> value;
        if (codeMap.TryGetValue(nibbleRun, out value))
        {
            long code = value.Key;
            byte len = value.Value;

            // len with bit 7 set is a device to bypass the code table at the
            // start of the file. We need to clear the bit here before writing
            // the code to the file.
            len &= 0x7f;

            // We can have codes in the 9-12 range due to the break-up of large
            // inlined runs into smaller non-inlined runs. Deal with those high
            // bits first, if needed.
            if (len > 8)
            {
                bitStream.Write((byte)((code >> 8) & 0xff), len - 8);
                len = 8;
            }

            bitStream.Write((byte)(code & 0xff), len);
        }
        else
        {
            // Inline RLE: the reserved 6-bit prefix, then the raw count and nibble.
            bitStream.Write(0x3f, 6);
            bitStream.Write(nibbleRun.Count, 3);
            bitStream.Write(nibbleRun.Nibble, 4);
        }
    }

    // Fill the remainder of the last byte with zeroes and write it if needed.
    bitStream.Flush(false);
}
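// A compact illustration of the canonical-code assignment used above (a
// sketch; CanonicalCodeSketch is not part of this library). Within each bit
// length, codes are handed out as consecutive binary values, and the base
// code doubles when moving to the next length. The reserved-prefix demotion
// (no codes of all 1's or starting with 111111) is omitted here for brevity.
internal static class CanonicalCodeSketch
{
    public static void Demo()
    {
        // Sorted code lengths for five symbols.
        byte[] lengths = { 2, 2, 3, 3, 4 };

        long baseCode = 0;
        int i = 0;
        for (byte bits = 1; bits <= 8; bits++)
        {
            long cnt = 0;
            while (i < lengths.Length && lengths[i] == bits)
            {
                string code = System.Convert.ToString(baseCode + cnt, 2).PadLeft(bits, '0');
                System.Console.WriteLine($"length {bits}: {code}");
                ++cnt;
                ++i;
            }

            // The beginning bit pattern for the next bit length.
            baseCode = (baseCode + cnt) << 1;
        }

        // Prints: 00, 01, 100, 101, 1100
    }
}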
internal static void Encode(Stream source, Stream destination)
{
    int size_bytes = (int)(source.Length - source.Position);
    byte[] buffer_bytes = new byte[size_bytes + (size_bytes & 1)];
    source.Read(buffer_bytes, 0, size_bytes);

    // This format operates on big-endian 16-bit words; pad odd-sized input
    // with a zero byte and repack.
    int size = (size_bytes + 1) / 2;
    ushort[] buffer = new ushort[size];
    for (int i = 0; i < size; ++i)
    {
        buffer[i] = (ushort)((buffer_bytes[i * 2] << 8) | buffer_bytes[(i * 2) + 1]);
    }

    /*
     * Here we create and populate the "LZSS graph":
     *
     * Each value in the uncompressed file forms a node in this graph.
     * The various edges between these nodes represent LZSS matches.
     *
     * Using a shortest-path algorithm, these edges can be used to
     * find the optimal combination of matches needed to produce the
     * smallest possible file.
     *
     * The resulting array only contains one edge per node: the optimal
     * one. This means that, in order to produce the smallest file, you
     * just have to traverse the graph from one edge to the next, encoding
     * each match as you go along.
     */

    LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[size + 1];

    // Initialise the array
    node_meta_array[0].cost = 0;
    for (int i = 1; i < size + 1; ++i)
    {
        node_meta_array[i].cost = int.MaxValue;
    }

    // Find matches
    for (int i = 0; i < size; ++i)
    {
        int max_read_ahead = Math.Min(0x100, size - i);
        int max_read_behind = Math.Max(0, i - 0x100);

        // Search for dictionary matches
        for (int j = i; j-- > max_read_behind;)
        {
            for (int k = 0; k < max_read_ahead; ++k)
            {
                if (buffer[i + k] == buffer[j + k])
                {
                    // Update this node's optimal edge if this one is better.
                    // Every match token costs 1 descriptor bit plus 16 bits of
                    // data, regardless of length.
                    if (node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                    {
                        node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                        node_meta_array[i + k + 1].previous_node_index = i;
                        node_meta_array[i + k + 1].match_length = k + 1;
                        node_meta_array[i + k + 1].match_offset = j;
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Do literal match.
        // Update this node's optimal edge if this one is better (or the same,
        // since literal matches usually decode faster).
        if (node_meta_array[i + 1].cost >= node_meta_array[i].cost + 1 + 16)
        {
            node_meta_array[i + 1].cost = node_meta_array[i].cost + 1 + 16;
            node_meta_array[i + 1].previous_node_index = i;
            node_meta_array[i + 1].match_length = 0;
        }
    }

    // Reverse the edge link order, so the array can be traversed from start
    // to end, rather than vice versa.
    node_meta_array[0].previous_node_index = int.MaxValue;
    node_meta_array[size].next_node_index = int.MaxValue;
    for (int node_index = size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
    {
        node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
    }

    /*
     * LZSS graph complete
     */

    UInt16BE_NE_H_OutputBitStream bitStream = new UInt16BE_NE_H_OutputBitStream(destination);
    MemoryStream data = new MemoryStream();

    for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
    {
        int next_index = node_meta_array[node_index].next_node_index;
        int length = node_meta_array[next_index].match_length;
        int distance = next_index - node_meta_array[next_index].match_length - node_meta_array[next_index].match_offset;

        if (length != 0)
        {
            // Compressed: a set descriptor bit, the negated distance, and
            // length - 1.
            Push(bitStream, true, destination, data);
            NeutralEndian.Write1(data, (byte)-distance);
            NeutralEndian.Write1(data, (byte)(length - 1));
        }
        else
        {
            // Uncompressed: a clear descriptor bit and the literal word.
            Push(bitStream, false, destination, data);
            BigEndian.Write2(data, buffer[node_index]);
        }
    }

    // End-of-data marker: a set descriptor bit with a zero distance byte and
    // a zero length byte.
    Push(bitStream, true, destination, data);
    NeutralEndian.Write1(data, 0);
    NeutralEndian.Write1(data, 0);
    bitStream.Flush(true);

    byte[] bytes = data.ToArray();
    destination.Write(bytes, 0, bytes.Length);
}
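// For reference, each match token written above can be undone as follows (a
// sketch of the decoder side; DecodeTokenSketch is not part of this file).
// The distance byte is the two's complement of the word distance, and the
// length byte stores length - 1; the encoder terminates the stream with a
// set descriptor bit followed by the 0x00, 0x00 pair written above.
private static void DecodeTokenSketch(byte distanceByte, byte lengthByte, System.Collections.Generic.List<ushort> output)
{
    // Assumes a genuine match token (the 0x00, 0x00 pair is the end marker).
    int distance = 0x100 - distanceByte; // undo (byte)-distance
    int length = lengthByte + 1;         // undo (byte)(length - 1)

    for (int i = 0; i < length; i++)
    {
        // Copy words from earlier in the already-decoded output.
        output.Add(output[output.Count - distance]);
    }
}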