// Refills the bit buffer from the underlying stream once every buffered
// bit has been consumed; otherwise does nothing.
private void CheckBuffer()
{
    if (this.remainingBits != 0)
    {
        return;
    }

    this.byteBuffer = NeutralEndian.Read1(this.stream);
    this.remainingBits = 8;
}
// Wraps the given stream for bit-level reading. The buffer is primed with
// the first byte of the stream so reads can begin immediately.
public UInt8InputBitStream(Stream stream)
{
    if (stream == null)
    {
        throw new ArgumentNullException("stream");
    }

    this.stream = stream;
    this.byteBuffer = NeutralEndian.Read1(stream);
    this.remainingBits = 8;
}
// Appends a single bit at the current position; when eight bits have
// accumulated, the completed byte is written out and the buffer reset.
// Returns true if a byte was flushed to the stream by this call.
public override bool Push(bool bit)
{
    if (bit)
    {
        this.byteBuffer |= (byte)(1 << this.waitingBits);
    }

    ++this.waitingBits;
    if (this.waitingBits < 8)
    {
        return false;
    }

    NeutralEndian.Write1(this.stream, this.byteBuffer);
    this.waitingBits = 0;
    this.byteBuffer = 0;
    return true;
}
// Writes the low "size" bits of "data" after the bits already pending in the
// buffer. Returns true if a complete byte was emitted to the stream.
public override bool Write(byte data, int size)
{
    if (this.waitingBits + size >= 8)
    {
        // Pending bits plus the new ones fill at least one byte: emit the
        // pending bits padded with the top bits of "data", then keep "data"
        // as the new buffer (only its low "waitingBits" bits matter; stale
        // high bits are shifted out by the byte cast on the next emit).
        int delta = 8 - this.waitingBits;
        this.waitingBits = (this.waitingBits + size) % 8;
        NeutralEndian.Write1(this.stream, (byte)((this.byteBuffer << delta) | (data >> this.waitingBits)));
        this.byteBuffer = data;
        return(true);
    }

    // Not enough for a whole byte yet; append the new bits below the
    // pending ones.
    this.byteBuffer <<= size;
    this.byteBuffer |= data;
    this.waitingBits += size;
    return(false);
}
// Writes out any partially-filled byte. When "unchanged" is false the
// pending bits are first shifted up to the top of the byte; when true the
// buffer is emitted as-is. Returns true if a byte was written.
public override bool Flush(bool unchanged)
{
    if (this.waitingBits == 0)
    {
        return false;
    }

    if (!unchanged)
    {
        this.byteBuffer <<= 8 - this.waitingBits;
    }

    NeutralEndian.Write1(this.stream, this.byteBuffer);
    this.waitingBits = 0;
    return true;
}
// Reads "count" bits from the stream, taking them from the top of the
// buffered byte first. Assumes count <= 8 — TODO confirm against callers.
public override byte Read(int count)
{
    this.CheckBuffer();
    if (this.remainingBits < count)
    {
        // The request straddles a byte boundary: take every buffered bit,
        // shifted up to leave room for the "delta" bits still missing,
        // then refill and take the top "delta" bits of the fresh byte.
        int delta = count - this.remainingBits;
        byte lowBits = (byte)(this.byteBuffer << delta);
        this.byteBuffer = NeutralEndian.Read1(stream);
        this.remainingBits = 8 - delta;
        ushort highBits = (byte)(this.byteBuffer >> this.remainingBits);
        // XOR clears the consumed bits out of the buffer.
        this.byteBuffer ^= (byte)(highBits << this.remainingBits);
        return((byte)(lowBits | highBits));
    }

    // Whole request is satisfied by the buffered byte.
    this.remainingBits -= count;
    byte bits = (byte)(this.byteBuffer >> this.remainingBits);
    // XOR clears the consumed bits out of the buffer.
    this.byteBuffer ^= (byte)(bits << this.remainingBits);
    return(bits);
}
// Appends a single bit, flushing the previously completed byte first if
// one is pending. Unlike the eager variant, the full byte is only written
// when the *next* bit arrives. Returns true if a byte was flushed.
public override bool Push(bool bit)
{
    bool emitted = this.waitingBits >= 8;
    if (emitted)
    {
        NeutralEndian.Write1(this.stream, this.byteBuffer);
        this.byteBuffer = 0;
        this.waitingBits = 0;
    }

    if (bit)
    {
        this.byteBuffer |= (byte)(1 << this.waitingBits);
    }

    this.waitingBits++;
    return emitted;
}
// Reads the code-table header from "input" and populates "codeTree" with
// one entry per (code, length) pair. The "output" parameter is unused here;
// it is presumably kept for signature symmetry with the decode routines.
private static void DecodeHeader(Stream input, Stream output, DecodingCodeTreeNode codeTree)
{
    byte outputValue = 0;

    // Process entries until the 0xFF terminator byte is reached.
    for (;;)
    {
        byte inputValue = NeutralEndian.Read1(input);
        if (inputValue == 0xFF)
        {
            break;
        }

        if ((inputValue & 0x80) != 0)
        {
            // High bit set: this byte selects a new output nibble, and the
            // actual entry descriptor follows in the next byte.
            outputValue = (byte)(inputValue & 0xF);
            inputValue = NeutralEndian.Read1(input);
        }

        byte code = NeutralEndian.Read1(input);
        int codeLength = inputValue & 0xF;
        byte runLength = (byte)(((inputValue & 0x70) >> 4) + 1);
        codeTree.SetCode(code, codeLength, new NibbleRun(outputValue, runLength));
    }

    // Store a special nibble run for inline RLE sequences (code = 0b111111,
    // length = 6). Length = 0xFF in the nibble run is just a marker value
    // that will be handled specially in DecodeInternal.
    codeTree.SetCode(0x3F, 6, new NibbleRun(0, 0xFF));
}
// Decompresses "size" bytes of LZSS-style data from "input" into "output".
// A flag bit of 1 precedes a literal byte; 0 precedes a two-byte
// offset/count dictionary reference into a 0x1000-byte sliding window.
private static void Decode(Stream input, Stream output, long size)
{
    long end = input.Position + size;
    UInt8_NE_L_InputBitStream bitStream = new UInt8_NE_L_InputBitStream(input);
    List<byte> outputBuffer = new List<byte>();
    while (input.Position < end)
    {
        if (bitStream.Pop())
        {
            // Literal byte.
            if (input.Position >= end)
            {
                break;
            }

            outputBuffer.Add(NeutralEndian.Read1(input));
        }
        else
        {
            if (input.Position >= end)
            {
                break;
            }

            int offset = NeutralEndian.Read1(input);
            if (input.Position >= end)
            {
                break;
            }

            byte count = NeutralEndian.Read1(input);

            // We've just read 2 bytes: %llllllll %hhhhcccc
            // offset = %hhhhllllllll + 0x12, count = %cccc + 3
            offset |= (ushort)((count & 0xF0) << 4);
            offset += 0x12;
            offset &= 0xFFF;
            // Rebase the 12-bit offset into the current 0x1000-byte window
            // of the output produced so far.
            offset |= (ushort)(outputBuffer.Count & 0xF000);
            count &= 0xF;
            count += 3;
            if (offset >= outputBuffer.Count)
            {
                // Offset points past what has been written: it belongs to
                // the previous window (may go negative → zero-fill below).
                offset -= 0x1000;
            }

            // Reserve the destination bytes up front so in-place copying
            // below can overlap the region being appended.
            outputBuffer.AddRange(new byte[count]);

            if (offset < 0)
            {
                // Zero-fill
                for (int destinationIndex = outputBuffer.Count - count; destinationIndex < outputBuffer.Count; ++destinationIndex)
                {
                    outputBuffer[destinationIndex] = 0;
                }
            }
            else
            {
                // Dictionary reference
                if (offset < outputBuffer.Count)
                {
                    // Byte-by-byte copy so overlapping references repeat
                    // freshly written data, as the format requires.
                    for (int sourceIndex = offset, destinationIndex = outputBuffer.Count - count; destinationIndex < outputBuffer.Count; sourceIndex++, destinationIndex++)
                    {
                        outputBuffer[destinationIndex] = outputBuffer[sourceIndex];
                    }
                }
            }
        }
    }

    byte[] bytes = outputBuffer.ToArray();
    output.Write(bytes, 0, bytes.Length);
}
// LZSS-style compressor: greedily searches a sliding window of
// "slidingWindow" bytes for the longest match (capped at "recLength"),
// emitting descriptor bits through a 16-bit little-endian bit stream and
// literal/match bytes into a side buffer that is written out at the end.
private static void EncodeInternal(Stream destination, byte[] buffer, long pos, long slidingWindow, long recLength, long size)
{
    UInt16LEOutputBitStream bitStream = new UInt16LEOutputBitStream(destination);
    MemoryStream data = new MemoryStream();
    if (size > 0)
    {
        long bPointer = 1, iOffset = 0;
        // First byte is always a literal.
        bitStream.Push(true);
        NeutralEndian.Write1(data, buffer[pos]);
        while (bPointer < size)
        {
            // Exhaustive backwards search for the longest match starting at
            // bPointer, no longer than iCount and no further back than iMax.
            long iCount = Math.Min(recLength, size - bPointer);
            long iMax = Math.Max(bPointer - slidingWindow, 0);
            long k = 1;
            long i = bPointer - 1;
            do
            {
                long j = 0;
                while (buffer[pos + i + j] == buffer[pos + bPointer + j])
                {
                    if (++j >= iCount)
                    {
                        break;
                    }
                }

                if (j > k)
                {
                    k = j;
                    iOffset = i;
                }
            }
            while (i-- > iMax);
            iCount = k;
            if (iCount == 1)
            {
                // No usable match: literal byte (flag bit 1).
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, buffer[pos + bPointer]);
            }
            else if (iCount == 2 && bPointer - iOffset > 256)
            {
                // A 2-byte match that only the long form could encode is not
                // worth it; emit a literal and retry from the next byte.
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, buffer[pos + bPointer]);
                --iCount;
            }
            else if (iCount < 6 && bPointer - iOffset <= 256)
            {
                // Short form (flags 00): 2 bits of count, 1 byte of offset.
                Push(bitStream, false, destination, data);
                Push(bitStream, false, destination, data);
                Push(bitStream, (((iCount - 2) >> 1) & 1) != 0, destination, data);
                Push(bitStream, ((iCount - 2) & 1) != 0, destination, data);
                NeutralEndian.Write1(data, (byte)(~(bPointer - iOffset - 1)));
            }
            else
            {
                // Long form (flags 01): 13-bit offset packed with a 3-bit
                // count, or an extra count byte for longer matches.
                Push(bitStream, false, destination, data);
                Push(bitStream, true, destination, data);
                long off = bPointer - iOffset - 1;
                ushort info = (ushort)(~((off << 8) | (off >> 5)) & 0xFFF8);
                if (iCount < 10) // iCount - 2 < 8
                {
                    info |= (ushort)(iCount - 2);
                    BigEndian.Write2(data, info);
                }
                else
                {
                    BigEndian.Write2(data, info);
                    NeutralEndian.Write1(data, (byte)(iCount - 1));
                }
            }

            bPointer += iCount;
        }
    }

    // Begin the end-of-data sequence (flags 01 with a zero descriptor).
    Push(bitStream, false, destination, data);
    Push(bitStream, true, destination, data);

    // If the bit stream was just flushed, write an empty bit stream that
    // will be read just before the end-of-data sequence below.
    if (!bitStream.HasWaitingBits)
    {
        NeutralEndian.Write1(data, 0);
        NeutralEndian.Write1(data, 0);
    }

    NeutralEndian.Write1(data, 0);
    NeutralEndian.Write1(data, 0xF0);
    NeutralEndian.Write1(data, 0);
    bitStream.Flush(true);
    byte[] bytes = data.ToArray();
    destination.Write(bytes, 0, bytes.Length);
}
// Word-run compressor: reads the source as big-endian 16-bit words, finds
// the most frequent word and the best "incrementing" start word, then emits
// runs (incrementing / common / constant-step) and buffered literals through
// a 16-bit bit stream in the requested endianness.
private static void Encode(Stream input, Stream output, Endianness endianness)
{
    Action<Stream, ushort> write2;
    OutputBitStream<ushort> bitStream;
    if (endianness == Endianness.BigEndian)
    {
        write2 = Write2BE;
        bitStream = new UInt16BE_E_L_OutputBitStream(output);
    }
    else
    {
        write2 = Write2LE;
        bitStream = new UInt16LE_E_L_OutputBitStream(output);
    }

    // To unpack source into 2-byte words.
    ushort[] words = new ushort[(input.Length - input.Position) / 2];
    if (words.Length == 0)
    {
        throw new CompressionException(Properties.Resources.EmptySource);
    }

    // Frequency map.
    SortedList<ushort, long> counts = new SortedList<ushort, long>();

    // Presence map.
    HashSet<ushort> elements = new HashSet<ushort>();

    // Unpack source into array. Along the way, build frequency and presence maps.
    ushort maskValue = 0;
    {
        byte[] buffer = new byte[2];
        int i = 0, bytesRead;
        while ((bytesRead = input.Read(buffer, 0, 2)) == 2)
        {
            // Source words are big-endian.
            ushort v = (ushort)(buffer[0] << 8 | buffer[1]);
            maskValue |= v;
            long count;
            counts.TryGetValue(v, out count);
            counts[v] = count + 1;
            elements.Add(v);
            words[i++] = v;
        }
    }

    // Top 5 bits are handled via a per-word flag bitfield; the low bits are
    // stored verbatim with the minimum width that covers them all.
    var writeBitfield = GetBitfieldWriter((byte)(maskValue >> 11));
    byte packetLength = (byte)(Log2((ushort)(maskValue & 0x7ff)) + 1);

    // Find the most common 2-byte value.
    ushort commonValue = FindMostFrequentWord(counts);

    // Find incrementing (not necessarily contiguous) runs.
    // The original algorithm does this for all 65536 2-byte words, while
    // this version only checks the 2-byte words actually in the file.
    SortedList<ushort, long> runs = new SortedList<ushort, long>();
    foreach (ushort element in elements)
    {
        ushort next = element;
        long runLength = 0;
        foreach (ushort word in words)
        {
            if (word == next)
            {
                ++next;
                ++runLength;
            }
        }

        runs[element] = runLength;
    }

    // Find the starting 2-byte value with the longest incrementing run.
    ushort incrementingValue = FindMostFrequentWord(runs);

    // Output header.
    NeutralEndian.Write1(output, packetLength);
    NeutralEndian.Write1(output, (byte)(maskValue >> 11));
    write2(output, incrementingValue);
    write2(output, commonValue);

    // Output compressed data.
    List<ushort> buf = new List<ushort>();
    int pos = 0;
    while (pos < words.Length)
    {
        ushort v = words[pos];
        if (v == incrementingValue)
        {
            // Run continuing the incrementing sequence (mode 00).
            FlushBuffer(buf, bitStream, writeBitfield, packetLength);
            ushort next = (ushort)(v + 1);
            ushort count = 0;
            for (int i = pos + 1; i < words.Length && count < 0xf; i++)
            {
                if (next != words[i])
                {
                    break;
                }

                ++next;
                ++count;
            }

            bitStream.Write((ushort)(0x00 | count), 6);
            incrementingValue = next;
            pos += count;
        }
        else if (v == commonValue)
        {
            // Run of the most common word (mode 01).
            FlushBuffer(buf, bitStream, writeBitfield, packetLength);
            ushort count = 0;
            for (int i = pos + 1; i < words.Length && count < 0xf; i++)
            {
                if (v != words[i])
                {
                    break;
                }

                ++count;
            }

            bitStream.Write((ushort)(0x10 | count), 6);
            pos += count;
        }
        else
        {
            ushort next;
            int delta;
            if (pos + 1 < words.Length && (next = words[pos + 1]) != incrementingValue && ((delta = (int)next - (int)v) == -1 || delta == 0 || delta == 1))
            {
                // Run of words stepping by a constant -1, 0 or +1.
                FlushBuffer(buf, bitStream, writeBitfield, packetLength);
                ushort count = 1;
                next = (ushort)(next + delta);
                for (int i = pos + 2; i < words.Length && count < 0xf; i++)
                {
                    if (next != words[i])
                    {
                        break;
                    }

                    // If the word is equal to the incrementing word value, stop this run early so we can use the
                    // incrementing value in the next iteration of the main loop.
                    if (words[i] == incrementingValue)
                    {
                        break;
                    }

                    next = (ushort)(next + delta);
                    ++count;
                }

                // Map step -1 to code 2, then set the mode bit and shift into
                // position above the 4-bit count.
                if (delta == -1)
                {
                    delta = 2;
                }

                delta |= 4;
                delta <<= 4;
                bitStream.Write((ushort)(delta | count), 7);
                writeBitfield(bitStream, v);
                bitStream.Write((ushort)(v & 0x7ff), packetLength);
                pos += count;
            }
            else
            {
                // No run found: buffer the literal; the buffer holds at most
                // 0xf words per emitted packet.
                if (buf.Count >= 0xf)
                {
                    FlushBuffer(buf, bitStream, writeBitfield, packetLength);
                }

                buf.Add(v);
            }
        }

        ++pos;
    }

    FlushBuffer(buf, bitStream, writeBitfield, packetLength);

    // Terminator
    bitStream.Write(0x7f, 7);
    bitStream.Flush(false);
}
// Word-run decompressor: the counterpart of Encode above. Reads packets from
// a 16-bit bit stream and expands them into 16-bit words written via
// "write2" in the requested endianness.
private static void Decode(Stream input, Stream output, Endianness endianness)
{
    // Pad the input so bit-stream reads near EOF cannot under-run.
    using (PaddedStream paddedInput = new PaddedStream(input, 2, PaddedStreamMode.Read))
    {
        byte packetLength = NeutralEndian.Read1(paddedInput);
        var readBitfield = GetBitfieldReader(NeutralEndian.Read1(paddedInput));
        ushort incrementingValue;
        ushort commonValue;
        InputBitStream<ushort> bitStream;
        Action<Stream, ushort> write2;
        if (endianness == Endianness.BigEndian)
        {
            incrementingValue = BigEndian.Read2(paddedInput);
            commonValue = BigEndian.Read2(paddedInput);
            bitStream = new UInt16BE_E_L_InputBitStream(paddedInput);
            write2 = Write2BE;
        }
        else
        {
            incrementingValue = LittleEndian.Read2(paddedInput);
            commonValue = LittleEndian.Read2(paddedInput);
            bitStream = new UInt16LE_E_L_InputBitStream(paddedInput);
            write2 = Write2LE;
        }

        // Loop until the end-of-data marker is found (if it is not found before the end of the stream, UInt8InputBitStream
        // will throw an exception)
        for (; ;)
        {
            if (bitStream.Get())
            {
                int mode = bitStream.Read(2);
                int count = bitStream.Read(4);
                switch (mode)
                {
                    case 0:
                    case 1:
                    {
                        // One inline word repeated count+1 times, stepping by
                        // "mode" each time (0, +1, or -1 via the goto below).
                        ushort flags = readBitfield(bitStream);
                        ushort outv = (ushort)(bitStream.Read(packetLength) | flags);
                        do
                        {
                            write2(output, outv);
                            outv += (ushort)mode;
                        }
                        while (--count >= 0);
                    }

                    break;

                    case 2:
                        // Same as modes 0/1 but decrementing.
                        mode = -1;
                        goto case 0;

                    case 3:
                    {
                        // End of compressed data
                        if (count == 0xf)
                        {
                            return;
                        }

                        // Otherwise: count+1 distinct inline words.
                        do
                        {
                            ushort flags = readBitfield(bitStream);
                            ushort outv = bitStream.Read(packetLength);
                            write2(output, (ushort)(outv | flags));
                        }
                        while (--count >= 0);
                    }

                    break;
                }
            }
            else
            {
                bool mode = bitStream.Get();
                int count = bitStream.Read(4);
                if (mode)
                {
                    // Repeat the "common" header word count+1 times.
                    do
                    {
                        write2(output, commonValue);
                    }
                    while (--count >= 0);
                }
                else
                {
                    // Emit count+1 words from the incrementing sequence.
                    do
                    {
                        write2(output, incrementingValue++);
                    }
                    while (--count >= 0);
                }
            }
        }
    }
}
// Nibble-run Huffman compressor. The input is split into RLE nibble runs,
// an optimal length-limited (8-bit) Huffman code is built for them via the
// Package-merge algorithm, and the best resulting code table plus encoded
// bit stream are written to "output".
private static void EncodeInternal(Stream input, Stream output, bool xor, long inputLength)
{
    var rleSource = new List<NibbleRun>();
    var counts = new SortedList<NibbleRun, long>();
    using (IEnumerator<byte> unpacked = Unpacked(input))
    {
        // Build RLE nibble runs, RLE-encoding the nibble runs as we go along.
        // Maximum run length is 8, meaning 7 repetitions.
        if (unpacked.MoveNext())
        {
            NibbleRun current = new NibbleRun(unpacked.Current, 0);
            while (unpacked.MoveNext())
            {
                NibbleRun next = new NibbleRun(unpacked.Current, 0);
                if (next.Nibble != current.Nibble || current.Count >= 7)
                {
                    rleSource.Add(current);
                    long count;
                    counts.TryGetValue(current, out count);
                    counts[current] = count + 1;
                    current = next;
                }
                else
                {
                    ++current.Count;
                }
            }
            // NOTE(review): the final "current" run does not appear to be
            // appended to rleSource/counts after the loop — verify the last
            // nibble run of the input is not being dropped.
        }
    }

    // We will use the Package-merge algorithm to build the optimal length-limited
    // Huffman code for the current file. To do this, we must map the current
    // problem onto the Coin Collector's problem.
    // Build the basic coin collection.
    var qt = new List<EncodingCodeTreeNode>();
    foreach (var kvp in counts)
    {
        // No point in including anything with weight less than 2, as they
        // would actually increase compressed file size if used.
        if (kvp.Value > 1)
        {
            qt.Add(new EncodingCodeTreeNode(kvp.Key, kvp.Value));
        }
    }

    qt.Sort();

    // The base coin collection for the length-limited Huffman coding has
    // one coin list per character in length of the limitation. Each coin list
    // has a constant "face value", and each coin in a list has its own
    // "numismatic value". The "face value" is unimportant in the way the code
    // is structured below; the "numismatic value" of each coin is the number
    // of times the underlying nibble run appears in the source file.

    // This will hold the Huffman code map.
    // NOTE: while the codes that will be written in the header will not be
    // longer than 8 bits, it is possible that a supplementary code map will
    // add "fake" codes that are longer than 8 bits.
    var codeMap = new SortedList<NibbleRun, KeyValuePair<long, byte> >();

    // Size estimate. This is used to build the optimal compressed file.
    long sizeEstimate = long.MaxValue;

    // We will solve the Coin Collector's problem several times, each time
    // ignoring more of the least frequent nibble runs. This allows us to find
    // *the* lowest file size.
    while (qt.Count > 1)
    {
        // Make a copy of the basic coin collection.
        var q0 = new List<EncodingCodeTreeNode>(qt);

        // Ignore the lowest weighted item. Will only affect the next iteration
        // of the loop. If it can be proven that there is a single global
        // minimum (and no local minima for file size), then this could be
        // simplified to a binary search.
        qt.RemoveAt(qt.Count - 1);

        // We now solve the Coin collector's problem using the Package-merge
        // algorithm. The solution goes here.
        var solution = new List<EncodingCodeTreeNode>();

        // This holds the packages from the last iteration.
        var q = new List<EncodingCodeTreeNode>(q0);

        int target = (q0.Count - 1) << 8, idx = 0;
        while (target != 0)
        {
            // Gets lowest bit set in its proper place:
            int val = (target & -target), r = 1 << idx;

            // Is the current denomination equal to the least denomination?
            if (r == val)
            {
                // If yes, take the least valuable node and put it into the solution.
                solution.Add(q[q.Count - 1]);
                q.RemoveAt(q.Count - 1);
                target -= r;
            }

            // The coin collection has coins of values 1 to 8; copy from the
            // original in those cases for the next step.
            var q1 = new List<EncodingCodeTreeNode>();
            if (idx < 7)
            {
                q1.AddRange(q0);
            }

            // Split the current list into pairs and insert the packages into
            // the next list.
            while (q.Count > 1)
            {
                EncodingCodeTreeNode child1 = q[q.Count - 1];
                q.RemoveAt(q.Count - 1);
                EncodingCodeTreeNode child0 = q[q.Count - 1];
                q.RemoveAt(q.Count - 1);
                q1.Add(new EncodingCodeTreeNode(child0, child1));
            }

            idx++;
            q.Clear();
            q.AddRange(q1);
            q.Sort();
        }

        // The Coin Collector's problem has been solved. Now it is time to
        // map the solution back into the length-limited Huffman coding problem.

        // To do that, we iterate through the solution and count how many times
        // each nibble run has been used (remember that the coin collection had
        // had multiple coins associated with each nibble run) -- this number
        // is the optimal bit length for the nibble run.
        var baseSizeMap = new SortedList<NibbleRun, long>();
        foreach (var item in solution)
        {
            item.Traverse(baseSizeMap);
        }

        // With the length-limited Huffman coding problem solved, it is now time
        // to build the code table. As input, we have a map associating a nibble
        // run to its optimal encoded bit length. We will build the codes using
        // the canonical Huffman code.

        // To do that, we must invert the size map so we can sort it by code size.
        var sizeOnlyMap = new MultiSet<long>();

        // This map contains lots more information, and is used to associate
        // the nibble run with its optimal code. It is sorted by code size,
        // then by frequency of the nibble run, then by the nibble run.
        var sizeMap = new MultiSet<SizeMapItem>();
        foreach (var item in baseSizeMap)
        {
            long size = item.Value;
            sizeOnlyMap.Add(size);
            sizeMap.Add(new SizeMapItem(size, counts[item.Key], item.Key));
        }

        // We now build the canonical Huffman code table.
        // "baseCode" is the code for the first nibble run with a given bit length.
        // "carry" is how many nibble runs were demoted to a higher bit length
        // at an earlier step.
        // "cnt" is how many nibble runs have a given bit length.
        long baseCode = 0;
        long carry = 0, cnt;

        // This list contains the codes sorted by size.
        var codes = new List<KeyValuePair<long, byte> >();
        for (byte j = 1; j <= 8; j++)
        {
            // How many nibble runs have the desired bit length.
            cnt = sizeOnlyMap.Count(j) + carry;
            carry = 0;
            for (int k = 0; k < cnt; k++)
            {
                // Sequential binary numbers for codes.
                long code = baseCode + k;
                long mask = (1L << j) - 1;

                // We do not want any codes composed solely of 1's or which
                // start with 111111, as that sequence is reserved.
                if ((j <= 6 && code == mask) || (j > 6 && code == (mask & ~((1L << (j - 6)) - 1))))
                {
                    // We must demote this many nibble runs to a longer code.
                    carry = cnt - k;
                    cnt = k;
                    break;
                }

                codes.Add(new KeyValuePair<long, byte>(code, j));
            }

            // This is the beginning bit pattern for the next bit length.
            baseCode = (baseCode + cnt) << 1;
        }

        // With the canonical table build, the codemap can finally be built.
        var tempCodemap = new SortedList<NibbleRun, KeyValuePair<long, byte> >();
        using (IEnumerator<SizeMapItem> enumerator = sizeMap.GetEnumerator())
        {
            int pos = 0;
            while (enumerator.MoveNext() && pos < codes.Count)
            {
                tempCodemap[enumerator.Current.NibbleRun] = codes[pos];
                ++pos;
            }
        }

        // We now compute the final file size for this code table.
        // 2 bytes at the start of the file, plus 1 byte at the end of the
        // code table.
        long tempsize_est = 3 * 8;
        byte last = 0xff;

        // Start with any nibble runs with their own code.
        foreach (var item in tempCodemap)
        {
            // Each new nibble needs an extra byte.
            if (item.Key.Nibble != last)
            {
                tempsize_est += 8;
                last = item.Key.Nibble;
            }

            // 2 bytes per nibble run in the table.
            tempsize_est += 2 * 8;

            // How many bits this nibble run uses in the file.
            tempsize_est += counts[item.Key] * item.Value.Value;
        }

        // Supplementary code map for the nibble runs that can be broken up into
        // shorter nibble runs with a smaller bit length than inlining.
        var supCodemap = new Dictionary<NibbleRun, KeyValuePair<long, byte> >();

        // Now we will compute the size requirements for inline nibble runs.
        foreach (var item in counts)
        {
            if (!tempCodemap.ContainsKey(item.Key))
            {
                // Nibble run does not have its own code. We need to find out if
                // we can break it up into smaller nibble runs with total code
                // size less than 13 bits or if we need to inline it (13 bits).
                if (item.Key.Count == 0)
                {
                    // If this is a nibble run with zero repeats, we can't break
                    // it up into smaller runs, so we inline it.
                    tempsize_est += (6 + 7) * item.Value;
                }
                else if (item.Key.Count == 1)
                {
                    // We stand a chance of breaking the nibble run.

                    // This case is rather trivial, so we hard-code it.
                    // We can break this up only as 2 consecutive runs of a nibble
                    // run with count == 0.
                    KeyValuePair<long, byte> value;
                    if (!tempCodemap.TryGetValue(new NibbleRun(item.Key.Nibble, 0), out value) || value.Value > 6)
                    {
                        // The smaller nibble run either does not have its own code
                        // or it results in a longer bit code when doubled up than
                        // would result from inlining the run. In either case, we
                        // inline the nibble run.
                        tempsize_est += (6 + 7) * item.Value;
                    }
                    else
                    {
                        // The smaller nibble run has a small enough code that it is
                        // more efficient to use it twice than to inline our nibble
                        // run. So we do exactly that, by adding a (temporary) entry
                        // in the supplementary codemap, which will later be merged
                        // into the main codemap.
                        long code = value.Key;
                        byte len = value.Value;
                        code = (code << len) | code;
                        len <<= 1;
                        tempsize_est += len * item.Value;
                        supCodemap[item.Key] = new KeyValuePair<long, byte>(code, (byte)(0x80 | len));
                    }
                }
                else
                {
                    // We stand a chance of breaking the nibble run.
                    byte n = item.Key.Count;

                    // This is a linear optimization problem subjected to 2
                    // constraints. If the number of repeats of the current nibble
                    // run is N, then we have N dimensions.
                    // Reference to table of linear coefficients. This table has
                    // N columns for each line.
                    byte[,] myLinearCoeffs = linearCoeffs[n - 2];
                    int rows = myLinearCoeffs.GetLength(0);

                    byte nibble = item.Key.Nibble;

                    // List containing the code length of each nibble run, or 13
                    // if the nibble run is not in the codemap.
                    var runlen = new List<long>();

                    // Initialize the list.
                    for (byte i = 0; i < n; i++)
                    {
                        // Is this run in the codemap?
                        KeyValuePair<long, byte> value;
                        if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                        {
                            // It is.
                            // Put code length in the vector.
                            runlen.Add(value.Value);
                        }
                        else
                        {
                            // It is not.
                            // Put inline length in the vector.
                            runlen.Add(6 + 7);
                        }
                    }

                    // Now go through the linear coefficient table and tally up
                    // the total code size, looking for the best case.
                    // The best size is initialized to be the inlined case.
                    long bestSize = 6 + 7;
                    int bestLine = -1;
                    for (int i = 0; i < rows; i++)
                    {
                        // Tally up the code length for this coefficient line.
                        long len = 0;
                        for (byte j = 0; j < n; j++)
                        {
                            byte c = myLinearCoeffs[i, j];
                            if (c == 0)
                            {
                                continue;
                            }

                            len += c * runlen[j];
                        }

                        // Is the length better than the best yet?
                        if (len < bestSize)
                        {
                            // If yes, store it as the best.
                            bestSize = len;
                            bestLine = i;
                        }
                    }

                    // Have we found a better code than inlining?
                    if (bestLine >= 0)
                    {
                        // We have; use it. To do so, we have to build the code
                        // and add it to the supplementary code table.
                        long code = 0, len = 0;
                        for (byte i = 0; i < n; i++)
                        {
                            byte c = myLinearCoeffs[bestLine, i];
                            if (c == 0)
                            {
                                continue;
                            }

                            // Is this run in the codemap?
                            KeyValuePair<long, byte> value;
                            if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                            {
                                // It is; it MUST be, as the other case is impossible
                                // by construction.
                                for (int j = 0; j < c; j++)
                                {
                                    len += value.Value;
                                    code <<= value.Value;
                                    code |= value.Key;
                                }
                            }
                        }

                        if (len != bestSize)
                        {
                            // ERROR! DANGER! THIS IS IMPOSSIBLE!
                            // But just in case...
                            tempsize_est += (6 + 7) * item.Value;
                        }
                        else
                        {
                            // By construction, best_size is at most 12.
                            byte c = (byte)bestSize;

                            // Add it to supplementary code map.
                            supCodemap[item.Key] = new KeyValuePair<long, byte>(code, (byte)(0x80 | c));
                            tempsize_est += bestSize * item.Value;
                        }
                    }
                    else
                    {
                        // No, we will have to inline it.
                        tempsize_est += (6 + 7) * item.Value;
                    }
                }
            }
        }

        // Merge the supplementary code map into the temporary code map.
        foreach (var item in supCodemap)
        {
            tempCodemap[item.Key] = item.Value;
        }

        // Round up to a full byte.
        tempsize_est = (tempsize_est + 7) & ~7;

        // Is this iteration better than the best?
        if (tempsize_est < sizeEstimate)
        {
            // If yes, save the codemap and file size.
            codeMap = tempCodemap;
            sizeEstimate = tempsize_est;
        }
    }

    // We now have a prefix-free code map associating the RLE-encoded nibble
    // runs with their code. Now we write the file.

    // Write header.
    BigEndian.Write2(output, (ushort)((Convert.ToInt32(xor) << 15) | ((int)inputLength >> 5)));
    byte lastNibble = 0xff;
    foreach (var item in codeMap)
    {
        byte length = item.Value.Value;

        // length with bit 7 set is a special device for further reducing file size, and
        // should NOT be on the table.
        if ((length & 0x80) != 0)
        {
            continue;
        }

        NibbleRun nibbleRun = item.Key;
        if (nibbleRun.Nibble != lastNibble)
        {
            // 0x80 marks byte as setting a new nibble.
            NeutralEndian.Write1(output, (byte)(0x80 | nibbleRun.Nibble));
            lastNibble = nibbleRun.Nibble;
        }

        long code = item.Value.Key;
        NeutralEndian.Write1(output, (byte)((nibbleRun.Count << 4) | length));
        NeutralEndian.Write1(output, (byte)code);
    }

    // Mark end of header.
    NeutralEndian.Write1(output, 0xff);

    // Write the encoded bitstream.
    UInt8_E_L_OutputBitStream bitStream = new UInt8_E_L_OutputBitStream(output);

    // The RLE-encoded source makes for a far faster encode as we simply
    // use the nibble runs as an index into the map, meaning a quick binary
    // search gives us the code to use (if in the map) or tells us that we
    // need to use inline RLE.
    foreach (var nibbleRun in rleSource)
    {
        KeyValuePair<long, byte> value;
        if (codeMap.TryGetValue(nibbleRun, out value))
        {
            long code = value.Key;
            byte len = value.Value;

            // len with bit 7 set is a device to bypass the code table at the
            // start of the file. We need to clear the bit here before writing
            // the code to the file.
            len &= 0x7f;

            // We can have codes in the 9-12 range due to the break up of large
            // inlined runs into smaller non-inlined runs. Deal with those high
            // bits first, if needed.
            if (len > 8)
            {
                bitStream.Write((byte)((code >> 8) & 0xff), len - 8);
                len = 8;
            }

            bitStream.Write((byte)(code & 0xff), len);
        }
        else
        {
            // Inline RLE: reserved prefix 0b111111 + 3-bit count + 4-bit nibble.
            bitStream.Write(0x3f, 6);
            bitStream.Write(nibbleRun.Count, 3);
            bitStream.Write(nibbleRun.Nibble, 4);
        }
    }

    // Fill remainder of last byte with zeroes and write if needed.
    bitStream.Flush(false);
}
// LZSS-style compressor variant using an 8-bit descriptor stream and
// little-endian 16-bit long-form descriptors; short-form matches write the
// offset byte BEFORE the two count bits, and counts are stored inverted
// (10 - iCount / iCount - 9) relative to the 16-bit variant above.
private static void EncodeInternal(Stream destination, byte[] buffer, long pos, long slidingWindow, long recLength, long size)
{
    UInt8_NE_H_OutputBitStream bitStream = new UInt8_NE_H_OutputBitStream(destination);
    MemoryStream data = new MemoryStream();
    if (size > 0)
    {
        long bPointer = 1, iOffset = 0;
        // First byte is always a literal.
        bitStream.Push(true);
        NeutralEndian.Write1(data, buffer[pos]);
        while (bPointer < size)
        {
            // Exhaustive backwards search for the longest match starting at
            // bPointer, no longer than iCount and no further back than iMax.
            long iCount = Math.Min(recLength, size - bPointer);
            long iMax = Math.Max(bPointer - slidingWindow, 0);
            long k = 1;
            long i = bPointer - 1;
            do
            {
                long j = 0;
                while (buffer[pos + i + j] == buffer[pos + bPointer + j])
                {
                    if (++j >= iCount)
                    {
                        break;
                    }
                }

                if (j > k)
                {
                    k = j;
                    iOffset = i;
                }
            }
            while (i-- > iMax);
            iCount = k;
            if (iCount == 1)
            {
                // No usable match: literal byte (flag bit 1).
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, buffer[pos + bPointer]);
            }
            else if (iCount == 2 && bPointer - iOffset > 256)
            {
                // A 2-byte match only reachable via the long form is not
                // worth encoding; emit a literal instead.
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, buffer[pos + bPointer]);
                --iCount;
            }
            else if (iCount < 6 && bPointer - iOffset <= 256)
            {
                // Short form (flags 00): offset byte first, then 2 count bits.
                Push(bitStream, false, destination, data);
                Push(bitStream, false, destination, data);
                NeutralEndian.Write1(data, (byte)(~(bPointer - iOffset - 1)));
                Push(bitStream, (((iCount - 2) >> 1) & 1) != 0, destination, data);
                Push(bitStream, ((iCount - 2) & 1) != 0, destination, data);
            }
            else
            {
                // Long form (flags 01): 13-bit offset packed with a 3-bit
                // inverted count, or an extra count byte for longer matches.
                Push(bitStream, false, destination, data);
                Push(bitStream, true, destination, data);
                long off = bPointer - iOffset - 1;
                ushort info = (ushort)(~((off << 8) | (off >> 5)) & 0xFFF8);
                if (iCount < 10) // iCount - 2 < 8
                {
                    info |= (ushort)(10 - iCount);
                    LittleEndian.Write2(data, info);
                }
                else
                {
                    LittleEndian.Write2(data, info);
                    NeutralEndian.Write1(data, (byte)(iCount - 9));
                }
            }

            bPointer += iCount;
        }
    }

    // End-of-data sequence: long-form descriptor with a zero offset/count.
    Push(bitStream, false, destination, data);
    Push(bitStream, true, destination, data);
    NeutralEndian.Write1(data, 0xF0);
    NeutralEndian.Write1(data, 0);
    NeutralEndian.Write1(data, 0);
    bitStream.Flush(true);
    byte[] bytes = data.ToArray();
    destination.Write(bytes, 0, bytes.Length);
}
// Word-granularity LZSS compressor: matches are found and encoded in whole
// 16-bit words. A 0 descriptor bit precedes a literal word; a 1 bit precedes
// a (negative word offset, word count - 1) dictionary match.
private static void EncodeInternal(Stream destination, byte[] buffer, long slidingWindow, long recLength, long size)
{
    UInt16BE_NE_H_OutputBitStream bitStream = new UInt16BE_NE_H_OutputBitStream(destination);
    MemoryStream data = new MemoryStream();

    if (size > 0)
    {
        long bPointer = 2, longestMatchOffset = 0;

        // First word is always a literal.
        bitStream.Push(false);
        NeutralEndian.Write1(data, buffer[0]);
        NeutralEndian.Write1(data, buffer[1]);

        while (bPointer < size)
        {
            long matchMax = Math.Min(recLength, size - bPointer);
            long backSearchMax = Math.Max(bPointer - slidingWindow, 0);

            long longestMatch = 2;
            long backSearch = bPointer;

            do
            {
                backSearch -= 2;

                long currentCount = 0;
                while (buffer[backSearch + currentCount] == buffer[bPointer + currentCount] && buffer[backSearch + currentCount + 1] == buffer[bPointer + currentCount + 1])
                {
                    currentCount += 2;

                    if (currentCount >= matchMax)
                    {
                        // Match is as big as the look-forward buffer (or file) will let it be
                        break;
                    }
                }

                if (currentCount > longestMatch)
                {
                    // New 'best' match
                    longestMatch = currentCount;
                    longestMatchOffset = backSearch;
                }
            }
            while (backSearch > backSearchMax); // Repeat for as far back as search buffer will let us

            long iCount = longestMatch / 2;                    // Comper counts in words (16 bits)
            long iOffset = (longestMatchOffset - bPointer) / 2; // Comper's offsets count in words (16-bits)

            if (iCount == 1)
            {
                // Symbolwise match
                Push(bitStream, false, destination, data);
                NeutralEndian.Write1(data, buffer[bPointer]);
                NeutralEndian.Write1(data, buffer[bPointer + 1]);
            }
            else
            {
                // Dictionary match
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, (byte)(iOffset));
                NeutralEndian.Write1(data, (byte)(iCount - 1));
            }

            bPointer += iCount * 2; // iCount counts in words (16-bits), so we correct it to bytes (8-bits) here
        }
    }

    // Terminator: dictionary-match flag with a zero offset/count pair.
    Push(bitStream, true, destination, data);
    NeutralEndian.Write1(data, 0);
    NeutralEndian.Write1(data, 0);
    bitStream.Flush(true);
    byte[] bytes = data.ToArray();
    destination.Write(bytes, 0, bytes.Length);
}
// LZSS-style decompressor that expands dictionary references by seeking back
// within the destination stream itself. "decompressedBytes" accumulates the
// total number of bytes written. Returns when the end-of-data marker
// (long-form count byte 0) is read; a count byte of 1 continues decoding.
private static void DecodeInternal(Stream source, Stream destination, ref long decompressedBytes)
{
    UInt16LEInputBitStream bitStream = new UInt16LEInputBitStream(source);

    for (;;)
    {
        if (bitStream.Pop())
        {
            // Literal byte.
            NeutralEndian.Write1(destination, NeutralEndian.Read1(source));
            ++decompressedBytes;
        }
        else
        {
            long count = 0;
            long offset = 0;

            if (bitStream.Pop())
            {
                // Long form: two bytes hold a 13-bit negative offset and a
                // 3-bit count; count 0 pulls an extended count byte.
                byte low = NeutralEndian.Read1(source);
                byte high = NeutralEndian.Read1(source);

                count = high & 0x07;

                if (count == 0)
                {
                    count = NeutralEndian.Read1(source);
                    if (count == 0)
                    {
                        // End-of-data marker.
                        break;
                    }

                    if (count == 1)
                    {
                        continue;
                    }
                }
                else
                {
                    ++count;
                }

                // Sign-extend the 13-bit backwards offset.
                offset = ~0x1FFFL | ((0xF8 & high) << 5) | low;
            }
            else
            {
                // Short form: 2-bit count from the bit stream, 8-bit
                // backwards offset byte (sign-extended).
                byte low = Convert.ToByte(bitStream.Pop());
                byte high = Convert.ToByte(bitStream.Pop());

                count = (low << 1 | high) + 1;

                offset = NeutralEndian.Read1(source);
                offset |= ~0xFFL;
            }

            // Copy count+1 bytes from earlier in the destination, one at a
            // time so overlapping references repeat freshly written data.
            for (long i = 0; i <= count; i++)
            {
                long writePosition = destination.Position;
                destination.Seek(writePosition + offset, SeekOrigin.Begin);
                byte b = NeutralEndian.Read1(destination);
                destination.Seek(writePosition, SeekOrigin.Begin);
                NeutralEndian.Write1(destination, b);
            }

            decompressedBytes += count + 1;
        }
    }
}
/// <summary>
/// Compresses everything remaining in <paramref name="input"/> (Saxman-style LZSS:
/// one descriptor bit per token, 12-bit offsets, match lengths 3..18, zero-fill
/// matches) into <paramref name="output"/>. Match selection uses a shortest-path
/// search over the "LZSS graph" described below, so output size is optimal.
/// </summary>
/// <param name="input">Stream to read uncompressed data from; read to the end.</param>
/// <param name="output">Stream the compressed data is written to.</param>
/// <param name="with_size">
/// When true, a little-endian 16-bit count of the compressed payload bytes is
/// written in front of the payload.
/// </param>
private static void Encode(Stream input, Stream output, bool with_size)
{
    int input_size = (int)(input.Length - input.Position);
    byte[] input_buffer = new byte[input_size];

    // Stream.Read may legally return fewer bytes than requested, so loop
    // until the whole remainder of the stream has been buffered.
    int total_read = 0;
    while (total_read < input_size)
    {
        int bytes_read = input.Read(input_buffer, total_read, input_size - total_read);
        if (bytes_read == 0)
        {
            throw new EndOfStreamException("Unexpected end of input stream.");
        }
        total_read += bytes_read;
    }

    long outputInitialPosition = output.Position;
    if (with_size)
    {
        // Reserve room for the 16-bit size header; it is backfilled at the end.
        output.Seek(2, SeekOrigin.Current);
    }

    /*
     * Here we create and populate the "LZSS graph":
     *
     * Each value in the uncompressed file forms a node in this graph.
     * The various edges between these nodes represent LZSS matches.
     *
     * Using a shortest-path algorithm, these edges can be used to
     * find the optimal combination of matches needed to produce the
     * smallest possible file.
     *
     * The outputted array only contains one edge per node: the optimal
     * one. This means, in order to produce the smallest file, you just
     * have to traverse the graph from one edge to the next, encoding
     * each match as you go along.
     */

    LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[input_size + 1];

    // Initialise the array: node 0 is free, everything else starts unreachable.
    node_meta_array[0].cost = 0;
    for (int i = 1; i < input_size + 1; ++i)
    {
        node_meta_array[i].cost = int.MaxValue;
    }

    // Find matches
    for (int i = 0; i < input_size; ++i)
    {
        int max_read_ahead = Math.Min(0xF + 3, input_size - i);
        int max_read_behind = Math.Max(0, i - 0x1000);

        // Search for zero-fill matches (in this format, references to data
        // before the window decode as zeroes, so runs of zero bytes can be
        // encoded even with no earlier data to point at).
        if (i < 0x1000)
        {
            // BUGFIX: bound the scan by max_read_ahead (was the constant
            // 0xF + 3), which could read past the end of input_buffer when
            // a zero run reached the end of the file.
            for (int k = 0; k < max_read_ahead; ++k)
            {
                if (input_buffer[i + k] == 0)
                {
                    int length = k + 1;

                    // Update this node's optimal edge if this one is better.
                    // A match costs 1 descriptor bit + 16 payload bits.
                    if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                    {
                        node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                        node_meta_array[i + k + 1].previous_node_index = i;
                        node_meta_array[i + k + 1].match_length = k + 1;
                        node_meta_array[i + k + 1].match_offset = 0xFFF; // sentinel offset for zero-fill matches
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Search for dictionary matches
        for (int j = i; j-- > max_read_behind;)
        {
            for (int k = 0; k < max_read_ahead; ++k)
            {
                if (input_buffer[i + k] == input_buffer[j + k])
                {
                    int length = k + 1;

                    // Update this node's optimal edge if this one is better
                    if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                    {
                        node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                        node_meta_array[i + k + 1].previous_node_index = i;
                        node_meta_array[i + k + 1].match_length = k + 1;
                        node_meta_array[i + k + 1].match_offset = j;
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Do literal match: 1 descriptor bit + 8 payload bits.
        // Update this node's optimal edge if this one is better (or the same,
        // since literal matches usually decode faster).
        if (node_meta_array[i + 1].cost >= node_meta_array[i].cost + 1 + 8)
        {
            node_meta_array[i + 1].cost = node_meta_array[i].cost + 1 + 8;
            node_meta_array[i + 1].previous_node_index = i;
            node_meta_array[i + 1].match_length = 0;
        }
    }

    // Reverse the edge link order, so the array can be traversed from start to end, rather than vice versa
    node_meta_array[0].previous_node_index = int.MaxValue;
    node_meta_array[input_size].next_node_index = int.MaxValue;
    for (int node_index = input_size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
    {
        node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
    }

    /*
     * LZSS graph complete
     */

    UInt8_NE_L_OutputBitStream bitStream = new UInt8_NE_L_OutputBitStream(output);
    MemoryStream data = new MemoryStream();

    for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
    {
        int next_index = node_meta_array[node_index].next_node_index;

        if (node_meta_array[next_index].match_length != 0)
        {
            // Compressed
            Push(bitStream, false, output, data);
            int match_offset_adjusted = node_meta_array[next_index].match_offset - 0x12; // I don't think there's any reason for this, the format's just stupid
            NeutralEndian.Write1(data, (byte)(match_offset_adjusted & 0xFF));
            NeutralEndian.Write1(data, (byte)(((match_offset_adjusted & 0xF00) >> 4) | ((node_meta_array[next_index].match_length - 3) & 0x0F)));
        }
        else
        {
            // Uncompressed
            Push(bitStream, true, output, data);
            NeutralEndian.Write1(data, input_buffer[node_index]);
        }
    }

    // Write remaining data (normally we don't flush until we have a full descriptor byte)
    bitStream.Flush(true);

    byte[] dataArray = data.ToArray();
    output.Write(dataArray, 0, dataArray.Length);

    if (with_size)
    {
        // BUGFIX: the size field is the number of compressed bytes after the
        // 2-byte header, i.e. end - start - 2. The operands were reversed
        // before, producing a negative value wrapped through the ushort cast.
        long outputFinalPosition = output.Position;
        ushort size = (ushort)(outputFinalPosition - outputInitialPosition - 2);
        output.Seek(outputInitialPosition, SeekOrigin.Begin);
        LittleEndian.Write2(output, size);
        // Leave the stream positioned after the compressed data, not mid-file.
        output.Seek(outputFinalPosition, SeekOrigin.Begin);
    }
}
/// <summary>
/// Compresses <paramref name="size"/> bytes of <paramref name="buffer"/> starting
/// at <paramref name="pos"/> into <paramref name="destination"/>. The emitted bit
/// patterns (inline matches with 2-bit counts, full matches with 13-bit offsets,
/// extended-count matches, 0xF0/0/0 terminator) are the Kosinski-style encoding —
/// NOTE(review): confirm the format name against the enclosing class.
/// Match selection uses a shortest-path search over the "LZSS graph" below,
/// with per-edge costs equal to each token's exact encoded bit size.
/// </summary>
/// <param name="destination">Stream the compressed data is written to.</param>
/// <param name="buffer">Backing array holding the uncompressed input.</param>
/// <param name="pos">Start offset of the input within <paramref name="buffer"/>.</param>
/// <param name="size">Number of input bytes to compress.</param>
private static void EncodeInternal(Stream destination, byte[] buffer, int pos, int size)
{
    /*
     * Here we create and populate the "LZSS graph":
     *
     * Each value in the uncompressed file forms a node in this graph.
     * The various edges between these nodes represent LZSS matches.
     *
     * Using a shortest-path algorithm, these edges can be used to
     * find the optimal combination of matches needed to produce the
     * smallest possible file.
     *
     * The outputted array only contains one edge per node: the optimal
     * one. This means, in order to produce the smallest file, you just
     * have to traverse the graph from one edge to the next, encoding
     * each match as you go along.
     */

    LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[size + 1];

    // Initialise the array: node 0 costs nothing, the rest start unreachable.
    node_meta_array[0].cost = 0;
    for (int i = 1; i < size + 1; ++i)
    {
        node_meta_array[i].cost = int.MaxValue;
    }

    // Find matches
    for (int i = 0; i < size; ++i)
    {
        int max_read_ahead = Math.Min(0x100 + 8, size - i);
        int max_read_behind = Math.Max(0, i - 0x2000);

        // Search for dictionary matches
        for (int j = i; j-- > max_read_behind;)
        {
            for (int k = 0; k < max_read_ahead; ++k)
            {
                if (buffer[pos + i + k] == buffer[pos + j + k])
                {
                    int distance = i - j;
                    int length = k + 1;

                    // Get the cost of the match (or bail if it can't be compressed)
                    int cost;
                    if (length >= 2 && length <= 5 && distance <= 256)
                    {
                        cost = 2 + 2 + 8; // Descriptor bits, length bits, offset byte
                    }
                    else if (length >= 3 && length <= 9)
                    {
                        cost = 2 + 16; // Descriptor bits, offset/length bytes
                    }
                    else if (length >= 10)
                    {
                        cost = 2 + 16 + 8; // Descriptor bits, offset bytes, length byte
                    }
                    else
                    {
                        continue; // In the event a match cannot be compressed
                    }

                    // Update this node's optimal edge if this one is better
                    if (node_meta_array[i + k + 1].cost > node_meta_array[i].cost + cost)
                    {
                        node_meta_array[i + k + 1].cost = node_meta_array[i].cost + cost;
                        node_meta_array[i + k + 1].previous_node_index = i;
                        node_meta_array[i + k + 1].match_length = k + 1;
                        node_meta_array[i + k + 1].match_offset = j;
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Do literal match
        // Update this node's optimal edge if this one is better (or the same, since literal matches usually decode faster)
        if (node_meta_array[i + 1].cost >= node_meta_array[i].cost + 1 + 8)
        {
            node_meta_array[i + 1].cost = node_meta_array[i].cost + 1 + 8;
            node_meta_array[i + 1].previous_node_index = i;
            node_meta_array[i + 1].match_length = 0;
        }
    }

    // Reverse the edge link order, so the array can be traversed from start to end, rather than vice versa
    node_meta_array[0].previous_node_index = int.MaxValue;
    node_meta_array[size].next_node_index = int.MaxValue;
    for (int node_index = size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
    {
        node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
    }

    /*
     * LZSS graph complete
     */

    UInt8_NE_H_OutputBitStream bitStream = new UInt8_NE_H_OutputBitStream(destination);
    MemoryStream data = new MemoryStream();

    for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
    {
        int next_index = node_meta_array[node_index].next_node_index;

        int length = node_meta_array[next_index].match_length;
        // match_offset is an absolute index, so recover the backwards distance.
        int distance = next_index - node_meta_array[next_index].match_length - node_meta_array[next_index].match_offset;

        if (length != 0)
        {
            if (length >= 2 && length <= 5 && distance <= 256)
            {
                // Inline match: descriptor bits 0,0; one offset byte; 2 count bits.
                Push(bitStream, false, destination, data);
                Push(bitStream, false, destination, data);
                NeutralEndian.Write1(data, (byte)-distance);
                Push(bitStream, ((length - 2) & 2) != 0, destination, data);
                Push(bitStream, ((length - 2) & 1) != 0, destination, data);
            }
            else if (length >= 3 && length <= 9)
            {
                // Full match: descriptor bits 0,1; 13-bit offset packed with a
                // 3-bit count field (10 - length, never 0 here since length <= 9).
                Push(bitStream, false, destination, data);
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, (byte)(((-distance >> (8 - 3)) & 0xF8) | ((10 - length) & 7)));
                NeutralEndian.Write1(data, (byte)(-distance & 0xFF));
            }
            else //if (length >= 3)
            {
                // Extended match: count field 0 signals an extra count byte (length - 9).
                Push(bitStream, false, destination, data);
                Push(bitStream, true, destination, data);
                NeutralEndian.Write1(data, (byte)((-distance >> (8 - 3)) & 0xF8));
                NeutralEndian.Write1(data, (byte)(-distance & 0xFF));
                NeutralEndian.Write1(data, (byte)(length - 9));
            }
        }
        else
        {
            // Literal: descriptor bit 1 followed by the raw byte.
            Push(bitStream, true, destination, data);
            NeutralEndian.Write1(data, buffer[pos + node_index]);
        }
    }

    // Terminator: extended match with bytes 0xF0, 0x00, 0x00.
    Push(bitStream, false, destination, data);
    Push(bitStream, true, destination, data);
    NeutralEndian.Write1(data, 0xF0);
    NeutralEndian.Write1(data, 0);
    NeutralEndian.Write1(data, 0);
    bitStream.Flush(true);

    byte[] bytes = data.ToArray();
    destination.Write(bytes, 0, bytes.Length);
}
/// <summary>
/// Compresses the remainder of <paramref name="source"/> into
/// <paramref name="destination"/> using the word-based LZSS scheme, choosing
/// matches via a shortest-path ("LZSS graph") search so the encoded output is
/// as small as possible. Input of odd length is padded with a trailing zero byte.
/// </summary>
/// <param name="source">Stream to read uncompressed data from; read to the end.</param>
/// <param name="destination">Stream the compressed data is written to.</param>
internal static void Encode(Stream source, Stream destination)
{
    // Buffer the remaining input, padded to an even byte count, then pack it
    // into big-endian 16-bit words (this codec operates on whole words).
    int byteCount = (int)(source.Length - source.Position);
    byte[] rawBytes = new byte[byteCount + (byteCount & 1)];
    source.Read(rawBytes, 0, byteCount);

    int wordCount = (byteCount + 1) / 2;
    ushort[] words = new ushort[wordCount];
    for (int w = 0; w < wordCount; ++w)
    {
        words[w] = (ushort)((rawBytes[w * 2] << 8) | rawBytes[(w * 2) + 1]);
    }

    // One graph node per word boundary; nodes[n].cost is the cheapest known
    // bit-cost of encoding the first n words. Node 0 is free, the rest start
    // out unreachable.
    LZSSGraphEdge[] nodes = new LZSSGraphEdge[wordCount + 1];
    nodes[0].cost = 0;
    for (int n = 1; n <= wordCount; ++n)
    {
        nodes[n].cost = int.MaxValue;
    }

    // Relax every match edge and every literal edge.
    for (int i = 0; i < wordCount; ++i)
    {
        int aheadLimit = Math.Min(0x100, wordCount - i);
        int behindLimit = Math.Max(0, i - 0x100);

        // Consider every dictionary match starting up to 0x100 words back.
        for (int j = i - 1; j >= behindLimit; --j)
        {
            int k = 0;
            while (k < aheadLimit && words[i + k] == words[j + k])
            {
                // A match of k+1 words costs 1 descriptor bit + 2 payload bytes.
                if (nodes[i + k + 1].cost > nodes[i].cost + 1 + 16)
                {
                    nodes[i + k + 1].cost = nodes[i].cost + 1 + 16;
                    nodes[i + k + 1].previous_node_index = i;
                    nodes[i + k + 1].match_length = k + 1;
                    nodes[i + k + 1].match_offset = j;
                }
                ++k;
            }
        }

        // Literal word: same 1 + 16 bit cost; '>=' prefers literals on ties
        // because they usually decode faster.
        if (nodes[i + 1].cost >= nodes[i].cost + 1 + 16)
        {
            nodes[i + 1].cost = nodes[i].cost + 1 + 16;
            nodes[i + 1].previous_node_index = i;
            nodes[i + 1].match_length = 0;
        }
    }

    // Convert the backwards previous-node chain into forwards next-node links
    // so the optimal path can be walked front to back.
    nodes[0].previous_node_index = int.MaxValue;
    nodes[wordCount].next_node_index = int.MaxValue;
    for (int n = wordCount; nodes[n].previous_node_index != int.MaxValue; n = nodes[n].previous_node_index)
    {
        nodes[nodes[n].previous_node_index].next_node_index = n;
    }

    // Walk the optimal path, emitting one descriptor bit plus payload per edge.
    UInt16BE_NE_H_OutputBitStream bitStream = new UInt16BE_NE_H_OutputBitStream(destination);
    MemoryStream data = new MemoryStream();

    int current = 0;
    while (nodes[current].next_node_index != int.MaxValue)
    {
        int next = nodes[current].next_node_index;
        int length = nodes[next].match_length;
        // match_offset is an absolute word index; recover the backwards distance.
        int distance = next - nodes[next].match_length - nodes[next].match_offset;

        if (length != 0)
        {
            // Dictionary match: bit 1, negated word offset, then length - 1.
            Push(bitStream, true, destination, data);
            NeutralEndian.Write1(data, (byte)-distance);
            NeutralEndian.Write1(data, (byte)(length - 1));
        }
        else
        {
            // Literal: bit 0 followed by the raw big-endian word.
            Push(bitStream, false, destination, data);
            BigEndian.Write2(data, words[current]);
        }

        current = next;
    }

    // Terminator: a dictionary match with offset and count bytes of zero.
    Push(bitStream, true, destination, data);
    NeutralEndian.Write1(data, 0);
    NeutralEndian.Write1(data, 0);
    bitStream.Flush(true);

    byte[] payload = data.ToArray();
    destination.Write(payload, 0, payload.Length);
}