Ejemplo n.º 1
0
        /// <summary>
        /// Decompresses data read from <paramref name="source"/> into <paramref name="destination"/>, adding the
        /// number of bytes produced to <paramref name="decompressedBytes"/>.
        /// </summary>
        /// <param name="source">Stream that supplies both the descriptor bits and the data bytes.</param>
        /// <param name="destination">
        /// Stream that receives the output. It is also seeked and read back in order to copy earlier output.
        /// </param>
        /// <param name="decompressedBytes">Running total, increased by one for every byte written.</param>
        private static void DecodeInternal(Stream source, Stream destination, ref long decompressedBytes)
        {
            UInt8_NE_H_InputBitStream descriptor = new UInt8_NE_H_InputBitStream(source);

            while (true)
            {
                // A set descriptor bit means the next source byte is passed through unchanged.
                if (descriptor.Pop())
                {
                    byte literal = NeutralEndian.Read1(source);
                    NeutralEndian.Write1(destination, literal);
                    ++decompressedBytes;
                    continue;
                }

                long runLength;
                long distance;

                if (descriptor.Pop())
                {
                    // Long form: two bytes carry a 13-bit negative offset plus a 3-bit length code.
                    byte first  = NeutralEndian.Read1(source);
                    byte second = NeutralEndian.Read1(source);

                    runLength = first & 0x07;
                    if (runLength != 0)
                    {
                        runLength = 10 - runLength;
                    }
                    else
                    {
                        // Length code 0 means the length lives in a third byte.
                        runLength = NeutralEndian.Read1(source);
                        if (runLength == 0)
                        {
                            // A zero length byte terminates decoding.
                            break;
                        }

                        runLength += 9;
                    }

                    distance = ~0x1FFFL | ((0xF8 & first) << 5) | second;
                }
                else
                {
                    // Short form: one offset byte (sign-extended to a negative value) and two length bits.
                    // The offset byte is read before the length bits are popped; the order matters because
                    // popping may itself consume from the source.
                    distance  = NeutralEndian.Read1(source);
                    distance |= ~0xFFL;
                    int firstBit  = descriptor.Pop() ? 1 : 0;
                    int secondBit = descriptor.Pop() ? 1 : 0;
                    runLength = ((firstBit << 1) | secondBit) + 2;
                }

                // Copy one byte at a time so that a run may overlap the bytes it is producing.
                for (long remaining = runLength; remaining > 0; --remaining)
                {
                    long target = destination.Position;
                    destination.Seek(target + distance, SeekOrigin.Begin);
                    byte value = NeutralEndian.Read1(destination);
                    destination.Seek(target, SeekOrigin.Begin);
                    NeutralEndian.Write1(destination, value);
                }

                decompressedBytes += runLength;
            }
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Stores one bit in the pending byte at the next free position (counting up from bit 0) and writes the
        /// byte to the stream as soon as eight bits have been collected.
        /// </summary>
        /// <param name="bit">The bit to append.</param>
        /// <returns><c>true</c> if this call wrote a byte to the stream; otherwise <c>false</c>.</returns>
        public override bool Push(bool bit)
        {
            if (bit)
            {
                this.byteBuffer |= (byte)(1 << this.waitingBits);
            }

            this.waitingBits++;
            if (this.waitingBits < 8)
            {
                return(false);
            }

            // The byte is complete: emit it and start over with an empty buffer.
            NeutralEndian.Write1(this.stream, this.byteBuffer);
            this.byteBuffer  = 0;
            this.waitingBits = 0;
            return(true);
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Writes the partially filled byte to the stream, if any bits are pending.
        /// </summary>
        /// <param name="unchanged">
        /// When <c>false</c>, the pending bits are first shifted left so they occupy the top of the byte;
        /// when <c>true</c>, the buffer is written exactly as it is.
        /// </param>
        /// <returns><c>true</c> if a byte was written; <c>false</c> if there was nothing pending.</returns>
        public override bool Flush(bool unchanged)
        {
            if (this.waitingBits == 0)
            {
                return(false);
            }

            if (!unchanged)
            {
                this.byteBuffer <<= 8 - this.waitingBits;
            }

            NeutralEndian.Write1(this.stream, this.byteBuffer);
            this.waitingBits = 0;
            return(true);
        }
        /// <summary>
        /// Appends <paramref name="size"/> bits taken from <paramref name="data"/> to the pending bits, writing a
        /// byte to the stream when at least eight bits are available.
        /// </summary>
        /// <param name="data">Value whose bits are appended below the bits already pending.</param>
        /// <param name="size">Number of bits being appended.</param>
        /// <returns><c>true</c> if this call wrote a byte to the stream; otherwise <c>false</c>.</returns>
        public override bool Write(byte data, int size)
        {
            int total = this.waitingBits + size;

            if (total < 8)
            {
                // Not enough for a full byte yet: make room and merge the new bits in.
                this.byteBuffer <<= size;
                this.byteBuffer  |= data;
                this.waitingBits  = total;
                return(false);
            }

            // Fill the pending byte with the top part of the new bits; the rest stays buffered.
            int delta = 8 - this.waitingBits;
            this.waitingBits = total % 8;
            byte completed = (byte)((this.byteBuffer << delta) | (data >> this.waitingBits));
            NeutralEndian.Write1(this.stream, completed);
            this.byteBuffer = data;
            return(true);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Stores one bit in the pending byte at the next free position (counting up from bit 0). A full byte is
        /// not written when it becomes full, but at the start of the following call.
        /// </summary>
        /// <param name="bit">The bit to append.</param>
        /// <returns><c>true</c> if this call wrote the previously filled byte to the stream; otherwise <c>false</c>.</returns>
        public override bool Push(bool bit)
        {
            bool wroteByte = this.waitingBits >= 8;

            if (wroteByte)
            {
                // The previous byte is complete: emit it before accepting the new bit.
                NeutralEndian.Write1(this.stream, this.byteBuffer);
                this.byteBuffer  = 0;
                this.waitingBits = 0;
            }

            if (bit)
            {
                this.byteBuffer |= (byte)(1 << this.waitingBits);
            }

            this.waitingBits++;

            return(wroteByte);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Compresses <paramref name="size"/> bytes of <paramref name="buffer"/>, starting at index
        /// <paramref name="pos"/>, and writes the result to <paramref name="destination"/>.
        /// The cheapest sequence of literals and matches (measured in output bits) is found first, then emitted.
        /// </summary>
        /// <param name="destination">Stream that receives the descriptor bits and the data bytes.</param>
        /// <param name="buffer">Array holding the uncompressed input.</param>
        /// <param name="pos">Index in <paramref name="buffer"/> of the first byte to compress.</param>
        /// <param name="size">Number of bytes to compress.</param>
        private static void EncodeInternal(Stream destination, byte[] buffer, int pos, int size)
        {
            /*
             * Here we create and populate the "LZSS graph":
             *
             * Each value in the uncompressed file forms a node in this graph.
             * The various edges between these nodes represent LZSS matches.
             *
             * Using a shortest-path algorithm, these edges can be used to
             * find the optimal combination of matches needed to produce the
             * smallest possible file.
             *
             * The outputted array only contains one edge per node: the optimal
             * one. This means, in order to produce the smallest file, you just
             * have to traverse the graph from one edge to the next, encoding
             * each match as you go along.
             */

            // One node per input position plus a final node that represents "end of input".
            LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[size + 1];

            // Initialise the array
            node_meta_array[0].cost = 0;
            for (int i = 1; i < size + 1; ++i)
            {
                node_meta_array[i].cost = int.MaxValue;
            }

            // Find matches
            for (int i = 0; i < size; ++i)
            {
                // A match may be at most 0x100 + 8 bytes long and may start at most 0x2000 bytes back.
                int max_read_ahead  = Math.Min(0x100 + 8, size - i);
                int max_read_behind = Math.Max(0, i - 0x2000);

                // Search for dictionary matches
                for (int j = i; j-- > max_read_behind;)
                {
                    for (int k = 0; k < max_read_ahead; ++k)
                    {
                        if (buffer[pos + i + k] == buffer[pos + j + k])
                        {
                            int distance = i - j;
                            int length   = k + 1;

                            // Get the cost of the match (or bail if it can't be compressed)
                            int cost;
                            if (length >= 2 && length <= 5 && distance <= 256)
                            {
                                cost = 2 + 2 + 8;   // Descriptor bits, length bits, offset byte
                            }
                            else if (length >= 3 && length <= 9)
                            {
                                cost = 2 + 16;      // Descriptor bits, offset/length bytes
                            }
                            else if (length >= 10)
                            {
                                cost = 2 + 16 + 8;  // Descriptor bits, offset bytes, length byte
                            }
                            else
                            {
                                continue;           // In the event a match cannot be compressed
                            }
                            // Update this node's optimal edge if this one is better
                            if (node_meta_array[i + k + 1].cost > node_meta_array[i].cost + cost)
                            {
                                node_meta_array[i + k + 1].cost = node_meta_array[i].cost + cost;
                                node_meta_array[i + k + 1].previous_node_index = i;
                                node_meta_array[i + k + 1].match_length        = k + 1;
                                node_meta_array[i + k + 1].match_offset        = j;
                            }
                        }
                        else
                        {
                            // First mismatch ends the comparison against this candidate position.
                            break;
                        }
                    }
                }

                // Do literal match
                // Update this node's optimal edge if this one is better (or the same, since literal matches usually decode faster)
                if (node_meta_array[i + 1].cost >= node_meta_array[i].cost + 1 + 8)
                {
                    node_meta_array[i + 1].cost = node_meta_array[i].cost + 1 + 8;
                    node_meta_array[i + 1].previous_node_index = i;
                    node_meta_array[i + 1].match_length        = 0;
                }
            }

            // Reverse the edge link order, so the array can be traversed from start to end, rather than vice versa
            // int.MaxValue is used as the "no link" sentinel at both ends of the chain.
            node_meta_array[0].previous_node_index = int.MaxValue;
            node_meta_array[size].next_node_index  = int.MaxValue;
            for (int node_index = size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
            {
                node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
            }

            /*
             * LZSS graph complete
             */

            // Descriptor bits go through bitStream; data bytes are collected in 'data'.
            // NOTE(review): Push(bitStream, bit, destination, data) is defined elsewhere; presumably it copies
            // 'data' to 'destination' whenever the bit stream emits a descriptor - confirm against the helper.
            UInt8_NE_H_OutputBitStream bitStream = new UInt8_NE_H_OutputBitStream(destination);
            MemoryStream data = new MemoryStream();

            for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
            {
                int next_index = node_meta_array[node_index].next_node_index;

                // The edge's start is next_index - match_length, so this is (edge start) - (match source).
                int length   = node_meta_array[next_index].match_length;
                int distance = next_index - node_meta_array[next_index].match_length - node_meta_array[next_index].match_offset;

                if (length != 0)
                {
                    if (length >= 2 && length <= 5 && distance <= 256)
                    {
                        // Short match: descriptor bits 0,0, then the negated distance as one byte,
                        // then (length - 2) as two descriptor bits, high bit first.
                        Push(bitStream, false, destination, data);
                        Push(bitStream, false, destination, data);
                        NeutralEndian.Write1(data, (byte)-distance);
                        Push(bitStream, ((length - 2) & 2) != 0, destination, data);
                        Push(bitStream, ((length - 2) & 1) != 0, destination, data);
                    }
                    else if (length >= 3 && length <= 9)
                    {
                        // Medium match: descriptor bits 0,1, then two bytes. The first byte holds the upper
                        // five bits of the negated distance plus (10 - length) in its low three bits; the
                        // second byte holds the low eight bits of the negated distance.
                        Push(bitStream, false, destination, data);
                        Push(bitStream, true, destination, data);
                        NeutralEndian.Write1(data, (byte)(((-distance >> (8 - 3)) & 0xF8) | ((10 - length) & 7)));
                        NeutralEndian.Write1(data, (byte)(-distance & 0xFF));
                    }
                    else //if (length >= 3)
                    {
                        // Long match: same two offset bytes with a zero length field, followed by a
                        // third byte holding (length - 9).
                        Push(bitStream, false, destination, data);
                        Push(bitStream, true, destination, data);
                        NeutralEndian.Write1(data, (byte)((-distance >> (8 - 3)) & 0xF8));
                        NeutralEndian.Write1(data, (byte)(-distance & 0xFF));
                        NeutralEndian.Write1(data, (byte)(length - 9));
                    }
                }
                else
                {
                    // Literal: descriptor bit 1 followed by the byte itself.
                    Push(bitStream, true, destination, data);
                    NeutralEndian.Write1(data, buffer[pos + node_index]);
                }
            }

            // Closing sequence: descriptor bits 0,1 followed by the bytes 0xF0, 0x00, 0x00
            // (a long-form entry whose length byte is zero).
            // NOTE(review): presumably this is the end-of-data marker - confirm against the decoder.
            Push(bitStream, false, destination, data);
            Push(bitStream, true, destination, data);

            NeutralEndian.Write1(data, 0xF0);
            NeutralEndian.Write1(data, 0);
            NeutralEndian.Write1(data, 0);
            bitStream.Flush(true);

            // Whatever is still buffered in 'data' follows the final descriptor.
            byte[] bytes = data.ToArray();
            destination.Write(bytes, 0, bytes.Length);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Compresses <paramref name="size"/> bytes of <paramref name="buffer"/>, starting at index
        /// <paramref name="pos"/>, and writes the result to <paramref name="destination"/>.
        /// At every position the longest earlier match inside the window is taken (greedy search).
        /// </summary>
        /// <param name="destination">Stream that receives the descriptor bits and the data bytes.</param>
        /// <param name="buffer">Array holding the uncompressed input.</param>
        /// <param name="pos">Index in <paramref name="buffer"/> of the first byte to compress.</param>
        /// <param name="slidingWindow">How far back, in bytes, a match may start.</param>
        /// <param name="recLength">Maximum length of a single match, in bytes.</param>
        /// <param name="size">Number of bytes to compress.</param>
        private static void EncodeInternal(Stream destination, byte[] buffer, long pos, long slidingWindow, long recLength, long size)
        {
            // Descriptor bits go through bitStream; data bytes are collected in 'data'.
            // NOTE(review): Push(bitStream, bit, destination, data) is defined elsewhere; presumably it copies
            // 'data' to 'destination' whenever the bit stream emits a descriptor - confirm against the helper.
            UInt16LEOutputBitStream bitStream = new UInt16LEOutputBitStream(destination);
            MemoryStream            data      = new MemoryStream();

            if (size > 0)
            {
                // The first byte has nothing before it to match against, so it is always a literal.
                long bPointer = 1, iOffset = 0;
                bitStream.Push(true);
                NeutralEndian.Write1(data, buffer[pos]);

                while (bPointer < size)
                {
                    long iCount = Math.Min(recLength, size - bPointer);   // longest match allowed here
                    long iMax   = Math.Max(bPointer - slidingWindow, 0);  // earliest allowed match start
                    long k      = 1;                                      // best match length so far
                    long i      = bPointer - 1;                           // candidate match start

                    // Walk backwards through the window, keeping the longest match found.
                    do
                    {
                        long j = 0;
                        while (buffer[pos + i + j] == buffer[pos + bPointer + j])
                        {
                            if (++j >= iCount)
                            {
                                break;
                            }
                        }

                        // Strictly greater: among equally long matches the nearest one is kept.
                        if (j > k)
                        {
                            k       = j;
                            iOffset = i;
                        }
                    } while (i-- > iMax);

                    iCount = k;

                    if (iCount == 1)
                    {
                        // No usable match: literal (descriptor bit 1 + the byte).
                        Push(bitStream, true, destination, data);
                        NeutralEndian.Write1(data, buffer[pos + bPointer]);
                    }
                    else if (iCount == 2 && bPointer - iOffset > 256)
                    {
                        // A two-byte match that is too far away for the short form is written as a single
                        // literal instead; only one byte is consumed this round.
                        Push(bitStream, true, destination, data);
                        NeutralEndian.Write1(data, buffer[pos + bPointer]);
                        --iCount;
                    }
                    else if (iCount < 6 && bPointer - iOffset <= 256)
                    {
                        // Short match: descriptor bits 0,0, then (length - 2) as two bits (high bit first),
                        // then one byte holding the one's complement of (distance - 1).
                        Push(bitStream, false, destination, data);
                        Push(bitStream, false, destination, data);
                        Push(bitStream, (((iCount - 2) >> 1) & 1) != 0, destination, data);
                        Push(bitStream, ((iCount - 2) & 1) != 0, destination, data);
                        NeutralEndian.Write1(data, (byte)(~(bPointer - iOffset - 1)));
                    }
                    else
                    {
                        // Full match: descriptor bits 0,1, then a 16-bit word that packs the complemented
                        // offset into everything except its low three bits.
                        Push(bitStream, false, destination, data);
                        Push(bitStream, true, destination, data);

                        long   off  = bPointer - iOffset - 1;
                        ushort info = (ushort)(~((off << 8) | (off >> 5)) & 0xFFF8);

                        if (iCount < 10) // iCount - 2 < 8
                        {
                            // The length fits into the low three bits of the word.
                            info |= (ushort)(iCount - 2);
                            BigEndian.Write2(data, info);
                        }
                        else
                        {
                            // Low three bits stay zero; the length follows as a separate byte (length - 1).
                            BigEndian.Write2(data, info);
                            NeutralEndian.Write1(data, (byte)(iCount - 1));
                        }
                    }

                    bPointer += iCount;
                }
            }

            // Closing sequence: descriptor bits 0,1 (same as a full match) ...
            Push(bitStream, false, destination, data);
            Push(bitStream, true, destination, data);

            // If the bit stream was just flushed, write an empty bit stream that will be read just before the end-of-data
            // sequence below.
            if (!bitStream.HasWaitingBits)
            {
                NeutralEndian.Write1(data, 0);
                NeutralEndian.Write1(data, 0);
            }

            // ... followed by the bytes 0x00, 0xF0, 0x00.
            NeutralEndian.Write1(data, 0);
            NeutralEndian.Write1(data, 0xF0);
            NeutralEndian.Write1(data, 0);
            bitStream.Flush(true);

            // Whatever is still buffered in 'data' follows the final descriptor.
            byte[] bytes = data.ToArray();
            destination.Write(bytes, 0, bytes.Length);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Compresses everything from the current position of <paramref name="input"/> to its end and writes the
        /// result to <paramref name="output"/>. The cheapest sequence of literals and matches (measured in output
        /// bits) is found first, then emitted.
        /// </summary>
        /// <param name="input">Stream to compress; its <c>Length</c> and <c>Position</c> must be available.</param>
        /// <param name="output">
        /// Stream that receives the compressed data. It must be seekable when <paramref name="with_size"/> is
        /// <c>true</c>.
        /// </param>
        /// <param name="with_size">
        /// When <c>true</c>, two bytes are reserved ahead of the compressed data and afterwards filled with the
        /// little-endian length of that data (not counting the two bytes themselves). The stream is left
        /// positioned at the end of the compressed data.
        /// </param>
        /// <exception cref="EndOfStreamException">
        /// The input ended before the number of bytes implied by its length could be read.
        /// </exception>
        private static void Encode(Stream input, Stream output, bool with_size)
        {
            int input_size = (int)(input.Length - input.Position);

            // Stream.Read is allowed to return fewer bytes than requested, so keep reading until the buffer is full.
            byte[] input_buffer = new byte[input_size];
            int total_read = 0;
            while (total_read < input_size)
            {
                int bytes_read = input.Read(input_buffer, total_read, input_size - total_read);
                if (bytes_read <= 0)
                {
                    throw new EndOfStreamException();
                }

                total_read += bytes_read;
            }

            long outputInitialPosition = output.Position;

            if (with_size)
            {
                // Leave room for the size header; it is filled in once the compressed length is known.
                output.Seek(2, SeekOrigin.Current);
            }

            /*
             * Here we create and populate the "LZSS graph":
             *
             * Each value in the uncompressed file forms a node in this graph.
             * The various edges between these nodes represent LZSS matches.
             *
             * Using a shortest-path algorithm, these edges can be used to
             * find the optimal combination of matches needed to produce the
             * smallest possible file.
             *
             * The outputted array only contains one edge per node: the optimal
             * one. This means, in order to produce the smallest file, you just
             * have to traverse the graph from one edge to the next, encoding
             * each match as you go along.
             */

            // One node per input position plus a final node that represents "end of input".
            LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[input_size + 1];

            // Initialise the array
            node_meta_array[0].cost = 0;
            for (int i = 1; i < input_size + 1; ++i)
            {
                node_meta_array[i].cost = int.MaxValue;
            }

            // Find matches
            for (int i = 0; i < input_size; ++i)
            {
                // A match may be at most 0xF + 3 bytes long and may start at most 0x1000 bytes back.
                int max_read_ahead  = Math.Min(0xF + 3, input_size - i);
                int max_read_behind = Math.Max(0, i - 0x1000);

                // Search for zero-fill matches
                if (i < 0x1000)
                {
                    // Bounded by max_read_ahead so the scan never reads past the end of the input
                    // (a fixed 0xF + 3 limit overruns the buffer when the input ends in zeros).
                    for (int k = 0; k < max_read_ahead; ++k)
                    {
                        if (input_buffer[i + k] == 0)
                        {
                            int length = k + 1;

                            // Update this node's optimal edge if this one is better
                            if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                            {
                                node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                                node_meta_array[i + k + 1].previous_node_index = i;
                                node_meta_array[i + k + 1].match_length        = length;
                                node_meta_array[i + k + 1].match_offset        = 0xFFF;
                            }
                        }
                        else
                        {
                            break;
                        }
                    }
                }

                // Search for dictionary matches
                for (int j = i; j-- > max_read_behind;)
                {
                    for (int k = 0; k < max_read_ahead; ++k)
                    {
                        if (input_buffer[i + k] == input_buffer[j + k])
                        {
                            int length = k + 1;

                            // Update this node's optimal edge if this one is better
                            if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                            {
                                node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                                node_meta_array[i + k + 1].previous_node_index = i;
                                node_meta_array[i + k + 1].match_length        = length;
                                node_meta_array[i + k + 1].match_offset        = j;
                            }
                        }
                        else
                        {
                            // First mismatch ends the comparison against this candidate position.
                            break;
                        }
                    }
                }

                // Do literal match
                // Update this node's optimal edge if this one is better (or the same, since literal matches usually decode faster)
                if (node_meta_array[i + 1].cost >= node_meta_array[i].cost + 1 + 8)
                {
                    node_meta_array[i + 1].cost = node_meta_array[i].cost + 1 + 8;
                    node_meta_array[i + 1].previous_node_index = i;
                    node_meta_array[i + 1].match_length        = 0;
                }
            }

            // Reverse the edge link order, so the array can be traversed from start to end, rather than vice versa
            // int.MaxValue is used as the "no link" sentinel at both ends of the chain.
            node_meta_array[0].previous_node_index      = int.MaxValue;
            node_meta_array[input_size].next_node_index = int.MaxValue;
            for (int node_index = input_size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
            {
                node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
            }

            /*
             * LZSS graph complete
             */

            // Descriptor bits go through bitStream; data bytes are collected in 'data'.
            // NOTE(review): Push(bitStream, bit, output, data) is defined elsewhere; presumably it copies
            // 'data' to 'output' whenever the bit stream emits a descriptor - confirm against the helper.
            UInt8_NE_L_OutputBitStream bitStream = new UInt8_NE_L_OutputBitStream(output);
            MemoryStream data = new MemoryStream();

            for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
            {
                int next_index = node_meta_array[node_index].next_node_index;

                if (node_meta_array[next_index].match_length != 0)
                {
                    // Compressed: descriptor bit 0, then the low eight offset bits, then a byte holding the
                    // upper four offset bits in its high nibble and (length - 3) in its low nibble.
                    Push(bitStream, false, output, data);
                    int match_offset_adjusted = node_meta_array[next_index].match_offset - 0x12;   // I don't think there's any reason for this, the format's just stupid
                    NeutralEndian.Write1(data, (byte)(match_offset_adjusted & 0xFF));
                    NeutralEndian.Write1(data, (byte)(((match_offset_adjusted & 0xF00) >> 4) | ((node_meta_array[next_index].match_length - 3) & 0x0F)));
                }
                else
                {
                    // Uncompressed: descriptor bit 1 followed by the byte itself.
                    Push(bitStream, true, output, data);
                    NeutralEndian.Write1(data, input_buffer[node_index]);
                }
            }

            // Write remaining data (normally we don't flush until we have a full descriptor byte)
            bitStream.Flush(true);
            byte[] dataArray = data.ToArray();
            output.Write(dataArray, 0, dataArray.Length);

            if (with_size)
            {
                // Compressed length = end - start, not counting the two-byte header itself.
                // (Subtracting the other way round produced a negative value that wrapped in the cast.)
                long outputEndPosition = output.Position;
                ushort size = (ushort)(outputEndPosition - outputInitialPosition - 2);
                output.Seek(outputInitialPosition, SeekOrigin.Begin);
                LittleEndian.Write2(output, size);

                // Go back to the end so anything written afterwards does not overwrite the compressed data.
                output.Seek(outputEndPosition, SeekOrigin.Begin);
            }
        }
Ejemplo n.º 9
0
        /// <summary>
        /// Compresses the first <paramref name="size"/> bytes of <paramref name="buffer"/> and writes the result to
        /// <paramref name="destination"/>. The input is processed two bytes (one word) at a time, and at every
        /// position the longest earlier match inside the window is taken.
        /// </summary>
        /// <param name="destination">Stream that receives the descriptor bits and the data bytes.</param>
        /// <param name="buffer">Array holding the uncompressed input.</param>
        /// <param name="slidingWindow">How far back, in bytes, a match may start.</param>
        /// <param name="recLength">Maximum length of a single match, in bytes.</param>
        /// <param name="size">Number of bytes to compress.</param>
        private static void EncodeInternal(Stream destination, byte[] buffer, long slidingWindow, long recLength, long size)
        {
            UInt16BE_NE_H_OutputBitStream descriptor = new UInt16BE_NE_H_OutputBitStream(destination);
            MemoryStream payload = new MemoryStream();

            if (size > 0)
            {
                // The first word has nothing before it to match against, so it is stored verbatim.
                descriptor.Push(false);
                NeutralEndian.Write1(payload, buffer[0]);
                NeutralEndian.Write1(payload, buffer[1]);

                long cursor    = 2;
                long bestStart = 0;

                while (cursor < size)
                {
                    long lengthLimit = Math.Min(recLength, size - cursor);
                    long windowStart = Math.Max(cursor - slidingWindow, 0);
                    long bestLength  = 2;
                    long candidate   = cursor;

                    // Step backwards one word at a time for as far as the window allows.
                    do
                    {
                        candidate -= 2;

                        long matched = 0;
                        while (buffer[candidate + matched] == buffer[cursor + matched] && buffer[candidate + matched + 1] == buffer[cursor + matched + 1])
                        {
                            matched += 2;
                            if (matched >= lengthLimit)
                            {
                                // Cannot grow any further: the look-ahead limit (or the end of the data) was reached.
                                break;
                            }
                        }

                        // Strictly greater: among equally long matches the nearest one is kept.
                        if (matched > bestLength)
                        {
                            bestLength = matched;
                            bestStart  = candidate;
                        }
                    } while (candidate > windowStart);

                    // Lengths and offsets are stored in words, not bytes.
                    long wordCount = bestLength / 2;

                    if (wordCount != 1)
                    {
                        // Dictionary match: descriptor bit 1, then the (negative) word offset and the word count minus one.
                        Push(descriptor, true, destination, payload);
                        NeutralEndian.Write1(payload, (byte)((bestStart - cursor) / 2));
                        NeutralEndian.Write1(payload, (byte)(wordCount - 1));
                    }
                    else
                    {
                        // No match longer than one word: descriptor bit 0 followed by the word itself.
                        Push(descriptor, false, destination, payload);
                        NeutralEndian.Write1(payload, buffer[cursor]);
                        NeutralEndian.Write1(payload, buffer[cursor + 1]);
                    }

                    // Convert the word count back to bytes when advancing.
                    cursor += wordCount * 2;
                }
            }

            // Closing sequence: a match descriptor followed by two zero bytes.
            Push(descriptor, true, destination, payload);

            NeutralEndian.Write1(payload, 0);
            NeutralEndian.Write1(payload, 0);
            descriptor.Flush(true);

            byte[] remaining = payload.ToArray();
            destination.Write(remaining, 0, remaining.Length);
        }
Ejemplo n.º 10
0
        /// <summary>
        /// Compresses the remainder of <paramref name="input"/>, treated as a sequence of 16-bit words, and writes
        /// a header plus a packed bit stream to <paramref name="output"/>.
        /// </summary>
        /// <param name="input">Stream to compress; its <c>Length</c> and <c>Position</c> must be available.</param>
        /// <param name="output">Stream that receives the header and the bit stream.</param>
        /// <param name="endianness">
        /// Selects the 16-bit writer used for the header words and which output bit stream class is used.
        /// </param>
        /// <exception cref="CompressionException">The input holds fewer than two bytes.</exception>
        private static void Encode(Stream input, Stream output, Endianness endianness)
        {
            Action <Stream, ushort>  write2;
            OutputBitStream <ushort> bitStream;

            if (endianness == Endianness.BigEndian)
            {
                write2    = Write2BE;
                bitStream = new UInt16BE_E_L_OutputBitStream(output);
            }
            else
            {
                write2    = Write2LE;
                bitStream = new UInt16LE_E_L_OutputBitStream(output);
            }

            // To unpack source into 2-byte words.
            ushort[] words = new ushort[(input.Length - input.Position) / 2];
            if (words.Length == 0)
            {
                throw new CompressionException(Properties.Resources.EmptySource);
            }

            // Frequency map.
            SortedList <ushort, long> counts = new SortedList <ushort, long>();

            // Presence map.
            HashSet <ushort> elements = new HashSet <ushort>();

            // Unpack source into array. Along the way, build frequency and presence maps.
            // maskValue accumulates the OR of every word, i.e. every bit that is set anywhere in the input.
            ushort maskValue = 0;

            {
                byte[] buffer = new byte[2];
                int    i = 0, bytesRead;
                while ((bytesRead = input.Read(buffer, 0, 2)) == 2)
                {
                    // Words are assembled high byte first here, whatever 'endianness' is.
                    ushort v = (ushort)(buffer[0] << 8 | buffer[1]);
                    maskValue |= v;
                    long count;
                    counts.TryGetValue(v, out count);
                    counts[v] = count + 1;
                    elements.Add(v);
                    words[i++] = v;
                }
            }

            // The top five bits of the mask select the bitfield writer; the low eleven bits decide the packet length.
            // NOTE(review): Log2 is defined elsewhere; presumably packetLength is the number of bits needed for
            // the low eleven bits - confirm against the helper.
            var  writeBitfield = GetBitfieldWriter((byte)(maskValue >> 11));
            byte packetLength  = (byte)(Log2((ushort)(maskValue & 0x7ff)) + 1);

            // Find the most common 2-byte value.
            ushort commonValue = FindMostFrequentWord(counts);

            // Find incrementing (not necessarily contiguous) runs.
            // The original algorithm does this for all 65536 2-byte words, while
            // this version only checks the 2-byte words actually in the file.
            SortedList <ushort, long> runs = new SortedList <ushort, long>();

            foreach (ushort element in elements)
            {
                // Count how many times the sequence element, element + 1, ... can be picked out of the input in order.
                ushort next      = element;
                long   runLength = 0;
                foreach (ushort word in words)
                {
                    if (word == next)
                    {
                        ++next;
                        ++runLength;
                    }
                }

                runs[element] = runLength;
            }

            // Find the starting 2-byte value with the longest incrementing run.
            ushort incrementingValue = FindMostFrequentWord(runs);

            // Output header.
            NeutralEndian.Write1(output, packetLength);
            NeutralEndian.Write1(output, (byte)(maskValue >> 11));
            write2(output, incrementingValue);
            write2(output, commonValue);

            // Output compressed data.
            // 'buf' collects words that fit none of the run types; FlushBuffer (defined elsewhere) writes them out.
            List <ushort> buf = new List <ushort>();
            int           pos = 0;

            while (pos < words.Length)
            {
                ushort v = words[pos];
                if (v == incrementingValue)
                {
                    // Run of the incrementing value: count how many following words keep increasing by one
                    // (at most 0xf extra words).
                    FlushBuffer(buf, bitStream, writeBitfield, packetLength);
                    ushort next  = (ushort)(v + 1);
                    ushort count = 0;
                    for (int i = pos + 1; i < words.Length && count < 0xf; i++)
                    {
                        if (next != words[i])
                        {
                            break;
                        }

                        ++next;
                        ++count;
                    }

                    // 6-bit packet: 0x00 combined with the repeat count in the low four bits.
                    bitStream.Write((ushort)(0x00 | count), 6);
                    incrementingValue = next;
                    pos += count;
                }
                else if (v == commonValue)
                {
                    // Run of the common value: count how many following words repeat it (at most 0xf extra words).
                    FlushBuffer(buf, bitStream, writeBitfield, packetLength);
                    ushort count = 0;
                    for (int i = pos + 1; i < words.Length && count < 0xf; i++)
                    {
                        if (v != words[i])
                        {
                            break;
                        }

                        ++count;
                    }

                    // 6-bit packet: 0x10 combined with the repeat count in the low four bits.
                    bitStream.Write((ushort)(0x10 | count), 6);
                    pos += count;
                }
                else
                {
                    ushort next;
                    int    delta;
                    // A run of an arbitrary word is started only if the next word differs from this one by
                    // -1, 0 or +1 and is not the incrementing value.
                    if (pos + 1 < words.Length &&
                        (next = words[pos + 1]) != incrementingValue &&
                        ((delta = (int)next - (int)v) == -1 || delta == 0 || delta == 1))
                    {
                        FlushBuffer(buf, bitStream, writeBitfield, packetLength);
                        ushort count = 1;
                        next = (ushort)(next + delta);
                        for (int i = pos + 2; i < words.Length && count < 0xf; i++)
                        {
                            if (next != words[i])
                            {
                                break;
                            }

                            // If the word is equal to the incrementing word value, stop this run early so we can use the
                            // incrementing value in the next iteration of the main loop.
                            if (words[i] == incrementingValue)
                            {
                                break;
                            }

                            next = (ushort)(next + delta);
                            ++count;
                        }

                        // Map the step (0, +1, -1) to the codes 0, 1, 2.
                        if (delta == -1)
                        {
                            delta = 2;
                        }

                        // 7-bit packet: (4 | step code) in the upper bits, repeat count in the low four bits,
                        // followed by the bitfield and the low bits of the starting word.
                        delta  |= 4;
                        delta <<= 4;
                        bitStream.Write((ushort)(delta | count), 7);
                        writeBitfield(bitStream, v);
                        bitStream.Write((ushort)(v & 0x7ff), packetLength);
                        pos += count;
                    }
                    else
                    {
                        // No run: queue the word, writing the queue out first once it holds 0xf words.
                        if (buf.Count >= 0xf)
                        {
                            FlushBuffer(buf, bitStream, writeBitfield, packetLength);
                        }

                        buf.Add(v);
                    }
                }

                ++pos;
            }

            FlushBuffer(buf, bitStream, writeBitfield, packetLength);

            // Terminator
            bitStream.Write(0x7f, 7);
            bitStream.Flush(false);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Encodes the nibbles produced by <c>Unpacked(input)</c> into <paramref name="output"/>:
        /// a 2-byte header, a code table terminated by 0xff, and a bitstream in which each
        /// RLE nibble run is written either as a variable-length code or as an inline run.
        /// </summary>
        /// <param name="input">Stream handed to <c>Unpacked</c> to obtain the source nibbles.</param>
        /// <param name="output">Stream receiving the header, the code table and the bitstream.</param>
        /// <param name="xor">Stored as bit 15 of the 2-byte header; not otherwise used in this method.</param>
        /// <param name="inputLength">Only used for the header, where it is written shifted right by 5 bits.</param>
        private static void EncodeInternal(Stream input, Stream output, bool xor, long inputLength)
        {
            // rleSource keeps the nibble runs in input order; counts tallies how often
            // each distinct nibble run occurs.
            var rleSource = new List <NibbleRun>();
            var counts    = new SortedList <NibbleRun, long>();

            using (IEnumerator <byte> unpacked = Unpacked(input))
            {
                // Build RLE nibble runs, RLE-encoding the nibble runs as we go along.
                // Maximum run length is 8, meaning 7 repetitions.
                // NOTE(review): a run is only committed when the *next* element differs (or the
                // run is full), so the run still held in "current" when the enumerator ends is
                // never added. Presumably Unpacked() yields a trailing sentinel value that
                // flushes the last real run -- confirm against Unpacked().
                if (unpacked.MoveNext())
                {
                    NibbleRun current = new NibbleRun(unpacked.Current, 0);
                    while (unpacked.MoveNext())
                    {
                        NibbleRun next = new NibbleRun(unpacked.Current, 0);
                        if (next.Nibble != current.Nibble || current.Count >= 7)
                        {
                            rleSource.Add(current);
                            // TryGetValue leaves count at 0 when the run has not been seen yet.
                            long count;
                            counts.TryGetValue(current, out count);
                            counts[current] = count + 1;
                            current         = next;
                        }
                        else
                        {
                            ++current.Count;
                        }
                    }
                }
            }

            // We will use the Package-merge algorithm to build the optimal length-limited
            // Huffman code for the current file. To do this, we must map the current
            // problem onto the Coin Collector's problem.
            // Build the basic coin collection.
            var qt = new List <EncodingCodeTreeNode>();

            foreach (var kvp in counts)
            {
                // No point in including anything with weight less than 2, as they
                // would actually increase compressed file size if used.
                if (kvp.Value > 1)
                {
                    qt.Add(new EncodingCodeTreeNode(kvp.Key, kvp.Value));
                }
            }

            // NOTE(review): the code below treats the LAST element of a sorted list as the
            // lowest-weighted one, so EncodingCodeTreeNode's comparison presumably orders
            // heavier nodes first -- confirm against its CompareTo.
            qt.Sort();

            // The base coin collection for the length-limited Huffman coding has
            // one coin list per character in length of the limitation. Each coin list
            // has a constant "face value", and each coin in a list has its own
            // "numismatic value". The "face value" is unimportant in the way the code
            // is structured below; the "numismatic value" of each coin is the number
            // of times the underlying nibble run appears in the source file.

            // This will hold the Huffman code map.
            // NOTE: while the codes that will be written in the header will not be
            // longer than 8 bits, it is possible that a supplementary code map will
            // add "fake" codes that are longer than 8 bits.
            var codeMap = new SortedList <NibbleRun, KeyValuePair <long, byte> >();

            // Size estimate. This is used to build the optimal compressed file.
            long sizeEstimate = long.MaxValue;

            // We will solve the Coin Collector's problem several times, each time
            // ignoring more of the least frequent nibble runs. This allows us to find
            // *the* lowest file size.
            // If fewer than two nibble runs qualified, the loop never runs and codeMap
            // stays empty, so every run is written inline further down.
            while (qt.Count > 1)
            {
                // Make a copy of the basic coin collection.
                var q0 = new List <EncodingCodeTreeNode>(qt);

                // Ignore the lowest weighted item. Will only affect the next iteration
                // of the loop. If it can be proven that there is a single global
                // minimum (and no local minima for file size), then this could be
                // simplified to a binary search.
                qt.RemoveAt(qt.Count - 1);

                // We now solve the Coin collector's problem using the Package-merge
                // algorithm. The solution goes here.
                var solution = new List <EncodingCodeTreeNode>();

                // This holds the packages from the last iteration.
                var q = new List <EncodingCodeTreeNode>(q0);

                int target = (q0.Count - 1) << 8, idx = 0;
                while (target != 0)
                {
                    // Gets lowest bit set in its proper place:
                    int val = (target & -target), r = 1 << idx;

                    // Is the current denomination equal to the least denomination?
                    if (r == val)
                    {
                        // If yes, take the least valuable node and put it into the solution.
                        solution.Add(q[q.Count - 1]);
                        q.RemoveAt(q.Count - 1);
                        target -= r;
                    }

                    // The coin collection has coins of values 1 to 8; copy from the
                    // original in those cases for the next step.
                    var q1 = new List <EncodingCodeTreeNode>();
                    if (idx < 7)
                    {
                        q1.AddRange(q0);
                    }

                    // Split the current list into pairs and insert the packages into
                    // the next list.
                    while (q.Count > 1)
                    {
                        EncodingCodeTreeNode child1 = q[q.Count - 1];
                        q.RemoveAt(q.Count - 1);
                        EncodingCodeTreeNode child0 = q[q.Count - 1];
                        q.RemoveAt(q.Count - 1);
                        q1.Add(new EncodingCodeTreeNode(child0, child1));
                    }

                    // Any single unpaired node left in q is discarded by the Clear() below.
                    idx++;
                    q.Clear();
                    q.AddRange(q1);
                    q.Sort();
                }

                // The Coin Collector's problem has been solved. Now it is time to
                // map the solution back into the length-limited Huffman coding problem.

                // To do that, we iterate through the solution and count how many times
                // each nibble run has been used (remember that the coin collection
                // had multiple coins associated with each nibble run) -- this number
                // is the optimal bit length for the nibble run.
                var baseSizeMap = new SortedList <NibbleRun, long>();
                foreach (var item in solution)
                {
                    item.Traverse(baseSizeMap);
                }

                // With the length-limited Huffman coding problem solved, it is now time
                // to build the code table. As input, we have a map associating a nibble
                // run to its optimal encoded bit length. We will build the codes using
                // the canonical Huffman code.

                // To do that, we must invert the size map so we can sort it by code size.
                var sizeOnlyMap = new MultiSet <long>();

                // This map contains lots more information, and is used to associate
                // the nibble run with its optimal code. It is sorted by code size,
                // then by frequency of the nibble run, then by the nibble run.
                var sizeMap = new MultiSet <SizeMapItem>();

                foreach (var item in baseSizeMap)
                {
                    long size = item.Value;
                    sizeOnlyMap.Add(size);
                    sizeMap.Add(new SizeMapItem(size, counts[item.Key], item.Key));
                }

                // We now build the canonical Huffman code table.
                // "baseCode" is the code for the first nibble run with a given bit length.
                // "carry" is how many nibble runs were demoted to a higher bit length
                // at an earlier step.
                // "cnt" is how many nibble runs have a given bit length.
                long baseCode = 0;
                long carry = 0, cnt;

                // This list contains the codes sorted by size.
                var codes = new List <KeyValuePair <long, byte> >();
                for (byte j = 1; j <= 8; j++)
                {
                    // How many nibble runs have the desired bit length.
                    cnt   = sizeOnlyMap.Count(j) + carry;
                    carry = 0;

                    for (int k = 0; k < cnt; k++)
                    {
                        // Sequential binary numbers for codes.
                        long code = baseCode + k;
                        long mask = (1L << j) - 1;

                        // We do not want any codes composed solely of 1's or which
                        // start with 111111, as that sequence is reserved.
                        if ((j <= 6 && code == mask) ||
                            (j > 6 && code == (mask & ~((1L << (j - 6)) - 1))))
                        {
                            // We must demote this many nibble runs to a longer code.
                            carry = cnt - k;
                            cnt   = k;
                            break;
                        }

                        codes.Add(new KeyValuePair <long, byte>(code, j));
                    }

                    // This is the beginning bit pattern for the next bit length.
                    baseCode = (baseCode + cnt) << 1;
                }

                // With the canonical table built, the codemap can finally be built.
                // Runs and codes are paired in order; pairing stops as soon as either
                // sequence is exhausted, so surplus runs simply get no code of their own.
                var tempCodemap = new SortedList <NibbleRun, KeyValuePair <long, byte> >();
                using (IEnumerator <SizeMapItem> enumerator = sizeMap.GetEnumerator())
                {
                    int pos = 0;
                    while (enumerator.MoveNext() && pos < codes.Count)
                    {
                        tempCodemap[enumerator.Current.NibbleRun] = codes[pos];
                        ++pos;
                    }
                }

                // We now compute the final file size for this code table.
                // 2 bytes at the start of the file, plus 1 byte at the end of the
                // code table.
                // The estimate is kept in bits throughout.
                long tempsize_est = 3 * 8;
                byte last         = 0xff;

                // Start with any nibble runs with their own code.
                foreach (var item in tempCodemap)
                {
                    // Each new nibble needs an extra byte.
                    if (item.Key.Nibble != last)
                    {
                        tempsize_est += 8;
                        last          = item.Key.Nibble;
                    }

                    // 2 bytes per nibble run in the table.
                    tempsize_est += 2 * 8;

                    // How many bits this nibble run uses in the file.
                    tempsize_est += counts[item.Key] * item.Value.Value;
                }

                // Supplementary code map for the nibble runs that can be broken up into
                // shorter nibble runs with a smaller bit length than inlining.
                var supCodemap = new Dictionary <NibbleRun, KeyValuePair <long, byte> >();

                // Now we will compute the size requirements for inline nibble runs.
                foreach (var item in counts)
                {
                    if (!tempCodemap.ContainsKey(item.Key))
                    {
                        // Nibble run does not have its own code. We need to find out if
                        // we can break it up into smaller nibble runs with total code
                        // size less than 13 bits or if we need to inline it (13 bits).
                        if (item.Key.Count == 0)
                        {
                            // If this is a nibble run with zero repeats, we can't break
                            // it up into smaller runs, so we inline it.
                            tempsize_est += (6 + 7) * item.Value;
                        }
                        else if (item.Key.Count == 1)
                        {
                            // We stand a chance of breaking the nibble run.

                            // This case is rather trivial, so we hard-code it.
                            // We can break this up only as 2 consecutive runs of a nibble
                            // run with count == 0.
                            KeyValuePair <long, byte> value;
                            if (!tempCodemap.TryGetValue(new NibbleRun(item.Key.Nibble, 0), out value) || value.Value > 6)
                            {
                                // The smaller nibble run either does not have its own code
                                // or it results in a longer bit code when doubled up than
                                // would result from inlining the run. In either case, we
                                // inline the nibble run.
                                tempsize_est += (6 + 7) * item.Value;
                            }
                            else
                            {
                                // The smaller nibble run has a small enough code that it is
                                // more efficient to use it twice than to inline our nibble
                                // run. So we do exactly that, by adding a (temporary) entry
                                // in the supplementary codemap, which will later be merged
                                // into the main codemap.
                                // Bit 7 of the stored length marks the entry as supplementary,
                                // which keeps it out of the code table written to the header.
                                long code = value.Key;
                                byte len  = value.Value;
                                code                 = (code << len) | code;
                                len                <<= 1;
                                tempsize_est        += len * item.Value;
                                supCodemap[item.Key] = new KeyValuePair <long, byte>(code, (byte)(0x80 | len));
                            }
                        }
                        else
                        {
                            // We stand a chance of breaking the nibble run.
                            byte n = item.Key.Count;

                            // This is a linear optimization problem subjected to 2
                            // constraints. If the number of repeats of the current nibble
                            // run is N, then we have N dimensions.
                            // Reference to table of linear coefficients. This table has
                            // N columns for each line.
                            byte[,] myLinearCoeffs = linearCoeffs[n - 2];
                            int rows = myLinearCoeffs.GetLength(0);

                            byte nibble = item.Key.Nibble;

                            // List containing the code length of each nibble run, or 13
                            // if the nibble run is not in the codemap.
                            var runlen = new List <long>();

                            // Initialize the list.
                            for (byte i = 0; i < n; i++)
                            {
                                // Is this run in the codemap?
                                KeyValuePair <long, byte> value;
                                if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                                {
                                    // It is.
                                    // Put code length in the vector.
                                    runlen.Add(value.Value);
                                }
                                else
                                {
                                    // It is not.
                                    // Put inline length in the vector.
                                    runlen.Add(6 + 7);
                                }
                            }

                            // Now go through the linear coefficient table and tally up
                            // the total code size, looking for the best case.
                            // The best size is initialized to be the inlined case.
                            long bestSize = 6 + 7;
                            int  bestLine = -1;
                            for (int i = 0; i < rows; i++)
                            {
                                // Tally up the code length for this coefficient line.
                                long len = 0;
                                for (byte j = 0; j < n; j++)
                                {
                                    byte c = myLinearCoeffs[i, j];
                                    if (c == 0)
                                    {
                                        continue;
                                    }

                                    len += c * runlen[j];
                                }

                                // Is the length better than the best yet?
                                if (len < bestSize)
                                {
                                    // If yes, store it as the best.
                                    bestSize = len;
                                    bestLine = i;
                                }
                            }

                            // Have we found a better code than inlining?
                            if (bestLine >= 0)
                            {
                                // We have; use it. To do so, we have to build the code
                                // and add it to the supplementary code table.
                                long code = 0, len = 0;
                                for (byte i = 0; i < n; i++)
                                {
                                    byte c = myLinearCoeffs[bestLine, i];
                                    if (c == 0)
                                    {
                                        continue;
                                    }

                                    // Is this run in the codemap?
                                    KeyValuePair <long, byte> value;
                                    if (tempCodemap.TryGetValue(new NibbleRun(nibble, i), out value))
                                    {
                                        // It is; it MUST be, as the other case is impossible
                                        // by construction.
                                        // Append the run's code to the combined code c times.
                                        for (int j = 0; j < c; j++)
                                        {
                                            len   += value.Value;
                                            code <<= value.Value;
                                            code  |= value.Key;
                                        }
                                    }
                                }

                                if (len != bestSize)
                                {
                                    // ERROR! DANGER! THIS IS IMPOSSIBLE!
                                    // But just in case...
                                    tempsize_est += (6 + 7) * item.Value;
                                }
                                else
                                {
                                    // By construction, bestSize is at most 12.
                                    byte c = (byte)bestSize;

                                    // Add it to supplementary code map.
                                    supCodemap[item.Key] = new KeyValuePair <long, byte>(code, (byte)(0x80 | c));
                                    tempsize_est        += bestSize * item.Value;
                                }
                            }
                            else
                            {
                                // No, we will have to inline it.
                                tempsize_est += (6 + 7) * item.Value;
                            }
                        }
                    }
                }

                // Merge the supplementary code map into the temporary code map.
                foreach (var item in supCodemap)
                {
                    tempCodemap[item.Key] = item.Value;
                }

                // Round up to a full byte.
                tempsize_est = (tempsize_est + 7) & ~7;

                // Is this iteration better than the best?
                if (tempsize_est < sizeEstimate)
                {
                    // If yes, save the codemap and file size.
                    codeMap      = tempCodemap;
                    sizeEstimate = tempsize_est;
                }
            }

            // We now have a prefix-free code map associating the RLE-encoded nibble
            // runs with their code. Now we write the file.
            // Write header: bit 15 carries the xor flag, the low bits carry inputLength >> 5.
            BigEndian.Write2(output, (ushort)((Convert.ToInt32(xor) << 15) | ((int)inputLength >> 5)));
            byte lastNibble = 0xff;

            foreach (var item in codeMap)
            {
                byte length = item.Value.Value;

                // length with bit 7 set is a special device for further reducing file size, and
                // should NOT be on the table.
                if ((length & 0x80) != 0)
                {
                    continue;
                }

                NibbleRun nibbleRun = item.Key;
                if (nibbleRun.Nibble != lastNibble)
                {
                    // 0x80 marks byte as setting a new nibble.
                    NeutralEndian.Write1(output, (byte)(0x80 | nibbleRun.Nibble));
                    lastNibble = nibbleRun.Nibble;
                }

                // Table entry: one byte holding the repeat count (high nibble) and the code
                // length (low nibble), followed by one byte holding the code itself.
                long code = item.Value.Key;
                NeutralEndian.Write1(output, (byte)((nibbleRun.Count << 4) | length));
                NeutralEndian.Write1(output, (byte)code);
            }

            // Mark end of header.
            NeutralEndian.Write1(output, 0xff);

            // Write the encoded bitstream.
            UInt8_E_L_OutputBitStream bitStream = new UInt8_E_L_OutputBitStream(output);

            // The RLE-encoded source makes for a far faster encode as we simply
            // use the nibble runs as an index into the map, meaning a quick binary
            // search gives us the code to use (if in the map) or tells us that we
            // need to use inline RLE.
            foreach (var nibbleRun in rleSource)
            {
                KeyValuePair <long, byte> value;
                if (codeMap.TryGetValue(nibbleRun, out value))
                {
                    long code = value.Key;
                    byte len  = value.Value;

                    // len with bit 7 set is a device to bypass the code table at the
                    // start of the file. We need to clear the bit here before writing
                    // the code to the file.
                    len &= 0x7f;

                    // We can have codes in the 9-12 range due to the break up of large
                    // inlined runs into smaller non-inlined runs. Deal with those high
                    // bits first, if needed.
                    if (len > 8)
                    {
                        bitStream.Write((byte)((code >> 8) & 0xff), len - 8);
                        len = 8;
                    }

                    bitStream.Write((byte)(code & 0xff), len);
                }
                else
                {
                    // Inline run: the reserved 6-bit prefix 111111, then the 3-bit repeat
                    // count and the 4-bit nibble (13 bits in total).
                    bitStream.Write(0x3f, 6);
                    bitStream.Write(nibbleRun.Count, 3);
                    bitStream.Write(nibbleRun.Nibble, 4);
                }
            }

            // Fill remainder of last byte with zeroes and write if needed.
            bitStream.Flush(false);
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Compresses <paramref name="size"/> bytes of <paramref name="buffer"/>, starting at
        /// index <paramref name="pos"/>, and writes the result to <paramref name="destination"/>.
        /// Descriptor bits go through the bit stream while the payload bytes are staged in a
        /// memory stream; whatever is still staged at the end is appended to the destination.
        /// </summary>
        /// <param name="destination">Stream that receives the compressed data.</param>
        /// <param name="buffer">Array holding the uncompressed input.</param>
        /// <param name="pos">Index in <paramref name="buffer"/> of the first byte to compress.</param>
        /// <param name="slidingWindow">How far back from the current position matches are searched.</param>
        /// <param name="recLength">Upper bound on the length of a single match.</param>
        /// <param name="size">Number of input bytes to compress.</param>
        private static void EncodeInternal(Stream destination, byte[] buffer, long pos, long slidingWindow, long recLength, long size)
        {
            UInt8_NE_H_OutputBitStream bitStream = new UInt8_NE_H_OutputBitStream(destination);
            MemoryStream data = new MemoryStream();

            if (size > 0)
            {
                // The very first byte has nothing to refer back to: always a literal.
                bitStream.Push(true);
                NeutralEndian.Write1(data, buffer[pos]);

                long cursor    = 1;
                long bestStart = 0;

                while (cursor < size)
                {
                    long limit       = Math.Min(recLength, size - cursor);
                    long windowStart = Math.Max(cursor - slidingWindow, 0);
                    long matchLength = 1;
                    long candidate   = cursor - 1;

                    // Walk backwards through the window, remembering the longest match.
                    // The candidate right before the cursor is always examined.
                    while (true)
                    {
                        long run = 0;
                        do
                        {
                            if (buffer[pos + candidate + run] != buffer[pos + cursor + run])
                            {
                                break;
                            }

                            ++run;
                        } while (run < limit);

                        if (run > matchLength)
                        {
                            matchLength = run;
                            bestStart   = candidate;
                        }

                        if (candidate <= windowStart)
                        {
                            break;
                        }

                        --candidate;
                    }

                    long distance = cursor - bestStart;

                    if (matchLength == 1 || (matchLength == 2 && distance > 256))
                    {
                        // Literal byte. A two-byte match that is too far away for the short
                        // form is not worth a long-form match, so only one byte is consumed.
                        Push(bitStream, true, destination, data);
                        NeutralEndian.Write1(data, buffer[pos + cursor]);
                        matchLength = 1;
                    }
                    else if (matchLength < 6 && distance <= 256)
                    {
                        // Short form: bits 0,0, one offset byte, then two bits of (length - 2),
                        // most significant bit first.
                        long lengthCode = matchLength - 2;

                        Push(bitStream, false, destination, data);
                        Push(bitStream, false, destination, data);
                        NeutralEndian.Write1(data, (byte)(~(distance - 1)));
                        Push(bitStream, (lengthCode & 2) != 0, destination, data);
                        Push(bitStream, (lengthCode & 1) != 0, destination, data);
                    }
                    else
                    {
                        // Long form: bits 0,1, then a 16-bit word with the inverted offset in
                        // the upper 13 bits and the length code in the lower 3 bits.
                        Push(bitStream, false, destination, data);
                        Push(bitStream, true, destination, data);

                        long   backOffset = distance - 1;
                        long   packed     = (backOffset << 8) | (backOffset >> 5);
                        ushort info       = (ushort)(~packed & 0xFFF8);

                        bool lengthFitsInline = matchLength < 10;
                        if (lengthFitsInline)
                        {
                            info |= (ushort)(10 - matchLength);
                        }

                        LittleEndian.Write2(data, info);

                        if (!lengthFitsInline)
                        {
                            // Length code 0 in the word: the length follows in an extra byte.
                            NeutralEndian.Write1(data, (byte)(matchLength - 9));
                        }
                    }

                    cursor += matchLength;
                }
            }

            // Terminator: a long-form entry whose extra length byte is zero.
            Push(bitStream, false, destination, data);
            Push(bitStream, true, destination, data);

            NeutralEndian.Write1(data, 0xF0);
            NeutralEndian.Write1(data, 0);
            NeutralEndian.Write1(data, 0);
            bitStream.Flush(true);

            // Append whatever payload is still staged.
            byte[] remaining = data.ToArray();
            destination.Write(remaining, 0, remaining.Length);
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Compresses everything from the current position of <paramref name="source"/> to its
        /// end and writes the result to <paramref name="destination"/>. The input is treated as
        /// a sequence of big-endian 16-bit words (an odd trailing byte is padded with zero);
        /// each word is emitted either as a literal or as part of a dictionary match, chosen
        /// by a shortest-path search over all possible matches.
        /// </summary>
        /// <param name="source">Seekable stream positioned at the first byte to compress.</param>
        /// <param name="destination">Stream that receives the compressed data.</param>
        internal static void Encode(Stream source, Stream destination)
        {
            int size_bytes = (int)(source.Length - source.Position);

            // One extra zero byte when the length is odd, so the data splits into whole words.
            byte[] buffer_bytes = new byte[size_bytes + (size_bytes & 1)];

            // Stream.Read may legitimately return fewer bytes than requested, so keep reading
            // until the buffer is full. If the stream ends early the remainder stays zeroed.
            int total_read = 0;
            while (total_read < size_bytes)
            {
                int read = source.Read(buffer_bytes, total_read, size_bytes - total_read);
                if (read <= 0)
                {
                    break;
                }

                total_read += read;
            }

            int size = (size_bytes + 1) / 2;

            ushort[] buffer = new ushort[size];
            for (int i = 0; i < size; ++i)
            {
                buffer[i] = (ushort)((buffer_bytes[i * 2] << 8) | buffer_bytes[(i * 2) + 1]);
            }

            /*
             * Here we create and populate the "LZSS graph":
             *
             * Each value in the uncompressed file forms a node in this graph.
             * The various edges between these nodes represent LZSS matches.
             *
             * Using a shortest-path algorithm, these edges can be used to
             * find the optimal combination of matches needed to produce the
             * smallest possible file.
             *
             * The outputted array only contains one edge per node: the optimal
             * one. This means, in order to produce the smallest file, you just
             * have to traverse the graph from one edge to the next, encoding
             * each match as you go along.
             */

            LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[size + 1];

            // Initialise the array
            node_meta_array[0].cost = 0;
            for (int i = 1; i < size + 1; ++i)
            {
                node_meta_array[i].cost = int.MaxValue;
            }

            // Find matches
            for (int i = 0; i < size; ++i)
            {
                int max_read_ahead  = Math.Min(0x100, size - i);
                int max_read_behind = Math.Max(0, i - 0x100);

                // Every edge leaving node i costs the same (1 descriptor bit + 16 data bits),
                // and node i's own cost cannot change any more: edges only point forwards.
                var edge_cost = node_meta_array[i].cost + 1 + 16;

                // Search for dictionary matches
                for (int j = i; j-- > max_read_behind;)
                {
                    for (int k = 0; k < max_read_ahead; ++k)
                    {
                        if (buffer[i + k] != buffer[j + k])
                        {
                            break;
                        }

                        // Update this node's optimal edge if this one is better
                        if (node_meta_array[i + k + 1].cost > edge_cost)
                        {
                            node_meta_array[i + k + 1].cost = edge_cost;
                            node_meta_array[i + k + 1].previous_node_index = i;
                            node_meta_array[i + k + 1].match_length        = k + 1;
                            node_meta_array[i + k + 1].match_offset        = j;
                        }
                    }
                }

                // Do literal match
                // Update this node's optimal edge if this one is better (or the same, since literal matches usually decode faster).
                // The "or the same" part also matters for correctness: it replaces any one-word
                // match recorded above, whose length byte (length - 1 == 0) would be identical
                // to the length byte of the terminator written at the end of the stream.
                if (node_meta_array[i + 1].cost >= edge_cost)
                {
                    node_meta_array[i + 1].cost = edge_cost;
                    node_meta_array[i + 1].previous_node_index = i;
                    node_meta_array[i + 1].match_length        = 0;
                }
            }

            // Reverse the edge link order, so the array can be traversed from start to end, rather than vice versa
            node_meta_array[0].previous_node_index = int.MaxValue;
            node_meta_array[size].next_node_index  = int.MaxValue;
            for (int node_index = size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
            {
                node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
            }

            /*
             * LZSS graph complete
             */

            UInt16BE_NE_H_OutputBitStream bitStream = new UInt16BE_NE_H_OutputBitStream(destination);
            MemoryStream data = new MemoryStream();

            for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
            {
                int next_index = node_meta_array[node_index].next_node_index;

                // The edge stored on the *next* node describes how to get there from this one.
                int length   = node_meta_array[next_index].match_length;
                int distance = next_index - node_meta_array[next_index].match_length - node_meta_array[next_index].match_offset;

                if (length != 0)
                {
                    // Compressed: negated distance and (length - 1), one byte each.
                    // A distance of 0x100 wraps around to 0 in the byte.
                    Push(bitStream, true, destination, data);
                    NeutralEndian.Write1(data, (byte)-distance);
                    NeutralEndian.Write1(data, (byte)(length - 1));
                }
                else
                {
                    // Uncompressed
                    Push(bitStream, false, destination, data);
                    BigEndian.Write2(data, buffer[node_index]);
                }
            }

            // Terminator: a "compressed" entry whose two bytes are both zero.
            Push(bitStream, true, destination, data);

            NeutralEndian.Write1(data, 0);
            NeutralEndian.Write1(data, 0);
            bitStream.Flush(true);

            // Append whatever payload is still staged.
            byte[] bytes = data.ToArray();
            destination.Write(bytes, 0, bytes.Length);
        }