Example #1
        private static void Encode(Stream input, Stream output, bool with_size)
        {
            int input_size = (int)(input.Length - input.Position);

            byte[] input_buffer = new byte[input_size];
            input.Read(input_buffer, 0, input_size);

            long outputInitialPosition = output.Position;

            if (with_size)
            {
                output.Seek(2, SeekOrigin.Current);
            }

            /*
             * Here we create and populate the "LZSS graph":
             *
             * Each value in the uncompressed file forms a node in this graph.
             * The various edges between these nodes represent LZSS matches.
             *
             * Using a shortest-path algorithm, these edges can be used to
             * find the optimal combination of matches needed to produce the
             * smallest possible file.
             *
             * The outputted array only contains one edge per node: the optimal
             * one. This means, in order to produce the smallest file, you just
             * have to traverse the graph from one edge to the next, encoding
             * each match as you go along.
             */
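            /*
             * For example, given an input of six zero bytes, node 0 has a
             * 9-bit literal edge to node 1 (1 descriptor bit + 8 data bits)
             * and 17-bit zero-fill edges to nodes 3 through 6 (1 descriptor
             * bit + 16 data bits). The shortest path from node 0 to node 6
             * is the single 0->6 edge: one 17-bit match beats six 9-bit
             * literals (54 bits).
             */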

            LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[input_size + 1];

            // Initialise the array
            node_meta_array[0].cost = 0;
            for (int i = 1; i < input_size + 1; ++i)
            {
                node_meta_array[i].cost = int.MaxValue;
            }

            // Find matches
            for (int i = 0; i < input_size; ++i)
            {
                int max_read_ahead  = Math.Min(0xF + 3, input_size - i);
                int max_read_behind = Math.Max(0, i - 0x1000);

                // Search for zero-fill matches
                if (i < 0x1000)
                {
                    for (int k = 0; k < max_read_ahead; ++k)   // Capped so we never read past the end of the input
                    {
                        if (input_buffer[i + k] == 0)
                        {
                            int length = k + 1;

                            // Update this node's optimal edge if this one is better
                            if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                            {
                                node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                                node_meta_array[i + k + 1].previous_node_index = i;
                                node_meta_array[i + k + 1].match_length        = k + 1;
                                node_meta_array[i + k + 1].match_offset        = 0xFFF;
                            }
                        }
                        else
                        {
                            break;
                        }
                    }
                }

                // Search for dictionary matches
                for (int j = i; j-- > max_read_behind;)
                {
                    for (int k = 0; k < max_read_ahead; ++k)
                    {
                        if (input_buffer[i + k] == input_buffer[j + k])
                        {
                            int length = k + 1;

                            // Update this node's optimal edge if this one is better
                            if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                            {
                                node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                                node_meta_array[i + k + 1].previous_node_index = i;
                                node_meta_array[i + k + 1].match_length        = k + 1;
                                node_meta_array[i + k + 1].match_offset        = j;
                            }
                        }
                        else
                        {
                            break;
                        }
                    }
                }

                // Do literal match
                // Update this node's optimal edge if this one is better (or the same, since literal matches usually decode faster)
                if (node_meta_array[i + 1].cost >= node_meta_array[i].cost + 1 + 8)
                {
                    node_meta_array[i + 1].cost = node_meta_array[i].cost + 1 + 8;
                    node_meta_array[i + 1].previous_node_index = i;
                    node_meta_array[i + 1].match_length        = 0;
                }
            }

            // Reverse the edge link order, so the array can be traversed from start to end, rather than vice versa
            node_meta_array[0].previous_node_index      = int.MaxValue;
            node_meta_array[input_size].next_node_index = int.MaxValue;
            for (int node_index = input_size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
            {
                node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
            }

            /*
             * LZSS graph complete
             */

            UInt8_NE_L_OutputBitStream bitStream = new UInt8_NE_L_OutputBitStream(output);
            MemoryStream data = new MemoryStream();
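
            // Note the two-stream arrangement: descriptor bits are pushed
            // through 'bitStream' while literal/match bytes accumulate in
            // 'data'. The Push helper (not shown in this listing) presumably
            // flushes 'data' to the output whenever a descriptor byte is
            // completed, producing the interleaved layout the decompressor
            // expects.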

            for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
            {
                int next_index = node_meta_array[node_index].next_node_index;

                if (node_meta_array[next_index].match_length != 0)
                {
                    // Compressed
                    Push(bitStream, false, output, data);
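                    // Two-byte match encoding: the first byte holds offset bits 0-7,
                    // the second byte holds offset bits 8-11 in its high nibble and
                    // (length - 3) in its low nibble.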
                    int match_offset_adjusted = node_meta_array[next_index].match_offset - 0x12;   // I don't think there's any reason for this, the format's just stupid
                    NeutralEndian.Write1(data, (byte)(match_offset_adjusted & 0xFF));
                    NeutralEndian.Write1(data, (byte)(((match_offset_adjusted & 0xF00) >> 4) | ((node_meta_array[next_index].match_length - 3) & 0x0F)));
                }
                else
                {
                    // Uncompressed
                    Push(bitStream, true, output, data);
                    NeutralEndian.Write1(data, input_buffer[node_index]);
                }
            }

            // Write remaining data (normally we don't flush until we have a full descriptor byte)
            bitStream.Flush(true);
            byte[] dataArray = data.ToArray();
            output.Write(dataArray, 0, dataArray.Length);

            if (with_size)
            {
                ushort size = (ushort)(output.Position - outputInitialPosition - 2);
                output.Seek(outputInitialPosition, SeekOrigin.Begin);
                LittleEndian.Write2(output, size);
            }
        }
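
This snippet assumes a couple of helpers that are not part of the listing. The following is a minimal sketch of what they might look like; the field and parameter names are guesses based on how Example #1 uses them, not the original definitions:

        // Hypothetical reconstruction of the per-node edge record used by the
        // graph pass. Each node stores only its best (cheapest) incoming edge.
        private struct LZSSGraphEdge
        {
            public int cost;                 // Cheapest known cost, in bits, to reach this node
            public int previous_node_index;  // Source node of the best incoming edge
            public int next_node_index;      // Filled in when the edge links are reversed
            public int match_length;         // 0 = literal, otherwise the match length
            public int match_offset;         // Offset of the match source (0xFFF for zero-fill)
        }

        // Hypothetical Push helper: emits one descriptor bit and, whenever a
        // full descriptor byte has been completed, flushes the buffered
        // literal/match bytes after it so the two streams stay interleaved.
        private static void Push(UInt8_NE_L_OutputBitStream bitStream, bool bit, Stream destination, MemoryStream data)
        {
            if (bitStream.Push(bit))
            {
                byte[] bytes = data.ToArray();
                destination.Write(bytes, 0, bytes.Length);
                data.SetLength(0);
            }
        }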
Example #2
        private static void Encode(Stream input, Stream output, bool with_size)
        {
            long input_size = input.Length - input.Position;

            byte[] input_buffer = new byte[input_size];
            input.Read(input_buffer, 0, (int)input_size);

            long outputInitialPosition = output.Position;

            if (with_size)
            {
                output.Seek(2, SeekOrigin.Current);
            }

            List<byte> data = new List<byte>();
            UInt8_NE_L_OutputBitStream bitStream = new UInt8_NE_L_OutputBitStream(output);

            long input_pointer = 0;

            while (input_pointer < input_size)
            {
                // The maximum recurrence length that can be encoded is 0x12
                // Of course, if the remaining file is smaller, cap to that instead
                long maximum_match_length = Math.Min(input_size - input_pointer, 0x12);
                // The furthest back Saxman can address is 0x1000 bytes
                // Again, if there's less than 0x1000 bytes of data available, then cap at that instead
                long maximum_backsearch = Math.Min(input_pointer, 0x1000);

                // These are our default values for the longest match found
                long longest_match_offset = input_pointer;      // This one doesn't really need initialising, but it does shut up some moronic warnings
                long longest_match_length = 1;

                // First, look for dictionary matches
                for (long backsearch_pointer = input_pointer - 1; backsearch_pointer >= input_pointer - maximum_backsearch; --backsearch_pointer)
                {
                    long match_length = 0;
                    while (input_buffer[backsearch_pointer + match_length] == input_buffer[input_pointer + match_length] && ++match_length < maximum_match_length)
                    {
                        ;
                    }

                    if (match_length > longest_match_length)
                    {
                        longest_match_length = match_length;
                        longest_match_offset = backsearch_pointer;
                    }
                }

                // Then, look for zero-fill matches
                if (input_pointer < 0xFFF)  // Saxman cannot perform zero-fills past the first 0xFFF bytes (it relies on some goofy logic in the decompressor)
                {
                    long match_length = 0;
                    while (input_buffer[input_pointer + match_length] == 0 && ++match_length < maximum_match_length)
                    {
                        ;
                    }

                    if (match_length > longest_match_length)
                    {
                        longest_match_length = match_length;
                        // Saxman detects zero-fills by checking if the dictionary reference offset is somehow
                        // pointing to *after* the decompressed data, so we set it to the highest possible value here
                        longest_match_offset = 0xFFF;
                    }
                }

                // We cannot compress runs shorter than three bytes
                if (longest_match_length < 3)
                {
                    // Uncompressed
                    Push(bitStream, true, output, data);
                    data.Add(input_buffer[input_pointer]);

                    longest_match_length = 1;
                }
                else
                {
                    // Compressed
                    Push(bitStream, false, output, data);
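                    // Same two-byte match encoding as in Example #1: offset
                    // low byte first, then offset high nibble plus (length - 3).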
                    long match_offset_adjusted = longest_match_offset - 0x12;   // I don't think there's any reason for this, the format's just stupid
                    data.Add((byte)(match_offset_adjusted & 0xFF));
                    data.Add((byte)(((match_offset_adjusted & 0xF00) >> 4) | ((longest_match_length - 3) & 0x0F)));
                }

                input_pointer += longest_match_length;
            }

            // Write remaining data (normally we don't flush until we have a full descriptor byte)
            bitStream.Flush(true);
            byte[] dataArray = data.ToArray();
            output.Write(dataArray, 0, dataArray.Length);

            if (with_size)
            {
                ushort size = (ushort)(output.Position - outputInitialPosition - 2);
                output.Seek(outputInitialPosition, SeekOrigin.Begin);
                LittleEndian.Write2(output, size);
            }
        }
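
For reference, here is a minimal sketch of how either Encode variant might be driven. The Compress wrapper is hypothetical; only the Encode signature comes from the listings above:

        // Hypothetical usage: compress a byte array, prefixing the output
        // with the 16-bit little-endian size field that with_size enables.
        public static byte[] Compress(byte[] uncompressed)
        {
            using (MemoryStream input = new MemoryStream(uncompressed))
            using (MemoryStream output = new MemoryStream())
            {
                Encode(input, output, with_size: true);
                return output.ToArray();
            }
        }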