private static void Encode(Stream input, Stream output, bool with_size)
{
    int input_size = (int)(input.Length - input.Position);
    byte[] input_buffer = new byte[input_size];
    input.Read(input_buffer, 0, input_size);

    long outputInitialPosition = output.Position;
    if (with_size)
    {
        output.Seek(2, SeekOrigin.Current);
    }

    /*
     * Here we create and populate the "LZSS graph":
     *
     * Each value in the uncompressed file forms a node in this graph.
     * The various edges between these nodes represent LZSS matches.
     *
     * Using a shortest-path algorithm, these edges can be used to
     * find the optimal combination of matches needed to produce the
     * smallest possible file.
     *
     * The outputted array only contains one edge per node: the optimal
     * one. This means, in order to produce the smallest file, you just
     * have to traverse the graph from one edge to the next, encoding
     * each match as you go along.
     */

    LZSSGraphEdge[] node_meta_array = new LZSSGraphEdge[input_size + 1];

    // Initialise the array
    node_meta_array[0].cost = 0;
    for (int i = 1; i < input_size + 1; ++i)
    {
        node_meta_array[i].cost = int.MaxValue;
    }

    // Find matches
    for (int i = 0; i < input_size; ++i)
    {
        int max_read_ahead = Math.Min(0xF + 3, input_size - i);
        int max_read_behind = Math.Max(0, i - 0x1000);

        // Search for zero-fill matches
        if (i < 0x1000)
        {
            // Cap at max_read_ahead so we don't read past the end of the buffer
            for (int k = 0; k < max_read_ahead; ++k)
            {
                if (input_buffer[i + k] == 0)
                {
                    int length = k + 1;

                    // Update this node's optimal edge if this one is better
                    if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                    {
                        node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                        node_meta_array[i + k + 1].previous_node_index = i;
                        node_meta_array[i + k + 1].match_length = k + 1;
                        node_meta_array[i + k + 1].match_offset = 0xFFF;
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Search for dictionary matches
        for (int j = i; j-- > max_read_behind;)
        {
            for (int k = 0; k < max_read_ahead; ++k)
            {
                if (input_buffer[i + k] == input_buffer[j + k])
                {
                    int length = k + 1;

                    // Update this node's optimal edge if this one is better
                    if (length >= 3 && node_meta_array[i + k + 1].cost > node_meta_array[i].cost + 1 + 16)
                    {
                        node_meta_array[i + k + 1].cost = node_meta_array[i].cost + 1 + 16;
                        node_meta_array[i + k + 1].previous_node_index = i;
                        node_meta_array[i + k + 1].match_length = k + 1;
                        node_meta_array[i + k + 1].match_offset = j;
                    }
                }
                else
                {
                    break;
                }
            }
        }

        // Do literal match
        // Update this node's optimal edge if this one is better (or the same, since literal matches usually decode faster)
        if (node_meta_array[i + 1].cost >= node_meta_array[i].cost + 1 + 8)
        {
            node_meta_array[i + 1].cost = node_meta_array[i].cost + 1 + 8;
            node_meta_array[i + 1].previous_node_index = i;
            node_meta_array[i + 1].match_length = 0;
        }
    }

    // Reverse the edge link order, so the array can be traversed from start to end, rather than vice versa
    node_meta_array[0].previous_node_index = int.MaxValue;
    node_meta_array[input_size].next_node_index = int.MaxValue;
    for (int node_index = input_size; node_meta_array[node_index].previous_node_index != int.MaxValue; node_index = node_meta_array[node_index].previous_node_index)
    {
        node_meta_array[node_meta_array[node_index].previous_node_index].next_node_index = node_index;
    }

    /*
     * LZSS graph complete
     */

    UInt8_NE_L_OutputBitStream bitStream = new UInt8_NE_L_OutputBitStream(output);
    MemoryStream data = new MemoryStream();

    for (int node_index = 0; node_meta_array[node_index].next_node_index != int.MaxValue; node_index = node_meta_array[node_index].next_node_index)
    {
        int next_index = node_meta_array[node_index].next_node_index;

        if (node_meta_array[next_index].match_length != 0)
        {
            // Compressed
            Push(bitStream, false, output, data);
            int match_offset_adjusted = node_meta_array[next_index].match_offset - 0x12; // I don't think there's any reason for this, the format's just stupid
            NeutralEndian.Write1(data, (byte)(match_offset_adjusted & 0xFF));
            NeutralEndian.Write1(data, (byte)(((match_offset_adjusted & 0xF00) >> 4) | ((node_meta_array[next_index].match_length - 3) & 0x0F)));
        }
        else
        {
            // Uncompressed
            Push(bitStream, true, output, data);
            NeutralEndian.Write1(data, input_buffer[node_index]);
        }
    }

    // Write remaining data (normally we don't flush until we have a full descriptor byte)
    bitStream.Flush(true);

    byte[] dataArray = data.ToArray();
    output.Write(dataArray, 0, dataArray.Length);

    if (with_size)
    {
        ushort size = (ushort)(output.Position - outputInitialPosition - 2);
        output.Seek(outputInitialPosition, SeekOrigin.Begin);
        LittleEndian.Write2(output, size);
    }
}
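This encoder leans on an LZSSGraphEdge structure that isn't shown here. A minimal sketch of what it would need to contain, reconstructed purely from the fields the code above accesses (the actual definition may differ):

private struct LZSSGraphEdge
{
    public int cost;                // Cheapest known bit cost of reaching this node from node 0
    public int previous_node_index; // Backlink written by the shortest-path pass
    public int next_node_index;     // Forward link filled in by the reversal pass
    public int match_length;        // 0 for a literal; otherwise the length of the match
    public int match_offset;        // Absolute offset of the match source (0xFFF marks a zero-fill)
}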
private static void Encode(Stream input, Stream output, bool with_size)
{
    long input_size = input.Length - input.Position;
    byte[] input_buffer = new byte[input_size];
    input.Read(input_buffer, 0, (int)input_size);

    long outputInitialPosition = output.Position;
    if (with_size)
    {
        output.Seek(2, SeekOrigin.Current);
    }

    List<byte> data = new List<byte>();
    UInt8_NE_L_OutputBitStream bitStream = new UInt8_NE_L_OutputBitStream(output);

    long input_pointer = 0;

    while (input_pointer < input_size)
    {
        // The maximum recurrence length that can be encoded is 0x12
        // Of course, if the remaining file is smaller, cap to that instead
        long maximum_match_length = Math.Min(input_size - input_pointer, 0x12);
        // The furthest back Saxman can address is 0x1000 bytes
        // Again, if there's less than 0x1000 bytes of data available, then cap at that instead
        long maximum_backsearch = Math.Min(input_pointer, 0x1000);

        // These are our default values for the longest match found
        long longest_match_offset = input_pointer; // This one doesn't really need initialising, but it does shut up some moronic warnings
        long longest_match_length = 1;

        // First, look for dictionary matches
        for (long backsearch_pointer = input_pointer - 1; backsearch_pointer >= input_pointer - maximum_backsearch; --backsearch_pointer)
        {
            long match_length = 0;

            while (input_buffer[backsearch_pointer + match_length] == input_buffer[input_pointer + match_length] && ++match_length < maximum_match_length)
                ;

            if (match_length > longest_match_length)
            {
                longest_match_length = match_length;
                longest_match_offset = backsearch_pointer;
            }
        }

        // Then, look for zero-fill matches
        if (input_pointer < 0xFFF) // Saxman cannot perform zero-fills past the first 0xFFF bytes (it relies on some goofy logic in the decompressor)
        {
            long match_length = 0;

            while (input_buffer[input_pointer + match_length] == 0 && ++match_length < maximum_match_length)
                ;

            if (match_length > longest_match_length)
            {
                longest_match_length = match_length;
                // Saxman detects zero-fills by checking if the dictionary reference offset is somehow
                // pointing to *after* the decompressed data, so we set it to the highest possible value here
                longest_match_offset = 0xFFF;
            }
        }

        // We cannot compress runs shorter than three bytes
        if (longest_match_length < 3)
        {
            // Uncompressed
            Push(bitStream, true, output, data);
            data.Add(input_buffer[input_pointer]);
            longest_match_length = 1;
        }
        else
        {
            // Compressed
            Push(bitStream, false, output, data);
            long match_offset_adjusted = longest_match_offset - 0x12; // I don't think there's any reason for this, the format's just stupid
            data.Add((byte)(match_offset_adjusted & 0xFF));
            data.Add((byte)(((match_offset_adjusted & 0xF00) >> 4) | ((longest_match_length - 3) & 0x0F)));
        }

        input_pointer += longest_match_length;
    }

    // Write remaining data (normally we don't flush until we have a full descriptor byte)
    bitStream.Flush(true);

    byte[] dataArray = data.ToArray();
    output.Write(dataArray, 0, dataArray.Length);

    if (with_size)
    {
        ushort size = (ushort)(output.Position - outputInitialPosition - 2);
        output.Seek(outputInitialPosition, SeekOrigin.Begin);
        LittleEndian.Write2(output, size);
    }
}
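Both versions also call a Push helper that isn't shown here. Below is a plausible sketch, assuming UInt8_NE_L_OutputBitStream.Push writes a completed descriptor byte to the destination stream and returns true when it does so; the data bytes buffered since the previous descriptor byte then have to follow it immediately. The graph-based version would use an equivalent overload taking a MemoryStream instead of a List<byte>.

private static void Push(UInt8_NE_L_OutputBitStream bitStream, bool bit, Stream destination, List<byte> data)
{
    // Assumption: bitStream.Push returns true when the descriptor byte it was
    // accumulating became full and was flushed to 'destination'
    if (bitStream.Push(bit))
    {
        // The descriptor byte has just been written, so the data bytes it
        // describes must be written out right after it
        byte[] bytes = data.ToArray();
        destination.Write(bytes, 0, bytes.Length);
        data.Clear();
    }
}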