/* Unlinks node p from the search tree that currently holds it. */
private static void delete_node(ref encode_state sp, int p)
{
    /* A NIL parent link means p is not in any tree: nothing to do. */
    if (sp.parent[p] == NIL)
    {
        return;
    }

    int heir; /* node that takes over p's position (NIL when p is a leaf) */

    if (sp.rchild[p] == NIL)
    {
        heir = sp.lchild[p];
    }
    else if (sp.lchild[p] == NIL)
    {
        heir = sp.rchild[p];
    }
    else
    {
        /* Two children: promote the right-most node of the left subtree. */
        heir = sp.lchild[p];
        if (sp.rchild[heir] != NIL)
        {
            heir = sp.rchild[heir];
            while (sp.rchild[heir] != NIL)
            {
                heir = sp.rchild[heir];
            }

            /* Detach heir from its old spot; its left subtree moves up. */
            sp.rchild[sp.parent[heir]] = sp.lchild[heir];
            sp.parent[sp.lchild[heir]] = sp.parent[heir];

            /* heir adopts p's left subtree. */
            sp.lchild[heir] = sp.lchild[p];
            sp.parent[sp.lchild[p]] = heir;
        }

        /* heir adopts p's right subtree. */
        sp.rchild[heir] = sp.rchild[p];
        sp.parent[sp.rchild[p]] = heir;
    }

    /* Hook heir under p's former parent, on the side p used to occupy. */
    sp.parent[heir] = sp.parent[p];
    int[] side = (sp.rchild[sp.parent[p]] == p) ? sp.rchild : sp.lchild;
    side[sp.parent[p]] = heir;

    /* Mark p as no longer being part of a tree. */
    sp.parent[p] = NIL;
}
/*
 * Allocates and initializes the encoder state, mostly the trees.
 *
 * For i = 0 to N - 1, rchild[i] and lchild[i] are the right and left
 * children of node i; they need no initial value. parent[i] is the parent
 * of node i and starts out as NIL (= N), meaning 'not used'.
 * For i = 0 to 255, rchild[N + i + 1] is the root of the tree for strings
 * that begin with byte value i; every root starts out as NIL.
 * There are 256 trees in total.
 */
private static void init_state(ref encode_state sp)
{
    sp.lchild = new int[N + 1];
    sp.rchild = new int[N + 257];
    sp.parent = new int[N + 1];
    sp.text_buf = new byte[N + F - 1];

    /* Fill the first N - F bytes of the buffer with zero. */
    int pos = 0;
    while (pos < N - F)
    {
        sp.text_buf[pos] = 0;
        pos++;
    }

    /* Every one of the 256 tree roots starts out empty. */
    for (int root = N + 1; root <= N + 256; root++)
    {
        sp.rchild[root] = NIL;
    }

    /* Every node starts out unused. */
    for (int node = 0; node < N; node++)
    {
        sp.parent[node] = NIL;
    }
}
/*
 * Compresses src with LZSS and returns the encoded bytes.
 *
 * Output layout: units are emitted in groups of up to eight, each group
 * preceded by one flag byte. Bit k of the flag byte (least significant bit
 * first) describes unit k of the group: 1 means a literal byte follows,
 * 0 means a two-byte position-and-length pair follows. In a pair, the first
 * byte is the low 8 bits of the match position; the second byte carries
 * (position >> 4) & 0xF0 in its high nibble and
 * match_length - (THRESHOLD + 1) in its low bits.
 *
 * Returns an empty array when src is empty. A null src makes the
 * MemoryStream constructor throw ArgumentNullException.
 */
public static byte[] Compress(byte[] src)
{
    /* Encoding state, mostly tree but some current match stuff */
    encode_state sp = new encode_state();
    int i, c, len, r, s, last_match_length, code_buf_ptr;
    byte[] code_buf = new byte[17];
    byte mask;

    init_state(ref sp);

    /*
     * code_buf[1..16] saves eight units of code, and code_buf[0] works
     * as eight flags, "1" representing that the unit is an unencoded
     * letter (1 byte), "0" a position-and-length pair (2 bytes).
     * Thus, eight units require at most 16 bytes of code.
     */
    code_buf[0] = 0;
    code_buf_ptr = mask = 1;

    /* s is the oldest position in the ring buffer, r the current one. */
    s = 0;
    r = N - F;

    using (MemoryStream src_stream = new MemoryStream(src))
    using (MemoryStream dst_stream = new MemoryStream())
    {
        /* Read up to F bytes into the buffer starting at position r. */
        for (len = 0; len < F && src_stream.Position < src_stream.Length; len++)
        {
            sp.text_buf[r + len] = (byte)src_stream.ReadByte();
        }
        if (len <= 0)
        {
            return dst_stream.ToArray(); /* text of size zero */
        }

        /*
         * Insert the F strings that start in the pre-filled part of the
         * buffer just before r. Note the order in which these strings are
         * inserted. This way, degenerate trees will be less likely to occur.
         */
        for (i = 1; i <= F; i++)
        {
            insert_node(ref sp, r - i);
        }

        /*
         * Finally, insert the whole string just read.
         * sp.match_length and sp.match_position are set by this call.
         */
        insert_node(ref sp, r);

        do
        {
            /* match_length may be spuriously long near the end of text. */
            if (sp.match_length > len)
            {
                sp.match_length = len;
            }

            if (sp.match_length <= THRESHOLD)
            {
                sp.match_length = 1;                        /* Not long enough match. Send one byte. */
                code_buf[0] |= mask;                        /* 'send one byte' flag */
                code_buf[code_buf_ptr++] = sp.text_buf[r];  /* Send uncoded. */
            }
            else
            {
                /*
                 * Send position and length pair. Note match_length > THRESHOLD.
                 * Only the low 8 bits of the position go into the first byte,
                 * so the truncating casts are made explicitly unchecked; they
                 * must not throw when the project is built with overflow
                 * checking enabled.
                 */
                code_buf[code_buf_ptr++] = unchecked((byte)sp.match_position);
                code_buf[code_buf_ptr++] = unchecked((byte)
                    (((sp.match_position >> 4) & 0xF0)
                    | (sp.match_length - (THRESHOLD + 1))));
            }

            /*
             * Shift mask left one bit. The shift is computed as an int, so
             * only the low 8 bits are kept: after eight units the mask wraps
             * to zero, which signals that the group is full.
             */
            mask = unchecked((byte)(mask << 1));
            if (mask == 0)
            {
                /* Send at most 8 units of code together */
                dst_stream.Write(code_buf, 0, code_buf_ptr);
                code_buf[0] = 0;
                code_buf_ptr = mask = 1;
            }

            last_match_length = sp.match_length;
            for (i = 0; i < last_match_length && src_stream.Position < src_stream.Length; i++)
            {
                delete_node(ref sp, s);     /* Delete old strings and */
                c = src_stream.ReadByte();
                sp.text_buf[s] = (byte)c;   /* read new bytes */

                /*
                 * If the position is near the end of buffer, extend the buffer
                 * to make string comparison easier.
                 */
                if (s < F - 1)
                {
                    sp.text_buf[s + N] = (byte)c;
                }

                /* Since this is a ring buffer, increment the position modulo N. */
                s = (s + 1) & (N - 1);
                r = (r + 1) & (N - 1);

                /* Register the string in text_buf[r..r+F-1] */
                insert_node(ref sp, r);
            }

            while (i++ < last_match_length)
            {
                delete_node(ref sp, s);

                /* After the end of text, no need to read, */
                s = (s + 1) & (N - 1);
                r = (r + 1) & (N - 1);

                /*
                 * but buffer may not be empty. Once len reaches zero nothing
                 * is left to encode, so no further insertion is needed.
                 */
                if (--len > 0)
                {
                    insert_node(ref sp, r);
                }
            }
        } while (len > 0); /* until length of string to be processed is zero */

        if (code_buf_ptr > 1)
        {
            /* Send remaining code. */
            dst_stream.Write(code_buf, 0, code_buf_ptr);
        }

        return dst_stream.ToArray();
    }
}
/*
 * Inserts the string of length F, text_buf[r..r+F-1], into one of the trees
 * (the tree selected by text_buf[r]) and reports the longest match found on
 * the way down through sp.match_position and sp.match_length.
 * If match_length reaches F, the old node is removed in favor of the new
 * one, because the old one will be deleted sooner. Note r plays a double
 * role, as tree node and as position in the buffer.
 */
private static void insert_node(ref encode_state sp, int r)
{
    int node = N + 1 + sp.text_buf[r]; /* root slot of the tree for this first byte */
    int diff = 1;                      /* sign of the last comparison; starts by going right */

    sp.lchild[r] = NIL;
    sp.rchild[r] = NIL;
    sp.match_length = 0;

    while (true)
    {
        /* Step to the right child on diff >= 0, to the left child otherwise. */
        int[] branch = (diff >= 0) ? sp.rchild : sp.lchild;
        int next = branch[node];
        if (next == NIL)
        {
            /* Free slot reached: attach r here as a new leaf. */
            branch[node] = r;
            sp.parent[r] = node;
            return;
        }
        node = next;

        /* Count how many leading bytes the two strings share (first byte already equal). */
        int matched = 1;
        while (matched < F)
        {
            diff = sp.text_buf[r + matched] - sp.text_buf[node + matched];
            if (diff != 0)
            {
                break;
            }
            matched++;
        }

        /* Remember the best match seen so far; stop on a full-length match. */
        if (matched > sp.match_length)
        {
            sp.match_position = node;
            sp.match_length = matched;
            if (matched >= F)
            {
                break;
            }
        }
    }

    /* Full-length match: r takes over the matched node's links. */
    sp.parent[r] = sp.parent[node];
    sp.lchild[r] = sp.lchild[node];
    sp.rchild[r] = sp.rchild[node];
    sp.parent[sp.lchild[node]] = r;
    sp.parent[sp.rchild[node]] = r;

    /* Re-point the parent's link from the old node to r, on the same side. */
    int[] side = (sp.rchild[sp.parent[node]] == node) ? sp.rchild : sp.lchild;
    side[sp.parent[node]] = r;

    sp.parent[node] = NIL; /* remove the old node */
}