// ===========================================================================
// Set match_start to the longest match starting at the given string and
// return its length. Matches shorter or equal to prev_length are discarded,
// in which case the result is equal to prev_length and match_start is
// garbage.
// IN assertions: cur_match is the head of the hash chain for the current
// string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
// OUT assertion: the match length is not greater than s.lookahead.
static uint longest_match(deflate_state s, uint cur_match)
{
    uint chain_length=s.max_chain_length;   // max hash chain length to follow
    byte[] scan=s.window;                   // window holding the current string
    int scan_ind=(int)s.strstart;           // index of the current string in window
    int len;                                // length of current match
    int best_len=(int)s.prev_length;        // best match length so far
    int nice_match=s.nice_match;            // stop if match long enough
    // Stop when cur_match becomes <= limit. To simplify the code,
    // we prevent matches with the string of window index 0.
    uint limit=s.strstart>(uint)(s.w_size-MIN_LOOKAHEAD)?s.strstart-(uint)(s.w_size-MIN_LOOKAHEAD):NIL;
    ushort[] prev=s.prev;                   // hash chain links
    uint wmask=s.w_mask;
    int strend_ind=(int)s.strstart+MAX_MATCH;   // one past the farthest byte a match may reach
    // Last two bytes of the current best match; a candidate that does not
    // end with these cannot beat best_len, so it is rejected cheaply below.
    byte scan_end1=scan[scan_ind+best_len-1];
    byte scan_end=scan[scan_ind+best_len];

    // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16.
    // It is easy to get rid of this optimization if necessary.
    //Assert(s.hash_bits >= 8 && MAX_MATCH == 258, "Code too clever");

    // Do not waste too much time if we already have a good match:
    if(s.prev_length>=s.good_match) chain_length>>=2;

    // Do not look for matches beyond the end of the input. This is necessary
    // to make deflate deterministic.
    if((uint)nice_match>s.lookahead) nice_match=(int)s.lookahead;

    //Assert((uint)s.strstart <= s.window_size-MIN_LOOKAHEAD, "need lookahead");

    byte[] match=s.window;  // window again; candidate matches are read from here
    do
    {
        //Assert(cur_match<s.strstart, "no future");
        int match_ind=(int)cur_match;

        // Skip to next match if the match length cannot increase
        // or if the match length is less than 2. Note that the checks below
        // for insufficient lookahead only occur occasionally for performance
        // reasons. Therefore uninitialized memory will be accessed, and
        // conditional jumps will be made that depend on those values.
        // However the length of the match is limited to the lookahead, so
        // the output of deflate is not affected by the uninitialized values.
        if(match[match_ind+best_len]!=scan_end||match[match_ind+best_len-1]!=scan_end1||
            match[match_ind]!=scan[scan_ind]||match[++match_ind]!=scan[scan_ind+1]) continue;

        // The check at best_len-1 can be removed because it will be made
        // again later. (This heuristic is not always a win.)
        // It is not necessary to compare scan[2] and match[2] since they
        // are always equal when the other bytes match, given that
        // the hash keys are equal and that HASH_BITS >= 8.
        scan_ind+=2; match_ind++;
        //Assert(scan[scan_ind]==match[match_ind], "match[2]?");

        // Extend the match as far as possible, 8 byte-compares per loop
        // iteration; the bound check only happens every 8th comparison, so
        // the 256th check will be made at strstart+258 (== MAX_MATCH).
        // NOTE: the ++ side effects inside the condition are load-bearing;
        // do not reorder.
        do
        {
        } while(scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
            scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
            scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
            scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&&
            scan_ind<strend_ind);

        //Assert(scan_ind <= (uint)(s.window_size-1), "wild scan");

        len=MAX_MATCH-(int)(strend_ind-scan_ind);
        scan_ind=strend_ind-MAX_MATCH;  // reset scan_ind back to strstart

        if(len>best_len)
        {
            s.match_start=cur_match;
            best_len=len;
            if(len>=nice_match) break;  // good enough, stop searching
            scan_end1=scan[scan_ind+best_len-1];
            scan_end=scan[scan_ind+best_len];
        }
    } while((cur_match=prev[cur_match&wmask])>limit&&--chain_length!=0);

    // Clamp to lookahead: bytes past strstart+lookahead are not valid input.
    if((uint)best_len<=s.lookahead) return (uint)best_len;
    return s.lookahead;
}
// ===========================================================================
// Determine the best encoding for the current block: dynamic trees, static
// trees or store, and output the encoded block to the zip file.
// buf: input block, or NULL if too old
// buf_ind: offset of the block within buf
// stored_len: length of input block
// last: one if this is the last block for a file
static void _tr_flush_block(deflate_state s, byte[] buf, int buf_ind, uint stored_len, int last)
{
    uint opt_lenb, static_lenb; // opt_len and static_len in bytes
    int max_blindex=0;          // index of last bit length code of non zero freq

    // Build the Huffman trees unless a stored block is forced (level 0)
    if(s.level>0)
    {
        // Construct the literal and distance trees
        build_tree(s, ref s.l_desc);
        //Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s.opt_len, s.static_len));
        build_tree(s, ref s.d_desc);
        //Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s.opt_len, s.static_len));
        // At this point, opt_len and static_len are the total bit lengths of
        // the compressed block data, excluding the tree representations.

        // Build the bit length tree for the above two trees, and get the index
        // in bl_order of the last bit length code to send.
        max_blindex=build_bl_tree(s);

        // Determine the best encoding. Compute the block lengths in bytes.
        // +3 for the block header bits, +7 to round up to a whole byte.
        opt_lenb=(s.opt_len+3+7)>>3;
        static_lenb=(s.static_len+3+7)>>3;
        //Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ", opt_lenb, s.opt_len, static_lenb, s.static_len, stored_len, s.last_lit));

        if(static_lenb<=opt_lenb) opt_lenb=static_lenb;
    }
    else
    {
        //Assert(buf!=(char*)0, "lost buf");
        opt_lenb=static_lenb=stored_len+5;  // force a stored block (5 = header bytes)
    }

    if(stored_len+4<=opt_lenb&&buf!=null)   // 4: two words for the lengths
    {
        // The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
        // Otherwise we can't have processed more than WSIZE input bytes since
        // the last block flush, because compression would have been
        // successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
        // transform a block into a stored block.
        _tr_stored_block(s, buf, buf_ind, stored_len, last);
    }
    else if(s.strategy==Z_FIXED||static_lenb==opt_lenb)
    {
        // Static (fixed) Huffman trees: no tree description to send.
        send_bits(s, (STATIC_TREES<<1)+last, 3);
        compress_block(s, static_ltree, static_dtree);
    }
    else
    {
        // Dynamic Huffman trees: send the tree descriptions, then the data.
        send_bits(s, (DYN_TREES<<1)+last, 3);
        send_all_trees(s, s.l_desc.max_code+1, s.d_desc.max_code+1, max_blindex+1);
        compress_block(s, s.dyn_ltree, s.dyn_dtree);
    }
    //Assert (s.compressed_len == s.bits_sent, "bad compressed size");
    // The above check is made mod 2^32, for files larger than 512 MB
    // and unsigned int implemented on 32 bits.
    init_block(s);

    if(last!=0) bi_windup(s);   // flush the bit buffer on the final block
    //Tracev((stderr,"\ncomprlen %lu(%lu) ", s.compressed_len>>3, s.compressed_len-7*eof));
}
// ===========================================================================
// Send the block data compressed using the given Huffman trees.
// Walks the parallel d_buf/l_buf tally arrays: an entry with distance 0 is a
// literal byte, otherwise it is a (length, distance) match pair.
// ltree: literal/length tree
// dtree: distance tree
static void compress_block(deflate_state s, ct_data[] ltree, ct_data[] dtree)
{
    uint dist;      // distance of matched string
    int lc;         // match length or unmatched char (if dist == 0)
    uint lx=0;      // running index in l_buf
    uint code;      // the code to send
    int extra;      // number of extra bits to send

    if(s.last_lit!=0)
    {
        do
        {
            dist=s.d_buf[lx];
            lc=s.l_buf[lx++];
            if(dist==0)
            {
                send_code(s, lc, ltree);    // send a literal byte
                //Tracecv(isgraph(lc), (stderr," '%c' ", lc));
            }
            else
            {
                // Here, lc is the match length - MIN_MATCH
                code=_length_code[lc];
                send_code(s, (int)(code+LITERALS+1), ltree);    // send the length code
                extra=extra_lbits[code];
                if(extra!=0)
                {
                    lc-=base_length[code];
                    send_bits(s, lc, extra);    // send the extra length bits
                }
                dist--; // dist is now the match distance - 1
                code=(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)]);
                //Assert (code < D_CODES, "bad d_code");

                send_code(s, (int)code, dtree); // send the distance code
                extra=extra_dbits[code];
                if(extra!=0)
                {
                    dist-=(uint)base_dist[code];
                    send_bits(s, (int)dist, extra); // send the extra distance bits
                }
            } // literal or match pair ?
        } while(lx<s.last_lit);
    }

    send_code(s, END_BLOCK, ltree);
    s.last_eob_len=ltree[END_BLOCK].Len;    // remember EOB length for bi_windup
}
// ===========================================================================
// Construct the Huffman tree for the bit lengths and return the index in
// bl_order of the last bit length code to send.
static int build_bl_tree(deflate_state s)
{
    // Tally the code-length frequencies of the literal and distance trees
    // into the bit length tree's frequency counts.
    scan_tree(s, s.dyn_ltree, s.l_desc.max_code);
    scan_tree(s, s.dyn_dtree, s.d_desc.max_code);

    // Build the bit length tree itself. After this, opt_len includes the
    // length of the tree representations, except the lengths of the bit
    // lengths codes and the 5+5+4 bits for the counts.
    build_tree(s, ref s.bl_desc);

    // Determine the number of bit length codes to send by scanning backwards
    // for the last code with a non-zero length. The pkzip format requires
    // that at least 4 bit length codes be sent (appnote.txt says 3 but the
    // actual value used is 4), so never drop below index 3.
    int last_index=BL_CODES-1;
    while(last_index>=3&&s.bl_tree[bl_order[last_index]].Len==0) last_index--;

    // Update opt_len to include the bit length tree and the count fields.
    s.opt_len+=(uint)(3*(last_index+1)+5+5+4);
    //Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld", s.opt_len, s.static_len));

    return last_index;
}
// ===========================================================================
// Send a stored (uncompressed) block: a 3-bit block header announcing the
// stored type, followed by the raw data preceded by its length words.
// buf: the input data; buf_ind: offset of the data within buf
// stored_len: number of bytes in the block
// last: one if this is the last block for a file
static void _tr_stored_block(deflate_state s, byte[] buf, int buf_ind, uint stored_len, int last)
{
    int block_type_bits=(STORED_BLOCK<<1)+last;
    send_bits(s, block_type_bits, 3);               // send block type
    copy_block(s, buf, buf_ind, stored_len, 1);     // copy payload, with length header
}
// ===========================================================================
// Compute the optimal bit lengths for a tree and update the total bit length
// for the current block.
// IN assertion: the fields freq and dad are set, heap[heap_max] and
// above are the tree nodes sorted by increasing frequency.
// OUT assertions: the field len is set to the optimal bit length, the
// array bl_count contains the frequencies for each bit length.
// The length opt_len is updated; static_len is also updated if stree is
// not null.
// desc: the tree descriptor
static void gen_bitlen(deflate_state s, ref tree_desc desc)
{
    ct_data[] tree=desc.dyn_tree;
    int max_code=desc.max_code;
    ct_data[] stree=desc.stat_desc.static_tree; // corresponding static tree, or null
    int[] extra=desc.stat_desc.extra_bits;      // extra bits per code
    int @base=desc.stat_desc.extra_base;        // first code with extra bits
    int max_length=desc.stat_desc.max_length;   // maximum permitted bit length
    int h;          // heap index
    int n, m;       // iterate over the tree elements
    int bits;       // bit length
    int xbits;      // extra bits
    ushort f;       // frequency
    int overflow=0; // number of elements with bit length too large

    for(bits=0; bits<=MAX_BITS; bits++) s.bl_count[bits]=0;

    // In a first pass, compute the optimal bit lengths (which may
    // overflow in the case of the bit length tree).
    tree[s.heap[s.heap_max]].Len=0; // root of the heap gets depth 0

    for(h=s.heap_max+1; h<HEAP_SIZE; h++)
    {
        n=s.heap[h];
        bits=tree[tree[n].Dad].Len+1;   // depth = parent's depth + 1
        if(bits>max_length)
        {
            bits=max_length;    // clamp; fixed up in the overflow pass below
            overflow++;
        }
        tree[n].Len=(ushort)bits;
        // We overwrite tree[n].Dad which is no longer needed

        if(n>max_code) continue;    // not a leaf node

        s.bl_count[bits]++;
        xbits=0;
        if(n>=@base) xbits=extra[n-@base];
        f=tree[n].Freq;
        s.opt_len+=(uint)(f*(bits+xbits));
        if(stree!=null) s.static_len+=(uint)(f*(stree[n].Len+xbits));
    }
    if(overflow==0) return;

    //Trace((stderr,"\nbit length overflow\n"));
    // This happens for example on obj2 and pic of the Calgary corpus

    // Find the first bit length which could increase:
    do
    {
        bits=max_length-1;
        while(s.bl_count[bits]==0) bits--;
        s.bl_count[bits]--;     // move one leaf down the tree
        s.bl_count[bits+1]+=2;  // move one overflow item as its brother
        s.bl_count[max_length]--;
        // The brother of the overflow item also moves one step up,
        // but this does not affect bl_count[max_length]
        overflow-=2;
    } while(overflow>0);

    // Now recompute all bit lengths, scanning in increasing frequency.
    // h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
    // lengths instead of fixing only the wrong ones. This idea is taken
    // from 'ar' written by Haruhiko Okumura.)
    for(bits=max_length; bits!=0; bits--)
    {
        n=s.bl_count[bits];
        while(n!=0)
        {
            m=s.heap[--h];
            if(m>max_code) continue;    // internal node, no length to assign
            if((uint)tree[m].Len!=(uint)bits)
            {
                //Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
                s.opt_len+=((uint)bits-tree[m].Len)*tree[m].Freq;
                tree[m].Len=(ushort)bits;
            }
            n--;
        }
    }
}
// ===========================================================================
// Scan a literal or distance tree to determine the frequencies of the codes
// in the bit length tree. Runs of equal lengths are counted so they can be
// encoded later with the REP_3_6 / REPZ_3_10 / REPZ_11_138 repeat codes.
// tree: the tree to be scanned
// max_code: and its largest code of non zero frequency
static void scan_tree(deflate_state s, ct_data[] tree, int max_code)
{
    int n;                      // iterates over all tree elements
    int prevlen=-1;             // last emitted length
    int curlen;                 // length of current code
    int nextlen=tree[0].Len;    // length of next code
    int count=0;                // repeat count of the current code
    int max_count=7;            // max repeat count before forcing emission
    int min_count=4;            // min repeat count needed to use a repeat code

    // Runs of zero lengths may be much longer (REPZ_11_138).
    if(nextlen==0) { max_count=138; min_count=3; }
    tree[max_code+1].Len=(ushort)0xffff;    // guard: terminates the final run

    for(n=0; n<=max_code; n++)
    {
        curlen=nextlen; nextlen=tree[n+1].Len;
        if(++count<max_count&&curlen==nextlen) continue;    // run continues
        if(count<min_count) s.bl_tree[curlen].Freq+=(ushort)count;  // too short for a repeat code
        else if(curlen!=0)
        {
            // REP_3_6 repeats the previous length, so one literal copy of
            // curlen is needed first if it differs from the last emitted one.
            if(curlen!=prevlen) s.bl_tree[curlen].Freq++;
            s.bl_tree[REP_3_6].Freq++;
        }
        else if(count<=10) s.bl_tree[REPZ_3_10].Freq++;
        else s.bl_tree[REPZ_11_138].Freq++;

        count=0; prevlen=curlen;
        // Pick the repeat-count limits for the next run based on its length.
        if(nextlen==0) { max_count=138; min_count=3; }
        else if(curlen==nextlen) { max_count=6; min_count=3; }
        else { max_count=7; min_count=4; }
    }
}
// ===========================================================================
// For Z_RLE, simply look for runs of bytes, generate matches only of distance
// one. Do not maintain a hash table. (It will be regenerated if this run of
// deflate switches away from Z_RLE.)
static block_state deflate_rle(deflate_state s, int flush)
{
    bool bflush;        // set if current block must be flushed
    uint prev;          // byte at distance one to match
    int scan, strend;   // scan goes up to strend for length of run

    for(; ; )
    {
        // Make sure that we always have enough lookahead, except
        // at the end of the input file. We need MAX_MATCH bytes
        // for the longest encodable run.
        if(s.lookahead<MAX_MATCH)
        {
            fill_window(s);
            if(s.lookahead<MAX_MATCH&&flush==Z_NO_FLUSH) return block_state.need_more;
            if(s.lookahead==0) break;   // flush the current block
        }

        // See how many times the previous byte repeats
        s.match_length=0;
        if(s.lookahead>=MIN_MATCH&&s.strstart>0)
        {
            scan=(int)(s.strstart-1);
            prev=s.window[scan];
            // Only bother measuring the run if it is at least MIN_MATCH long.
            if(prev==s.window[++scan]&&prev==s.window[++scan]&&prev==s.window[++scan])
            {
                strend=(int)(s.strstart+MAX_MATCH);
                // Extend the run, 8 compares per iteration; the ++ side
                // effects in the condition are load-bearing.
                do
                {
                } while(prev==s.window[++scan]&&prev==s.window[++scan]&&
                    prev==s.window[++scan]&&prev==s.window[++scan]&&
                    prev==s.window[++scan]&&prev==s.window[++scan]&&
                    prev==s.window[++scan]&&prev==s.window[++scan]&&
                    scan<strend);
                s.match_length=MAX_MATCH-(uint)(strend-scan);
                if(s.match_length>s.lookahead) s.match_length=s.lookahead;
            }
        }

        // Emit match if have run of MIN_MATCH or longer, else emit literal
        if(s.match_length>=MIN_MATCH)
        {
            //was _tr_tally_dist(s, 1, s.match_length-MIN_MATCH, bflush);
            {
                byte len=(byte)(s.match_length-MIN_MATCH);
                ushort dist=1;  // RLE matches always have distance one
                s.d_buf[s.last_lit]=dist;
                s.l_buf[s.last_lit++]=len;
                dist--;
                s.dyn_ltree[_length_code[len]+LITERALS+1].Freq++;
                s.dyn_dtree[(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)])].Freq++;
                bflush=(s.last_lit==s.lit_bufsize-1)?true:false;
            }

            s.lookahead-=s.match_length;
            s.strstart+=s.match_length;
            s.match_length=0;
        }
        else
        {
            // No match, output a literal byte
            //Tracevv((stderr,"%c", s.window[s.strstart]));
            //was _tr_tally_lit(s, s.window[s.strstart], bflush);
            {
                byte cc=s.window[s.strstart];
                s.d_buf[s.last_lit]=0;
                s.l_buf[s.last_lit++]=cc;
                s.dyn_ltree[cc].Freq++;
                bflush=(s.last_lit==s.lit_bufsize-1)?true:false;
            }
            s.lookahead--;
            s.strstart++;
        }
        if(bflush)
        {
            // FLUSH_BLOCK(s, 0);
            _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), 0);
            s.block_start=(int)s.strstart;
            flush_pending(s.strm);
            //Tracev((stderr,"[FLUSH]"));
            if(s.strm.avail_out==0) return block_state.need_more;
        }
    }

    //was FLUSH_BLOCK(s, flush==Z_FINISH);
    _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
    s.block_start=(int)s.strstart;
    flush_pending(s.strm);
    //Tracev((stderr,"[FLUSH]"));
    if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

    return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
}
// ===========================================================================
// For Z_HUFFMAN_ONLY, do not look for matches. Do not maintain a hash table.
// (It will be regenerated if this run of deflate switches away from Huffman.)
// Every input byte is emitted as a literal; blocks are flushed when the
// literal buffer fills or when the caller requests a flush.
static block_state deflate_huff(deflate_state s, int flush)
{
    bool must_flush;    // set when the literal buffer is nearly full

    while(true)
    {
        // Make sure that we have a literal to write; refill if exhausted.
        if(s.lookahead==0)
        {
            fill_window(s);
            if(s.lookahead==0)
            {
                if(flush==Z_NO_FLUSH) return block_state.need_more;
                break;  // no more input: flush the current block
            }
        }

        // Output one literal byte and tally its frequency.
        s.match_length=0;
        //Tracevv((stderr,"%c", s.window[s.strstart]));
        //was _tr_tally_lit(s, s.window[s.strstart], bflush);
        byte lit=s.window[s.strstart];
        s.d_buf[s.last_lit]=0;
        s.l_buf[s.last_lit++]=lit;
        s.dyn_ltree[lit].Freq++;
        must_flush=s.last_lit==s.lit_bufsize-1;

        s.lookahead--;
        s.strstart++;

        if(must_flush)
        {
            // FLUSH_BLOCK(s, 0);
            byte[] flush_buf=s.block_start>=0?s.window:null;
            int flush_ind=s.block_start>=0?s.block_start:0;
            _tr_flush_block(s, flush_buf, flush_ind, (uint)((int)s.strstart-s.block_start), 0);
            s.block_start=(int)s.strstart;
            flush_pending(s.strm);
            //Tracev((stderr,"[FLUSH]"));
            if(s.strm.avail_out==0) return block_state.need_more;
        }
    }

    //was FLUSH_BLOCK(s, flush==Z_FINISH);
    bool finishing=flush==Z_FINISH;
    byte[] final_buf=s.block_start>=0?s.window:null;
    int final_ind=s.block_start>=0?s.block_start:0;
    _tr_flush_block(s, final_buf, final_ind, (uint)((int)s.strstart-s.block_start), finishing?1:0);
    s.block_start=(int)s.strstart;
    flush_pending(s.strm);
    //Tracev((stderr,"[FLUSH]"));
    if(s.strm.avail_out==0)
    {
        return finishing?block_state.finish_started:block_state.need_more;
    }
    return finishing?block_state.finish_done:block_state.block_done;
}
// ===========================================================================
// Compress as much as possible from the input stream, return the current
// block state.
// This function does not perform lazy evaluation of matches and inserts
// new strings in the dictionary only for unmatched strings or for short
// matches. It is used only for the fast compression options.
static block_state deflate_fast(deflate_state s, int flush)
{
    uint hash_head=NIL; // head of the hash chain
    int bflush;         // set if current block must be flushed

    for(; ; )
    {
        // Make sure that we always have enough lookahead, except
        // at the end of the input file. We need MAX_MATCH bytes
        // for the next match, plus MIN_MATCH bytes to insert the
        // string following the next match.
        if(s.lookahead<MIN_LOOKAHEAD)
        {
            fill_window(s);
            if(s.lookahead<MIN_LOOKAHEAD&&flush==Z_NO_FLUSH) return block_state.need_more;
            if(s.lookahead==0) break;   // flush the current block
        }

        // Insert the string window[strstart .. strstart+2] in the
        // dictionary, and set hash_head to the head of the hash chain:
        hash_head=NIL;
        if(s.lookahead>=MIN_MATCH)
        {
            //was INSERT_STRING(s, s.strstart, hash_head);
            s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
            hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
            s.head[s.ins_h]=(ushort)s.strstart;
        }

        // Find the longest match, discarding those <= prev_length.
        // At this point we have always match_length < MIN_MATCH
        if(hash_head!=NIL&&s.strstart-hash_head<=(s.w_size-MIN_LOOKAHEAD))
        {
            // To simplify the code, we prevent matches with the string
            // of window index 0 (in particular we have to avoid a match
            // of the string with itself at the start of the input file).
            s.match_length=longest_match_fast(s, hash_head);
            // longest_match_fast() sets match_start
        }
        if(s.match_length>=MIN_MATCH)
        {
            //was _tr_tally_dist(s, s.strstart - s.match_start, s.match_length - MIN_MATCH, bflush);
            {
                byte len=(byte)(s.match_length-MIN_MATCH);
                ushort dist=(ushort)(s.strstart-s.match_start);
                s.d_buf[s.last_lit]=dist;
                s.l_buf[s.last_lit++]=len;
                dist--;
                s.dyn_ltree[_length_code[len]+LITERALS+1].Freq++;
                s.dyn_dtree[(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)])].Freq++;
                bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
            }

            s.lookahead-=s.match_length;

            // Insert new strings in the hash table only if the match length
            // is not too large. This saves time but degrades compression.
            if(s.match_length<=s.max_lazy_match&&s.lookahead>=MIN_MATCH) // max_lazy_match was max_insert_length as #define
            {
                s.match_length--;   // string at strstart already in table
                do
                {
                    s.strstart++;
                    //was INSERT_STRING(s, s.strstart, hash_head);
                    s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
                    hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
                    s.head[s.ins_h]=(ushort)s.strstart;
                    // strstart never exceeds WSIZE-MAX_MATCH, so there are
                    // always MIN_MATCH bytes ahead.
                } while(--s.match_length!=0);
                s.strstart++;
            }
            else
            {
                s.strstart+=s.match_length;
                s.match_length=0;
                s.ins_h=s.window[s.strstart];
                //was UPDATE_HASH(s, s.ins_h, s.window[s.strstart+1]);
                s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+1])&s.hash_mask;
                // If lookahead < MIN_MATCH, ins_h is garbage, but it does not
                // matter since it will be recomputed at next deflate call.
            }
        }
        else
        {
            // No match, output a literal byte
            //Tracevv((stderr,"%c", s.window[s.strstart]));
            //was _tr_tally_lit (s, s.window[s.strstart], bflush);
            {
                byte cc=s.window[s.strstart];
                s.d_buf[s.last_lit]=0;
                s.l_buf[s.last_lit++]=cc;
                s.dyn_ltree[cc].Freq++;
                bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
            }
            s.lookahead--;
            s.strstart++;
        }
        if(bflush!=0)
        {
            //was FLUSH_BLOCK(s, 0);
            _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), 0);
            s.block_start=(int)s.strstart;
            flush_pending(s.strm);
            //Tracev((stderr,"[FLUSH]"));
            if(s.strm.avail_out==0) return block_state.need_more;
        }
    }

    //was FLUSH_BLOCK(s, flush==Z_FINISH);
    _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
    s.block_start=(int)s.strstart;
    flush_pending(s.strm);
    //Tracev((stderr,"[FLUSH]"));
    if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

    return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
}
// ===========================================================================
// Same as above, but achieves better compression. We use a lazy
// evaluation for matches: a match is finally adopted only if there is
// no better match at the next window position.
static block_state deflate_slow(deflate_state s, int flush)
{
    uint hash_head=NIL; // head of hash chain
    int bflush;         // set if current block must be flushed

    // Process the input block.
    for(; ; )
    {
        // Make sure that we always have enough lookahead, except
        // at the end of the input file. We need MAX_MATCH bytes
        // for the next match, plus MIN_MATCH bytes to insert the
        // string following the next match.
        if(s.lookahead<MIN_LOOKAHEAD)
        {
            fill_window(s);
            if(s.lookahead<MIN_LOOKAHEAD&&flush==Z_NO_FLUSH) return block_state.need_more;
            if(s.lookahead==0) break;   // flush the current block
        }

        // Insert the string window[strstart .. strstart+2] in the
        // dictionary, and set hash_head to the head of the hash chain:
        hash_head=NIL;
        if(s.lookahead>=MIN_MATCH)
        {
            //was INSERT_STRING(s, s.strstart, hash_head);
            s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
            hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
            s.head[s.ins_h]=(ushort)s.strstart;
        }

        // Find the longest match, discarding those <= prev_length.
        s.prev_length=s.match_length; s.prev_match=s.match_start;
        s.match_length=MIN_MATCH-1;

        if(hash_head!=NIL&&s.prev_length<s.max_lazy_match&&s.strstart-hash_head<=(s.w_size-MIN_LOOKAHEAD))
        {
            // To simplify the code, we prevent matches with the string
            // of window index 0 (in particular we have to avoid a match
            // of the string with itself at the start of the input file).
            s.match_length=longest_match(s, hash_head);
            // longest_match() sets match_start

            // Heuristic: discard very short matches that are far away
            // (expensive to encode), and all short matches for Z_FILTERED.
            if(s.match_length<=5&&(s.strategy==Z_FILTERED||
                (s.match_length==MIN_MATCH&&s.strstart-s.match_start>TOO_FAR)))
            {
                // If prev_match is also MIN_MATCH, match_start is garbage
                // but we will ignore the current match anyway.
                s.match_length=MIN_MATCH-1;
            }
        }
        // If there was a match at the previous step and the current
        // match is not better, output the previous match:
        if(s.prev_length>=MIN_MATCH&&s.match_length<=s.prev_length)
        {
            uint max_insert=s.strstart+s.lookahead-MIN_MATCH;
            // Do not insert strings in hash table beyond this.

            //was _tr_tally_dist(s, s.strstart -1 - s.prev_match, s.prev_length - MIN_MATCH, bflush);
            {
                byte len=(byte)(s.prev_length-MIN_MATCH);
                ushort dist=(ushort)(s.strstart-1-s.prev_match);
                s.d_buf[s.last_lit]=dist;
                s.l_buf[s.last_lit++]=len;
                dist--;
                s.dyn_ltree[_length_code[len]+LITERALS+1].Freq++;
                s.dyn_dtree[(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)])].Freq++;
                bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
            }

            // Insert in hash table all strings up to the end of the match.
            // strstart-1 and strstart are already inserted. If there is not
            // enough lookahead, the last two strings are not inserted in
            // the hash table.
            s.lookahead-=s.prev_length-1;
            s.prev_length-=2;
            do
            {
                if(++s.strstart<=max_insert)
                {
                    //was INSERT_STRING(s, s.strstart, hash_head);
                    s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+(MIN_MATCH-1)])&s.hash_mask;
                    hash_head=s.prev[s.strstart&s.w_mask]=s.head[s.ins_h];
                    s.head[s.ins_h]=(ushort)s.strstart;
                }
            } while(--s.prev_length!=0);
            s.match_available=0;
            s.match_length=MIN_MATCH-1;
            s.strstart++;

            if(bflush!=0)
            {
                //was FLUSH_BLOCK(s, 0);
                _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), 0);
                s.block_start=(int)s.strstart;
                flush_pending(s.strm);
                //Tracev((stderr,"[FLUSH]"));
                if(s.strm.avail_out==0) return block_state.need_more;
            }
        }
        else if(s.match_available!=0)
        {
            // If there was no match at the previous position, output a
            // single literal. If there was a match but the current match
            // is longer, truncate the previous match to a single literal.
            //Tracevv((stderr,"%c", s.window[s.strstart-1]));
            //was _tr_tally_lit(s, s.window[s.strstart-1], bflush);
            {
                byte cc=s.window[s.strstart-1];
                s.d_buf[s.last_lit]=0;
                s.l_buf[s.last_lit++]=cc;
                s.dyn_ltree[cc].Freq++;
                bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
            }
            if(bflush!=0)
            {
                //was FLUSH_BLOCK_ONLY(s, 0); -- no early return here, unlike FLUSH_BLOCK
                _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), 0);
                s.block_start=(int)s.strstart;
                flush_pending(s.strm);
                //Tracev((stderr,"[FLUSH]"));
            }
            s.strstart++;
            s.lookahead--;
            if(s.strm.avail_out==0) return block_state.need_more;
        }
        else
        {
            // There is no previous match to compare with, wait for
            // the next step to decide.
            s.match_available=1;
            s.strstart++;
            s.lookahead--;
        }
    }
    //Assert(flush!=Z_NO_FLUSH, "no flush?");
    // Emit the pending literal left over from lazy evaluation, if any.
    if(s.match_available!=0)
    {
        //Tracevv((stderr,"%c", s.window[s.strstart-1]));
        //was _tr_tally_lit(s, s.window[s.strstart-1], bflush);
        {
            byte cc=s.window[s.strstart-1];
            s.d_buf[s.last_lit]=0;
            s.l_buf[s.last_lit++]=cc;
            s.dyn_ltree[cc].Freq++;
            bflush=(s.last_lit==s.lit_bufsize-1)?1:0;
        }
        s.match_available=0;
    }

    //was FLUSH_BLOCK(s, flush==Z_FINISH);
    _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
    s.block_start=(int)s.strstart;
    flush_pending(s.strm);
    //Tracev((stderr,"[FLUSH]"));
    if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

    return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
}
// =========================================================================== // Flush the current block, with given end-of-file flag. // IN assertion: strstart is set to the end of the current match. //#define FLUSH_BLOCK_ONLY(s, last) \ //{ \ // _tr_flush_block(s, s.block_start >= 0 ? s.window : null, s.block_start >= 0?s.block_start:0, \ // (uint)((int)s.strstart - s.block_start), (last)); \ // s.block_start = s.strstart; \ // flush_pending(s.strm); \ // Tracev((stderr,"[FLUSH]")); \ //} // Same but force premature exit if necessary. //#define FLUSH_BLOCK(s, last) \ //{ \ // _tr_flush_block(s, s.block_start >= 0 ? s.window : null, s.block_start >= 0?s.block_start:0, \ // (uint)((int)s.strstart - s.block_start), (last)); \ // s.block_start = s.strstart; \ // flush_pending(s.strm); \ // Tracev((stderr,"[FLUSH]")); \ // if (s.strm.avail_out == 0) return (last) ? finish_started : need_more; \ //} // =========================================================================== // Copy without compression as much as possible from the input stream, return // the current block state. // This function does not insert new strings in the dictionary since // uncompressible data is probably not useful. This function is used // only for the level=0 compression option. // NOTE: this function should be optimized to avoid extra copying from // window to pending_buf. 
// Copy without compression as much as possible from the input stream,
// returning the current block state (see the comment block above).
static block_state deflate_stored(deflate_state s, int flush)
{
    // Stored blocks are limited to 0xffff bytes, pending_buf is limited
    // to pending_buf_size, and each stored block has a 5 byte header:
    uint max_block_size=0xffff;
    uint max_start;

    if(max_block_size>s.pending_buf_size-5) max_block_size=s.pending_buf_size-5;

    // Copy as much as possible from input to output:
    for(; ; )
    {
        // Fill the window as much as possible:
        if(s.lookahead<=1)
        {
            //Assert(s.strstart<s.w_size+MAX_DIST(s)||s.block_start>=(int)s.w_size, "slide too late");

            fill_window(s);
            if(s.lookahead==0&&flush==Z_NO_FLUSH) return block_state.need_more;

            if(s.lookahead==0) break;   // flush the current block
        }
        //Assert(s.block_start>=0, "block gone");

        // Consume the entire lookahead; stored blocks need no matching.
        s.strstart+=s.lookahead;
        s.lookahead=0;

        // Emit a stored block if pending_buf will be full:
        max_start=(uint)s.block_start+max_block_size;
        if(s.strstart==0||(uint)s.strstart>=max_start)
        {
            // strstart == 0 is possible when wraparound on 16-bit machine
            s.lookahead=(uint)(s.strstart-max_start);   // excess goes back into lookahead
            s.strstart=(uint)max_start;
            //was FLUSH_BLOCK(s, 0);
            _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), 0);
            s.block_start=(int)s.strstart;
            flush_pending(s.strm);
            //Tracev((stderr,"[FLUSH]"));
            if(s.strm.avail_out==0) return block_state.need_more;
        }

        // Flush if we may have to slide, otherwise block_start may become
        // negative and the data will be gone:
        if(s.strstart-(uint)s.block_start>=(s.w_size-MIN_LOOKAHEAD))
        {
            //was FLUSH_BLOCK(s, 0);
            _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), 0);
            s.block_start=(int)s.strstart;
            flush_pending(s.strm);
            //Tracev((stderr,"[FLUSH]"));
            if(s.strm.avail_out==0) return block_state.need_more;
        }
    }

    //was FLUSH_BLOCK(s, flush==Z_FINISH);
    _tr_flush_block(s, s.block_start>=0?s.window:null, s.block_start>=0?s.block_start:0, (uint)((int)s.strstart-s.block_start), flush==Z_FINISH?1:0);
    s.block_start=(int)s.strstart;
    flush_pending(s.strm);
    //Tracev((stderr,"[FLUSH]"));
    if(s.strm.avail_out==0) return flush==Z_FINISH?block_state.finish_started:block_state.need_more;

    return flush==Z_FINISH?block_state.finish_done:block_state.block_done;
}
// ===========================================================================
// Fill the window when the lookahead becomes insufficient.
// Updates strstart and lookahead.
//
// IN assertion: lookahead < MIN_LOOKAHEAD
// OUT assertions: strstart <= window_size-MIN_LOOKAHEAD
// At least one byte has been read, or avail_in == 0; reads are
// performed for at least two bytes (required for the zip translate_eol
// option -- not supported here).
static void fill_window(deflate_state s) {
	uint n, m;
	uint more; // Amount of free space at the end of the window.
	uint wsize=s.w_size;

	do {
		more=(uint)(s.window_size-(uint)s.lookahead-(uint)s.strstart);

		// If the window is almost full and there is insufficient lookahead,
		// move the upper half to the lower one to make room in the upper half.
		if(s.strstart>=wsize+s.w_size-MIN_LOOKAHEAD) {
			//was memcpy(s.window, s.window+wsize, (uint)wsize);
			Array.Copy(s.window, wsize, s.window, 0, wsize);
			// Rebase every position that referred to the upper half.
			s.match_start-=wsize;
			s.strstart-=wsize; // we now have strstart >= MAX_DIST
			s.block_start-=(int)wsize;

			// Slide the hash table (could be avoided with 32 bit values
			// at the expense of memory usage). We slide even when level == 0
			// to keep the hash table consistent if we switch back to level > 0
			// later. (Using level 0 permanently is not an optimal usage of
			// zlib, so we don't care about this pathological case.)
			n=s.hash_size;
			uint p=n;
			do {
				m=s.head[--p];
				// Entries pointing into the discarded half become NIL.
				s.head[p]=(ushort)(m>=wsize?m-wsize:NIL);
			} while((--n)!=0);

			n=wsize;
			p=n;
			do {
				m=s.prev[--p];
				s.prev[p]=(ushort)(m>=wsize?m-wsize:NIL);
				// If n is not on any hash chain, prev[n] is garbage but
				// its value will never be used.
			} while((--n)!=0);
			more+=wsize;
		}
		if(s.strm.avail_in==0) return;

		// If there was no sliding:
		//    strstart <= WSIZE+MAX_DIST-1 && lookahead <= MIN_LOOKAHEAD - 1 &&
		//    more == window_size - lookahead - strstart
		// => more >= window_size - (MIN_LOOKAHEAD-1 + WSIZE + MAX_DIST-1)
		// => more >= window_size - 2*WSIZE + 2
		// In the BIG_MEM or MMAP case (not yet supported),
		//   window_size == input_size + MIN_LOOKAHEAD &&
		//   strstart + s.lookahead <= input_size => more >= MIN_LOOKAHEAD.
		// Otherwise, window_size == 2*WSIZE so more >= 2.
		// If there was sliding, more >= WSIZE. So in all cases, more >= 2.
		//Assert(more>=2, "more < 2");

		n=(uint)read_buf(s.strm, s.window, (int)(s.strstart+s.lookahead), more);
		s.lookahead+=n;

		// Initialize the hash value now that we have some input:
		if(s.lookahead>=MIN_MATCH) {
			s.ins_h=s.window[s.strstart];
			//was UPDATE_HASH(s, s.ins_h, s.window[s.strstart+1]);
			s.ins_h=((s.ins_h<<(int)s.hash_shift)^s.window[s.strstart+1])&s.hash_mask;
		}
		// If the whole input has less than MIN_MATCH bytes, ins_h is garbage,
		// but this is not important since only literal bytes will be emitted.
	} while(s.lookahead<MIN_LOOKAHEAD&&s.strm.avail_in!=0);

	// If the WIN_INIT bytes after the end of the current data have never been
	// written, then zero those bytes in order to avoid memory check reports of
	// the use of uninitialized (or uninitialised as Julian writes) bytes by
	// the longest match routines. Update the high water mark for the next
	// time through here. WIN_INIT is set to MAX_MATCH since the longest match
	// routines allow scanning to strstart + MAX_MATCH, ignoring lookahead.
	if(s.high_water<s.window_size) {
		uint curr=s.strstart+s.lookahead;
		uint init;

		if(s.high_water<curr) {
			// Previous high water mark below current data -- zero WIN_INIT
			// bytes or up to end of window, whichever is less.
			init=s.window_size-curr;
			if(init>WIN_INIT) init=WIN_INIT;
			for(int i=0; i<init; i++) s.window[curr+i]=0;
			s.high_water=curr+init;
		} else if(s.high_water<curr+WIN_INIT) {
			// High water mark at or above current data, but below current data
			// plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
			// to end of window, whichever is less.
			init=curr+WIN_INIT-s.high_water;
			if(init>s.window_size-s.high_water) init=s.window_size-s.high_water;
			for(int i=0; i<init; i++) s.window[s.high_water+i]=0;
			s.high_water+=init;
		}
	}
}
// --------------------------------------------------------------------------- // Optimized version for FASTEST only static uint longest_match_fast(deflate_state s, uint cur_match) { byte[] scan=s.window; int scan_ind=(int)s.strstart; // current string int len; // length of current match int strend_ind=(int)s.strstart+MAX_MATCH; // The code is optimized for HASH_BITS >= 8 and MAX_MATCH-2 multiple of 16. // It is easy to get rid of this optimization if necessary. //Assert(s.hash_bits >= 8 && MAX_MATCH == 258, "Code too clever"); //Assert((uint)s.strstart <= s.window_size-MIN_LOOKAHEAD, "need lookahead"); //Assert(cur_match < s.strstart, "no future"); byte[] match=s.window; int match_ind=(int)cur_match; // Return failure if the match length is less than 2: if(match[match_ind]!=scan[scan_ind]||match[match_ind+1]!=scan[scan_ind+1]) return MIN_MATCH-1; // The check at best_len-1 can be removed because it will be made // again later. (This heuristic is not always a win.) // It is not necessary to compare scan[2] and match[2] since they // are always equal when the other bytes match, given that // the hash keys are equal and that HASH_BITS >= 8. scan_ind+=2; match_ind+=2; //Assert(scan[scan_ind] == match[match_ind], "match[2]?"); // We check for insufficient lookahead only every 8th comparison; // the 256th check will be made at strstart+258. do { } while(scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&& scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&& scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&& scan[++scan_ind]==match[++match_ind]&&scan[++scan_ind]==match[++match_ind]&& scan_ind<strend_ind); //Assert(scan_ind <= (uint)(s.window_size-1), "wild scan"); len=MAX_MATCH-(int)(strend_ind-scan_ind); if(len<MIN_MATCH) return MIN_MATCH-1; s.match_start=cur_match; return (uint)len<=s.lookahead?(uint)len:s.lookahead; }
// =========================================================================== // Initialize a new block. static void init_block(deflate_state s) { // Initialize the trees. for(int n=0; n<L_CODES; n++) s.dyn_ltree[n].Freq=0; for(int n=0; n<D_CODES; n++) s.dyn_dtree[n].Freq=0; for(int n=0; n<BL_CODES; n++) s.bl_tree[n].Freq=0; s.dyn_ltree[END_BLOCK].Freq=1; s.opt_len=s.static_len=0; s.last_lit=s.matches=0; }
// ========================================================================= // This is another version of deflateInit with more compression options. The // fields next_in, zalloc, zfree and opaque must be initialized before by // the caller. // The method parameter is the compression method. It must be Z_DEFLATED in // this version of the library. // The windowBits parameter is the base two logarithm of the window size // (the size of the history buffer). It should be in the range 8..15 for this // version of the library. Larger values of this parameter result in better // compression at the expense of memory usage. The default value is 15 if // deflateInit is used instead. // windowBits can also be -8..-15 for raw deflate. In this case, -windowBits // determines the window size. deflate() will then generate raw deflate data // with no zlib header or trailer, and will not compute an adler32 check value. // windowBits can also be greater than 15 for optional gzip encoding. Add // 16 to windowBits to write a simple gzip header and trailer around the // compressed data instead of a zlib wrapper. The gzip header will have no // file name, no extra data, no comment, no modification time (set to zero), // no header crc, and the operating system will be set to 255 (unknown). If a // gzip stream is being written, strm.adler is a crc32 instead of an adler32. // The memLevel parameter specifies how much memory should be allocated // for the internal compression state. memLevel=1 uses minimum memory but // is slow and reduces compression ratio; memLevel=9 uses maximum memory // for optimal speed. The default value is 8. See zconf.h for total memory // usage as a function of windowBits and memLevel. // The strategy parameter is used to tune the compression algorithm. 
Use the // value Z_DEFAULT_STRATEGY for normal data, Z_FILTERED for data produced by a // filter (or predictor), Z_HUFFMAN_ONLY to force Huffman encoding only (no // string match), or Z_RLE to limit match distances to one (run-length // encoding). Filtered data consists mostly of small values with a somewhat // random distribution. In this case, the compression algorithm is tuned to // compress them better. The effect of Z_FILTERED is to force more Huffman // coding and less string matching; it is somewhat intermediate between // Z_DEFAULT and Z_HUFFMAN_ONLY. Z_RLE is designed to be almost as fast as // Z_HUFFMAN_ONLY, but give better compression for PNG image data. The strategy // parameter only affects the compression ratio but not the correctness of the // compressed output even if it is not set appropriately. Z_FIXED prevents the // use of dynamic Huffman codes, allowing for a simpler decoder for special // applications. // deflateInit2 returns Z_OK if success, Z_MEM_ERROR if there was not enough // memory, Z_STREAM_ERROR if a parameter is invalid (such as an invalid // method). msg is set to null if there is no error message. deflateInit2 does // not perform any compression: this will be done by deflate(). 
public static int deflateInit2(z_stream strm, int level, int method, int windowBits, int memLevel, int strategy) { if(strm==null) return Z_STREAM_ERROR; strm.msg=null; if(level==Z_DEFAULT_COMPRESSION) level=6; int wrap=1; if(windowBits<0) { // suppress zlib wrapper wrap=0; windowBits=-windowBits; } else if(windowBits>15) { wrap=2; // write gzip wrapper instead windowBits-=16; } if(memLevel<1||memLevel>MAX_MEM_LEVEL||method!=Z_DEFLATED||windowBits<8||windowBits>15||level<0||level>9|| strategy<0||strategy>Z_FIXED) return Z_STREAM_ERROR; if(windowBits==8) windowBits=9; // until 256-byte window bug fixed deflate_state s; try { s=new deflate_state(); } catch(Exception) { return Z_MEM_ERROR; } strm.state=s; s.strm=strm; s.wrap=wrap; s.w_bits=(uint)windowBits; s.w_size=1U<<(int)s.w_bits; s.w_mask=s.w_size-1; s.hash_bits=(uint)memLevel+7; s.hash_size=1U<<(int)s.hash_bits; s.hash_mask=s.hash_size-1; s.hash_shift=(s.hash_bits+MIN_MATCH-1)/MIN_MATCH; try { s.window=new byte[s.w_size*2]; s.prev=new ushort[s.w_size]; s.head=new ushort[s.hash_size]; s.high_water=0; // nothing written to s->window yet s.lit_bufsize=1U<<(memLevel+6); // 16K elements by default s.pending_buf=new byte[s.lit_bufsize*4]; s.pending_buf_size=s.lit_bufsize*4; s.d_buf=new ushort[s.lit_bufsize]; s.l_buf=new byte[s.lit_bufsize]; } catch(Exception) { s.status=FINISH_STATE; strm.msg=zError(Z_MEM_ERROR); deflateEnd(strm); return Z_MEM_ERROR; } s.level=level; s.strategy=strategy; s.method=(byte)method; return deflateReset(strm); }
// =========================================================================== // Remove the smallest element from the heap and recreate the heap with // one less element. Updates heap and heap_len. //#define pqremove(s, tree, top) \ // top = s.heap[SMALLEST]; \ // s.heap[SMALLEST] = s.heap[s.heap_len--]; \ // pqdownheap(s, tree, SMALLEST); // =========================================================================== // Compares to subtrees, using the tree depth as tie breaker when // the subtrees have equal frequency. This minimizes the worst case length. //#define smaller(tree, n, m, depth) \ // (tree[n].Freq < tree[m].Freq || \ // (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m])) // =========================================================================== // Restore the heap property by moving down the tree starting at node k, // exchanging a node with the smallest of its two sons if necessary, stopping // when the heap property is re-established (each father smaller than its // two sons). // tree: the tree to restore // k: node to move down static void pqdownheap(deflate_state s, ct_data[] tree, int k) { int v=s.heap[k]; int j=k<<1; // left son of k while(j<=s.heap_len) { // Set j to the smallest of the two sons: //was if (j < s.heap_len && smaller(tree, s.heap[j+1], s.heap[j], s.depth)) if(j<s.heap_len&&(tree[s.heap[j+1]].Freq<tree[s.heap[j]].Freq|| (tree[s.heap[j+1]].Freq==tree[s.heap[j]].Freq&&s.depth[s.heap[j+1]]<=s.depth[s.heap[j]]))) j++; // Exit if v is smaller than both sons //was if (smaller(tree, v, s.heap[j], s.depth)) break; if(tree[v].Freq<tree[s.heap[j]].Freq|| (tree[v].Freq==tree[s.heap[j]].Freq&&s.depth[v]<=s.depth[s.heap[j]])) break; // Exchange v with the smallest son s.heap[k]=s.heap[j]; k=j; // And continue down the tree, setting j to the left son of k j<<=1; } s.heap[k]=v; }
// ========================================================================= // Put a short in the pending buffer. The 16-bit value is put in MSB order. // IN assertion: the stream state is correct and there is enough room in // pending_buf. static void putShortMSB(deflate_state s, uint b) { //was put_byte(s, (byte)(b >> 8)); s.pending_buf[s.pending++]=(byte)(b >> 8); //was put_byte(s, (byte)(b & 0xff)); s.pending_buf[s.pending++]=(byte)(b & 0xff); }
// ===========================================================================
// Construct one Huffman tree and assigns the code bit strings and lengths.
// Update the total bit length for the current block.
// IN assertion: the field freq is set for all tree elements.
// OUT assertions: the fields len and code are set to the optimal bit length
// and corresponding code. The length opt_len is updated; static_len is
// also updated if stree is not null. The field max_code is set.
// desc: the tree descriptor
static void build_tree(deflate_state s, ref tree_desc desc) {
	ct_data[] tree=desc.dyn_tree;
	ct_data[] stree=desc.stat_desc.static_tree;
	int elems=desc.stat_desc.elems;
	int n, m; // iterate over heap elements
	int max_code=-1; // largest code with non zero frequency
	int node; // new node being created

	// Construct the initial heap, with least frequent element in
	// heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
	// heap[0] is not used.
	s.heap_len=0;
	s.heap_max=HEAP_SIZE;

	for(n=0; n<elems; n++) {
		if(tree[n].Freq!=0) {
			s.heap[++(s.heap_len)]=max_code=n;
			s.depth[n]=0;
		} else tree[n].Len=0; // unused symbols get length 0
	}

	// The pkzip format requires that at least one distance code exists,
	// and that at least one bit should be sent even if there is only one
	// possible code. So to avoid special checks later on we force at least
	// two codes of non zero frequency.
	while(s.heap_len<2) {
		node=s.heap[++(s.heap_len)]=(max_code<2?++max_code:0);
		tree[node].Freq=1;
		s.depth[node]=0;
		// Compensate opt_len/static_len for the artificially added code.
		s.opt_len--;
		if(stree!=null) s.static_len-=stree[node].Len;
		// node is 0 or 1 so it does not have extra bits
	}
	desc.max_code=max_code;

	// The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
	// establish sub-heaps of increasing lengths:
	for(n=s.heap_len/2; n>=1; n--) pqdownheap(s, tree, n);

	// Construct the Huffman tree by repeatedly combining the least two
	// frequent nodes.
	node=elems; // next internal node of the tree
	do {
		//was pqremove(s, tree, n); // n = node of least frequency
		n=s.heap[SMALLEST];
		s.heap[SMALLEST]=s.heap[s.heap_len--];
		pqdownheap(s, tree, SMALLEST);
		m=s.heap[SMALLEST]; // m = node of next least frequency

		s.heap[--(s.heap_max)]=n; // keep the nodes sorted by frequency
		s.heap[--(s.heap_max)]=m;

		// Create a new node father of n and m
		tree[node].Freq=(ushort)(tree[n].Freq+tree[m].Freq);
		s.depth[node]=(byte)((s.depth[n]>=s.depth[m]?s.depth[n]:s.depth[m])+1);
		tree[n].Dad=tree[m].Dad=(ushort)node;

		// and insert the new node in the heap
		s.heap[SMALLEST]=node++;
		pqdownheap(s, tree, SMALLEST);
	} while(s.heap_len>=2);

	s.heap[--(s.heap_max)]=s.heap[SMALLEST];

	// At this point, the fields freq and dad are set. We can now
	// generate the bit lengths.
	gen_bitlen(s, ref desc);

	// The field len is now set, we can generate the bit codes
	gen_codes(tree, max_code, s.bl_count);
}
// =========================================================================== // Flush the bit buffer and align the output on a byte boundary static void bi_windup(deflate_state s) { if(s.bi_valid>8) { //was put_short(s, s.bi_buf); s.pending_buf[s.pending++]=(byte)(s.bi_buf&0xff); s.pending_buf[s.pending++]=(byte)((ushort)s.bi_buf>>8); } else if(s.bi_valid>0) { //was put_byte(s, (unsigned char)s.bi_buf); s.pending_buf[s.pending++]=(byte)s.bi_buf; } s.bi_buf=0; s.bi_valid=0; }
// =========================================================================== // Send a literal or distance tree in compressed form, using the codes in bl_tree. // tree: the tree to be scanned // max_code: and its largest code of non zero frequency static void send_tree(deflate_state s, ct_data[] tree, int max_code) { int n; // iterates over all tree elements int prevlen=-1; // last emitted length int curlen; // length of current code int nextlen=tree[0].Len; // length of next code int count=0; // repeat count of the current code int max_count=7; // max repeat count int min_count=4; // min repeat count // tree[max_code+1].Len = -1; // guard already set if(nextlen==0) { max_count=138; min_count=3; } for(n=0; n<=max_code; n++) { curlen=nextlen; nextlen=tree[n+1].Len; if(++count<max_count&&curlen==nextlen) continue; if(count<min_count) { do { send_code(s, curlen, s.bl_tree); } while(--count!=0); } else if(curlen!=0) { if(curlen!=prevlen) { send_code(s, curlen, s.bl_tree); count--; } //Assert(count>=3&&count<=6, " 3_6?"); send_code(s, REP_3_6, s.bl_tree); send_bits(s, count-3, 2); } else if(count<=10) { send_code(s, REPZ_3_10, s.bl_tree); send_bits(s, count-3, 3); } else { send_code(s, REPZ_11_138, s.bl_tree); send_bits(s, count-11, 7); } count=0; prevlen=curlen; if(nextlen==0) { max_count=138; min_count=3; } else if(curlen==nextlen) { max_count=6; min_count=3; } else { max_count=7; min_count=4; } } }
// =========================================================================== // Copy a stored block, storing first the length and its // one's complement if requested. // buf: the input data // len: its length // header: true if block header must be written static void copy_block(deflate_state s, byte[] buf, int buf_ind, uint len, int header) { bi_windup(s); // align on byte boundary s.last_eob_len=8; // enough lookahead for inflate if(header!=0) { //was put_short(s, (unsigned short)len); s.pending_buf[s.pending++]=(byte)(((ushort)len)&0xff); s.pending_buf[s.pending++]=(byte)(((ushort)len)>>8); //was put_short(s, (unsigned short)~len); s.pending_buf[s.pending++]=(byte)(((ushort)~len)&0xff); s.pending_buf[s.pending++]=(byte)(((ushort)~len)>>8); } while(len--!=0) { //was put_byte(s, *buf++); s.pending_buf[s.pending++]=buf[buf_ind++]; } }
// =========================================================================== // Send the header for a block using dynamic Huffman trees: the counts, the // lengths of the bit length codes, the literal tree and the distance tree. // IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4. // lcodes, dcodes, blcodes: number of codes for each tree static void send_all_trees(deflate_state s, int lcodes, int dcodes, int blcodes) { int rank; // index in bl_order //Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes"); //Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES, "too many codes"); //Tracev((stderr, "\nbl counts: ")); send_bits(s, lcodes-257, 5); // not +255 as stated in appnote.txt send_bits(s, dcodes-1, 5); send_bits(s, blcodes-4, 4); // not -3 as stated in appnote.txt for(rank=0; rank<blcodes; rank++) { //Tracev((stderr, "\nbl code %2d ", bl_order[rank])); send_bits(s, s.bl_tree[bl_order[rank]].Len, 3); } //Tracev((stderr, "\nbl tree: sent %ld", s.bits_sent)); send_tree(s, s.dyn_ltree, lcodes-1); // literal tree //Tracev((stderr, "\nlit tree: sent %ld", s.bits_sent)); send_tree(s, s.dyn_dtree, dcodes-1); // distance tree //Tracev((stderr, "\ndist tree: sent %ld", s.bits_sent)); }
// =========================================================================== // Local (static) routines in this file. // // Send a code of the given tree. c and tree must not have side effects //#define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len) static void send_code(deflate_state s, int c, ct_data[] tree) { ushort value=tree[c].Code; ushort len=tree[c].Len; if(s.bi_valid>(int)Buf_size-len) { int val=value; s.bi_buf|=(ushort)(val<<s.bi_valid); //was put_short(s, s.bi_buf); s.pending_buf[s.pending++]=(byte)(s.bi_buf&0xff); s.pending_buf[s.pending++]=(byte)((ushort)s.bi_buf>>8); s.bi_buf=(ushort)(val>>(Buf_size-s.bi_valid)); s.bi_valid+=len-Buf_size; } else { s.bi_buf|=(ushort)(value<<s.bi_valid); s.bi_valid+=len; } }
// =========================================================================== // Send one empty static block to give enough lookahead for inflate. // This takes 10 bits, of which 7 may remain in the bit buffer. // The current inflate code requires 9 bits of lookahead. If the // last two codes for the previous block (real code plus EOB) were coded // on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode // the last real code. In this case we send two empty static blocks instead // of one. (There are no problems if the previous block is stored or fixed.) // To simplify the code, we assume the worst case of last real code encoded // on one bit only. static void _tr_align(deflate_state s) { send_bits(s, STATIC_TREES<<1, 3); send_code(s, END_BLOCK, static_ltree); bi_flush(s); // Of the 10 bits for the empty block, we have already sent // (10 - bi_valid) bits. The lookahead for the last real code (before // the EOB of the previous block) was thus at least one plus the length // of the EOB plus what we have just sent of the empty static block. if(1+s.last_eob_len+10-s.bi_valid<9) { send_bits(s, STATIC_TREES<<1, 3); send_code(s, END_BLOCK, static_ltree); bi_flush(s); } s.last_eob_len=7; }
// =========================================================================== // Output a short LSB first on the stream. // IN assertion: there is enough room in pendingBuf. //#define put_short(s, w) { \ // put_byte(s, (unsigned char)((w) & 0xff)); \ // put_byte(s, (unsigned char)((unsigned short)(w) >> 8)); \ //} // =========================================================================== // Send a value on a given number of bits. // IN assertion: length <= 16 and value fits in length bits. //#define send_bits(s, value, length) { \ // int len = length; \ // if(s.bi_valid > (int)Buf_size - len) { \ // int val = value; \ // s.bi_buf |= (val << s.bi_valid); \ // // put_short(s, s.bi_buf); \ // s.pending_buf[s.pending++] = (unsigned char)(s.bi_buf & 0xff);\ // s.pending_buf[s.pending++] = (unsigned char)((unsigned short)s.bi_buf >> 8);\ // s.bi_buf = (unsigned short)val >> (Buf_size - s.bi_valid); \ // s.bi_valid += len - Buf_size; \ // } else { \ // s.bi_buf |= (value) << s.bi_valid; \ // s.bi_valid += len; \ // } \ // } static void send_bits(deflate_state s, int value, int length) { int len=length; if(s.bi_valid>(int)Buf_size-len) { int val=value; s.bi_buf|=(ushort)(val<<s.bi_valid); //was put_short(s, s.bi_buf); s.pending_buf[s.pending++]=(byte)(s.bi_buf&0xff); s.pending_buf[s.pending++]=(byte)((ushort)s.bi_buf>>8); s.bi_buf=(ushort)(val>>(Buf_size-s.bi_valid)); s.bi_valid+=len-Buf_size; } else { s.bi_buf|=(ushort)(value<<s.bi_valid); s.bi_valid+=len; } }
// =========================================================================== // Save the match info and tally the frequency counts. Return true if // the current block must be flushed. // dist: distance of matched string // lc: match length-MIN_MATCH or unmatched char (if dist==0) static bool _tr_tally(deflate_state s, uint dist, uint lc) { s.d_buf[s.last_lit]=(ushort)dist; s.l_buf[s.last_lit++]=(byte)lc; if(dist==0) { // lc is the unmatched char s.dyn_ltree[lc].Freq++; } else { s.matches++; // Here, lc is the match length - MIN_MATCH dist--; // dist = match distance - 1 //Assert((ushort)dist < (ushort)MAX_DIST(s) && // (ushort)lc <= (ushort)(MAX_MATCH-MIN_MATCH) && // (ushort)(dist < 256 ? _dist_code[dist] : _dist_code[256+(dist>>7)]) < (ushort)D_CODES, // "_tr_tally: bad match"); s.dyn_ltree[_length_code[lc]+LITERALS+1].Freq++; s.dyn_dtree[(dist<256?_dist_code[dist]:_dist_code[256+(dist>>7)])].Freq++; } return (s.last_lit==s.lit_bufsize-1); // We avoid equality with lit_bufsize because of wraparound at 64K // on 16 bit machines and because stored blocks are restricted to // 64K-1 bytes. }
// the arguments must not have side effects // =========================================================================== // Initialize the tree data structures for a new zlib stream. static void _tr_init(deflate_state s) { s.l_desc.dyn_tree=s.dyn_ltree; s.l_desc.stat_desc=static_l_desc; s.d_desc.dyn_tree=s.dyn_dtree; s.d_desc.stat_desc=static_d_desc; s.bl_desc.dyn_tree=s.bl_tree; s.bl_desc.stat_desc=static_bl_desc; s.bi_buf=0; s.bi_valid=0; s.last_eob_len=8; // enough lookahead for inflate // Initialize the first block of the first file: init_block(s); }
// =========================================================================== // Flush the bit buffer, keeping at most 7 bits in it. static void bi_flush(deflate_state s) { if(s.bi_valid==16) { //was put_short(s, s.bi_buf); s.pending_buf[s.pending++]=(byte)(s.bi_buf&0xff); s.pending_buf[s.pending++]=(byte)((ushort)s.bi_buf>>8); s.bi_buf=0; s.bi_valid=0; } else if(s.bi_valid>=8) { //was put_byte(s, (unsigned char)s.bi_buf); s.pending_buf[s.pending++]=(byte)s.bi_buf; s.bi_buf>>=8; s.bi_valid-=8; } }
// =========================================================================== // Initialize the "longest match" routines for a new zlib stream static void lm_init(deflate_state s) { s.window_size=(uint)2*s.w_size; s.head[s.hash_size-1]=NIL; //was memset((byte*)s.head, 0, (uint)(s.hash_size-1)*sizeof(*s.head)); for(int i=0; i<(s.hash_size-1); i++) s.head[i]=0; // Set the default configuration parameters: s.max_lazy_match=configuration_table[s.level].max_lazy; s.good_match=configuration_table[s.level].good_length; s.nice_match=configuration_table[s.level].nice_length; s.max_chain_length=configuration_table[s.level].max_chain; s.strstart=0; s.block_start=0; s.lookahead=0; s.match_length=s.prev_length=MIN_MATCH-1; s.match_available=0; s.ins_h=0; }