/// <summary>
/// Produces a delta describing how to rebuild <paramref name="target"/> from
/// <paramref name="origin"/>.
/// </summary>
/// <remarks>
/// The delta is emitted through a <c>Writer</c> in this order: the target length
/// and a '\n'; then any number of insert records ("N:" followed by N bytes taken
/// from target) and copy records ("N@OFS," meaning N bytes starting at offset OFS
/// of origin); then the value of <c>Checksum(target)</c> and ';'.
/// How integers are rendered is decided by <c>Writer.PutInt</c>.
/// </remarks>
/// <param name="origin">Source bytes that copy records refer to.</param>
/// <param name="target">Bytes the delta has to reproduce.</param>
/// <returns>The bytes returned by <c>Writer.ToArray()</c> once all records are written.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="origin"/> or <paramref name="target"/> is null.
/// </exception>
public static byte[] Create(byte[] origin, byte[] target) {
    if (origin == null) {
        throw new System.ArgumentNullException("origin");
    }
    if (target == null) {
        throw new System.ArgumentNullException("target");
    }

    var zDelta = new Writer();
    int lenOut = target.Length;
    int lenSrc = origin.Length;
    int i;

    // Header: length of the target followed by a newline.
    zDelta.PutInt((uint)lenOut);
    zDelta.PutChar('\n');

    // If the source is very small we have no chance of ever doing a copy
    // command. Output a single literal segment for the entire target and exit.
    if (lenSrc <= NHASH) {
        zDelta.PutInt((uint)lenOut);
        zDelta.PutChar(':');
        zDelta.PutArray(target, 0, lenOut);
        zDelta.PutInt(Checksum(target));
        zDelta.PutChar(';');
        return zDelta.ToArray();
    }

    // Hash table used to locate matching sections in the source.
    // landmark[hv] is the first block index whose hash falls in bucket hv;
    // collide[b] is the next block index in the same bucket; -1 ends a chain.
    int nHash = lenSrc / NHASH;
    int[] collide = new int[nHash];
    int[] landmark = new int[nHash];
    for (i = 0; i < nHash; i++) {
        collide[i] = -1;
        landmark[i] = -1;
    }

    int hv;
    RollingHash h = new RollingHash();

    // Walk the full NHASH-sized blocks of origin from the last one down to the
    // first. Each block is pushed onto the front of its bucket's chain, so a
    // chain ends up ordered from the lowest block index upwards.
    // The braces matter: all four statements must run for every block.
    for (i = lenSrc - lenSrc % NHASH - NHASH; i >= 0; i -= NHASH) {
        h.Init(origin, i); // presumably hashes the NHASH bytes starting at i - confirm in RollingHash
        hv = (int)(h.Value() % nHash);
        collide[i / NHASH] = landmark[hv];
        landmark[hv] = i / NHASH;
    }

    int _base = 0; // index of the first target byte not yet covered by a record
    int iSrc, iBlock;
    int bestCnt, bestOfst, bestLitsz;
    while (_base + NHASH < lenOut) {
        bestOfst = 0;
        bestLitsz = 0;
        bestCnt = 0;
        h.Init(target, _base);
        i = 0; // the hash window is anchored at target[_base + i]

        while (true) {
            int limit = 250; // upper bound on chain entries examined per window position
            hv = (int)(h.Value() % nHash);
            iBlock = landmark[hv];
            while (iBlock >= 0 && (limit--) > 0) {
                // The hash window points at a candidate landmark block, which
                // still has to be verified byte by byte. The comparison is
                // anchored at origin[iSrc] and target[_base + i] and never
                // looks before target[_base] or past the end of either array.
                int cnt, ofst, litsz;
                int j, k, x, y;
                int sz;

                // Match forwards from the anchor; after the decrement j is one
                // less than the number of bytes that matched.
                iSrc = iBlock * NHASH;
                for (j = 0, x = iSrc, y = _base + i; x < lenSrc && y < lenOut; j++, x++, y++) {
                    if (origin[x] != target[y]) {
                        break;
                    }
                }
                j--;

                // Match backwards from just before the anchor; after the
                // decrement k is the number of bytes that matched.
                for (k = 1; k < iSrc && k <= i; k++) {
                    if (origin[iSrc - k] != target[_base + i - k]) {
                        break;
                    }
                }
                k--;

                // Offset and size of the matching region.
                ofst = iSrc - k;
                cnt = j + k + 1;
                litsz = i - k; // literal bytes that precede the match

                // Bytes needed for the insert and copy commands themselves, not
                // counting the inserted text (the 3 presumably covers the
                // ':', '@' and ',' separators).
                sz = DigitCount(i - k) + DigitCount(cnt) + DigitCount(ofst) + 3;
                if (cnt >= sz && cnt > bestCnt) {
                    // Keep this match only if it is the longest so far and
                    // does not make the delta larger than a literal would.
                    bestCnt = cnt;
                    bestOfst = ofst;
                    bestLitsz = litsz;
                }

                // Next block in the same bucket.
                iBlock = collide[iBlock];
            }

            if (bestCnt > 0) {
                // A worthwhile copy was found.
                if (bestLitsz > 0) {
                    // Emit the unmatched bytes in front of it as an insert.
                    // PutArray is called here with what looks like (array, start, end).
                    zDelta.PutInt((uint)bestLitsz);
                    zDelta.PutChar(':');
                    zDelta.PutArray(target, _base, _base + bestLitsz);
                    _base += bestLitsz;
                }
                _base += bestCnt;
                zDelta.PutInt((uint)bestCnt);
                zDelta.PutChar('@');
                zDelta.PutInt((uint)bestOfst);
                zDelta.PutChar(',');
                break;
            }

            // No match so far.
            if (_base + i + NHASH >= lenOut) {
                // The window reached the end of target without a match:
                // emit everything that is left as one insert.
                zDelta.PutInt((uint)(lenOut - _base));
                zDelta.PutChar(':');
                zDelta.PutArray(target, _base, lenOut);
                _base = lenOut;
                break;
            }

            // Slide the hash window forward by one byte and keep looking.
            h.Next(target[_base + i + NHASH]);
            i++;
        }
    }

    // Final insert for any trailing bytes the loop above did not cover.
    if (_base < lenOut) {
        zDelta.PutInt((uint)(lenOut - _base));
        zDelta.PutChar(':');
        zDelta.PutArray(target, _base, lenOut);
    }

    // Closing checksum record.
    zDelta.PutInt(Checksum(target));
    zDelta.PutChar(';');
    return zDelta.ToArray();
}
/// <summary>
/// Builds a delta that encodes <paramref name="target"/> relative to
/// <paramref name="origin"/>.
/// </summary>
/// <remarks>
/// Records are written through a <c>Writer</c> in this order: the target length
/// and a '\n'; then zero or more insert records ("N:" followed by N bytes of
/// target) and copy records ("N@OFS," meaning N bytes starting at offset OFS of
/// origin); then the value of <c>Checksum(target)</c> and ';'.
/// The textual form of the integers is decided by <c>Writer.PutInt</c>.
/// When origin has at most NHASH bytes the whole target is written as a single
/// insert record.
/// </remarks>
/// <param name="origin">Source bytes that copy records refer to.</param>
/// <param name="target">Bytes the delta has to reproduce.</param>
/// <returns>The bytes returned by <c>Writer.ToArray()</c> once all records are written.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="origin"/> or <paramref name="target"/> is null.
/// </exception>
public static byte[] Create(byte[] origin, byte[] target) {
    // Fail with a descriptive exception instead of a NullReferenceException
    // from the first .Length access.
    if (origin == null) {
        throw new System.ArgumentNullException("origin");
    }
    if (target == null) {
        throw new System.ArgumentNullException("target");
    }

    var zDelta = new Writer();
    int lenOut = target.Length;
    int lenSrc = origin.Length;
    int i;

    // Header: length of the target followed by a newline.
    zDelta.PutInt((uint)lenOut);
    zDelta.PutChar('\n');

    // If the source is very small we have no chance of ever doing a copy
    // command. Output a single literal segment for the entire target and exit.
    if (lenSrc <= NHASH) {
        zDelta.PutInt((uint)lenOut);
        zDelta.PutChar(':');
        zDelta.PutArray(target, 0, lenOut);
        zDelta.PutInt(Checksum(target));
        zDelta.PutChar(';');
        return zDelta.ToArray();
    }

    // Hash table used to locate matching sections in the source.
    // landmark[hv] is the most recently added block index for bucket hv;
    // collide[b] is the block added to that bucket before b; -1 ends a chain.
    int nHash = lenSrc / NHASH;
    int[] collide = new int[nHash];
    int[] landmark = new int[nHash];
    for (i = 0; i < nHash; i++) {
        collide[i] = -1;
        landmark[i] = -1;
    }

    int hv;
    RollingHash h = new RollingHash();

    // Register every block that starts at a multiple of NHASH below
    // lenSrc - NHASH, pushing it onto the front of its bucket's chain.
    for (i = 0; i < lenSrc - NHASH; i += NHASH) {
        h.Init(origin, i); // presumably hashes the NHASH bytes starting at i - confirm in RollingHash
        hv = (int)(h.Value() % nHash);
        collide[i / NHASH] = landmark[hv];
        landmark[hv] = i / NHASH;
    }

    int _base = 0; // index of the first target byte not yet covered by a record
    int iSrc, iBlock;
    int bestCnt, bestOfst, bestLitsz;
    while (_base + NHASH < lenOut) {
        bestOfst = 0;
        bestLitsz = 0;
        bestCnt = 0;
        h.Init(target, _base);
        i = 0; // the hash window is anchored at target[_base + i]

        while (true) {
            int limit = 250; // upper bound on chain entries examined per window position
            hv = (int)(h.Value() % nHash);
            iBlock = landmark[hv];
            while (iBlock >= 0 && (limit--) > 0) {
                // The hash window points at a candidate landmark block, which
                // still has to be verified byte by byte. The comparison is
                // anchored at origin[iSrc] and target[_base + i] and never
                // looks before target[_base] or past the end of either array.
                int cnt, ofst, litsz;
                int j, k, x, y;
                int sz;

                // Match forwards from the anchor; after the decrement j is one
                // less than the number of bytes that matched.
                iSrc = iBlock * NHASH;
                for (j = 0, x = iSrc, y = _base + i; x < lenSrc && y < lenOut; j++, x++, y++) {
                    if (origin[x] != target[y]) {
                        break;
                    }
                }
                j--;

                // Match backwards from just before the anchor; after the
                // decrement k is the number of bytes that matched.
                for (k = 1; k < iSrc && k <= i; k++) {
                    if (origin[iSrc - k] != target[_base + i - k]) {
                        break;
                    }
                }
                k--;

                // Offset and size of the matching region.
                ofst = iSrc - k;
                cnt = j + k + 1;
                litsz = i - k; // literal bytes that precede the match

                // Bytes needed for the insert and copy commands themselves, not
                // counting the inserted text (the 3 presumably covers the
                // ':', '@' and ',' separators).
                sz = DigitCount(i - k) + DigitCount(cnt) + DigitCount(ofst) + 3;
                if (cnt >= sz && cnt > bestCnt) {
                    // Keep this match only if it is the longest so far and
                    // does not make the delta larger than a literal would.
                    bestCnt = cnt;
                    bestOfst = ofst;
                    bestLitsz = litsz;
                }

                // Next block in the same bucket.
                iBlock = collide[iBlock];
            }

            if (bestCnt > 0) {
                // A worthwhile copy was found.
                if (bestLitsz > 0) {
                    // Emit the unmatched bytes in front of it as an insert.
                    // PutArray is called here with what looks like (array, start, end).
                    zDelta.PutInt((uint)bestLitsz);
                    zDelta.PutChar(':');
                    zDelta.PutArray(target, _base, _base + bestLitsz);
                    _base += bestLitsz;
                }
                _base += bestCnt;
                zDelta.PutInt((uint)bestCnt);
                zDelta.PutChar('@');
                zDelta.PutInt((uint)bestOfst);
                zDelta.PutChar(',');
                break;
            }

            // No match so far.
            if (_base + i + NHASH >= lenOut) {
                // The window reached the end of target without a match:
                // emit everything that is left as one insert.
                zDelta.PutInt((uint)(lenOut - _base));
                zDelta.PutChar(':');
                zDelta.PutArray(target, _base, lenOut);
                _base = lenOut;
                break;
            }

            // Slide the hash window forward by one byte and keep looking.
            h.Next(target[_base + i + NHASH]);
            i++;
        }
    }

    // Final insert for any trailing bytes the loop above did not cover.
    if (_base < lenOut) {
        zDelta.PutInt((uint)(lenOut - _base));
        zDelta.PutChar(':');
        zDelta.PutArray(target, _base, lenOut);
    }

    // Closing checksum record.
    zDelta.PutInt(Checksum(target));
    zDelta.PutChar(';');
    return zDelta.ToArray();
}