/// <summary>
/// Writes the checksum entry for one chunk of data to the signature output
/// </summary>
/// <param name="signature">The signature whose Output receives the entry and whose StrongLength limits the strong checksum</param>
/// <param name="buffer">The array holding the chunk to create a checksum entry for</param>
/// <param name="index">The position in the buffer where the chunk starts</param>
/// <param name="count">The number of bytes in the chunk</param>
private static void AddChunk(Signature signature, byte[] buffer, int index, int count)
{
    //Weak checksum (Adler-32): run through RDiffBinary.FixEndian and written as 4 bytes
    byte[] weakBytes =
        RDiffBinary.FixEndian(
            BitConverter.GetBytes(
                Adler32Checksum.Calculate(buffer, index, count)));
    signature.Output.Write(weakBytes, 0, 4);

    //Strong checksum: only the first StrongLength bytes of the hash are written
    byte[] strongBytes = Utility.Hash.ComputeHash(buffer, index, count);
    signature.Output.Write(strongBytes, 0, signature.StrongLength);
}
/// <summary>
/// Appends the checksum entry for one chunk of data to the signature byte list
/// </summary>
/// <param name="buffer">The array holding the chunk to create a checksum entry for</param>
/// <param name="index">The position in the buffer where the chunk starts</param>
/// <param name="count">The number of bytes in the chunk</param>
private void AddSignatureChunk(byte[] buffer, int index, int count)
{
    //Weak checksum (Adler-32) goes in first, after RDiffBinary.FixEndian
    byte[] weakBytes =
        RDiffBinary.FixEndian(
            BitConverter.GetBytes(
                Adler32Checksum.Calculate(buffer, index, count)));
    _signatureByteList.AddRange(weakBytes);

    //Strong checksum follows; only the leading DEFAULT_STRONG_LEN bytes of the hash are kept
    byte[] strongHash = Utility.Hash.ComputeHash(buffer, index, count);
    int position = 0;
    while (position < DEFAULT_STRONG_LEN)
    {
        _signatureByteList.Add(strongHash[position]);
        position++;
    }
}
/// <summary>
/// Generates a delta file from input, and writes it to output.
/// The source is read through a fixed-size working buffer; at each position the block
/// is looked up in the checksum file. Matched regions are emitted through WriteCopy,
/// unmatched regions through CommitUnmatchedData, and the output is terminated with
/// RDiffBinary.EndCommand and flushed.
/// </summary>
/// <param name="sourceData">The stream to generate the delta from</param>
/// <param name="deltaData">The stream to write the delta to</param>
/// <param name="checksumFile">The checksum reader that supplies the block length and the weak/strong checksum lookups</param>
/// <exception cref="Exception">Thrown when both matched and unmatched data are pending after the main loop</exception>
public static void GenerateDeltaFile(Stream sourceData, Stream deltaData, ChecksumFileReader checksumFile)
{
    #region variables
    // The matched data block (accumulated run of matches, not yet written)
    DeltaBlock <long> matchedBufferIndex;
    // The unmatched data block (offset/size inside workingData, not yet written)
    DeltaBlock <int> unmatchedBufferIndex;
    // The buffer block (current read offset and number of valid bytes in workingData)
    DeltaBlock <int> bufferIndex;
    // The moving frame that looks for the blocks
    BlockLookup lookup;
    byte[] workingData;
    byte[] tempWork;
    uint weakChecksum;
    int lastPossibleBlock;
    int remainingBytes;
    // Set when a buffer reload is forced; also makes the next iteration skip the lookup
    bool loadBuffer = false;
    // Set when the weak checksum must be recomputed after the buffer has been refilled
    bool recalculateWeakChecksum = false;
    // Set once a read returned no data, so the source stream is never read again
    bool streamExhausted = false;
    bool doesWeakChecksumExist;
    // Set once the stream is exhausted and at most one block of data remains
    bool finishedDeltaRun;
    int tempRead;
    #endregion

    #region Initialize the iteration
    matchedBufferIndex = new DeltaBlock <long>(0, 0);
    unmatchedBufferIndex = new DeltaBlock <int>(0, 0);
    bufferIndex = new DeltaBlock <int>(0, 0);
    lookup = new BlockLookup(checksumFile.BlockLength);

    //We use statically allocated buffers, and we need two buffers
    // to prevent the copy from allocating a temp buffer
    // (the refill below copies from workingData into tempWork and then swaps them)
    workingData = new byte[BUFFER_SIZE];
    tempWork = new byte[BUFFER_SIZE];

    loadBuffer = false;
    recalculateWeakChecksum = false;
    streamExhausted = false;
    finishedDeltaRun = false;
    #endregion

    //Read the initial buffer block
    bufferIndex.Size = Utility.ForceStreamRead(sourceData, workingData);
    //If the source is shorter than one block, shrink the block to what was read;
    // a block length of zero means the loop below is skipped entirely
    lookup.BlockLength = Math.Min(lookup.BlockLength, bufferIndex.Size);

    //Setup the initial checksum
    //Calculate the Adler checksum of the first block in the buffer
    weakChecksum = Adler32Checksum.Calculate(workingData, 0, lookup.BlockLength);
    lookup.ResetMatchIndex();

    //The loop ends when the block length reaches zero, which happens in the final
    // unmatched round or when the last reload finds no remaining bytes
    while (lookup.BlockLength > 0)
    {
        //Check if the block matches somewhere, if we have force-reloaded the buffer,
        //the check has already been made
        if (loadBuffer)
        {
            loadBuffer = false;
        }
        else
        {
            lookup.MatchedIndex = checksumFile.LookupChunck(
                weakChecksum, workingData, bufferIndex.Offset, lookup.BlockLength, lookup.NextMatchKey);
        }

        if (lookup.IsMatch)
        {
            // We have a match, flush the unmatched chunk into the result
            // But the match is offset.
            CommitUnmatchedData(workingData, unmatchedBufferIndex, deltaData);

            // Matched a stream chunk in the weak checksum. We don't yet know where the actual chunk is.
            // Find the chunk
            // NOTE(review): LookupMatch presumably extends matchedBufferIndex and advances bufferIndex - confirm
            LookupMatch(matchedBufferIndex, bufferIndex, lookup, deltaData);

            //Less than a full block is left in the buffer
            if (bufferIndex.Size - bufferIndex.Offset < lookup.BlockLength)
            {
                //If this is the last chunk, compare to the last hash
                if (finishedDeltaRun)
                {
                    lookup.BlockLength = Math.Min(lookup.BlockLength, bufferIndex.Size - bufferIndex.Offset);
                }
                else //We are out of buffer, reload
                {
                    recalculateWeakChecksum = true;
                }
            }

            //Reset the checksum to fit the new block, but skip it if we are out of data
            if (!recalculateWeakChecksum)
            {
                weakChecksum = Adler32Checksum.Calculate(workingData, bufferIndex.Offset, lookup.BlockLength);
            }
        }
        else
        {
            //At this point we have not advanced the buffer_index, so the weak_checksum matches the data,
            // even if we arrive here after reloading the buffer

            //No match, flush accumulated matches, if any
            if (matchedBufferIndex.Size > 0)
            {
                //Send the matching bytes as a copy
                WriteCopy(matchedBufferIndex, deltaData);
                matchedBufferIndex.Reset(0, 0);

                //We do not immediately start tapping the unmatched bytes,
                // because the buffer may be nearly empty, and we
                // want to gather as many unmatched bytes as possible
                // to avoid the instruction overhead in the file
                if (bufferIndex.Offset != 0)
                {
                    loadBuffer = true;
                }
            }
            else
            {
                // No match
                //The last offset at which a full block plus one look-ahead byte still fits in the buffer
                lastPossibleBlock = bufferIndex.Size - lookup.BlockLength;

                //Start a new unmatched run at the current offset, unless one is already open
                if (unmatchedBufferIndex.Size == 0)
                {
                    unmatchedBufferIndex.Offset = bufferIndex.Offset;
                }

                doesWeakChecksumExist = false;

                //Local speedup for long non-matching regions
                while (bufferIndex.Offset < lastPossibleBlock && !doesWeakChecksumExist)
                {
                    //Roll the weak checksum buffer by 1 byte until you find a match or you reach the end
                    weakChecksum =
                        Adler32Checksum.Roll(
                            workingData[bufferIndex.Offset],
                            workingData[bufferIndex.Offset + lookup.BlockLength],
                            weakChecksum,
                            lookup.BlockLength);

                    //Check if the new weak checksum roll is a match
                    // (only the upper 16 bits of the checksum are passed to the existence check)
                    doesWeakChecksumExist = checksumFile.DoesWeakExist(weakChecksum >> 16);

                    //Update offset for future reference
                    bufferIndex.Offset++;
                }

                // Calculating the unmatched chunk size
                unmatchedBufferIndex.Size = bufferIndex.Offset - unmatchedBufferIndex.Offset;

                //If this is the last block, claim the remaining bytes as unmatched
                if (finishedDeltaRun)
                {
                    //There may be a minor optimization possible here, as the last chunk of the original file may still fit
                    // and be smaller than the block length
                    unmatchedBufferIndex.Size += lookup.BlockLength;
                    //A zero block length terminates the outer loop
                    lookup.BlockLength = 0;
                }
            }
        }

        //If we are out of buffer, try to load some more
        if (loadBuffer || bufferIndex.Size - bufferIndex.Offset <= checksumFile.BlockLength)
        {
            // This section will flush the located unmatched into the delta stream (if any)
            // And will refill the working buffer with the remainder of the previous buffer
            // And fresh data from the source stream

            //The number of unused bytes in the buffer
            remainingBytes = bufferIndex.Size - bufferIndex.Offset;

            //If we have read the last bytes or the buffer is already full, skip this
            if (!finishedDeltaRun && tempWork.Length - remainingBytes > 0)
            {
                // Move the unused tail of the working buffer to the start of the temp work buffer
                Buffer.BlockCopy(workingData, bufferIndex.Offset, tempWork, 0, remainingBytes);

                //Prevent reading the stream after it has been exhausted because some streams break on that
                if (streamExhausted)
                {
                    tempRead = 0;
                }
                else
                {
                    // Fill the remainder of tempWork from the source data
                    tempRead = Utility.ForceStreamRead(
                        sourceData, tempWork, remainingBytes, tempWork.Length - remainingBytes);
                }

                // If anything was read
                if (tempRead > 0)
                {
                    //We are about to discard some data, if it is unmatched, write it to stream
                    CommitUnmatchedData(workingData, unmatchedBufferIndex, deltaData);

                    //Now swap the arrays, so the refilled buffer becomes the working buffer
                    SwapBuffers(ref workingData, ref tempWork);
                    bufferIndex.Offset = 0;
                    bufferIndex.Size = remainingBytes + tempRead;
                }
                else
                {
                    //Prevent reading the stream after it has been exhausted because some streams break on that
                    streamExhausted = true;

                    if (remainingBytes <= checksumFile.BlockLength)
                    {
                        //Mark as done
                        finishedDeltaRun = true;
                        //The last round has a smaller block length
                        // (zero remaining bytes ends the outer loop)
                        lookup.BlockLength = remainingBytes;
                    }
                }

                //If we run out of buffer, we may need to recalculate the checksum
                if (recalculateWeakChecksum)
                {
                    weakChecksum = Adler32Checksum.Calculate(workingData, bufferIndex.Offset, lookup.BlockLength);
                    recalculateWeakChecksum = false;
                }
            }
        }
    }

    //There cannot be both matched and unmatched bytes written
    if (matchedBufferIndex.Size > 0 && unmatchedBufferIndex.Size > 0)
    {
        throw new Exception(Strings.DeltaFile.InternalBufferError);
    }

    //Commit all remaining matched content
    if (matchedBufferIndex.Size > 0)
    {
        WriteCopy(matchedBufferIndex, deltaData);
    }

    //Commit all remaining unmatched content
    CommitUnmatchedData(workingData, unmatchedBufferIndex, deltaData);

    //Write end command
    deltaData.WriteByte((byte)RDiffBinary.EndCommand);
    deltaData.Flush();
}