Exemplo n.º 1
0
        /// <summary>
        /// Generates a delta file from input, and writes it to output.
        /// The source stream is scanned through a working buffer; each block-sized window is looked up
        /// in <paramref name="checksumFile"/>. Matching blocks are accumulated and emitted through
        /// <c>WriteCopy</c>, non-matching bytes are emitted through <c>CommitUnmatchedData</c>, and the
        /// output is terminated with <c>RDiffBinary.EndCommand</c> and flushed.
        /// </summary>
        /// <param name="sourceData">The stream to generate the delta from</param>
        /// <param name="deltaData">The stream to write the delta to</param>
        /// <param name="checksumFile">The checksum reader that supplies the block length and the block lookups</param>
        /// <exception cref="Exception">
        /// Thrown when, after the scan, both matched and unmatched data are still pending (internal state error)
        /// </exception>
        public static void GenerateDeltaFile(Stream sourceData, Stream deltaData, ChecksumFileReader checksumFile)
        {
            #region variables

            // The matched data block (accumulated run of matched blocks not yet written)
            DeltaBlock <long> matchedBufferIndex;

            // The unmatched data block (offset/size of pending unmatched bytes inside workingData)
            DeltaBlock <int> unmatchedBufferIndex;

            // The buffer block (Offset = current scan position, Size = number of valid bytes in workingData)
            DeltaBlock <int> bufferIndex;

            // The moving frame that looks for the blocks
            BlockLookup lookup;

            // workingData is the buffer being scanned; tempWork is the spare buffer used while refilling
            byte[] workingData;
            byte[] tempWork;

            // Weak (Adler32) checksum of the window starting at bufferIndex.Offset
            uint weakChecksum;

            // Last buffer offset at which a full window still fits
            int lastPossibleBlock;
            // Bytes in workingData that have not been consumed yet
            int remainingBytes;

            // loadBuffer: force a buffer refill at the end of this iteration and skip the next lookup
            bool loadBuffer = false;
            // recalculateWeakChecksum: the checksum could not be computed because the buffer ran short;
            // it is recomputed in the refill section
            bool recalculateWeakChecksum = false;
            // streamExhausted: a read returned no data, so the source stream must not be read again
            bool streamExhausted         = false;
            // doesWeakChecksumExist: result of the quick weak-checksum probe in the rolling loop
            bool doesWeakChecksumExist;
            // finishedDeltaRun: the stream is exhausted and at most one (possibly short) block remains
            bool finishedDeltaRun;
            // Number of bytes obtained by the latest refill read
            int  tempRead;

            #endregion

            #region Initialize the iteration

            matchedBufferIndex   = new DeltaBlock <long>(0, 0);
            unmatchedBufferIndex = new DeltaBlock <int>(0, 0);
            bufferIndex          = new DeltaBlock <int>(0, 0);
            lookup = new BlockLookup(checksumFile.BlockLength);

            //We use statically allocated buffers, and we need two buffers
            // to prevent Array.Copy from allocating a temp buffer
            workingData = new byte[BUFFER_SIZE];
            tempWork    = new byte[BUFFER_SIZE];

            // NOTE(review): the first three flags were already initialized at their declaration above
            loadBuffer = false;
            recalculateWeakChecksum = false;
            streamExhausted         = false;
            finishedDeltaRun        = false;

            #endregion

            //Read the initial buffer block
            bufferIndex.Size   = Utility.ForceStreamRead(sourceData, workingData);
            // If the source is shorter than one block, shrink the window to what was read
            // (a zero-length window means the main loop below is skipped entirely)
            lookup.BlockLength = Math.Min(lookup.BlockLength, bufferIndex.Size);

            //Setup the initial checksum
            //Calculate the Adler checksum of the buffer
            weakChecksum = Adler32Checksum.Calculate(workingData, 0, lookup.BlockLength);

            lookup.ResetMatchIndex();

            // The loop ends when the window length becomes 0, which happens either in the
            // "last block" branch below or when the refill section finds no remaining bytes
            while (lookup.BlockLength > 0)
            {
                //Check if the block matches somewhere, if we have force-reloaded the buffer,
                //the check has already been made
                if (loadBuffer)
                {
                    loadBuffer = false;
                }
                else
                {
                    lookup.MatchedIndex = checksumFile.LookupChunck(
                        weakChecksum,
                        workingData,
                        bufferIndex.Offset,
                        lookup.BlockLength,
                        lookup.NextMatchKey);
                }

                if (lookup.IsMatch)
                {
                    // We have a match, flush the unmatched chunk into the result
                    // But the match is offset.
                    CommitUnmatchedData(workingData, unmatchedBufferIndex, deltaData);

                    // Matched a stream chunk in the weak checksum. We don't yet know where the actual chunk is.
                    // Find the chunk
                    // (LookupMatch also advances bufferIndex.Offset by one block length)
                    LookupMatch(matchedBufferIndex, bufferIndex, lookup, deltaData);

                    // Fewer bytes than one window remain in the buffer after the match
                    if (bufferIndex.Size - bufferIndex.Offset < lookup.BlockLength)
                    {
                        //If this is the last chunk, compare to the last hash
                        if (finishedDeltaRun)
                        {
                            lookup.BlockLength = Math.Min(lookup.BlockLength, bufferIndex.Size - bufferIndex.Offset);
                        }
                        else //We are out of buffer, reload
                        {
                            recalculateWeakChecksum = true;
                        }
                    }

                    //Reset the checksum to fit the new block, but skip it if we are out of data
                    if (!recalculateWeakChecksum)
                    {
                        weakChecksum = Adler32Checksum.Calculate(workingData, bufferIndex.Offset, lookup.BlockLength);
                    }
                }
                else
                {
                    //At this point we have not advanced bufferIndex, so weakChecksum matches the data,
                    // even if we arrive here after reloading the buffer

                    //No match, flush accumulated matches, if any
                    if (matchedBufferIndex.Size > 0)
                    {
                        //Send the matching bytes as a copy
                        WriteCopy(matchedBufferIndex, deltaData);
                        matchedBufferIndex.Reset(0, 0);

                        //We do not immediately start tapping the unmatched bytes,
                        // because the buffer may be nearly empty, and we
                        // want to gather as many unmatched bytes as possible
                        // to avoid the instruction overhead in the file
                        if (bufferIndex.Offset != 0)
                        {
                            loadBuffer = true;
                        }
                    }
                    else
                    {
                        // No match
                        lastPossibleBlock = bufferIndex.Size - lookup.BlockLength;
                        // Start a new unmatched region at the current position if none is pending
                        if (unmatchedBufferIndex.Size == 0)
                        {
                            unmatchedBufferIndex.Offset = bufferIndex.Offset;
                        }

                        doesWeakChecksumExist = false;

                        //Local speedup for long non-matching regions
                        // The bound keeps workingData[Offset + BlockLength] inside the valid part of the buffer
                        while (bufferIndex.Offset < lastPossibleBlock && !doesWeakChecksumExist)
                        {
                            //Roll the weak checksum buffer by 1 byte until you find a match or you reach the end
                            // (first byte argument leaves the window, second byte argument enters it)
                            weakChecksum = Adler32Checksum.Roll(
                                workingData[bufferIndex.Offset],
                                workingData[bufferIndex.Offset + lookup.BlockLength],
                                weakChecksum,
                                lookup.BlockLength);

                            //Check if the new weak checksum roll is a match
                            // NOTE(review): only the upper 16 bits are passed - presumably DoesWeakExist is a
                            // coarse pre-filter keyed on that half; confirm against ChecksumFileReader.
                            // A positive result is re-checked by LookupChunck on the next outer iteration.
                            doesWeakChecksumExist = checksumFile.DoesWeakExist(weakChecksum >> 16);

                            //Update offset for future reference
                            bufferIndex.Offset++;
                        }

                        // Calculating the unmatched chunk size
                        unmatchedBufferIndex.Size = bufferIndex.Offset - unmatchedBufferIndex.Offset;

                        //If this is the last block, claim the remaining bytes as unmatched
                        if (finishedDeltaRun)
                        {
                            //There may be a minor optimization possible here, as the last chunk of the original file may still fit
                            // and be smaller than the block length

                            unmatchedBufferIndex.Size += lookup.BlockLength;
                            // A zero window length terminates the main loop
                            lookup.BlockLength         = 0;
                        }
                    }
                }

                //If we are out of buffer, try to load some more
                // Note: this compares against the full block length from the checksum file,
                // not the (possibly reduced) lookup.BlockLength
                if (loadBuffer || bufferIndex.Size - bufferIndex.Offset <= checksumFile.BlockLength)
                {
                    // This section will flush the located unmatched into the delta stream(if any)
                    // And will refill the working buffer with the remainder of the previous buffer
                    // And fresh data from the source stream

                    //The number of unused bytes in the buffer
                    remainingBytes = bufferIndex.Size - bufferIndex.Offset;

                    //If we have read the last bytes or the buffer is already full, skip this
                    if (!finishedDeltaRun && tempWork.Length - remainingBytes > 0)
                    {
                        // Load the delta amount of data into temp work buffer
                        Buffer.BlockCopy(workingData, bufferIndex.Offset, tempWork, 0, remainingBytes);

                        //Prevent reading the stream after it has been exhausted because some streams break on that
                        if (streamExhausted)
                        {
                            tempRead = 0;
                        }
                        else
                        {
                            // Fill the remainder of tempWork from the source data
                            tempRead = Utility.ForceStreamRead(
                                sourceData,
                                tempWork,
                                remainingBytes,
                                tempWork.Length - remainingBytes);
                        }

                        // If anything was read
                        if (tempRead > 0)
                        {
                            //We are about to discard some data, if it is unmatched, write it to stream
                            CommitUnmatchedData(workingData, unmatchedBufferIndex, deltaData);

                            //Now swap the arrays
                            SwapBuffers(ref workingData, ref tempWork);

                            // The unconsumed bytes now sit at the start of the new working buffer
                            bufferIndex.Offset = 0;
                            bufferIndex.Size   = remainingBytes + tempRead;
                        }
                        else
                        {
                            // Nothing was read: the buffers are not swapped, so workingData and
                            // bufferIndex keep their current contents and position

                            //Prevent reading the stream after it has been exhausted because some streams break on that
                            streamExhausted = true;

                            if (remainingBytes <= checksumFile.BlockLength)
                            {
                                //Mark as done
                                finishedDeltaRun = true;

                                //The last round has a smaller block length
                                // (if no bytes remain this is 0 and the main loop ends)
                                lookup.BlockLength = remainingBytes;
                            }
                        }

                        //If we run out of buffer, we may need to recalculate the checksum
                        if (recalculateWeakChecksum)
                        {
                            weakChecksum            = Adler32Checksum.Calculate(workingData, bufferIndex.Offset, lookup.BlockLength);
                            recalculateWeakChecksum = false;
                        }
                    }
                }
            }

            //There cannot be both matched and unmatched bytes written
            if (matchedBufferIndex.Size > 0 && unmatchedBufferIndex.Size > 0)
            {
                throw new Exception(Strings.DeltaFile.InternalBufferError);
            }

            //Commit all remaining matched content
            if (matchedBufferIndex.Size > 0)
            {
                WriteCopy(matchedBufferIndex, deltaData);
            }

            //Commit all remaining unmatched content
            CommitUnmatchedData(workingData, unmatchedBufferIndex, deltaData);

            //Write end command
            deltaData.WriteByte((byte)RDiffBinary.EndCommand);
            deltaData.Flush();
        }
Exemplo n.º 2
0
        /// <summary>
        /// Registers a matched block. The block either extends the run of matched data accumulated in
        /// <paramref name="matched"/>, or starts a new run; a pending run that cannot be extended is
        /// first written to <paramref name="output"/>.
        /// </summary>
        /// <param name="matched">The accumulated matched run; its offset and size are updated</param>
        /// <param name="buffer">The working buffer position; its offset is advanced by one block length</param>
        /// <param name="lookup">The lookup state providing the matched index and the block length; its next match key is updated</param>
        /// <param name="output">The stream that a non-extendable run is written to</param>
        private static void LookupMatch(DeltaBlock <long> matched, DeltaBlock <int> buffer, BlockLookup lookup, Stream output)
        {
            // A new run starts when nothing has been accumulated so far ...
            bool beginsNewRun = matched.Size == 0;

            // ... or when the accumulated run cannot be continued by this block,
            // in which case the old run is emitted before it is discarded
            if (!beginsNewRun && !lookup.DoesSequenceFit)
            {
                WriteCopy(matched, output);
                matched.Size = 0;
                beginsNewRun = true;
            }

            if (beginsNewRun)
            {
                // The run offset is the matched index scaled by the block length
                matched.Offset = lookup.MatchedIndex * lookup.BlockLength;
            }

            // Ask for the following block index next time, so that consecutive
            // matches can be merged into one larger copy instruction and save space
            lookup.NextMatchKey = lookup.MatchedIndex + 1;

            // Account for the block that was just consumed
            matched.Size  += lookup.BlockLength;
            buffer.Offset += lookup.BlockLength;
        }