Beispiel #1
0
        /// <summary>
        /// Looks for a block in the source file that matches the target data
        /// starting at <paramref name="targetFileStartOffset"/>.
        /// </summary>
        /// <param name="sourceTree">Chunk index of the source file that is searched by checksum.</param>
        /// <param name="targetFileStartOffset">
        /// Offset in the target file at which the candidate block starts. It is
        /// translated into an index into the in-memory target cache by subtracting
        /// mTargetCDataBaseOffset.
        /// </param>
        /// <returns>
        /// The longest verified match among the examined candidates, or null when
        /// fewer than BlockSize bytes remain in the target, when the checksum is
        /// not found, or when every candidate turned out to be a mismatch.
        /// </returns>
        SameBlock FindBlock(ChunkedFile sourceTree, long targetFileStartOffset)
        {
            // Not enough target data left to form a whole block.
            if (mTargetSize - targetFileStartOffset < BlockSize) return null;

            // Checksum the BlockSize bytes of cached target data at this position.
            long preDataSize = targetFileStartOffset - mTargetCDataBaseOffset;
            ChunkChecksum checksum = new ChunkChecksum();
            sourceTree.CalculateChecksum(mTargetCData, preDataSize, BlockSize, checksum);

            long foundIndex;
            if (!sourceTree.Search(checksum, out foundIndex)) {
                return null;
            }

            // Start from an empty match: a checksum hit can still be a byte-level mismatch.
            SameBlock bestMatch = new SameBlock();
            bestMatch.SourceOffset = sourceTree.Chunks[foundIndex].Offset;
            bestMatch.TargetOffset = targetFileStartOffset;
            bestMatch.Size = 0;

            // Walk the consecutive chunks that share this checksum and keep the longest
            // verified match. MaximumMatches == 0 means "no limit on candidates".
            // NOTE(review): presumably Search reports the first chunk of a run of equal
            // checksums - confirm against ChunkedFile.Search.
            // The index is bounded by the chunk count so that a run reaching the last
            // chunk does not read past the end of the chunk array.
            long chunkCount = sourceTree.Chunks.Length;
            long matchCount = 0;
            while ((foundIndex < chunkCount) &&
                   (sourceTree.Chunks[foundIndex].Checksum == checksum) &&
                   ((MaximumMatches == 0) || (matchCount < MaximumMatches)))
            {
                SameBlock match = new SameBlock();
                match.SourceOffset = sourceTree.Chunks[foundIndex].Offset;
                match.TargetOffset = targetFileStartOffset;
                match.Size = 0; // stays 0 if the bytes do not actually match
                ImproveSameBlockMatch(match, bestMatch.Size);
                if (match.Size > bestMatch.Size) {
                    bestMatch = match;
                }
                foundIndex++;
                matchCount++;
            }

            // TODO: Emit debugging information here if in verbose mode.

            // A size of 0 means none of the candidates was a real match.
            if (bestMatch.Size == 0) {
                return null;
            }
            return bestMatch;
        }
Beispiel #2
0
        /// <summary>
        /// Verifies a candidate match byte by byte and extends it as far as possible,
        /// first forwards by comparing the source and target streams, then backwards
        /// by comparing source data against the in-memory target cache.
        /// </summary>
        /// <param name="match">
        /// Candidate match; its offsets and size are updated in place. The size is
        /// reset to 0 when fewer than BlockSize bytes match.
        /// </param>
        /// <param name="currentBest">
        /// Size of the best match found so far. Backward extension is skipped when
        /// this match plus BlockSize could not exceed it.
        /// </param>
        void ImproveSameBlockMatch(SameBlock match, long currentBest)
        {
            // Position both streams right after the part of the match known so far.
            mSource.Seek(match.SourceOffset + match.Size, SeekOrigin.Begin);
            mTarget.Seek(match.TargetOffset + match.Size, SeekOrigin.Begin);

            {
                byte[] sourceData = new byte[ComparisonSize];
                byte[] targetData = new byte[ComparisonSize];
                bool deepBreak = false;
                while (true) {
                    long startTarget = match.TargetOffset + match.Size;
                    long startSource = match.SourceOffset + match.Size;
                    long checkSize = ComparisonSize;

                    // Never compare beyond the end of either file; a clamped chunk is the last one.
                    if (checkSize > (mTargetSize - startTarget)) {
                        checkSize = mTargetSize - startTarget;
                        deepBreak = true;
                    }

                    if (checkSize > (mSourceSize - startSource)) {
                        checkSize = mSourceSize - startSource;
                        deepBreak = true;
                    }

                    // A single Read call may return fewer bytes than requested, so read
                    // until the chunk is complete and only compare what was really read.
                    int requested = (int)checkSize;
                    int sourceRead = ReadFully(mSource, sourceData, requested);
                    int targetRead = ReadFully(mTarget, targetData, requested);
                    int available = (sourceRead < targetRead) ? sourceRead : targetRead;
                    if (available < requested) {
                        checkSize = available;
                        deepBreak = true;
                    }

                    // TODO: Could we optimize this with either an array primitive or unsafe pointers?

                    long i = 0;
                    while ((i < checkSize) && (sourceData[i] == targetData[i]))
                    {
                        match.Size++;
                        i++;
                    }

                    // check if we stopped because we had a mismatch or ran out of input
                    if (i < checkSize || deepBreak) break;
                }
            }

            if (match.Size < BlockSize) {
                // Less than one block matched: treat the candidate as a mismatch.
                match.Size = 0;
            } else {
                // Extending backwards can add fewer than BlockSize bytes, so it is
                // pointless when that still would not beat the current best.
                if ((match.Size + BlockSize) <= currentBest) return;
                // Nothing precedes the match in the target.
                if (match.TargetOffset == 0) return;

                // The target bytes before the match are taken from the cache (mTargetCData),
                // so only the source bytes have to be read from the stream.
                byte[] sourceData = new byte[MaxBlockSize];

                long startSource = match.SourceOffset - BlockSize;
                long checkSize = BlockSize;

                // Fewer than BlockSize bytes precede the match in the source.
                if (checkSize > match.SourceOffset) {
                    checkSize = match.SourceOffset;
                    startSource = 0;
                }

                if (checkSize == 0) return;

                int wanted = (int)checkSize;
                mSource.Seek(startSource, SeekOrigin.Begin);
                // Without the complete preceding data the comparison below would read
                // bytes that were never filled in, so leave the match as it is.
                if (ReadFully(mSource, sourceData, wanted) < wanted) return;
                checkSize--;

                // Walk backwards while the source byte equals the target byte just before the match.
                while (sourceData[checkSize] == (mTargetCData[match.TargetOffset - mTargetCDataBaseOffset - 1])) {
                    match.TargetOffset--;
                    match.SourceOffset--;
                    match.Size++;
                    checkSize--;
                    // "<= 0" instead of "== 0": when only one byte was read, checkSize
                    // is -1 here and must not be used as an index. For larger reads the
                    // loop still stops at 0, i.e. sourceData[0] is not compared.
                    if (checkSize <= 0) break;
                    if (match.TargetOffset == 0) break;
                }
            }
        }

        /// <summary>
        /// Reads from <paramref name="stream"/> into the start of
        /// <paramref name="buffer"/> until <paramref name="count"/> bytes have been
        /// read or the stream reports no more data.
        /// </summary>
        /// <returns>The number of bytes actually read; less than count only at end of stream.</returns>
        private static int ReadFully(Stream stream, byte[] buffer, int count)
        {
            int total = 0;
            while (total < count) {
                int got = stream.Read(buffer, total, count - total);
                if (got <= 0) break;
                total += got;
            }
            return total;
        }
Beispiel #3
0
        /// <summary>
        /// Scans the target file from start to end and records every region that
        /// is also found in the source file.
        /// </summary>
        /// <param name="sameBlocks">
        /// This list will store blocks that have been found to have remained
        /// the same between files. An empty block is added first and a zero-size
        /// block positioned at the end of the target is added last.
        /// </param>
        /// <param name="prog">
        /// Optional progress sink; notified each time the target cache is reloaded.
        /// May be null.
        /// </param>
        /// <exception cref="ArgumentNullException">sameBlocks is null.</exception>
        public void Execute(IList<SameBlock> sameBlocks, IPatchProgress prog)
        {
            if (sameBlocks == null)
                throw new ArgumentNullException("sameBlocks");

            ChunkedFile sourceTree = new ChunkedFile(mSource, mSourceSize, mBlockSize);

            // the vector needs an 'empty' first block so checking for overlap with the 'previous' block never fails.
            sameBlocks.Add(new SameBlock());

            mTargetCDataBaseOffset = 0;
            mTargetCDataSize = 0;
            bool firstRun = true;

            // currentOffset is in the target file
            for (long currentOffset = 0; currentOffset < mTargetSize;) {
                bool reloadTargetCData = true;

                // The cache can be reused when the current position plus the lookahead
                // still lies inside the cached window. The very first pass always loads.
                if ((currentOffset >= mTargetCDataBaseOffset) &&
                    (currentOffset + TargetLookaheadSize < mTargetCDataBaseOffset + TargetBufferSize))
                {
                    if (firstRun) {
                        firstRun = false;
                    } else {
                        reloadTargetCData = false;
                    }
                }

                if (reloadTargetCData) {
                    // at least support looking back blockSize, if possible (findBlock relies on this!)
                    mTargetCDataBaseOffset = currentOffset - mBlockSize;
                    // handle start of file correctly
                    if (currentOffset < BlockSize) mTargetCDataBaseOffset = 0;

                    mTargetCDataSize = TargetBufferSize;

                    // check if this does not extend beyond EOF
                    if (mTargetCDataBaseOffset + mTargetCDataSize > mTargetSize) {
                        mTargetCDataSize = mTargetSize - mTargetCDataBaseOffset;
                    }

                    // we need to update the memory cache of target
                    // TODO: Emit debug info here, if verbose is enabled.

                    if (prog != null) {
                        prog.OnPatchProgress(mTargetCDataBaseOffset, mTargetSize);
                    }

                    mTarget.Seek(mTargetCDataBaseOffset, SeekOrigin.Begin);

                    // A single Read call may return fewer bytes than requested, which
                    // would leave stale data in the cache. Keep reading until the cache
                    // is filled or the stream reports no more data.
                    int wanted = (int)mTargetCDataSize;
                    int filled = 0;
                    while (filled < wanted) {
                        int got = mTarget.Read(mTargetCData, filled, wanted - filled);
                        if (got <= 0) break;
                        filled += got;
                    }
                }

                SameBlock currentSameBlock = FindBlock(sourceTree, currentOffset);
                if (currentSameBlock != null) {
                    // We have a match.
                    SameBlock previousBlock = sameBlocks[sameBlocks.Count-1];
                    if (previousBlock.TargetOffset + previousBlock.Size > currentSameBlock.TargetOffset) {
                        // There is overlap, resolve it by trimming the front of the new
                        // block so that it starts where the previous block ends.
                        long difference = previousBlock.TargetOffset + previousBlock.Size - currentSameBlock.TargetOffset;
                        currentSameBlock.SourceOffset += difference;
                        currentSameBlock.TargetOffset += difference;
                        currentSameBlock.Size -= difference;
                    }
                    // NOTE(review): unconditional console output; looks like leftover
                    // debugging that should be tied to a verbose switch - confirm.
                    Console.WriteLine(currentSameBlock.ToString());
                    sameBlocks.Add(currentSameBlock);

                    // TODO: Emit debug info here, if verbose is enabled.

                    // Continue right after the matched region.
                    currentOffset = currentSameBlock.TargetOffset + currentSameBlock.Size;
                } else {
                    // No match, advance to the next byte.
                    currentOffset++;
                }
            }

            // Add a block at the end to prevent bounds checking hassles.
            SameBlock lastBlock = new SameBlock();
            lastBlock.SourceOffset = 0;
            lastBlock.TargetOffset = mTargetSize;
            lastBlock.Size = 0;
            sameBlocks.Add(lastBlock);
        }