Ejemplo n.º 1
0
        /// <summary>
        /// Reads a <see cref="CompleteSignature"/> from its compact, hand-rolled binary form.
        /// </summary>
        /// <remarks>
        /// Layout as consumed here: an Int32 entry count, then for each entry an Int64 offset,
        /// an Int32 size, an Int32 block number, two Decimal rolling-signature parts and
        /// 16 bytes of MD5. The format is read manually to keep the data as small as possible.
        /// The <see cref="BinaryReader"/> is not disposed here, because disposing it would
        /// also close the caller's stream.
        /// </remarks>
        /// <param name="s">Stream positioned at the start of the serialized signature.</param>
        /// <returns>The signature holding every block entry read from the stream.</returns>
        /// <exception cref="EndOfStreamException">The stream ends before all declared entries were read.</exception>
        public static CompleteSignature ReadBinaryCompleteSignature(Stream s)
        {
            const int Md5Length = 16;

            var reader = new BinaryReader(s);
            var entries = new List<BlockSignature>();

            int numberOfEntries = reader.ReadInt32();

            for (var i = 0; i < numberOfEntries; i++)
            {
                // 8 bytes. offset
                long offset = reader.ReadInt64();

                // 4 bytes. size
                int size = reader.ReadInt32();

                // 4 bytes. Block Number
                int blockNumber = reader.ReadInt32();

                // Rolling signature: two decimals, 16 bytes each.
                decimal sig1 = reader.ReadDecimal();
                decimal sig2 = reader.ReadDecimal();

                // ReadBytes returns a shorter array at end of stream instead of throwing,
                // so a truncated MD5 has to be detected explicitly.
                byte[] md5 = reader.ReadBytes(Md5Length);
                if (md5.Length != Md5Length)
                {
                    throw new EndOfStreamException("Unexpected end of stream while reading the MD5 signature of a block.");
                }

                var entry = new BlockSignature();
                entry.BlockNumber = (UInt32)blockNumber;
                entry.RollingSig = new RollingSignature() { Sig1 = sig1, Sig2 = sig2 };
                entry.MD5Signature = md5;
                entry.Offset = offset;
                entry.Size = (uint)size;

                entries.Add(entry);
            }

            var sig = new CompleteSignature();
            sig.SignatureList = entries.ToArray();
            return sig;
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Builds a size-grouped signature for a local file by reading it in blocks of
        /// <c>ConfigHelper.SignatureSize</c> bytes and generating one block signature per block.
        /// </summary>
        /// <remarks>
        /// Block signatures are grouped by the number of bytes in the block, so a shorter
        /// final block lands in its own group. The file is opened read-only.
        /// </remarks>
        /// <param name="localFilePath">Path of the file to generate signatures for.</param>
        /// <returns>A signature whose dictionary maps block size to the blocks of that size.</returns>
        public static SizeBasedCompleteSignature CreateSignatureForLocalFile(string localFilePath)
        {
            var buffer = new byte[ConfigHelper.SignatureSize];
            var sigDict = new Dictionary<int, List<BlockSignature>>();

            // FileAccess.Read: the two-argument constructor asks for read/write access and
            // therefore fails on read-only files even though we never write.
            using (var fs = new FileStream(localFilePath, FileMode.Open, FileAccess.Read))
            {
                long offset = 0;
                uint idCount = 0;

                while (true)
                {
                    // Stream.Read may legally return fewer bytes than requested before EOF,
                    // so keep reading until the block is full or the file ends.
                    int bytesRead = 0;
                    while (bytesRead < buffer.Length)
                    {
                        int chunk = fs.Read(buffer, bytesRead, buffer.Length - bytesRead);
                        if (chunk == 0)
                        {
                            break;
                        }

                        bytesRead += chunk;
                    }

                    if (bytesRead == 0)
                    {
                        break;
                    }

                    var blockSig = GenerateBlockSig(buffer, offset, bytesRead, idCount);

                    List<BlockSignature> sigList;
                    if (!sigDict.TryGetValue(bytesRead, out sigList))
                    {
                        sigList = new List<BlockSignature>();
                        sigDict[bytesRead] = sigList;
                    }

                    sigList.Add(blockSig);

                    offset += bytesRead;
                    idCount++;
                }
            }

            var sizedBaseSignature = new SizeBasedCompleteSignature();
            sizedBaseSignature.Signatures = new Dictionary<int, CompleteSignature>();

            foreach (var pair in sigDict)
            {
                sizedBaseSignature.Signatures[pair.Key] = new CompleteSignature() { SignatureList = pair.Value.ToArray() };
            }

            return sizedBaseSignature;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Slides a window of <paramref name="sigSize"/> bytes over each remaining byte range of the
        /// local file and looks for blocks whose rolling signature and MD5 match a known signature.
        /// </summary>
        /// <remarks>
        /// A range is only searched when it is larger than 1000 bytes and the signature size is
        /// larger than 100, or when the range is exactly the signature size; every other range is
        /// passed through unchanged. Matches are appended to <paramref name="signaturesToReuse"/>
        /// with their offset set to the match position in the local file.
        /// </remarks>
        /// <param name="sig">Known signatures to match against.</param>
        /// <param name="accessor">View over the local file contents.</param>
        /// <param name="remainingByteList">Byte ranges not yet covered by any matched signature.</param>
        /// <param name="sigSize">Block size, in bytes, of the signatures in <paramref name="sig"/>; used as the window size.</param>
        /// <param name="fileSize">Not used by this method.</param>
        /// <param name="signaturesToReuse">Receives every matched signature.</param>
        /// <returns>The byte ranges that are still unmatched after this pass.</returns>
        private static List<RemainingBytes> SearchLocalFileForSignaturesBasedOnSize(CompleteSignature sig, MemoryMappedViewAccessor accessor, List<RemainingBytes> remainingByteList, int sigSize, long fileSize, List<BlockSignature> signaturesToReuse)
        {
            var windowSize = sigSize;
            var newRemainingBytes = new List<RemainingBytes>();
            var sigDict = GenerateBlockDict(sig);
            var buffer = new byte[sigSize];

            foreach (var byteRange in remainingByteList)
            {
                var byteRangeSize = byteRange.EndOffset - byteRange.BeginOffset + 1;

                // Avoid filling a range with LOADS of tiny signature matches where a larger new
                // block would be better. The exception is a range that is exactly one signature
                // long, which lets small (1-2 byte) signatures match such ranges.
                var worthSearching = (byteRangeSize > 1000 && sigSize > 100) || byteRangeSize == sigSize;

                // A range shorter than the window cannot contain a match either.
                if (!worthSearching || byteRangeSize < windowSize)
                {
                    newRemainingBytes.Add(byteRange);
                    continue;
                }

                long offset = byteRange.BeginOffset;

                // First byte of the range that is not yet covered by a match.
                long oldEndOffset = byteRange.BeginOffset;
                var generateFreshSig = true;
                RollingSignature? currentSig = null;

                do
                {
                    if (generateFreshSig)
                    {
                        var freshBytes = accessor.ReadArray(offset, buffer, 0, windowSize);
                        currentSig = CreateRollingSignature(buffer, freshBytes);
                    }
                    else
                    {
                        // Roll the existing signature one byte forward. The loop condition keeps
                        // offset + windowSize - 1 inside the current byte range.
                        var previousByte = accessor.ReadByte(offset - 1);
                        var nextByte = accessor.ReadByte(offset + windowSize - 1);
                        currentSig = RollSignature(windowSize, previousByte, nextByte, currentSig.Value);
                    }

                    var matched = false;

                    List<BlockSignature> sigsForCurrentRollingSig;
                    if (sigDict.TryGetValue(currentSig.Value, out sigsForCurrentRollingSig))
                    {
                        // Re-populate the buffer for the MD5 check. Potential waste of IO here.
                        var candidateBytes = accessor.ReadArray(offset, buffer, 0, windowSize);
                        var md5Sig = CreateMD5Signature(buffer, candidateBytes);

                        // A rolling-signature hit only counts when the MD5 matches as well.
                        var matchingSigs = sigsForCurrentRollingSig
                            .Where(s => s.MD5Signature.SequenceEqual(md5Sig))
                            .ToList();

                        if (matchingSigs.Count > 0)
                        {
                            // Bytes between the previous match and this one belong to no signature.
                            if (oldEndOffset != offset)
                            {
                                newRemainingBytes.Add(new RemainingBytes()
                                {
                                    BeginOffset = oldEndOffset,
                                    EndOffset = offset - 1
                                });
                            }

                            var matchingSig = matchingSigs[0];

                            // Record where in the NEW file the reused block should appear.
                            matchingSig.Offset = offset;
                            signaturesToReuse.Add(matchingSig);

                            offset += windowSize;
                            oldEndOffset = offset;
                            matched = true;
                        }
                    }

                    if (!matched)
                    {
                        // No match at this position: advance a single byte.
                        offset++;
                    }

                    // After a match the window jumps, so the signature must be rebuilt from
                    // scratch; otherwise it can be rolled.
                    generateFreshSig = matched;
                } while (offset + windowSize <= byteRange.EndOffset + 1);

                // Everything after the last match is still unmatched. Compare against
                // oldEndOffset rather than offset: with a 1-byte window and no match on the
                // final byte, offset ends past EndOffset while unmatched bytes remain.
                if (oldEndOffset <= byteRange.EndOffset)
                {
                    newRemainingBytes.Add(new RemainingBytes()
                    {
                        BeginOffset = oldEndOffset,
                        EndOffset = byteRange.EndOffset
                    });
                }
            }

            return newRemainingBytes;
        }
Ejemplo n.º 4
0
 /// <summary>
 /// Convenience overload: builds the rolling-signature lookup for a complete signature
 /// by forwarding its signature list to the list-based overload.
 /// </summary>
 /// <param name="sig">Complete signature whose <c>SignatureList</c> is indexed.</param>
 /// <returns>The dictionary produced by the list-based overload.</returns>
 internal static Dictionary<RollingSignature, List<BlockSignature>> GenerateBlockDict(CompleteSignature sig)
 {
     var blockSignatures = sig.SignatureList;
     var lookup = GenerateBlockDict(blockSignatures);
     return lookup;
 }
Ejemplo n.º 5
0
        /// <summary>
        /// Builds a size-grouped signature from a list of uploaded blocks, grouping each
        /// block's signature by its <c>Sig.Size</c>.
        /// </summary>
        /// <param name="allBlocks">Blocks whose signatures are collected, both new and reused.</param>
        /// <returns>A signature whose dictionary maps block size to the signatures of that size.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="allBlocks"/> is null.</exception>
        internal static SizeBasedCompleteSignature CreateSignatureFromNewAndReusedBlocks(List<UploadedBlock> allBlocks)
        {
            if (allBlocks == null)
            {
                throw new ArgumentNullException("allBlocks");
            }

            var sigDict = new Dictionary<int, List<BlockSignature>>();

            foreach (var block in allBlocks)
            {
                // Size is the grouping key; convert it once and reuse it.
                var sizeKey = (int)block.Sig.Size;

                List<BlockSignature> sigList;
                if (!sigDict.TryGetValue(sizeKey, out sigList))
                {
                    sigList = new List<BlockSignature>();
                    sigDict[sizeKey] = sigList;
                }

                sigList.Add(block.Sig);
            }

            var sizedBaseSignature = new SizeBasedCompleteSignature();
            sizedBaseSignature.Signatures = new Dictionary<int, CompleteSignature>();

            foreach (var pair in sigDict)
            {
                sizedBaseSignature.Signatures[pair.Key] = new CompleteSignature() { SignatureList = pair.Value.ToArray() };
            }

            return sizedBaseSignature;
        }