예제 #1
0
        /// <summary>
        /// Quick check that no two block signatures in <paramref name="sig"/> share the same MD5.
        /// </summary>
        /// <param name="sig">Signature whose blocks, across every size bucket, are inspected.</param>
        /// <returns>
        /// <c>true</c> when every MD5 (as rendered by <c>ByteArrayToString</c>) is unique;
        /// <c>false</c> as soon as a duplicate is encountered.
        /// </returns>
        private Boolean VerifySignature(SizeBasedCompleteSignature sig)
        {
            // Only membership matters here, so a set replaces the old dictionary whose values were never read.
            var seenMd5s = new HashSet <string>();

            foreach (var completeSig in sig.Signatures.Values)
            {
                foreach (var blockSig in completeSig.SignatureList)
                {
                    // HashSet.Add returns false when the value is already present, i.e. a duplicate MD5.
                    if (!seenMd5s.Add(ByteArrayToString(blockSig.MD5Signature)))
                    {
                        return(false);
                    }
                }
            }

            return(true);
        }
예제 #2
0
        /// <summary>
        /// Builds a size-keyed signature from the signatures carried by the supplied blocks.
        /// </summary>
        /// <param name="allBlocks">Blocks whose <c>Sig</c> values are grouped by <c>Sig.Size</c> (cast to int).</param>
        /// <returns>A signature holding one <c>CompleteSignature</c> per distinct block size.</returns>
        internal static SizeBasedCompleteSignature CreateSignatureFromNewAndReusedBlocks(List <UploadedBlock> allBlocks)
        {
            var signaturesBySize = new Dictionary <int, List <BlockSignature> >();

            // Bucket every block signature under its size.
            foreach (var block in allBlocks)
            {
                var sizeKey = (int)block.Sig.Size;

                List <BlockSignature> bucket;
                if (!signaturesBySize.TryGetValue(sizeKey, out bucket))
                {
                    bucket = new List <BlockSignature>();
                    signaturesBySize[sizeKey] = bucket;
                }

                bucket.Add(block.Sig);
            }

            var result = new SizeBasedCompleteSignature
            {
                Signatures = new Dictionary <int, CompleteSignature>()
            };

            // Freeze each bucket into an array-backed CompleteSignature.
            foreach (var entry in signaturesBySize)
            {
                result.Signatures[entry.Key] = new CompleteSignature
                {
                    SignatureList = entry.Value.ToArray()
                };
            }

            return(result);
        }
예제 #3
0
        /// <summary>
        /// Clears and repopulates <paramref name="sigTV"/> from <paramref name="sig"/>, then refreshes the size labels.
        /// </summary>
        /// <param name="sig">Signature to display.</param>
        /// <param name="sigDict">Node-to-signatures map passed through to <c>PopulateSignatureTreeByOffset</c>.</param>
        /// <param name="sigTV">Tree view to rebuild; the one named "sigTreeView" is treated as the left (file 1) tree.</param>
        private void PopulateSignatureTree(SizeBasedCompleteSignature sig, Dictionary <TreeNode, List <BlockSignature> > sigDict, TreeView sigTV)
        {
            sigTV.Nodes.Clear();

            var isLeftTree = sigTV.Name == "sigTreeView";

            // Reset the shared-byte counter before repopulating.
            // NOTE(review): presumably the population call below updates bothFilesShared — confirm.
            bothFilesShared = 0;
            PopulateSignatureTreeByOffset(sigTV, sig, sigDict);

            // The "new" figure is always computed from file 2's size, whichever tree is being rebuilt.
            sharedSize.Text = bothFilesShared.ToString("N0");
            newSize.Text    = (file2Size - bothFilesShared).ToString("N0");

            if (isLeftTree)
            {
                file1TotalSize.Text = file1Size.ToString("N0");
            }
            else
            {
                file2TotalSize.Text = file2Size.ToString("N0");
            }

            sigTV.Update();
        }
예제 #4
0
        /// <summary>
        /// Computes a total size by summing, for every size bucket, the bucket's key multiplied by
        /// the number of signatures stored in that bucket.
        /// </summary>
        /// <param name="sig">Signature whose buckets are summed.</param>
        /// <returns>The accumulated size as a 64-bit value.</returns>
        private long CalculateFileSize(SizeBasedCompleteSignature sig)
        {
            long totalBytes = 0;

            foreach (var bucket in sig.Signatures)
            {
                // Widen both factors before multiplying so the product is computed in 64 bits.
                long blockSize  = bucket.Key;
                long blockCount = bucket.Value.SignatureList.Length;

                totalBytes += blockSize * blockCount;
            }

            return(totalBytes);
        }
예제 #5
0
        /// <summary>
        /// Finds runs of consecutive block signatures (in offset order) whose combined size fits within
        /// <c>ConfigHelper.SignatureSize</c> and passes them to <c>DefragSigGroup</c>, most fragmented
        /// runs first, until the upload budget would be exceeded.
        /// </summary>
        /// <param name="blobSig">Signature of the blob to defragment.</param>
        /// <param name="maxUploadLimitMB">
        /// Upload budget in megabytes. Zero (or a negative value) means no limit.
        /// </param>
        public void DefragBlob(SizeBasedCompleteSignature blobSig, long maxUploadLimitMB = 2)
        {
            var allBlobSigs = blobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

            var targetSigSize = ConfigHelper.SignatureSize;

            // The budget is given in MB while group sizes are in bytes; convert once so the comparison is like-for-like.
            var maxUploadBytes = maxUploadLimitMB * 1024L * 1024L;

            var defragNodeList = new List <DefragNode>();

            // For every starting signature, measure how many following signatures can be merged
            // without the merged block growing past targetSigSize.
            for (var i = 0; i < allBlobSigs.Count; i++)
            {
                uint groupSize = 0;
                var  next      = i;

                while (next < allBlobSigs.Count && groupSize + allBlobSigs[next].Size <= targetSigSize)
                {
                    groupSize += allBlobSigs[next].Size;
                    next++;
                }

                // next - i is the exact number of signatures included, whether the scan stopped
                // because the group was full or because the list ran out.
                defragNodeList.Add(new DefragNode {
                    Offset = allBlobSigs[i].Offset, Size = groupSize, SigPos = i, NoSigs = next - i
                });
            }

            // Groups containing the most signatures (i.e. the most fragmentation) are merged first.
            var sortedList = defragNodeList.OrderByDescending(n => n.NoSigs).ToList();

            long bytesToUpload = 0;

            foreach (var group in sortedList)
            {
                // Stop before a group would push the total over the budget (a non-positive budget disables the check).
                if (maxUploadBytes > 0 && bytesToUpload + group.Size > maxUploadBytes)
                {
                    break;
                }

                DefragSigGroup(blobSig, group);
                bytesToUpload += group.Size;
            }
        }
예제 #6
0
        /// <summary>
        /// Serializes <paramref name="sig"/> into memory and uploads it as the signature blob
        /// whose name is derived by <c>AzureHelper.SetSignatureName</c>.
        /// </summary>
        /// <param name="blobName">Name of the blob the signature belongs to.</param>
        /// <param name="containerName">Container that receives the signature blob.</param>
        /// <param name="sig">Signature to serialize and upload.</param>
        public void UploadSignatureForBlob(string blobName, string containerName, SizeBasedCompleteSignature sig)
        {
            var container = AzureHelper.GetCloudBlobClient().GetContainerReference(containerName);

            var signatureBlobName = AzureHelper.SetSignatureName(containerName, blobName);
            var signatureBlob     = container.GetBlockBlobReference(signatureBlobName);

            using (Stream payload = new MemoryStream())
            {
                SerializationHelper.WriteBinarySizedBasedSignature(sig, payload);

                // Rewind so the upload starts from the first serialized byte.
                payload.Position = 0;
                signatureBlob.UploadFromStream(payload);
            }
        }
예제 #7
0
        /// <summary>
        /// Writes <paramref name="sig"/> to <paramref name="s"/> in binary form.
        /// Layout: the number of size buckets, then for each bucket its key size, its entry count and its entries.
        /// Each entry is written as Offset, Size, BlockNumber, RollingSig.Sig1, RollingSig.Sig2 and then the
        /// MD5 bytes one by one (no length prefix).
        /// </summary>
        /// <param name="sig">Signature to serialize.</param>
        /// <param name="s">Destination stream; it is neither closed nor rewound here.</param>
        public static void WriteBinarySizedBasedSignature(SizeBasedCompleteSignature sig, Stream s)
        {
            var output = new BinaryWriter(s);

            // Header: 4-byte count of size buckets.
            int bucketCount = sig.Signatures.Keys.Count;
            output.Write(bucketCount);

            foreach (var bucket in sig.Signatures)
            {
                int bucketSize = bucket.Key;
                var entries    = bucket.Value.SignatureList;

                // Bucket header: key size followed by the number of entries in this bucket.
                output.Write(bucketSize);

                int entryCount = entries.Length;
                output.Write(entryCount);

                foreach (var entry in entries)
                {
                    // Field widths as recorded in the original notes: 8, 4, 4, 8, 8 bytes
                    // (NOTE(review): these depend on the declared field types — confirm).
                    output.Write(entry.Offset);
                    output.Write(entry.Size);
                    output.Write(entry.BlockNumber);
                    output.Write(entry.RollingSig.Sig1);
                    output.Write(entry.RollingSig.Sig2);

                    // Expected to be 16 bytes per the original notes.
                    foreach (byte md5Byte in entry.MD5Signature)
                    {
                        output.Write(md5Byte);
                    }
                }
            }
        }
예제 #8
0
        /// <summary>
        /// Reads a local file in chunks of <c>ConfigHelper.SignatureSize</c> bytes, generates a block
        /// signature for each chunk via <c>GenerateBlockSig</c>, and groups the signatures by chunk length.
        /// </summary>
        /// <param name="localFilePath">Path of the file to fingerprint; it is opened for reading only.</param>
        /// <returns>A signature with one <c>CompleteSignature</c> per distinct chunk length.</returns>
        public static SizeBasedCompleteSignature CreateSignatureForLocalFile(string localFilePath)
        {
            var buffer  = new byte[ConfigHelper.SignatureSize];
            var sigDict = new Dictionary <int, List <BlockSignature> >();

            // The file is only read, so request read access explicitly; the two-argument constructor
            // asks for read/write access and fails on read-only files.
            using (var fs = new FileStream(localFilePath, FileMode.Open, FileAccess.Read))
            {
                long offset  = 0;
                uint idCount = 0;

                while (true)
                {
                    // Stream.Read may hand back fewer bytes than requested before end of file.
                    // Keep reading until the buffer is full or the file is exhausted, so that only
                    // the final block can be shorter than the configured size.
                    var bytesRead = 0;
                    int chunk;
                    while (bytesRead < buffer.Length &&
                           (chunk = fs.Read(buffer, bytesRead, buffer.Length - bytesRead)) > 0)
                    {
                        bytesRead += chunk;
                    }

                    if (bytesRead == 0)
                    {
                        break;
                    }

                    var blockSig = GenerateBlockSig(buffer, offset, bytesRead, idCount);

                    List <BlockSignature> sigList;
                    if (!sigDict.TryGetValue(bytesRead, out sigList))
                    {
                        sigList            = new List <BlockSignature>();
                        sigDict[bytesRead] = sigList;
                    }

                    sigList.Add(blockSig);

                    offset += bytesRead;
                    idCount++;
                }
            }

            var sizedBaseSignature = new SizeBasedCompleteSignature();

            sizedBaseSignature.Signatures = new Dictionary <int, CompleteSignature>();

            foreach (var entry in sigDict)
            {
                sizedBaseSignature.Signatures[entry.Key] = new CompleteSignature()
                {
                    SignatureList = entry.Value.ToArray()
                };
            }

            return(sizedBaseSignature);
        }
예제 #9
0
        /// <summary>
        /// Flattens every size bucket of <paramref name="sig"/> into one list ordered by offset and
        /// passes it to <c>PopulateRootNodes</c>. Does nothing when the signature has no buckets dictionary.
        /// </summary>
        /// <param name="sigTV">Tree view forwarded to <c>PopulateRootNodes</c>.</param>
        /// <param name="sig">Signature to flatten.</param>
        /// <param name="sigDict">Node-to-signatures map forwarded to <c>PopulateRootNodes</c>.</param>
        private void PopulateSignatureTreeByOffset(TreeView sigTV, SizeBasedCompleteSignature sig, Dictionary <TreeNode, List <BlockSignature> > sigDict)
        {
            if (sig.Signatures == null)
            {
                return;
            }

            var blocksByOffset = sig.Signatures.Values
                                 .SelectMany(completeSig => completeSig.SignatureList)
                                 .OrderBy(block => block.Offset)
                                 .ToList();

            PopulateRootNodes(sigTV, blocksByOffset, sigDict);
        }
예제 #10
0
        /// <summary>
        /// Shows a message box describing the block signature represented by a double-clicked tree node.
        /// The first whitespace-separated token of the parent node's text is parsed as the signature size
        /// and the first token of the node's own text as the block offset.
        /// </summary>
        /// <param name="selectedNode">Node that was double-clicked; ignored when null or when it has no parent.</param>
        /// <param name="sig">Signature in which the block is looked up.</param>
        private void ProcessDoubleClick(TreeNode selectedNode, SizeBasedCompleteSignature sig)
        {
            if (selectedNode == null || selectedNode.Parent == null)
            {
                return;
            }

            var parentTokens = selectedNode.Parent.Text.Split();
            var sigSize      = Convert.ToInt32(parentTokens[0]);
            var offset       = Convert.ToInt64(selectedNode.Text.Split()[0]);

            // First() throws when no signature in the bucket has this offset.
            var specificSig = sig.Signatures[sigSize].SignatureList.First(s => s.Offset == offset);

            var md5String  = ByteArrayToString(specificSig.MD5Signature);
            var rollingSig = string.Format("{0}:{1}", specificSig.RollingSig.Sig1, specificSig.RollingSig.Sig2);

            var msg = string.Format("Offset: {0}\nSize: {1}\nRollingSig: {2}\nMD5: {3}",
                                    specificSig.Offset.ToString(),
                                    specificSig.Size.ToString(),
                                    rollingSig,
                                    md5String);

            MessageBox.Show(msg);
        }
예제 #11
0
        /// <summary>
        /// Reads a <c>SizeBasedCompleteSignature</c> from the start of a stream.
        /// Format: the first 4 bytes hold the number of complete signatures. Each one is stored as a
        /// 4-byte key size followed by the data consumed by <c>ReadBinaryCompleteSignature</c>.
        /// </summary>
        /// <param name="s">Stream to read; it is always rewound to the beginning first.</param>
        /// <returns>The signature with one entry per key size read.</returns>
        public static SizeBasedCompleteSignature ReadSizeBasedBinarySignature(Stream s)
        {
            var result = new SizeBasedCompleteSignature
            {
                Signatures = new Dictionary <int, CompleteSignature>()
            };

            // Always start from the beginning of the stream, whatever its current position.
            s.Position = 0;

            var input = new BinaryReader(s);

            for (var remaining = input.ReadInt32(); remaining > 0; remaining--)
            {
                int keySize = input.ReadInt32();

                result.Signatures[keySize] = ReadBinaryCompleteSignature(s);
            }

            return(result);
        }
예제 #12
0
        /// <summary>
        /// Loads a signature file, rebuilds the MD5 lookup and size for the matching side
        /// (file 1 when the tree is named "sigTreeView", otherwise file 2), and repopulates the tree.
        /// </summary>
        /// <param name="filename">Path of the signature file; it is opened for reading only.</param>
        /// <param name="sig">Receives the signature read from the file.</param>
        /// <param name="sigDict">Node-to-signatures map passed on to <c>PopulateSignatureTree</c>.</param>
        /// <param name="sigTV">Tree view to repopulate; its name selects which side's fields are updated.</param>
        private void LoadSigFile(string filename, ref SizeBasedCompleteSignature sig, Dictionary <TreeNode, List <BlockSignature> > sigDict, TreeView sigTV)
        {
            // The file is only read, so ask for read access explicitly (the two-argument constructor
            // requests read/write and fails on read-only files), and release the handle as soon as
            // deserialization is done.
            using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
            {
                sig = SerializationHelper.ReadSizeBasedBinarySignature(fs);
            }

            // NOTE(review): the duplicate-MD5 check result is discarded, as before — confirm whether
            // a failed check should be surfaced to the user.
            VerifySignature(sig);

            if (sigTV.Name == "sigTreeView")
            {
                sig1MD5Dict = GenerateMD5DictFromSig(sig);
                file1Size   = CalculateFileSize(sig);
            }
            else
            {
                sig2MD5Dict = GenerateMD5DictFromSig(sig);
                file2Size   = CalculateFileSize(sig);
            }

            bothFilesShared = 0;
            PopulateSignatureTree(sig, sigDict, sigTV);
        }
예제 #13
0
 /// <summary>
 /// Intended to defragment a group of signatures by merging them together.
 /// The body is currently empty, so calling this method has no effect.
 /// </summary>
 /// <param name="blobSig">Signature of the blob the group belongs to (not used yet).</param>
 /// <param name="sig">Node describing the group of signatures to merge (not used yet).</param>
 private void DefragSigGroup(SizeBasedCompleteSignature blobSig, DefragNode sig)
 {
 }
예제 #14
0
        /// <summary>
        /// Walks the cloud blob's block signatures in offset order and returns the byte ranges that
        /// have no MD5 match among <paramref name="sigsToReuseList"/> and therefore must be downloaded.
        /// </summary>
        /// <param name="sigsToReuseList">Signatures of blocks that can be reused; matched by MD5 only.</param>
        /// <param name="cloudBlobSig">Signature describing the cloud blob's blocks.</param>
        /// <param name="containerName">Not used by the computation; kept so existing callers keep compiling.</param>
        /// <param name="blobName">Not used by the computation; kept so existing callers keep compiling.</param>
        /// <returns>One inclusive byte range per unmatched block, in offset order.</returns>
        private List <RemainingBytes> GenerateByteRangesOfBlobToDownload(List <BlockSignature> sigsToReuseList, SizeBasedCompleteSignature cloudBlobSig, string containerName, string blobName)
        {
            // The previous version also fetched the blob size and sorted sigsToReuseList, but never
            // used either result; both were dropped (the size lookup was a wasted remote call).
            var remainingBytesList = new List <RemainingBytes>();
            var allBlobSigs        = cloudBlobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

            // Index reusable MD5s once so each blob block costs one hash lookup instead of a scan
            // over the whole reuse list. Base64 is only used as a byte-exact string key.
            var reusableMd5s = new HashSet <string>(
                sigsToReuseList.Select(s => System.Convert.ToBase64String(s.MD5Signature)));

            long startOffsetToCopy = 0;

            foreach (var sig in allBlobSigs)
            {
                var haveMatchingSig = reusableMd5s.Contains(System.Convert.ToBase64String(sig.MD5Signature));
                if (!haveMatchingSig)
                {
                    // No match: everything from startOffsetToCopy to the end of this block must be downloaded.
                    remainingBytesList.Add(new RemainingBytes()
                    {
                        BeginOffset = startOffsetToCopy,
                        EndOffset   = sig.Offset + sig.Size - 1
                    });
                }

                // Matched or not, the next range starts just after the current block.
                startOffsetToCopy = sig.Offset + sig.Size;
            }

            return(remainingBytesList);
        }
예제 #15
0
        /// <summary>
        /// Regenerates the blob locally. The result is written to "localFilePath.new" and then replaces
        /// the original file. Each block of <paramref name="blobSig"/> (in offset order) is either copied
        /// from the existing local file, when a reusable block with the same rolling signature and MD5
        /// exists, or downloaded from Azure using the byte range that starts at the running offset.
        /// </summary>
        /// <param name="containerName">Container of the blob to download from.</param>
        /// <param name="blobName">Blob to download missing ranges from.</param>
        /// <param name="byteRangesToDownload">Inclusive byte ranges that are not available locally.</param>
        /// <param name="localFilePath">Existing local file; read for reusable blocks, then replaced.</param>
        /// <param name="reusableBlockSignatures">Signatures of local blocks that may be reused.</param>
        /// <param name="blobSig">Signature describing the target blob layout.</param>
        /// <param name="parallelFactor">Passed through to <c>DownloadBytes</c>.</param>
        /// <exception cref="EndOfStreamException">
        /// The local file ends before a reusable block could be read completely.
        /// </exception>
        private void RegenerateBlob(string containerName, string blobName, List <RemainingBytes> byteRangesToDownload, string localFilePath, List <BlockSignature> reusableBlockSignatures, SizeBasedCompleteSignature blobSig, int parallelFactor = 2)
        {
            // Flatten the size buckets; only the offset order matters from here on.
            var allBlobSigs =
                blobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

            // Lookup table, keyed by rolling signature, to decide whether a block can be reused.
            var reusableBlockDict = CommonOps.GenerateBlockDict(reusableBlockSignatures.ToArray());

            var offset = 0L;

            // The existing file is only read here, so open it read-only rather than with the default read/write access.
            using (var localStream = new FileStream(localFilePath, FileMode.Open, FileAccess.Read))
                using (var newStream = new FileStream(localFilePath + ".new", FileMode.Create))
                {
                    // Go through all sigs in offset order and decide whether to reuse or download.
                    foreach (var sig in allBlobSigs)
                    {
                        var haveMatch = false;
                        if (reusableBlockDict.ContainsKey(sig.RollingSig))
                        {
                            // Rolling signature matches; confirm with the MD5 before reusing local data.
                            var localSig = reusableBlockDict[sig.RollingSig];

                            var matchingLocalSigs =
                                localSig.Where(s => s.MD5Signature.SequenceEqual(sig.MD5Signature))
                                .ToList();

                            if (matchingLocalSigs.Any())
                            {
                                var matchingLocalSig = matchingLocalSigs[0];
                                var blockSize        = (int)matchingLocalSig.Size;

                                // One allocation per reused block; could be pooled if this becomes hot.
                                var buffer = new byte[blockSize];

                                localStream.Seek(matchingLocalSig.Offset, SeekOrigin.Begin);

                                // Stream.Read may return fewer bytes than requested, so loop until the
                                // whole block is in the buffer instead of trusting a single call.
                                var filled = 0;
                                while (filled < blockSize)
                                {
                                    var read = localStream.Read(buffer, filled, blockSize - filled);
                                    if (read == 0)
                                    {
                                        throw new EndOfStreamException(string.Format(
                                            "Local file ended while reading {0} bytes at offset {1}.",
                                            blockSize, matchingLocalSig.Offset));
                                    }

                                    filled += read;
                                }

                                newStream.Seek(sig.Offset, SeekOrigin.Begin);
                                newStream.Write(buffer, 0, blockSize);

                                haveMatch = true;
                                offset   += matchingLocalSig.Size;
                            }
                        }

                        if (!haveMatch)
                        {
                            // Look for a byte range to download that starts at the running offset.
                            var byteRange =
                                (from b in byteRangesToDownload where b.BeginOffset == offset select b).FirstOrDefault();
                            if (byteRange != null)
                            {
                                // Ranges are inclusive, hence the + 1.
                                var rangeLength = byteRange.EndOffset - byteRange.BeginOffset + 1;

                                var blobBytes = DownloadBytes(containerName, blobName, byteRange.BeginOffset,
                                                              byteRange.EndOffset, parallelFactor);

                                newStream.Seek(sig.Offset, SeekOrigin.Begin);
                                newStream.Write(blobBytes, 0, (int)rangeLength);

                                offset += rangeLength;
                            }
                        }
                    }
                }

            // Swap the regenerated file into place (no backup file is kept).
            File.Replace(localFilePath + ".new", localFilePath, null);
        }
예제 #16
0
        /// <summary>
        /// Searches a local file for blocks that match the signatures in <paramref name="sig"/>,
        /// trying the largest block sizes first.
        /// </summary>
        /// <param name="localFilePath">File to search; it is memory-mapped for the duration of the search.</param>
        /// <param name="sig">Existing signature whose block sizes and signatures drive the search.</param>
        /// <returns>
        /// The signatures that can be reused and the byte ranges still unmatched (to be uploaded).
        /// For an empty file both lists are empty.
        /// </returns>
        public static SignatureSearchResult SearchLocalFileForSignatures(string localFilePath, SizeBasedCompleteSignature sig)
        {
            var result = new SignatureSearchResult();

            // Ask the file system for the length instead of opening (and having to close) a read/write stream.
            var fileLength = new FileInfo(localFilePath).Length;

            // Signatures we can reuse.
            var signaturesToReuse = new List <BlockSignature>();

            // Byte ranges that have not been matched to existing blocks yet.
            var remainingByteList = new List <RemainingBytes>();

            // An empty file has nothing to match or upload, and MemoryMappedFile.CreateFromFile
            // rejects empty files, so only map the file when it has content.
            if (fileLength > 0)
            {
                remainingByteList.Add(new RemainingBytes {
                    BeginOffset = 0, EndOffset = fileLength - 1
                });

                // Important to search from the largest block size to the smallest.
                var signatureSizes = sig.Signatures.Keys.OrderByDescending(size => size).ToList();

                using (var mmf = MemoryMappedFile.CreateFromFile(localFilePath, FileMode.Open))
                {
                    using (var accessor = mmf.CreateViewAccessor())
                    {
                        // Open question carried over from the original notes: should very small
                        // signatures (under ~100 bytes) be skipped to avoid searching for tiny blocks everywhere?
                        foreach (var sigSize in signatureSizes)
                        {
                            var sigs = sig.Signatures[sigSize];

                            // Each pass returns the ranges that are still unmatched; feed them into the next size.
                            remainingByteList = SearchLocalFileForSignaturesBasedOnSize(sigs, accessor, remainingByteList, sigSize, fileLength, signaturesToReuse);
                        }
                    }
                }
            }

            result.ByteRangesToUpload = remainingByteList;
            result.SignaturesToReuse  = signaturesToReuse;
            return(result);
        }