Exemple #1
0
        /// <summary>
        /// Builds a size-bucketed signature for a local file. The file is read in chunks of up to
        /// <c>ConfigHelper.SignatureSize</c> bytes and one block signature is generated per chunk.
        /// </summary>
        /// <param name="localFilePath">Path of the file to read.</param>
        /// <returns>
        /// A signature whose <c>Signatures</c> dictionary is keyed by the number of bytes in a block;
        /// each entry holds every block signature of that size.
        /// </returns>
        public static SizeBasedCompleteSignature CreateSignatureForLocalFile(string localFilePath)
        {
            var buffer = new byte[ConfigHelper.SignatureSize];
            var sigDict = new Dictionary<int, List<BlockSignature>>();

            // The file is only read, so ask for read access explicitly. Without it FileStream
            // requests ReadWrite access, which fails for read-only files.
            using (var fs = new FileStream(localFilePath, FileMode.Open, FileAccess.Read))
            {
                long offset = 0;
                uint idCount = 0;
                int bytesRead;

                while ((bytesRead = fs.Read(buffer, 0, ConfigHelper.SignatureSize)) > 0)
                {
                    var blockSig = GenerateBlockSig(buffer, offset, bytesRead, idCount);

                    // Bucket by the number of bytes actually read (a trailing chunk may be shorter).
                    List<BlockSignature> sigList;
                    if (!sigDict.TryGetValue(bytesRead, out sigList))
                    {
                        sigList = new List<BlockSignature>();
                        sigDict[bytesRead] = sigList;
                    }

                    sigList.Add(blockSig);

                    offset += bytesRead;
                    idCount++;
                }
            }

            var sizedBaseSignature = new SizeBasedCompleteSignature();
            sizedBaseSignature.Signatures = new Dictionary<int, CompleteSignature>();

            foreach (var entry in sigDict)
            {
                sizedBaseSignature.Signatures[entry.Key] = new CompleteSignature { SignatureList = entry.Value.ToArray() };
            }

            return sizedBaseSignature;
        }
Exemple #2
0
 // Intended to defragment one group of signatures by merging them together.
 // NOTE(review): not implemented - the body is empty, so calling this currently has no effect.
 // blobSig: the complete signature of the blob; sig: the node describing the group to merge.
 private void DefragSigGroup(SizeBasedCompleteSignature blobSig, DefragNode sig)
 {
 }
Exemple #3
0
        /// <summary>
        /// Serializes a signature into memory and uploads it to the signature blob that
        /// <c>AzureHelper.SetSignatureName</c> names for the given container and blob.
        /// </summary>
        /// <param name="blobName">Name of the blob the signature belongs to.</param>
        /// <param name="containerName">Name of the container holding the blob.</param>
        /// <param name="sig">Signature to serialize and upload.</param>
        public void UploadSignatureForBlob(string blobName, string containerName, SizeBasedCompleteSignature sig)
        {
            var container = AzureHelper.GetCloudBlobClient().GetContainerReference(containerName);

            // The signature is stored in its own block blob inside the same container.
            var signatureBlobName = AzureHelper.SetSignatureName(containerName, blobName);
            var signatureBlob = container.GetBlockBlobReference(signatureBlobName);

            using (Stream serialized = new MemoryStream())
            {
                SerializationHelper.WriteBinarySizedBasedSignature(sig, serialized);

                // Rewind so the upload starts at the first serialized byte.
                serialized.Seek(0, SeekOrigin.Begin);
                signatureBlob.UploadFromStream(serialized);
            }
        }
Exemple #4
0
        /// <summary>
        /// Finds runs of consecutive block signatures (in offset order) that together fit within
        /// <c>ConfigHelper.SignatureSize</c> bytes and hands them to <c>DefragSigGroup</c> for merging,
        /// starting with the runs that contain the most signatures (the most fragmented regions).
        /// </summary>
        /// <param name="blobSig">Complete signature of the blob to defragment.</param>
        /// <param name="maxUploadLimitMB">
        /// Upper bound, in megabytes, on the amount of merged data to process; 0 (or a negative value)
        /// means no limit. Processing stops once the running total exceeds the limit.
        /// </param>
        public void DefragBlob(SizeBasedCompleteSignature blobSig, long maxUploadLimitMB = 2)
        {
            var allBlobSigs = blobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

            var targetSigSize = ConfigHelper.SignatureSize;

            // For every starting signature, work out how many consecutive signatures could be merged
            // without the merged block growing beyond the target size.
            var defragNodeList = new List<DefragNode>();
            for (var i = 0; i < allBlobSigs.Count; i++)
            {
                uint sigSize = 0;
                var mergedCount = 0;

                for (var j = i; j < allBlobSigs.Count; j++)
                {
                    var sig = allBlobSigs[j];

                    // stop as soon as the next signature would make the merged block too big.
                    if (sigSize + sig.Size > targetSigSize)
                    {
                        break;
                    }

                    sigSize += sig.Size;
                    mergedCount++;
                }

                // Count the signatures actually merged. Deriving the count from the loop index was
                // one too low whenever the run reached the end of the list.
                defragNodeList.Add(new DefragNode { Offset = allBlobSigs[i].Offset, Size = sigSize, SigPos = i, NoSigs = mergedCount });
            }

            // the entries with the largest number of sigs (ie most fragmentation) get merged first.
            var sortedList = defragNodeList.OrderByDescending(n => n.NoSigs).ToList();

            // The limit is expressed in megabytes while node sizes are in bytes.
            const long bytesPerMegabyte = 1024L * 1024L;
            var maxUploadBytes = maxUploadLimitMB * bytesPerMegabyte;
            var hasUploadLimit = maxUploadLimitMB > 0;

            long bytesToUpload = 0;
            foreach (var sig in sortedList)
            {
                DefragSigGroup(blobSig, sig);
                bytesToUpload += sig.Size;

                if (hasUploadLimit && bytesToUpload > maxUploadBytes)
                {
                    break;
                }
            }
        }
Exemple #5
0
        /// <summary>
        /// Builds a size-bucketed signature from a list of blocks by grouping each block's
        /// signature under its size.
        /// </summary>
        /// <param name="allBlocks">Blocks whose signatures make up the resulting signature.</param>
        /// <returns>A signature whose <c>Signatures</c> dictionary is keyed by block size.</returns>
        internal static SizeBasedCompleteSignature CreateSignatureFromNewAndReusedBlocks(List<UploadedBlock> allBlocks)
        {
            var blocksBySize = new Dictionary<int, List<BlockSignature>>();

            foreach (var block in allBlocks)
            {
                var sizeKey = (int) block.Sig.Size;

                // Create the bucket for this size the first time it is seen.
                List<BlockSignature> bucket;
                if (!blocksBySize.TryGetValue(sizeKey, out bucket))
                {
                    bucket = new List<BlockSignature>();
                    blocksBySize[sizeKey] = bucket;
                }

                bucket.Add(block.Sig);
            }

            var result = new SizeBasedCompleteSignature();
            result.Signatures = new Dictionary<int, CompleteSignature>();

            // Freeze every bucket into an array-backed CompleteSignature.
            foreach (var entry in blocksBySize)
            {
                result.Signatures[entry.Key] = new CompleteSignature { SignatureList = entry.Value.ToArray() };
            }

            return result;
        }
        /// <summary>
        /// Reads a <c>SizeBasedCompleteSignature</c> from a stream, always starting at the beginning.
        /// Layout as read here: a 4-byte count of complete signatures, then for each one a 4-byte
        /// key (block size) followed by whatever <c>ReadBinaryCompleteSignature</c> consumes.
        /// </summary>
        /// <param name="s">Seekable stream containing the serialized signature.</param>
        /// <returns>The signature, with one entry per key size found in the stream.</returns>
        public static SizeBasedCompleteSignature ReadSizeBasedBinarySignature(Stream s)
        {
            // always read from the start, regardless of the stream's current position.
            s.Seek(0, SeekOrigin.Begin);

            var input = new BinaryReader(s);
            var signaturesBySize = new Dictionary<int, CompleteSignature>();

            for (var remaining = input.ReadInt32(); remaining > 0; remaining--)
            {
                int blockSize = input.ReadInt32();
                signaturesBySize[blockSize] = ReadBinaryCompleteSignature(s);
            }

            var result = new SizeBasedCompleteSignature();
            result.Signatures = signaturesBySize;
            return result;
        }
Exemple #7
0
        /// <summary>
        /// Searches a local file for blocks that match an existing signature.
        /// Signature sizes are tried from largest to smallest; each pass narrows the list of byte
        /// ranges that are still unmatched.
        /// </summary>
        /// <param name="localFilePath">Path of the local file to search.</param>
        /// <param name="sig">Existing signature whose blocks are looked for in the file.</param>
        /// <returns>
        /// The signatures that can be reused and the byte ranges of the local file that matched
        /// nothing. For an empty file both lists are empty.
        /// </returns>
        public static SignatureSearchResult SearchLocalFileForSignatures(string localFilePath, SizeBasedCompleteSignature sig)
        {
            var result = new SignatureSearchResult();

            // Only the length is needed, so read it from the file metadata rather than opening
            // (and possibly leaking) a read/write stream.
            var fileLength = new FileInfo(localFilePath).Length;

            // signatures we can reuse.
            var signaturesToReuse = new List<BlockSignature>();

            // byte ranges that have not been matched to existing blocks yet.
            var remainingByteList = new List<RemainingBytes>();

            // An empty file has nothing to match and cannot be memory-mapped, so skip the search
            // rather than seeding a bogus 0..-1 range and failing in CreateFromFile.
            if (fileLength > 0)
            {
                remainingByteList.Add(new RemainingBytes {BeginOffset = 0, EndOffset = fileLength - 1});

                // important to search from largest to smallest block size.
                var signatureSizes = sig.Signatures.Keys.OrderByDescending(size => size).ToList();

                using (var mmf = MemoryMappedFile.CreateFromFile(localFilePath, FileMode.Open))
                using (var accessor = mmf.CreateViewAccessor())
                {
                    // Any sigs smaller than 100 bytes? skip?
                    // Really want to avoid searching for single bytes everywhere.
                    foreach (var sigSize in signatureSizes)
                    {
                        var sigs = sig.Signatures[sigSize];
                        remainingByteList = SearchLocalFileForSignaturesBasedOnSize(sigs, accessor, remainingByteList, sigSize, fileLength, signaturesToReuse);
                    }
                }
            }

            result.ByteRangesToUpload = remainingByteList;
            result.SignaturesToReuse = signaturesToReuse;
            return result;
        }
        /// <summary>
        /// Flattens every block signature of <paramref name="sig"/>, orders them by offset and
        /// passes the ordered list to <c>PopulateRootNodes</c>. Does nothing when the signature
        /// has no <c>Signatures</c> dictionary.
        /// </summary>
        /// <param name="sigTV">Tree view to populate.</param>
        /// <param name="sig">Signature whose blocks are displayed.</param>
        /// <param name="sigDict">Node-to-signatures map passed on to <c>PopulateRootNodes</c>.</param>
        private void PopulateSignatureTreeByOffset(TreeView sigTV, SizeBasedCompleteSignature sig, Dictionary<TreeNode, List<BlockSignature>> sigDict)
        {
            if (sig.Signatures == null)
            {
                return;
            }

            // Size buckets are irrelevant here; only the position within the file matters.
            var blocksByOffset = sig.Signatures.Values
                                    .SelectMany(complete => complete.SignatureList)
                                    .OrderBy(block => block.Offset)
                                    .ToList();

            PopulateRootNodes(sigTV, blocksByOffset, sigDict);
        }
        /// <summary>
        /// Shows the details (offset, size, rolling signature, MD5) of the block signature behind a
        /// child tree node. The first token of the parent node's text is parsed as the signature size
        /// and the first token of the node's own text as the offset.
        /// Nodes without a parent (and a null node) are ignored.
        /// </summary>
        /// <param name="selectedNode">Node that was double-clicked; may be null.</param>
        /// <param name="sig">Signature in which the block is looked up.</param>
        private void ProcessDoubleClick(TreeNode selectedNode, SizeBasedCompleteSignature sig)
        {
            if (selectedNode == null || selectedNode.Parent == null)
            {
                return;
            }

            var parentTokens = selectedNode.Parent.Text.Split();
            var sigSize = Convert.ToInt32(parentTokens[0]);
            var offset = Convert.ToInt64(selectedNode.Text.Split()[0]);

            // First() throws when no block with this offset exists in the size bucket.
            var block = sig.Signatures[sigSize].SignatureList.First(s => s.Offset == offset);

            var md5String = ByteArrayToString(block.MD5Signature);
            var rollingSig = string.Format("{0}:{1}", block.RollingSig.Sig1, block.RollingSig.Sig2);

            var msg = string.Format("Offset: {0}\nSize: {1}\nRollingSig: {2}\nMD5: {3}", block.Offset.ToString(),
                                    block.Size.ToString(), rollingSig, md5String);

            MessageBox.Show(msg);
        }
        /// <summary>
        /// Loads a signature file, records its MD5 dictionary and calculated file size for the
        /// left ("sigTreeView") or right tree, and repopulates the tree view.
        /// </summary>
        /// <param name="filename">Path of the signature file to read.</param>
        /// <param name="sig">Receives the signature read from the file.</param>
        /// <param name="sigDict">Node-to-signatures map passed on when populating the tree.</param>
        /// <param name="sigTV">Tree view to populate; its name selects which side is updated.</param>
        private void LoadSigFile(string filename, ref SizeBasedCompleteSignature sig, Dictionary<TreeNode, List<BlockSignature>> sigDict, TreeView sigTV)
        {
            // The file is only read: request read access so read-only signature files can be
            // loaded, and close the stream as soon as deserialization is done.
            using (var fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
            {
                sig = SerializationHelper.ReadSizeBasedBinarySignature(fs);
            }

            // NOTE(review): the verification result is not acted upon here.
            VerifySignature(sig);

            if (sigTV.Name == "sigTreeView")
            {
                sig1MD5Dict = GenerateMD5DictFromSig(sig);
                file1Size = CalculateFileSize(sig);
            }
            else
            {
                sig2MD5Dict = GenerateMD5DictFromSig(sig);
                file2Size = CalculateFileSize(sig);
            }

            bothFilesShared = 0;
            PopulateSignatureTree(sig, sigDict, sigTV);
        }
        /// <summary>
        /// Clears and repopulates a signature tree view, then refreshes the shared/new size labels
        /// and the total-size label of the side the tree belongs to.
        /// </summary>
        /// <param name="sig">Signature to display.</param>
        /// <param name="sigDict">Node-to-signatures map passed on when populating the tree.</param>
        /// <param name="sigTV">Tree view to populate; the one named "sigTreeView" is the left tree.</param>
        private void PopulateSignatureTree(SizeBasedCompleteSignature sig, Dictionary<TreeNode, List<BlockSignature>> sigDict, TreeView sigTV)
        {
            sigTV.Nodes.Clear();

            var isLeftTree = sigTV.Name == "sigTreeView";

            // Reset the shared-byte counter before the tree is rebuilt.
            bothFilesShared = 0;
            PopulateSignatureTreeByOffset(sigTV, sig, sigDict);

            sharedSize.Text = bothFilesShared.ToString("N0");
            newSize.Text = (file2Size - bothFilesShared).ToString("N0");

            if (!isLeftTree)
            {
                file2TotalSize.Text = file2Size.ToString("N0");
            }
            else
            {
                file1TotalSize.Text = file1Size.ToString("N0");
            }

            sigTV.Update();
        }
        /// <summary>
        /// Produces one (MD5, offset) tuple for every block signature in <paramref name="sig"/>.
        /// </summary>
        /// <param name="sig">Signature to flatten; may be null or have no <c>Signatures</c> dictionary.</param>
        /// <returns>The list of tuples; empty when there is nothing to flatten.</returns>
        private List<Tuple<byte[], long>> GenerateMD5ListFromSig(SizeBasedCompleteSignature? sig)
        {
            if (!sig.HasValue || sig.Value.Signatures == null)
            {
                return new List<Tuple<byte[], long>>();
            }

            return sig.Value.Signatures.Values
                      .SelectMany(complete => complete.SignatureList)
                      .Select(block => new Tuple<byte[], long>(block.MD5Signature, block.Offset))
                      .ToList();
        }
 // Flattens the signature into (MD5, offset) tuples and turns them into a dictionary
 // via GenerateMD5Dict.
 private Dictionary<string, long> GenerateMD5DictFromSig(SizeBasedCompleteSignature? sig)
 {
     return GenerateMD5Dict(GenerateMD5ListFromSig(sig));
 }
        /// <summary>
        /// Calculates the file size described by a signature as the sum, over every size bucket,
        /// of the bucket's key multiplied by the number of block signatures in it.
        /// </summary>
        /// <param name="sig">Signature to measure.</param>
        /// <returns>The total size in bytes.</returns>
        private long CalculateFileSize(SizeBasedCompleteSignature sig)
        {
            // Aggregate keeps the plain (unchecked) long arithmetic of a manual running total.
            return sig.Signatures.Aggregate(
                0L,
                (total, bucket) => total + (long) bucket.Key * (long) bucket.Value.SignatureList.Count());
        }
Exemple #15
0
        /// <summary>
        /// Works out which byte ranges of the cloud blob must be downloaded: every block of the blob
        /// signature whose MD5 is not among the reusable signatures yields one range.
        /// </summary>
        /// <param name="sigsToReuseList">Signatures of blocks that are already available locally.</param>
        /// <param name="cloudBlobSig">Complete signature of the blob in the cloud.</param>
        /// <param name="containerName">Container of the blob (not needed for the calculation).</param>
        /// <param name="blobName">Name of the blob (not needed for the calculation).</param>
        /// <returns>
        /// Ranges in offset order. Each range starts where the previous block ended and ends on the
        /// last byte of the block that has to be downloaded.
        /// </returns>
        private List<RemainingBytes> GenerateByteRangesOfBlobToDownload(List<BlockSignature> sigsToReuseList, SizeBasedCompleteSignature cloudBlobSig, string containerName, string blobName)
        {
            // NOTE(review): the unused AzureHelper.GetBlobSize call and the unused sorted copy of
            // sigsToReuseList were dropped; the size lookup cost a service call per invocation.
            var remainingBytesList = new List<RemainingBytes>();
            var allBlobSigs = cloudBlobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

            // Index the reusable MD5s once so each block is a set lookup rather than a scan of
            // the whole reuse list. Base64 gives byte arrays a value-comparable key.
            var reusableMd5s = new HashSet<string>(sigsToReuseList.Select(s => Convert.ToBase64String(s.MD5Signature)));

            long startOffsetToCopy = 0;

            foreach (var sig in allBlobSigs)
            {
                var haveMatchingSig = reusableMd5s.Contains(Convert.ToBase64String(sig.MD5Signature));
                if (!haveMatchingSig)
                {
                    // no match: everything from startOffsetToCopy to the end of this block is needed.
                    remainingBytesList.Add(new RemainingBytes()
                    {
                        BeginOffset = startOffsetToCopy,
                        EndOffset = sig.Offset + sig.Size - 1
                    });
                }

                // matched or not, the next range can only start after the current block.
                startOffsetToCopy = sig.Offset + sig.Size;
            }

            return remainingBytesList;
        }
        /// <summary>
        /// Quick sanity check that no two block signatures share the same MD5.
        /// </summary>
        /// <param name="sig">Signature to check.</param>
        /// <returns>True when every MD5 is unique; false as soon as any duplicate exists.</returns>
        private Boolean VerifySignature(SizeBasedCompleteSignature sig)
        {
            var seenMd5s = new HashSet<string>();
            var allUnique = true;

            foreach (var completeSig in sig.Signatures.Values)
            {
                foreach (var block in completeSig.SignatureList)
                {
                    // Add returns false when the MD5 string was already recorded.
                    if (!seenMd5s.Add(ByteArrayToString(block.MD5Signature)))
                    {
                        allUnique = false;
                    }
                }
            }

            return allUnique;
        }
Exemple #17
0
        // Regenerates the blob locally: the new content is assembled in "<localFilePath>.new" and
        // then swapped in for the original file.
        // Every block of the blob signature (in offset order) is either
        //   - copied from the existing local file, when a reusable block with the same rolling
        //     signature and MD5 exists, OR
        //   - downloaded from Azure, when a byte range to download starts at the current offset.
        // Throws EndOfStreamException when the local file is shorter than a reusable block claims.
        private void RegenerateBlob(string containerName, string blobName, List<RemainingBytes> byteRangesToDownload, string localFilePath, List<BlockSignature> reusableBlockSignatures, SizeBasedCompleteSignature blobSig, int parallelFactor = 2)
        {
            // removing size from the equation.
            var allBlobSigs =
                blobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

            // LUT to see if block is to be reused or not.
            var reusableBlockDict = CommonOps.GenerateBlockDict(reusableBlockSignatures.ToArray());

            // running total of bytes written so far; used to find the download range to apply next.
            var offset = 0L;

            // The existing local file is only read from, so open it read-only.
            using (var localStream = new FileStream(localFilePath, FileMode.Open, FileAccess.Read))
            using (var newStream = new FileStream(localFilePath + ".new", FileMode.Create))
            {
                // go through all sigs in offset order....  determine if can reuse or need to download.
                foreach (var sig in allBlobSigs)
                {
                    var haveMatch = false;
                    if (reusableBlockDict.ContainsKey(sig.RollingSig))
                    {
                        // rolling signature matches; confirm with the MD5 before reusing local data.
                        var localSig = reusableBlockDict[sig.RollingSig];

                        var matchingLocalSigs =
                                localSig.Where(s => s.MD5Signature.SequenceEqual(sig.MD5Signature))
                                    .ToList();

                        if (matchingLocalSigs.Count > 0)
                        {
                            var matchingLocalSig = matchingLocalSigs[0];
                            var blockLength = (int) matchingLocalSig.Size;

                            // huge amount of wasted allocations...  maybe move this.
                            var buffer = new byte[blockLength];

                            localStream.Seek(matchingLocalSig.Offset, SeekOrigin.Begin);

                            // Stream.Read may return fewer bytes than requested, so keep reading
                            // until the whole block is in the buffer.
                            var totalRead = 0;
                            while (totalRead < blockLength)
                            {
                                var read = localStream.Read(buffer, totalRead, blockLength - totalRead);
                                if (read == 0)
                                {
                                    // The local file no longer contains the block the signature
                                    // describes; writing a partly empty buffer would corrupt the output.
                                    throw new EndOfStreamException(string.Format(
                                        "Local file '{0}' ended before the {1}-byte block at offset {2} could be read.",
                                        localFilePath, blockLength, matchingLocalSig.Offset));
                                }

                                totalRead += read;
                            }

                            newStream.Seek(sig.Offset, SeekOrigin.Begin);
                            newStream.Write(buffer, 0, blockLength);

                            haveMatch = true;
                            offset += matchingLocalSig.Size;
                        }
                    }

                    if (!haveMatch)
                    {
                        // check if we have byte ranges starting at offset.
                        // NOTE(review): when no range starts here the block is skipped silently.
                        var byteRange =
                            (from b in byteRangesToDownload where b.BeginOffset == offset select b).FirstOrDefault();
                        if (byteRange != null)
                        {
                            // download bytes.
                            var blobBytes = DownloadBytes(containerName, blobName, byteRange.BeginOffset,
                                byteRange.EndOffset, parallelFactor);

                            newStream.Seek(sig.Offset, SeekOrigin.Begin);
                            newStream.Write(blobBytes, 0, (int)(byteRange.EndOffset - byteRange.BeginOffset + 1));

                            offset += (byteRange.EndOffset - byteRange.BeginOffset + 1);
                        }
                    }
                }
            }

            // rename .new file to original
            File.Replace(localFilePath + ".new", localFilePath, null);
        }
        /// <summary>
        /// Writes a <c>SizeBasedCompleteSignature</c> to a stream in binary form.
        /// Layout: the number of key sizes, then for each key size the key, the number of block
        /// signatures and, per block signature, its offset, size, block number, the two rolling
        /// signature values and the MD5 bytes.
        /// </summary>
        /// <param name="sig">Signature to serialize.</param>
        /// <param name="s">Destination stream; it is left open.</param>
        public static void WriteBinarySizedBasedSignature(SizeBasedCompleteSignature sig, Stream s)
        {
            var output = new BinaryWriter(s);

            // header: how many size buckets follow.
            int bucketCount = sig.Signatures.Keys.Count;
            output.Write(bucketCount);

            foreach (var bucket in sig.Signatures)
            {
                int keySize = bucket.Key;
                output.Write(keySize);

                var blocks = bucket.Value.SignatureList;

                // number of entries for this key size.
                int entryCount = blocks.Length;
                output.Write(entryCount);

                foreach (var block in blocks)
                {
                    // field widths follow the declared field types (noted in the original as
                    // 8, 4, 4, 8 and 8 bytes respectively - TODO confirm against BlockSignature).
                    output.Write(block.Offset);
                    output.Write(block.Size);
                    output.Write(block.BlockNumber);
                    output.Write(block.RollingSig.Sig1);
                    output.Write(block.RollingSig.Sig2);

                    // MD5 is written one byte at a time (expected to be 16 bytes).
                    var md5 = block.MD5Signature;
                    for (var index = 0; index < md5.Length; index++)
                    {
                        output.Write(md5[index]);
                    }
                }
            }
        }