Exemple #1
0
        // updates blob if possible.
        // if blob doesn't already exist OR does not have a signature file
        // then we just upload as usual.
        public long UploadFile(string containerName, string blobName, string localFilePath)
        {
            var fileLength = CommonOps.GetFileSize(localFilePath);

            // not used here but is cached for later.
            // WORK IN PROGRESS DONT ERASE THIS LINE.
            //ConfigHelper.GetSignatureSize(fileLength, true);

            // 1) Does remote blob exist?
            // 2) if so, download existing signature for blob.
            if (AzureHelper.DoesBlobExist(containerName, blobName) && AzureHelper.DoesBlobSignatureExist(containerName, blobName))
            {
                // 3) If blob exists and have signature, then let the magic begin.
                // 3.1) Download existing blob signature from Azure.
                // 3.2) Search through local file for matches in existing blob signature.
                // 3.3) Upload differences to Azure
                // 3.4) Upload new signature.s

                var blobSig       = DownloadSignatureForBlob(containerName, blobName);
                var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);
                var allBlocks     = UploadDelta(localFilePath, searchResults, containerName, blobName);
                var sig           = CommonOps.CreateSignatureFromNewAndReusedBlocks(allBlocks);

                UploadSignatureForBlob(blobName, containerName, sig);

                long bytesUploaded = allBlocks.Where(b => b.IsNew).Select(b => b.Size).Sum();

                return(bytesUploaded);
            }
            else
            {
                // 4) If blob or signature does NOT exist, just upload as normal. No tricky stuff to do here.
                // 4.1) Generate signature and upload it.

                var remainingBytes = new RemainingBytes()
                {
                    BeginOffset = 0,
                    EndOffset   = fileLength - 1
                };

                var allUploadedBlocks = UploadBytesParallel(remainingBytes, localFilePath, containerName, blobName);
                // var allUploadedBlocks = UploadBytes(remainingBytes, localFilePath, containerName, blobName);

                var res = (from b in allUploadedBlocks orderby b.Offset ascending select b.BlockId);
                PutBlockList(res.ToArray(), containerName, blobName);

                var sig = CommonOps.CreateSignatureForLocalFile(localFilePath);
                UploadSignatureForBlob(blobName, containerName, sig);

                return(fileLength);
            }
        }
Exemple #2
0
        // updates blob if possible.
        // if blob doesn't already exist OR does not have a signature file
        // then we just upload as usual.
        public SizeBasedCompleteSignature GenerateDeltaSigFromLocalResources(string localSigPath, string localFilePath)
        {
            using (var fs = new FileStream(localSigPath, FileMode.Open))
            {
                var sig           = SerializationHelper.ReadSizeBasedBinarySignature(fs);
                var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, sig);
                var allBlocks     = UploadDelta(localFilePath, searchResults, null, null, true);

                var newSig = CommonOps.CreateSignatureFromNewAndReusedBlocks(allBlocks);


                return(newSig);
            }
        }
Exemple #3
0
        // updates blob if possible.
        // if blob doesn't already exist OR does not have a signature file
        // then we just upload as usual.
        public long CalculateDeltaSizeFromLocalSig(string localSigPath, string localFilePath)
        {
            using (var fs = new FileStream(localSigPath, FileMode.Open))
            {
                var sig           = SerializationHelper.ReadSizeBasedBinarySignature(fs);
                var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, sig);

                long total = 0;
                foreach (var remainingBytes in searchResults.ByteRangesToUpload)
                {
                    total += (remainingBytes.EndOffset - remainingBytes.BeginOffset + 1);
                }

                return(total);
            }
        }
Exemple #4
0
        /// <summary>
        /// Yes, copying the byte array to here. But given we'll not have many of these tasks going to parallel
        /// and each byte array is AT MOST 4M, I think I can live with the memory overhead.
        /// </summary>
        /// <param name="offset"></param>
        /// <param name="bytesRead"></param>
        /// <param name="bytesToRead"></param>
        /// <param name="blob"></param>
        /// <param name="uploadedBlockList"></param>
        /// <param name="testMode"></param>
        /// <returns></returns>
        private Task WriteBytes(long offset, int bytesRead, byte[] bytesToRead, CloudBlockBlob blob, ConcurrentBag <UploadedBlock> uploadedBlockList, bool testMode)
        {
            var t = Task.Factory.StartNew(() =>
            {
                var sig     = CommonOps.GenerateBlockSig(bytesToRead, offset, (int)bytesRead, 0);
                var blockId = Convert.ToBase64String(sig.MD5Signature);

                bool isDupe = false;
                lock (parallelLock)
                {
                    isDupe = uploadedBlockList.Any(ub => ub.BlockId == blockId);

                    // store the block id that is associated with this byte range.
                    uploadedBlockList.Add(new UploadedBlock()
                    {
                        BlockId     = blockId,
                        Offset      = offset,
                        Sig         = sig,
                        Size        = bytesRead,
                        IsNew       = true,
                        IsDuplicate = isDupe
                    });
                }

                if (!testMode)
                {
                    if (!isDupe)
                    {
                        // yes, putting into memory stream is probably a waste here.
                        using (var ms = new MemoryStream(bytesToRead))
                        {
                            var options = new BlobRequestOptions()
                            {
                                ServerTimeout = new TimeSpan(0, 90, 0)
                            };
                            blob.PutBlock(blockId, ms, null, null, options);
                        }
                    }
                }
            });

            return(t);
        }
Exemple #5
0
        // updates blob if possible.
        // if blob doesn't already exist OR does not have a signature file
        // then we just upload as usual.
        public long CalculateDeltaSize(string containerName, string blobName, string localFilePath)
        {
            // 1) Does remote blob exist?
            // 2) if so, download existing signature for blob.
            if (!string.IsNullOrEmpty(blobName) && !string.IsNullOrEmpty(containerName) && AzureHelper.DoesBlobExist(containerName, blobName) && AzureHelper.DoesBlobSignatureExist(containerName, blobName))
            {
                // 3) If blob exists and have signature, then let the magic begin.
                // 3.1) Download existing blob signature from Azure.
                // 3.2) Search through local file for matches in existing blob signature.
                // 3.3) Upload differences to Azure
                // 3.4) Upload new signature.s

                var blobSig       = DownloadSignatureForBlob(containerName, blobName);
                var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);

                long total = 0;
                foreach (var remainingBytes in searchResults.ByteRangesToUpload)
                {
                    total += (remainingBytes.EndOffset - remainingBytes.BeginOffset);
                }

                return(total);
            }
            else
            {
                var fileLength = CommonOps.GetFileSize(localFilePath);

                var remainingBytes = new RemainingBytes()
                {
                    BeginOffset = 0,
                    EndOffset   = fileLength - 1
                };

                // upload all bytes of new file. UploadBytes method will break into appropriate sized blocks.
                var allUploadedBlocks = UploadBytes(remainingBytes, localFilePath, containerName, blobName, true);

                var sizeUploaded = allUploadedBlocks.Where(b => !b.IsDuplicate).Sum(b => b.Size);

                return(sizeUploaded);
            }
        }
Exemple #6
0
        public long DownloadBlob(string containerName, string blobName, string localFilePath, int parallelFactor = 2)
        {
            long bytesDownloaded = 0;

            if (CommonOps.DoesFileExist(localFilePath))
            {
                // local file exists.
                // 1) generate sig for local file.
                // 2) download sig for blob.

                var blobSig       = DownloadSignatureForBlob(containerName, blobName);
                var localSig      = CommonOps.CreateSignatureForLocalFile(localFilePath);
                var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);

                // we now have a list of which blocks are already in the local file (searchResults.SignaturesToReuse)
                // We need to then determine the byteranges which are NOT covered by these blocks
                // and download those.
                // Then we need to get the blocks that already exist in the local file, read those then write them to the new file.
                var byteRangesToDownload = GenerateByteRangesOfBlobToDownload(searchResults.SignaturesToReuse, blobSig,
                                                                              containerName, blobName);

                RegenerateBlob(containerName, blobName, byteRangesToDownload, localFilePath, searchResults.SignaturesToReuse, blobSig, parallelFactor);

                foreach (var byteRange in byteRangesToDownload)
                {
                    bytesDownloaded += byteRange.EndOffset - byteRange.BeginOffset;
                }
            }
            else
            {
                // download fresh copy.
                // get stream to store.
                using (var stream = CommonHelper.GetStream(localFilePath))
                {
                    bytesDownloaded = DownloadBlob(containerName, blobName, stream, parallelFactor);
                }
            }

            return(bytesDownloaded);
        }
Exemple #7
0
        // regenerate blob locally.
        // we need to either download byte ranges from Azure.
        // OR
        // need to copy from local file.
        private void RegenerateBlob(string containerName, string blobName, List <RemainingBytes> byteRangesToDownload, string localFilePath, List <BlockSignature> reusableBlockSignatures, SizeBasedCompleteSignature blobSig, int parallelFactor = 2)
        {
            // removing size from the equation.
            var allBlobSigs =
                blobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

            // LUT to see if block is to be reused or not.
            var reusableBlockDict = CommonOps.GenerateBlockDict(reusableBlockSignatures.ToArray());

            var offset = 0L;

            using (var localStream = new FileStream(localFilePath, FileMode.Open))
                using (var newStream = new FileStream(localFilePath + ".new", FileMode.Create))
                {
                    // go through all sigs in offset order....  determine if can reuse or need to download.
                    foreach (var sig in allBlobSigs)
                    {
                        var haveMatch = false;
                        if (reusableBlockDict.ContainsKey(sig.RollingSig))
                        {
                            // have a match... so will reuse local file.
                            var localSig = reusableBlockDict[sig.RollingSig];

                            var matchingLocalSigs =
                                localSig.Where(s => s.MD5Signature.SequenceEqual(sig.MD5Signature))
                                .Select(n => n)
                                .ToList();

                            if (matchingLocalSigs.Any())
                            {
                                // have a match.
                                var matchingLocalSig = matchingLocalSigs[0];

                                // huge amount of wasted allocations...  maybe move this.
                                var buffer = new byte[matchingLocalSig.Size];

                                localStream.Seek(matchingLocalSig.Offset, SeekOrigin.Begin);
                                localStream.Read(buffer, 0, (int)matchingLocalSig.Size);

                                newStream.Seek(sig.Offset, SeekOrigin.Begin);
                                newStream.Write(buffer, 0, (int)matchingLocalSig.Size);

                                haveMatch = true;
                                offset   += matchingLocalSig.Size;
                            }
                        }

                        if (!haveMatch)
                        {
                            // check if we have byte ranges starting at offset.
                            var byteRange =
                                (from b in byteRangesToDownload where b.BeginOffset == offset select b).FirstOrDefault();
                            if (byteRange != null)
                            {
                                // download bytes.
                                var blobBytes = DownloadBytes(containerName, blobName, byteRange.BeginOffset,
                                                              byteRange.EndOffset, parallelFactor);

                                newStream.Seek(sig.Offset, SeekOrigin.Begin);
                                newStream.Write(blobBytes, 0, (int)(byteRange.EndOffset - byteRange.BeginOffset + 1));

                                offset += (byteRange.EndOffset - byteRange.BeginOffset + 1);
                            }
                        }
                    }
                }

            // rename .new file to original
            File.Replace(localFilePath + ".new", localFilePath, null);
        }
Exemple #8
0
        // updates blob if possible.
        // if blob doesn't already exist OR does not have a signature file
        // then we just upload as usual.
        public long UploadFile(string containerName, string blobName, string localFilePath, int parallelFactor = 2)
        {
            var fileLength = CommonOps.GetFileSize(localFilePath);
            var sw         = new Stopwatch();

            sw.Start();
            var md5ForFile = GetFileMD5(localFilePath);

            // 1) Does remote blob exist?
            // 2) if so, download existing signature for blob.
            if (AzureHelper.DoesBlobExist(containerName, blobName) && AzureHelper.DoesBlobSignatureExist(containerName, blobName))
            {
                var md5ForBlob = GetBlobMD5(containerName, blobName);

                // only continue if files are actually different.
                if (md5ForBlob != md5ForFile)
                {
                    // 3) If blob exists and have signature, then let the magic begin.
                    // 3.1) Download existing blob signature from Azure.
                    // 3.2) Search through local file for matches in existing blob signature.
                    // 3.3) Upload differences to Azure
                    // 3.4) Upload new signature.s

                    var blobSig = DownloadSignatureForBlob(containerName, blobName);
                    Console.WriteLine(string.Format("Dowloaded sig {0}ms", sw.ElapsedMilliseconds));

                    var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);

                    Console.WriteLine(string.Format("Searched for common {0}ms", sw.ElapsedMilliseconds));

                    var allBlocks = UploadDelta(localFilePath, searchResults, containerName, blobName, parallelFactor: parallelFactor);
                    var sig       = CommonOps.CreateSignatureFromNewAndReusedBlocks(allBlocks);

                    UploadSignatureForBlob(blobName, containerName, sig);

                    // set md5 for entire blob
                    AzureHelper.SetBlobMD5(containerName, blobName, md5ForFile);

                    long bytesUploaded = allBlocks.Where(b => b.IsNew).Select(b => b.Size).Sum();

                    return(bytesUploaded);
                }

                return(0);   // no bytes changed, no bytes uploaded
            }
            else
            {
                // 4) If blob or signature does NOT exist, just upload as normal. No tricky stuff to do here.
                // 4.1) Generate signature and upload it.

                var remainingBytes = new RemainingBytes()
                {
                    BeginOffset = 0,
                    EndOffset   = fileLength - 1
                };

                var allUploadedBlocks = UploadBytesParallel(remainingBytes, localFilePath, containerName, blobName, parallelFactor: parallelFactor);
                var res = (from b in allUploadedBlocks orderby b.Offset ascending select b.BlockId);
                PutBlockList(res.ToArray(), containerName, blobName);

                var sig = CommonOps.CreateSignatureForLocalFile(localFilePath);
                UploadSignatureForBlob(blobName, containerName, sig);

                // set md5 for entire blob
                AzureHelper.SetBlobMD5(containerName, blobName, md5ForFile);

                return(fileLength);
            }
        }
Exemple #9
0
        private List <UploadedBlock> UploadBytes(RemainingBytes remainingBytes, string localFilePath, string containerName, string blobName, bool testMode = false)
        {
            var uploadedBlockList = new List <UploadedBlock>();

            try
            {
                CloudBlockBlob blob = null;
                if (!testMode)
                {
                    var client    = AzureHelper.GetCloudBlobClient();
                    var container = client.GetContainerReference(containerName);
                    container.CreateIfNotExists();
                    blob = container.GetBlockBlobReference(blobName);
                }
                var blockCount =
                    Math.Round((double)(remainingBytes.EndOffset - remainingBytes.BeginOffset + 1) /
                               (double)ConfigHelper.SignatureSize, MidpointRounding.AwayFromZero);

                using (var stream = new FileStream(localFilePath, FileMode.Open))
                {
                    for (var offset = remainingBytes.BeginOffset; offset <= remainingBytes.EndOffset;)
                    {
                        var sizeToRead = offset + ConfigHelper.SignatureSize <= remainingBytes.EndOffset
                            ? ConfigHelper.SignatureSize
                            : remainingBytes.EndOffset - offset + 1;

                        if (sizeToRead == 0)
                        {
                            var error = "";
                        }

                        // seek to the offset we need. Dont forget remaining bytes may be bigger than the signature size
                        // we want to deal with.
                        stream.Seek(offset, SeekOrigin.Begin);
                        var bytesToRead = new byte[sizeToRead];
                        var bytesRead   = stream.Read(bytesToRead, 0, (int)sizeToRead);

                        var sig     = CommonOps.GenerateBlockSig(bytesToRead, offset, (int)sizeToRead, 0);
                        var blockId = Convert.ToBase64String(sig.MD5Signature);
                        var isDupe  = uploadedBlockList.Any(ub => ub.BlockId == blockId);

                        if (!testMode)
                        {
                            // only upload bytes IF another block hasn't already covered it.
                            // unlikely situation I think, but possibly going to happen for
                            // VM images etc where there is lots of "blank space".

                            if (!isDupe)
                            {
                                // yes, putting into memory stream is probably a waste here.
                                using (var ms = new MemoryStream(bytesToRead))
                                {
                                    var options = new BlobRequestOptions()
                                    {
                                        ServerTimeout = new TimeSpan(0, 90, 0)
                                    };
                                    blob.PutBlock(blockId, ms, null, null, options);
                                }
                            }
                        }

                        // store the block id that is associated with this byte range.
                        uploadedBlockList.Add(new UploadedBlock()
                        {
                            BlockId     = blockId,
                            Offset      = offset,
                            Sig         = sig,
                            Size        = bytesRead,
                            IsNew       = true,
                            IsDuplicate = isDupe
                        });

                        offset += sizeToRead;
                    }
                }
            }
            catch (ArgumentException ex)
            {
                // probably bad container.
                Console.WriteLine("Argument Exception " + ex.ToString());
            }
            finally
            {
            }

            return(uploadedBlockList);
        }