// updates blob if possible.
// if blob doesn't already exist OR does not have a signature file
// then we just upload as usual.
public long UploadFile(string containerName, string blobName, string localFilePath)
{
    var fileLength = CommonOps.GetFileSize(localFilePath);

    // not used here but is cached for later.
    // WORK IN PROGRESS DONT ERASE THIS LINE.
    //ConfigHelper.GetSignatureSize(fileLength, true);

    // 1) Does remote blob exist?
    // 2) If so, download existing signature for blob.
    if (AzureHelper.DoesBlobExist(containerName, blobName) && AzureHelper.DoesBlobSignatureExist(containerName, blobName))
    {
        // 3) If blob exists and has a signature, then let the magic begin.
        // 3.1) Download existing blob signature from Azure.
        // 3.2) Search through local file for matches in existing blob signature.
        // 3.3) Upload differences to Azure.
        // 3.4) Upload new signature.
        var blobSig = DownloadSignatureForBlob(containerName, blobName);
        var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);
        var allBlocks = UploadDelta(localFilePath, searchResults, containerName, blobName);
        var sig = CommonOps.CreateSignatureFromNewAndReusedBlocks(allBlocks);
        UploadSignatureForBlob(blobName, containerName, sig);

        long bytesUploaded = allBlocks.Where(b => b.IsNew).Select(b => b.Size).Sum();
        return bytesUploaded;
    }
    else
    {
        // 4) If blob or signature does NOT exist, just upload as normal. No tricky stuff to do here.
        // 4.1) Generate signature and upload it.
        var remainingBytes = new RemainingBytes()
        {
            BeginOffset = 0,
            EndOffset = fileLength - 1
        };

        var allUploadedBlocks = UploadBytesParallel(remainingBytes, localFilePath, containerName, blobName);
        // var allUploadedBlocks = UploadBytes(remainingBytes, localFilePath, containerName, blobName);
        var res = (from b in allUploadedBlocks orderby b.Offset ascending select b.BlockId);
        PutBlockList(res.ToArray(), containerName, blobName);

        var sig = CommonOps.CreateSignatureForLocalFile(localFilePath);
        UploadSignatureForBlob(blobName, containerName, sig);

        return fileLength;
    }
}

// Generates a new signature for the local file, reusing blocks from an
// existing (local) signature file where possible. Nothing is uploaded;
// UploadDelta is called in test mode.
public SizeBasedCompleteSignature GenerateDeltaSigFromLocalResources(string localSigPath, string localFilePath)
{
    using (var fs = new FileStream(localSigPath, FileMode.Open))
    {
        var sig = SerializationHelper.ReadSizeBasedBinarySignature(fs);
        var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, sig);
        var allBlocks = UploadDelta(localFilePath, searchResults, null, null, true);
        var newSig = CommonOps.CreateSignatureFromNewAndReusedBlocks(allBlocks);
        return newSig;
    }
}

// Calculates how many bytes a delta upload would transfer, based on an
// existing local signature file. Nothing is uploaded.
public long CalculateDeltaSizeFromLocalSig(string localSigPath, string localFilePath)
{
    using (var fs = new FileStream(localSigPath, FileMode.Open))
    {
        var sig = SerializationHelper.ReadSizeBasedBinarySignature(fs);
        var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, sig);

        long total = 0;
        foreach (var remainingBytes in searchResults.ByteRangesToUpload)
        {
            // byte ranges are inclusive, hence the +1.
            total += (remainingBytes.EndOffset - remainingBytes.BeginOffset + 1);
        }

        return total;
    }
}

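// Usage sketch (illustrative only, not part of the original source): combines the two
// local-signature helpers above to estimate a delta upload without touching Azure.
// Assumes it sits in the same class as those helpers; all paths are placeholders.
public SizeBasedCompleteSignature ExampleEstimateDeltaFromLocalSig()
{
    var localSigPath = @"c:\data\backup.vhd.sig";   // previously saved signature file
    var localFilePath = @"c:\data\backup.vhd";      // current version of the file

    // how many bytes would a delta upload need to transfer?
    var deltaBytes = CalculateDeltaSizeFromLocalSig(localSigPath, localFilePath);
    Console.WriteLine("Delta would upload {0} bytes", deltaBytes);

    // build the updated signature purely from local resources (nothing is uploaded).
    return GenerateDeltaSigFromLocalResources(localSigPath, localFilePath);
}
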
/// <summary>
/// Yes, copying the byte array to here. But given we'll not have many of these tasks going in parallel
/// and each byte array is AT MOST 4M, I think I can live with the memory overhead.
/// </summary>
/// <param name="offset"></param>
/// <param name="bytesRead"></param>
/// <param name="bytesToRead"></param>
/// <param name="blob"></param>
/// <param name="uploadedBlockList"></param>
/// <param name="testMode"></param>
/// <returns></returns>
private Task WriteBytes(long offset, int bytesRead, byte[] bytesToRead, CloudBlockBlob blob, ConcurrentBag<UploadedBlock> uploadedBlockList, bool testMode)
{
    var t = Task.Factory.StartNew(() =>
    {
        var sig = CommonOps.GenerateBlockSig(bytesToRead, offset, bytesRead, 0);
        var blockId = Convert.ToBase64String(sig.MD5Signature);

        bool isDupe = false;
        lock (parallelLock)
        {
            isDupe = uploadedBlockList.Any(ub => ub.BlockId == blockId);

            // store the block id that is associated with this byte range.
            uploadedBlockList.Add(new UploadedBlock()
            {
                BlockId = blockId,
                Offset = offset,
                Sig = sig,
                Size = bytesRead,
                IsNew = true,
                IsDuplicate = isDupe
            });
        }

        if (!testMode)
        {
            if (!isDupe)
            {
                // yes, putting into memory stream is probably a waste here.
                using (var ms = new MemoryStream(bytesToRead))
                {
                    var options = new BlobRequestOptions() { ServerTimeout = new TimeSpan(0, 90, 0) };
                    blob.PutBlock(blockId, ms, null, null, options);
                }
            }
        }
    });

    return t;
}

// Calculates how many bytes would need to be uploaded to bring the blob up to date.
// If the blob or its signature does not exist, the whole file is counted.
public long CalculateDeltaSize(string containerName, string blobName, string localFilePath)
{
    // 1) Does remote blob exist?
    // 2) If so, download existing signature for blob.
    if (!string.IsNullOrEmpty(blobName) && !string.IsNullOrEmpty(containerName) &&
        AzureHelper.DoesBlobExist(containerName, blobName) && AzureHelper.DoesBlobSignatureExist(containerName, blobName))
    {
        // 3) If blob exists and has a signature:
        // 3.1) Download existing blob signature from Azure.
        // 3.2) Search through local file for matches in existing blob signature.
        // 3.3) Sum up the byte ranges that would have to be uploaded.
        var blobSig = DownloadSignatureForBlob(containerName, blobName);
        var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);

        long total = 0;
        foreach (var remainingBytes in searchResults.ByteRangesToUpload)
        {
            // byte ranges are inclusive, hence the +1.
            total += (remainingBytes.EndOffset - remainingBytes.BeginOffset + 1);
        }

        return total;
    }
    else
    {
        var fileLength = CommonOps.GetFileSize(localFilePath);
        var remainingBytes = new RemainingBytes()
        {
            BeginOffset = 0,
            EndOffset = fileLength - 1
        };

        // "upload" all bytes of the new file in test mode (nothing is actually sent).
        // UploadBytes will break the range into appropriately sized blocks.
        var allUploadedBlocks = UploadBytes(remainingBytes, localFilePath, containerName, blobName, true);
        var sizeUploaded = allUploadedBlocks.Where(b => !b.IsDuplicate).Sum(b => b.Size);
        return sizeUploaded;
    }
}

public long DownloadBlob(string containerName, string blobName, string localFilePath, int parallelFactor = 2)
{
    long bytesDownloaded = 0;

    if (CommonOps.DoesFileExist(localFilePath))
    {
        // local file exists.
        // 1) generate sig for local file.
        // 2) download sig for blob.
        var blobSig = DownloadSignatureForBlob(containerName, blobName);
        var localSig = CommonOps.CreateSignatureForLocalFile(localFilePath);

        var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);

        // we now have a list of which blocks are already in the local file (searchResults.SignaturesToReuse).
        // We then need to determine the byte ranges which are NOT covered by these blocks and download those.
        // Then we need to get the blocks that already exist in the local file, read those, then write them to the new file.
        var byteRangesToDownload = GenerateByteRangesOfBlobToDownload(searchResults.SignaturesToReuse, blobSig, containerName, blobName);

        RegenerateBlob(containerName, blobName, byteRangesToDownload, localFilePath, searchResults.SignaturesToReuse, blobSig, parallelFactor);

        foreach (var byteRange in byteRangesToDownload)
        {
            // byte ranges are inclusive, hence the +1.
            bytesDownloaded += byteRange.EndOffset - byteRange.BeginOffset + 1;
        }
    }
    else
    {
        // download fresh copy.
        // get stream to store.
        using (var stream = CommonHelper.GetStream(localFilePath))
        {
            bytesDownloaded = DownloadBlob(containerName, blobName, stream, parallelFactor);
        }
    }

    return bytesDownloaded;
}

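// Usage sketch (illustrative only, not part of the original source). Container, blob
// and path names are placeholders. If the local file already exists, only the missing
// byte ranges are fetched and the rest is reused; otherwise the whole blob is downloaded.
public void ExampleDownloadBlob()
{
    var bytes = DownloadBlob("backups", "backup.vhd", @"c:\data\backup.vhd", parallelFactor: 4);
    Console.WriteLine("Downloaded {0} bytes from Azure", bytes);
}
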
// regenerate blob locally.
// we need to either download byte ranges from Azure
// OR
// copy from the local file.
private void RegenerateBlob(string containerName, string blobName, List<RemainingBytes> byteRangesToDownload, string localFilePath, List<BlockSignature> reusableBlockSignatures, SizeBasedCompleteSignature blobSig, int parallelFactor = 2)
{
    // removing size from the equation.
    var allBlobSigs = blobSig.Signatures.Values.SelectMany(x => x.SignatureList).OrderBy(a => a.Offset).ToList();

    // LUT to see if block is to be reused or not.
    var reusableBlockDict = CommonOps.GenerateBlockDict(reusableBlockSignatures.ToArray());

    var offset = 0L;

    using (var localStream = new FileStream(localFilePath, FileMode.Open))
    using (var newStream = new FileStream(localFilePath + ".new", FileMode.Create))
    {
        // go through all sigs in offset order.... determine if we can reuse or need to download.
        foreach (var sig in allBlobSigs)
        {
            var haveMatch = false;
            if (reusableBlockDict.ContainsKey(sig.RollingSig))
            {
                // have a match... so will reuse local file.
                var localSig = reusableBlockDict[sig.RollingSig];

                var matchingLocalSigs = localSig.Where(s => s.MD5Signature.SequenceEqual(sig.MD5Signature))
                                                .Select(n => n)
                                                .ToList();
                if (matchingLocalSigs.Any())
                {
                    // have a match.
                    var matchingLocalSig = matchingLocalSigs[0];

                    // huge amount of wasted allocations... maybe move this.
                    var buffer = new byte[matchingLocalSig.Size];

                    localStream.Seek(matchingLocalSig.Offset, SeekOrigin.Begin);
                    localStream.Read(buffer, 0, (int)matchingLocalSig.Size);

                    newStream.Seek(sig.Offset, SeekOrigin.Begin);
                    newStream.Write(buffer, 0, (int)matchingLocalSig.Size);

                    haveMatch = true;
                    offset += matchingLocalSig.Size;
                }
            }

            if (!haveMatch)
            {
                // check if we have a byte range starting at offset.
                var byteRange = (from b in byteRangesToDownload where b.BeginOffset == offset select b).FirstOrDefault();
                if (byteRange != null)
                {
                    // download bytes.
                    var blobBytes = DownloadBytes(containerName, blobName, byteRange.BeginOffset, byteRange.EndOffset, parallelFactor);

                    newStream.Seek(sig.Offset, SeekOrigin.Begin);
                    newStream.Write(blobBytes, 0, (int)(byteRange.EndOffset - byteRange.BeginOffset + 1));

                    offset += (byteRange.EndOffset - byteRange.BeginOffset + 1);
                }
            }
        }
    }

    // rename .new file to original
    File.Replace(localFilePath + ".new", localFilePath, null);
}

// updates blob if possible.
// if blob doesn't already exist OR does not have a signature file
// then we just upload as usual.
public long UploadFile(string containerName, string blobName, string localFilePath, int parallelFactor = 2)
{
    var fileLength = CommonOps.GetFileSize(localFilePath);

    var sw = new Stopwatch();
    sw.Start();

    var md5ForFile = GetFileMD5(localFilePath);

    // 1) Does remote blob exist?
    // 2) If so, download existing signature for blob.
    if (AzureHelper.DoesBlobExist(containerName, blobName) && AzureHelper.DoesBlobSignatureExist(containerName, blobName))
    {
        var md5ForBlob = GetBlobMD5(containerName, blobName);

        // only continue if files are actually different.
        if (md5ForBlob != md5ForFile)
        {
            // 3) If blob exists and has a signature, then let the magic begin.
            // 3.1) Download existing blob signature from Azure.
            // 3.2) Search through local file for matches in existing blob signature.
            // 3.3) Upload differences to Azure.
            // 3.4) Upload new signature.
            var blobSig = DownloadSignatureForBlob(containerName, blobName);
            Console.WriteLine(string.Format("Downloaded sig {0}ms", sw.ElapsedMilliseconds));

            var searchResults = CommonOps.SearchLocalFileForSignatures(localFilePath, blobSig);
            Console.WriteLine(string.Format("Searched for common {0}ms", sw.ElapsedMilliseconds));

            var allBlocks = UploadDelta(localFilePath, searchResults, containerName, blobName, parallelFactor: parallelFactor);
            var sig = CommonOps.CreateSignatureFromNewAndReusedBlocks(allBlocks);
            UploadSignatureForBlob(blobName, containerName, sig);

            // set md5 for entire blob
            AzureHelper.SetBlobMD5(containerName, blobName, md5ForFile);

            long bytesUploaded = allBlocks.Where(b => b.IsNew).Select(b => b.Size).Sum();
            return bytesUploaded;
        }

        return 0; // no bytes changed, no bytes uploaded
    }
    else
    {
        // 4) If blob or signature does NOT exist, just upload as normal. No tricky stuff to do here.
        // 4.1) Generate signature and upload it.
        var remainingBytes = new RemainingBytes()
        {
            BeginOffset = 0,
            EndOffset = fileLength - 1
        };

        var allUploadedBlocks = UploadBytesParallel(remainingBytes, localFilePath, containerName, blobName, parallelFactor: parallelFactor);
        var res = (from b in allUploadedBlocks orderby b.Offset ascending select b.BlockId);
        PutBlockList(res.ToArray(), containerName, blobName);

        var sig = CommonOps.CreateSignatureForLocalFile(localFilePath);
        UploadSignatureForBlob(blobName, containerName, sig);

        // set md5 for entire blob
        AzureHelper.SetBlobMD5(containerName, blobName, md5ForFile);

        return fileLength;
    }
}

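// Usage sketch (illustrative only, not part of the original source). Container, blob
// and path names are placeholders. If the blob and its signature already exist and the
// MD5s differ, only changed blocks are uploaded; otherwise the full file is uploaded.
public void ExampleUploadFile()
{
    var bytesUploaded = UploadFile("backups", "backup.vhd", @"c:\data\backup.vhd", parallelFactor: 4);
    Console.WriteLine("Uploaded {0} bytes to Azure", bytesUploaded);
}
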
private List<UploadedBlock> UploadBytes(RemainingBytes remainingBytes, string localFilePath, string containerName, string blobName, bool testMode = false)
{
    var uploadedBlockList = new List<UploadedBlock>();

    try
    {
        CloudBlockBlob blob = null;
        if (!testMode)
        {
            var client = AzureHelper.GetCloudBlobClient();
            var container = client.GetContainerReference(containerName);
            container.CreateIfNotExists();
            blob = container.GetBlockBlobReference(blobName);
        }

        // number of blocks this range will be split into (not used below).
        var blockCount = Math.Round((double)(remainingBytes.EndOffset - remainingBytes.BeginOffset + 1) / (double)ConfigHelper.SignatureSize, MidpointRounding.AwayFromZero);

        using (var stream = new FileStream(localFilePath, FileMode.Open))
        {
            for (var offset = remainingBytes.BeginOffset; offset <= remainingBytes.EndOffset;)
            {
                var sizeToRead = offset + ConfigHelper.SignatureSize <= remainingBytes.EndOffset
                    ? ConfigHelper.SignatureSize
                    : remainingBytes.EndOffset - offset + 1;

                if (sizeToRead == 0)
                {
                    // should never happen; left here as a convenient debugging breakpoint.
                    var error = "";
                }

                // seek to the offset we need. Don't forget remaining bytes may be bigger than the signature size
                // we want to deal with.
                stream.Seek(offset, SeekOrigin.Begin);
                var bytesToRead = new byte[sizeToRead];
                var bytesRead = stream.Read(bytesToRead, 0, (int)sizeToRead);

                var sig = CommonOps.GenerateBlockSig(bytesToRead, offset, (int)sizeToRead, 0);
                var blockId = Convert.ToBase64String(sig.MD5Signature);

                var isDupe = uploadedBlockList.Any(ub => ub.BlockId == blockId);

                if (!testMode)
                {
                    // only upload bytes IF another block hasn't already covered it.
                    // unlikely situation I think, but possibly going to happen for
                    // VM images etc where there is lots of "blank space".
                    if (!isDupe)
                    {
                        // yes, putting into memory stream is probably a waste here.
                        using (var ms = new MemoryStream(bytesToRead))
                        {
                            var options = new BlobRequestOptions() { ServerTimeout = new TimeSpan(0, 90, 0) };
                            blob.PutBlock(blockId, ms, null, null, options);
                        }
                    }
                }

                // store the block id that is associated with this byte range.
                uploadedBlockList.Add(new UploadedBlock()
                {
                    BlockId = blockId,
                    Offset = offset,
                    Sig = sig,
                    Size = bytesRead,
                    IsNew = true,
                    IsDuplicate = isDupe
                });

                offset += sizeToRead;
            }
        }
    }
    catch (ArgumentException ex)
    {
        // probably bad container.
        Console.WriteLine("Argument Exception " + ex.ToString());
    }

    return uploadedBlockList;
}