예제 #1
0
        /// <summary>
        /// Resolves the destination blob and returns the names of the blocks it already holds, so that
        /// callers can skip blocks which were completed earlier (resume support).
        /// </summary>
        /// <param name="destStorageAccountName">Destination storage account name, forwarded to <c>BlobHelpers.GetBlockBlob</c>.</param>
        /// <param name="destStorageContainerName">Destination container name, forwarded to <c>BlobHelpers.GetBlockBlob</c>.</param>
        /// <param name="destBlobName">Destination blob name; also used in log messages.</param>
        /// <param name="destSAS">Destination SAS, forwarded to <c>BlobHelpers.GetBlockBlob</c>.</param>
        /// <param name="destStorageAccountKey">Destination account key, forwarded to <c>BlobHelpers.GetBlockBlob</c>.</param>
        /// <param name="destEndpointSuffix">Destination endpoint suffix, forwarded to <c>BlobHelpers.GetBlockBlob</c>.</param>
        /// <param name="overwriteDest">When true and the destination blob has a block listing, the blob is deleted and an empty list is returned.</param>
        /// <param name="retryCount">Retry count forwarded to the <c>BlobHelpers</c> calls.</param>
        /// <param name="logger">Logger used for error and debug output.</param>
        /// <returns>
        /// A tuple of the existing block names and the destination blob reference.
        /// <c>blockList</c> is <c>null</c> (and <c>destBlob</c> is <c>null</c>) when the blob reference could not be obtained;
        /// it is empty when the block listing came back null (treated as "destination does not exist yet") or when the
        /// destination was deleted for overwrite.
        /// </returns>
        internal static async Task <(List <string> blockList, CloudBlockBlob destBlob)> GetDestinationBlobBlockList(string destStorageAccountName,
                                                                                                                    string destStorageContainerName,
                                                                                                                    string destBlobName,
                                                                                                                    string destSAS,
                                                                                                                    string destStorageAccountKey,
                                                                                                                    string destEndpointSuffix,
                                                                                                                    bool overwriteDest,
                                                                                                                    int retryCount,
                                                                                                                    ILogger logger)
        {
            // get a reference to the destination blob
            var destBlob = BlobHelpers.GetBlockBlob(destStorageAccountName,
                                                    destStorageContainerName,
                                                    destBlobName,
                                                    destSAS,
                                                    destStorageAccountKey,
                                                    destEndpointSuffix,
                                                    retryCount,
                                                    logger);

            if (destBlob is null)
            {
                logger.LogError($"Failed to get a reference to destination conatiner / blob {destBlobName}; exiting!");

                // Signal failure with a null block list: callers test the list for null, and an empty
                // list would be indistinguishable from "the destination does not exist yet".
                return(null, null);
            }

            // if the blob exists we also need the list of blocks associated with it;
            // this lets callers skip blocks which were already completed, and thereby helps with resume
            var blockList = await BlobHelpers.GetBlockListForBlob(destBlob, retryCount, logger);

            if (blockList is null)
            {
                // this is when the destination blob does not yet exist
                logger.LogDebug($"Destination blob {destBlobName} does not exist (block listing returned null).");

                return(new List <string>(), destBlob);
            }

            if (overwriteDest)
            {
                // support overwrite by deleting the destination blob; nothing is carried over
                logger.LogDebug($"Destination blob {destBlobName} exists but needs to be deleted as overwrite == true.");

                await destBlob.DeleteAsync();

                logger.LogDebug($"Destination blob {destBlobName} deleted to prepare for overwrite.");

                return(new List <string>(), destBlob);
            }

            logger.LogDebug($"Destination blob {destBlobName} exists; trying to get block listing.");

            var destBlockList = blockList.Select(b => b.Name).ToList();

            logger.LogDebug($"Destination blob {destBlobName} has {destBlockList.Count} blocks.");

            return(destBlockList, destBlob);
        }
예제 #2
0
        /// <summary>
        /// Concatenate a set of blobs - specified either via their prefix or an explicit list of blob names - to a single destination blob.
        /// Source blobs must be from a single container. However because of the way the Azure Storage Block Blob works, you can call
        /// this function multiple times, specifying different sets of source blobs each time, and they will simply be "appended" to the destination blob.
        /// </summary>
        /// <param name="sourceStorageAccountName">Source storage account name.</param>
        /// <param name="sourceStorageContainerName">Source container name.</param>
        /// <param name="sourceStorageAccountKey">Source account key, forwarded to the <c>BlobHelpers</c> calls.</param>
        /// <param name="sourceSAS">Source SAS, forwarded to the <c>BlobHelpers</c> calls.</param>
        /// <param name="sourceBlobPrefix">Prefix used to list the source blobs when <paramref name="sourceBlobNames"/> is null or empty.</param>
        /// <param name="sourceEndpointSuffix">Source endpoint suffix.</param>
        /// <param name="sortBlobs">
        /// When true, the names obtained from the prefix listing are sorted. This is a string sort, so numbers embedded in
        /// blob names must be 0-prefixed to sort numerically. Ignored when an explicit name list is supplied.
        /// </param>
        /// <param name="sourceBlobNames">Explicit list of source blob names; when null or empty the prefix listing is used instead.</param>
        /// <param name="destStorageAccountName">Destination storage account name.</param>
        /// <param name="destStorageAccountKey">Destination account key.</param>
        /// <param name="destSAS">Destination SAS.</param>
        /// <param name="destStorageContainerName">Destination container name.</param>
        /// <param name="destBlobName">Destination blob name.</param>
        /// <param name="destEndpointSuffix">Destination endpoint suffix.</param>
        /// <param name="colHeader">Column header, forwarded to <c>PrefixColumnHeaderIfApplicable</c> to seed the block list.</param>
        /// <param name="fileSeparator">File separator, forwarded to <c>InjectFileSeparator</c> for each source blob.</param>
        /// <param name="calcMD5ForBlock">Forwarded to <c>BlockRangeWorkers.ProcessSourceBlocks</c>.</param>
        /// <param name="overwriteDest">Forwarded to <c>BlobHelpers.GetDestinationBlobBlockList</c>; requests that an existing destination blob be replaced.</param>
        /// <param name="timeoutSeconds">Forwarded to <c>BlockRangeWorkers.ProcessSourceBlocks</c>.</param>
        /// <param name="maxDOP">Forwarded to <c>BlockRangeWorkers.ProcessSourceBlocks</c>.</param>
        /// <param name="useInbuiltRetry">Forwarded to <c>BlockRangeWorkers.ProcessSourceBlocks</c>.</param>
        /// <param name="retryCount">Retry count forwarded to the helper calls.</param>
        /// <param name="logger">Logger used for diagnostics.</param>
        /// <param name="progress">Receives <c>OpProgress</c> updates (total tick count, then the final status message).</param>
        /// <returns>
        /// <c>true</c> when the operation completed; <c>false</c> when the destination could not be resolved, the source
        /// listing failed, block processing reported failure, or a <c>StorageException</c> was raised.
        /// </returns>
        public async static Task <bool> BlobToBlob(string sourceStorageAccountName,
                                                   string sourceStorageContainerName,
                                                   string sourceStorageAccountKey,
                                                   string sourceSAS,
                                                   string sourceBlobPrefix,
                                                   string sourceEndpointSuffix,
                                                   bool sortBlobs,
                                                   List <string> sourceBlobNames,
                                                   string destStorageAccountName,
                                                   string destStorageAccountKey,
                                                   string destSAS,
                                                   string destStorageContainerName,
                                                   string destBlobName,
                                                   string destEndpointSuffix,
                                                   string colHeader,
                                                   string fileSeparator,
                                                   bool calcMD5ForBlock,
                                                   bool overwriteDest,
                                                   int timeoutSeconds,
                                                   int maxDOP,
                                                   bool useInbuiltRetry,
                                                   int retryCount,
                                                   ILogger logger,
                                                   IProgress <OpProgress> progress)
        {
            var opProgress = new OpProgress();

            try
            {
                var sw = Stopwatch.StartNew();

                GlobalOptimizations();

                var res = await BlobHelpers.GetDestinationBlobBlockList(destStorageAccountName,
                                                                        destStorageContainerName,
                                                                        destBlobName,
                                                                        destSAS,
                                                                        destStorageAccountKey,
                                                                        destEndpointSuffix,
                                                                        overwriteDest,
                                                                        retryCount,
                                                                        logger);

                var destBlockList = res.blockList;
                var destBlob      = res.destBlob;

                // either value being null means the destination could not be resolved; the helper has already logged why
                if (destBlockList is null || destBlob is null)
                {
                    return(false);
                }

                // create a place holder for the final block list (to be eventually used for put block list) and pre-populate it with the known list of blocks
                // already associated with the destination blob
                var finalBlockList = new List <string>(destBlockList);

                // check if there is a specific list of blobs given by the user, in which case the immediate 'if' code below will be skipped
                if (sourceBlobNames is null || sourceBlobNames.Count == 0)
                {
                    // now just get the blob names, that's all we need for further processing
                    sourceBlobNames = await BlobHelpers.GetBlobListing(sourceStorageAccountName,
                                                                       sourceStorageContainerName,
                                                                       sourceBlobPrefix,
                                                                       sourceSAS,
                                                                       sourceStorageAccountKey,
                                                                       sourceEndpointSuffix,
                                                                       retryCount,
                                                                       logger);

                    // check for null being returned from above in which case we need to exit
                    if (sourceBlobNames is null)
                    {
                        logger.LogError($"Souce blob listing failed to return any results. Exiting!");
                        return(false);
                    }

                    // if the user specified to sort the input blobs (only valid for the prefix case) then we will happily do that!
                    // The gotcha here is that this is a string sort. So if blobs have names like blob_9, blob_13, blob_6, blob_3, blob_1
                    // the sort order will result in blob_1, blob_13, blob_3, blob_6, blob_9.
                    // To avoid issues like this the user must 0-prefix the numbers embedded in the filenames.
                    if (sortBlobs)
                    {
                        sourceBlobNames.Sort();
                    }
                }

                var sourceBlockList = new Dictionary <string, List <BlockRangeBase> >();

                // we first need to seed with the column header if it was specified
                PrefixColumnHeaderIfApplicable(sourceBlockList,
                                               colHeader,
                                               sourceEndpointSuffix,
                                               sourceStorageAccountName,
                                               sourceStorageContainerName);

                // register every source blob up front, in list order, so the parallel pass below only ever
                // touches entries that already exist.
                // NOTE(review): Dictionary.Add throws ArgumentException if the same blob name appears twice, and
                // Dictionary enumeration order is documented as undefined - confirm downstream code tolerates both.
                int tmpBlobIndex = 0;
                foreach (var srcBlob in sourceBlobNames)
                {
                    sourceBlockList.Add(srcBlob, new List <BlockRangeBase>());

                    InjectFileSeparator(sourceBlockList,
                                        fileSeparator,
                                        tmpBlobIndex,
                                        sourceEndpointSuffix,
                                        sourceStorageAccountName,
                                        sourceStorageContainerName);

                    tmpBlobIndex++;
                }

                // get block lists for all source blobs in parallel. earlier this was done serially and was found to be bottleneck
                Parallel.ForEach(sourceBlobNames, (srcBlobName, loopState, position) =>
                {
                    // 1-based position of this blob in the source list. Taking it from the loop itself (rather than
                    // from a shared counter read after an increment) keeps the generated block ids free of races
                    // and identical from one run to the next, which resuming against existing blocks depends on.
                    var blobIndex = position + 1;

                    var tmpSrcBlob = BlobHelpers.GetBlockBlob(sourceStorageAccountName,
                                                              sourceStorageContainerName,
                                                              srcBlobName,
                                                              sourceSAS,
                                                              sourceStorageAccountKey,
                                                              sourceEndpointSuffix,
                                                              retryCount,
                                                              logger);

                    if (tmpSrcBlob is null)
                    {
                        // throw exception. this condition will only be entered if a blob name was explicitly specified and it does not really exist
                        throw new StorageException($"An invalid source blob ({srcBlobName}) was specified.");
                    }

                    // first we get the block list of the source blob. we use this to later parallelize the download / copy operation
                    // (blocking wait is deliberate here: Parallel.ForEach bodies are synchronous)
                    var tmpBlockList = BlobHelpers.GetBlockListForBlob(tmpSrcBlob, retryCount, logger).GetAwaiter().GetResult();

                    if (tmpBlockList is null)
                    {
                        // a null listing would otherwise surface as a NullReferenceException with no hint of which blob failed
                        throw new StorageException($"Failed to get the block list for source blob ({srcBlobName}).");
                    }

                    // compute a unique blockId based on blob account + container + blob name (includes path) + block length + block "number".
                    // The blob index component allows the same source blob to recur in the list of source blobs.
                    string MakeBlockId(long length, int chunk) => string.Concat(blobIndex,
                                                                                sourceEndpointSuffix,
                                                                                sourceStorageAccountName,
                                                                                sourceStorageContainerName,
                                                                                srcBlobName,
                                                                                length,
                                                                                chunk);

                    // iterate through the list of blocks and compute their effective offsets within the source blob
                    long currentOffset  = 0;
                    int chunkIndex      = 0;
                    var blocksToBeAdded = new List <BlobBlockRange>();

                    if (!tmpBlockList.Any() && tmpSrcBlob.Properties.Length > 0)
                    {
                        // in case the source blob is smaller then 256 MB (for latest API) then the blob is stored directly without any block list
                        // so in this case the block list is 0-length and the whole blob is represented as a single range
                        var blockLength = tmpSrcBlob.Properties.Length;

                        blocksToBeAdded.Add(new BlobBlockRange(tmpSrcBlob,
                                                               MakeBlockId(blockLength, chunkIndex),
                                                               currentOffset,
                                                               blockLength));
                    }
                    else
                    {
                        foreach (var blockListItem in tmpBlockList)
                        {
                            var blockLength = blockListItem.Length;

                            blocksToBeAdded.Add(new BlobBlockRange(tmpSrcBlob,
                                                                   MakeBlockId(blockLength, chunkIndex),
                                                                   currentOffset,
                                                                   blockLength));

                            chunkIndex++;
                            currentOffset += blockLength;
                        }
                    }

                    // the dictionary is shared by all loop bodies, so serialize access to it
                    lock (sourceBlockList)
                    {
                        sourceBlockList[srcBlobName].AddRange(blocksToBeAdded);
                    }
                });

                // the total number of "ticks" to be reported will be the number of blocks + the number of blobs
                // this is because each PutBlock is reported separately, as is the PutBlockList when each source blob is finished
                opProgress.TotalTicks = sourceBlockList.Count + sourceBlockList.Values.Sum(b => b.Count);

                progress.Report(opProgress);

                if (!await BlockRangeWorkers.ProcessSourceBlocks(sourceBlockList,
                                                                 destBlob,
                                                                 destBlockList,
                                                                 finalBlockList,
                                                                 calcMD5ForBlock,
                                                                 timeoutSeconds,
                                                                 maxDOP,
                                                                 useInbuiltRetry,
                                                                 retryCount,
                                                                 opProgress,
                                                                 progress,
                                                                 logger))
                {
                    return(false);
                }

                sw.Stop();

                opProgress.StatusMessage = $"BlobToBlob operation suceeded in {sw.Elapsed.TotalSeconds} seconds.";
                progress.Report(opProgress);
            }
            catch (StorageException ex)
            {
                BlobHelpers.LogStorageException("Unhandled exception in BlobToBlob", ex, logger, false);

                return(false);
            }
            catch (AggregateException ex)
            {
                // Parallel.ForEach reports exceptions thrown by its loop bodies wrapped in an AggregateException,
                // so storage failures raised while gathering source block lists arrive here rather than above.
                // Handle() rethrows (as a new AggregateException) anything that is not a StorageException.
                ex.Flatten().Handle(inner =>
                {
                    if (inner is StorageException storageEx)
                    {
                        BlobHelpers.LogStorageException("Unhandled exception in BlobToBlob", storageEx, logger, false);
                        return(true);
                    }

                    return(false);
                });

                return(false);
            }

            return(true);
        }