Esempio n. 1
0
        /// <summary>
        /// Reads one block element from a block-list XML document and turns it into a <see cref="ListBlockItem"/>.
        /// </summary>
        /// <param name="committed">Value copied into the returned item's <c>Committed</c> property.</param>
        /// <param name="reader">XML reader; the method reads the block's start element, its child elements and its end element.</param>
        /// <param name="token">Cancellation token, checked once per child element.</param>
        /// <returns>The block listing entry populated from the size and name child elements.</returns>
        private static async Task <ListBlockItem> ParseBlockItemAsync(bool committed, XmlReader reader, CancellationToken token)
        {
            var item = new ListBlockItem();
            item.Committed = committed;

            await reader.ReadStartElementAsync().ConfigureAwait(false);

            while (await reader.IsStartElementAsync().ConfigureAwait(false))
            {
                token.ThrowIfCancellationRequested();

                // Empty elements carry no content to parse, so they are treated like unknown elements.
                string elementName = reader.IsEmptyElement ? null : reader.Name;

                if (elementName == Constants.SizeElement)
                {
                    item.Length = await reader.ReadElementContentAsInt64Async().ConfigureAwait(false);
                }
                else if (elementName == Constants.NameElement)
                {
                    item.Name = await reader.ReadElementContentAsStringAsync().ConfigureAwait(false);
                }
                else
                {
                    // Empty or unrecognised element: step over it.
                    await reader.SkipAsync().ConfigureAwait(false);
                }
            }

            await reader.ReadEndElementAsync().ConfigureAwait(false);

            return item;
        }
Esempio n. 2
0
        /// <summary>
        /// Maps serialized list-block items to typed list-block items by copying the metadata fields
        /// and deserializing each item's <c>Value</c> to <typeparamref name="T"/>.
        /// </summary>
        /// <typeparam name="T">Type the stored value is deserialized to.</typeparam>
        /// <param name="protoListBlockItems">Items to convert; may be <c>null</c>.</param>
        /// <returns>A new list with one entry per input item, or <c>null</c> when the input is <c>null</c>.</returns>
        private IList <IListBlockItem <T> > Convert <T>(IList <ProtoListBlockItem> protoListBlockItems)
        {
            List<IListBlockItem<T>> converted = null;

            if (protoListBlockItems != null)
            {
                converted = new List<IListBlockItem<T>>();

                foreach (var source in protoListBlockItems)
                {
                    converted.Add(new ListBlockItem<T>
                    {
                        LastUpdated     = source.LastUpdated,
                        ListBlockItemId = source.ListBlockItemId,
                        Status          = source.Status,
                        StatusReason    = source.StatusReason,
                        Step            = source.Step,
                        Value           = JsonGenericSerializer.Deserialize<T>(source.Value)
                    });
                }
            }

            return converted;
        }
Esempio n. 3
0
        /// <summary>
        /// Runs <c>GetListBlockItemsQuery</c> against the test database and materializes the rows
        /// for the given block and status as typed list-block items.
        /// </summary>
        /// <typeparam name="T">Type the stored JSON value (column 1) is deserialized to.</typeparam>
        /// <param name="blockId">Block identifier; must parse as a 64-bit integer.</param>
        /// <param name="status">Item status passed to the query as a tinyint.</param>
        /// <returns>The matching items; empty when the query returns no rows.</returns>
        public List <ListBlockItem <T> > GetListBlockItems <T>(string blockId, ItemStatus status)
        {
            var items = new List <ListBlockItem <T> >();

            using (var connection = new SqlConnection(TestConstants.TestConnectionString))
            {
                connection.Open();

                // The command and the reader are disposable too; release them deterministically
                // instead of relying on the connection being closed.
                using (var command = connection.CreateCommand())
                {
                    command.CommandText = GetListBlockItemsQuery;
                    command.Parameters.Add("@BlockId", SqlDbType.BigInt).Value = long.Parse(blockId);
                    command.Parameters.Add("@Status", SqlDbType.TinyInt).Value = (byte)status;

                    using (var reader = command.ExecuteReader())
                    {
                        while (reader.Read())
                        {
                            var item = new ListBlockItem <T>();
                            item.ListBlockItemId = reader.GetInt64(0).ToString();
                            item.Value           = JsonGenericSerializer.Deserialize <T>(reader.GetString(1));
                            item.Status          = (ItemStatus)reader.GetByte(2);

                            // Column 3 is not read here. Columns 4 and 5 may hold NULL.
                            if (!reader.IsDBNull(4))
                            {
                                item.StatusReason = reader.GetString(4);
                            }

                            if (!reader.IsDBNull(5))
                            {
                                item.Step = reader.GetByte(5);
                            }

                            items.Add(item);
                        }
                    }
                }
            }

            return(items);
        }
Esempio n. 4
0
        /// <summary>
        /// Polls the "blobcreateevents" storage queue for blob-created notifications and, for the last
        /// message of each batch, appends the blocks of the notified blob that have not been written yet
        /// to a local file. Loops until <c>done</c> is set, which the current code never does.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            var builder = new ConfigurationBuilder()
                          .SetBasePath(Directory.GetCurrentDirectory())
                          .AddJsonFile("appsettings.json");

            Configuration = builder.Build();

            CloudStorageAccount account     = CloudStorageAccount.Parse(Configuration["AppSettings:storageConnection"]);
            CloudQueueClient    queueClient = account.CreateCloudQueueClient();
            CloudQueue          queue       = queueClient.GetQueueReference("blobcreateevents");

            // Wait for the queue to exist before polling it; an un-awaited call would race with the
            // first FetchAttributesAsync below and would hide any failure.
            queue.CreateIfNotExistsAsync().Wait();

            CloudBlobClient blobClient = account.CreateCloudBlobClient();

            bool          done = false;
            ListBlockItem lastBlockDownloaded = null;
            int           currentBackOff      = 0;
            int           maxBackOff          = 5;

            // Reusable download buffer; grown on demand because a block is not guaranteed to fit in 4 MB.
            byte[] bytesFromBlob = new byte[4 * 1024 * 1024];

            do
            {
                Console.ForegroundColor = ConsoleColor.Green;
                Console.WriteLine("Getting messages from the queue.");

                queue.FetchAttributesAsync().Wait();

                int countOfMessages = Convert.ToInt32(queue.ApproximateMessageCount);
                if (countOfMessages > 0)
                {
                    currentBackOff = 1;

                    // Materialize once so counting and iterating do not enumerate the result twice.
                    var msgs = queue.GetMessagesAsync(countOfMessages > 32 ? 32 : countOfMessages).Result.ToList();
                    var numberOfMessagesLeftInBatch = msgs.Count;
                    foreach (var msg in msgs)
                    {
                        // process only the last message
                        // all messages are equivalent in meaning
                        // sender usually gets ahead of receiver
                        if (numberOfMessagesLeftInBatch == 1)
                        {
                            JToken token = JObject.Parse(msg.AsString);

                            string resourceId = (string)token.SelectToken("topic");
                            string subject    = (string)token.SelectToken("subject");
                            string eventType  = (string)token.SelectToken("eventType");

                            if (eventType == "Microsoft.Storage.BlobCreated")
                            {
                                var storageAccountName = resourceId.Split('/')[8];

                                // The subject is expected to look like
                                // /blobServices/default/containers/{container}/blobs/{blob name}.
                                // The blob name may itself contain '/', so everything from segment 6 on is re-joined.
                                var subjectParts         = subject.Split('/');
                                var storageContainerName = subjectParts[4];
                                var blobName             = string.Join("/", subjectParts, 6, subjectParts.Length - 6);
                                Uri uri = new Uri($"https://{storageAccountName}.blob.core.windows.net/{storageContainerName}/{blobName}");

                                CloudBlockBlob blob = new CloudBlockBlob(uri, blobClient);

                                // Snapshot the block list; it is inspected several times below.
                                var blockList = blob.DownloadBlockListAsync().Result.ToList();

                                long blobOffset   = 0;
                                var  blocksToCopy = blockList;

                                if (lastBlockDownloaded != null)
                                {
                                    // Number of leading blocks up to and including the last one written.
                                    // When that block is not in the list, this exceeds the list size and nothing is copied.
                                    var countOfblocksAlreadyWritten = blockList.TakeWhile(item => (item.Name != lastBlockDownloaded.Name)).Count() + 1;

                                    // add up the bytes already written
                                    blobOffset = blockList.Take(countOfblocksAlreadyWritten).Sum(item => item.Length);

                                    // skip over blocks already written
                                    blocksToCopy = blockList.Skip(countOfblocksAlreadyWritten).ToList();
                                }

                                if (blocksToCopy.Count > 0)
                                {
                                    // Disposing the writer also closes the file stream; the outer using
                                    // covers the case where creating the writer fails.
                                    using (var fs = File.OpenWrite(@"c:\temp\abigfile.dat"))
                                    using (BinaryWriter writer = new BinaryWriter(fs))
                                    {
                                        foreach (var block in blocksToCopy)
                                        {
                                            // Fail loudly on a block too large for a byte array rather than truncating the length.
                                            int blockLength = checked((int)block.Length);
                                            if (bytesFromBlob.Length < blockLength)
                                            {
                                                bytesFromBlob = new byte[blockLength];
                                            }

                                            blob.DownloadRangeToByteArrayAsync(bytesFromBlob, 0, blobOffset, block.Length).Wait();

                                            // Always append to whatever the file already contains.
                                            writer.Seek(0, SeekOrigin.End);
                                            writer.Write(bytesFromBlob, 0, blockLength);

                                            blobOffset += block.Length;

                                            Console.ForegroundColor = ConsoleColor.White;
                                            Console.WriteLine($"{block.Name}");
                                        }
                                    }
                                }

                                // A blob without any blocks in its list leaves the resume marker untouched.
                                if (blockList.Count > 0)
                                {
                                    lastBlockDownloaded = blockList[blockList.Count - 1];
                                }
                            }
                        }

                        Console.ForegroundColor = ConsoleColor.Green;
                        Console.WriteLine("Deleting a message from the queue");

                        // Wait for the delete so a failure surfaces instead of being silently dropped.
                        queue.DeleteMessageAsync(msg).Wait();
                        numberOfMessagesLeftInBatch--;
                    }
                }
                else
                {
                    Console.ForegroundColor = ConsoleColor.Green;
                    Console.WriteLine($"Waiting for {currentBackOff} seconds for next message");

                    Thread.Sleep(currentBackOff * 1000);

                    // Linear back-off, capped at maxBackOff seconds.
                    currentBackOff += (currentBackOff < maxBackOff ? 1 : 0);
                }

                // set done flag in some way, or loop forever.
            } while (!done);

            Console.ReadKey();
        }
Esempio n. 5
0
        /// <summary>
        /// Concatenate a set of blobs - specified either via their prefix or an explicit list of blob names - to a single destination blob.
        /// Source blobs must be from a single container. However because of the way the Azure Storage Block Blob works, you can call
        /// this function multiple times, specifying different sets of source blobs each time, and they will simply be "appended" to the destination blob.
        /// </summary>
        /// <param name="sourceStorageAccountName">Source storage account name; also part of the basis for the generated block IDs.</param>
        /// <param name="sourceStorageContainerName">Container that holds all source blobs; also part of the basis for the generated block IDs.</param>
        /// <param name="sourceStorageAccountKey">Access key of the source storage account.</param>
        /// <param name="sourceBlobPrefix">Prefix used to list source blobs when <paramref name="sourceBlobNames"/> is null or empty.</param>
        /// <param name="sortBlobs">When listing by prefix, sort the blob names (string sort) before copying.</param>
        /// <param name="sourceBlobNames">Explicit list of source blob names; when non-empty, the prefix and sort flag are ignored.</param>
        /// <param name="destStorageAccountName">Destination storage account name.</param>
        /// <param name="destStorageAccountKey">Access key of the destination storage account.</param>
        /// <param name="destStorageContainerName">Container of the destination blob.</param>
        /// <param name="destBlobName">Name of the destination block blob.</param>
        /// <returns>Always <c>true</c>; failures currently surface as exceptions.</returns>
        public static bool BlobToBlob(string sourceStorageAccountName,
                                      string sourceStorageContainerName,
                                      string sourceStorageAccountKey,
                                      string sourceBlobPrefix,
                                      bool sortBlobs,
                                      List <string> sourceBlobNames,
                                      string destStorageAccountName,
                                      string destStorageAccountKey,
                                      string destStorageContainerName,
                                      string destBlobName)
        {
            GlobalOptimizations();

            // TODO remove hard-coding of core.windows.net
            string sourceAzureStorageConnStr = $"DefaultEndpointsProtocol=https;AccountName={sourceStorageAccountName};AccountKey={sourceStorageAccountKey};EndpointSuffix=core.windows.net";
            string destAzureStorageConnStr   = $"DefaultEndpointsProtocol=https;AccountName={destStorageAccountName};AccountKey={destStorageAccountKey};EndpointSuffix=core.windows.net";

            var destStorageAccount = CloudStorageAccount.Parse(destAzureStorageConnStr);
            var destBlobClient     = destStorageAccount.CreateCloudBlobClient();
            var destContainer      = destBlobClient.GetContainerReference(destStorageContainerName);
            var destBlob           = destContainer.GetBlockBlobReference(destBlobName);

            List <string> destBlockList = new List <string>();

            // check if the blob exists, in which case we need to also get the list of blocks associated with that blob
            // this will help to skip blocks which were already completed, and thereby help with resume
            // TODO Block IDs are not unique across files - this will trip up the logic
            if (destBlob.ExistsAsync().GetAwaiter().GetResult())
            {
                // only get committed blocks to be sure
                destBlockList = (from b in (destBlob.DownloadBlockListAsync(BlockListingFilter.Committed, null, null, null).GetAwaiter().GetResult()) select b.Name).ToList();
            }

            // Constant-time membership test for "already copied" checks; a list lookup per block would be quadratic.
            var destBlockIds = new HashSet <string>(destBlockList);

            // place holder for the final block list (to be eventually used for put block list), pre-populated with the
            // blocks already associated with the destination blob
            var finalBlockList = new List <string>(destBlockList);

            var sourceStorageAccount = CloudStorageAccount.Parse(sourceAzureStorageConnStr);
            var sourceBlobClient     = sourceStorageAccount.CreateCloudBlobClient();
            var sourceContainer      = sourceBlobClient.GetContainerReference(sourceStorageContainerName);

            // check if there is a specific list of blobs given by the user, in which case the immediate 'if' code below will be skipped
            if (sourceBlobNames is null || sourceBlobNames.Count == 0)
            {
                var blobListing = new List <IListBlobItem>();
                BlobContinuationToken continuationToken = null;

                // we have a prefix specified, so get a list of blobs with that prefix, following continuation tokens
                do
                {
                    var response = sourceContainer.ListBlobsSegmentedAsync(sourceBlobPrefix, true, BlobListingDetails.None, null, continuationToken, null, null).GetAwaiter().GetResult();
                    continuationToken = response.ContinuationToken;
                    blobListing.AddRange(response.Results);
                }while (continuationToken != null);

                // now just get the blob names, that's all we need for further processing
                sourceBlobNames = (from b in blobListing.OfType <CloudBlockBlob>() select b.Name).ToList();

                // if the user specified to sort the input blobs (only valid for the prefix case) then we will happily do that!
                // The gotcha here is that this is a string sort. So if blobs have names like blob_9, blob_13, blob_6, blob_3, blob_1
                // the sort order will result in blob_1, blob_13, blob_3, blob_6, blob_9.
                // To avoid issues like this the user must 0-prefix the numbers embedded in the filenames.
                if (sortBlobs)
                {
                    sourceBlobNames.Sort();
                }
            }

            // this hasher is used to compute a unique blockId for the source blocks; the using block
            // releases it even when a copy fails part-way through
            using (var shaHasher = new SHA384Managed())
            {
                // iterate through each source blob, one at a time.
                foreach (var sourceBlobName in sourceBlobNames)
                {
                    Debug.WriteLine($"{DateTime.Now}: START {sourceBlobName}");

                    var sourceBlob = sourceContainer.GetBlockBlobReference(sourceBlobName);

                    // first we get the block list of the source blob. we use this to later parallelize the download / copy operation
                    var sourceBlockList = sourceBlob.DownloadBlockListAsync(BlockListingFilter.Committed, null, null, null).GetAwaiter().GetResult().ToList();

                    // a blob that has content but an empty block list is treated as one pseudo-block covering the whole blob;
                    // it is represented by a null entry which is handled below
                    if (sourceBlockList.Count == 0 && sourceBlob.Properties.Length > 0)
                    {
                        sourceBlockList.Add(null);
                    }

                    var  blockRanges   = new List <BlockRange>();
                    long currentOffset = 0;

                    // iterate through the list of blocks and compute their offsets within the source blob.
                    int chunkIndex = 0;
                    foreach (var blockListItem in sourceBlockList)
                    {
                        // handle special case when the sourceBlob has no blocks
                        var blockLength = blockListItem == null ? sourceBlob.Properties.Length : blockListItem.Length;

                        // compute a unique blockId based on blob account + container + blob name (includes path) + block length + block "number"
                        // NOTE: this basis must stay stable, otherwise blocks committed by an earlier run are no longer recognised on resume
                        // TODO also include the endpoint when we generalize for all clouds
                        var hashBasis  = System.Text.Encoding.UTF8.GetBytes(string.Concat(sourceStorageAccountName, sourceStorageContainerName, sourceBlobName, blockLength, chunkIndex));
                        var newBlockId = Convert.ToBase64String(shaHasher.ComputeHash(hashBasis));

                        var newBlockRange = new BlockRange()
                        {
                            Name        = newBlockId,
                            StartOffset = currentOffset,
                            Length      = blockLength
                        };

                        // advance before the skip check so skipped blocks still count towards index and offset
                        chunkIndex++;
                        currentOffset += blockLength;

                        // only blocks that are not yet committed at the destination need to be copied
                        if (!destBlockIds.Contains(newBlockId))
                        {
                            blockRanges.Add(newBlockRange);
                        }
                    }

                    Debug.WriteLine($"Number of ranges: {blockRanges.Count}");

                    // proceed to copy blocks in parallel. to do this, we download to a local memory stream and then push that back out to the destination blob
                    Parallel.ForEach <BlockRange, MD5CryptoServiceProvider>(
                        blockRanges,
                        new ParallelOptions()
                        {
                            MaxDegreeOfParallelism = Environment.ProcessorCount * 8
                        },
                        // one hasher per task rather than one per block
                        () => new MD5CryptoServiceProvider(),
                        (currRange, loopState, hasher) =>
                        {
                            // TODO verify cast
                            using (var memStream = new MemoryStream((int)currRange.Length))
                            {
                                sourceBlob.DownloadRangeToStreamAsync(memStream, currRange.StartOffset, currRange.Length).GetAwaiter().GetResult();

                                // compute the hash, for which we need to reset the memory stream
                                memStream.Position = 0;
                                var md5Checksum    = hasher.ComputeHash(memStream);

                                // reset the memory stream again to 0 and then call Azure Storage to put this as a block with the given block ID and MD5 hash
                                memStream.Position = 0;

                                Debug.WriteLine($"Putting block {currRange.Name}");

                                destBlob.PutBlockAsync(currRange.Name, memStream, Convert.ToBase64String(md5Checksum)).GetAwaiter().GetResult();
                            }

                            return(hasher);
                        },
                        // release each task-local hasher once its task has finished
                        (hasherFinally) => hasherFinally.Dispose()
                        );

                    // keep adding the blocks we just copied, to the final block list
                    finalBlockList.AddRange(from r in blockRanges select r.Name);

                    // TODO review this whether we put this for each source file or at the end of all source files
                    // when we are all done, execute a "commit" by using Put Block List operation
                    destBlob.PutBlockListAsync(finalBlockList).GetAwaiter().GetResult();

                    Debug.WriteLine($"{DateTime.Now}: END {sourceBlobName}");

                    // TODO optionally allow user to specify extra character(s) to append in between files. This is typically needed when the source files do not have a trailing \n character.
                }
            }

            // TODO handle failures.
            return(true);
        }
Esempio n. 6
0
        /// <summary>
        /// Appends a stream's contents to the end of a blob (creating the blob and appending a header if the blob doesn't exist).
        /// Before calling this method, set the blob's ContentType Property (ex: "text/csv; charset=utf-8").
        /// The operation is retried until it succeeds whenever the blob is created or modified concurrently.
        /// </summary>
        /// <param name="blob">The blob to have the data appended to.</param>
        /// <param name="data">The stream with contents to append. Must be seekable: its length is checked and its position is restored before every attempt.</param>
        /// <param name="header">The header byte array to put in the blob if the blob has no committed blocks yet; may be null.</param>
        /// <param name="maxBlockSize">The maximum block sized in bytes (0=Azure default of 4MB)</param>
        /// <param name="options">A Microsoft.WindowsAzure.Storage.Blob.BlobRequestOptions object that specifies additional options for the request.</param>
        /// <param name="operationContext">An Microsoft.WindowsAzure.Storage.OperationContext object that represents the context for the current operation.</param>
        /// <param name="cancellationToken">A System.Threading.CancellationToken to observe while waiting for a task to complete.</param>
        /// <returns>A System.Threading.Tasks.Task object that represents the current operation.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="blob"/> or <paramref name="data"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="data"/> or <paramref name="header"/> is larger than the maximum block size.</exception>
        public static async Task AppendAsync(this CloudBlockBlob blob, Stream data, Byte[] header, Int32 maxBlockSize = 0,
                                             BlobRequestOptions options          = null, OperationContext operationContext = null,
                                             CancellationToken cancellationToken = default(CancellationToken))
        {
            if (blob == null)
            {
                throw new ArgumentNullException("blob");
            }
            if (data == null)
            {
                throw new ArgumentNullException("data");
            }
            if (maxBlockSize == 0)
            {
                maxBlockSize = 4 * 1024 * 1024;
            }
            if (data.Length > maxBlockSize)
            {
                throw new ArgumentOutOfRangeException("data", "A single data object cannot be larger than " + maxBlockSize.ToString() + " bytes.");
            }
            if (header != null && header.Length > maxBlockSize)
            {
                throw new ArgumentOutOfRangeException("header", "The header cannot be larger than " + maxBlockSize.ToString() + " bytes.");
            }

            // Remember where the caller's data starts so every retry re-reads the same bytes.
            Int64 dataStartPosition = data.Position;

            while (true)
            {
                // A previous attempt has consumed the stream; without rewinding, a retry would append nothing.
                data.Position = dataStartPosition;

                using (var ms = new MemoryStream()) {
                    AccessCondition      accessCondition;
                    List <ListBlockItem> blockList = new List <ListBlockItem>();
                    try {
                        // Snapshot the committed block list; it is inspected several times below.
                        blockList = (await blob.DownloadBlockListAsync(BlockListingFilter.Committed,
                                                                       null, options, operationContext, cancellationToken).ConfigureAwait(false)).ToList(); // 404 if blob not found

                        accessCondition = AccessCondition.GenerateIfMatchCondition(blob.Properties.ETag);                                                   // Write if blob matches what we read
                    }
                    catch (StorageException se) {
                        if (!se.Matches(HttpStatusCode.NotFound, BlobErrorCodeStrings.BlobNotFound))
                        {
                            throw;
                        }
                        accessCondition = AccessCondition.GenerateIfNoneMatchCondition("*"); // Write if blob doesn't exist
                    }
                    try {
                        List <String> blockIds  = blockList.Select(lbi => lbi.Name).ToList();
                        ListBlockItem lastBlock = blockList.LastOrDefault();
                        if (lastBlock == null)
                        {
                            if (header != null)
                            {
                                // No blocks exist, add header (if specified)
                                ms.Write(header, 0, header.Length);
                            }
                        }
                        else
                        {
                            // A block exists, can it hold the new data? A block of exactly maxBlockSize is allowed,
                            // consistent with the argument check above.
                            if (lastBlock.Length + data.Length <= maxBlockSize)
                            {
                                // Yes, download the last block's current data as long as the blob's etag hasn't changed
                                Int64 lastBlockOffset = blockList.Sum(lbi => lbi.Length) - lastBlock.Length;
                                await blob.DownloadRangeToStreamAsync(ms, lastBlockOffset, lastBlock.Length,
                                                                      accessCondition, options, operationContext, cancellationToken).ConfigureAwait(false); // 412 if blob modified behind our back

                                // Drop the final entry by position: removing by name would hit the first
                                // occurrence if the same block ID appears more than once in the list.
                                blockIds.RemoveAt(blockIds.Count - 1);
                            }
                        }
                        await data.CopyToAsync(ms, 81920, cancellationToken).ConfigureAwait(false); // Append new data to end of stream

                        ms.Seek(0, SeekOrigin.Begin);
                        // Upload new block and append it to the blob
                        String newBlockId = Guid.NewGuid().ToString("N").Encode().ToBase64String();
                        blockIds.Add(newBlockId);                                                                     // Append new block to end of blob
                        await blob.PutBlockAsync(newBlockId, ms, null, accessCondition,
                                                 options, operationContext, cancellationToken).ConfigureAwait(false); // PutBlock ignores access condition so this always succeeds

                        await blob.PutBlockListAsync(blockIds, accessCondition,
                                                     options, operationContext, cancellationToken).ConfigureAwait(false); // 409 if blob created behind our back; 400 if block ID doesn't exist (happens if another PC calls PutBlockList after our PutBlock)

                        break;                                                                                            // If successful, we're done; don't retry
                    }
                    catch (StorageException se) {
                        // Blob got created behind our back, retry
                        if (se.Matches(HttpStatusCode.Conflict, BlobErrorCodeStrings.BlobAlreadyExists))
                        {
                            continue;
                        }

                        // Blob got created or modified behind our back, retry
                        if (se.Matches(HttpStatusCode.PreconditionFailed))
                        {
                            continue;
                        }

                        // Another PC called PutBlockList between our PutBlock & PutBlockList,
                        // our block got destroyed, retry
                        if (se.Matches(HttpStatusCode.BadRequest, BlobErrorCodeStrings.InvalidBlockList))
                        {
                            continue;
                        }
                        throw;
                    }
                }
            }
        }