/// <summary>
/// Reads a block item for block listing.
/// </summary>
/// <param name="committed">Whether we are currently listing committed blocks or not</param>
/// <param name="reader">The XML reader positioned at the start of the block element</param>
/// <param name="token">Cancellation token to observe</param>
/// <returns>Block listing entry</returns>
private static async Task<ListBlockItem> ParseBlockItemAsync(bool committed, XmlReader reader, CancellationToken token)
{
    ListBlockItem block = new ListBlockItem()
    {
        Committed = committed,
    };

    await reader.ReadStartElementAsync().ConfigureAwait(false);
    while (await reader.IsStartElementAsync().ConfigureAwait(false))
    {
        token.ThrowIfCancellationRequested();

        if (reader.IsEmptyElement)
        {
            await reader.SkipAsync().ConfigureAwait(false);
        }
        else
        {
            switch (reader.Name)
            {
                case Constants.SizeElement:
                    block.Length = await reader.ReadElementContentAsInt64Async().ConfigureAwait(false);
                    break;

                case Constants.NameElement:
                    block.Name = await reader.ReadElementContentAsStringAsync().ConfigureAwait(false);
                    break;

                default:
                    await reader.SkipAsync().ConfigureAwait(false);
                    break;
            }
        }
    }

    await reader.ReadEndElementAsync().ConfigureAwait(false);

    return block;
}
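For reference, here is a minimal, self-contained sketch of the XML shape this parser consumes: the `<Block>` entries (with `Name` and `Size` children) in a Get Block List response. It uses the standard synchronous XmlReader API rather than the SDK's internal async extension methods, and the block name and size values are made up:

using System;
using System.IO;
using System.Xml;

static class BlockXmlSketch
{
    static void Main()
    {
        // Hypothetical Get Block List response fragment.
        const string xml =
            "<BlockList><CommittedBlocks>" +
            "<Block><Name>YmxvY2stMDAwMQ==</Name><Size>4194304</Size></Block>" +
            "</CommittedBlocks></BlockList>";

        using (var reader = XmlReader.Create(new StringReader(xml)))
        {
            while (reader.ReadToFollowing("Block"))
            {
                reader.ReadToDescendant("Name");
                string name = reader.ReadElementContentAsString(); // moves past </Name> onto <Size>

                long size = 0;
                if (reader.Name == "Size")
                {
                    size = reader.ReadElementContentAsLong();
                }

                Console.WriteLine($"{name}: {size} bytes");
            }
        }
    }
}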
private IList<IListBlockItem<T>> Convert<T>(IList<ProtoListBlockItem> protoListBlockItems)
{
    if (protoListBlockItems == null)
    {
        return null;
    }

    var items = new List<IListBlockItem<T>>();

    foreach (var protoItem in protoListBlockItems)
    {
        var item = new ListBlockItem<T>();
        item.LastUpdated = protoItem.LastUpdated;
        item.ListBlockItemId = protoItem.ListBlockItemId;
        item.Status = protoItem.Status;
        item.StatusReason = protoItem.StatusReason;
        item.Step = protoItem.Step;
        item.Value = JsonGenericSerializer.Deserialize<T>(protoItem.Value);
        items.Add(item);
    }

    return items;
}
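JsonGenericSerializer is project-specific, but the conversion above is essentially a plain JSON round-trip of each item's Value payload. A stand-in sketch using System.Text.Json, with a hypothetical Order payload type:

using System;
using System.Text.Json;

// Hypothetical payload type; the real T depends on what was stored in the list block.
class Order { public int Id { get; set; } public decimal Total { get; set; } }

class ConvertSketch
{
    static void Main()
    {
        // The stored Value holds the serialized payload; turning it back into T is
        // what Convert<T> delegates to JsonGenericSerializer for.
        string storedValue = "{\"Id\":7,\"Total\":19.99}";
        Order value = JsonSerializer.Deserialize<Order>(storedValue);
        Console.WriteLine($"Order {value.Id}: {value.Total}");
    }
}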
public List<ListBlockItem<T>> GetListBlockItems<T>(string blockId, ItemStatus status)
{
    var items = new List<ListBlockItem<T>>();

    using (var connection = new SqlConnection(TestConstants.TestConnectionString))
    {
        connection.Open();
        var command = connection.CreateCommand();
        command.CommandText = GetListBlockItemsQuery;
        command.Parameters.Add("@BlockId", SqlDbType.BigInt).Value = long.Parse(blockId);
        command.Parameters.Add("@Status", SqlDbType.TinyInt).Value = (byte)status;

        var reader = command.ExecuteReader();
        while (reader.Read())
        {
            var item = new ListBlockItem<T>();
            item.ListBlockItemId = reader.GetInt64(0).ToString();
            item.Value = JsonGenericSerializer.Deserialize<T>(reader.GetString(1));
            item.Status = (ItemStatus)reader.GetByte(2);

            if (reader[4] != DBNull.Value)
            {
                item.StatusReason = reader.GetString(4);
            }

            if (reader[5] != DBNull.Value)
            {
                item.Step = reader.GetByte(5);
            }

            items.Add(item);
        }
    }

    return items;
}
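A hypothetical caller for this test helper, assuming the method lives on a helper class instance (here named ExecutionsHelper) and that the ItemStatus enum defines a Failed member; both names are guesses:

// Illustrative fragment only: ExecutionsHelper and ItemStatus.Failed are assumed names.
void PrintFailedItems(ExecutionsHelper helper)
{
    foreach (var item in helper.GetListBlockItems<string>("42", ItemStatus.Failed))
    {
        Console.WriteLine($"{item.ListBlockItemId}: {item.StatusReason}");
    }
}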
static void Main(string[] args)
{
    var builder = new ConfigurationBuilder()
        .SetBasePath(Directory.GetCurrentDirectory())
        .AddJsonFile("appsettings.json");

    Configuration = builder.Build();

    CloudStorageAccount account = CloudStorageAccount.Parse(Configuration["AppSettings:storageConnection"]);
    CloudQueueClient queueClient = account.CreateCloudQueueClient();
    CloudQueue queue = queueClient.GetQueueReference("blobcreateevents");
    queue.CreateIfNotExistsAsync().Wait(); // block until the queue exists before polling it

    CloudBlobClient blobClient = account.CreateCloudBlobClient();

    bool done = false;
    ListBlockItem lastBlockDownloaded = null;
    int currentBackOff = 0;
    int maxBackOff = 5;
    byte[] bytesFromBlob = new byte[4 * 1024 * 1024];

    do
    {
        Console.ForegroundColor = ConsoleColor.Green;
        Console.WriteLine("Getting messages from the queue.");

        queue.FetchAttributesAsync().Wait();
        int countOfMessages = Convert.ToInt32(queue.ApproximateMessageCount);

        if (countOfMessages > 0)
        {
            currentBackOff = 1;

            var msgs = queue.GetMessagesAsync(countOfMessages > 32 ? 32 : countOfMessages).Result;
            var numberOfMessagesLeftInBatch = msgs.Count();

            foreach (var msg in msgs)
            {
                // Process only the last message: all messages in the batch are
                // equivalent in meaning, and the sender usually gets ahead of the receiver.
                if (numberOfMessagesLeftInBatch == 1)
                {
                    JToken token = JObject.Parse(msg.AsString);
                    string resourceId = (string)token.SelectToken("topic");
                    string subject = (string)token.SelectToken("subject");
                    string eventType = (string)token.SelectToken("eventType");

                    if (eventType == "Microsoft.Storage.BlobCreated")
                    {
                        var storageAccountName = resourceId.Split('/')[8];
                        var storageContainerName = subject.Split('/')[4];
                        var blobName = subject.Split('/')[6];

                        Uri uri = new Uri($"https://{storageAccountName}.blob.core.windows.net/{storageContainerName}/{blobName}");
                        CloudBlockBlob blob = new CloudBlockBlob(uri, blobClient);
                        var blockList = blob.DownloadBlockListAsync().Result;

                        long blobOffset = 0;
                        var blocksToCopy = blockList;

                        if (lastBlockDownloaded != null)
                        {
                            // count the blocks already written
                            var countOfblocksAlreadyWritten = blockList.TakeWhile(item => (item.Name != lastBlockDownloaded.Name)).Count() + 1;

                            // get an enumerable of those block list items
                            var blocksAlreadyWritten = blockList.Take(countOfblocksAlreadyWritten);

                            // add up the bytes already written
                            foreach (var block in blocksAlreadyWritten)
                            {
                                blobOffset += block.Length;
                            }

                            // skip over blocks already written
                            blocksToCopy = blockList.SkipWhile(item => (item.Name != lastBlockDownloaded.Name)).Skip(1);
                        }

                        if (blocksToCopy.Count() > 0)
                        {
                            var fs = File.OpenWrite(@"c:\temp\abigfile.dat");
                            using (BinaryWriter writer = new BinaryWriter(fs))
                            {
                                foreach (var block in blocksToCopy)
                                {
                                    blob.DownloadRangeToByteArrayAsync(bytesFromBlob, 0, blobOffset, block.Length).Wait();
                                    writer.Seek(0, SeekOrigin.End);
                                    writer.Write(bytesFromBlob, 0, (int)block.Length);
                                    blobOffset += block.Length;

                                    Console.ForegroundColor = ConsoleColor.White;
                                    Console.WriteLine($"{block.Name}");
                                }
                            }
                            fs.Close();
                        }

                        lastBlockDownloaded = blockList.Last();
                    }
                }

                Console.ForegroundColor = ConsoleColor.Green;
                Console.WriteLine("Deleting a message from the queue");
                queue.DeleteMessageAsync(msg).Wait(); // block so the delete completes before moving on

                numberOfMessagesLeftInBatch--;
            }
        }
        else
        {
            Console.ForegroundColor = ConsoleColor.Green;
            Console.WriteLine($"Waiting for {currentBackOff} seconds for next message");
            Thread.Sleep(currentBackOff * 1000);
            currentBackOff += (currentBackOff < maxBackOff ? 1 : 0);
        }

        // set done flag in some way, or loop forever.
    } while (!done);

    Console.ReadKey();
}
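The resume arithmetic in the loop above is easy to get wrong, so here is a self-contained sketch of just that piece, with hypothetical block names and lengths: sum the lengths of the blocks up to and including the last one downloaded to find the byte offset, then skip past that block to find what remains to copy.

using System;
using System.Linq;

class ResumeOffsetSketch
{
    static void Main()
    {
        // Hypothetical committed block list: (name, length in bytes).
        var blocks = new (string Name, long Length)[] { ("b1", 100), ("b2", 200), ("b3", 50) };
        string lastBlockDownloaded = "b2";

        // Bytes already written = sum of lengths up to and including the last block downloaded.
        long blobOffset = blocks.TakeWhile(b => b.Name != lastBlockDownloaded).Sum(b => b.Length)
                          + blocks.First(b => b.Name == lastBlockDownloaded).Length;

        // Blocks still to copy = everything after the last block downloaded.
        var blocksToCopy = blocks.SkipWhile(b => b.Name != lastBlockDownloaded).Skip(1);

        Console.WriteLine($"Resume at offset {blobOffset}; remaining: {string.Join(", ", blocksToCopy.Select(b => b.Name))}");
        // Prints: Resume at offset 300; remaining: b3
    }
}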
/// <summary>
/// Concatenate a set of blobs - specified either via their prefix or an explicit list of blob names - to a single destination blob.
/// Source blobs must be from a single container. However, because of the way Azure Storage block blobs work, you can call
/// this function multiple times, specifying different sets of source blobs each time, and they will simply be "appended" to the destination blob.
/// </summary>
/// <param name="sourceStorageAccountName">Name of the source storage account</param>
/// <param name="sourceStorageContainerName">Name of the source container</param>
/// <param name="sourceStorageAccountKey">Key for the source storage account</param>
/// <param name="sourceBlobPrefix">Prefix used to select source blobs (ignored when an explicit list is given)</param>
/// <param name="sortBlobs">Whether to sort the prefix-matched blob names before concatenating</param>
/// <param name="sourceBlobNames">Explicit list of source blob names; pass null or an empty list to use the prefix instead</param>
/// <param name="destStorageAccountName">Name of the destination storage account</param>
/// <param name="destStorageAccountKey">Key for the destination storage account</param>
/// <param name="destStorageContainerName">Name of the destination container</param>
/// <param name="destBlobName">Name of the destination blob</param>
public static bool BlobToBlob(string sourceStorageAccountName, string sourceStorageContainerName, string sourceStorageAccountKey,
    string sourceBlobPrefix, bool sortBlobs, List<string> sourceBlobNames,
    string destStorageAccountName, string destStorageAccountKey, string destStorageContainerName, string destBlobName)
{
    // this will be used to compute a unique blockId for the source blocks
    var shaHasher = new SHA384Managed();

    GlobalOptimizations();

    // TODO remove hard-coding of core.windows.net
    string sourceAzureStorageConnStr = $"DefaultEndpointsProtocol=https;AccountName={sourceStorageAccountName};AccountKey={sourceStorageAccountKey};EndpointSuffix=core.windows.net";
    string destAzureStorageConnStr = $"DefaultEndpointsProtocol=https;AccountName={destStorageAccountName};AccountKey={destStorageAccountKey};EndpointSuffix=core.windows.net";

    var destStorageAccount = CloudStorageAccount.Parse(destAzureStorageConnStr);
    var destBlobClient = destStorageAccount.CreateCloudBlobClient();
    var destContainer = destBlobClient.GetContainerReference(destStorageContainerName);
    var destBlob = destContainer.GetBlockBlobReference(destBlobName);

    List<string> destBlockList = new List<string>();

    // check if the blob exists, in which case we need to also get the list of blocks associated with that blob
    // this will help to skip blocks which were already completed, and thereby help with resume
    // TODO Block IDs are not unique across files - this will trip up the logic
    if (destBlob.ExistsAsync().GetAwaiter().GetResult())
    {
        // only get committed blocks to be sure
        destBlockList = (from b in destBlob.DownloadBlockListAsync(BlockListingFilter.Committed, null, null, null).GetAwaiter().GetResult()
                         select b.Name).ToList();
    }

    // create a placeholder for the final block list (to be eventually used for put block list) and pre-populate it
    // with the known list of blocks already associated with the destination blob
    var finalBlockList = new List<string>();
    finalBlockList.AddRange(destBlockList);

    var sourceStorageAccount = CloudStorageAccount.Parse(sourceAzureStorageConnStr);
    var sourceBlobClient = sourceStorageAccount.CreateCloudBlobClient();
    var sourceContainer = sourceBlobClient.GetContainerReference(sourceStorageContainerName);

    var blobListing = new List<IListBlobItem>();
    BlobContinuationToken continuationToken = null;

    // check if there is a specific list of blobs given by the user, in which case the immediate 'if' code below will be skipped
    if (sourceBlobNames is null || sourceBlobNames.Count == 0)
    {
        // we have a prefix specified, so get a list of blobs with that prefix and then add them to a list
        do
        {
            var response = sourceContainer.ListBlobsSegmentedAsync(sourceBlobPrefix, true, BlobListingDetails.None, null, continuationToken, null, null).GetAwaiter().GetResult();
            continuationToken = response.ContinuationToken;
            blobListing.AddRange(response.Results);
        } while (continuationToken != null);

        // now just get the blob names, that's all we need for further processing
        sourceBlobNames = (from b in blobListing.OfType<CloudBlockBlob>() select b.Name).ToList();

        // if the user specified to sort the input blobs (only valid for the prefix case) then we will happily do that!
        // The gotcha here is that this is a string sort. So if blobs have names like blob_9, blob_13, blob_6, blob_3, blob_1
        // the sort order will result in blob_1, blob_13, blob_3, blob_6, blob_9.
        // To avoid issues like this the user must 0-prefix the numbers embedded in the filenames.
        if (sortBlobs)
        {
            sourceBlobNames.Sort();
        }
    }

    // iterate through each source blob, one at a time.
    foreach (var sourceBlobName in sourceBlobNames)
    {
        Debug.WriteLine($"{DateTime.Now}: START {sourceBlobName}");

        var sourceBlob = sourceContainer.GetBlockBlobReference(sourceBlobName);

        // first we get the block list of the source blob. we use this to later parallelize the download / copy operation
        var sourceBlockList = sourceBlob.DownloadBlockListAsync(BlockListingFilter.Committed, null, null, null).GetAwaiter().GetResult();

        // in case the source blob is smaller than 256 MB (for the latest API) it may have been stored directly without any block list.
        // in that case the sourceBlockList is 0-length and we need to fake a BlockListItem as null, which is handled below
        if (sourceBlockList.Count() == 0 && sourceBlob.Properties.Length > 0)
        {
            ListBlockItem fakeBlockItem = null;
            sourceBlockList = sourceBlockList.Concat(new[] { fakeBlockItem });
        }

        var blockRanges = new List<BlockRange>();
        long currentOffset = 0;

        // iterate through the list of blocks and compute their effective offsets in the final file.
        int chunkIndex = 0;
        foreach (var blockListItem in sourceBlockList)
        {
            // handle the special case when the sourceBlob was put using PutBlob and has no blocks
            var blockLength = blockListItem == null ? sourceBlob.Properties.Length : blockListItem.Length;

            // compute a unique blockId based on blob account + container + blob name (includes path) + block length + block "number"
            // TODO also include the endpoint when we generalize for all clouds
            var hashBasis = System.Text.Encoding.UTF8.GetBytes(string.Concat(sourceStorageAccountName, sourceStorageContainerName, sourceBlobName, blockLength, chunkIndex));
            var newBlockId = Convert.ToBase64String(shaHasher.ComputeHash(hashBasis));

            var newBlockRange = new BlockRange()
            {
                Name = newBlockId,
                StartOffset = currentOffset,
                Length = blockLength
            };

            // increment these here itself as we may potentially skip to the next block
            chunkIndex++;
            currentOffset += blockLength;

            // check if this block has already been copied + committed at the destination, and in that case, skip it
            if (destBlockList.Contains(newBlockId))
            {
                continue;
            }

            blockRanges.Add(newBlockRange);
        }

        Debug.WriteLine($"Number of ranges: {blockRanges.Count}");

        // reset back to 0 to actually execute the copies
        currentOffset = 0;

        // proceed to copy blocks in parallel. to do this, we download to a local memory stream and then push that back out to the destination blob
        Parallel.ForEach<BlockRange, MD5CryptoServiceProvider>(
            blockRanges,
            new ParallelOptions() { MaxDegreeOfParallelism = Environment.ProcessorCount * 8 },
            () =>
            {
                // this will be a "task-local" copy. Better than creating this within the task, as that would be slightly expensive.
                return new MD5CryptoServiceProvider();
            },
            (currRange, loopState, hasher) =>
            {
                // TODO do we really need this copy?
                var sourceBlobLocalCopy = sourceBlob; // sourceContainer.GetBlockBlobReference(sourceBlobName);

                // TODO verify cast
                using (var memStream = new MemoryStream((int)currRange.Length))
                {
                    sourceBlobLocalCopy.DownloadRangeToStreamAsync(memStream, currRange.StartOffset, currRange.Length).GetAwaiter().GetResult();

                    // compute the hash, for which we need to reset the memory stream
                    memStream.Position = 0;
                    var md5Checksum = hasher.ComputeHash(memStream);

                    // reset the memory stream again to 0 and then call Azure Storage to put this as a block with the given block ID and MD5 hash
                    memStream.Position = 0;
                    Debug.WriteLine($"Putting block {currRange.Name}");
                    destBlob.PutBlockAsync(currRange.Name, memStream, Convert.ToBase64String(md5Checksum)).GetAwaiter().GetResult();
                }

                return hasher;
            },
            (hasherFinally) => { });

        // keep adding the blocks we just copied, to the final block list
        finalBlockList.AddRange(from r in blockRanges select r.Name);

        // TODO review whether we put this for each source file or at the end of all source files
        // when we are all done, execute a "commit" by using the Put Block List operation
        destBlob.PutBlockListAsync(finalBlockList).GetAwaiter().GetResult();

        Debug.WriteLine($"{DateTime.Now}: END {sourceBlobName}");

        // TODO optionally allow user to specify extra character(s) to append in between files. This is typically needed when the source files do not have a trailing \n character.
    }

    // release the SHA hasher resources
    shaHasher.Dispose();

    // TODO handle failures.
    return true;
}
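The deterministic block ID scheme is the heart of the resume logic above: hashing the source coordinates plus each block's length and ordinal means a re-run regenerates identical IDs, so already-committed blocks can be skipped. A self-contained sketch with made-up account, container, and blob values:

using System;
using System.Security.Cryptography;
using System.Text;

class BlockIdSketch
{
    static void Main()
    {
        using (var shaHasher = SHA384.Create())
        {
            // All inputs here are hypothetical; the real code concatenates the source
            // account, container, blob name, block length, and block ordinal.
            var hashBasis = Encoding.UTF8.GetBytes(
                string.Concat("myaccount", "mycontainer", "logs/file1.csv", 4194304L, 0));
            string blockId = Convert.ToBase64String(shaHasher.ComputeHash(hashBasis));

            // A SHA-384 digest is 48 bytes, so the Base64 ID is exactly 64 characters,
            // which fits within Azure's 64-byte limit for block IDs.
            Console.WriteLine($"{blockId} ({blockId.Length} chars)");
        }
    }
}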
/// <summary>
/// Appends a stream's contents to the end of a blob (creating the blob and appending a header if the blob doesn't exist).
/// Before calling this method, set the blob's ContentType property (ex: "text/csv; charset=utf-8").
/// </summary>
/// <param name="blob">The blob to have the stream's contents appended to.</param>
/// <param name="data">The stream with contents to append.</param>
/// <param name="header">The header byte array to put in the blob if the blob is being created.</param>
/// <param name="maxBlockSize">The maximum block size in bytes (0=Azure default of 4MB)</param>
/// <param name="options">A Microsoft.WindowsAzure.Storage.Blob.BlobRequestOptions object that specifies additional options for the request.</param>
/// <param name="operationContext">A Microsoft.WindowsAzure.Storage.OperationContext object that represents the context for the current operation.</param>
/// <param name="cancellationToken">A System.Threading.CancellationToken to observe while waiting for a task to complete.</param>
/// <returns>A System.Threading.Tasks.Task object that represents the current operation.</returns>
public static async Task AppendAsync(this CloudBlockBlob blob, Stream data, Byte[] header, Int32 maxBlockSize = 0,
    BlobRequestOptions options = null, OperationContext operationContext = null,
    CancellationToken cancellationToken = default(CancellationToken))
{
    if (maxBlockSize == 0)
    {
        maxBlockSize = 4 * 1024 * 1024;
    }

    if (data.Length > maxBlockSize)
    {
        throw new ArgumentOutOfRangeException("data", "A single data object cannot be larger than " + maxBlockSize.ToString() + " bytes.");
    }

    if (header != null && header.Length > maxBlockSize)
    {
        throw new ArgumentOutOfRangeException("header", "The header cannot be larger than " + maxBlockSize.ToString() + " bytes.");
    }

    while (true)
    {
        using (var ms = new MemoryStream())
        {
            AccessCondition accessCondition;
            IEnumerable<ListBlockItem> blockList = Enumerable.Empty<ListBlockItem>();

            try
            {
                blockList = await blob.DownloadBlockListAsync(BlockListingFilter.Committed, null, options, operationContext, cancellationToken).ConfigureAwait(false); // 404 if blob not found
                accessCondition = AccessCondition.GenerateIfMatchCondition(blob.Properties.ETag); // Write if blob matches what we read
            }
            catch (StorageException se)
            {
                if (!se.Matches(HttpStatusCode.NotFound, BlobErrorCodeStrings.BlobNotFound))
                {
                    throw;
                }

                accessCondition = AccessCondition.GenerateIfNoneMatchCondition("*"); // Write if blob doesn't exist
            }

            try
            {
                List<String> blockIds = blockList.Select(lbi => lbi.Name).ToList();
                ListBlockItem lastBlock = blockList.LastOrDefault();

                if (lastBlock == null)
                {
                    if (header != null)
                    {
                        // No blocks exist, add header (if specified)
                        ms.Write(header, 0, header.Length);
                    }
                }
                else
                {
                    // A block exists; can it hold the new data?
                    if (lastBlock.Length + data.Length < maxBlockSize)
                    {
                        // Yes, download the last block's current data as long as the blob's etag hasn't changed
                        Int64 lastBlockOffset = blockList.Sum(lbi => lbi.Length) - lastBlock.Length;
                        await blob.DownloadRangeToStreamAsync(ms, lastBlockOffset, lastBlock.Length, accessCondition, options, operationContext, cancellationToken).ConfigureAwait(false); // 412 if blob modified behind our back

                        blockIds.Remove(lastBlock.Name); // Remove the block we're appending to
                    }
                }

                await data.CopyToAsync(ms).ConfigureAwait(false); // Append new data to end of stream
                ms.Seek(0, SeekOrigin.Begin);

                // Upload new block and append it to the blob
                String newBlockId = Guid.NewGuid().ToString("N").Encode().ToBase64String();
                blockIds.Add(newBlockId); // Append new block to end of blob
                await blob.PutBlockAsync(newBlockId, ms, null, accessCondition, options, operationContext, cancellationToken).ConfigureAwait(false); // PutBlock ignores access condition so this always succeeds
                await blob.PutBlockListAsync(blockIds, accessCondition, options, operationContext, cancellationToken).ConfigureAwait(false); // 409 if blob created behind our back; 400 if block ID doesn't exist (happens if another PC calls PutBlockList after our PutBlock)

                break; // If successful, we're done; don't retry
            }
            catch (StorageException se)
            {
                // Blob got created behind our back, retry
                if (se.Matches(HttpStatusCode.Conflict, BlobErrorCodeStrings.BlobAlreadyExists))
                {
                    continue;
                }

                // Blob got created or modified behind our back, retry
                if (se.Matches(HttpStatusCode.PreconditionFailed))
                {
                    continue;
                }

                // Another PC called PutBlockList between our PutBlock & PutBlockList;
                // our block got destroyed, retry
                if (se.Matches(HttpStatusCode.BadRequest, BlobErrorCodeStrings.InvalidBlockList))
                {
                    continue;
                }

                throw;
            }
        }
    }
}
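A hypothetical caller for the extension method above: append one CSV row to a blob, creating it with a header row on first write. The container, blob name, and row contents are illustrative.

using System.IO;
using System.Text;
using System.Threading.Tasks;
using Microsoft.WindowsAzure.Storage.Blob;

static class AppendUsageSketch
{
    // Illustrative only: the container is assumed to exist already.
    public static async Task AppendRowAsync(CloudBlobContainer container)
    {
        CloudBlockBlob blob = container.GetBlockBlobReference("metrics.csv");
        blob.Properties.ContentType = "text/csv; charset=utf-8"; // set before appending, per the remarks above

        byte[] header = Encoding.UTF8.GetBytes("timestamp,value\n");
        using (var row = new MemoryStream(Encoding.UTF8.GetBytes("2021-01-01T00:00:00Z,42\n")))
        {
            await blob.AppendAsync(row, header);
        }
    }
}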