/// <summary>
/// Reads the "Content" attachment of the dataset document and materializes its
/// storage details. Currently only the "blob" storage type is understood.
/// </summary>
/// <param name="datasetId">Identifier of the dataset; also used as the document partition key.</param>
/// <param name="cancellationToken">Checked once before the read is issued.</param>
/// <returns>
/// A <see cref="DatasetBlobStorageDetails"/> for blob-backed datasets, or null when the
/// attachment has no resource.
/// </returns>
/// <exception cref="InvalidOperationException">The attachment declares an unrecognized storage type.</exception>
public async Task<DatasetStorageDetails> GetDatasetStorageDetails(Guid datasetId, CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var requestOptions = new RequestOptions
    {
        // Documents are partitioned by the dataset id.
        PartitionKey = new PartitionKey(datasetId.ToString()),
    };
    var attachmentUri = CreateDatasetDocumentAttachmentUri(datasetId, "Content");
    var response = await Client.ReadAttachmentAsync(attachmentUri, requestOptions).ConfigureAwait(false);

    var resource = response?.Resource;
    if (resource == null)
    {
        return null;
    }

    // Missing property is treated the same as an unknown type.
    var storageType = resource.GetPropertyValue<string>("storageType") ?? string.Empty;
    if (storageType == "blob")
    {
        return new DatasetBlobStorageDetails
        {
            DatasetId = datasetId,
            StorageType = DatasetStorageTypes.Blob,
            Account = resource.GetPropertyValue<string>("account"),
            Container = resource.GetPropertyValue<string>("container"),
        };
    }

    throw new InvalidOperationException($"Unknown storage type, \"{storageType}\", for dataset.");
}
/// <summary>
/// Enumerates every blob in the dataset's source container and streams each one into the
/// output archives tracked by the <see cref="CompressContext"/>, then records the resulting
/// compressed (zip/tgz) sizes against the dataset.
/// </summary>
/// <param name="storage">Blob storage location (account + container) holding the dataset files.</param>
/// <param name="cancellationToken">Flows through listing, archiving, and container creation.</param>
private async Task ReadDatasetFiles(DatasetBlobStorageDetails storage, CancellationToken cancellationToken)
{
    Log.Add("Reading dataset files.");

    var ctx = new CompressContext
    {
        ContainerName = storage.Container,
        Buffer = new byte[32768],
    };

    // The account key is resolved from configuration by account name.
    var credentials = new StorageCredentials(storage.Account, StorageConfig.Accounts[storage.Account]);
    var storageAcct = new CloudStorageAccount(credentials, true);
    var blobClient = storageAcct.CreateCloudBlobClient();
    var blobContainer = blobClient.GetContainerReference(storage.Container);

    ctx.ArchiveContainer = blobClient.GetContainerReference(ctx.ArchiveContainerName);
    Log.Add($"Writing archives to {ctx.ArchiveContainer.Uri}");
    // FIX: flow the cancellation token into container creation (it was dropped before),
    // and apply ConfigureAwait(false) consistently with the listing call below.
    await ctx.ArchiveContainer.CreateIfNotExistsAsync(cancellationToken).ConfigureAwait(false);

    BlobContinuationToken continuationToken = null;
    const bool useFlatBlobListing = true; // no virtual directories; every result is a blob
    const BlobListingDetails blobListingDetails = BlobListingDetails.None;
    const int maxBlobsPerRequest = 100;
    int totalCount = 0;
    long totalSize = 0;

    await OpenOutputArchives(ctx, cancellationToken).ConfigureAwait(false);

    do
    {
        var listingResult = await blobContainer
            .ListBlobsSegmentedAsync("", useFlatBlobListing, blobListingDetails, maxBlobsPerRequest, continuationToken, null, null, cancellationToken)
            .ConfigureAwait(false);
        continuationToken = listingResult.ContinuationToken;

        // "_metadata.txt" is bookkeeping, not dataset content — exclude it from the archives.
        var results = listingResult.Results
            .Cast<CloudBlockBlob>()
            .Where(r => r.Name != "_metadata.txt")
            .Select(blob => new FileDetails
            {
                Name = Path.GetFileName(blob.Name),
                FullName = blob.Name,
                Length = blob.Properties.Length,
                // LastModified can be null on the client type; fall back to "now".
                Modified = blob.Properties.LastModified ?? DateTimeOffset.UtcNow,
            })
            .ToList();

        foreach (var result in results)
        {
            Log.Add($"- {result.FullName}");
            ctx.Details = result;
            var blobReference = blobContainer.GetBlockBlobReference(result.FullName);
            await AddDatasetFileToArchive(ctx, blobReference, cancellationToken).ConfigureAwait(false);
        }

        // FIX: use List<T>.Count property rather than the LINQ Count() extension.
        totalCount += results.Count;
        totalSize += results.Sum(t => t.Length);
    }
    while (continuationToken != null);

    // NOTE(review): if CloseOutputArchives returns a Task it must be awaited — confirm its signature.
    CloseOutputArchives(ctx, cancellationToken);

    var (zipSize, tgzSize) = await GetArchiveDetails(ctx, cancellationToken).ConfigureAwait(false);
    await DatasetStorage.UpdateDatasetCompressedDetails(DatasetId, zipSize, tgzSize).ConfigureAwait(false);

    Console.WriteLine($"Compressed {totalCount:n0} total files, {totalSize:n0} bytes.");
    Console.WriteLine($"zip file: {zipSize:n0} bytes ({Ratio(totalSize, zipSize):n2}%).");
    Console.WriteLine($"tgz file: {tgzSize:n0} bytes ({Ratio(totalSize, tgzSize):n2}%).");
}