/// <summary>
/// Reads the "Content" attachment of a dataset document and translates its
/// properties into a <see cref="DatasetStorageDetails"/> instance.
/// </summary>
/// <param name="datasetId">Identifier of the dataset; its string form is also used as the partition key.</param>
/// <param name="cancellationToken">Checked once on entry, before the attachment is requested.</param>
/// <returns>
/// A <see cref="DatasetBlobStorageDetails"/> when the attachment's "storageType" is "blob",
/// or <c>null</c> when the read produced no attachment resource.
/// </returns>
/// <exception cref="InvalidOperationException">The attachment's storage type is absent or not recognised.</exception>
/// <exception cref="OperationCanceledException">Cancellation had already been requested on entry.</exception>
public async Task <DatasetStorageDetails> GetDatasetStorageDetails(Guid datasetId, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var requestOptions = new RequestOptions { PartitionKey = new PartitionKey(datasetId.ToString()) };
            var attachmentUri  = CreateDatasetDocumentAttachmentUri(datasetId, "Content");

            var attachment = (await Client.ReadAttachmentAsync(attachmentUri, requestOptions).ConfigureAwait(false))?.Resource;
            if (attachment == null)
            {
                return null;
            }

            // A missing "storageType" property is normalised to an empty string so that
            // it ends up in the "unknown storage type" error below.
            var kind = attachment.GetPropertyValue<string>("storageType") ?? string.Empty;

            if (kind == "blob")
            {
                return new DatasetBlobStorageDetails
                {
                    DatasetId   = datasetId,
                    StorageType = DatasetStorageTypes.Blob,
                    Account     = attachment.GetPropertyValue<string>("account"),
                    Container   = attachment.GetPropertyValue<string>("container"),
                };
            }

            throw new InvalidOperationException($"Unknown storage type, \"{kind}\", for dataset.");
        }
示例#2
0
        /// <summary>
        /// Lists every blob in the dataset's container (except "_metadata.txt"), passes each one
        /// to <c>AddDatasetFileToArchive</c>, then closes the output archives, records their
        /// sizes through <c>DatasetStorage.UpdateDatasetCompressedDetails</c> and prints a summary.
        /// </summary>
        /// <param name="storage">Blob storage account and container that hold the dataset files.</param>
        /// <param name="cancellationToken">
        /// Forwarded to the blob listing call and to the archive helper methods.
        /// </param>
        private async Task ReadDatasetFiles(DatasetBlobStorageDetails storage, CancellationToken cancellationToken)
        {
            Log.Add("Reading dataset files.");

            // Size of the scratch buffer handed to the archive helpers through the context.
            const int copyBufferSize = 32768;

            var ctx = new CompressContext
            {
                ContainerName = storage.Container,
                Buffer        = new byte[copyBufferSize],
            };

            // The account key is looked up by account name; "true" selects HTTPS endpoints.
            var credentials   = new StorageCredentials(storage.Account, StorageConfig.Accounts[storage.Account]);
            var storageAcct   = new CloudStorageAccount(credentials, true);
            var blobClient    = storageAcct.CreateCloudBlobClient();
            var blobContainer = blobClient.GetContainerReference(storage.Container);

            // The archive container lives in the same storage account as the dataset.
            ctx.ArchiveContainer = blobClient.GetContainerReference(ctx.ArchiveContainerName);
            Log.Add($"Writing archives to {ctx.ArchiveContainer.Uri}");
            await ctx.ArchiveContainer.CreateIfNotExistsAsync().ConfigureAwait(false);

            BlobContinuationToken    continuationToken  = null;
            const bool               useFlatBlobListing = true;
            const BlobListingDetails blobListingDetails = BlobListingDetails.None;
            const int maxBlobsPerRequest = 100;

            int  totalCount = 0;
            long totalSize  = 0;

            // NOTE(review): if anything below throws, CloseOutputArchives is never reached.
            // Whether closing on failure is desirable depends on what the helper commits — confirm.
            await OpenOutputArchives(ctx, cancellationToken).ConfigureAwait(false);

            do
            {
                // Fetch one page of the listing; a null continuation token marks the last page.
                var listingResult = await blobContainer
                                    .ListBlobsSegmentedAsync("", useFlatBlobListing, blobListingDetails, maxBlobsPerRequest, continuationToken, null, null, cancellationToken)
                                    .ConfigureAwait(false);

                continuationToken = listingResult.ContinuationToken;

                // NOTE(review): Cast throws InvalidCastException if the container holds
                // anything other than block blobs — presumably it never does; confirm.
                var files = listingResult.Results
                            .Cast<CloudBlockBlob>()
                            .Where(blob => blob.Name != "_metadata.txt")
                            .Select(blob => new FileDetails
                            {
                                Name     = Path.GetFileName(blob.Name),
                                FullName = blob.Name,
                                Length   = blob.Properties.Length,
                                Modified = blob.Properties.LastModified ?? DateTimeOffset.UtcNow,
                            })
                            .ToList();

                foreach (var file in files)
                {
                    Log.Add($"- {file.FullName}");
                    ctx.Details = file;
                    var blobReference = blobContainer.GetBlockBlobReference(file.FullName);
                    await AddDatasetFileToArchive(ctx, blobReference, cancellationToken).ConfigureAwait(false);
                }

                // The page is already materialized, so use the list's Count property
                // rather than the LINQ Count() extension.
                totalCount += files.Count;
                totalSize  += files.Sum(file => file.Length);
            } while (continuationToken != null);

            CloseOutputArchives(ctx, cancellationToken);

            (var zipSize, var tgzSize) = await GetArchiveDetails(ctx, cancellationToken).ConfigureAwait(false);

            await DatasetStorage.UpdateDatasetCompressedDetails(DatasetId, zipSize, tgzSize).ConfigureAwait(false);

            Console.WriteLine($"Compressed {totalCount:n0} total files, {totalSize:n0} bytes.");
            Console.WriteLine($"zip file: {zipSize:n0} bytes ({Ratio(totalSize, zipSize):n2}%).");
            Console.WriteLine($"tgz file: {tgzSize:n0} bytes ({Ratio(totalSize, tgzSize):n2}%).");
        }