Beispiel #1
0
        // Returns every domain reported by DatasetStorage.GetAllDomains as a JSON result.
        // Throws OperationCanceledException when the request was cancelled before the lookup starts.
        public async Task<IActionResult> GetDomains(CancellationToken cancellationToken)
        {
            // Bail out before doing any work if the caller has already given up.
            cancellationToken.ThrowIfCancellationRequested();

            var domains = await DatasetStorage
                .GetAllDomains(cancellationToken)
                .ConfigureAwait(false);

            return Json(domains);
        }
Beispiel #2
0
        /// <summary>
        /// Probes blob storage for a container name that does not exist yet and stores it on
        /// <paramref name="storage"/>.
        /// </summary>
        /// <remarks>
        /// The first candidate is built from the dataset name and the supplied suffixes. Each time a
        /// candidate already exists, the next single character of <c>NextChars</c> is appended as an
        /// additional suffix and the lookup is repeated.
        /// </remarks>
        /// <param name="storage">Dataset storage details; <c>ContainerName</c> is assigned on success.</param>
        /// <param name="suffixes">Suffixes forwarded to <c>ContainerNameFromDatasetName</c>.</param>
        /// <exception cref="InvalidOperationException">
        /// A candidate exists and every character of <c>NextChars</c> has already been tried.
        /// </exception>
        private async Task GetUniqueDatasetContainerName(DatasetStorage storage, params string[] suffixes)
        {
            var client    = CreateBlobClient(storage);
            var candidate = ContainerNameFromDatasetName(storage.DatasetName, suffixes);

            // charIndex is the position in NextChars of the disambiguating character to try next.
            for (int charIndex = 0; ; charIndex++)
            {
                bool taken = await client.GetContainerReference(candidate).ExistsAsync();

                if (!taken)
                {
                    storage.ContainerName = candidate;
                    return;
                }

                if (charIndex >= NextChars.Length)
                {
                    throw new InvalidOperationException("No more characters available to build valid container name.");
                }

                // Always extend the ORIGINAL suffixes by exactly one character; characters do not accumulate.
                var extended = suffixes
                               .Concat(new[] { NextChars.Substring(charIndex, 1) })
                               .ToArray();
                candidate = ContainerNameFromDatasetName(storage.DatasetName, extended);
            }
        }
Beispiel #3
0
        /// <summary>
        /// Creates a blob client for the storage account named by the dataset storage details.
        /// </summary>
        /// <param name="datasetStorage">Dataset storage details supplying the account name.</param>
        /// <returns>The client produced by the account-name overload of <c>CreateBlobClient</c>.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="datasetStorage"/> is null.</exception>
        private CloudBlobClient CreateBlobClient(DatasetStorage datasetStorage)
        {
            var details = datasetStorage ?? throw new ArgumentNullException(nameof(datasetStorage));

            return CreateBlobClient(details.AccountName);
        }
Beispiel #4
0
        /// <summary>
        /// Removes the documents of the current dataset, first via <c>FileSearch</c> and then via
        /// <c>DatasetStorage</c>, logging how many each call reported as deleted.
        /// </summary>
        /// <param name="cancellationToken">Token forwarded to both delete calls.</param>
        private async Task DeleteDatasetDocuments(CancellationToken cancellationToken)
        {
            Log.Add("Deleting current dataset documents (if any).");

            var searchDocsDeleted = await FileSearch.DeleteAllFilesDocumentsByDatasetId(DatasetId, cancellationToken);
            Log.Add($"Deleted {searchDocsDeleted} Azure Search dataset document(s).");

            var storageDocsDeleted = await DatasetStorage.DeleteDatasetDocuments(DatasetId, cancellationToken);
            Log.Add($"Deleted {storageDocsDeleted} dataset document(s).");
        }
Beispiel #5
0
        /// <summary>
        /// Delete the container for the dataset.
        /// </summary>
        /// <param name="storage">The details of the dataset; <c>ContainerName</c> must be non-blank.</param>
        /// <returns>True if dataset container was deleted.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="storage"/> is null or its <c>ContainerName</c> is null, empty or whitespace.
        /// </exception>
        public async Task<bool> DeleteDatasetContainer(DatasetStorage storage)
        {
            if (string.IsNullOrWhiteSpace(storage?.ContainerName))
            {
                // The reported parameter name must be an actual parameter of this method;
                // the message carries the detail about which member was missing.
                throw new ArgumentNullException(
                    nameof(storage),
                    "Dataset storage with a non-empty ContainerName is required.");
            }

            var blobClient = CreateBlobClient(storage);
            var container  = blobClient.GetContainerReference(storage.ContainerName);

            return await container.DeleteIfExistsAsync();
        }
Beispiel #6
0
        /// <summary>
        /// Looks up the storage details of the current dataset and, when they describe blob
        /// storage, hands them to <c>ReadDatasetFiles</c>.
        /// </summary>
        /// <param name="cancellationToken">Token forwarded to the lookup and to the file processing.</param>
        /// <exception cref="InvalidOperationException">
        /// The lookup did not return a <c>DatasetBlobStorageDetails</c> instance.
        /// </exception>
        private async Task Compress(CancellationToken cancellationToken)
        {
            var details = await DatasetStorage.GetDatasetStorageDetails(DatasetId, cancellationToken);

            // Covers both "nothing found" (null) and "found, but not blob storage".
            if (!(details is DatasetBlobStorageDetails blobStorage))
            {
                throw new InvalidOperationException("Storage details for dataset not found.");
            }

            await ReadDatasetFiles(blobStorage, cancellationToken);
        }
Beispiel #7
0
        /// <summary>
        /// Create the container for the dataset.
        /// </summary>
        /// <param name="storage">The details of the dataset; <c>ContainerName</c> must be non-blank.</param>
        /// <returns>The url to the dataset container.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="storage"/> is null or its <c>ContainerName</c> is null, empty or whitespace.
        /// </exception>
        public async Task<string> CreateDatasetContainer(DatasetStorage storage)
        {
            if (string.IsNullOrWhiteSpace(storage?.ContainerName))
            {
                // The reported parameter name must be an actual parameter of this method;
                // the message carries the detail about which member was missing.
                throw new ArgumentNullException(
                    nameof(storage),
                    "Dataset storage with a non-empty ContainerName is required.");
            }

            var blobClient = CreateBlobClient(storage);
            var container  = blobClient.GetContainerReference(storage.ContainerName);

            // Succeeds whether or not the container is already there.
            await container.CreateIfNotExistsAsync();

            return container.StorageUri.PrimaryUri.ToString();
        }
Beispiel #8
0
        /// <summary>
        /// Finds the domain whose id matches <paramref name="domainId"/>, ignoring case
        /// (invariant culture).
        /// </summary>
        /// <param name="domainId">The id of the domain to look up.</param>
        /// <param name="cancellationToken">Token forwarded to <c>DatasetStorage.GetAllDomains</c>.</param>
        /// <returns>The first matching domain.</returns>
        /// <exception cref="InvalidOperationException">No domain has the requested id.</exception>
        private async Task<Domain> FindDomain(string domainId, CancellationToken cancellationToken)
        {
            var allDomains = await DatasetStorage.GetAllDomains(cancellationToken);

            var match = allDomains.FirstOrDefault(
                d => string.Equals(d.Id, domainId, StringComparison.InvariantCultureIgnoreCase));

            if (match == null)
            {
                throw new InvalidOperationException($"Domain id, \"{domainId}\", was not found.");
            }

            return match;
        }
Beispiel #9
0
        // Integration test: starting a content edit on a dataset must
        //   - report the ContentsModified status,
        //   - record the edit account/container and the original account/container,
        //   - create the edit container in blob storage, and
        //   - be visible through a subsequent GetDatasetEditById call.
        public async Task ShouldUpdateDatasetContent()
        {
            await TestUtils.ExecAndCleanup(async cleanup =>
            {
                var dataset = CreateDatasetDoc();

                // Captured from SetupDataset so the original storage location can be asserted on.
                DatasetStorage originalStorage = null;
                cleanup.Push(await SetupDataset(dataset, storage => originalStorage = storage));

                var editService = Services.GetService<DatasetEditStorageService>();
                var user        = CreateTestUser();

                var edit = await editService.InitiateDatasetContentEdit(dataset.Id, user, default);
                Assert.Equal(DatasetEditStatus.ContentsModified, edit.EditStatus);
                Assert.False(string.IsNullOrWhiteSpace(edit.ContentEditAccount));
                Assert.False(string.IsNullOrWhiteSpace(edit.ContentEditContainer));
                Assert.Equal(originalStorage.AccountName, edit.OriginalStorageAccount);
                Assert.Equal(originalStorage.ContainerName, edit.OriginalStorageContainer);

                var blobClient    = await Services.GetBlobClient();
                var editContainer = blobClient.GetContainerReference(edit.ContentEditContainer);
                Assert.True(await editContainer.ExistsAsync());

                // Remove the edit document and the edit container once the test is done.
                cleanup.Push(async () =>
                {
                    var cosmosClient = await Services.GetCosmosClient();
                    var cosmosConfig = Services.GetService<IOptions<CosmosConfiguration>>().Value;
                    var documentId   = dataset.Id.ToString();
                    var documentUri  = UriFactory.CreateDocumentUri(
                        cosmosConfig.Database,
                        cosmosConfig.UserDataCollection,
                        documentId);
                    var deleteOptions = new RequestOptions
                    {
                        PartitionKey = new PartitionKey(WellKnownIds.DatasetEditDatasetId.ToString())
                    };

                    await cosmosClient.DeleteDocumentAsync(documentUri, deleteOptions);
                    await editContainer.DeleteAsync();
                });

                edit = await editService.GetDatasetEditById(dataset.Id, user, default);
                Assert.Equal(dataset.Id, edit.Id);
                Assert.Equal(DatasetEditStatus.ContentsModified, edit.EditStatus);
                Assert.False(string.IsNullOrWhiteSpace(edit.ContentEditAccount));
                Assert.False(string.IsNullOrWhiteSpace(edit.ContentEditContainer));
            });
        }
        /// <summary>
        /// Creates the Azure storage container for the dataset and adds the attachment records
        /// to the nomination document.
        /// </summary>
        /// <param name="storage">Dataset storage details (document id, account and container name).</param>
        /// <param name="user">The user recorded as the modifier of the nomination document.</param>
        /// <param name="cancellationToken">Checked once on entry; not forwarded to the storage calls.</param>
        /// <returns>
        /// <c>NominationStatus.Uploading</c> once the document and attachment are written, or null
        /// when the nomination document could not be read.
        /// </returns>
        public async Task<NominationStatus?> CreateDatasetStorageAsync(DatasetStorage storage, ClaimsPrincipal user, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var nominationUri  = CreateUserDataDocumentUri(storage.Id);
            var requestOptions = new RequestOptions
            {
                PartitionKey = new PartitionKey(WellKnownIds.DatasetNominationDatasetId.ToString())
            };

            // NOTE(review): depending on the client, a missing document may surface as an exception
            // rather than a null result - confirm the null branch below is reachable.
            Document nomination = await Client.ReadDocumentAsync(nominationUri, requestOptions);

            if (nomination == null)
            {
                return null;
            }

            var containerUri = await SasTokens.CreateDatasetContainer(storage);

            var userName  = GetUserName(user);
            var userEmail = GetUserEmail(user);
            var newStatus = NominationStatus.Uploading;

            // Stamp the nomination with who changed it and move it to the new status.
            nomination.SetPropertyValue("modified", DateTime.UtcNow);
            nomination.SetPropertyValue("modifiedByUserName", userName);
            nomination.SetPropertyValue("modifiedByUserEmail", userEmail);
            nomination.SetPropertyValue("nominationStatus", newStatus.ToString());
            await Client.ReplaceDocumentAsync(nomination.SelfLink, nomination);

            // Attachment that points the nomination at its blob container.
            var contentLink = new Attachment
            {
                Id          = "Content", // "Slug" is ID with hard-attach
                ContentType = "x-azure-blockstorage",
                MediaLink   = containerUri,
            };
            contentLink.SetPropertyValue("storageType", "blob");
            contentLink.SetPropertyValue("container", storage.ContainerName);
            contentLink.SetPropertyValue("account", storage.AccountName);

            await Client.UpsertAttachmentAsync(nomination.SelfLink, contentLink, requestOptions);

            return newStatus;
        }
        // Creates the storage for a nomination.
        //   id                - nomination id taken from the route; must equal storage.Id.
        //   storage           - storage details taken from the request body.
        //   cancellationToken - checked on entry and forwarded to the storage service.
        // Returns NotFound when the storage service reports no status, Ok otherwise.
        // Throws InvalidOperationException when the route id and the body id differ.
        public async Task<IActionResult> CreateStorage(
            [FromRoute] Guid id,
            [FromBody] DatasetStorage storage,
            CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();

            if (id != storage.Id)
            {
                throw new InvalidOperationException("Nomination id is not valid.");
            }

            var status = await UserDataStorage
                .CreateDatasetStorageAsync(storage, this.User, cancellationToken)
                .ConfigureAwait(false);

            return status is null
                ? (IActionResult)NotFound()
                : Ok();
        }
        // Proposes storage details (default account plus a not-yet-used container name) for a nomination.
        //   id                - nomination id taken from the route.
        //   cancellationToken - checked on entry and forwarded to the nomination lookup.
        // Returns NotFound when the nomination does not exist, otherwise Ok with the storage details.
        public async Task<IActionResult> DatasetContainer([FromRoute] Guid id, CancellationToken cancellationToken)
        {
            cancellationToken.ThrowIfCancellationRequested();

            var record = await UserDataStorage
                .GetByIdAsync(id, cancellationToken)
                .ConfigureAwait(false);

            if (record == null)
            {
                return NotFound();
            }

            var proposal = new DatasetStorage
            {
                Id          = record.Id,
                AccountName = SasTokenService.DefaultDatasetStorageAccount(),
                DatasetName = record.Name,
            };

            // Fills in proposal.ContainerName.
            await SasTokenService
                .FindUniqueDatasetContainerName(proposal)
                .ConfigureAwait(false);

            return Ok(proposal);
        }
Beispiel #13
0
        /// <summary>
        /// Discards pending edits of a dataset: deletes the edit document and, for content edits,
        /// the container holding the edited content.
        /// </summary>
        /// <param name="id">The dataset id.</param>
        /// <param name="user">The user, passed to <c>VerifyDatasetOwnership</c>.</param>
        /// <param name="token">Token forwarded to the ownership check and the document delete.</param>
        /// <returns>
        /// False when the dataset has neither modified details nor modified contents; true once
        /// the pending changes have been removed.
        /// </returns>
        /// <exception cref="InvalidOperationException">No original dataset was found for the id.</exception>
        public async Task<bool> CancelDatasetChanges(Guid id, IPrincipal user, CancellationToken token)
        {
            var (original, modified) = await VerifyDatasetOwnership(id, user, token);

            if (original == null)
            {
                throw new InvalidOperationException("Invalid dataset id.");
            }

            // A missing edit record counts as "unmodified".
            var status = modified?.EditStatus ?? DatasetEditStatus.Unmodified;

            bool hasPendingChanges = status == DatasetEditStatus.DetailsModified
                                     || status == DatasetEditStatus.ContentsModified;
            if (!hasPendingChanges)
            {
                return false;
            }

            var deleteOptions = new RequestOptions
            {
                PartitionKey = new PartitionKey(WellKnownIds.DatasetEditDatasetId.ToString())
            };
            await Client.DeleteDocumentAsync(UserDataDocumentUriById(id.ToString()), deleteOptions, token);

            // Only content edits own a separate container; "modified" is non-null here because
            // a null edit record yields the Unmodified status handled above.
            if (status == DatasetEditStatus.ContentsModified)
            {
                var editStorage = new DatasetStorage
                {
                    Id            = id,
                    DatasetName   = original.Name,
                    AccountName   = modified.ContentEditAccount,
                    ContainerName = modified.ContentEditContainer,
                };

                await SasTokens.DeleteDatasetContainer(editStorage);
            }

            return true;
        }
Beispiel #14
0
        /// <summary>
        /// Puts a dataset into content-edit mode: allocates and creates a dedicated edit container
        /// and records both the edit location and the original storage location on the edit item.
        /// </summary>
        /// <param name="id">The dataset id.</param>
        /// <param name="user">The user performing the edit.</param>
        /// <param name="token">Token forwarded to the lookups and the final update.</param>
        /// <returns>
        /// The existing edit item when it is already in content-edit mode, otherwise the item
        /// returned by <c>UpdateDatasetEditItemDocument</c>.
        /// </returns>
        /// <exception cref="InvalidOperationException">The dataset is not stored in blob storage.</exception>
        public async Task<DatasetEditStorageItem> InitiateDatasetContentEdit(Guid id, IPrincipal user, CancellationToken token)
        {
            var editItem = await GetDatasetEditById(id, user, token);

            if (editItem.EditStatus == DatasetEditStatus.ContentsModified)
            {
                // Already in content edit mode
                return editItem;
            }

            var storageDetails = await DatasetStorage.GetDatasetStorageDetails(id, token);

            if (!(storageDetails is DatasetBlobStorageDetails blobDetails))
            {
                throw new InvalidOperationException("Dataset storage must be blob storage.");
            }

            // The edit container lives in the same account as the original content.
            var editStorage = new DatasetStorage
            {
                Id          = editItem.Id,
                DatasetName = editItem.Name,
                AccountName = blobDetails.Account
            };
            await SasTokens.FindUniqueDatasetUpdateContainerName(editStorage);
            await SasTokens.CreateDatasetContainer(editStorage);

            editItem.EditStatus               = DatasetEditStatus.ContentsModified;
            editItem.ContentEditAccount       = editStorage.AccountName;
            editItem.ContentEditContainer     = editStorage.ContainerName;
            editItem.OriginalStorageAccount   = blobDetails.Account;
            editItem.OriginalStorageContainer = blobDetails.Container;

            return await UpdateDatasetEditItemDocument(user, editItem, token);
        }
Beispiel #15
0
        /// <summary>
        /// Test helper that provisions a dataset: creates its blob container, uploads one text blob
        /// per entry of <c>TestFiles</c>, creates the dataset document and attaches the container
        /// details to it.
        /// </summary>
        /// <param name="dataset">The dataset document to create.</param>
        /// <param name="setParamsFn">Optional callback that receives the storage details that were used.</param>
        /// <returns>A delegate that deletes the dataset document and its container again.</returns>
        private async Task<Func<Task>> SetupDataset(DatasetStorageItem dataset, Action<DatasetStorage> setParamsFn = null)
        {
            var tokenService = Services.GetService<SasTokenService>();
            var storage      = new DatasetStorage
            {
                Id          = dataset.Id,
                DatasetName = dataset.Name,
                AccountName = tokenService.DefaultDatasetStorageAccount(),
            };
            await tokenService.FindUniqueDatasetContainerName(storage);
            await tokenService.CreateDatasetContainer(storage);

            var blobClient = await Services.GetBlobClient();
            var container  = blobClient.GetContainerReference(storage.ContainerName);

            // One small text blob per test file name.
            foreach (var fileName in TestFiles)
            {
                var target = container.GetBlockBlobReference($"{fileName}.txt");
                await target.UploadTextAsync($"{fileName}, generated {DateTime.UtcNow.ToString()}");
            }

            // The dataset document is partitioned by its own id.
            var documentId       = dataset.Id.ToString();
            var partitionOptions = new RequestOptions
            {
                PartitionKey = new PartitionKey(documentId)
            };
            var cosmosConfig = Services.GetService<IOptions<CosmosConfiguration>>().Value;
            var cosmosClient = await Services.GetCosmosClient();

            var collectionUri = UriFactory.CreateDocumentCollectionUri(cosmosConfig.Database, cosmosConfig.DatasetCollection);
            var created       = await cosmosClient.CreateDocumentAsync(collectionUri, dataset, partitionOptions);

            var details = new DatasetItemContainerDetails
            {
                DatasetId = dataset.Id,
                Account   = storage.AccountName,
                Container = storage.ContainerName,
                Uri       = container.Uri.ToString(),
            };
            var attachment = new Attachment
            {
                Id          = details.Name,
                ContentType = details.ContentType,
                MediaLink   = details.Uri
            };
            attachment.SetPropertyValue("storageType", "blob");
            attachment.SetPropertyValue("container", details.Container);
            attachment.SetPropertyValue("account", details.Account);

            await cosmosClient.UpsertAttachmentAsync(created.Resource.SelfLink, attachment, partitionOptions);

            setParamsFn?.Invoke(storage);

            // Teardown: remove the document first, then the container.
            return async () =>
            {
                var documentUri = UriFactory.CreateDocumentUri(cosmosConfig.Database, cosmosConfig.DatasetCollection, documentId);
                await cosmosClient.DeleteDocumentAsync(documentUri, partitionOptions);

                await tokenService.DeleteDatasetContainer(storage);
            };
        }
        /// <summary>
        /// Lists every blob in the dataset container and creates a file record for each one, then
        /// writes a file summary record and one folder record per distinct parent path.
        /// </summary>
        /// <remarks>
        /// The blob named <c>_metadata.txt</c> is skipped. File records are created concurrently,
        /// with at most <c>MaxConcurrent</c> creations in flight at a time.
        /// </remarks>
        /// <param name="storage">Import properties naming the storage account and container.</param>
        /// <param name="cancellationToken">Token used for blob listing, throttling and task scheduling.</param>
        /// <returns>
        /// The sorted distinct file types, the number of file records created, the summed blob
        /// length and the primary URI of the container.
        /// </returns>
        private async Task<(ICollection<string> fileTypes, int fileCount, long fileSize, string containerUri)> CreateDatasetFileDocuments(
            DatasetImportProperties storage,
            CancellationToken cancellationToken)
        {
            Log.Add("Creating dataset file documents.");

            var credentials   = new StorageCredentials(storage.AccountName, StorageConfig.Accounts[storage.AccountName]);
            var storageAcct   = new CloudStorageAccount(credentials, true);
            var blobClient    = storageAcct.CreateCloudBlobClient();
            var blobContainer = blobClient.GetContainerReference(storage.ContainerName);

            BlobContinuationToken    continuationToken  = null;
            const bool               useFlatBlobListing = true;
            const BlobListingDetails blobListingDetails = BlobListingDetails.None;
            const int maxBlobsPerRequest = 100;

            var  parents    = new HashSet<string>();
            var  extensions = new HashSet<string>();
            int  totalCount = 0;
            long totalSize  = 0;

            // Caps the number of file-record creations running at the same time.
            var concurrencySemaphore = new SemaphoreSlim(MaxConcurrent);

            // Add file records
            var taskList = new List<Task>();

            do
            {
                var listingResult = await blobContainer
                                    .ListBlobsSegmentedAsync("", useFlatBlobListing, blobListingDetails, maxBlobsPerRequest, continuationToken, null, null, cancellationToken)
                                    .ConfigureAwait(false);

                continuationToken = listingResult.ContinuationToken;
                var results = listingResult.Results
                              .Cast<CloudBlockBlob>()
                              .Where(r => r.Name != "_metadata.txt")
                              .Select(blob => new
                {
                    // Folder segments of the blob path: drop the leading "/" and the container
                    // segment, and leave out the trailing file name.
                    Segments = blob.Uri.Segments
                               .Skip(2)
                               .Select(s => s.Trim('/'))
                               .Take(blob.Uri.Segments.Length - 3),
                    File = new FileSystemItem
                    {
                        Id         = Guid.NewGuid(),
                        Name       = Path.GetFileName(blob.Name),
                        FullName   = blob.Name,
                        FileType   = GetFileExtension(blob.Name),
                        CanPreview = false,
                        DatasetId  = DatasetId,
                        DataType   = StorageDataType.FileSystem,
                        EntryType  = FileSystemEntryType.File,
                        Length     = blob.Properties.Length,
                        Parent     = Path.GetDirectoryName(blob.Name).Replace(@"\", @"/"),
                        SortKey    = GenerateSortKey("1", blob.Name),
                        Modified   = blob.Properties.LastModified ?? DateTimeOffset.UtcNow,
                    },
                    Blob = new FileSystemItemBlobDetails
                    {
                        DatasetId   = DatasetId,
                        Account     = storage.AccountName,
                        Container   = storage.ContainerName,
                        Name        = Path.GetFileName(blob.Name),
                        ContentType = blob.Properties.ContentType,
                        Uri         = blob.Uri.ToString(),
                    }
                })
                              .ToList();

                foreach (var result in results)
                {
                    // Expand "a", "b", "c" into the cumulative folder paths "a", "a/b", "a/b/c".
                    result.Segments
                    .Aggregate(new List<string>(), (list, s) =>
                    {
                        list.Add(list.Count == 0 ? s : string.Concat(list[list.Count - 1], "/", s));
                        return list;
                    })
                    .ToList()
                    .ForEach(p => parents.Add(p));
                    extensions.Add(result.File.FileType);

                    await concurrencySemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);

                    var task = Task.Run(async () =>
                    {
                        try
                        {
                            await DatasetStorage.CreateFileRecord(result.File, result.Blob).ConfigureAwait(false);
                        }
                        finally
                        {
                            // Always hand the permit back. If a failed creation kept its permit, the
                            // listing loop could block in WaitAsync forever and the failure would
                            // never be observed by Task.WhenAll below.
                            concurrencySemaphore.Release();
                        }

                        // Log the value produced by the increment itself; re-reading the shared
                        // counter could report a number bumped by another task in the meantime.
                        int loaded = Interlocked.Increment(ref totalCount);
                        if (loaded % 100 == 0)
                        {
                            Log.Add($"Loaded {loaded} file records ...");
                        }
                    }, cancellationToken);

                    taskList.Add(task);
                }

                totalSize += results.Sum(t => t.File.Length ?? 0);
            } while (continuationToken != null);

            await Task.WhenAll(taskList).ConfigureAwait(false);

            Log.Add($"Loaded {totalCount} total file records.");

            // Add the file summary record
            var fileTypes   = extensions.OrderBy(e => e).ToList();
            var fileSummary = new FileSystemSummary
            {
                Id        = Guid.NewGuid(),
                DatasetId = DatasetId,
                DataType  = StorageDataType.FileSummary,
                FileCount = totalCount,
                Size      = totalSize,
                FileTypes = fileTypes,
            };
            await DatasetStorage.CreateFileSummaryRecord(fileSummary).ConfigureAwait(false);

            // Add file folder records
            foreach (var folder in parents)
            {
                var fileItem = new FileSystemItem
                {
                    Id         = Guid.NewGuid(),
                    Name       = Path.GetFileName(folder),
                    FullName   = folder,
                    FileType   = null,
                    CanPreview = null,
                    DatasetId  = DatasetId,
                    DataType   = StorageDataType.FileSystem,
                    EntryType  = FileSystemEntryType.Folder,
                    Length     = null,
                    Parent     = Path.GetDirectoryName(folder).Replace(@"\", @"/"),
                    SortKey    = GenerateSortKey("0", folder),
                    Modified   = DateTimeOffset.UtcNow,
                };
                await DatasetStorage.CreateFileRecord(fileItem).ConfigureAwait(false);
            }
            Log.Add($"Loaded {parents.Count} folder records.");

            return (fileTypes, totalCount, totalSize, blobContainer.StorageUri.PrimaryUri.ToString());
        }
Beispiel #17
0
        /// <summary>
        /// Attempts to find a unique container name for the dataset update.
        /// </summary>
        /// <remarks>
        /// The name carries a suffix of the form <c>u</c> followed by the current UTC date as
        /// <c>yyyyMMdd</c>.
        /// </remarks>
        /// <param name="storage">The dataset storage details (will be updated).</param>
        /// <returns>Awaitable Task.</returns>
        public async Task FindUniqueDatasetUpdateContainerName(DatasetStorage storage)
        {
            // Format with the invariant culture so the suffix does not depend on the thread's
            // culture (a culture with a non-Gregorian default calendar would yield a different year).
            var datePart = DateTime.UtcNow.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);
            var suffix   = $"u{datePart}";

            await GetUniqueDatasetContainerName(storage, suffix);
        }
Beispiel #18
0
 /// <summary>
 /// Looks for a container name, derived from the dataset name, that is not in use yet.
 /// </summary>
 /// <param name="storage">The dataset storage details; updated with the name that was found.</param>
 /// <returns>A task that completes once the lookup has finished.</returns>
 public async Task FindUniqueDatasetContainerName(DatasetStorage storage) =>
     await GetUniqueDatasetContainerName(storage);
Beispiel #19
0
        /// <summary>
        /// Walks every blob of the dataset container, adds each one to the output archives and
        /// stores the resulting archive sizes on the dataset.
        /// </summary>
        /// <remarks>
        /// The blob named <c>_metadata.txt</c> is skipped. Totals and compression ratios are
        /// written to the console once the archives are closed.
        /// </remarks>
        /// <param name="storage">Blob storage details (account and container) of the dataset.</param>
        /// <param name="cancellationToken">Token forwarded to the listing and archive operations.</param>
        private async Task ReadDatasetFiles(DatasetBlobStorageDetails storage, CancellationToken cancellationToken)
        {
            Log.Add("Reading dataset files.");

            var ctx = new CompressContext
            {
                ContainerName = storage.Container,
                Buffer        = new byte[32768],
            };

            var accountCredentials = new StorageCredentials(storage.Account, StorageConfig.Accounts[storage.Account]);
            var account            = new CloudStorageAccount(accountCredentials, true);
            var client             = account.CreateCloudBlobClient();
            var sourceContainer    = client.GetContainerReference(storage.Container);

            // Archives are written to a container in the same storage account.
            ctx.ArchiveContainer = client.GetContainerReference(ctx.ArchiveContainerName);
            Log.Add($"Writing archives to {ctx.ArchiveContainer.Uri}");
            await ctx.ArchiveContainer.CreateIfNotExistsAsync();

            const bool               flatListing    = true;
            const BlobListingDetails listingDetails = BlobListingDetails.None;
            const int                pageSize       = 100;

            BlobContinuationToken nextPage = null;
            int  fileCount = 0;
            long byteCount = 0;

            await OpenOutputArchives(ctx, cancellationToken);

            do
            {
                var page = await sourceContainer
                           .ListBlobsSegmentedAsync("", flatListing, listingDetails, pageSize, nextPage, null, null, cancellationToken)
                           .ConfigureAwait(false);

                nextPage = page.ContinuationToken;

                // Materialize the whole page before any blob of it is archived.
                var files = page.Results
                            .Cast<CloudBlockBlob>()
                            .Where(b => b.Name != "_metadata.txt")
                            .Select(b => new FileDetails
                            {
                                Name     = Path.GetFileName(b.Name),
                                FullName = b.Name,
                                Length   = b.Properties.Length,
                                Modified = b.Properties.LastModified ?? DateTimeOffset.UtcNow,
                            })
                            .ToList();

                foreach (var file in files)
                {
                    Log.Add($"- {file.FullName}");
                    ctx.Details = file;

                    var source = sourceContainer.GetBlockBlobReference(file.FullName);
                    await AddDatasetFileToArchive(ctx, source, cancellationToken);
                }

                fileCount += files.Count;
                byteCount += files.Sum(f => f.Length);
            } while (nextPage != null);

            CloseOutputArchives(ctx, cancellationToken);

            var (zipSize, tgzSize) = await GetArchiveDetails(ctx, cancellationToken);

            await DatasetStorage.UpdateDatasetCompressedDetails(DatasetId, zipSize, tgzSize);

            Console.WriteLine($"Compressed {fileCount:n0} total files, {byteCount:n0} bytes.");
            Console.WriteLine($"zip file: {zipSize:n0} bytes ({Ratio(byteCount, zipSize):n2}%).");
            Console.WriteLine($"tgz file: {tgzSize:n0} bytes ({Ratio(byteCount, tgzSize):n2}%).");
        }