/// <summary>
/// Returns all domains reported by the dataset storage as a JSON result.
/// </summary>
/// <param name="cancellationToken">Checked before the query starts and passed on to the storage call.</param>
/// <returns>A JSON result wrapping whatever <c>DatasetStorage.GetAllDomains</c> returned.</returns>
public async Task<IActionResult> GetDomains(CancellationToken cancellationToken)
{
    // Bail out before touching storage when the request is already cancelled.
    cancellationToken.ThrowIfCancellationRequested();

    var domains = await DatasetStorage
        .GetAllDomains(cancellationToken)
        .ConfigureAwait(false);

    return Json(domains);
}
/// <summary>
/// Finds a container name for the dataset that does not exist yet and stores it in
/// <c>storage.ContainerName</c>.
/// </summary>
/// <remarks>
/// The first candidate is built from the dataset name and <paramref name="suffixes"/>. As long
/// as the candidate already exists, a new one is built from the same suffixes plus the next
/// single character taken from <c>NextChars</c>.
/// </remarks>
/// <param name="storage">Dataset storage details; its <c>ContainerName</c> is set on success.</param>
/// <param name="suffixes">Suffixes that are passed to the name builder for every candidate.</param>
/// <returns>Awaitable Task.</returns>
/// <exception cref="InvalidOperationException">
/// Every character of <c>NextChars</c> has been tried and the last candidate still exists.
/// </exception>
private async Task GetUniqueDatasetContainerName(DatasetStorage storage, params string[] suffixes)
{
    var client = CreateBlobClient(storage);
    var candidate = ContainerNameFromDatasetName(storage.DatasetName, suffixes);

    // Each iteration means "candidate is taken": derive the next candidate and probe again.
    for (var offset = 0; await client.GetContainerReference(candidate).ExistsAsync(); offset++)
    {
        if (offset >= NextChars.Length)
        {
            throw new InvalidOperationException("No more characters available to build valid container name.");
        }

        var extra = NextChars.Substring(offset, 1);
        var extendedSuffixes = suffixes.Concat(new[] { extra }).ToArray();
        candidate = ContainerNameFromDatasetName(storage.DatasetName, extendedSuffixes);
    }

    storage.ContainerName = candidate;
}
/// <summary>
/// Creates a blob client for the storage account named by <paramref name="datasetStorage"/>.
/// </summary>
/// <param name="datasetStorage">Dataset storage details; only <c>AccountName</c> is read.</param>
/// <returns>The client returned by the account-name overload of <c>CreateBlobClient</c>.</returns>
/// <exception cref="ArgumentNullException"><paramref name="datasetStorage"/> is null.</exception>
private CloudBlobClient CreateBlobClient(DatasetStorage datasetStorage)
{
    var source = datasetStorage ?? throw new ArgumentNullException(nameof(datasetStorage));
    return CreateBlobClient(source.AccountName);
}
/// <summary>
/// Deletes the documents of the current dataset, first through <c>FileSearch</c> and then
/// through <c>DatasetStorage</c>, logging the count reported by each.
/// </summary>
/// <param name="cancellationToken">Token passed to both delete calls.</param>
/// <returns>Awaitable Task.</returns>
private async Task DeleteDatasetDocuments(CancellationToken cancellationToken)
{
    Log.Add("Deleting current dataset documents (if any).");

    var searchDocumentsDeleted = await FileSearch.DeleteAllFilesDocumentsByDatasetId(DatasetId, cancellationToken);
    Log.Add($"Deleted {searchDocumentsDeleted} Azure Search dataset document(s).");

    var storageDocumentsDeleted = await DatasetStorage.DeleteDatasetDocuments(DatasetId, cancellationToken);
    Log.Add($"Deleted {storageDocumentsDeleted} dataset document(s).");
}
/// <summary>
/// Deletes the blob container named by <c>storage.ContainerName</c>, if it exists.
/// </summary>
/// <param name="storage">The details of the dataset (account and container name).</param>
/// <returns>The result of <c>DeleteIfExistsAsync</c>: true if the dataset container was deleted.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="storage"/> is null, or its container name is null, empty or whitespace.
/// </exception>
public async Task<bool> DeleteDatasetContainer(DatasetStorage storage)
{
    var containerName = storage?.ContainerName;
    if (string.IsNullOrWhiteSpace(containerName))
    {
        throw new ArgumentNullException(nameof(storage.ContainerName));
    }

    var target = CreateBlobClient(storage).GetContainerReference(containerName);
    var wasDeleted = await target.DeleteIfExistsAsync();
    return wasDeleted;
}
/// <summary>
/// Looks up the storage details of the current dataset and, when they describe blob storage,
/// hands them to <c>ReadDatasetFiles</c>.
/// </summary>
/// <param name="cancellationToken">Token passed to the lookup and to <c>ReadDatasetFiles</c>.</param>
/// <returns>Awaitable Task.</returns>
/// <exception cref="InvalidOperationException">
/// The lookup result is not a <c>DatasetBlobStorageDetails</c> (this includes null).
/// </exception>
private async Task Compress(CancellationToken cancellationToken)
{
    var details = await DatasetStorage.GetDatasetStorageDetails(DatasetId, cancellationToken);

    if (!(details is DatasetBlobStorageDetails blobDetails))
    {
        throw new InvalidOperationException("Storage details for dataset not found.");
    }

    await ReadDatasetFiles(blobDetails, cancellationToken);
}
/// <summary>
/// Creates the blob container named by <c>storage.ContainerName</c> unless it already exists.
/// </summary>
/// <param name="storage">The details of the dataset (account and container name).</param>
/// <returns>The primary storage URI of the dataset container, as a string.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="storage"/> is null, or its container name is null, empty or whitespace.
/// </exception>
public async Task<string> CreateDatasetContainer(DatasetStorage storage)
{
    var containerName = storage?.ContainerName;
    if (string.IsNullOrWhiteSpace(containerName))
    {
        throw new ArgumentNullException(nameof(storage.ContainerName));
    }

    var target = CreateBlobClient(storage).GetContainerReference(containerName);
    await target.CreateIfNotExistsAsync();

    var primaryUri = target.StorageUri.PrimaryUri;
    return primaryUri.ToString();
}
/// <summary>
/// Loads all domains from the dataset storage and returns the one whose id matches
/// <paramref name="domainId"/>.
/// </summary>
/// <param name="domainId">The id to look for; matched case-insensitively.</param>
/// <param name="cancellationToken">Token passed to the storage query.</param>
/// <returns>The first domain with a matching id.</returns>
/// <exception cref="InvalidOperationException">No domain has the requested id.</exception>
private async Task<Domain> FindDomain(string domainId, CancellationToken cancellationToken)
{
    var domains = await DatasetStorage.GetAllDomains(cancellationToken);

    // Ids are machine identifiers, not linguistic text, so compare them ordinally:
    // a culture-aware comparison can treat distinct ids as equal.
    var domain = domains
        .FirstOrDefault(d => string.Equals(d.Id, domainId, StringComparison.OrdinalIgnoreCase));

    if (domain == null)
    {
        throw new InvalidOperationException($"Domain id, \"{domainId}\", was not found.");
    }

    return domain;
}
/// <summary>
/// Checks that initiating a content edit marks the edit record as <c>ContentsModified</c>,
/// records both the edit and the original storage locations, creates the edit container,
/// and that reloading the edit record returns the same state.
/// </summary>
public async Task ShouldUpdateDatasetContent()
{
    await TestUtils.ExecAndCleanup(async cleanup =>
    {
        // Arrange: a dataset with its own container; remember where it was stored.
        var dataset = CreateDatasetDoc();
        DatasetStorage originalStorage = null;
        cleanup.Push(await SetupDataset(dataset, storage => originalStorage = storage));

        var editService = Services.GetService<DatasetEditStorageService>();
        var user = CreateTestUser();

        // Act: start the content edit.
        var edit = await editService.InitiateDatasetContentEdit(dataset.Id, user, default);

        // Assert: the edit record points at a new container and remembers the original one.
        Assert.Equal(DatasetEditStatus.ContentsModified, edit.EditStatus);
        Assert.False(string.IsNullOrWhiteSpace(edit.ContentEditAccount));
        Assert.False(string.IsNullOrWhiteSpace(edit.ContentEditContainer));
        Assert.Equal(originalStorage.AccountName, edit.OriginalStorageAccount);
        Assert.Equal(originalStorage.ContainerName, edit.OriginalStorageContainer);

        var blobClient = await Services.GetBlobClient();
        var editContainer = blobClient.GetContainerReference(edit.ContentEditContainer);
        var editContainerExists = await editContainer.ExistsAsync();
        Assert.True(editContainerExists);

        // Remove the edit document and the edit container when the test finishes.
        cleanup.Push(async () =>
        {
            var cosmosClient = await Services.GetCosmosClient();
            var cosmosConfig = Services.GetService<IOptions<CosmosConfiguration>>().Value;
            var datasetId = dataset.Id.ToString();
            var editDocumentUri = UriFactory.CreateDocumentUri(
                cosmosConfig.Database,
                cosmosConfig.UserDataCollection,
                datasetId);

            await cosmosClient.DeleteDocumentAsync(
                editDocumentUri,
                new RequestOptions
                {
                    PartitionKey = new PartitionKey(WellKnownIds.DatasetEditDatasetId.ToString())
                });
            await editContainer.DeleteAsync();
        });

        // Reloading the edit record must show the same state.
        var reloaded = await editService.GetDatasetEditById(dataset.Id, user, default);
        Assert.Equal(dataset.Id, reloaded.Id);
        Assert.Equal(DatasetEditStatus.ContentsModified, reloaded.EditStatus);
        Assert.False(string.IsNullOrWhiteSpace(reloaded.ContentEditAccount));
        Assert.False(string.IsNullOrWhiteSpace(reloaded.ContentEditContainer));
    });
}
/// <summary>
/// Creates the Azure storage container for the dataset and adds the attachment records
/// to the nomination document.
/// </summary>
/// <remarks>
/// The nomination document is marked as <c>Uploading</c> and stamped with the user's name and
/// email before the "Content" attachment pointing at the container is upserted.
/// </remarks>
/// <param name="storage">Dataset storage details; <c>Id</c> identifies the nomination document.</param>
/// <param name="user">The user recorded as the modifier of the nomination document.</param>
/// <param name="cancellationToken">Checked on entry and passed to the document client calls.</param>
/// <returns>
/// <c>NominationStatus.Uploading</c> on success, or null when the nomination document does not exist.
/// </returns>
public async Task<NominationStatus?> CreateDatasetStorageAsync(DatasetStorage storage, ClaimsPrincipal user, CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var docUri = CreateUserDataDocumentUri(storage.Id);
    var options = new RequestOptions
    {
        PartitionKey = new PartitionKey(WellKnownIds.DatasetNominationDatasetId.ToString())
    };

    Document document;
    try
    {
        document = await Client.ReadDocumentAsync(docUri, options, cancellationToken);
    }
    catch (DocumentClientException ex) when (ex.StatusCode == System.Net.HttpStatusCode.NotFound)
    {
        // The document client reports a missing document by throwing, not by returning
        // null; translate that into the "not found" result callers check for.
        return null;
    }

    if (document == null)
    {
        return null;
    }

    var containerUri = await SasTokens.CreateDatasetContainer(storage);

    var name = GetUserName(user);
    var email = GetUserEmail(user);
    var status = NominationStatus.Uploading;

    document.SetPropertyValue("modified", DateTime.UtcNow);
    document.SetPropertyValue("modifiedByUserName", name);
    document.SetPropertyValue("modifiedByUserEmail", email);
    document.SetPropertyValue("nominationStatus", status.ToString());
    await Client.ReplaceDocumentAsync(document.SelfLink, document, null, cancellationToken);

    var datasetRecordLink = new Attachment
    {
        Id = "Content", // "Slug" is ID with hard-attach
        ContentType = "x-azure-blockstorage",
        MediaLink = containerUri,
    };
    datasetRecordLink.SetPropertyValue("storageType", "blob");
    datasetRecordLink.SetPropertyValue("container", storage.ContainerName);
    datasetRecordLink.SetPropertyValue("account", storage.AccountName);
    await Client.UpsertAttachmentAsync(document.SelfLink, datasetRecordLink, options, cancellationToken);

    return status;
}
/// <summary>
/// Creates the dataset storage for a nomination.
/// </summary>
/// <param name="id">Nomination id from the route; must equal <c>storage.Id</c>.</param>
/// <param name="storage">Dataset storage details from the request body.</param>
/// <param name="cancellationToken">Checked on entry and passed to the storage service.</param>
/// <returns>
/// <c>NotFound</c> when the storage service returns no status, otherwise <c>Ok</c>.
/// </returns>
/// <exception cref="InvalidOperationException">The route id differs from <c>storage.Id</c>.</exception>
public async Task<IActionResult> CreateStorage(
    [FromRoute] Guid id,
    [FromBody] DatasetStorage storage,
    CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var idsMatch = id == storage.Id;
    if (!idsMatch)
    {
        throw new InvalidOperationException("Nomination id is not valid.");
    }

    var status = await UserDataStorage
        .CreateDatasetStorageAsync(storage, this.User, cancellationToken)
        .ConfigureAwait(false);

    return status == null
        ? (IActionResult)NotFound()
        : Ok();
}
/// <summary>
/// Proposes storage details (default account plus an unused container name) for a nomination.
/// </summary>
/// <param name="id">Nomination id from the route.</param>
/// <param name="cancellationToken">Checked on entry and passed to the nomination lookup.</param>
/// <returns>
/// <c>NotFound</c> when the nomination does not exist; otherwise <c>Ok</c> carrying the
/// populated <c>DatasetStorage</c>.
/// </returns>
public async Task<IActionResult> DatasetContainer([FromRoute] Guid id, CancellationToken cancellationToken)
{
    cancellationToken.ThrowIfCancellationRequested();

    var nomination = await UserDataStorage
        .GetByIdAsync(id, cancellationToken)
        .ConfigureAwait(false);
    if (nomination == null)
    {
        return NotFound();
    }

    var proposal = new DatasetStorage
    {
        Id = nomination.Id,
        AccountName = SasTokenService.DefaultDatasetStorageAccount(),
        DatasetName = nomination.Name,
    };

    // Fills in proposal.ContainerName with a name that is not in use yet.
    await SasTokenService
        .FindUniqueDatasetContainerName(proposal)
        .ConfigureAwait(false);

    return Ok(proposal);
}
/// <summary>
/// Discards the pending edit of a dataset: deletes the edit document and, for a content
/// edit, the edit container as well.
/// </summary>
/// <param name="id">The dataset id.</param>
/// <param name="user">The user whose ownership of the dataset is verified first.</param>
/// <param name="token">Token passed to the ownership check and the document delete.</param>
/// <returns>
/// False when there is no edit in the <c>DetailsModified</c> or <c>ContentsModified</c>
/// state; true after the edit has been removed.
/// </returns>
/// <exception cref="InvalidOperationException">The ownership check returned no original dataset.</exception>
public async Task<bool> CancelDatasetChanges(Guid id, IPrincipal user, CancellationToken token)
{
    var (source, edit) = await VerifyDatasetOwnership(id, user, token);
    if (source == null)
    {
        throw new InvalidOperationException("Invalid dataset id.");
    }

    // A missing edit record counts as "unmodified".
    var editStatus = edit?.EditStatus ?? DatasetEditStatus.Unmodified;
    if (editStatus != DatasetEditStatus.DetailsModified && editStatus != DatasetEditStatus.ContentsModified)
    {
        return false;
    }

    var editPartition = new RequestOptions
    {
        PartitionKey = new PartitionKey(WellKnownIds.DatasetEditDatasetId.ToString())
    };
    await Client.DeleteDocumentAsync(UserDataDocumentUriById(id.ToString()), editPartition, token);

    if (edit.EditStatus == DatasetEditStatus.ContentsModified)
    {
        // A content edit owns a separate container; remove it too.
        var editStorage = new DatasetStorage
        {
            Id = id,
            DatasetName = source.Name,
            AccountName = edit.ContentEditAccount,
            ContainerName = edit.ContentEditContainer,
        };
        await SasTokens.DeleteDatasetContainer(editStorage);
    }

    return true;
}
/// <summary>
/// Puts a dataset into content-edit mode: creates a new, uniquely named update container in
/// the dataset's storage account and records it, together with the original location, on the
/// edit item.
/// </summary>
/// <param name="id">The dataset id.</param>
/// <param name="user">The user performing the edit.</param>
/// <param name="token">Token passed to the storage and document calls that accept one.</param>
/// <returns>
/// The existing edit item when it is already <c>ContentsModified</c>; otherwise the result of
/// <c>UpdateDatasetEditItemDocument</c> for the updated item.
/// </returns>
/// <exception cref="InvalidOperationException">The dataset's storage details are not blob storage details.</exception>
public async Task<DatasetEditStorageItem> InitiateDatasetContentEdit(Guid id, IPrincipal user, CancellationToken token)
{
    var edit = await GetDatasetEditById(id, user, token);
    if (edit.EditStatus == DatasetEditStatus.ContentsModified)
    {
        // Already in content edit mode
        return edit;
    }

    var details = await DatasetStorage.GetDatasetStorageDetails(id, token);
    if (!(details is DatasetBlobStorageDetails original))
    {
        throw new InvalidOperationException("Dataset storage must be blob storage.");
    }

    var editStorage = new DatasetStorage
    {
        Id = edit.Id,
        DatasetName = edit.Name,
        AccountName = original.Account
    };
    await SasTokens.FindUniqueDatasetUpdateContainerName(editStorage);
    await SasTokens.CreateDatasetContainer(editStorage);

    edit.EditStatus = DatasetEditStatus.ContentsModified;
    edit.ContentEditAccount = editStorage.AccountName;
    edit.ContentEditContainer = editStorage.ContainerName;
    edit.OriginalStorageAccount = original.Account;
    edit.OriginalStorageContainer = original.Container;

    var saved = await UpdateDatasetEditItemDocument(user, edit, token);
    return saved;
}
/// <summary>
/// Test fixture helper: creates a container with one text blob per entry of <c>TestFiles</c>,
/// stores the dataset document with an attachment describing that container, and returns a
/// delegate that removes the document and the container again.
/// </summary>
/// <param name="dataset">The dataset document to store.</param>
/// <param name="setParamsFn">Optional callback that receives the storage details that were used.</param>
/// <returns>An asynchronous teardown delegate.</returns>
private async Task<Func<Task>> SetupDataset(DatasetStorageItem dataset, Action<DatasetStorage> setParamsFn = null)
{
    // Storage: pick a free container name in the default account and create the container.
    var sasTokens = Services.GetService<SasTokenService>();
    var datasetStorage = new DatasetStorage
    {
        Id = dataset.Id,
        DatasetName = dataset.Name,
        AccountName = sasTokens.DefaultDatasetStorageAccount(),
    };
    await sasTokens.FindUniqueDatasetContainerName(datasetStorage);
    await sasTokens.CreateDatasetContainer(datasetStorage);

    // Content: one small text blob per test file name.
    var blobClient = await Services.GetBlobClient();
    var container = blobClient.GetContainerReference(datasetStorage.ContainerName);
    foreach (var testFile in TestFiles)
    {
        var text = $"{testFile}, generated {DateTime.UtcNow.ToString()}";
        await container
            .GetBlockBlobReference($"{testFile}.txt")
            .UploadTextAsync(text);
    }

    // Document: the dataset record, partitioned by its own id.
    var datasetId = dataset.Id.ToString();
    var requestOptions = new RequestOptions
    {
        PartitionKey = new PartitionKey(datasetId)
    };
    var cosmosConfig = Services.GetService<IOptions<CosmosConfiguration>>().Value;
    var cosmosClient = await Services.GetCosmosClient();
    var collectionUri = UriFactory.CreateDocumentCollectionUri(cosmosConfig.Database, cosmosConfig.DatasetCollection);
    var response = await cosmosClient.CreateDocumentAsync(collectionUri, dataset, requestOptions);

    // Attachment: tells readers of the document where the blobs live.
    var containerDetails = new DatasetItemContainerDetails
    {
        DatasetId = dataset.Id,
        Account = datasetStorage.AccountName,
        Container = datasetStorage.ContainerName,
        Uri = container.Uri.ToString(),
    };
    var link = new Attachment
    {
        Id = containerDetails.Name,
        ContentType = containerDetails.ContentType,
        MediaLink = containerDetails.Uri
    };
    link.SetPropertyValue("storageType", "blob");
    link.SetPropertyValue("container", containerDetails.Container);
    link.SetPropertyValue("account", containerDetails.Account);
    await cosmosClient.UpsertAttachmentAsync(response.Resource.SelfLink, link, requestOptions);

    if (setParamsFn != null)
    {
        setParamsFn(datasetStorage);
    }

    return Teardown;

    // Deletes the dataset document first, then its container.
    async Task Teardown()
    {
        var documentUri = UriFactory.CreateDocumentUri(cosmosConfig.Database, cosmosConfig.DatasetCollection, datasetId);
        await cosmosClient.DeleteDocumentAsync(documentUri, requestOptions);
        await sasTokens.DeleteDatasetContainer(datasetStorage);
    }
}
/// <summary>
/// Lists every blob in the dataset container and creates a file record for each one, then a
/// file summary record, then one folder record per distinct parent path.
/// </summary>
/// <remarks>
/// File records are created concurrently; at most <c>MaxConcurrent</c> creations are in flight
/// at a time. The blob named "_metadata.txt" is skipped.
/// </remarks>
/// <param name="storage">Account and container name of the dataset to import.</param>
/// <param name="cancellationToken">Token passed to the blob listing, the throttle wait and the task scheduling.</param>
/// <returns>
/// The sorted distinct file types, the number of file records created, the total size of the
/// listed files, and the primary URI of the container.
/// </returns>
private async Task<(ICollection<string> fileTypes, int fileCount, long fileSize, string containerUri)> CreateDatasetFileDocuments(
    DatasetImportProperties storage,
    CancellationToken cancellationToken)
{
    Log.Add("Creating dataset file documents.");

    var credentials = new StorageCredentials(storage.AccountName, StorageConfig.Accounts[storage.AccountName]);
    var storageAcct = new CloudStorageAccount(credentials, true);
    var blobClient = storageAcct.CreateCloudBlobClient();
    var blobContainer = blobClient.GetContainerReference(storage.ContainerName);

    BlobContinuationToken continuationToken = null;
    const bool useFlatBlobListing = true;
    const BlobListingDetails blobListingDetails = BlobListingDetails.None;
    const int maxBlobsPerRequest = 100;

    var parents = new HashSet<string>();
    var extensions = new HashSet<string>();
    int totalCount = 0;
    long totalSize = 0;
    var concurrencySemaphore = new SemaphoreSlim(MaxConcurrent);

    // Add file records
    var taskList = new List<Task>();
    do
    {
        var listingResult = await blobContainer
            .ListBlobsSegmentedAsync("", useFlatBlobListing, blobListingDetails, maxBlobsPerRequest, continuationToken, null, null, cancellationToken)
            .ConfigureAwait(false);
        continuationToken = listingResult.ContinuationToken;

        var results = listingResult.Results
            .Cast<CloudBlockBlob>()
            .Where(r => r.Name != "_metadata.txt")
            .Select(blob => new
            {
                // URI segments after the first two and without the last one, slashes trimmed.
                // NOTE(review): presumably the first two are the root and the container and
                // the last is the file name, leaving the folder names - confirm.
                Segments = blob.Uri.Segments
                    .Skip(2)
                    .Select(s => s.Trim('/'))
                    .Take(blob.Uri.Segments.Length - 3),
                File = new FileSystemItem
                {
                    Id = Guid.NewGuid(),
                    Name = Path.GetFileName(blob.Name),
                    FullName = blob.Name,
                    FileType = GetFileExtension(blob.Name),
                    CanPreview = false,
                    DatasetId = DatasetId,
                    DataType = StorageDataType.FileSystem,
                    EntryType = FileSystemEntryType.File,
                    Length = blob.Properties.Length,
                    Parent = Path.GetDirectoryName(blob.Name).Replace(@"\", @"/"),
                    SortKey = GenerateSortKey("1", blob.Name),
                    Modified = blob.Properties.LastModified ?? DateTimeOffset.UtcNow,
                },
                Blob = new FileSystemItemBlobDetails
                {
                    DatasetId = DatasetId,
                    Account = storage.AccountName,
                    Container = storage.ContainerName,
                    Name = Path.GetFileName(blob.Name),
                    ContentType = blob.Properties.ContentType,
                    Uri = blob.Uri.ToString(),
                }
            })
            .ToList();

        foreach (var result in results)
        {
            // Turn the segments [a, b, c] into the cumulative paths a, a/b, a/b/c and
            // remember each of them as a folder.
            result.Segments
                .Aggregate(new List<string>(), (list, s) =>
                {
                    list.Add(list.Count == 0 ? s : string.Concat(list[list.Count - 1], "/", s));
                    return list;
                })
                .ToList()
                .ForEach(p => parents.Add(p));

            extensions.Add(result.File.FileType);

            // Throttle: wait for a free slot before starting another record creation.
            await concurrencySemaphore.WaitAsync(cancellationToken).ConfigureAwait(false);
            var task = Task.Run(async () =>
            {
                try
                {
                    await DatasetStorage.CreateFileRecord(result.File, result.Blob).ConfigureAwait(false);
                }
                finally
                {
                    // Always give the slot back; a failed creation must not starve the
                    // listing loop, otherwise the failure would never reach Task.WhenAll.
                    concurrencySemaphore.Release();
                }

                // Log the value this task produced, not a later re-read of the shared counter.
                var loaded = Interlocked.Increment(ref totalCount);
                if (loaded % 100 == 0)
                {
                    Log.Add($"Loaded {loaded} file records ...");
                }
            }, cancellationToken);
            taskList.Add(task);
        }

        totalSize += results.Sum(t => t.File.Length ?? 0);
    } while (continuationToken != null);

    await Task.WhenAll(taskList).ConfigureAwait(false);
    Log.Add($"Loaded {totalCount} total file records.");

    // Add the file summary record
    var fileTypes = extensions.OrderBy(e => e).ToList();
    var fileSummary = new FileSystemSummary
    {
        Id = Guid.NewGuid(),
        DatasetId = DatasetId,
        DataType = StorageDataType.FileSummary,
        FileCount = totalCount,
        Size = totalSize,
        FileTypes = fileTypes,
    };
    await DatasetStorage.CreateFileSummaryRecord(fileSummary).ConfigureAwait(false);

    // Add file folder records
    foreach (var folder in parents)
    {
        var fileItem = new FileSystemItem
        {
            Id = Guid.NewGuid(),
            Name = Path.GetFileName(folder),
            FullName = folder,
            FileType = null,
            CanPreview = null,
            DatasetId = DatasetId,
            DataType = StorageDataType.FileSystem,
            EntryType = FileSystemEntryType.Folder,
            Length = null,
            Parent = Path.GetDirectoryName(folder).Replace(@"\", @"/"),
            SortKey = GenerateSortKey("0", folder),
            Modified = DateTimeOffset.UtcNow,
        };
        await DatasetStorage.CreateFileRecord(fileItem).ConfigureAwait(false);
    }
    Log.Add($"Loaded {parents.Count} folder records.");

    return (fileTypes, totalCount, totalSize, blobContainer.StorageUri.PrimaryUri.ToString());
}
/// <summary>
/// Attempts to find a unique container name for the dataset update.
/// </summary>
/// <remarks>
/// The name is built with the suffix "u" followed by the current UTC date as <c>yyyyMMdd</c>.
/// </remarks>
/// <param name="storage">The dataset storage details (will be updated).</param>
/// <returns>Awaitable Task.</returns>
public async Task FindUniqueDatasetUpdateContainerName(DatasetStorage storage)
{
    // Format with the invariant culture so the suffix does not depend on the calendar
    // of whatever culture the current thread runs under.
    var datePart = DateTime.UtcNow.ToString("yyyyMMdd", System.Globalization.CultureInfo.InvariantCulture);
    var suffix = $"u{datePart}";

    await GetUniqueDatasetContainerName(storage, suffix);
}
/// <summary>
/// Attempts to find a unique container name derived from the dataset name, without any
/// extra suffixes.
/// </summary>
/// <param name="storage">The dataset storage details (will be updated).</param>
/// <returns>Awaitable Task.</returns>
public async Task FindUniqueDatasetContainerName(DatasetStorage storage)
    => await GetUniqueDatasetContainerName(storage);
/// <summary>
/// Lists every blob in the dataset container, adds each one to the output archives, stores
/// the resulting archive sizes for the dataset, and prints a summary to the console.
/// </summary>
/// <remarks>
/// The blob named "_metadata.txt" is skipped. The archive container is created if it does not
/// exist yet.
/// </remarks>
/// <param name="storage">Account and container of the dataset to read.</param>
/// <param name="cancellationToken">Token passed to the listing and archive helpers.</param>
/// <returns>Awaitable Task.</returns>
private async Task ReadDatasetFiles(DatasetBlobStorageDetails storage, CancellationToken cancellationToken)
{
    Log.Add("Reading dataset files.");

    var ctx = new CompressContext
    {
        ContainerName = storage.Container,
        Buffer = new byte[32768],
    };

    var accountKey = StorageConfig.Accounts[storage.Account];
    var account = new CloudStorageAccount(new StorageCredentials(storage.Account, accountKey), true);
    var client = account.CreateCloudBlobClient();
    var sourceContainer = client.GetContainerReference(storage.Container);

    ctx.ArchiveContainer = client.GetContainerReference(ctx.ArchiveContainerName);
    Log.Add($"Writing archives to {ctx.ArchiveContainer.Uri}");
    await ctx.ArchiveContainer.CreateIfNotExistsAsync();

    const bool flatListing = true;
    const BlobListingDetails listingDetails = BlobListingDetails.None;
    const int pageSize = 100;

    BlobContinuationToken nextPage = null;
    int fileCount = 0;
    long byteCount = 0;

    await OpenOutputArchives(ctx, cancellationToken);
    do
    {
        var page = await sourceContainer
            .ListBlobsSegmentedAsync("", flatListing, listingDetails, pageSize, nextPage, null, null, cancellationToken)
            .ConfigureAwait(false);
        nextPage = page.ContinuationToken;

        var files = page.Results
            .Cast<CloudBlockBlob>()
            .Where(b => b.Name != "_metadata.txt")
            .Select(b => new FileDetails
            {
                Name = Path.GetFileName(b.Name),
                FullName = b.Name,
                Length = b.Properties.Length,
                Modified = b.Properties.LastModified ?? DateTimeOffset.UtcNow,
            })
            .ToList();

        foreach (var file in files)
        {
            Log.Add($"- {file.FullName}");

            // The archive helper reads the current file's details from the context.
            ctx.Details = file;
            var blob = sourceContainer.GetBlockBlobReference(file.FullName);
            await AddDatasetFileToArchive(ctx, blob, cancellationToken);
        }

        fileCount += files.Count;
        byteCount += files.Sum(f => f.Length);
    } while (nextPage != null);
    CloseOutputArchives(ctx, cancellationToken);

    var (zipBytes, tgzBytes) = await GetArchiveDetails(ctx, cancellationToken);
    await DatasetStorage.UpdateDatasetCompressedDetails(DatasetId, zipBytes, tgzBytes);

    Console.WriteLine($"Compressed {fileCount:n0} total files, {byteCount:n0} bytes.");
    Console.WriteLine($"zip file: {zipBytes:n0} bytes ({Ratio(byteCount, zipBytes):n2}%).");
    Console.WriteLine($"tgz file: {tgzBytes:n0} bytes ({Ratio(byteCount, tgzBytes):n2}%).");
}