/// <summary>
/// Streams items from the categorized-faces container so that every included category
/// contributes the same number of items, namely the size of the smallest included category.
/// </summary>
/// <remarks>
/// Blob names are grouped by their directory portion. Groups whose name matches an entry in
/// <c>CategoriesToIgnore</c> (ordinal, case-insensitive) are skipped. When no category remains
/// the sequence completes without yielding anything.
/// </remarks>
/// <param name="cancellationToken">Token forwarded to the blob listing and to every blob read.</param>
/// <returns>An asynchronous sequence of items produced by <c>CreateItem</c>.</returns>
public async IAsyncEnumerable<Item> GetBalancedItems([EnumeratorCancellation] CancellationToken cancellationToken)
{
    var options = _options.Value;

    // Materialize the listing up front: the smallest category size must be known
    // before the first item can be yielded.
    var blobNames = await _persistenceProvider
        .GetItems(options.ConnectionString, options.CategorizedFacesContainerName, cancellationToken)
        .ToListAsync(cancellationToken);

    // Ordinal, case-insensitive lookup instead of lower-casing both sides of a join;
    // a missing ignore list is treated as "ignore nothing".
    var ignoredCategories = new HashSet<string>(
        options.CategoriesToIgnore ?? Enumerable.Empty<string>(),
        System.StringComparer.OrdinalIgnoreCase);

    var categories = blobNames
        // GetDirectoryName can return null; fold that into the empty category name
        // rather than failing later on a null key.
        .GroupBy(blobName => Path.GetDirectoryName(blobName) ?? string.Empty)
        .Where(group => !ignoredCategories.Contains(group.Key))
        .Select(group => (Name: group.Key, Items: group.ToArray()))
        .ToArray();

    // Min() throws on an empty sequence, so bail out before computing it.
    if (categories.Length == 0)
    {
        _logger.LogInformation("No categories to balance; returning no items");
        yield break;
    }

    var minItems = categories.Min(category => category.Items.Length);
    _logger.LogInformation("Balanced input comprises {0} items per category", minItems);

    foreach (var category in categories)
    {
        foreach (var blobName in category.Items.Take(minItems))
        {
            var item = await _persistenceProvider.GetBlobItemAsync(
                options.ConnectionString,
                options.CategorizedFacesContainerName,
                blobName,
                stream => CreateItem(category.Name, stream),
                cancellationToken);

            yield return item;
        }
    }
}
// Checks that GetBalancedItems yields the same number of items for every name.
// The faked listing holds 2 "Bob", 3 "Dave" and 4 "Colin" blobs; the test expects
// each name to come back with exactly 2 items.
public async Task ReturnAnEqualNumberOfItemsForEachName()
{
    // Arrange
    var (persistenceProvider, subject) = CreateSubject();

    string[] blobNames =
    {
        "Bob/e71d895b-b944-4642-b04d-2334de1ceb01.png",
        "Bob/b262626d-150f-438b-af20-1e105f987d3d.png",
        "Dave/ef5f09bf-b890-4859-94a4-4a4032cf9545.png",
        "Dave/1ff40435-aafd-44d2-aea1-178699a0e5ec.png",
        "Dave/053483ef-9b69-4de1-b9f7-4d65a712e362.png",
        "Colin/66af9bda-8e65-44b2-bc78-f5da0323cfdb.png",
        "Colin/0c14ec89-1ce4-4821-a5ce-333cc41bd4f7.png",
        "Colin/9914cf7f-c5cd-4688-850a-2c14fd7dd6bb.png",
        "Colin/479534b6-e679-4137-a202-93e8c60cf3ab.png",
    };

    A.CallTo(() => persistenceProvider.GetItems(
            ConnectionString,
            ContainerName,
            A<CancellationToken>.Ignored))
        .Returns(blobNames.ToAsyncEnumerable());

    // The fake blob read hands an empty stream to the item factory passed as argument 3.
    A.CallTo(() => persistenceProvider.GetBlobItemAsync(
            ConnectionString,
            ContainerName,
            A<string>.Ignored,
            A<Func<Stream, Task<Examiner.Face.Data.Item>>>.Ignored,
            A<CancellationToken>.Ignored))
        .ReturnsLazily(call => call
            .GetArgument<Func<Stream, Task<Examiner.Face.Data.Item>>>(3)
            .Invoke(new MemoryStream()));

    // Act
    var balancedItems = await subject.GetBalancedItems(CancellationToken.None).ToListAsync();

    // Assert: take the first name's count as the reference and require every name to match it.
    var countsPerName = balancedItems
        .GroupBy(item => item.Name)
        .Select(itemsForName => itemsForName.Count())
        .ToArray();

    int? referenceCount = countsPerName.Length > 0 ? countsPerName[0] : (int?)null;
    bool allCountsEqual = countsPerName.All(count => count == referenceCount);

    Assert.That(referenceCount, Is.EqualTo(2));
    Assert.That(allCountsEqual, Is.True);
}