/// <summary>
/// Feeds 100 measurements with distinct device ids to the collector and verifies that exactly one
/// batch is created and exactly one send is issued carrying all 100 events.
/// </summary>
/// <returns>A task the test runner awaits, so assertion failures and exceptions are observed.</returns>
public async Task GivenCollectionOfMeasurements_WhenAddAsync_ThenAllEventsAreSentInASingleBatch_Test()
{
    // Arrange: a factory-built batch whose try-add callback accepts every event.
    var mockEventDataBatch = EventHubsModelFactory.EventDataBatch(
        10000,
        new List<EventData>(),
        new CreateBatchOptions()
        {
            PartitionKey = "partition123",
        },
        (data) => true);

    // Every batch request, whatever the partition key, hands back the same batch instance.
    _eventHubService.CreateEventDataBatchAsync(Arg.Any<string>()).Returns(mockEventDataBatch);

    var measurements = Enumerable.Range(0, 100).Select(i =>
    {
        var mockMeasurement = Substitute.For<IMeasurement>();
        mockMeasurement.DeviceId.Returns($"deviceId_{i}");
        return mockMeasurement;
    });

    // Act
    await _measurementCollector.AddAsync(measurements, default);

    // Assert
    // NOTE(review): "123" is presumably the value the fixture's hash code generator is stubbed to
    // return for every device id - confirm against the test class setup.
    await _eventHubService.Received(1).CreateEventDataBatchAsync("123");
    await _eventHubService.Received(1)
        .SendAsync(
            Arg.Is<EventDataBatch>(data => data.Count == 100),
            default);
}
/// <summary>
/// Groups the supplied measurements by a hash of their lower-cased device id and sends each group
/// to the event hub service in one or more batches, using the group's hash as the partition key.
/// </summary>
/// <param name="items">The measurements to send. Validated as non-null via <c>EnsureArg.IsNotNull</c>.</param>
/// <param name="cancellationToken">Token passed to every send call.</param>
/// <returns>A task that completes once the sends for every group have completed.</returns>
public async Task AddAsync(IEnumerable<IMeasurement> items, CancellationToken cancellationToken = default)
{
    EnsureArg.IsNotNull(items, nameof(items));

    using (var hasher = _hashCodeFactory.CreateDeterministicHashCodeGenerator())
    {
        var submissionTasks = items
            .GroupBy(m =>
            {
                // The hash of the device id becomes the partition key, so measurements whose ids hash
                // to the same value are batched together. The id is lower-cased with the invariant
                // culture so the hash input does not depend on the current thread culture.
                // NOTE(review): an earlier comment stated the hash is cast to a byte (256 possible
                // values); no such cast is visible here - presumably the generator does it. Confirm.
                return hasher.GenerateHashCode(m.DeviceId.ToLowerInvariant());
            })
            .Select(async grp =>
            {
                var partitionKey = grp.Key;
                var currentEventDataBatch = await _eventHubService.CreateEventDataBatchAsync(partitionKey);

                foreach (var m in grp)
                {
                    var measurementContent = JsonConvert.SerializeObject(m, Formatting.None);
                    var contentBytes = Encoding.UTF8.GetBytes(measurementContent);
                    var eventData = new EventData(contentBytes);

                    if (currentEventDataBatch.TryAdd(eventData))
                    {
                        continue;
                    }

                    // The current EventDataBatch cannot hold any more events. Create a new EventDataBatch and add this new message to it.
                    var newEventDataBatch = await _eventHubService.CreateEventDataBatchAsync(partitionKey);

                    if (!newEventDataBatch.TryAdd(eventData))
                    {
                        // The event does not fit even into an empty batch. Log and discard it, and keep
                        // the existing batch as there may be room for more events.
                        // TODO in this case we should send this to a dead letter queue. We'd need to see how we can send it, as it is too big for EventHub...
                        // The (paramName, message) overload is used on purpose: the single-string
                        // constructor would treat the text as the parameter name, not the message.
                        _telemetryLogger.LogError(
                            new ArgumentOutOfRangeException(
                                nameof(items),
                                $"A measurement event exceeded the maximum message batch size of {newEventDataBatch.MaximumSizeInBytes} bytes. It will be skipped."));
                    }
                    else
                    {
                        // Submit the current batch, and replace the currentEventDataBatch with newEventDataBatch
                        await _eventHubService.SendAsync(currentEventDataBatch, cancellationToken);
                        currentEventDataBatch.Dispose();
                        currentEventDataBatch = newEventDataBatch;
                    }
                }

                // Send over the remaining events
                // NOTE(review): this last batch (and a replacement batch that rejected an oversized
                // event) is not disposed here; confirm whether callers rely on inspecting it after
                // the send before adding disposal.
                await _eventHubService.SendAsync(currentEventDataBatch, cancellationToken);
            });

        // Task.WhenAll enumerates the lazy query here, while the hasher is still alive.
        await Task.WhenAll(submissionTasks);
    }
}