/// <summary>
/// Runs one command-executer worker pass: dequeues a batch of messages from the
/// command queue, deserializes each one into a <c>NoResultCommandWrapper</c>,
/// executes it through the direct command executer and then reports the message
/// to the queue client as processed.
/// </summary>
/// <remarks>
/// A ping is recorded for the worker before dequeuing, and termination is recorded
/// both when the batch is empty and after the whole batch has been handled.
/// </remarks>
/// <returns>A task that completes once the batch has been handled.</returns>
public async Task InvokeAsync()
{
    var instanceWorkerId = _workerRecordStoreService.GenerateUniqueId();
    Console.WriteLine($"CommandExecuter {instanceWorkerId} starting...");
    await _workerRecordStoreService.RecordPing("commandExecuter", instanceWorkerId);

    var commandQueueMessages = await _queueClient.Dequeue(
        _config.CommandQueueName,
        _config.MaxQueueItemsBatchSizeToProcessPerWorker);

    if (commandQueueMessages.Count == 0)
    {
        await _workerRecordStoreService.RecordHasTerminated("commandExecuter", instanceWorkerId);
        return;
    }

    // The settings do not vary per message, so build them once for the whole batch.
    // NOTE(review): TypeNameHandling.All lets the payload choose which types get
    // instantiated; this is only safe if every producer on the command queue is
    // trusted. Consider restricting types with a SerializationBinder - confirm.
    var serializerSettings = new JsonSerializerSettings { TypeNameHandling = TypeNameHandling.All };

    foreach (var commandQueueMessage in commandQueueMessages)
    {
        var command = JsonConvert.DeserializeObject<NoResultCommandWrapper>(
            commandQueueMessage.Message,
            serializerSettings);

        await _directCommandExecuter.ExecuteAsync(command);

        // Only report the message as processed after the command has executed.
        await _queueClient.MessageProcessed(_config.CommandQueueName, commandQueueMessage.MessageId);
    }

    await _workerRecordStoreService.RecordHasTerminated("commandExecuter", instanceWorkerId);

    // Log under this worker's own name (the message previously said "Mapper").
    Console.WriteLine($"CommandExecuter {instanceWorkerId} Terminated");
}
/// <summary>
/// Runs one ingester worker pass: for every message dequeued from the raw-data
/// queue, retrieves the object named by the message, splits its lines into chunks
/// of at most <c>IngesterMaxLinesPerFile</c> lines, stores each chunk under a new
/// key in the ingested folder and enqueues that key on the ingested queue.
/// </summary>
/// <remarks>
/// A ping is recorded before dequeuing and termination is recorded at the end,
/// whether or not any messages were dequeued.
/// </remarks>
/// <returns>A task that completes once the batch has been handled.</returns>
public async Task InvokeAsync()
{
    var workerId = _workerRecordStoreService.GenerateUniqueId();
    await _workerRecordStoreService.RecordPing("ingester", workerId);

    var pendingMessages = await _queueClient.Dequeue(
        _config.RawDataQueueName,
        _config.MaxQueueItemsBatchSizeToProcessPerWorker);

    if (pendingMessages.Count > 0)
    {
        foreach (var pendingMessage in pendingMessages)
        {
            // The message body is the key of the raw object to split.
            var sourceKey = pendingMessage.Message;
            Stream sourceStream = await _commandDispatcher.DispatchAsync(
                new RetrieveObjectCommand { Key = sourceKey });
            var objectName = _keyRegex.Match(sourceKey).Groups["objectName"].Value;

            using (var sourceReader = new StreamReader(sourceStream))
            {
                // Each pass of this loop produces one chunk object.
                // NOTE(review): if IngesterMaxLinesPerFile is not positive the inner
                // loop never reads and this loop cannot reach end of stream - confirm
                // the setting is validated elsewhere.
                while (!sourceReader.EndOfStream)
                {
                    using (var chunkBuffer = new MemoryStream())
                    {
                        var chunkKey = $"{_config.IngestedFolder}/{objectName}-{Guid.NewGuid()}";

                        using (var chunkWriter = new StreamWriter(chunkBuffer))
                        {
                            var copiedLines = 0;
                            while (copiedLines < _config.IngesterMaxLinesPerFile && !sourceReader.EndOfStream)
                            {
                                var currentLine = await sourceReader.ReadLineAsync();
                                await chunkWriter.WriteLineAsync(currentLine);
                                copiedLines++;
                            }

                            await chunkWriter.FlushAsync();

                            // NOTE(review): the buffer is handed over positioned at its
                            // end; presumably the store handler rewinds it - confirm.
                            await _commandDispatcher.DispatchAsync(
                                new StoreObjectCommand { Key = chunkKey, DataStream = chunkBuffer });
                        }

                        await _queueClient.Enqueue(_config.IngestedQueueName, chunkKey);
                    }
                }
            }

            await _queueClient.MessageProcessed(_config.RawDataQueueName, pendingMessage.MessageId);
        }
    }

    await _workerRecordStoreService.RecordHasTerminated("ingester", workerId);
}
/// <summary>
/// Runs one mapper worker pass: for every message dequeued from the ingested
/// queue, retrieves the object named by the message, reads all of its lines and
/// dispatches them in a single <c>BatchMapDataCommand</c>, then reports the
/// message to the queue client as processed.
/// </summary>
/// <remarks>
/// A ping is recorded before dequeuing and again before each message. Termination
/// is recorded when the batch is empty and after the whole batch has been handled;
/// the closing console message is only written in the latter case.
/// </remarks>
/// <returns>A task that completes once the batch has been handled.</returns>
public async Task InvokeAsync()
{
    var workerId = _workerRecordStoreService.GenerateUniqueId();
    Console.WriteLine($"Mapper {workerId} starting...");
    await _workerRecordStoreService.RecordPing("mapper", workerId);

    var batch = await _queueClient.Dequeue(
        _config.IngestedQueueName,
        _config.MaxQueueItemsBatchSizeToProcessPerWorker);

    // Nothing to map: record termination and leave quietly.
    if (batch.Count == 0)
    {
        await _workerRecordStoreService.RecordHasTerminated("mapper", workerId);
        return;
    }

    foreach (var message in batch)
    {
        await _workerRecordStoreService.RecordPing("mapper", workerId);

        Stream objectStream = await _commandDispatcher.DispatchAsync(
            new RetrieveObjectCommand { Key = message.Message });

        using (var reader = new StreamReader(objectStream))
        {
            // Collect the whole object so it can be mapped as one batch.
            var collected = new List<string>();
            while (!reader.EndOfStream)
            {
                collected.Add(await reader.ReadLineAsync());
            }

            await _commandDispatcher.DispatchAsync(
                new BatchMapDataCommand { Lines = collected, ContextQueueMessage = message });
        }

        await _queueClient.MessageProcessed(_config.IngestedQueueName, message.MessageId);
    }

    await _workerRecordStoreService.RecordHasTerminated("mapper", workerId);
    Console.WriteLine($"Mapper {workerId} Terminated");
}
/// <summary>
/// Runs one reducer worker pass: dequeues a batch from both the mapped queue and
/// the reduced queue, reads every referenced object line by line, deserializes
/// each line into a list of <c>CompressedMostAccidentProneData</c> and dispatches
/// everything collected in a single <c>BatchReduceDataCommand</c>.
/// </summary>
/// <remarks>
/// When there are no mapped messages and at most one reduced message there is
/// nothing to combine, so the reduced message (if any) is returned to its queue
/// and the worker terminates. Lines that fail with a
/// <c>JsonSerializationException</c> are logged and skipped; lines that
/// deserialize to <c>null</c> (for example blank lines) are skipped.
/// </remarks>
/// <returns>A task that completes once the batch has been handled.</returns>
public async Task InvokeAsync()
{
    var instanceWorkerId = _workerRecordStoreService.GenerateUniqueId();
    await _workerRecordStoreService.RecordPing("reducer", instanceWorkerId);

    var mappedQueueMessages = await _queueClient.Dequeue(
        _config.MappedQueueName,
        _config.MaxQueueItemsBatchSizeToProcessPerWorker);
    var reducedQueueMessages = await _queueClient.Dequeue(
        _config.ReducedQueueName,
        _config.MaxQueueItemsBatchSizeToProcessPerWorker);

    // Nothing to combine: hand back whatever was taken from the reduced queue.
    if (mappedQueueMessages.Count == 0 && reducedQueueMessages.Count <= 1)
    {
        foreach (var reducedQueueMessage in reducedQueueMessages)
        {
            await _queueClient.ReturnMessageToQueue(_config.ReducedQueueName, reducedQueueMessage.MessageId);
        }

        await _workerRecordStoreService.RecordHasTerminated("reducer", instanceWorkerId);
        return;
    }

    var queueMessages = new List<QueueMessage>();
    queueMessages.AddRange(mappedQueueMessages);
    queueMessages.AddRange(reducedQueueMessages);

    // The settings do not vary per line, so build them once.
    var serializerSettings = new JsonSerializerSettings { TypeNameHandling = TypeNameHandling.Auto };
    var inputCounts = new List<CompressedMostAccidentProneData>();

    foreach (var queueMessage in queueMessages)
    {
        Stream mappedObjectStream = await _commandDispatcher.DispatchAsync(
            new RetrieveObjectCommand { Key = queueMessage.Message });

        using (var streamReader = new StreamReader(mappedObjectStream))
        {
            while (!streamReader.EndOfStream)
            {
                var line = await streamReader.ReadLineAsync();
                try
                {
                    var keyValuePairs = JsonConvert.DeserializeObject<List<CompressedMostAccidentProneData>>(
                        line,
                        serializerSettings);

                    // A blank or "null" line deserializes to null; AddRange(null)
                    // would throw ArgumentNullException and abort the whole pass.
                    if (keyValuePairs != null)
                    {
                        inputCounts.AddRange(keyValuePairs);
                    }
                }
                catch (JsonSerializationException e)
                {
                    // Best effort: report the bad line with the reason and carry on.
                    Console.WriteLine($"Error while deserialising value [{line}]: {e.Message}");
                }
            }
        }
    }

    await _commandDispatcher.DispatchAsync(new BatchReduceDataCommand
    {
        InputKeyValuePairs2 = inputCounts,
        ProcessedMessageIdsHash = ProcessedMessageIdsHash(queueMessages),
        QueueMessages = queueMessages.Select(x => x.Message).ToArray()
    });

    // Without this being in a transaction, there is the risk of incorrect results
    // NOTE(review): MarkProcessed is not awaited here; if it returns a Task its
    // completion and failures go unobserved - confirm against its declaration.
    MarkProcessed(_config.MappedQueueName, mappedQueueMessages);
    MarkProcessed(_config.ReducedQueueName, reducedQueueMessages);
    await _workerRecordStoreService.RecordHasTerminated("reducer", instanceWorkerId);
    // Transaction end
}
/// <summary>
/// Runs the final-reducer worker pass: expects exactly one message on the reduced
/// queue, retrieves the object it names, deserializes its first line into a list
/// of <c>CompressedMostAccidentProneData</c>, runs each entry through
/// <c>FinalReducerFuncCommand</c>, writes the resulting lines to a new object in
/// the final-reduced folder and enqueues that object's key on the final-reduced
/// queue.
/// </summary>
/// <remarks>
/// Only the first line of the retrieved object is read. If the object is empty
/// nothing is stored or enqueued, but the reduced message is still reported as
/// processed.
/// </remarks>
/// <returns>A task that completes once the pass has finished.</returns>
/// <exception cref="ApplicationException">
/// The number of messages dequeued from the reduced queue was not exactly one.
/// Termination is recorded before the exception is thrown.
/// </exception>
public async Task InvokeAsync()
{
    var workerId = _workerRecordStoreService.GenerateUniqueId();
    await _workerRecordStoreService.RecordPing("finalReducer", workerId);

    var dequeued = await _queueClient.Dequeue(
        _config.ReducedQueueName,
        _config.MaxQueueItemsBatchSizeToProcessPerWorker);

    if (dequeued.Count != 1)
    {
        await _workerRecordStoreService.RecordHasTerminated("finalReducer", workerId);

        // TODO: consider logging and returning without throwing an exception
        throw new ApplicationException("Expected to find just one message on the reducer queue");
    }

    var onlyMessage = dequeued.First();
    Stream reducedObjectStream = await _commandDispatcher.DispatchAsync(
        new RetrieveObjectCommand { Key = onlyMessage.Message });

    using (var reader = new StreamReader(reducedObjectStream))
    using (var outputBuffer = new MemoryStream())
    using (var outputWriter = new StreamWriter(outputBuffer))
    {
        if (!reader.EndOfStream)
        {
            var firstLine = await reader.ReadLineAsync();
            var entries = JsonConvert.DeserializeObject<List<CompressedMostAccidentProneData>>(
                firstLine,
                new JsonSerializerSettings { TypeNameHandling = TypeNameHandling.Auto });

            foreach (var entry in entries)
            {
                var reduction = await _commandDispatcher.DispatchAsync(
                    new FinalReducerFuncCommand { CompressedMostAccidentProneData = entry });

                foreach (var outputLine in reduction.Result)
                {
                    await outputWriter.WriteLineAsync(outputLine);
                }
            }

            await outputWriter.FlushAsync();

            var finalKey = $"{_config.FinalReducedFolder}/{Guid.NewGuid()}";

            // NOTE(review): the buffer is handed over positioned at its end;
            // presumably the store handler rewinds it - confirm.
            await _commandDispatcher.DispatchAsync(
                new StoreObjectCommand { Key = finalKey, DataStream = outputBuffer });
            await _queueClient.Enqueue(_config.FinalReducedQueueName, finalKey);
        }
    }

    await _queueClient.MessageProcessed(_config.ReducedQueueName, onlyMessage.MessageId);
    await _workerRecordStoreService.RecordHasTerminated("finalReducer", workerId);
}