/// <summary>
/// Runs one map task end to end: maps the input file, groups the mapped values by key,
/// partitions the groups, writes every partition as JSON to its own file and finally
/// reports the written files through <c>_rpcClient.MapDoneAsync</c>.
/// </summary>
/// <param name="inputFilePath">Path of the file that is opened for reading and handed to the mapping phase.</param>
/// <param name="taskId">Task identifier; used in the output file names and in the report message.</param>
/// <param name="numPartitions">Partition count passed to the partitioning phase.</param>
/// <returns>A task that completes once the report call has been awaited.</returns>
public async Task StartAsync(string inputFilePath, int taskId, int numPartitions)
{
    // map
    IList<(TKey, TValue)> mappings;
    using (var fileStream = File.OpenRead(inputFilePath))
    {
        mappings = await _mappingPhase.MapAsync(fileStream).ConfigureAwait(false);
    }

    // combine: collect all values emitted for the same key into one list
    Dictionary<TKey, List<TValue>> mappingsCombined = new();
    foreach (var (key, value) in mappings)
    {
        // TryGetValue avoids the second lookup that ContainsKey + indexer performs.
        if (!mappingsCombined.TryGetValue(key, out var values))
        {
            values = new();
            mappingsCombined[key] = values;
        }

        values.Add(value);
    }

    // partition
    var partitions = _partitioningPhase.Partition(mappingsCombined, numPartitions);

    // The output directory is the same for every partition, so create it once up front.
    Directory.CreateDirectory(_settings.MappedOutputDirectory);

    // save each partition to a file
    List<FileInfoDto> fileInfos = new();
    foreach (var partition in partitions)
    {
        string fileName = $"mr-temp-{taskId}-{partition.Key}";
        string path = Path.Combine(_settings.MappedOutputDirectory, fileName);

        // FileMode.Create creates the file or truncates an existing one in a single call,
        // so stale content from an earlier run can never trail the new JSON.
        using FileStream tempFileStream = new(path, FileMode.Create, FileAccess.Write, FileShare.None);
        await System.Text.Json.JsonSerializer
            .SerializeAsync(tempFileStream, partition.Value)
            .ConfigureAwait(false);

        FileInfoDto fileInfo = new()
        {
            // NOTE(review): the cast narrows a long; confirm outputs stay below int.MaxValue bytes.
            FileSize = (int)tempFileStream.Length,
            FilePath = tempFileStream.Name,
            PartitionIndex = partition.Key
        };
        fileInfos.Add(fileInfo);
    }

    // report to master
    MapOutputInfoDto message = new()
    {
        TaskId = taskId,
        WorkerInfo = new() { WorkerUuid = _settings.WorkerUuid }
    };
    message.FileInfos.AddRange(fileInfos);
    await _rpcClient.MapDoneAsync(message);
}
} // closes the enclosing scope opened before this section of the file