コード例 #1
0
        /// <summary>
        /// Runs a single map task: maps the input file to key/value pairs, groups the
        /// values by key, splits the groups into partitions, writes each partition to
        /// its own JSON file and finally reports the written files through the RPC client.
        /// </summary>
        /// <param name="inputFilePath">Path of the file that is opened and handed to the mapping phase.</param>
        /// <param name="taskId">Task identifier; used in the output file names and in the report message.</param>
        /// <param name="numPartitions">Partition count passed to the partitioning phase.</param>
        /// <returns>A task that completes once the RPC report call has completed.</returns>
        public async Task StartAsync(string inputFilePath, int taskId, int numPartitions)
        {
            // map
            IList<(TKey, TValue)> mappings;

            using (var fileStream = File.OpenRead(inputFilePath))
            {
                mappings = await _mappingPhase.MapAsync(fileStream).ConfigureAwait(false);
            }

            // combine: collect every emitted value under its key
            Dictionary<TKey, List<TValue>> mappingsCombined = new();

            foreach (var (key, value) in mappings)
            {
                // TryGetValue performs a single lookup instead of ContainsKey + indexer.
                if (!mappingsCombined.TryGetValue(key, out var values))
                {
                    values = new List<TValue>();
                    mappingsCombined[key] = values;
                }

                values.Add(value);
            }

            // partition
            var partitions = _partitioningPhase.Partition(mappingsCombined, numPartitions);
            List<FileInfoDto> fileInfos = new();

            // The output directory is the same for every partition, so ensure it exists once.
            Directory.CreateDirectory(_settings.MappedOutputDirectory);

            // save each partition to a file
            foreach (var partition in partitions)
            {
                string fileName = $"mr-temp-{taskId}-{partition.Key}";
                string path = Path.Combine(_settings.MappedOutputDirectory, fileName);

                // File.Create truncates an existing file, so no stale bytes from a
                // previous (longer) output can survive the overwrite.
                using FileStream tempFileStream = File.Create(path);
                await System.Text.Json.JsonSerializer
                    .SerializeAsync(tempFileStream, partition.Value)
                    .ConfigureAwait(false);

                fileInfos.Add(new FileInfoDto
                {
                    // NOTE(review): Length is a long; this cast would overflow for files
                    // larger than int.MaxValue bytes - confirm the DTO field type.
                    FileSize       = (int)tempFileStream.Length,
                    FilePath       = tempFileStream.Name,
                    PartitionIndex = partition.Key
                });
            }

            // report to master
            MapOutputInfoDto message = new()
            {
                TaskId     = taskId,
                WorkerInfo = new()
                {
                    WorkerUuid = _settings.WorkerUuid
                }
            };

            message.FileInfos.AddRange(fileInfos);
            await _rpcClient.MapDoneAsync(message);
        }
    }