/// <summary>
/// Reads the input file chunk by chunk, sorts every chunk and writes it to its own file,
/// one chunk at a time on the calling thread.
/// </summary>
/// <param name="sortedFilePath">
/// Path of the file to read (opened as UTF-8). It is also handed to
/// <paramref name="getChunkPath"/> when chunk file names are produced.
/// </param>
/// <param name="bytesOfMemoryToConsume">
/// Memory budget forwarded to the chunk reader for every chunk. It is first passed by
/// reference to <c>AdjustMemoryLimit</c>, which may change it.
/// </param>
/// <param name="getChunkPath">
/// Produces the output path of a chunk from the input path and the zero-based chunk index.
/// </param>
/// <returns>Paths of the written chunk files, in the order the chunks were read.</returns>
public string[] SplitFileToSortedChunks(string sortedFilePath, long bytesOfMemoryToConsume, Func<string, int, string> getChunkPath)
{
    AdjustMemoryLimit(ref bytesOfMemoryToConsume);

    var writtenPaths = new List<string>();

    using (var input = new StreamReader(sortedFilePath, Encoding.UTF8))
    {
        for (int chunkIndex = 0; !input.EndOfStream; chunkIndex++)
        {
            // Progress output: "Chunk:N " before loading, "loaded;" once the chunk is in memory.
            Console.Write($"Chunk:{chunkIndex} ");
            List<string> lines = _chunkReader.ReadFileChunk(bytesOfMemoryToConsume, input);
            Console.WriteLine("loaded;");

            _sorter.SortChunk(lines, _comparer);

            // The path is recorded before the file is written, as in the original flow.
            string targetPath = getChunkPath(sortedFilePath, chunkIndex);
            writtenPaths.Add(targetPath);
            File.WriteAllLines(targetPath, lines);
        }
    }

    DoLargeObjectHeapCompaction();

    return writtenPaths.ToArray();
}
/// <summary>
/// Reads the input file chunk by chunk on the calling thread and hands every chunk to a
/// background task that sorts it and writes it to its own file.
/// </summary>
/// <param name="sortedFilePath">
/// Path of the file to read (opened as UTF-8). It is also handed to
/// <paramref name="getChunkPath"/> when chunk file names are produced.
/// </param>
/// <param name="bytesOfMemoryToConsume">
/// Memory budget forwarded to the chunk reader for every chunk. It is first passed by
/// reference to <c>AdjustMemoryLimit</c>, which may change it.
/// </param>
/// <param name="getChunkPath">
/// Produces the output path of a chunk from the input path and the zero-based chunk index.
/// It is invoked from the background tasks.
/// </param>
/// <returns>Paths of the written chunk files, ordered by chunk index.</returns>
/// <exception cref="AggregateException">
/// At least one background sort/write task failed. Reading stops as soon as a failure is
/// noticed, and the failure is rethrown once the tasks still running have finished.
/// </exception>
public string[] SplitFileToSortedChunks(string sortedFilePath, long bytesOfMemoryToConsume, Func<string, int, string> getChunkPath)
{
    AdjustMemoryLimit(ref bytesOfMemoryToConsume);

    // Keyed by chunk index so the result can be returned in a deterministic order
    // regardless of which task finishes first.
    ConcurrentDictionary<int, string> chunkPathsByIndex = new ConcurrentDictionary<int, string>();
    List<Task> tasksStarted = new List<Task>();
    int chunkIndex = 0;

    using (StreamReader reader = new StreamReader(sortedFilePath, Encoding.UTF8))
    {
        while (!reader.EndOfStream)
        {
            Console.Write($"Chunk:{chunkIndex} ");
            List<string> chunk = _chunkReader.ReadFileChunk(bytesOfMemoryToConsume, reader);
            Console.WriteLine($"loaded;");

            Task newTask = Task.Factory.StartNew((o) =>
            {
                var state = (Tuple<List<string>, int>)o;
                List<string> chunkData = state.Item1;
                int currentChunkIndex = state.Item2;

                _sorter.SortChunk(chunkData, _comparer);
                string chunkPath = getChunkPath(sortedFilePath, currentChunkIndex);
                chunkPathsByIndex[currentChunkIndex] = chunkPath;
                File.WriteAllLines(chunkPath, chunkData);
            }, new Tuple<List<string>, int>(chunk, chunkIndex));
            tasksStarted.Add(newTask);
            chunkIndex++;

            // Throttle: once as many tasks are pending as there are processors,
            // wait for one of them to finish before reading the next chunk.
            if (tasksStarted.Count > Environment.ProcessorCount - 1)
            {
                Task.WaitAny(tasksStarted.ToArray());

                // Only successfully finished tasks are dropped. WaitAny does not rethrow,
                // so a faulted or canceled task must stay in the list for the WaitAll
                // below to surface its exception instead of losing it.
                tasksStarted.RemoveAll(t => t.Status == TaskStatus.RanToCompletion);

                // Anything still listed and already completed has failed: stop reading.
                if (tasksStarted.Exists(t => t.IsCompleted))
                {
                    break;
                }
            }
        }

        // Waits for the remaining tasks and throws AggregateException if any of them failed.
        Task.WaitAll(tasksStarted.ToArray());
    }

    // LOH is fragmented after splitting, so compaction is required to avoid occasional OOM later on
    DoLargeObjectHeapCompaction();

    // Every task succeeded at this point, so each index in [0, chunkIndex) has a path.
    string[] chunkFiles = new string[chunkIndex];
    for (int i = 0; i < chunkFiles.Length; i++)
    {
        chunkFiles[i] = chunkPathsByIndex[i];
    }

    return chunkFiles;
}