/// <summary>
        /// Reads the input file chunk by chunk, sorts every chunk and writes each one
        /// to its own file, processing a single chunk at a time on the calling thread.
        /// </summary>
        /// <param name="sortedFilePath">Path of the UTF-8 text file to read.</param>
        /// <param name="bytesOfMemoryToConsume">
        /// Memory budget handed to the chunk reader; it is passed by reference through
        /// <c>AdjustMemoryLimit</c> first, so the effective value may differ from the argument.
        /// </param>
        /// <param name="getChunkPath">
        /// Callback that receives the input path and the zero-based chunk index and
        /// returns the path of the file the chunk is written to.
        /// </param>
        /// <returns>Paths of the chunk files, in the order the chunks were read.</returns>
        public string[] SplitFileToSortedChunks(string sortedFilePath, long bytesOfMemoryToConsume, Func <string, int, string> getChunkPath)
        {
            AdjustMemoryLimit(ref bytesOfMemoryToConsume);

            var producedChunkPaths = new List <string>();

            using (var input = new StreamReader(sortedFilePath, Encoding.UTF8))
            {
                for (int index = 0; !input.EndOfStream; index++)
                {
                    // Progress output: the index is printed before the read, "loaded;" after it.
                    Console.Write($"Chunk:{index} ");
                    List <string> lines = _chunkReader.ReadFileChunk(bytesOfMemoryToConsume, input);
                    Console.WriteLine($"loaded;");

                    _sorter.SortChunk(lines, _comparer);

                    // The path is recorded before the file is written, as the chunk is flushed to disk.
                    string target = getChunkPath(sortedFilePath, index);
                    producedChunkPaths.Add(target);
                    File.WriteAllLines(target, lines);
                }
            }

            DoLargeObjectHeapCompaction();

            return producedChunkPaths.ToArray();
        }
Beispiel #2
0
        /// <summary>
        /// Reads the input file chunk by chunk on the calling thread and hands every chunk
        /// to a background task that sorts it and writes it to its own file.
        /// </summary>
        /// <param name="sortedFilePath">Path of the UTF-8 text file to read.</param>
        /// <param name="bytesOfMemoryToConsume">
        /// Memory budget handed to the chunk reader; it is passed by reference through
        /// <c>AdjustMemoryLimit</c> first, so the effective value may differ from the argument.
        /// </param>
        /// <param name="getChunkPath">
        /// Callback that receives the input path and the zero-based chunk index and
        /// returns the path of the file the chunk is written to. It is invoked on the
        /// calling thread, once per chunk, in chunk order.
        /// </param>
        /// <returns>Paths of the chunk files, ordered by chunk index.</returns>
        /// <exception cref="AggregateException">
        /// Sorting or writing at least one chunk failed; the inner exceptions are the worker failures.
        /// </exception>
        public string[] SplitFileToSortedChunks(string sortedFilePath, long bytesOfMemoryToConsume, Func <string, int, string> getChunkPath)
        {
            AdjustMemoryLimit(ref bytesOfMemoryToConsume);

            // Filled on the reading thread only, so the result is in chunk order and needs no locking.
            List <string> chunkFiles = new List <string>();

            List <Task> tasksStarted = new List <Task>();

            // Failures of workers that have already been dropped from tasksStarted.
            List <Exception> failures = new List <Exception>();

            using (StreamReader reader = new StreamReader(sortedFilePath, Encoding.UTF8))
            {
                int chunkIndex = 0;

                // Stop reading further chunks as soon as any worker has failed.
                while (failures.Count == 0 && !reader.EndOfStream)
                {
                    Console.Write($"Chunk:{chunkIndex} ");
                    List <string> chunk = _chunkReader.ReadFileChunk(bytesOfMemoryToConsume, reader);
                    Console.WriteLine($"loaded;");

                    string chunkPath = getChunkPath(sortedFilePath, chunkIndex);
                    chunkFiles.Add(chunkPath);

                    // Everything the worker needs travels in the state object; nothing is captured from the loop.
                    var newTask = Task.Factory.StartNew((o) =>
                    {
                        var state = (Tuple <List <string>, string>)o;
                        List <string> chunkData = state.Item1;
                        string targetPath       = state.Item2;

                        _sorter.SortChunk(chunkData, _comparer);
                        File.WriteAllLines(targetPath, chunkData);
                    }, new Tuple <List <string>, string>(chunk, chunkPath));

                    tasksStarted.Add(newTask);

                    // Throttle: never keep more than ProcessorCount workers in flight.
                    if (tasksStarted.Count > Environment.ProcessorCount - 1)
                    {
                        Task.WaitAny(tasksStarted.ToArray());

                        // WaitAny does not rethrow, so the exception of every finished task is
                        // collected here before the task is forgotten; otherwise it would be lost.
                        tasksStarted.RemoveAll(t =>
                        {
                            if (!t.IsCompleted)
                            {
                                return false;
                            }

                            if (t.Exception != null)
                            {
                                failures.AddRange(t.Exception.InnerExceptions);
                            }

                            return true;
                        });
                    }

                    chunkIndex++;
                }

                // Let the remaining workers finish even when a failure was already recorded.
                try
                {
                    Task.WaitAll(tasksStarted.ToArray());
                }
                catch (AggregateException ex)
                {
                    failures.AddRange(ex.Flatten().InnerExceptions);
                }
            }

            if (failures.Count > 0)
            {
                throw new AggregateException(failures);
            }

            // LOH is fragmented after splitting, so compaction is required to avoid occasional OOM later on
            DoLargeObjectHeapCompaction();

            return chunkFiles.ToArray();
        }