/// <summary>
/// Splits a big file into chunk files, grouping lines by the first <paramref name="chars"/>
/// characters of their parsed text. Any chunk that is still larger than maxFileSize is split
/// again with a prefix one character longer; the remaining chunks are sorted in place and
/// registered in the chunks dictionary under their prefix.
/// </summary>
/// <param name="inputFileName">Big file name</param>
/// <param name="chars">Number of starting characters to split by</param>
private void SplitFile(string inputFileName, int chars)
{
    var files = new Dictionary<string, FileChunk>(StringComparer.CurrentCulture);

    using (var sr = new StreamReader(inputFileName, Encoding))
    {
        string line;
        // ReadLine returns null at the end of the stream, so no separate Peek call is needed.
        while ((line = sr.ReadLine()) != null)
        {
            DataItem entry = DataItem.FromString(line);

            // The prefix below is cut from entry.Text, not from the raw line, so the text
            // itself must hold at least `chars` characters. Testing only line.Length lets
            // Substring throw ArgumentOutOfRangeException whenever the parsed text is
            // shorter than the raw line.
            if (line.Length < chars || entry.Text.Length < chars)
            {
                // Too short to have a prefix of this length: hand the line to the
                // ChunkInfo's small-string storage instead of a prefix file.
                // NOTE(review): the key used here is the raw line, while prefix files are
                // keyed by entry.Text prefixes - confirm that this is the intended key.
                ChunkInfo nameInfo;
                if (!chunks.TryGetValue(line, out nameInfo))
                {
                    chunks.Add(line, nameInfo = new ChunkInfo());
                }

                nameInfo.AddSmallString(line, Encoding);
            }
            else
            {
                // Otherwise append the line to the chunk file that corresponds to the
                // first `chars` characters of its text.
                string start = entry.Text.Substring(0, chars);
                FileChunk sfi;
                if (!files.TryGetValue(start, out sfi))
                {
                    sfi = new FileChunk(Encoding);
                    files.Add(start, sfi);
                }

                sfi.Append(line, Encoding);
            }
        }
    }

    // For each chunk, check whether its size is still greater than maxFileSize.
    foreach (var file in files)
    {
        file.Value.Close();

        if (file.Value.Size > maxFileSize)
        {
            // Still too big: split by a longer prefix, then drop the intermediate file.
            SplitFile(file.Value.FileName, chars + 1);
            File.Delete(file.Value.FileName);
        }
        else
        {
            // Small enough: sort the chunk file in place and record it under its prefix.
            SortFile(file.Value.FileName, file.Value.FileName);

            ChunkInfo nameInfo;
            if (!chunks.TryGetValue(file.Key, out nameInfo))
            {
                chunks.Add(file.Key, nameInfo = new ChunkInfo());
            }

            nameInfo.FileName = file.Value.FileName;
        }
    }
}
/// <summary>
/// Reads every line of the input file as a DataItem, sorts the items with
/// Sorter.QuicksortParallel using Comparer, and writes them to the output file one per line.
/// </summary>
/// <param name="inputFileName">File to sort</param>
/// <param name="outputFileName">File to write results to</param>
private void SortFile(string inputFileName, string outputFileName)
{
    // The list's initial capacity is a quarter of the input file's length in bytes.
    long sourceLength = new FileInfo(inputFileName).Length;
    int initialCapacity = (int)(sourceLength / 4L);
    var items = new List<DataItem>(initialCapacity);

    // The reader is disposed before the writer opens, so both names may refer to the same file.
    using (var reader = new StreamReader(inputFileName, Encoding))
    {
        while (reader.Peek() >= 0)
        {
            string text = reader.ReadLine();
            items.Add(DataItem.FromString(text));
        }
    }

    int lastIndex = items.Count - 1;
    Sorter.QuicksortParallel(items, 0, lastIndex, Comparer);

    // Overwrite (append: false) the output file with the sorted items.
    using (var writer = new StreamWriter(outputFileName, false, Encoding))
    {
        for (int i = 0; i < items.Count; i++)
        {
            writer.WriteLine(items[i]);
        }
    }
}