/// <summary>
/// Cuts the source file into runs and, for every run, reads it, sorts it and saves it
/// to a target file rented from <paramref name="repository"/>.
/// </summary>
/// <param name="pathToSourceFile">Path of the input file to split.</param>
/// <param name="repository">Provides one target file per run through <c>Rent()</c>.</param>
/// <param name="runSizeHintInBytes">Size hint passed to <c>FileSplitHelper.GetRuns</c>.</param>
/// <param name="lineLengthHintInBytes">Line-length hint passed to every <c>RunSorterInPlace</c>.</param>
/// <param name="maxDegreeOfParallelism">Forwarded unchanged to <c>AsyncParallelForEach</c>.</param>
/// <returns>A task that completes once <c>AsyncParallelForEach</c> has completed.</returns>
public async Task Split(
    string pathToSourceFile,
    ExternalRunRepository repository,
    int runSizeHintInBytes,
    int lineLengthHintInBytes,
    int maxDegreeOfParallelism)
{
    var sourceFile = new FileInfo(pathToSourceFile);
    var runs = new FileSplitHelper(sourceFile, _logger).GetRuns(runSizeHintInBytes);

    // Deferred query: a sorter is constructed and a target file is rented only when
    // the consumer pulls the corresponding item. Run numbers are 1-based.
    var jobs = runs.Select((segment, index) => new
    {
        Number = index + 1,
        Run = segment,
        Splitter = new RunSorterInPlace(sourceFile, segment.offset, segment.length, lineLengthHintInBytes),
        TargetFile = repository.Rent()
    });

    await jobs.AsyncParallelForEach(
        async job =>
        {
            // Label shows the run number and its half-open range [offset, offset + length).
            var workItemName = $"{job.Number, 5}: [{job.Run.offset}, {job.Run.offset + job.Run.length})";

            _logger($"{workItemName,-40} reading");
            var loaded = await job.Splitter.Read();

            _logger($"{workItemName,-40} sorting");
            job.Splitter.Sort(loaded);

            _logger($"{workItemName,-40} saving");
            await job.Splitter.Save(loaded, job.TargetFile, returnBuffer: true);

            // Drop the buffer reference before forcing a collection.
            loaded.Buffer = null;
            _logger($"{workItemName,-40} done");

            GC.Collect();
        },
        maxDegreeOfParallelism: maxDegreeOfParallelism);
}
// Runs the splitter against the fixed local file f:\atesttask\G010.txt and logs "Done".
// There are no assertions: the test passes when Split completes without throwing.
// NOTE(review): depends on a machine-specific absolute path.
public async Task Test_FileSplitter_G_010_file()
{
    const int runSizeHint = 100 * 1024 * 1024;
    const int lineLengthHint = 24;
    const int parallelism = 4;

    var inputFile = @"f:\atesttask\G010.txt";

    // Run files go into a sub-directory next to the input file.
    var inputDirectory = Path.GetDirectoryName(inputFile);
    var tempDir = Path.Combine(inputDirectory, "Test_TempDirForRuns");

    using (var repository = new ExternalRunRepository(tempDir))
    {
        await _fileSplitter.Split(
            pathToSourceFile: inputFile,
            repository: repository,
            runSizeHintInBytes: runSizeHint,
            lineLengthHintInBytes: lineLengthHint,
            maxDegreeOfParallelism: parallelism);

        TestUtils.Logger("Done");
    }
}
// Runs the splitter against the fixed local file f:\atesttask\M099.txt and checks that
// the first file found in the temp directory has the same length as the input file.
// NOTE(review): depends on a machine-specific absolute path; the variable name suggests
// exactly one run file is expected, but only the first file returned is inspected.
public async Task Test_FileSplitter_M_099_file()
{
    const int runSizeHint = 100 * 1024 * 1024;
    const int lineLengthHint = 24;
    const int parallelism = 4;

    var inputFile = @"f:\atesttask\M099.txt";

    // Run files go into a sub-directory next to the input file.
    var inputDirectory = Path.GetDirectoryName(inputFile);
    var tempDir = Path.Combine(inputDirectory, "Test_TempDirForRuns");

    using (var repository = new ExternalRunRepository(tempDir))
    {
        await _fileSplitter.Split(
            pathToSourceFile: inputFile,
            repository: repository,
            runSizeHintInBytes: runSizeHint,
            lineLengthHintInBytes: lineLengthHint,
            maxDegreeOfParallelism: parallelism);

        // Inspect the output while the repository is still alive (not yet disposed).
        var runDirectory = new DirectoryInfo(tempDir);
        var singleRun = runDirectory.GetFiles().First();
        singleRun.Length.ShouldBe(new FileInfo(inputFile).Length);

        TestUtils.Logger("Done");
    }
}