///<summary>Splits the full hash set into fixed-size chunks and sorts each chunk.
///Merge-sorting the resulting files afterwards yields one fully sorted sequence.
///Sorting runs in parallel (bounded by InitialSortConcurrency); writing is serialized.
///Returns the number of chunk files produced.</summary>
public static async Task<int> QuickSortAll(int Index, long SortMask)
{
    int FileCount = 0;
    var SortComp = new BlockSortComparer(SortMask);
    //Pool of reusable sort buffers to cut down on large-array allocations.
    var LongPool = new ConcurrentBag<long[]>();
    //While true, WriterBlock returns finished buffers to the pool; set to false
    //during teardown so released buffers become garbage instead.
    bool LongPoolReturn = true;
    //Caps the number of sort buffers in flight: a permit is taken before each
    //buffer is obtained and released by WriterBlock after the buffer is written.
    var FirstSortSemaphore = new SemaphoreSlim(config.hash.InitialSortConcurrency);

    //Sorting stage: parallel. Writing stage below: single-threaded.
    var FirstSortBlock = new TransformBlock<FirstSort, FirstSort>(async (t) =>
    {
        await QuickSortParllel(SortMask, t.ToSort, t.Length, SortComp).ConfigureAwait(false);
        return (t);
    }, new ExecutionDataflowBlockOptions()
    {
        SingleProducerConstrained = true,
        MaxDegreeOfParallelism = config.hash.InitialSortConcurrency,
    });
    //Writes each sorted chunk to its own file, then recycles the buffer and
    //frees a semaphore permit. MaxDegreeOfParallelism = 1 keeps writes serial.
    var WriterBlock = new ActionBlock<FirstSort>((t) =>
    {
        using (var writer = new UnbufferedLongWriter(t.WriteFilePath))
        {
            //NOTE(review): named "Destructive" — presumably the writer may clobber
            //the buffer contents, which is fine since it is recycled afterwards.
            writer.WriteDestructive(t.ToSort, t.Length);
        }
        //Return the buffer to the pool (unless teardown has begun).
        if (LongPoolReturn) { LongPool.Add(t.ToSort); }
        FirstSortSemaphore.Release();
    }, new ExecutionDataflowBlockOptions() { MaxDegreeOfParallelism = 1 });
    FirstSortBlock.LinkTo(WriterBlock, new DataflowLinkOptions() { PropagateCompletion = true });

    //Phase 1: read AllHash, one full buffer per chunk file.
    using (var reader = new UnbufferedLongReader(AllHashFilePath))
    {
        for (; reader.Readable; FileCount++)
        {
            //Acquire a permit before taking/allocating a buffer.
            await FirstSortSemaphore.WaitAsync().ConfigureAwait(false);
            if (!LongPool.TryTake(out var ToSort)) { ToSort = new long[InitialSortUnit]; }
            int ToSortLength = reader.Read(ToSort);
            FirstSortBlock.Post(new FirstSort(SortingFilePath(Index, FileCount), ToSort, ToSortLength));
        }
    }
    //Phase 2: read the NewerHash files, packing values into buffers across
    //file boundaries (a buffer may span several input files).
    int ToSortNewerCursor = 0;
    await FirstSortSemaphore.WaitAsync().ConfigureAwait(false);
    if (!LongPool.TryTake(out var ToSortNewer)) { ToSortNewer = new long[InitialSortUnit]; }
    foreach (var filePath in Directory.EnumerateFiles(config.hash.TempDir, Path.GetFileName(NewerHashFilePathBase("*"))))
    {
        using (var reader = new BufferedLongReader(filePath))
        {
            while (reader.Readable)
            {
                //Fill the current buffer as far as this reader allows.
                for (; ToSortNewerCursor < ToSortNewer.Length; ToSortNewerCursor++)
                {
                    if (!reader.MoveNext(out var next)) { break; }
                    ToSortNewer[ToSortNewerCursor] = next;
                }
                //Buffer full: hand it off and acquire a fresh one.
                if (InitialSortUnit <= ToSortNewerCursor)
                {
                    FirstSortBlock.Post(new FirstSort(SortingFilePath(Index, FileCount), ToSortNewer, ToSortNewer.Length));
                    FileCount++;
                    ToSortNewerCursor = 0;
                    await FirstSortSemaphore.WaitAsync().ConfigureAwait(false);
                    if (!LongPool.TryTake(out ToSortNewer)) { ToSortNewer = new long[InitialSortUnit]; }
                }
            }
        }
    }
    //Flush the partially filled final buffer, if any.
    //(Original note: "FirstSortingCount" is no longer used, so it is left alone.)
    if (0 < ToSortNewerCursor)
    {
        FirstSortBlock.Post(new FirstSort(SortingFilePath(Index, FileCount), ToSortNewer, ToSortNewerCursor));
        FileCount++;
    }
    FirstSortBlock.Complete();
    //No more buffers will be requested; stop recycling and drop pooled arrays
    //so they can be collected while the pipeline drains. (Benign race: a buffer
    //added by WriterBlock after Clear() simply becomes garbage too.)
    LongPoolReturn = false;
    LongPool.Clear();
    await WriterBlock.Completion.ConfigureAwait(false);
    return (FileCount);
}
///<summary>Sorts all hashes while streaming them out to files:
///splits AllHash into fixed-size chunks, sorts each chunk in parallel into its
///own file, then repeatedly merge-sorts pairs of files until a single fully
///sorted file remains. Returns the path of that final file.</summary>
public static async ValueTask<string> MergeSortAll(long SortMask)
{
    long FileCount = 0;
    //Phase 1: read AllHash and produce individually sorted chunk files.
    using (BufferedLongReader reader = new BufferedLongReader(AllHashFilePath))
    {
        BlockSortComparer SortComp = new BlockSortComparer(SortMask);
        //Sorts a chunk and writes it to its own file. Parallelism and queue
        //depth are bounded so only a limited number of chunk arrays are alive.
        var FirstSortBlock = new ActionBlock<(string FilePath, long[] ToSort)>((t) =>
        {
            Array.Sort(t.ToSort, SortComp);
            using (BufferedLongWriter w = new BufferedLongWriter(t.FilePath))
            {
                foreach (long h in t.ToSort) { w.Write(h); }
            }
        }, new ExecutionDataflowBlockOptions()
        {
            SingleProducerConstrained = true,
            MaxDegreeOfParallelism = config.hash.FileSortThreads,
            BoundedCapacity = config.hash.FileSortThreads << 1
        });
        int InitialSortUnit = config.hash.InitialSortFileSize / sizeof(long);

        //Full-size chunks.
        for (; reader.Length - reader.Position >= config.hash.InitialSortFileSize; FileCount++)
        {
            long[] ToSort = new long[InitialSortUnit];
            for (int i = 0; i < InitialSortUnit; i++) { ToSort[i] = reader.Read(); }
            //SendAsync (not Post) so the bounded queue applies backpressure.
            await FirstSortBlock.SendAsync((SortingFilePath(0, FileCount), ToSort)).ConfigureAwait(false);
        }
        //Trailing elements that do not fill a whole chunk.
        int SortLastCount = (int)(reader.Length - reader.Position) / sizeof(long);
        if (SortLastCount > 0)
        {
            long[] ToSortLast = new long[SortLastCount];
            for (int i = 0; i < SortLastCount; i++) { ToSortLast[i] = reader.Read(); }
            await FirstSortBlock.SendAsync((SortingFilePath(0, FileCount), ToSortLast)).ConfigureAwait(false);
            FileCount++; //Count the chunk we just queued.
        }
        FirstSortBlock.Complete();
        //FIX: was FirstSortBlock.Completion.Wait() — blocking synchronously
        //inside an async method risks thread-pool starvation/deadlock.
        await FirstSortBlock.Completion.ConfigureAwait(false);
    }
    //The chunk arrays above are large and now garbage; collect them before the
    //merge phase. (Deliberate — GC.Collect() is normally best avoided.)
    GC.Collect();

    int step = 0;
    //Phase 2: merge-sort files pairwise until only one remains.
    for (; FileCount > 1; step++)
    {
        ActionBlock<int> MergeSortBlock = new ActionBlock<int>((i) =>
        {
            MargeSortUnit(SortMask,
                SortingFilePath(step, i << 1),
                SortingFilePath(step, (i << 1) | 1),
                SortingFilePath(step + 1, i));
            //Odd file count: fold the leftover last file into the first merged
            //output so the next round sees an even pairing.
            if (i == 0 && (FileCount & 1) != 0)
            {
                string NextFirstPath = SortingFilePath(step + 1, 0);
                File.Delete(NextFirstPath + "_");
                File.Move(NextFirstPath, NextFirstPath + "_");
                MargeSortUnit(SortMask, SortingFilePath(step, FileCount - 1), NextFirstPath + "_", NextFirstPath);
            }
        }, new ExecutionDataflowBlockOptions() { MaxDegreeOfParallelism = config.hash.FileSortThreads });
        for (int i = 0; i < FileCount >> 1; i++) { MergeSortBlock.Post(i); }
        MergeSortBlock.Complete();
        await MergeSortBlock.Completion.ConfigureAwait(false);
        FileCount >>= 1; //Pairwise merging halves the file count each round.
    }
    return (SortingFilePath(step, 0));
}