/// <summary>
/// Diffs <paramref name="findA"/> against <paramref name="findB"/> via <c>DoDiff</c>.
/// The inputs are first diffed as given; if that attempt throws an
/// <see cref="ApplicationException"/> (treated here as "input was not sorted"),
/// both inputs are sorted with <c>SortData</c> and the diff is run a second time.
/// </summary>
/// <param name="findA">Entries of side A; enumerated a second time when the first pass fails.</param>
/// <param name="findB">Entries of side B; enumerated a second time when the first pass fails.</param>
/// <param name="writer">Receives the names of new, modified and deleted files.</param>
/// <param name="newDirs">Directories reported as new.</param>
/// <param name="delDirs">Deleted directories, sorted and then reduced by <c>CompressToBaseDirs</c>.</param>
/// <param name="sortedAfilename">If not null and a re-sort was needed, the sorted A data is written to this file.</param>
/// <param name="sortedBfilename">If not null and a re-sort was needed, the sorted B data is written to this file.</param>
/// <returns>The statistics produced by the diff pass that completed.</returns>
public static Stats Run(IEnumerable<TSV_DATA> findA, IEnumerable<TSV_DATA> findB, DeltaWriter writer,
    out List<string> newDirs, out List<string> delDirs, string sortedAfilename, string sortedBfilename)
{
    newDirs = null;
    delDirs = null;
    Stats stats = null;

    // Required by DoDiff's signature; the file lists are not used by this method.
    List<TSV_DATA> newFiles;
    List<TSV_DATA> delFiles;

    Console.Error.WriteLine("trying diff on given data");
    bool wasSorted = true;
    try
    {
        stats = DoDiff(findA, findB, writer, out newDirs, out delDirs, out newFiles, out delFiles);
    }
    catch (ApplicationException appEx)
    {
        // NOTE(review): anything the failed pass already emitted through `writer`
        // is not rolled back before the second pass below - confirm that the
        // writer/caller tolerates this.
        wasSorted = false;
        Console.Error.WriteLine(appEx.Message);
        Console.Error.WriteLine("data was not sorted!");
    }

    Task writeSortedA = Task.CompletedTask;
    Task writeSortedB = Task.CompletedTask;

    if (!wasSorted)
    {
        Console.Error.WriteLine("reading files again and sorting");
        Task<List<TSV_DATA>> sortedA = SortData(findA, "A");
        Task<List<TSV_DATA>> sortedB = SortData(findB, "B");

        // Poll every 5 seconds until both sorts are done, reporting memory usage in between.
        while (!Task.WaitAll(new Task[] { sortedA, sortedB }, millisecondsTimeout: 5000))
        {
            try
            {
                // GetCurrentProcess() hands out a new Process component on every call;
                // dispose it so the polling loop does not accumulate handles.
                using (var proc = System.Diagnostics.Process.GetCurrentProcess())
                {
                    Console.Error.WriteLine($"virtMem: {Misc.GetPrettyFilesize(proc.VirtualMemorySize64)}");
                }
            }
            catch
            {
                // Deliberately ignored: the memory report is purely informational
                // and must never abort the sort.
            }
        }

        Console.Error.WriteLine($"number sorted items A/B: {sortedA.Result.Count}/{sortedB.Result.Count}");

        // Persist the sorted data in the background while the second diff runs.
        writeSortedA = (sortedAfilename == null) ? Task.CompletedTask : WriteTsvData(sortedAfilename, sortedA.Result);
        writeSortedB = (sortedBfilename == null) ? Task.CompletedTask : WriteTsvData(sortedBfilename, sortedB.Result);

        Console.Error.WriteLine("running diff on sorted data");
        stats = DoDiff(sortedA.Result, sortedB.Result, writer, out newDirs, out delDirs, out newFiles, out delFiles);
    }

    // Dump of the uncompressed list into the current working directory.
    File.WriteAllLines(@".\DelDirsBeforeCompress.txt", delDirs);

    delDirs.Sort();
    IEnumerable<string> compressDelDirs = CompressToBaseDirs(delDirs);
    delDirs = compressDelDirs.ToList();

    // Do not return before the sorted files (if any) are completely written.
    Task.WaitAll(new Task[] { writeSortedA, writeSortedB });
    return stats;
}
/// <summary>
/// Feeds both enumerations to <c>Spi.Data.Diff.DiffSortedEnumerables</c> (with
/// <c>checkSortOrder: true</c>) and records every reported difference:
/// new/deleted directories are collected in lists, new/modified/deleted files
/// are written to the matching writer of <paramref name="writer"/> and counted
/// in the returned <c>Stats</c>.
/// </summary>
/// <param name="sortedA">Entries of side A.</param>
/// <param name="sortedB">Entries of side B.</param>
/// <param name="writer">Provides <c>newFilesWriter</c>, <c>modFilesWriter</c> and <c>delFilesWriter</c>.</param>
/// <param name="newDirs">Relative names of directories reported as NEW.</param>
/// <param name="delDirs">Relative names of directories reported as DELETE.</param>
/// <param name="newFiles">File entries reported as NEW.</param>
/// <param name="delFiles">File entries reported as DELETE.</param>
/// <returns>Counts and accumulated sizes of new, modified and deleted files.</returns>
private static Stats DoDiff(IEnumerable<TSV_DATA> sortedA, IEnumerable<TSV_DATA> sortedB, DeltaWriter writer,
    out List<string> newDirs, out List<string> delDirs, out List<TSV_DATA> newFiles, out List<TSV_DATA> delFiles)
{
    // Lambdas cannot capture out parameters, so results are collected in locals
    // and handed over once the diff has finished.
    List<string> tmpNewDirs = new List<string>();
    List<string> tmpDelDirs = new List<string>();
    List<TSV_DATA> tmpNewFiles = new List<TSV_DATA>();
    List<TSV_DATA> tmpDelFiles = new List<TSV_DATA>();
    Stats stats = new Stats();

    Spi.Data.Diff.DiffSortedEnumerables<TSV_DATA>(
        sortedA,
        sortedB,
        KeyComparer: (TSV_DATA a, TSV_DATA b) =>
        {
            int cmp = String.Compare(a.relativeFilename, b.relativeFilename, StringComparison.OrdinalIgnoreCase);
            if (cmp != 0)
            {
                return cmp;
            }

            bool kindOfA = Spi.Misc.IsDirectoryFlagSet(a.dwFileAttributes);
            bool kindOfB = Spi.Misc.IsDirectoryFlagSet(b.dwFileAttributes);

            // two directories OR two files --> same name --> return 0
            // one dir AND one file         --> same name --> return -1 to represent the difference
            return kindOfA == kindOfB ? 0 : -1;
        },
        AttributeComparer: (TSV_DATA a, TSV_DATA b) =>
        {
            // Two directories are never reported as modified.
            if (Misc.IsDirectoryFlagSet(a.dwFileAttributes) && Misc.IsDirectoryFlagSet(b.dwFileAttributes))
            {
                return 0;
            }

            // Compare directly instead of narrowing a 64-bit difference to int:
            // the cast keeps only the low 32 bits, so a difference that is an
            // exact multiple of 2^32 (e.g. sizes 4 GiB apart) would come out as 0.
            if (a.timeModified != b.timeModified)
            {
                return a.timeModified < b.timeModified ? -1 : 1;
            }

            if (a.size != b.size)
            {
                return a.size < b.size ? -1 : 1;
            }

            return 0;
        },
        OnCompared: (DIFF_STATE state, TSV_DATA a, TSV_DATA b) =>
        {
            switch (state)
            {
                case DIFF_STATE.NEW:
                    if (Misc.IsDirectoryFlagSet(b.dwFileAttributes))
                    {
                        tmpNewDirs.Add(b.relativeFilename);
                    }
                    else
                    {
                        writer.newFilesWriter.WriteLine(b.relativeFilename);
                        tmpNewFiles.Add(b);
                        stats.newFiles += 1;
                        stats.newFilesSize += b.size;
                    }
                    break;

                case DIFF_STATE.MODIFY:
                    writer.modFilesWriter.WriteLine(b.relativeFilename);
                    stats.modFiles += 1;
                    stats.modFilesSize += b.size;
                    break;

                case DIFF_STATE.DELETE:
                    if (Misc.IsDirectoryFlagSet(a.dwFileAttributes))
                    {
                        tmpDelDirs.Add(a.relativeFilename);
                    }
                    else
                    {
                        writer.delFilesWriter.WriteLine(a.relativeFilename);
                        tmpDelFiles.Add(a);
                        stats.delFiles += 1;
                        stats.delFilesSize += a.size;
                    }
                    break;
            }
        },
        checkSortOrder: true);

    newDirs = tmpNewDirs;
    delDirs = tmpDelDirs;
    newFiles = tmpNewFiles;
    delFiles = tmpDelFiles;
    return stats;
}