/// <summary>
/// Groups the supplied per-file metrics into buckets of files that share the
/// same metric value, and returns only the buckets containing more than one
/// file (i.e. the potential duplicates).
/// </summary>
/// <param name="metrics">Map of file -> computed metric; entries with a null metric are skipped.</param>
/// <returns>A collection of duplicate-file groups; empty when no metric is shared by two or more files.</returns>
public static DupeCollection FindDupes(MetricDict metrics)
{
    // Bucket files by metric value; files sharing a metric are duplicate candidates.
    var buckets = new Dictionary<AMetric, DupeFileCollection>();

    foreach (var entry in metrics)
    {
        if (entry.Value == null)
        {
            // No metric could be computed for this file — nothing to compare against.
            continue;
        }

        DupeFileCollection bucket;
        if (!buckets.TryGetValue(entry.Value, out bucket))
        {
            bucket = new DupeFileCollection(entry.Value);
            buckets[entry.Value] = bucket;
        }

        bucket.Add(entry.Key);
    }

    // Only buckets with at least two files represent actual duplicates.
    var result = new DupeCollection();
    foreach (var bucket in buckets.Values)
    {
        if (bucket.Count > 1)
        {
            result.Add(bucket);
        }
    }

    return result;
}
/// <summary>
/// Runs every configured metric generator over the target directory (using the
/// metric cache when allowed) and collects the duplicate groups each generator
/// finds into a single <see cref="DuplicateFinderResult"/>.
/// Progress is reported through <c>OnProgress</c> at start (0) and completion (1).
/// </summary>
/// <returns>The duplicates found, keyed by generator ID in <c>dupesByGenerator</c>.</returns>
public DuplicateFinderResult FindDuplicates()
{
    OnProgress?.Invoke(new BasicProgress(0, "Starting up..."));

    SearchOption filesSearchOption = topDirOnly ? SearchOption.TopDirectoryOnly : SearchOption.AllDirectories;

    // Count the files in the background so metric collection can start immediately.
    // FIX: was `new Task(...)` + `.Start()` — use Task.Run so the work is scheduled
    // idiomatically and we hold a task we can observe below (the original task was
    // never awaited, leaving any enumeration exception unobserved).
    // NOTE(review): fileCount is written but never read anywhere in this method —
    // presumably it was meant to drive progress reporting; confirm before removing.
    int fileCount = 0;
    var fileCountTask = Task.Run(() =>
    {
        var getFiles = FileFinder.GetFiles(dir, AppSettings.Instance.extensionsToProcess, filesSearchOption);
        fileCount = getFiles.Length;
    });

    var metricsPerGenerator = new Dictionary<string, MetricDict>();
    foreach (var gen in generators) // mostly IO-bound, so parallelizing would mainly help metric cache save/load
    {
        MetricDict fileMetrics = null;
        if (deleteCache)
        {
            MetricCache.DeleteCache(dir, gen.ID);
        }
        else
        {
            fileMetrics = MetricCache.LoadMetrics(dir, gen.ID);
        }

        if (fileMetrics == null)
        {
            // Cache miss (or cache deliberately deleted): generate metrics fresh and cache them.
            var metricGen = new FolderMetricGenerator(gen, dir, AppSettings.Instance.extensionsToProcess, filesSearchOption);
            fileMetrics = metricGen.GenerateMetrics(this);
            MetricCache.SaveMetrics(fileMetrics, dir, gen.ID);
        }

        metricsPerGenerator[gen.ID] = fileMetrics;
    }

    // Observe the background count so a failed directory enumeration does not
    // become an unobserved task exception. Counting is best-effort, so failures
    // are deliberately swallowed rather than aborting the whole run.
    try
    {
        fileCountTask.Wait();
    }
    catch (AggregateException)
    {
        // best-effort: the count is informational only
    }

    DuplicateFinderResult dfr = new DuplicateFinderResult();
    foreach (var gen in generators)
    {
        dfr.dupesByGenerator[gen.ID] = PotentialDuplicateFinder.FindDupes(metricsPerGenerator[gen.ID]);
    }

    OnProgress?.Invoke(new BasicProgress(1, "Done"));
    return dfr;
}