public static DupeCollection FindDupes(MetricDict metrics)
{
    Dictionary<AMetric, DupeFileCollection> filesByMetrics = new Dictionary<AMetric, DupeFileCollection>();
    foreach (var kv in metrics)
    {
        if (kv.Value == null)
        {
            continue;
        }

        DupeFileCollection dfc;
        if (!filesByMetrics.TryGetValue(kv.Value, out dfc))
        {
            dfc = new DupeFileCollection(kv.Value);
            filesByMetrics[kv.Value] = dfc;
        }
        dfc.Add(kv.Key);
    }

    DupeCollection dc = new DupeCollection();
    foreach (var kv in filesByMetrics)
    {
        if (kv.Value.Count > 1)
        {
            dc.Add(kv.Value);
        }
    }
    return dc;
}
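// FindDupes groups files by metric equality, so a concrete AMetric needs value-based
// Equals/GetHashCode or the Dictionary<AMetric, ...> lookup above will never match two files.
// A minimal sketch of such a subclass, assuming AMetric has no required constructor arguments
// or abstract members; SizeMetric and its Size property are hypothetical, not part of the code above.
public class SizeMetric : AMetric
{
    public long Size { get; set; }

    public override bool Equals(object obj)
    {
        var other = obj as SizeMetric;
        return other != null && other.Size == Size;
    }

    public override int GetHashCode()
    {
        return Size.GetHashCode();
    }
}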
public MetricDict GenerateMetrics(IProgressReceiver progressReceiver)
{
    int fileCount = 0;
    //get the total count of files asynchronously so we can start collecting metrics using the enumerator
    var fileCountTask = new Task(() =>
    {
        var getFiles = FileFinder.GetFiles(targetDir, extensionsToProcess, searchOption);
        fileCount = getFiles.Length;
    });
    fileCountTask.Start();

    MetricDict metrics = new MetricDict();
    var files = FileFinder.EnumerateFiles(targetDir, extensionsToProcess, searchOption);

    int currentFile = 0;
    Parallel.ForEach(files, f =>
    {
        var metric = metricGen.Generate(f);
        if (metric != null)
        {
            //MetricDict is not thread safe, so guard concurrent writes
            lock (metrics)
            {
                metrics[f] = metric;
            }
        }
        //atomic increment so concurrent iterations don't lose progress updates
        int processed = Interlocked.Increment(ref currentFile);
        progressReceiver?.Update(new FractionalProgress(processed, fileCount, f.FullName));
    });
    return metrics;
}
public DuplicateFinderResult FindDuplicates()
{
    OnProgress?.Invoke(new BasicProgress(0, "Starting up..."));
    SearchOption filesSearchOption = topDirOnly ? SearchOption.TopDirectoryOnly : SearchOption.AllDirectories;

    Dictionary<string, MetricDict> metricsPerGenerator = new Dictionary<string, MetricDict>();
    foreach (var gen in generators) //should this be a parallel foreach too? most of it is IO heavy, so the only gain would be for saving and loading metrics
    {
        MetricDict fileMetrics = null;
        if (deleteCache)
        {
            MetricCache.DeleteCache(dir, gen.ID);
        }
        else
        {
            fileMetrics = MetricCache.LoadMetrics(dir, gen.ID);
        }

        //no usable cache: generate the metrics for this generator and cache them
        if (fileMetrics == null)
        {
            var metricGen = new FolderMetricGenerator(gen, dir, AppSettings.Instance.extensionsToProcess, filesSearchOption);
            fileMetrics = metricGen.GenerateMetrics(this);
            MetricCache.SaveMetrics(fileMetrics, dir, gen.ID);
        }
        metricsPerGenerator[gen.ID] = fileMetrics;
    }

    DuplicateFinderResult dfr = new DuplicateFinderResult();
    foreach (var gen in generators)
    {
        DupeCollection dc = PotentialDuplicateFinder.FindDupes(metricsPerGenerator[gen.ID]);
        dfr.dupesByGenerator[gen.ID] = dc;
    }

    OnProgress?.Invoke(new BasicProgress(1, "Done"));
    return dfr;
}
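// A minimal sketch of driving the search end to end. The class name DuplicateFinder and its
// constructor arguments are assumptions (only FindDuplicates and dupesByGenerator appear above),
// and iterating DupeCollection/DupeFileCollection assumes they implement IEnumerable.
var finder = new DuplicateFinder(new DirectoryInfo(@"C:\Photos"));
DuplicateFinderResult result = finder.FindDuplicates();
foreach (var byGenerator in result.dupesByGenerator)
{
    foreach (DupeFileCollection group in byGenerator.Value)
    {
        Console.WriteLine("Potential duplicates (generator " + byGenerator.Key + "):");
        foreach (FileInfo file in group)
        {
            Console.WriteLine("  " + file.FullName);
        }
    }
}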
public static void SaveMetrics(MetricDict dict, DirectoryInfo directory, string genId)
{
    string filename = GetMetricsFileName(directory, genId);

    JObject obj = new JObject();
    JArray data = new JArray();
    //maps each concrete AMetric type to a small integer id so the type name is only stored once
    Dictionary<Type, int> typeDict = new Dictionary<Type, int>();
    obj["directory"] = directory.FullName;
    obj["data"] = data;

    foreach (var kv in dict)
    {
        JObject entry = new JObject();
        int typeId;
        if (!typeDict.TryGetValue(kv.Value.GetType(), out typeId))
        {
            typeId = typeDict.Count;
            typeDict[kv.Value.GetType()] = typeId;
        }
        entry["type"] = typeId;
        entry["metric"] = JObject.FromObject(kv.Value);
        entry["file"] = kv.Key.FullName;
        data.Add(entry);
    }

    //store the type table (id -> assembly qualified name) alongside the data
    JObject typeDictJson = new JObject();
    foreach (var kv in typeDict)
    {
        typeDictJson[kv.Value.ToString()] = kv.Key.AssemblyQualifiedName;
    }
    obj["types"] = typeDictJson;

    using (FileStream fs = new FileStream(filename, FileMode.Create))
    using (StreamWriter sw = new StreamWriter(fs))
    using (JsonTextWriter jtw = new JsonTextWriter(sw))
    {
        obj.WriteTo(jtw);
    }
}
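// For reference, the cache file written above ends up shaped roughly like this; the paths and
// the "metric" payload are illustrative, since the payload depends on the concrete AMetric type:
// {
//   "directory": "C:\\Photos",
//   "data": [
//     { "type": 0, "metric": { ... }, "file": "C:\\Photos\\a.jpg" },
//     { "type": 0, "metric": { ... }, "file": "C:\\Photos\\b.jpg" }
//   ],
//   "types": { "0": "<AssemblyQualifiedName of the metric type>" }
// }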
public static MetricDict LoadMetrics(DirectoryInfo directory, string genId)
{
    string filename = GetMetricsFileName(directory, genId);
    if (!File.Exists(filename))
    {
        return null;
    }

    MetricDict metricDict = new MetricDict();
    JObject metrics;
    using (FileStream fs = new FileStream(filename, FileMode.Open))
    using (StreamReader sr = new StreamReader(fs))
    using (JsonTextReader jtr = new JsonTextReader(sr))
    {
        metrics = JObject.Load(jtr);
    }

    //recover the type dict, but with the id as the key
    Dictionary<int, Type> typeDict = new Dictionary<int, Type>();
    JObject typeDictJson = metrics["types"] as JObject;
    foreach (var kv in typeDictJson)
    {
        typeDict[int.Parse(kv.Key)] = Type.GetType(kv.Value.Value<string>());
    }

    JArray data = metrics["data"] as JArray;
    foreach (JObject entry in data)
    {
        Type t = typeDict[entry["type"].Value<int>()];
        AMetric metric = entry["metric"].ToObject(t) as AMetric;
        FileInfo file = new FileInfo(entry["file"].Value<string>());
        metricDict[file] = metric;
    }
    return metricDict;
}