/// <summary>
        /// Groups the files in <paramref name="metrics"/> by their metric and returns every
        /// group that contains more than one file.
        /// </summary>
        /// <param name="metrics">Per-file metrics; entries whose metric is null are ignored.</param>
        /// <returns>A collection holding one <see cref="DupeFileCollection"/> per metric shared by two or more files.</returns>
        public static DupeCollection FindDupes(MetricDict metrics)
        {
            var groupsByMetric = new Dictionary<AMetric, DupeFileCollection>();

            // Pass 1: bucket each file under its metric, creating the bucket on first sight.
            foreach (var entry in metrics)
            {
                var metric = entry.Value;
                if (metric != null)
                {
                    DupeFileCollection group;
                    bool known = groupsByMetric.TryGetValue(metric, out group);
                    if (!known)
                    {
                        group = new DupeFileCollection(metric);
                        groupsByMetric.Add(metric, group);
                    }
                    group.Add(entry.Key);
                }
            }

            // Pass 2: only buckets with at least two files are potential duplicates.
            var result = new DupeCollection();
            foreach (DupeFileCollection group in groupsByMetric.Values)
            {
                if (group.Count <= 1)
                {
                    continue;
                }
                result.Add(group);
            }

            return result;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Generates a metric for every file yielded by <c>FileFinder.EnumerateFiles</c>, processing
        /// files in parallel and reporting progress after each one.
        /// </summary>
        /// <param name="progressReceiver">Receives a <see cref="FractionalProgress"/> per processed file; may be null.</param>
        /// <returns>
        /// A map from file to metric. Files for which the generator returned null are omitted.
        /// </returns>
        public MetricDict GenerateMetrics(IProgressReceiver progressReceiver)
        {
            int fileCount = 0;
            // Count the files on a background task so metric collection can start right away
            // using the lazy enumerator. Until the count finishes, progress reports carry 0 as the total.
            var fileCountTask = new Task(() =>
            {
                var getFiles = FileFinder.GetFiles(targetDir, extensionsToProcess, searchOption);
                fileCount    = getFiles.Length;
            });

            fileCountTask.Start();

            MetricDict metrics = new MetricDict();
            // Guards writes to 'metrics' from the parallel workers below.
            // NOTE(review): MetricDict is not visibly a concurrent collection here, so writes are serialized.
            object metricsGate = new object();

            var files       = FileFinder.EnumerateFiles(targetDir, extensionsToProcess, searchOption);
            int currentFile = 0;

            Parallel.ForEach(files, new Action <FileInfo>((f) =>
            {
                // Metric generation is the expensive part and runs outside the lock.
                var metric = metricGen.Generate(f);
                if (metric != null)
                {
                    lock (metricsGate)
                    {
                        metrics[f] = metric;
                    }
                }

                // Atomic increment: a plain ++ from several workers loses updates.
                int processed = System.Threading.Interlocked.Increment(ref currentFile);
                progressReceiver?.Update(new FractionalProgress(processed, fileCount, f.FullName));
            }));

            return(metrics);
        }
Exemplo n.º 3
0
        /// <summary>
        /// Runs every configured generator over the directory, reusing cached metrics where
        /// available, and collects the potential duplicates found by each generator.
        /// </summary>
        /// <returns>
        /// A <see cref="DuplicateFinderResult"/> whose <c>dupesByGenerator</c> holds one
        /// <see cref="DupeCollection"/> per generator ID.
        /// </returns>
        public DuplicateFinderResult FindDuplicates()
        {
            OnProgress?.Invoke(new BasicProgress(0, "Starting up..."));

            SearchOption filesSearchOption = topDirOnly ? SearchOption.TopDirectoryOnly : SearchOption.AllDirectories;

            Dictionary <string, MetricDict> metricsPerGenerator = new Dictionary <string, MetricDict>();

            foreach (var gen in generators) //should i make this a parallel foreach too? most of it is IO heavy, so only gain would be for saving and loading metrics
            {
                MetricDict fileMetrics = null;
                if (deleteCache)
                {
                    // Cache is discarded, so fileMetrics stays null and metrics are regenerated below.
                    MetricCache.DeleteCache(dir, gen.ID);
                }
                else
                {
                    fileMetrics = MetricCache.LoadMetrics(dir, gen.ID);
                }

                if (fileMetrics == null)
                {
                    // No usable cache: compute the metrics (progress is reported through this instance)
                    // and persist them for the next run.
                    var metricGen = new FolderMetricGenerator(gen, dir, AppSettings.Instance.extensionsToProcess, filesSearchOption);
                    fileMetrics = metricGen.GenerateMetrics(this);
                    MetricCache.SaveMetrics(fileMetrics, dir, gen.ID);
                }
                metricsPerGenerator[gen.ID] = fileMetrics;
            }

            DuplicateFinderResult dfr = new DuplicateFinderResult();

            // Duplicate detection runs only after all metrics have been gathered.
            foreach (var gen in generators)
            {
                DupeCollection dc = PotentialDuplicateFinder.FindDupes(metricsPerGenerator[gen.ID]);
                dfr.dupesByGenerator[gen.ID] = dc;
            }

            OnProgress?.Invoke(new BasicProgress(1, "Done"));

            return(dfr);
        }
Exemplo n.º 4
0
        /// <summary>
        /// Serializes <paramref name="dict"/> as JSON to the metrics file for the given directory and generator.
        /// </summary>
        /// <remarks>
        /// The written document has three members: "directory" (full path of <paramref name="directory"/>),
        /// "data" (one object per file with "type", "metric" and "file"), and "types" (a map from the
        /// numeric type id used in "data" to the assembly-qualified name of the metric type).
        /// Entries whose metric is null are not written.
        /// </remarks>
        /// <param name="dict">File-to-metric map to persist.</param>
        /// <param name="directory">Directory the metrics belong to.</param>
        /// <param name="genId">ID of the generator that produced the metrics.</param>
        public static void SaveMetrics(MetricDict dict, DirectoryInfo directory, string genId)
        {
            string filename = GetMetricsFileName(directory, genId);

            JObject obj  = new JObject();
            JArray  data = new JArray();
            Dictionary <Type, int> typeDict = new Dictionary <Type, int>();

            obj["directory"] = directory.FullName;
            obj["data"]      = data;
            foreach (var kv in dict)
            {
                if (kv.Value == null)
                {
                    // A null metric has no runtime type to record; skip it instead of crashing on GetType().
                    continue;
                }

                Type metricType = kv.Value.GetType();

                // Each distinct metric type gets a small integer id, assigned in order of first appearance.
                int typeId;
                if (!typeDict.TryGetValue(metricType, out typeId))
                {
                    typeId = typeDict.Count;
                    typeDict[metricType] = typeId;
                }

                JObject entry = new JObject();
                entry["type"]   = typeId;
                entry["metric"] = JObject.FromObject(kv.Value);
                entry["file"]   = kv.Key.FullName;
                data.Add(entry);
            }

            JObject typeDictJson = new JObject();

            foreach (var kv in typeDict)
            {
                typeDictJson[kv.Value.ToString()] = kv.Key.AssemblyQualifiedName;
            }
            obj["types"] = typeDictJson;

            // FileMode.Create overwrites any existing metrics file.
            using (FileStream fs = new FileStream(filename, FileMode.Create))
                using (StreamWriter sw = new StreamWriter(fs))
                    using (JsonTextWriter jtw = new JsonTextWriter(sw))
                    {
                        obj.WriteTo(jtw);
                    }
        }
Exemplo n.º 5
0
        /// <summary>
        /// Loads the metrics previously saved for the given directory and generator.
        /// </summary>
        /// <param name="directory">Directory the metrics belong to.</param>
        /// <param name="genId">ID of the generator that produced the metrics.</param>
        /// <returns>
        /// The file-to-metric map read from the metrics file, or null when the file does not exist
        /// or its content cannot be mapped back to metrics (missing "types"/"data" section, a type
        /// that can no longer be resolved, or an entry that does not deserialize to an
        /// <see cref="AMetric"/>).
        /// </returns>
        public static MetricDict LoadMetrics(DirectoryInfo directory, string genId)
        {
            string filename = GetMetricsFileName(directory, genId);

            if (!File.Exists(filename))
            {
                return(null);
            }

            JObject metrics;
            // Request read access only: the (path, mode) constructor asks for read/write access,
            // which fails on read-only metrics files.
            using (FileStream fs = new FileStream(filename, FileMode.Open, FileAccess.Read))
                using (StreamReader sr = new StreamReader(fs))
                    using (JsonTextReader jtr = new JsonTextReader(sr))
                    {
                        metrics = JObject.Load(jtr);
                    }

            JObject typeDictJson = metrics["types"] as JObject;
            JArray  data         = metrics["data"] as JArray;
            if (typeDictJson == null || data == null)
            {
                // Structurally incomplete file: report "no metrics" rather than dereferencing null.
                return(null);
            }

            //recover type dict but with id as index
            Dictionary <int, Type> typeDict = new Dictionary <int, Type>();
            foreach (var kv in typeDictJson)
            {
                // Type.GetType yields null for names that no longer resolve; checked per entry below.
                typeDict[int.Parse(kv.Key)] = Type.GetType(kv.Value.Value <string>());
            }

            MetricDict metricDict = new MetricDict();
            foreach (JObject entry in data)
            {
                Type t;
                if (!typeDict.TryGetValue(entry["type"].Value <int>(), out t) || t == null)
                {
                    // Unknown or unresolvable metric type: the whole file is unusable.
                    return(null);
                }

                AMetric metric = entry["metric"].ToObject(t) as AMetric;
                if (metric == null)
                {
                    // Entry did not deserialize to an AMetric; do not hand out a map with null metrics.
                    return(null);
                }

                FileInfo file = new FileInfo(entry["file"].Value <string>());
                metricDict[file] = metric;
            }
            return(metricDict);
        }