Esempio n. 1
0
        /// <summary>
        /// Looks for duplicate files in one or more folders. Files are first grouped by
        /// length; inside every length group that holds more than one file they are then
        /// grouped either by lower-cased file name or by the MD5 hash of their content
        /// (optionally of a skip/take reduced stream). The result can be traced to the
        /// console, saved to a file, and the clones can be deleted.
        /// </summary>
        /// <param name="ra">
        /// Run arguments. The method reads the switches -ni, -dirs, -sp, -rec, -j, -hash,
        /// -del and -save from <c>ra.InArgs</c>, writes <c>ra.State.SearchPattern</c> and
        /// sets <c>ra.Trace</c>.
        /// </param>
        /// <returns>Always 0.</returns>
        /// <exception cref="ArgumentNullException">-ni is present but -dirs is not.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// A negative skip/take value was entered, or the number of concurrent readers is
        /// not between 1 and <see cref="Environment.ProcessorCount"/>.
        /// </exception>
        /// <exception cref="DirectoryNotFoundException">One of the given folders does not exist.</exception>
        public int Run(RunArgs ra)
        {
            bool interactive = !ra.InArgs.ContainsKey("-ni");

            var srcDirs          = new List<DirectoryInfo>();
            var srcs             = string.Empty;
            var skipLessThanSize = 1;
            var skipMoreThanSize = -1;
            var ignExt           = string.Empty;
            var iExt             = new List<string>();
            var compHash         = false;
            var recursive        = false;
            var inParallel       = 1;
            var delete           = false;
            var saveas           = string.Empty;
            var saveType         = string.Empty;

            ra.State.SearchPattern = "*.*";

            if (interactive)
            {
                Utils.ReadString("Enter folders to search into, separated by semicolon: ", ref srcs, true);
                Utils.ReadString("Search pattern (*.*): ", ref ra.State.SearchPattern);
                recursive = !Utils.ReadWord("Recursive search? (n/*): ", "n");

                // NOTE(review): the prompts mention Kb, but both limits are compared to
                // FileInfo.Length (bytes) further down - confirm the intended unit.
                Utils.ReadInt("Skip if size < 1Kb: ", ref skipLessThanSize, false);
                Utils.ReadInt("Skip if size > #Kb: ", ref skipMoreThanSize, false);
                Utils.ReadString("Skip extensions (.xyz): ", ref ignExt, false);
                compHash = Utils.ReadWord("Compare file names (default) or MD5 hashes? (h/*): ", "h");
            }
            else
            {
                if (!ra.InArgs.ContainsKey("-dirs"))
                {
                    throw new ArgumentNullException("-dirs");
                }

                srcs = ra.InArgs.GetFirstValue("-dirs");

                if (ra.InArgs.ContainsKey("-sp"))
                {
                    ra.State.SearchPattern = ra.InArgs.GetFirstValue("-sp");
                }
                if (ra.InArgs.ContainsKey("-rec"))
                {
                    recursive = true;
                }
                if (ra.InArgs.ContainsKey("-j"))
                {
                    inParallel = int.Parse(ra.InArgs.GetFirstValue("-j"));
                }
                if (ra.InArgs.ContainsKey("-hash"))
                {
                    compHash = true;
                }
                if (ra.InArgs.ContainsKey("-del"))
                {
                    delete = true;
                }
                if (ra.InArgs.ContainsKey("-save"))
                {
                    saveas   = ra.InArgs.GetFirstValue("-save");
                    saveType = Path.GetExtension(saveas).Replace(".", "");
                }
            }

            if (!string.IsNullOrEmpty(ignExt))
            {
                foreach (var ext in ignExt.Split(';'))
                {
                    iExt.Add(ext.Trim());
                }
            }

            var useStreamReduction = false;
            var take = 4000;
            var skip = 10000;

            if (compHash && interactive)
            {
                Utils.ReadInt($"Concurrent readers (1-{Environment.ProcessorCount}): ", ref inParallel);
                "By default the hash is computed over the whole file.".PrintLine();
                "You can use skip and take parameters to read a portion of the file.".PrintLine();
                useStreamReduction = Utils.ReadWord("Do you want to use skip/take? (y/*): ", "y");

                if (useStreamReduction)
                {
                    "The reader always starts with a TAKE (position 0).".PrintLine();
                    Utils.ReadInt($"Take bytes ({take}): ", ref take, false);
                    Utils.ReadInt($"Skip bytes ({skip}): ", ref skip, false);

                    if (skip < 0 || take < 0)
                    {
                        // The single-string constructor treats its argument as the parameter
                        // name, so pass the name and the message separately.
                        throw new ArgumentOutOfRangeException(skip < 0 ? nameof(skip) : nameof(take), "Negative skip or take value.");
                    }
                }
            }

            if (inParallel < 1 || inParallel > Environment.ProcessorCount)
            {
                throw new ArgumentOutOfRangeException("inParallel", $"The concurrent readers should be between 1 and {Environment.ProcessorCount}");
            }

            foreach (var p in srcs.Split(';'))
            {
                if (!Directory.Exists(p))
                {
                    throw new DirectoryNotFoundException(p);
                }

                srcDirs.Add(new DirectoryInfo(p));
            }

            var searchOpt         = recursive ? SearchOption.AllDirectories : SearchOption.TopDirectoryOnly;
            var byLength          = new Dictionary<long, List<FileInfo>>();
            var totalFilesChecked = 0;

            // Some subdirectories may be restricted and enumerating them throws, so every
            // folder is listed separately and a failure only drops that folder.
            foreach (var d in srcDirs)
            {
                try
                {
                    foreach (var sd in d.EnumerateDirectories("*", searchOpt).Union(new DirectoryInfo[] { d }))
                    {
                        try
                        {
                            var accepted = new List<FileInfo>();

                            foreach (var file in sd.EnumerateFiles(ra.State.SearchPattern, SearchOption.TopDirectoryOnly))
                            {
                                if (iExt.Contains(file.Extension))
                                {
                                    continue;
                                }
                                if (skipLessThanSize > 0 && file.Length < skipLessThanSize)
                                {
                                    continue;
                                }
                                if (skipMoreThanSize > 0 && file.Length > skipMoreThanSize)
                                {
                                    continue;
                                }

                                accepted.Add(file);
                            }

                            // Only folders that were listed completely contribute files;
                            // a folder that failed half way is ignored as a whole.
                            foreach (var fi in accepted)
                            {
                                List<FileInfo> sameLength;
                                if (!byLength.TryGetValue(fi.Length, out sameLength))
                                {
                                    sameLength = new List<FileInfo>();
                                    byLength.Add(fi.Length, sameLength);
                                }

                                sameLength.Add(fi);
                                totalFilesChecked++;
                            }
                        }
                        catch (Exception ex)
                        {
                            ex.Message.PrintLine(ConsoleColor.Red);
                        }
                    }
                }
                catch (Exception ex)
                {
                    ex.Message.PrintLine(ConsoleColor.Red);
                }
            }

            // Number of files that actually get compared: every member of a length group
            // with more than one file. Used as the denominator of the progress output.
            var totalFiles = byLength.Values.Where(g => g.Count > 1).Sum(g => g.Count);

            // Groups of clones: key is the lower-cased name or the MD5 hash text.
            var duplicateGroups = new List<KeyValuePair<string, List<FileInfo>>>();
            var sb              = new StringBuilder();
            var totalDuplicates = 0;
            var counter         = 0;
            var gate            = new object();

            $"{Environment.NewLine}Total files checked: {totalFilesChecked}".PrintLine();
            Console.CursorVisible = false;
            var cursorTop = Console.CursorTop;

            foreach (var candidates in byLength.Values)
            {
                if (candidates.Count < 2)
                {
                    continue;
                }

                // The key dictionary is scoped to one length group, so files that share a
                // name (or a partial hash) but differ in length are never merged.
                var byKey = new Dictionary<string, List<FileInfo>>();

                if (compHash)
                {
                    Console.SetCursorPosition(0, cursorTop);

                    Parallel.For(0, candidates.Count, new ParallelOptions()
                    {
                        MaxDegreeOfParallelism = inParallel
                    }, (i) =>
                    {
                        var file = candidates[i];

                        try
                        {
                            string key;
                            Stream stream = File.OpenRead(file.FullName);

                            try
                            {
                                if (useStreamReduction)
                                {
                                    stream = new StreamReductor(stream, skip, take);
                                }

                                using (var md5 = MD5.Create())
                                {
                                    // The counter is shared between the workers.
                                    var position = Interlocked.Increment(ref counter);
                                    $"{position}/{totalFiles} [{file.Length / 1000}]Kb file{i} = {file.FullName}".PrintLine(ConsoleColor.Yellow);

                                    key = BitConverter.ToString(md5.ComputeHash(stream));
                                }
                            }
                            finally
                            {
                                // Disposes the wrapper when one was created, otherwise the file stream.
                                stream.Dispose();
                            }

                            lock (gate)
                            {
                                List<FileInfo> bucket;
                                if (byKey.TryGetValue(key, out bucket))
                                {
                                    totalDuplicates++;
                                }
                                else
                                {
                                    bucket = new List<FileInfo>();
                                    byKey.Add(key, bucket);
                                }

                                bucket.Add(file);
                            }
                        }
                        catch (Exception ex)
                        {
                            ex.Message.PrintSysError();
                            return;
                        }
                    });

                    // Blank out the progress lines printed by the workers.
                    var cursorTopNow = Console.CursorTop;
                    for (int top = cursorTop; top < cursorTopNow; top++)
                    {
                        for (int left = 0; left < Console.WindowWidth; left++)
                        {
                            Console.SetCursorPosition(left, top);
                            " ".Print();
                        }
                    }
                }
                else
                {
                    foreach (var f in candidates)
                    {
                        var key = f.Name.ToLowerInvariant();

                        List<FileInfo> bucket;
                        if (byKey.TryGetValue(key, out bucket))
                        {
                            totalDuplicates++;
                        }
                        else
                        {
                            bucket = new List<FileInfo>();
                            byKey.Add(key, bucket);
                        }

                        bucket.Add(f);
                        $"Comparing {++counter}/{totalFiles}".Print(ConsoleColor.Yellow);
                        Console.SetCursorPosition(0, Console.CursorTop);
                    }
                }

                foreach (var entry in byKey)
                {
                    if (entry.Value.Count > 1)
                    {
                        duplicateGroups.Add(entry);
                    }
                }
            }

            Console.SetCursorPosition(0, cursorTop);
            Console.CursorVisible = true;

            sb.AppendLine("Files with the same length and name/hash are grouped together.");
            sb.AppendLine();

            foreach (var grp in duplicateGroups)
            {
                var length = grp.Value[0].Length;
                var sizeKb = length / 1000;
                var lbl    = sizeKb < 1 ? $"{length}b" : $"{sizeKb}Kb";

                sb.AppendLine($"[{grp.Key}] {lbl}");
                foreach (var p in grp.Value)
                {
                    sb.AppendLine(p.FullName);
                }

                sb.AppendLine();
            }

            $"There are {totalDuplicates} files with clones.".PrintLine();

            if (totalDuplicates > 0)
            {
                ra.Trace = interactive && Utils.ReadWord("Trace? (y/*): ", "y");

                if (ra.Trace)
                {
                    sb.ToString().PrintLine(ConsoleColor.Yellow);
                }

                // In case of -ni the format comes from the extension of the -save path.
                var opt           = saveType;
                var saveRequested = !interactive && !string.IsNullOrEmpty(saveas);

                if (delete || saveRequested || (interactive && Utils.PickOption("Save results? (fdelete, xml, json, txt/*): ", ref opt, false, "fdelete", "xml", "json", "txt")))
                {
                    var fn = string.Empty;

                    if (interactive)
                    {
                        Utils.ReadString("Result file path: ", ref fn, true);
                    }
                    else
                    {
                        fn = saveas;
                    }

                    // Every file except the first one of each group is considered a clone.
                    var toDel = new StringBuilder();

                    if (opt == "fdelete" || delete)
                    {
                        foreach (var grp in duplicateGroups)
                        {
                            foreach (var f in grp.Value.Select(x => x.FullName).Skip(1))
                            {
                                toDel.AppendLine(f);
                                if (delete && File.Exists(f))
                                {
                                    File.Delete(f);
                                }
                            }
                        }
                    }

                    string data;

                    if (opt == "fdelete")
                    {
                        // Keep the plain list; it must not be replaced by the XML output.
                        data = toDel.ToString();
                    }
                    else if (opt == "txt")
                    {
                        data = sb.ToString();
                    }
                    else
                    {
                        var duplicates = new List<Duplicate>();

                        foreach (var grp in duplicateGroups)
                        {
                            duplicates.Add(new Duplicate(grp.Key, grp.Value[0].Length, grp.Value.Select(x => x.FullName).ToArray()));
                        }

                        // XML is the fallback for any format other than json.
                        if (opt == "json")
                        {
                            data = duplicates.ToJson();
                        }
                        else
                        {
                            data = duplicates.ToXml();
                        }
                    }

                    if (!string.IsNullOrEmpty(fn))
                    {
                        File.WriteAllText(fn, data);
                    }
                }
            }

            return(0);
        }