        public static List<FileComparisonResult> Go(SortFilesSettings settings)
        {
            // first, just make an index that simply maps filesizes to filenames.
            // we don't need to compute any content-hashes yet, because if there
            // is only one file with that filesize, we know it's not a duplicate.
            var results = new List<FileComparisonResult>();
            var di      = new DirectoryInfo(settings.LeftDirectory);
            var index   = SortFilesSearchDuplicates.MapFilesizesToFilenames(
                settings.LeftDirectory,
                di.EnumerateFiles("*", SearchOption.AllDirectories));

            foreach (var list in index.Values)
            {
                if (list.Count > 1)
                {
                    // if there's more than one file with the same filesize,
                    // compute hashes of contents to look for duplicates.
                    for (int i = 0; i < list.Count; i++)
                    {
                        list[i].ContentHash = Utils.GetSha512(
                            settings.LeftDirectory + list[i].Filename);

                        // have we seen this hash before? this comparison loop is O(n^2),
                        // but its cost is dwarfed by the cost of computing the hashes.
                        for (int j = 0; j < i; j++)
                        {
                            if (list[j].ContentHash == list[i].ContentHash)
                            {
                                // consistently put the first-appearing file on the 'left' side
                                // so that the user can safely delete everything on the 'right'.
                                results.Add(new FileComparisonResult(
                                                list[j], list[i], FileComparisonResultType.Same_Contents));
                                break;
                            }
                        }
                    }
                }
            }

            return results;
        }
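
MapFilesizesToFilenames itself is not shown in this example. Below is a minimal sketch of what it might look like, inferred from the test further down; the FileInfoForComparison constructor signature and the relative-path handling are assumptions, not taken from the real implementation.

        // sketch only: assumes the same usings as the surrounding code
        // (System.Collections.Generic, System.IO) and a hypothetical
        // FileInfoForComparison(filename, filesize, lastModifiedTime) constructor.
        static Dictionary<long, List<FileInfoForComparison>> MapFilesizesToFilenamesSketch(
            string rootDirectory, IEnumerable<FileInfo> files)
        {
            var map = new Dictionary<long, List<FileInfoForComparison>>();
            foreach (var file in files)
            {
                // store the path relative to the root, beginning with a separator,
                // so that rootDirectory + Filename reconstructs the full path.
                var relativePath = file.FullName.Substring(rootDirectory.Length);
                var info = new FileInfoForComparison(
                    relativePath, file.Length, file.LastWriteTimeUtc);

                if (!map.TryGetValue(file.Length, out var list))
                {
                    list = new List<FileInfoForComparison>();
                    map[file.Length] = list;
                }

                list.Add(info);
            }

            return map;
        }
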
        static void TestMethod_MapFilesizesToFilenames()
        {
            var dirTest = TestUtil.GetTestSubDirectory("testMapFilesizesToFilenames");

            File.WriteAllText(Path.Combine(dirTest, "a.txt"), "abcd");
            File.WriteAllText(Path.Combine(dirTest, "b.txt"), "abcde");
            File.WriteAllText(Path.Combine(dirTest, "c.txt"), "1234");

            // adjust the last-modified time of c.txt so it differs from a.txt's
            File.SetLastWriteTimeUtc(Path.Combine(dirTest, "c.txt"), DateTime.UtcNow.AddDays(1));

            var map = SortFilesSearchDuplicates.MapFilesizesToFilenames(dirTest,
                                                                        new DirectoryInfo(dirTest).EnumerateFiles("*"));
            var mapSorted = (from item in map[4] orderby item.Filename select item).ToArray();

            TestUtil.IsEq(2, map.Count);
            TestUtil.IsEq(2, map[4].Count);
            TestUtil.IsEq(1, map[5].Count);

            // test that FileInfoForComparison was set correctly
            TestUtil.IsEq(Utils.Sep + "a.txt", mapSorted[0].Filename);
            TestUtil.IsEq(null, mapSorted[0].ContentHash);
            TestUtil.IsEq(4L, mapSorted[0].FileSize);
            TestUtil.IsEq(File.GetLastWriteTimeUtc(Path.Combine(dirTest, "a.txt")),
                          mapSorted[0].LastModifiedTime);

            TestUtil.IsEq(Utils.Sep + "b.txt", map[5][0].Filename);
            TestUtil.IsEq(null, map[5][0].ContentHash);
            TestUtil.IsEq(5L, map[5][0].FileSize);
            TestUtil.IsEq(File.GetLastWriteTimeUtc(Path.Combine(dirTest, "b.txt")),
                          map[5][0].LastModifiedTime);

            TestUtil.IsEq(Utils.Sep + "c.txt", mapSorted[1].Filename);
            TestUtil.IsEq(null, mapSorted[1].ContentHash);
            TestUtil.IsEq(4L, mapSorted[1].FileSize);
            TestUtil.IsEq(File.GetLastWriteTimeUtc(Path.Combine(dirTest, "c.txt")),
                          mapSorted[1].LastModifiedTime);
        }
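
Utils.GetSha512 is also not shown. Here is a minimal sketch, assuming it simply hex-encodes the SHA-512 hash of the file's contents; the real implementation may format or cache its result differently.

        // sketch only: hashes the file's contents and returns a printable hex string,
        // which is enough for the equality comparisons done in Go() above.
        static string GetSha512Sketch(string path)
        {
            using (var sha = System.Security.Cryptography.SHA512.Create())
            using (var stream = File.OpenRead(path))
            {
                byte[] hash = sha.ComputeHash(stream);

                // hex-encode so hashes can be compared and displayed as strings.
                return BitConverter.ToString(hash).Replace("-", "");
            }
        }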