/// <summary>
/// Finds files with identical contents under <c>settings.LeftDirectory</c>
/// (searched recursively).
/// </summary>
/// <param name="settings">Supplies <c>LeftDirectory</c>, the root directory to scan.</param>
/// <returns>
/// One <c>Same_Contents</c> result per duplicate found. In each result the
/// earlier-listed file is the first argument ('left') and the later-listed
/// duplicate is the second ('right'), so everything on the 'right' can be
/// deleted while still keeping one copy.
/// </returns>
public static List<FileComparisonResult> Go(SortFilesSettings settings)
{
    // Group files by size first. Hashing is deferred: a file whose size is
    // unique cannot be a duplicate, so its contents never need to be read.
    var root = new DirectoryInfo(settings.LeftDirectory);
    var filesBySize = SortFilesSearchDuplicates.MapFilesizesToFilenames(
        settings.LeftDirectory,
        root.EnumerateFiles("*", SearchOption.AllDirectories));

    var duplicates = new List<FileComparisonResult>();
    foreach (var sameSizeGroup in filesBySize.Values)
    {
        // A lone file of this size has nothing to be compared against.
        if (sameSizeGroup.Count <= 1)
        {
            continue;
        }

        for (int current = 0; current < sameSizeGroup.Count; current++)
        {
            // Filename is appended directly to the root directory to form the
            // path that gets hashed.
            var candidate = sameSizeGroup[current];
            candidate.ContentHash = Utils.GetSha512(
                settings.LeftDirectory + candidate.Filename);

            // Look back through the files already hashed in this group.
            // This is quadratic in the group size, but the cost of hashing
            // file contents dominates it in practice.
            for (int earlier = 0; earlier < current; earlier++)
            {
                var previous = sameSizeGroup[earlier];
                if (previous.ContentHash == candidate.ContentHash)
                {
                    // Report only against the first matching file, which
                    // always goes on the 'left' side of the result.
                    duplicates.Add(new FileComparisonResult(
                        previous, candidate, FileComparisonResultType.Same_Contents));
                    break;
                }
            }
        }
    }

    return duplicates;
}
/// <summary>
/// Checks that <c>SortFilesSearchDuplicates.MapFilesizesToFilenames</c> groups
/// files by size and fills in Filename, FileSize and LastModifiedTime for each
/// entry while leaving ContentHash unset.
/// </summary>
static void TestMethod_MapFilesizesToFilenames()
{
    var dirTest = TestUtil.GetTestSubDirectory("testMapFilesizesToFilenames");
    var pathA = Path.Combine(dirTest, "a.txt");
    var pathB = Path.Combine(dirTest, "b.txt");
    var pathC = Path.Combine(dirTest, "c.txt");

    // a.txt and c.txt have the same length; b.txt is one character longer.
    File.WriteAllText(pathA, "abcd");
    File.WriteAllText(pathB, "abcde");
    File.WriteAllText(pathC, "1234");

    // Give c.txt a different last-modified time from the other files.
    // SetLastWriteTimeUtc takes a UTC value, so start from UtcNow rather
    // than the local clock.
    File.SetLastWriteTimeUtc(pathC, DateTime.UtcNow.AddDays(1));

    var map = SortFilesSearchDuplicates.MapFilesizesToFilenames(
        dirTest, new DirectoryInfo(dirTest).EnumerateFiles("*"));

    // Enumeration order is not relied upon, so sort the two 4-byte entries
    // by name before inspecting them individually.
    var mapSorted = map[4].OrderBy(item => item.Filename).ToArray();

    // Two distinct sizes: 4 (a.txt, c.txt) and 5 (b.txt).
    TestUtil.IsEq(2, map.Count);
    TestUtil.IsEq(2, map[4].Count);
    TestUtil.IsEq(1, map[5].Count);

    // test that FileInfoForComparison was set correctly
    TestUtil.IsEq(Utils.Sep + "a.txt", mapSorted[0].Filename);
    TestUtil.IsEq(null, mapSorted[0].ContentHash);
    TestUtil.IsEq(4L, mapSorted[0].FileSize);
    TestUtil.IsEq(File.GetLastWriteTimeUtc(pathA), mapSorted[0].LastModifiedTime);

    TestUtil.IsEq(Utils.Sep + "b.txt", map[5][0].Filename);
    TestUtil.IsEq(null, map[5][0].ContentHash);
    TestUtil.IsEq(5L, map[5][0].FileSize);
    TestUtil.IsEq(File.GetLastWriteTimeUtc(pathB), map[5][0].LastModifiedTime);

    TestUtil.IsEq(Utils.Sep + "c.txt", mapSorted[1].Filename);
    TestUtil.IsEq(null, mapSorted[1].ContentHash);
    TestUtil.IsEq(4L, mapSorted[1].FileSize);
    TestUtil.IsEq(File.GetLastWriteTimeUtc(pathC), mapSorted[1].LastModifiedTime);
}