/// <summary>
/// Menu handler: passes every row currently in the list view to
/// SearchForIdenticalFilesWithDifferentWriteTimes and relabels the rows it returns.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void searchForIdenticalToolStripMenuItem_Click(object sender, EventArgs e)
{
    Utils.MessageBox("We'll go through every supposedly-changed file and see if it is " +
        "truly a changed file, or if only the write-times are different.");

    // take a snapshot of the rows up front; the search itself runs inside StartBgAction
    var candidates = listView.Items.Cast<FileComparisonResult>().ToList();

    StartBgAction(() =>
    {
        // the supposedly-changed rows that turn out to be identical
        var identical =
            SortFilesSearchDuplicates.SearchForIdenticalFilesWithDifferentWriteTimes(
                _settings.LeftDirectory, _settings.RightDirectory, candidates);

        WrapInvoke(() =>
        {
            var summary = "Found " + identical.Count + " identical file(s).";
            Utils.MessageBox(summary);

            // report the outcome in the left-most column of each matching row
            foreach (var row in identical)
            {
                row.SubItems[0].Text = "Identical Contents";
            }

            listView.Refresh();
        });
    });
}
/// <summary>
/// Runs the search selected by <c>_action</c>, stores its output in <c>_results</c>,
/// and then updates the list view and caption label through WrapInvoke.
/// </summary>
internal void RunSortFilesAction()
{
    // start with an empty result set; it stays empty for an unrecognized action
    _results = new FileComparisonResult[0];

    var action = _action;
    if (action == SortFilesAction.SearchDifferences)
    {
        _results = SortFilesSearchDifferences.Go(_settings).ToArray();
    }
    else if (action == SortFilesAction.SearchDuplicates)
    {
        _results = SortFilesSearchDuplicates.Go(_settings).ToArray();
    }
    else if (action == SortFilesAction.SearchDuplicatesInOneDir)
    {
        _results = SortFilesSearchDuplicatesInOneDir.Go(_settings).ToArray();
    }
    else
    {
        Utils.MessageErr("Unrecognized action.");
    }

    // update UI on main thread
    WrapInvoke(() =>
    {
        // reuse the column-click handler for column 0
        listView_ColumnClick(null, new ColumnClickEventArgs(0));

        // -2: autosize to the longest item in the column
        listView.Columns[1].Width = -2;

        lblAction.Text = _caption + Utils.NL;
        lblAction.Text += _results.Length + " file(s) listed:";
    });
}
/// <summary>
/// Menu handler: passes the Left_Only rows of the list view to SearchMovedFiles and
/// writes the text returned for each match into that row's left-most column.
/// </summary>
/// <param name="sender">Event source (not used).</param>
/// <param name="e">Event data (not used).</param>
private void searchMovedFilesToolStripMenuItem_Click(object sender, EventArgs e)
{
    Utils.MessageBox("We'll go through every deleted file (exists on the Left " +
        "but not the Right) and see if it is just the result of a " +
        "moved or renamed file (a file with same contents already exists on Right).");

    // snapshot only the rows that exist on the left side alone
    var leftOnlyRows = listView.Items
        .Cast<FileComparisonResult>()
        .Where(item => item.Type == FileComparisonResultType.Left_Only)
        .ToList();

    StartBgAction(() =>
    {
        // the rows that are actually moved files, each paired with a string
        var moved = SortFilesSearchDuplicates.SearchMovedFiles(
            _settings.LeftDirectory, _settings.RightDirectory, leftOnlyRows);

        WrapInvoke(() =>
        {
            var summary = "Found " + moved.Count + " moved file(s).";
            Utils.MessageBox(summary);

            // show the paired string in the left-most column of the matching row
            foreach (var pair in moved)
            {
                pair.Item1.SubItems[0].Text = pair.Item2;
            }

            listView.Refresh();
        });
    });
}
/// <summary>
/// Finds files with duplicate contents under <c>settings.LeftDirectory</c>
/// (searched recursively).
/// </summary>
/// <param name="settings">Settings object; only LeftDirectory is read here.</param>
/// <returns>
/// One Same_Contents result per file whose content hash matches an earlier file of the
/// same size; the earlier file is passed as the first (left) argument.
/// </returns>
public static List<FileComparisonResult> Go(SortFilesSettings settings)
{
    var leftDir = settings.LeftDirectory;
    var duplicates = new List<FileComparisonResult>();

    // first, index the files by filesize only. hashing is deferred, because a file
    // that is the only one of its size cannot be a duplicate.
    var bySize = SortFilesSearchDuplicates.MapFilesizesToFilenames(
        leftDir,
        new DirectoryInfo(leftDir).EnumerateFiles("*", SearchOption.AllDirectories));

    foreach (var sameSize in bySize.Values)
    {
        if (sameSize.Count <= 1)
        {
            continue;
        }

        // several files share this size: hash each one and compare with the earlier ones.
        for (int current = 0; current < sameSize.Count; current++)
        {
            var candidate = sameSize[current];

            // Filename is appended directly to the directory, as in the original code
            candidate.ContentHash = Utils.GetSha512(leftDir + candidate.Filename);

            // scan the already-hashed entries for the first one with the same hash.
            // this is an n-squared scan, but basically amortized by the cost of hashing.
            int earlier = 0;
            while (earlier < current &&
                sameSize[earlier].ContentHash != candidate.ContentHash)
            {
                earlier++;
            }

            if (earlier < current)
            {
                // consistently put the first-appearing file on the 'left' side
                // so that the user can conveniently safely delete all on 'right'.
                duplicates.Add(new FileComparisonResult(
                    sameSize[earlier], candidate, FileComparisonResultType.Same_Contents));
            }
        }
    }

    return duplicates;
}
/// <summary>
/// Test: writes three small files (two of length 4, one of length 5) and checks that
/// MapFilesizesToFilenames groups them by size and fills in Filename, ContentHash,
/// FileSize and LastModifiedTime as expected.
/// </summary>
static void TestMethod_MapFilesizesToFilenames()
{
    var dirTest = TestUtil.GetTestSubDirectory("testMapFilesizesToFilenames");
    var pathA = Path.Combine(dirTest, "a.txt");
    var pathB = Path.Combine(dirTest, "b.txt");
    var pathC = Path.Combine(dirTest, "c.txt");

    File.WriteAllText(pathA, "abcd");
    File.WriteAllText(pathB, "abcde");
    File.WriteAllText(pathC, "1234");

    // adjust the lmt of c.txt
    File.SetLastWriteTimeUtc(pathC, DateTime.Now.AddDays(1));

    var map = SortFilesSearchDuplicates.MapFilesizesToFilenames(
        dirTest, new DirectoryInfo(dirTest).EnumerateFiles("*"));

    // the two 4-byte files, ordered by name so the assertions below are stable
    var mapSorted = map[4].OrderBy(item => item.Filename).ToArray();

    TestUtil.IsEq(2, map.Count);
    TestUtil.IsEq(2, map[4].Count);
    TestUtil.IsEq(1, map[5].Count);

    // test that FileInfoForComparison was set correctly
    var entryA = mapSorted[0];
    TestUtil.IsEq(Utils.Sep + "a.txt", entryA.Filename);
    TestUtil.IsEq(null, entryA.ContentHash);
    TestUtil.IsEq(4L, entryA.FileSize);
    TestUtil.IsEq(File.GetLastWriteTimeUtc(pathA), entryA.LastModifiedTime);

    var entryB = map[5][0];
    TestUtil.IsEq(Utils.Sep + "b.txt", entryB.Filename);
    TestUtil.IsEq(null, entryB.ContentHash);
    TestUtil.IsEq(5L, entryB.FileSize);
    TestUtil.IsEq(File.GetLastWriteTimeUtc(pathB), entryB.LastModifiedTime);

    var entryC = mapSorted[1];
    TestUtil.IsEq(Utils.Sep + "c.txt", entryC.Filename);
    TestUtil.IsEq(null, entryC.ContentHash);
    TestUtil.IsEq(4L, entryC.FileSize);
    TestUtil.IsEq(File.GetLastWriteTimeUtc(pathC), entryC.LastModifiedTime);
}
/// <summary>
/// Test: builds small left/right directories, checks the listing produced by
/// SortFilesSearchDifferences.Go, then checks that SearchMovedFiles pairs each renamed
/// left-side file with its right-side counterpart.
/// </summary>
static void TestMethod_TestSearchMovedFiles()
{
    var settings = new SortFilesSettings();
    var left = TestUtil.GetTestSubDirectory("left_fndmved", true);
    var right = TestUtil.GetTestSubDirectory("right_fndmved", true);
    settings.LeftDirectory = left;
    settings.RightDirectory = right;

    // first, set up test files. each row is { directory, file name, contents }.
    var fixtures = new[]
    {
        new[] { left, "onlyleft.txt", "onlyL" },
        new[] { left, "renamed1.txt", "renamed1" },
        new[] { left, "renamed2.txt", "renamed2" },
        new[] { left, "empty1.txt", "" },
        new[] { left, "changed1.txt", "123" },
        new[] { left, "same.txt", "s" },
        new[] { right, "onlyright.txt", "onlyR" },
        new[] { right, "renamed1.a", "renamed1" },
        new[] { right, "renamed2.a", "renamed2" },
        new[] { right, "empty1.a", "" },
        new[] { right, "changed1.txt", "124" },
        new[] { right, "same.txt", "s" },
    };
    foreach (var fixture in fixtures)
    {
        File.WriteAllText(fixture[0] + Utils.Sep + fixture[1], fixture[2]);
    }

    // set last-write-times: everything gets the same stamp, except the right-side
    // changed1.txt which is pushed one day ahead
    var dtNow = DateTime.Now;
    foreach (var dir in new[] { left, right })
    {
        foreach (var filename in Directory.EnumerateFiles(dir))
        {
            File.SetLastWriteTimeUtc(filename, dtNow);
        }
    }

    File.SetLastWriteTimeUtc(right + Utils.Sep + "changed1.txt", dtNow.AddDays(1));

    // run search-for-differences
    var results = SortFilesSearchDifferences.Go(settings);

    // NOTE(review): the entries below are separated by single spaces exactly as found in
    // the source; confirm this is the separator CompareResultsToString expects.
    var expectedDifferences =
        @"|empty1.a|Right |onlyright.txt|Right |renamed1.a|Right |renamed2.a|Right changed1.txt|changed1.txt|Changed empty1.txt||Left onlyleft.txt||Left renamed1.txt||Left renamed2.txt||Left";
    CompareResultsToString(results, expectedDifferences);

    // run search for moved files, feeding it only the left-only entries
    var leftOnly = results.Where(item => item.Type == FileComparisonResultType.Left_Only);
    var resultsMoved = SortFilesSearchDuplicates.SearchMovedFiles(
        settings.LeftDirectory, settings.RightDirectory, leftOnly);

    // the 0-length empty.txt isn't included in this list,
    // we don't treat it as a duplicate
    TestUtil.IsEq(2, resultsMoved.Count);

    var firstMove = resultsMoved[0];
    TestUtil.IsEq(Utils.Sep + "renamed1.txt", firstMove.Item1.FileInfoLeft.Filename);
    TestUtil.IsEq(Utils.Sep + "renamed1.a", firstMove.Item2);

    var secondMove = resultsMoved[1];
    TestUtil.IsEq(Utils.Sep + "renamed2.txt", secondMove.Item1.FileInfoLeft.Filename);
    TestUtil.IsEq(Utils.Sep + "renamed2.a", secondMove.Item2);
}
// Test: runs the sort-files operations against files generated by
// CreateFileCombinations.Go and compares each result list with an expected listing
// via CompareResultsToString. In order, it checks:
//   1. the number of files created,
//   2. SortFilesSearchDuplicatesInOneDir.Go (result count and listing),
//   3. SortFilesSearchDuplicates.Go with SearchDuplicatesCanUseFiletimes left unset,
//   4. SortFilesSearchDuplicates.Go with SearchDuplicatesCanUseFiletimes = true
//      (the flag is set back to false right after the call),
//   5. SortFilesSearchDifferences.Go, plus a check that the filenames counted in
//      expectedDifferences and expectedSame add up to the number of files created,
//   6. SortFilesSearchDuplicates.SearchForIdenticalFilesWithDifferentWriteTimes, fed
//      the results of step 5.
// NOTE(review): this test uses the same "left_fndmved" / "right_fndmved" directory
// names as TestMethod_TestSearchMovedFiles; presumably the 'true' argument to
// GetTestSubDirectory resets the directory -- confirm.
// NOTE(review): the expected listings are verbatim string literals, so their exact
// whitespace is part of the test data; do not reflow them without checking how
// CompareResultsToString and CountFilenames split entries.
static void TestMethod_TestSortFilesOperations() { // run the methods on actual files. first create combinations of modified/not modified. var settings = new SortFilesSettings(); settings.LeftDirectory = TestUtil.GetTestSubDirectory("left_fndmved", true); settings.RightDirectory = TestUtil.GetTestSubDirectory("right_fndmved", true); var filesCreated = CreateFileCombinations.Go( settings.LeftDirectory, settings.RightDirectory); TestUtil.IsEq( CreateFileCombinations.CountPossibleModifiedTimes() * CreateFileCombinations.CountPossibleContents() * CreateFileCombinations.CountPossibleFilenames() * ((1 * 2) + (3 * 2)), // ExtraCopies.None -> 2 files, the rest -> 3 files filesCreated); // search for duplicates in one dir, only ones it will find are 'extra copy on left.' var results = SortFilesSearchDuplicatesInOneDir.Go(settings); TestUtil.IsEq( CreateFileCombinations.CountPossibleModifiedTimes() * CreateFileCombinations.CountPossibleContents() * CreateFileCombinations.CountPossibleFilenames(), results.Count); // verify sort order. 
for each pair, the left side should sort first alphabetically var expectedDuplicates = @"MTimeAddTextMNameOneOnLeft.a|MTimeAddTextMNameOneOnLeft.a_1|Same_Contents MTimeAddTextSmNameOneOnLeft.a|MTimeAddTextSmNameOneOnLeft.a_1|Same_Contents MTimeAltTextMNameOneOnLeft.a|MTimeAltTextMNameOneOnLeft.a_1|Same_Contents MTimeAltTextSmNameOneOnLeft.a|MTimeAltTextSmNameOneOnLeft.a_1|Same_Contents MTimeSmTextMNameOneOnLeft.a|MTimeSmTextMNameOneOnLeft.a_1|Same_Contents MTimeSmTextSmNameOneOnLeft.a|MTimeSmTextSmNameOneOnLeft.a_1|Same_Contents SmTimeAddTextMNameOneOnLeft.a|SmTimeAddTextMNameOneOnLeft.a_1|Same_Contents SmTimeAddTextSmNameOneOnLeft.a|SmTimeAddTextSmNameOneOnLeft.a_1|Same_Contents SmTimeAltTextMNameOneOnLeft.a|SmTimeAltTextMNameOneOnLeft.a_1|Same_Contents SmTimeAltTextSmNameOneOnLeft.a|SmTimeAltTextSmNameOneOnLeft.a_1|Same_Contents SmTimeSmTextMNameOneOnLeft.a|SmTimeSmTextMNameOneOnLeft.a_1|Same_Contents SmTimeSmTextSmNameOneOnLeft.a|SmTimeSmTextSmNameOneOnLeft.a_1|Same_Contents"; CompareResultsToString(results, expectedDuplicates); // search for duplicates across directories // should find all files on the right marked 'SmText'. 
results = SortFilesSearchDuplicates.Go(settings); var countExpectedDuplicates = (from filename in Directory.EnumerateFiles(settings.RightDirectory) where filename.Contains("SmText") select filename).Count(); TestUtil.IsEq(countExpectedDuplicates, results.Count); // verify sort order expectedDuplicates = @"MTimeSmTextMNameNone.a|MTimeSmTextMNameNone.z|Same_Contents MTimeSmTextMNameOneOnLeft.a|MTimeSmTextMNameOneOnLeft.z|Same_Contents MTimeSmTextMNameOneOnRight.a|MTimeSmTextMNameOneOnRight.z|Same_Contents MTimeSmTextMNameOneOnRight.a|MTimeSmTextMNameOneOnRight.z_1|Same_Contents MTimeSmTextSmNameNone.a|MTimeSmTextSmNameNone.a|Same_Contents MTimeSmTextSmNameOneOnLeft.a|MTimeSmTextSmNameOneOnLeft.a|Same_Contents MTimeSmTextSmNameOneOnRight.a|MTimeSmTextSmNameOneOnRight.a|Same_Contents MTimeSmTextSmNameOneOnRight.a|MTimeSmTextSmNameOneOnRight.a_1|Same_Contents SmTimeSmTextMNameNone.a|SmTimeSmTextMNameNone.z|Same_Contents SmTimeSmTextMNameOneOnLeft.a|SmTimeSmTextMNameOneOnLeft.z|Same_Contents SmTimeSmTextMNameOneOnRight.a|SmTimeSmTextMNameOneOnRight.z|Same_Contents SmTimeSmTextMNameOneOnRight.a|SmTimeSmTextMNameOneOnRight.z_1|Same_Contents SmTimeSmTextSmNameNone.a|SmTimeSmTextSmNameNone.a|Same_Contents SmTimeSmTextSmNameOneOnLeft.a|SmTimeSmTextSmNameOneOnLeft.a|Same_Contents SmTimeSmTextSmNameOneOnRight.a|SmTimeSmTextSmNameOneOnRight.a|Same_Contents SmTimeSmTextSmNameOneOnRight.a|SmTimeSmTextSmNameOneOnRight.a_1|Same_Contents"; CompareResultsToString(results, expectedDuplicates); // search for duplicates across directories, but uses lmt as a shortcut (less thorough) // it will now think that the SmTimeAltText ones are equal because, // when it sees the lmt are the same, it treats them as the same and doesn't check hash settings.SearchDuplicatesCanUseFiletimes = true; results = SortFilesSearchDuplicates.Go(settings); settings.SearchDuplicatesCanUseFiletimes = false; expectedDuplicates = @"MTimeSmTextMNameNone.a|MTimeSmTextMNameNone.z|Same_Contents 
MTimeSmTextMNameOneOnLeft.a|MTimeSmTextMNameOneOnLeft.z|Same_Contents MTimeSmTextMNameOneOnRight.a|MTimeSmTextMNameOneOnRight.z|Same_Contents MTimeSmTextMNameOneOnRight.a|MTimeSmTextMNameOneOnRight.z_1|Same_Contents MTimeSmTextSmNameNone.a|MTimeSmTextSmNameNone.a|Same_Contents MTimeSmTextSmNameOneOnLeft.a|MTimeSmTextSmNameOneOnLeft.a|Same_Contents MTimeSmTextSmNameOneOnRight.a|MTimeSmTextSmNameOneOnRight.a|Same_Contents MTimeSmTextSmNameOneOnRight.a|MTimeSmTextSmNameOneOnRight.a_1|Same_Contents SmTimeAltTextSmNameNone.a|SmTimeAltTextSmNameNone.a|Same_Contents SmTimeAltTextSmNameOneOnLeft.a|SmTimeAltTextSmNameOneOnLeft.a|Same_Contents SmTimeAltTextSmNameOneOnRight.a|SmTimeAltTextSmNameOneOnRight.a|Same_Contents SmTimeSmTextMNameNone.a|SmTimeSmTextMNameNone.z|Same_Contents SmTimeSmTextMNameOneOnLeft.a|SmTimeSmTextMNameOneOnLeft.z|Same_Contents SmTimeSmTextMNameOneOnRight.a|SmTimeSmTextMNameOneOnRight.z|Same_Contents SmTimeSmTextMNameOneOnRight.a|SmTimeSmTextMNameOneOnRight.z_1|Same_Contents SmTimeSmTextSmNameNone.a|SmTimeSmTextSmNameNone.a|Same_Contents SmTimeSmTextSmNameOneOnLeft.a|SmTimeSmTextSmNameOneOnLeft.a|Same_Contents SmTimeSmTextSmNameOneOnRight.a|SmTimeSmTextSmNameOneOnRight.a|Same_Contents SmTimeSmTextSmNameOneOnRight.a|SmTimeSmTextSmNameOneOnRight.a_1|Same_Contents"; CompareResultsToString(results, expectedDuplicates); // search for differences in similar directories. 
results = SortFilesSearchDifferences.Go(settings); var expectedDifferences = @"|MTimeAddTextMNameNone.z|Right |MTimeAddTextMNameOneOnLeft.z|Right |MTimeAddTextMNameOneOnRight.z|Right |MTimeAddTextMNameOneOnRight.z_1|Right |MTimeAddTextSmNameOneOnRight.a_1|Right |MTimeAltTextMNameNone.z|Right |MTimeAltTextMNameOneOnLeft.z|Right |MTimeAltTextMNameOneOnRight.z|Right |MTimeAltTextMNameOneOnRight.z_1|Right |MTimeAltTextSmNameOneOnRight.a_1|Right |MTimeSmTextMNameNone.z|Right |MTimeSmTextMNameOneOnLeft.z|Right |MTimeSmTextMNameOneOnRight.z|Right |MTimeSmTextMNameOneOnRight.z_1|Right |MTimeSmTextSmNameOneOnRight.a_1|Right |SmTimeAddTextMNameNone.z|Right |SmTimeAddTextMNameOneOnLeft.z|Right |SmTimeAddTextMNameOneOnRight.z|Right |SmTimeAddTextMNameOneOnRight.z_1|Right |SmTimeAddTextSmNameOneOnRight.a_1|Right |SmTimeAltTextMNameNone.z|Right |SmTimeAltTextMNameOneOnLeft.z|Right |SmTimeAltTextMNameOneOnRight.z|Right |SmTimeAltTextMNameOneOnRight.z_1|Right |SmTimeAltTextSmNameOneOnRight.a_1|Right |SmTimeSmTextMNameNone.z|Right |SmTimeSmTextMNameOneOnLeft.z|Right |SmTimeSmTextMNameOneOnRight.z|Right |SmTimeSmTextMNameOneOnRight.z_1|Right |SmTimeSmTextSmNameOneOnRight.a_1|Right MTimeAddTextMNameNone.a||Left MTimeAddTextMNameOneOnLeft.a||Left MTimeAddTextMNameOneOnLeft.a_1||Left MTimeAddTextMNameOneOnRight.a||Left MTimeAddTextSmNameNone.a|MTimeAddTextSmNameNone.a|Changed MTimeAddTextSmNameOneOnLeft.a|MTimeAddTextSmNameOneOnLeft.a|Changed MTimeAddTextSmNameOneOnLeft.a_1||Left MTimeAddTextSmNameOneOnRight.a|MTimeAddTextSmNameOneOnRight.a|Changed MTimeAltTextMNameNone.a||Left MTimeAltTextMNameOneOnLeft.a||Left MTimeAltTextMNameOneOnLeft.a_1||Left MTimeAltTextMNameOneOnRight.a||Left MTimeAltTextSmNameNone.a|MTimeAltTextSmNameNone.a|Changed MTimeAltTextSmNameOneOnLeft.a|MTimeAltTextSmNameOneOnLeft.a|Changed MTimeAltTextSmNameOneOnLeft.a_1||Left MTimeAltTextSmNameOneOnRight.a|MTimeAltTextSmNameOneOnRight.a|Changed MTimeSmTextMNameNone.a||Left MTimeSmTextMNameOneOnLeft.a||Left 
MTimeSmTextMNameOneOnLeft.a_1||Left MTimeSmTextMNameOneOnRight.a||Left MTimeSmTextSmNameNone.a|MTimeSmTextSmNameNone.a|Changed MTimeSmTextSmNameOneOnLeft.a|MTimeSmTextSmNameOneOnLeft.a|Changed MTimeSmTextSmNameOneOnLeft.a_1||Left MTimeSmTextSmNameOneOnRight.a|MTimeSmTextSmNameOneOnRight.a|Changed SmTimeAddTextMNameNone.a||Left SmTimeAddTextMNameOneOnLeft.a||Left SmTimeAddTextMNameOneOnLeft.a_1||Left SmTimeAddTextMNameOneOnRight.a||Left SmTimeAddTextSmNameNone.a|SmTimeAddTextSmNameNone.a|Changed SmTimeAddTextSmNameOneOnLeft.a|SmTimeAddTextSmNameOneOnLeft.a|Changed SmTimeAddTextSmNameOneOnLeft.a_1||Left SmTimeAddTextSmNameOneOnRight.a|SmTimeAddTextSmNameOneOnRight.a|Changed SmTimeAltTextMNameNone.a||Left SmTimeAltTextMNameOneOnLeft.a||Left SmTimeAltTextMNameOneOnLeft.a_1||Left SmTimeAltTextMNameOneOnRight.a||Left SmTimeAltTextSmNameOneOnLeft.a_1||Left SmTimeSmTextMNameNone.a||Left SmTimeSmTextMNameOneOnLeft.a||Left SmTimeSmTextMNameOneOnLeft.a_1||Left SmTimeSmTextMNameOneOnRight.a||Left SmTimeSmTextSmNameOneOnLeft.a_1||Left"; CompareResultsToString(results, expectedDifferences); // account for all 96 files. // (SortFilesSearchDifferences doesn't check hashes, so although it knows // AddText are different because filesize changes, // it won't detect AltText unless filesize or lmt are also different.) 
var expectedSame = @"SmTimeAltTextSmNameNone.a|SmTimeAltTextSmNameNone.a SmTimeAltTextSmNameOneOnLeft.a|SmTimeAltTextSmNameOneOnLeft.a SmTimeAltTextSmNameOneOnRight.a|SmTimeAltTextSmNameOneOnRight.a SmTimeSmTextSmNameNone.a|SmTimeSmTextSmNameNone.a SmTimeSmTextSmNameOneOnLeft.a|SmTimeSmTextSmNameOneOnLeft.a SmTimeSmTextSmNameOneOnRight.a|SmTimeSmTextSmNameOneOnRight.a"; TestUtil.IsEq(filesCreated, CountFilenames(expectedDifferences) + CountFilenames(expectedSame)); // search for identical files with different write times // will find all with MTimeSmText var found = SortFilesSearchDuplicates.SearchForIdenticalFilesWithDifferentWriteTimes( settings.LeftDirectory, settings.RightDirectory, results); var expectedIdenticalContents = @"MTimeSmTextSmNameNone.a|MTimeSmTextSmNameNone.a|Changed MTimeSmTextSmNameOneOnLeft.a|MTimeSmTextSmNameOneOnLeft.a|Changed MTimeSmTextSmNameOneOnRight.a|MTimeSmTextSmNameOneOnRight.a|Changed"; CompareResultsToString(found, expectedIdenticalContents); }