/// <summary>
/// Returns all possible matches from a given list of files.
/// </summary>
/// <remarks>
/// Files are first narrowed down to those that share their size with at least one other file,
/// then hashed via <c>Cryptography.GetMD5Hash</c> and grouped by the resulting hash.
/// Progress is pushed to <c>HashProgressUpdate</c> and <c>progressWorker</c> when they are set;
/// the reported total is the number of files left after the size filter, not the input count.
/// If <c>progressWorker</c> signals a pending cancellation, an empty dictionary is returned.
/// </remarks>
/// <param name="files">A list containing the information for each file</param>
/// <returns>A dictionary where the key is the file hash and the value is the list of files that produced that hash; hashes produced by only one file are left out.</returns>
private static Dictionary<string, List<FileHashPair>> GetMatchesFromFiles(List<FileInfo> files)
{
    Dictionary<string, List<FileHashPair>> possibleMatches = new Dictionary<string, List<FileHashPair>>();
    ProgressInfo info = new ProgressInfo();

    // We can eliminate a lot of possibilities by comparing file size.
    // A file whose size is unique in the list cannot have the same hash as any other file,
    // so it is dropped before the (expensive) hashing step. Sorting first keeps the
    // survivors in ascending-size order (stable within equal sizes) for hashing.
    // This also covers lists of one or two files, which need no special casing.
    List<FileInfo> candidates = files
        .OrderBy(p => p.Length)
        .GroupBy(p => p.Length)
        .Where(sameSize => sameSize.Count() > 1)
        .SelectMany(sameSize => sameSize)
        .ToList();

    // Calculate the hash value for each remaining file
    List<FileHashPair> fileHashPairs = new List<FileHashPair>(candidates.Count);
    foreach (FileInfo file in candidates)
    {
        FileHashPair fileHashPair = new FileHashPair(file.FullName, Cryptography.GetMD5Hash(file), file.Length);
        fileHashPairs.Add(fileHashPair);

        if (HashProgressUpdate != null)
        {
            HashProgressUpdate(fileHashPairs.Count, candidates.Count);
        }

        // NOTE(review): progressWorker looks like a BackgroundWorker declared elsewhere — confirm.
        if (progressWorker != null && progressWorker.IsBusy && progressWorker.WorkerReportsProgress)
        {
            info.FilesProcessed = fileHashPairs.Count;
            info.TotalNumberOfFiles = candidates.Count;
            // Cast to double so the division yields a fraction instead of truncating to 0.
            progressWorker.ReportProgress((int)((fileHashPairs.Count / (double)candidates.Count) * 100), info);
        }

        // Cancellation is checked after each file; partial results are discarded.
        if (progressWorker != null && progressWorker.WorkerSupportsCancellation && progressWorker.CancellationPending)
        {
            possibleMatches.Clear();
            return possibleMatches;
        }
    }

    // Bucket the hashed files by hash value (single lookup per file).
    foreach (FileHashPair fileHashPair in fileHashPairs)
    {
        List<FileHashPair> bucket;
        if (!possibleMatches.TryGetValue(fileHashPair.FileHash, out bucket))
        {
            bucket = new List<FileHashPair>();
            possibleMatches.Add(fileHashPair.FileHash, bucket);
        }

        bucket.Add(fileHashPair);
    }

    // Same size does not imply same content: drop every hash that ended up with a single file.
    // Keys are collected first because a dictionary cannot be modified while it is enumerated.
    List<string> orphanKeys = possibleMatches
        .Where(entry => entry.Value.Count == 1)
        .Select(entry => entry.Key)
        .ToList();

    foreach (string orphanKey in orphanKeys)
    {
        possibleMatches.Remove(orphanKey);
    }

    return possibleMatches;
}
/// <summary>
/// Determines whether this pair and the given pair carry the same file hash.
/// </summary>
/// <param name="fileHashPair">The pair to compare against; may be null.</param>
/// <returns>
/// True when <paramref name="fileHashPair"/> is not null, this instance's FileHash is not null,
/// and both FileHash values compare equal with <c>==</c>; otherwise false.
/// </returns>
public bool Equals(FileHashPair fileHashPair)
{
    // Reference check (not operator ==) so a user-defined equality operator is never invoked here.
    return !object.ReferenceEquals(fileHashPair, null)
        && FileHash != null
        && FileHash == fileHashPair.FileHash;
}