/// <summary>
/// Walks every entry of <paramref name="fileDictionary"/>, asks on the console which file of
/// the group should be kept, and deletes every other file of that group from disk.
/// </summary>
/// <param name="fileDictionary">
/// Groups of file paths; the value of each entry is the list of paths offered for selection.
/// </param>
/// <remarks>
/// The kept path is removed from the entry's list, so the dictionary passed in is modified.
/// Entering 0 skips a group without deleting anything. If console input ends before an
/// answer is given, the remaining groups are left untouched and the method returns.
/// </remarks>
public static void SelectAndDeleteDuplicates(FileDictionary fileDictionary)
{
    foreach (var file in fileDictionary)
    {
        Console.WriteLine("Duplicate found. Select which file to keep:");

        // Menu entry N corresponds to options[N - 1].
        var options = new List<string>();
        foreach (var filePath in file.Value)
        {
            options.Add(filePath);
            Console.WriteLine($"{options.Count}) {filePath}");
        }

        Console.WriteLine("\nOr press 0 to skip.\n");

        int maxSelection = options.Count;
        int selection = -1;
        while (selection < 0)
        {
            var userInput = Console.ReadLine();
            if (userInput == null)
            {
                // End of input (closed or exhausted stdin): no answer can ever arrive, so
                // stop here instead of re-prompting forever.
                Console.WriteLine("No more input available. Skipping remaining selections.\n");
                return;
            }

            if (!int.TryParse(userInput, out selection))
            {
                // TryParse zeroes the out value on failure; restore the "no answer yet" marker.
                selection = -1;
                Console.WriteLine($"Selection '{userInput}' is not valid. Please select a value between 0 and {maxSelection}");
            }
            else if (selection < 0 || selection > maxSelection)
            {
                selection = -1;
                Console.WriteLine($"Selection '{userInput}' was out of bounds. Please select a value between 0 and {maxSelection}");
            }
        }

        if (selection == 0)
        {
            Console.WriteLine("Skipping selection.\n");
            continue;
        }

        string keptPath = options[selection - 1];
        Console.WriteLine($"{keptPath} is selected. All others will be deleted.");
        file.Value.Remove(keptPath);

        foreach (var filePath in file.Value)
        {
            // Safety net: if the kept path is listed more than once, a leftover occurrence
            // must not cause the file the user chose to keep to be deleted.
            if (string.Equals(filePath, keptPath, StringComparison.Ordinal))
            {
                continue;
            }

            Console.WriteLine($"Deleting file located at: {filePath}\n");
            File.Delete(filePath);
        }
    }
}
/// <summary>
/// Program entry point: parses the command line, searches the configured directories for
/// duplicate files, writes the matches to the configured output file, and then prompts for
/// which duplicates to delete.
/// </summary>
/// <param name="args">Command-line arguments, handed to <c>ParameterOptions.Parse</c>.</param>
static void Main(string[] args)
{
    ParameterOptions.Parse(args);

    // Subscribe before scanning so duplicate notifications raised during the scan are handled.
    var finder = new DupeFinder();
    finder.DuplicateFound += OutputDuplicateFound;

    var matches = finder.FindMatches(ParameterOptions.Directories, ParameterOptions.Filters);

    Console.WriteLine();
    Console.WriteLine($"\n{matches.Count} Matches Found\n");

    // Replace any earlier report: remove the old output file, then write the current matches.
    File.Delete(ParameterOptions.Output);
    File.AppendAllLines(ParameterOptions.Output, matches.ToStringArray());

    SelectAndDeleteDuplicates(matches);
}
/// <summary>
/// Collects the filtered files of every search path, runs the preliminary scan over them,
/// and then runs the full scan over the preliminary result.
/// </summary>
/// <param name="searchPaths">Directories whose files are gathered through <c>GetFilteredFiles</c>.</param>
/// <param name="filters">Filters passed unchanged to <c>GetFilteredFiles</c> for every directory.</param>
/// <returns>The dictionary produced by <c>ScanAllFiles</c>.</returns>
/// <remarks>
/// The full scan is asynchronous but is waited on synchronously here, so a failure inside it
/// surfaces wrapped in an <see cref="AggregateException"/>.
/// </remarks>
public FileDictionary FindMatches(List<string> searchPaths, List<string> filters)
{
    FileDictionary matches = new FileDictionary();
    var candidatePaths = new List<string>();

    // Gather every file to be processed, one search directory at a time.
    searchPaths.ForEach(directory => candidatePaths.AddRange(GetFilteredFiles(directory, filters)));

    // First pass: preliminary scan of the gathered files.
    matches = PreliminaryFileScan(candidatePaths);

    // Second pass: process the records that represent potential matches.
    // TODO: Revisit this, why are you using async?
    Task<FileDictionary> fullScan = Task.Run(() => ScanAllFiles(matches));
    matches = fullScan.Result;

    return matches;
}
/// <summary>
/// Builds a dictionary of the given files keyed by the value returned from
/// <c>ByteTool.GetKilobyteMd5Hash</c>, printing one progress character per file.
/// </summary>
/// <param name="filePaths">Paths of the files to scan.</param>
/// <returns>The dictionary that every scanned path was added to.</returns>
/// <remarks>
/// <c>OnDuplicateFound</c> is called when <c>FileDictionary.Add</c> returns <c>true</c> and
/// <c>OnDuplicateNotFound</c> otherwise. Exceptions from hashing, adding, or the notification
/// calls propagate unchanged, with their original stack trace.
/// </remarks>
public FileDictionary PreliminaryFileScan(List<string> filePaths)
{
    int fileCounter = 0;
    FileDictionary returnFileDictionary = new FileDictionary();
    Console.WriteLine($"\nBegin Preliminary File Scan: {filePaths.Count} Files");

    foreach (var filePath in filePaths)
    {
        // NOTE(review): the progress character is never switched to the "match" one in this
        // scan, even when Add reports a duplicate — confirm that is intended.
        char outputChar = outpucChar_noMatch;
        var firstKilobyteHash = ByteTool.GetKilobyteMd5Hash(filePath);

        if (returnFileDictionary.Add(firstKilobyteHash, filePath))
        {
            this.OnDuplicateFound();
        }
        else
        {
            this.OnDuplicateNotFound();
        }

        Console.Write(outputChar);
        fileCounter++;

        // Wrap the progress output after every 100 files.
        if (fileCounter % 100 == 0)
        {
            Console.WriteLine();
        }
    }

    return returnFileDictionary;
}
/// <summary>
/// Processes the groups produced by the preliminary scan and does a full byte comparison to
/// see whether the files in each group are actual matches.
/// </summary>
/// <param name="fileDictionary">
/// All possible matches. Only entries holding more than one path are compared.
/// </param>
/// <returns>
/// A new dictionary to which, for every pair that compared equal, both paths were added
/// under the key of the group they came from.
/// </returns>
/// <remarks>
/// NOTE(review): every match inside a group is recorded under that group's preliminary key.
/// If two unrelated pairs share a key (A equals B, C equals D, A differs from C), they
/// presumably end up in the same result entry — verify before treating an entry as one set
/// of identical files.
/// </remarks>
public static async Task<FileDictionary> ScanAllFiles(FileDictionary fileDictionary)
{
    // Materialize once: the filtered groups are needed for the banner count and for the loop.
    var filesForFullScan = fileDictionary.Where(x => x.Value.Count > 1).ToList();
    FileDictionary returnFileDictionary = new FileDictionary();
    Console.WriteLine($"\n\nBegin Full File Scan: {filesForFullScan.Count} Files");
    int iterationCounter = 0;

    foreach (var fileDictionaryItem in filesForFullScan)
    {
        // Declared per group and never reset: once one pair in the group matches, the
        // remaining pairs of that group also print the match character.
        char outputChar = outpucChar_noMatch;
        string[] filePathArray = fileDictionaryItem.Value.ToArray();

        // Compare every pair in the group exactly once.
        // referencePathPosition is the reference file for the comparison.
        // comparePathPosition is the file being compared against it.
        for (int referencePathPosition = 0; referencePathPosition < filePathArray.Length; referencePathPosition++)
        {
            for (int comparePathPosition = referencePathPosition + 1; comparePathPosition < filePathArray.Length; comparePathPosition++)
            {
                // Full comparison of the two files. This is going to be a problem for really
                // big files (over a couple of gigs).
                bool fileMatch = await ByteTool.CompareByteArray(filePathArray[referencePathPosition], filePathArray[comparePathPosition]);

                if (fileMatch)
                {
                    returnFileDictionary.Add(fileDictionaryItem.Key, filePathArray[referencePathPosition]);

                    // The result of Add is checked because the compared file may already have
                    // been recorded by an earlier pair: with | F1 | F2 | F3 | F4 |, if F1
                    // matches F2 and F3 on the first sweep, F2 matches F3 again on the second
                    // sweep, which is not a new finding.
                    if (returnFileDictionary.Add(fileDictionaryItem.Key, filePathArray[comparePathPosition]))
                    {
                        outputChar = outputChar_match;
                    }
                }

                Console.Write(outputChar);
                iterationCounter++;

                // Wrap the progress output after every 100 comparisons.
                if (iterationCounter % 100 == 0)
                {
                    Console.WriteLine();
                }
            }
        }
    }

    return returnFileDictionary;
}