/// <summary>
/// Scans the hash library for entries that are at least <paramref name="minSimilarity"/>
/// similar to <paramref name="hash"/> and gathers the ones not yet marked as duplicates.
/// </summary>
/// <param name="hash">Hash that every library entry is compared against.</param>
/// <param name="minSimilarity">Lowest <c>CompareWith</c> result that counts as a match (inclusive).</param>
/// <param name="alreadyMarkedAsDupl">
/// Running list of hashes already assigned to a duplicate group; every hash returned
/// by this call is appended to it as well.
/// </param>
/// <returns>
/// The hashes newly marked during this call. On the first match <paramref name="hash"/>
/// itself is included (if it was not marked before), followed by each matching library entry.
/// </returns>
private List<ImgHash> FindDuplicatesTo(ImgHash hash, int minSimilarity, ref List<ImgHash> alreadyMarkedAsDupl)
{
    var newlyMarked = new List<ImgHash>();

    foreach (var libraryEntry in _hashLib)
    {
        bool similarEnough = hash.CompareWith(libraryEntry) >= minSimilarity;
        if (!similarEnough)
        {
            continue;
        }

        // Order matters: the reference hash is considered before the entry it matched.
        foreach (var member in new[] { hash, libraryEntry })
        {
            if (alreadyMarkedAsDupl.Contains(member))
            {
                continue;
            }

            alreadyMarkedAsDupl.Add(member);
            newlyMarked.Add(member);
        }
    }

    return newlyMarked;
}
// NOTE: the commented-out code below is an unfinished draft that may be completed
// in the future to make the duplicate search faster.
//public List<List<ImgHash>> FindDuplicatesWithTollerance(int minSimilarity = 90, bool useFastSearch = true)
//{
//    List<ImgHash> duplicatesFound = new List<ImgHash>();
//    var result = new List<List<ImgHash>>();
//    ParallelTaskRunner ptr = new ParallelTaskRunner();
//    var ranges = ptr.CalculateTaskRanges(0, _hashLib.Count, 1);
//    ptr.Add(new Task(() => {
//        FindDuplicatesInRange(0, _hashLib.Count, minSimilarity, ref result, ref duplicatesFound);
//    }));
//    ptr.StartAllTasksAndWaitForFinish();
//    result = result.Where(a => a.Count >= 2).ToList();
//    return result;
//}
//public void FindDuplicatesInRange(int start, int end, int minSimilarity, ref List<List<ImgHash>> result, ref List<ImgHash> duplicatesFound)
//{
//    for (int i = start; i < end; i++)
//    {
//        var lastAddedLst = new List<ImgHash>();
//        result.Add(lastAddedLst);
//        if (!duplicatesFound.Contains(_hashLib[i]))
//        {
//            foreach (var anotherHash in _hashLib)
//            {
//                if (_hashLib[i].CompareWith(anotherHash) > minSimilarity)
//                {
//                    if (!duplicatesFound.Contains(_hashLib[i]))
//                    {
//                        duplicatesFound.Add(_hashLib[i]);
//                        lastAddedLst.Add(_hashLib[i]);
//                    }
//                    if (!duplicatesFound.Contains(anotherHash))
//                    {
//                        duplicatesFound.Add(anotherHash);
//                        lastAddedLst.Add(anotherHash);
//                    }
//                }
//            }
//        }
//    }
//}

/// <summary>
/// Creates a new <c>ImgHash</c> sized by <c>_hashSize</c>, fills it via
/// <c>GenerateFromPath</c> and appends it to the hash library.
/// </summary>
/// <param name="path">Path handed to <c>ImgHash.GenerateFromPath</c>.</param>
public void AddPicByPath(string path)
{
    ImgHash pictureHash = new ImgHash(_hashSize);
    pictureHash.GenerateFromPath(path);

    _hashLib.Add(pictureHash);
}
/// <summary>
/// Compares this image hash with another one, element by element.
/// </summary>
/// <param name="compareWith">Hash to compare against; its <c>HashData</c> must have the same length as this one's.</param>
/// <returns>
/// Similarity as a percentage in the range 0..100: the share of positions at which both
/// hashes hold equal values. 100 means the hashes are identical, 0 means every position differs.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="compareWith"/> is null.</exception>
/// <exception cref="ArgumentException">The two hashes have different sizes.</exception>
public double CompareWith(ImgHash compareWith)
{
    if (ReferenceEquals(compareWith, null))
    {
        throw new ArgumentNullException(nameof(compareWith));
    }

    if (HashData.Length != compareWith.HashData.Length)
    {
        // Message text kept unchanged; ArgumentException is still caught by `catch (Exception)`.
        throw new ArgumentException("Cannot compare hashes with different sizes");
    }

    // Two empty hashes have nothing that differs; also avoids dividing by zero below.
    if (HashData.Length == 0)
    {
        return 100.0;
    }

    int differenceCounter = 0;
    for (int i = 0; i < HashData.Length; i++)
    {
        if (HashData[i] != compareWith.HashData[i])
        {
            differenceCounter++;
        }
    }

    // Normalise by the hash length so the result is a true percentage regardless of hash size.
    // The previous expression multiplied by the length, which produced negative values for
    // large hashes and near-100 values for small, completely different hashes.
    return 100.0 - differenceCounter * 100.0 / HashData.Length;
}