/// <summary>
/// Repeatedly calls <c>FuzzySearch.FSearch</c> against <c>SearchCollection</c> with an
/// increasing third argument (0.1, 0.2, ... 1.1) until the result narrows to a single entry.
/// </summary>
/// <param name="games">
/// Not read by this method; the search always runs against <c>SearchCollection</c>.
/// NOTE(review): confirm whether callers expect this list to be searched instead.
/// </param>
/// <param name="searchStr">
/// The text to search for. It is lower-cased and passed through <c>StripSymbols</c> before searching.
/// </param>
/// <returns>
/// The single-entry result as soon as a step yields exactly one entry; otherwise the most recent
/// multi-entry result seen before a step yielded nothing (or before the steps ran out).
/// An empty list when no step produced more than one entry, or when <paramref name="searchStr"/> is null.
/// </returns>
public List<ScraperMaster> LevenIteration(List<ScraperMaster> games, string searchStr)
{
    // NOTE(review): eleven steps give values 0.1 .. 1.1. Whether the last value is
    // meaningful depends on FuzzySearch.FSearch - confirm before trimming it to ten.
    const int StepCount = 11;

    List<ScraperMaster> lastMultiple = new List<ScraperMaster>();

    // Nothing to search for: report "no match" instead of failing on searchStr.ToLower().
    if (searchStr == null)
    {
        return lastMultiple;
    }

    // The normalised term does not change between steps, so build it once.
    string term = StripSymbols(searchStr.ToLower());

    for (int step = 1; step <= StepCount; step++)
    {
        double fuzziness = step / 10.0;
        List<ScraperMaster> found = FuzzySearch.FSearch(term, SearchCollection, fuzziness);

        if (found.Count == 1)
        {
            return found;
        }

        if (found.Count == 0)
        {
            // This step removed everything - fall back to the last multi-entry result.
            return lastMultiple;
        }

        // More than one entry: remember a copy and try the next step.
        lastMultiple = new List<ScraperMaster>(found);
    }

    return lastMultiple;
}
/// <summary>
/// Fuzzy string matching (not currently used).
/// Fills <c>SearchCollection</c> with the entries of <c>SystemCollection</c> whose
/// <c>GDBTitle</c> approximately equals <paramref name="searchStr"/>, then tries a series of
/// narrowing steps (substring match, matching-word count, stepped <c>FuzzySearch.FSearch</c>,
/// leading-word prefix match) and stores the outcome in <c>WorkingSearchCollection</c>.
/// </summary>
/// <param name="searchStr">The title text to search for.</param>
/// <param name="manualIterator">
/// The iteration to run (1, 2, 3 or 100). A value of zero or less increments
/// <c>LocalIterationCount</c> and runs that iteration instead. Any other value returns immediately.
/// </param>
private void StartFuzzySearch(string searchStr, int manualIterator)
{
    // Zero or less means "advance to the next automatic iteration".
    if (manualIterator <= 0)
    {
        LocalIterationCount++;
        manualIterator = LocalIterationCount;
    }

    // Set up the comparison options and tolerance for this iteration.
    List<FuzzyStringComparisonOptions> fuzzOptions = new List<FuzzyStringComparisonOptions>();
    FuzzyStringComparisonTolerance tolerance;

    switch (manualIterator)
    {
        /* Iterations to widen the selection */

        // 1st auto iteration - longest common subsequence + substring, normal tolerance
        case 1:
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseLongestCommonSubsequence);
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseLongestCommonSubstring);
            tolerance = FuzzyStringComparisonTolerance.Normal;
            break;

        // 2nd auto iteration - as the first, plus overlap coefficient
        case 2:
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseLongestCommonSubsequence);
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseLongestCommonSubstring);
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseOverlapCoefficient);
            tolerance = FuzzyStringComparisonTolerance.Normal;
            break;

        // 3rd auto iteration - as the second, but with weak tolerance
        case 3:
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseLongestCommonSubsequence);
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseLongestCommonSubstring);
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseOverlapCoefficient);
            tolerance = FuzzyStringComparisonTolerance.Weak;
            break;

        /* Iterations to narrow down the selection */

        // 1st manual iteration - similarity coefficients, strong tolerance
        case 100:
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity);
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseSorensenDiceDistance);
            fuzzOptions.Add(FuzzyStringComparisonOptions.UseTanimotoCoefficient);
            tolerance = FuzzyStringComparisonTolerance.Strong;
            break;

        // no such iteration - end and return
        default:
            return;
    }

    // Add every fuzzy match from the system collection to SearchCollection.
    FuzzyStringComparisonOptions[] optionArray = fuzzOptions.ToArray();
    foreach (ScraperMaster game in SystemCollection)
    {
        if (searchStr.ApproximatelyEquals(game.GDBTitle, tolerance, optionArray))
        {
            SearchCollection.Add(game);
        }
    }

    if (SearchCollection.Count == 1)
    {
        // Hand out a copy so later changes to SearchCollection do not leak into the result.
        WorkingSearchCollection = new List<ScraperMaster>(SearchCollection);
        return;
    }

    // Check whether the game title contains the search text.
    // NOTE(review): this uses the SearchString member, not the searchStr parameter - confirm intended.
    string paddedSearch = AddTrailingWhitespace(SearchString);
    List<ScraperMaster> containing = SystemCollection
        .Where(a => AddTrailingWhitespace(a.GDBTitle.ToLower()).Contains(paddedSearch))
        .ToList();

    if (containing.Count == 1)
    {
        // only 1 entry found - return
        SearchCollection = new List<ScraperMaster>(containing);
        WorkingSearchCollection = new List<ScraperMaster>(containing);
        return;
    }

    if (containing.Count > 1)
    {
        // multiples found - replace the search collection with them
        SearchCollection = new List<ScraperMaster>(containing);
    }

    // Zero substring hits: keep the fuzzy matches and proceed to the other searches.

    if (SearchCollection.Count == 0)
    {
        // no matches at all - run this method again with the next iterator
        StartFuzzySearch(searchStr, 0);
        return;
    }

    // SearchCollection now holds several candidates - count how many search words each title shares.
    string[] searchWords = BuildArray(searchStr);
    string[] strippedSearchWords = searchWords.Select(w => StripSymbols(w)).ToArray();

    Dictionary<ScraperMaster, int> totals = new Dictionary<ScraperMaster, int>();
    foreach (ScraperMaster candidate in SearchCollection)
    {
        string[] strippedTitleWords = BuildArray(candidate.GDBTitle)
            .Select(w => StripSymbols(w))
            .ToArray();

        // Indexer instead of Add: a candidate listed twice must not throw.
        totals[candidate] = strippedSearchWords.Count(w => strippedTitleWords.Contains(w));
    }

    // Select all candidates that share the highest word count.
    int maxValue = totals.Values.Max();
    List<ScraperMaster> matches = totals
        .Where(pair => pair.Value == maxValue)
        .Select(pair => pair.Key)
        .ToList();

    if (matches.Count == 1)
    {
        // single match found
        WorkingSearchCollection = new List<ScraperMaster>(matches);
        return;
    }

    // Run FuzzySearch.FSearch on SearchCollection with values 0.1 .. 1.1, stopping on a single hit.
    // NOTE(review): eleven steps; whether 1.1 is meaningful depends on FSearch - confirm.
    for (int step = 1; step <= 11; step++)
    {
        List<ScraperMaster> found = FuzzySearch.FSearch(searchStr, SearchCollection, step / 10.0);
        if (found.Count == 1)
        {
            WorkingSearchCollection = new List<ScraperMaster>(found);
            return;
        }
    }

    // Still several candidates: append them to the working collection as the fallback result.
    // ToList() takes a snapshot in case both members refer to the same list.
    WorkingSearchCollection.AddRange(SearchCollection.ToList());

    // Try the first word, then the first two words, and so on, against every title.
    for (int i = 0; i < searchWords.Length; i++)
    {
        string prefix = StripSymbols(string.Join(" ", searchWords, 0, i + 1)).ToLower();
        List<ScraperMaster> hits = SystemCollection
            .Where(a => a.GDBTitle.ToLower().Contains(prefix))
            .ToList();

        if (hits.Count == 1)
        {
            // one entry returned - this is the one to keep
            WorkingSearchCollection = new List<ScraperMaster>(hits);
            return;
        }

        if (hits.Count > 1)
        {
            // still multiple entries - remember them and try a longer prefix
            WorkingSearchCollection = new List<ScraperMaster>(hits);
        }

        // Zero hits: keep the previous working collection and carry on.
    }
}