Пример #1
0
        /// <summary>
        /// Runs <c>FuzzySearch.FSearch</c> over <c>SearchCollection</c> repeatedly, raising the
        /// numeric threshold by 0.1 on each pass (0.1, 0.2, ... 1.1), until a pass settles the result.
        /// </summary>
        /// <remarks>
        /// The search text is lower-cased and passed through <c>StripSymbols</c> before each pass.
        /// NOTE(review): <paramref name="games"/> is never read - the search always runs against
        /// <c>SearchCollection</c>. Confirm whether that is intended.
        /// </remarks>
        /// <param name="games">Not used by the current implementation.</param>
        /// <param name="searchStr">Text to search for.</param>
        /// <returns>
        /// The hit list of the first pass that yields exactly one entry. If a pass yields nothing,
        /// or all passes complete, a copy of the most recent multi-entry hit list is returned
        /// (an empty list when no pass produced more than one entry).
        /// </returns>
        public List <ScraperMaster> LevenIteration(List <ScraperMaster> games, string searchStr)
        {
            List <ScraperMaster> lastMultiple = new List <ScraperMaster>();

            // steps 1..11 reproduce the thresholds 0.1 .. 1.1
            for (int step = 1; step <= 11; step++)
            {
                double threshold = Convert.ToDouble(step) / 10;
                List <ScraperMaster> hits = FuzzySearch.FSearch(StripSymbols(searchStr.ToLower()), SearchCollection, threshold);

                switch (hits.Count)
                {
                case 0:
                    // nothing at this threshold - fall back to the last ambiguous result
                    return lastMultiple;

                case 1:
                    // unambiguous match
                    return hits;

                default:
                    // still ambiguous - remember a copy and try the next threshold
                    lastMultiple = new List <ScraperMaster>(hits);
                    break;
                }
            }

            return lastMultiple;
        }
Пример #2
0
        /// <summary>
        /// Fuzzy string matching (not currently used).
        /// Tries to reduce <c>SystemCollection</c> to the entries whose <c>GDBTitle</c> best matches
        /// <paramref name="searchStr"/>. Candidates are gathered in <c>SearchCollection</c> and the
        /// outcome is left in <c>WorkingSearchCollection</c>; nothing is returned.
        /// </summary>
        /// <remarks>
        /// Stages, each of which ends the search as soon as exactly one entry remains:
        /// 1. approximate comparison using the options/tolerance of the selected iteration;
        /// 2. title-contains-search-text check;
        /// 3. whole-word overlap count between search text and title;
        /// 4. repeated <c>FuzzySearch.FSearch</c> passes with a rising threshold;
        /// 5. growing word-prefix "contains" check against <c>SystemCollection</c>.
        /// If no candidate is left after stage 2 the method calls itself with
        /// <paramref name="manualIterator"/> = 0, which advances <c>LocalIterationCount</c> to the
        /// next automatic iteration; an iteration number without settings ends the search.
        /// </remarks>
        /// <param name="searchStr">Text to search for.</param>
        /// <param name="manualIterator">
        /// Iteration to run when greater than zero (1, 2, 3 and 100 have settings). Otherwise
        /// <c>LocalIterationCount</c> is incremented and its new value is used.
        /// </param>
        private void StartFuzzySearch(string searchStr, int manualIterator)
        {
            // A positive value selects the iteration directly; otherwise advance the automatic counter.
            if (manualIterator <= 0)
            {
                LocalIterationCount++;
                manualIterator = LocalIterationCount;
            }

            FuzzyStringComparisonOptions[] fuzzOptions;
            FuzzyStringComparisonTolerance tolerance;
            if (!TryGetFuzzySettings(manualIterator, out fuzzOptions, out tolerance))
            {
                // no settings for this iteration - end and return
                return;
            }

            // Stage 1: approximate comparison against every title. Matches are appended to
            // SearchCollection (which is not cleared first).
            foreach (ScraperMaster g in SystemCollection)
            {
                if (searchStr.ApproximatelyEquals(g.GDBTitle, tolerance, fuzzOptions))
                {
                    SearchCollection.Add(g);
                }
            }

            if (SearchCollection.Count == 1)
            {
                // NOTE(review): this shares the list instance instead of copying it, unlike every
                // other assignment in this method - confirm the aliasing is intended.
                WorkingSearchCollection = SearchCollection;
                return;
            }

            // Stage 2: titles that contain the search text.
            // NOTE(review): this reads the SearchString member, not the searchStr parameter, and only
            // the title side is lower-cased - confirm both are intended.
            List <ScraperMaster> titleHits = SystemCollection
                                             .Where(a => AddTrailingWhitespace(a.GDBTitle.ToLower()).Contains(AddTrailingWhitespace(SearchString)))
                                             .ToList();

            if (titleHits.Count == 1)
            {
                // only 1 entry found - return
                SearchCollection        = new List <ScraperMaster>(titleHits);
                WorkingSearchCollection = new List <ScraperMaster>(titleHits);
                return;
            }

            if (titleHits.Count > 1)
            {
                // multiples found - they replace the stage 1 candidates
                SearchCollection = new List <ScraperMaster>(titleHits);
            }

            // titleHits.Count == 0: nothing found - keep the stage 1 candidates and carry on.
            // (ToList() never returns null, so the former null check sent this case down the
            // "single entry" path, which emptied both collections and returned.)

            if (SearchCollection.Count == 0)
            {
                // no candidates at all - the remaining stages have nothing to work on, so
                // run this method again with the next iterator
                StartFuzzySearch(searchStr, 0);
                return;
            }

            // Stage 3: count how many search words also occur as title words.
            string[] searchWords = BuildArray(searchStr);
            Dictionary <ScraperMaster, int> totals = new Dictionary <ScraperMaster, int>();

            foreach (ScraperMaster g in SearchCollection)
            {
                // indexer rather than Add, so an entry listed twice in SearchCollection does not throw
                totals[g] = CountMatchingWords(searchWords, BuildArray(g.GDBTitle));
            }

            // SearchCollection is non-empty here, so totals has at least one entry
            int maxValue = totals.Values.Max();
            List <ScraperMaster> matches = totals.Where(p => p.Value == maxValue)
                                           .Select(p => p.Key)
                                           .ToList();

            if (matches.Count == 1)
            {
                // single best-scoring entry
                WorkingSearchCollection = new List <ScraperMaster>(matches);
                return;
            }

            // Stage 4: FSearch passes over SearchCollection with thresholds 0.1 .. 1.1.
            // NOTE(review): that is eleven passes, as in the original counter loop, although the
            // original comment spoke of 10 iterations - confirm the intended upper bound.
            for (int step = 1; step <= 11; step++)
            {
                double threshold = Convert.ToDouble(step) / 10;
                List <ScraperMaster> found = FuzzySearch.FSearch(searchStr, SearchCollection, threshold);

                if (found.Count == 1)
                {
                    // one entry returned
                    WorkingSearchCollection = new List <ScraperMaster>(found);
                    return;
                }
            }

            // check how many candidates we have
            if (SearchCollection.Count == 1)
            {
                WorkingSearchCollection = new List <ScraperMaster> { SearchCollection.Single() };
                return;
            }

            if (SearchCollection.Count > 1)
            {
                // Stage 5: start from all candidates, then test ever longer word prefixes of the
                // search text ("a", "a b", "a b c", ...) against every title in SystemCollection.
                WorkingSearchCollection.AddRange(SearchCollection.ToList());

                for (int wordCount = 1; wordCount <= searchWords.Length; wordCount++)
                {
                    string prefix = StripSymbols(string.Join(" ", searchWords, 0, wordCount)).ToLower();
                    List <ScraperMaster> prefixHits = SystemCollection
                                                      .Where(a => a.GDBTitle.ToLower().Contains(prefix))
                                                      .ToList();

                    if (prefixHits.Count == 1)
                    {
                        // one entry returned - this is the one to keep
                        WorkingSearchCollection = new List <ScraperMaster> { prefixHits[0] };
                        return;
                    }

                    if (prefixHits.Count > 1)
                    {
                        // still ambiguous - keep the narrowed set and try a longer prefix
                        WorkingSearchCollection = new List <ScraperMaster>(prefixHits);
                    }

                    // zero hits: keep the previous WorkingSearchCollection
                }

                return;
            }

            // no matches found - run this method again with the next iterator
            StartFuzzySearch(searchStr, 0);
        }

        /// <summary>
        /// Looks up the comparison options and tolerance used by <c>StartFuzzySearch</c> for one iteration.
        /// </summary>
        /// <param name="iteration">Iteration number (1, 2, 3 or 100).</param>
        /// <param name="options">Comparison options for the iteration; empty when it has no settings.</param>
        /// <param name="tolerance">Tolerance for the iteration.</param>
        /// <returns><c>true</c> when the iteration has settings, otherwise <c>false</c>.</returns>
        private static bool TryGetFuzzySettings(int iteration, out FuzzyStringComparisonOptions[] options, out FuzzyStringComparisonTolerance tolerance)
        {
            switch (iteration)
            {
            /* Iterations to widen the selection */
            // 1st auto iteration - common subsequence and substring, normal tolerance
            case 1:
                options = new[]
                {
                    FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                    FuzzyStringComparisonOptions.UseLongestCommonSubstring
                };
                tolerance = FuzzyStringComparisonTolerance.Normal;
                return true;

            // 2nd auto iteration - additionally uses the overlap coefficient, normal tolerance
            case 2:
                options = new[]
                {
                    FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                    FuzzyStringComparisonOptions.UseLongestCommonSubstring,
                    FuzzyStringComparisonOptions.UseOverlapCoefficient
                };
                tolerance = FuzzyStringComparisonTolerance.Normal;
                return true;

            // 3rd auto iteration - same options as the 2nd but with weak tolerance
            case 3:
                options = new[]
                {
                    FuzzyStringComparisonOptions.UseLongestCommonSubsequence,
                    FuzzyStringComparisonOptions.UseLongestCommonSubstring,
                    FuzzyStringComparisonOptions.UseOverlapCoefficient
                };
                tolerance = FuzzyStringComparisonTolerance.Weak;
                return true;

            /* Iterations to narrow down the selection */
            // first manual iteration - similarity measures with strong tolerance
            case 100:
                options = new[]
                {
                    FuzzyStringComparisonOptions.UseRatcliffObershelpSimilarity,
                    FuzzyStringComparisonOptions.UseSorensenDiceDistance,
                    FuzzyStringComparisonOptions.UseTanimotoCoefficient
                };
                tolerance = FuzzyStringComparisonTolerance.Strong;
                return true;

            default:
                // out parameters must be assigned; the values are not used by the caller
                options   = new FuzzyStringComparisonOptions[0];
                tolerance = FuzzyStringComparisonTolerance.Normal;
                return false;
            }
        }

        /// <summary>
        /// Counts the entries of <paramref name="searchWords"/> that equal at least one entry of
        /// <paramref name="titleWords"/> once both have been passed through <c>StripSymbols</c>.
        /// </summary>
        /// <param name="searchWords">Words of the search text.</param>
        /// <param name="titleWords">Words of the candidate title.</param>
        /// <returns>Number of search words that occur in the title.</returns>
        private int CountMatchingWords(string[] searchWords, string[] titleWords)
        {
            // strip each title word once instead of once per search word
            List <string> strippedTitleWords = titleWords.Select(w => StripSymbols(w)).ToList();
            int           matchingWords      = 0;

            foreach (string word in searchWords)
            {
                if (strippedTitleWords.Contains(StripSymbols(word)))
                {
                    matchingWords++;
                }
            }

            return matchingWords;
        }