Пример #1
0
        /// <summary>
        /// Calls the <c>Fuzz</c> ratio helpers, the <c>Process</c> extraction helpers and the
        /// <c>ScorerCache</c> lookups one after another. Each result is stored in a local
        /// and is not read again inside this method.
        /// </summary>
        public void Evaluate()
        {
            // Inputs that several calls below share.
            const string misspelled   = "mysmilarstring";
            const string shuffled     = "order words out of";
            const string reordered    = "  words out of order";
            const string bearShort    = "fuzzy was a bear";
            const string bearRepeated = "fuzzy fuzzy fuzzy bear";
            const string acronym      = "NASA";
            const string nasaAddress  = "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899";
            const string shortCode    = "bl 420";
            const string longCode     = "Baseline section 420";

            // Plain and partial ratios.
            var ratioFar   = Fuzz.Ratio(misspelled, "myawfullysimilarstirng");
            var ratioClose = Fuzz.Ratio(misspelled, "mysimilarstring");

            var partialRatio = Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring");

            // Token based ratios.
            var sortRatio        = Fuzz.TokenSortRatio(shuffled, reordered);
            var partialSortRatio = Fuzz.PartialTokenSortRatio(shuffled, reordered);

            var setRatio        = Fuzz.TokenSetRatio(bearShort, bearRepeated);
            var partialSetRatio = Fuzz.PartialTokenSetRatio(bearShort, bearRepeated);

            var weightedRatio = Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog");

            // Initialism and abbreviation ratios.
            var initialismWithAnd    = Fuzz.TokenInitialismRatio(acronym, "National Aeronautics and Space Administration");
            var initialismWithoutAnd = Fuzz.TokenInitialismRatio(acronym, "National Aeronautics Space Administration");

            var initialismLong        = Fuzz.TokenInitialismRatio(acronym, nasaAddress);
            var partialInitialismLong = Fuzz.PartialTokenInitialismRatio(acronym, nasaAddress);

            var abbreviation        = Fuzz.TokenAbbreviationRatio(shortCode, longCode, PreprocessMode.Full);
            var partialAbbreviation = Fuzz.PartialTokenAbbreviationRatio(shortCode, longCode, PreprocessMode.Full);

            // Extraction over plain string choices.
            var teams = new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" };
            var sites = new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" };

            var bestTeam      = Process.ExtractOne("cowboys", teams);
            var topThree      = string.Join(", ", Process.ExtractTop("goolge", sites, limit: 3));
            var everything    = string.Join(", ", Process.ExtractAll("goolge", sites));
            var aboveCutoff   = string.Join(", ", Process.ExtractAll("goolge", sites, cutoff: 40));
            var sortedMatches = string.Join(", ", Process.ExtractSorted("goolge", sites));

            // Same lookup with an explicit processor and scorer.
            var bestTeamByRatio = Process.ExtractOne("cowboys", teams, s => s, ScorerCache.Get<DefaultRatioScorer>());

            // Extraction over composite rows; the first column is the text that gets compared.
            var fixtures = new[]
            {
                new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" },
                new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" },
                new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" },
            };
            var wanted = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" };

            var bestFixture = Process.ExtractOne(wanted, fixtures, row => row[0]);

            // Scorer instances fetched from the cache.
            var ratioScorer               = ScorerCache.Get<DefaultRatioScorer>();
            var partialScorer             = ScorerCache.Get<PartialRatioScorer>();
            var tokenSetScorer            = ScorerCache.Get<TokenSetScorer>();
            var partialTokenSetScorer     = ScorerCache.Get<PartialTokenSetScorer>();
            var tokenSortScorer           = ScorerCache.Get<TokenSortScorer>();
            var partialTokenSortScorer    = ScorerCache.Get<PartialTokenSortScorer>();
            var abbreviationScorer        = ScorerCache.Get<TokenAbbreviationScorer>();
            var partialAbbreviationScorer = ScorerCache.Get<PartialTokenAbbreviationScorer>();
            var weightedScorer            = ScorerCache.Get<WeightedRatioScorer>();
        }
Пример #2
0
        /// <summary>
        /// Checks that <c>Process.ExtractOne</c>, given the <c>SelectFirst</c> processor and the
        /// weighted ratio scorer, picks the first event row as the best match for the query row.
        /// </summary>
        public void TestWithProcessor()
        {
            var events = new[]
            {
                new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" },
                new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" },
                new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" },
            };
            var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" };

            var best = Process.ExtractOne(query, events, SelectFirst, ScorerCache.Get<WeightedRatioScorer>(), 0);

            // Expected value goes first so a failure message labels the two values correctly.
            Assert.AreEqual(events[0], best.Value);
        }
Пример #3
0
 /// <summary>
 /// Runs <c>SearchItems</c> with the scorer fixed to <c>PartialTokenSetScorer</c>.
 /// </summary>
 /// <typeparam name="T">Searchable item type.</typeparam>
 /// <param name="items">Items to search; forwarded unchanged.</param>
 /// <param name="searchString">Search text; forwarded unchanged.</param>
 /// <param name="matchCutOffScore">Cutoff score; forwarded unchanged.</param>
 /// <param name="stripStopWords">Stop-word flag; forwarded unchanged.</param>
 /// <returns>Whatever <c>SearchItems</c> returns for these arguments.</returns>
 public static IEnumerable<T> SearchItemsUsingTokeniseScorer<T>(
     IEnumerable<T> items,
     string? searchString,
     int matchCutOffScore = MatchCutoffScore,
     bool stripStopWords = false
 ) where T : BaseSearchableItem
 {
     var tokenSetScorer = ScorerCache.Get<PartialTokenSetScorer>();

     return SearchItems(items, searchString, matchCutOffScore, stripStopWords, tokenSetScorer);
 }
Пример #4
0
        /// <summary>
        /// Filters and orders role profiles by fuzzy-matching their lower-cased
        /// <c>RoleProfileName</c> against the search string.
        /// </summary>
        /// <param name="roleProfiles">Profiles to search.</param>
        /// <param name="searchString">Search text; when null the input sequence is returned unchanged.</param>
        /// <param name="minMatchScore">Cutoff passed to <c>Process.ExtractSorted</c>.</param>
        /// <param name="stripStopWords">
        /// When true the search string is first passed through <c>CleanSearchedWords</c> and
        /// <c>DefaultRatioScorer</c> is used; otherwise <c>PartialRatioScorer</c> is used.
        /// </param>
        /// <returns>The values of the results produced by <c>Process.ExtractSorted</c>.</returns>
        public static IEnumerable<RoleProfile> FilterRoleProfiles(
            IEnumerable<RoleProfile> roleProfiles,
            string? searchString,
            int minMatchScore,
            bool stripStopWords
        )
        {
            if (searchString == null)
            {
                return roleProfiles;
            }

            if (stripStopWords)
            {
                searchString = CleanSearchedWords(searchString);
            }

            var query = new RoleProfile
            {
                RoleProfileName = searchString.ToLower()
            };

            // Only the scorer depends on the flag, so pick it once instead of duplicating the call.
            var scorer = stripStopWords
                ? ScorerCache.Get<DefaultRatioScorer>()
                : ScorerCache.Get<PartialRatioScorer>();

            var results = Process.ExtractSorted(
                query,
                roleProfiles,
                roleProfile => roleProfile.RoleProfileName.ToLower(),
                scorer,
                minMatchScore
            );

            return results.Select(result => result.Value);
        }
Пример #5
0
        /// <summary>
        /// Performs a search across all package handlers with the given query and
        /// re-orders the combined results by fuzzy relevance.
        /// </summary>
        /// <param name="query">Text passed to every handler and used as the fuzzy-match query.</param>
        /// <returns>
        /// The packages returned by the handlers, ordered as <c>Process.ExtractSorted</c> orders
        /// their "Title - DeveloperName" strings. Results from different handlers are not de-duplicated.
        /// </returns>
        public async Task<List<PackageBase>> SearchAsync(string query)
        {
            var packages = new List<PackageBase>();

            foreach (var handler in PackageHandlers.Values)
            {
                List<PackageBase> results;
                try
                {
                    results = await handler.SearchAsync(query);
                }
                catch (System.Exception ex)
                {
                    // Best effort: one failing handler must not abort the whole search,
                    // but leave a trace so the failure is not completely invisible.
                    System.Diagnostics.Debug.WriteLine($"Search failed for handler {handler.GetType().Name}: {ex}");
                    continue;
                }

                // A handler returning null contributes nothing (AddRange would throw on null).
                if (results != null)
                {
                    packages.AddRange(results);
                }
            }

            // Fuzzy search to re-sort by relevance
            var scorer = ScorerCache.Get<FuzzySharp.SimilarityRatio.Scorer.StrategySensitive.TokenDifferenceScorer>();

            // r.Index is the position of the choice in the projected sequence, which maps 1:1 onto packages.
            return Process.ExtractSorted(query, packages.Select(p => p.Title + " - " + p.DeveloperName), scorer: scorer)
                .Select(r => packages[r.Index])
                .ToList();
        }
Пример #6
0
        /// <summary>
        /// Fuzzy-searches <paramref name="items"/> by comparing the lower-cased, space-joined
        /// <c>SearchableContent</c> of each item against a query item built from the search string.
        /// </summary>
        /// <typeparam name="T">Searchable item type; an instance is created via its parameterless constructor.</typeparam>
        /// <param name="items">Items to search.</param>
        /// <param name="searchString">Search text; when null the input sequence is returned unchanged.</param>
        /// <param name="matchCutOffScore">Cutoff passed to <c>Process.ExtractAll</c>.</param>
        /// <param name="stripStopWords">
        /// When true the search string is first passed through <c>CleanSearchedWords</c>; it also
        /// selects the default scorer when <paramref name="scorer"/> is null.
        /// </param>
        /// <param name="scorer">
        /// Scorer to use. When null, <c>DefaultRatioScorer</c> is used if
        /// <paramref name="stripStopWords"/> is true, otherwise <c>PartialRatioScorer</c>.
        /// </param>
        /// <returns>The values of the results produced by <c>Process.ExtractAll</c>.</returns>
        public static IEnumerable<T> SearchItems<T>(
            IEnumerable<T> items,
            string? searchString,
            int matchCutOffScore = MatchCutoffScore,
            bool stripStopWords = false,
            IRatioScorer? scorer = null
        ) where T : BaseSearchableItem
        {
            if (searchString == null)
            {
                return items;
            }

            if (stripStopWords)
            {
                searchString = CleanSearchedWords(searchString);
            }

            // The generic overload yields a T directly, so no 'as' cast, null-forgiving
            // operator or cast back to T is needed.
            var query = Activator.CreateInstance<T>();
            query.SearchableName = searchString;

            var ratioScorer = scorer ??
                              (stripStopWords
                                  ? ScorerCache.Get<DefaultRatioScorer>()
                                  : ScorerCache.Get<PartialRatioScorer>());

            var results = Process.ExtractAll(
                query,
                items,
                item => string.Join(" ", item.SearchableContent.Where(s => s != null)).ToLower(),
                ratioScorer,
                matchCutOffScore
            );

            return results.Select(result => result.Value);
        }
Пример #7
0
        /// <summary>
        /// Checks which choice <c>Process.ExtractOne</c> selects with the default scorer,
        /// with an explicit <c>DefaultRatioScorer</c>, and when the choices come from a dictionary.
        /// </summary>
        public void TestWithScorer()
        {
            var choices = new[]
            {
                "new york mets vs chicago cubs",
                "chicago cubs at new york mets",
                "atlanta braves vs pittsbugh pirates",
                "new york yankees vs boston red sox"
            };

            var choicesDict = new Dictionary<int, string>
            {
                [1] = "new york mets vs chicago cubs",
                [2] = "chicago cubs vs chicago white sox",
                [3] = "philladelphia phillies vs atlanta braves",
                [4] = "braves vs mets"
            };

            // in this hypothetical example we care about ordering, so we use quick ratio
            var query = "new york mets at chicago cubs";

            // first, as an example, the normal way would select the
            // "more 'complete'" match of choices[1]
            var best = Process.ExtractOne(query, choices);

            // Expected value goes first so failure messages label the two values correctly.
            Assert.AreEqual(choices[1], best.Value);

            // now, use the custom scorer
            best = Process.ExtractOne(query, choices, null, ScorerCache.Get<DefaultRatioScorer>());
            Assert.AreEqual(choices[0], best.Value);

            best = Process.ExtractOne(query, choicesDict.Select(k => k.Value));
            Assert.AreEqual(choicesDict[1], best.Value);
        }
Пример #8
0
 /// <summary>
 /// Finds all alphanumeric tokens in each string, sorts those tokens
 /// and takes the ratio of the resulting joined strings.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>TokenSortScorer</c>.</returns>
 public static int TokenSortRatio(string input1, string input2)
     => ScorerCache.Get<TokenSortScorer>().Score(input1, input2);
Пример #9
0
 /// <summary>
 /// Inconsistent substrings lead to problems in matching. This ratio applies
 /// a "best partial" heuristic for two strings of noticeably different lengths.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <param name="preprocessMode">Preprocessing mode forwarded to the scorer.</param>
 /// <returns>The score produced by the cached <c>PartialRatioScorer</c>.</returns>
 public static int PartialRatio(string input1, string input2, PreprocessMode preprocessMode)
     => ScorerCache.Get<PartialRatioScorer>().Score(input1, input2, preprocessMode);
Пример #10
0
 /// <summary>
 /// Inconsistent substrings lead to problems in matching. This ratio applies
 /// a "best partial" heuristic for two strings of noticeably different lengths.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>PartialRatioScorer</c>.</returns>
 public static int PartialRatio(string input1, string input2)
     => ScorerCache.Get<PartialRatioScorer>().Score(input1, input2);
Пример #11
0
 /// <summary>
 /// Calculates a weighted ratio between the different algorithms for best results.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>WeightedRatioScorer</c>.</returns>
 public static int WeightedRatio(string input1, string input2)
     => ScorerCache.Get<WeightedRatioScorer>().Score(input1, input2);
Пример #12
0
 /// <summary>
 /// Similarity ratio that attempts to determine whether one string's tokens are an
 /// abbreviation of the other string's tokens. One string must have all its characters,
 /// in order, in the other string to even be considered.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>TokenAbbreviationScorer</c>.</returns>
 public static double TokenAbbreviationRatio(string input1, string input2)
     => ScorerCache.Get<TokenAbbreviationScorer>().Score(input1, input2);
Пример #13
0
        /// <summary>
        /// Checks that with a cutoff of 100 <c>Process.ExtractSorted</c> still returns at least
        /// one result and that the first one is the choice identical to the query.
        /// </summary>
        public void TestWithCutoff2()
        {
            var choices = new[]
            {
                "new york mets vs chicago cubs",
                "chicago cubs at new york mets",
                "atlanta braves vs pittsbugh pirates",
                "new york yankees vs boston red sox"
            };

            var query = "new york mets vs chicago cubs";

            // Only find 100-score cases. Materialise once so the two assertions below
            // inspect the same results instead of enumerating the sequence twice.
            var res = Process.ExtractSorted(query, choices, StringPreprocessorFactory.Default, ScorerCache.Get<WeightedRatioScorer>(), 100)
                .ToList();

            Assert.IsTrue(res.Any());

            // AreEqual (expected first) reports both values on failure, unlike IsTrue(a == b).
            Assert.AreEqual(choices[0], res.First().Value);
        }
Пример #14
0
 /// <summary>
 /// Splits the strings into tokens and computes the ratio on those tokens
 /// (the token strings themselves, not their individual characters).
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>TokenDifferenceScorer</c>.</returns>
 public static double TokenDifferenceRatio(string input1, string input2)
     => ScorerCache.Get<TokenDifferenceScorer>().Score(input1, input2);
Пример #15
0
 /// <summary>
 /// Calculates a Levenshtein simple ratio between the strings,
 /// which indicates a measure of similarity.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>DefaultRatioScorer</c>.</returns>
 public static int Ratio(string input1, string input2)
     => ScorerCache.Get<DefaultRatioScorer>().Score(input1, input2);
Пример #16
0
 /// <summary>
 /// Fetches the token-set scorer and its partial variant from <c>ScorerCache</c>
 /// and stores them in <c>_scorer</c> and <c>_partialScorer</c>.
 /// </summary>
 public void SetUp()
 {
     _partialScorer = ScorerCache.Get<PartialTokenSetScorer>();
     _scorer = ScorerCache.Get<TokenSetScorer>();
 }
Пример #17
0
 /// <summary>
 /// Splits the strings into tokens and computes the ratio on those tokens
 /// (the token strings themselves, not their individual characters).
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <param name="preprocessMode">Preprocessing mode forwarded to the scorer.</param>
 /// <returns>The score produced by the cached <c>PartialTokenDifferenceScorer</c>.</returns>
 public static double PartialTokenDifferenceRatio(string input1, string input2, PreprocessMode preprocessMode)
     => ScorerCache.Get<PartialTokenDifferenceScorer>().Score(input1, input2, preprocessMode);
Пример #18
0
 /// <summary>
 /// Calculates a weighted ratio between the different algorithms for best results.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <param name="preprocessMode">Preprocessing mode forwarded to the scorer.</param>
 /// <returns>The score produced by the cached <c>WeightedRatioScorer</c>.</returns>
 public static double WeightedRatio(string input1, string input2, PreprocessMode preprocessMode)
     => ScorerCache.Get<WeightedRatioScorer>().Score(input1, input2, preprocessMode);
Пример #19
0
 /// <summary>
 /// Similarity ratio that attempts to determine whether one string's tokens are an
 /// abbreviation of the other string's tokens. One string must have all its characters,
 /// in order, in the other string to even be considered.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <param name="preprocessMode">Preprocessing mode forwarded to the scorer.</param>
 /// <returns>The score produced by the cached <c>PartialTokenAbbreviationScorer</c>.</returns>
 public static double PartialTokenAbbreviationRatio(string input1, string input2, PreprocessMode preprocessMode)
     => ScorerCache.Get<PartialTokenAbbreviationScorer>().Score(input1, input2, preprocessMode);
Пример #20
0
 /// <summary>
 /// Finds all alphanumeric tokens in each string, sorts those tokens
 /// and takes the ratio of the resulting joined strings.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <param name="preprocessMode">Preprocessing mode forwarded to the scorer.</param>
 /// <returns>The score produced by the cached <c>TokenSortScorer</c>.</returns>
 public static int TokenSortRatio(string input1, string input2, PreprocessMode preprocessMode)
     => ScorerCache.Get<TokenSortScorer>().Score(input1, input2, preprocessMode);
Пример #21
0
 /// <summary>
 /// Splits the longer string into tokens, takes the initialism
 /// and compares it to the shorter string.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>TokenInitialismScorer</c>.</returns>
 public static int TokenInitialismRatio(string input1, string input2)
     => ScorerCache.Get<TokenInitialismScorer>().Score(input1, input2);
Пример #22
0
 /// <summary>
 /// Splits the strings into tokens and computes intersections and remainders
 /// between the tokens of the two strings. A comparison string is then built up
 /// and compared using the simple ratio algorithm.
 /// Useful for strings where words appear redundantly.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>PartialTokenSetScorer</c>.</returns>
 public static double PartialTokenSetRatio(string input1, string input2)
     => ScorerCache.Get<PartialTokenSetScorer>().Score(input1, input2);
Пример #23
0
 /// <summary>
 /// Splits the longer string into tokens, takes the initialism
 /// and compares it to the shorter string.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <param name="preprocessMode">Preprocessing mode forwarded to the scorer.</param>
 /// <returns>The score produced by the cached <c>PartialTokenInitialismScorer</c>.</returns>
 public static int PartialTokenInitialismRatio(string input1, string input2, PreprocessMode preprocessMode)
 {
     // Forward preprocessMode: it was previously accepted but silently dropped,
     // so the caller's requested preprocessing was never applied.
     return ScorerCache.Get<PartialTokenInitialismScorer>().Score(input1, input2, preprocessMode);
 }
Пример #24
0
        /// <summary>
        /// Checks that a query for an event missing from the choices yields no results
        /// from <c>Process.ExtractSorted</c> when a cutoff of 50 is applied.
        /// </summary>
        public void TestWithCutoff()
        {
            var knownEvents = new[]
            {
                "new york mets vs chicago cubs",
                "chicago cubs at new york mets",
                "atlanta braves vs pittsbugh pirates",
                "new york yankees vs boston red sox"
            };

            // This event does not exist in the list; a reasonable cutoff keeps us
            // from matching it to something arbitrary.
            var missingEvent = "los angeles dodgers vs san francisco giants";

            var weightedScorer = ScorerCache.Get<WeightedRatioScorer>();
            var matches = Process.ExtractSorted(missingEvent, knownEvents, StringPreprocessorFactory.Default, weightedScorer, 50);

            var nothingMatched = !matches.Any();
            Assert.IsTrue(nothingMatched);

            // Without a cutoff something would be returned, e.g. from
            // Process.ExtractOne(query, choices); that case is not asserted here.
        }
Пример #25
0
 /// <summary>
 /// Similarity ratio that attempts to determine whether one string's tokens are an
 /// abbreviation of the other string's tokens. One string must have all its characters,
 /// in order, in the other string to even be considered.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>PartialTokenAbbreviationScorer</c>.</returns>
 public static int PartialTokenAbbreviationRatio(string input1, string input2)
     => ScorerCache.Get<PartialTokenAbbreviationScorer>().Score(input1, input2);
Пример #26
0
 /// <summary>
 /// Splits the strings into tokens and computes the ratio on those tokens
 /// (the token strings themselves, not their individual characters).
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <returns>The score produced by the cached <c>PartialTokenDifferenceScorer</c>.</returns>
 public static int PartialTokenDifferenceRatio(string input1, string input2)
     => ScorerCache.Get<PartialTokenDifferenceScorer>().Score(input1, input2);
Пример #27
0
 /// <summary>
 /// Splits the longer string into tokens, takes the initialism
 /// and compares it to the shorter string.
 /// </summary>
 /// <param name="input1">First string to compare.</param>
 /// <param name="input2">Second string to compare.</param>
 /// <param name="preprocessMode">Preprocessing mode forwarded to the scorer.</param>
 /// <returns>The score produced by the cached <c>TokenInitialismScorer</c>.</returns>
 public static double TokenInitialismRatio(string input1, string input2, PreprocessMode preprocessMode)
     => ScorerCache.Get<TokenInitialismScorer>().Score(input1, input2, preprocessMode);