Example #1
0
        // Walks through the fuzzy-matching API: the Fuzz ratio helpers, the Process
        // extraction helpers, and the ScorerCache lookups. Every result is stored in a
        // local that is never read afterwards; the method exists purely as a usage demo.
        public void Evaluate()
        {
            // Plain ratio on two misspelled inputs.
            var ratioDistant = Fuzz.Ratio("mysmilarstring", "myawfullysimilarstirng");
            var ratioClose   = Fuzz.Ratio("mysmilarstring", "mysimilarstring");

            // Partial ratio: short needle against a longer haystack.
            var partialScore = Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring");

            // Token-sort variants on the same words in a different order.
            var sortedScore        = Fuzz.TokenSortRatio("order words out of", "  words out of order");
            var partialSortedScore = Fuzz.PartialTokenSortRatio("order words out of", "  words out of order");

            // Token-set variants on inputs with repeated tokens.
            var setScore        = Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear");
            var partialSetScore = Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear");

            // Weighted ratio on a sentence with typos and a case difference.
            var weightedScore = Fuzz.WeightedRatio("The quick brown fox jimps ofver the small lazy dog", "the quick brown fox jumps over the small lazy dog");

            // Initialism matching against progressively longer expansions.
            var initialismWithAnd    = Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration");
            var initialismWithoutAnd = Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration");

            var initialismLong        = Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899");
            var initialismLongPartial = Fuzz.PartialTokenInitialismRatio("NASA", "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899");

            // Abbreviation matching with full preprocessing.
            var abbreviationScore        = Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full);
            var abbreviationPartialScore = Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full);

            // Candidate lists shared by the extraction calls below.
            var teams = new[] { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" };
            var sites = new[] { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" };

            var bestTeam        = Process.ExtractOne("cowboys", teams);
            var topThreeSites   = string.Join(", ", Process.ExtractTop("goolge", sites, limit: 3));
            var allSites        = string.Join(", ", Process.ExtractAll("goolge", sites));
            var sitesAboveForty = string.Join(", ", Process.ExtractAll("goolge", sites, cutoff: 40));
            var sortedSites     = string.Join(", ", Process.ExtractSorted("goolge", sites));

            // Same lookup as bestTeam, with an explicit selector and scorer.
            var bestTeamExplicit = Process.ExtractOne("cowboys", teams, s => s, ScorerCache.Get <DefaultRatioScorer>());

            // Extraction over structured rows: only the first column is used as the match text.
            var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" };
            var events = new[]
            {
                new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" },
                new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" },
                new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" },
            };

            var best = Process.ExtractOne(query, events, row => row[0]);

            // Scorer instances obtained from the cache, one per scorer type.
            var ratioScorer                    = ScorerCache.Get <DefaultRatioScorer>();
            var partialScorer                  = ScorerCache.Get <PartialRatioScorer>();
            var tokenSetScorer                 = ScorerCache.Get <TokenSetScorer>();
            var partialTokenSetScorer          = ScorerCache.Get <PartialTokenSetScorer>();
            var tokenSortScorer                = ScorerCache.Get <TokenSortScorer>();
            var partialTokenSortScorer         = ScorerCache.Get <PartialTokenSortScorer>();
            var tokenAbbreviationScorer        = ScorerCache.Get <TokenAbbreviationScorer>();
            var partialTokenAbbreviationScorer = ScorerCache.Get <PartialTokenAbbreviationScorer>();
            var weightedScorer                 = ScorerCache.Get <WeightedRatioScorer>();
        }
Example #2
0
        // Exercises Fuzz.WeightedRatio with non-ASCII input. The fixture fields _s1/_s2
        // are overwritten for each script that is checked.
        public void TestWRatioUnicodeString()
        {
            // Accented Latin capital versus plain ASCII letters: expected score is 0.
            _s1 = "\u00C1";
            _s2 = "ABCD";
            var latinScore = Fuzz.WeightedRatio(_s1, _s2);
            Assert.AreEqual(0, latinScore);

            // Cyrillic: two words sharing a common prefix must not score 0.
            _s1 = "\u043f\u0441\u0438\u0445\u043e\u043b\u043e\u0433";
            _s2 = "\u043f\u0441\u0438\u0445\u043e\u0442\u0435\u0440\u0430\u043f\u0435\u0432\u0442";
            var cyrillicScore = Fuzz.WeightedRatio(_s1, _s2);
            Assert.AreNotEqual(0, cyrillicScore);

            // Chinese: two phrases sharing characters must not score 0.
            _s1 = "\u6211\u4e86\u89e3\u6570\u5b66";
            _s2 = "\u6211\u5b66\u6570\u5b66";
            var chineseScore = Fuzz.WeightedRatio(_s1, _s2);
            Assert.AreNotEqual(0, chineseScore);
        }
Example #3
0
 // Verifies that the weighted ratio of the fixture strings _s4 and _s5 is 95.
 public void TestWeightedRatioMisorderedMatch()
 {
     // Expected value goes first so a failure message labels the two values correctly.
     Assert.AreEqual(95, Fuzz.WeightedRatio(_s4, _s5));
 }
Example #4
0
 // Verifies that the weighted ratio of the fixture strings _s1 and _s3 is 90.
 public void TestWeightedRatioPartialMatch()
 {
     // Expected value goes first so a failure message labels the two values correctly.
     Assert.AreEqual(90, Fuzz.WeightedRatio(_s1, _s3));
 }
Example #5
0
 // Verifies that, with PreprocessMode.Full, the weighted ratio of the fixture
 // strings _s1 and _s2 is 100.
 public void TestWeightedRatioCaseInsensitive()
 {
     // Expected value goes first so a failure message labels the two values correctly.
     Assert.AreEqual(100, Fuzz.WeightedRatio(_s1, _s2, PreprocessMode.Full));
 }
Example #6
0
 // Verifies that the weighted ratio of the fixture strings _s1 and _s1A is 100.
 public void TestWeightedRatioEqual()
 {
     // Expected value goes first so a failure message labels the two values correctly.
     Assert.AreEqual(100, Fuzz.WeightedRatio(_s1, _s1A));
 }
Example #7
0
        /// <summary>
        /// Scans the comma-delimited address file at <c>addrPath</c> and returns the rows in
        /// <paramref name="user_county"/> whose address tokens fuzzily match every token of
        /// <paramref name="user_search_normalized"/>.
        /// </summary>
        /// <param name="user_county">County to filter on (compared case-insensitively, ordinal).</param>
        /// <param name="user_search_normalized">Normalized search tokens; each one must match some address token.</param>
        /// <returns>
        /// The matching addresses that carry a building number or sub-building name; when none
        /// of the matches has either, all matches are returned. An empty list is returned when
        /// the file contains no header row.
        /// </returns>
        public List <FullAddressFields> getBestAddresses(string user_county, string[] user_search_normalized)
        {
            var top_addresses = new System.Collections.Generic.HashSet <FullAddressFields>();

            // The parser owns an open file handle, so it is disposed deterministically.
            using (TextFieldParser parser = new TextFieldParser(addrPath))
            {
                parser.TextFieldType = FieldType.Delimited;
                parser.SetDelimiters(",");

                // The first data line is the header; ReadFields yields null when the file has no data lines.
                string[] fields = parser.ReadFields();
                if (fields == null)
                {
                    return(new List <FullAddressFields>());
                }

                int fields_len = fields.Length;

                // Column positions resolved from the header; -1 means "header not found".
                // NOTE(review): GetValueAt is a project helper - its behavior for index -1 is not visible here.
                int county_index              = -1;
                int locality_index            = -1;
                int secLocality_index         = -1;
                int thorofare_index           = -1;
                int address_index             = -1;
                int building_id_index         = -1;
                int address_reference_index   = -1;
                int building_number_index     = -1;
                int building_group_name_index = -1;
                int building_name_index       = -1;
                int sub_building_name_index   = -1;
                int department_index          = -1;
                int organisation_name_index   = -1;

                for (int col_index = 0; col_index < fields_len; col_index++)
                {
                    string value = fields.GetValueAt <string>(col_index);
                    if (string.Equals(county, value, StringComparison.OrdinalIgnoreCase))
                    {
                        county_index = col_index;
                    }
                    else if (string.Equals(locality, value, StringComparison.OrdinalIgnoreCase))
                    {
                        locality_index = col_index;
                    }
                    else if (string.Equals(secLocality, value, StringComparison.OrdinalIgnoreCase))
                    {
                        secLocality_index = col_index;
                    }
                    else if (string.Equals(thorofare, value, StringComparison.OrdinalIgnoreCase))
                    {
                        thorofare_index = col_index;
                    }
                    else if (string.Equals(address, value, StringComparison.OrdinalIgnoreCase))
                    {
                        address_index = col_index;
                    }
                    else if (string.Equals(building, value, StringComparison.OrdinalIgnoreCase))
                    {
                        building_id_index = col_index;
                    }
                    else if (string.Equals(address_reference, value, StringComparison.OrdinalIgnoreCase))
                    {
                        address_reference_index = col_index;
                    }
                    else if (string.Equals(building_number, value, StringComparison.OrdinalIgnoreCase))
                    {
                        building_number_index = col_index;
                    }
                    else if (string.Equals(building_group_name, value, StringComparison.OrdinalIgnoreCase))
                    {
                        building_group_name_index = col_index;
                    }
                    else if (string.Equals(building_name, value, StringComparison.OrdinalIgnoreCase))
                    {
                        building_name_index = col_index;
                    }
                    else if (string.Equals(sub_building_name, value, StringComparison.OrdinalIgnoreCase))
                    {
                        sub_building_name_index = col_index;
                    }
                    else if (string.Equals(department, value, StringComparison.OrdinalIgnoreCase))
                    {
                        department_index = col_index;
                    }
                    else if (string.Equals(organisation_name, value, StringComparison.OrdinalIgnoreCase))
                    {
                        organisation_name_index = col_index;
                    }
                }

                // Loop-invariant: one distance calculator serves every row.
                var dist = new F23.StringSimilarity.Damerau();

                while (!parser.EndOfData)
                {
                    fields = parser.ReadFields();

                    // Populate the address record from the row's columns.
                    FullAddressFields fulladdr = new FullAddressFields();
                    fulladdr.locality           = fields.GetValueAt <string>(locality_index);
                    fulladdr.secondary_locality = fields.GetValueAt <string>(secLocality_index);
                    fulladdr.thorofare          = fields.GetValueAt <string>(thorofare_index);
                    fulladdr.building_number    = fields.GetValueAt <string>(building_number_index);
                    fulladdr.county             = fields.GetValueAt <string>(county_index);
                    fulladdr.address            = fields.GetValueAt <string>(address_index);
                    fulladdr.building           = fields.GetValueAt <int>(building_id_index);
                    fulladdr.address_reference  = fields.GetValueAt <long>(address_reference_index);

                    fulladdr.building_group_name = fields.GetValueAt <string>(building_group_name_index);
                    fulladdr.building_name       = fields.GetValueAt <string>(building_name_index);
                    fulladdr.sub_building_name   = fields.GetValueAt <string>(sub_building_name_index);
                    fulladdr.department          = fields.GetValueAt <string>(department_index);
                    fulladdr.organisation_name   = fields.GetValueAt <string>(organisation_name_index);

                    // Only rows in the requested county are candidates.
                    if (!string.Equals(user_county, fulladdr.county, StringComparison.OrdinalIgnoreCase))
                    {
                        continue;
                    }

                    // Remove the text of the last comma-separated segment from the address before normalizing.
                    string   address_split            = fulladdr.address.Replace(fulladdr.address.Split(",").Last(), "");
                    string[] address_split_normalized = ner.normalize(address_split);

                    // A row with a building number is kept only if that number is among ner.numbers.
                    if (!fulladdr.building_number.IsEmpty())
                    {
                        if (!ner.numbers.Contains <string>(fulladdr.building_number.ToLower().Trim()))
                        {
                            continue;
                        }
                    }

                    // Same check for the sub-building, after stripping the apartment/unit/flat keyword.
                    if (!fulladdr.sub_building_name.IsEmpty())
                    {
                        string sub_building_number = Regex.Replace(fulladdr.sub_building_name, @"(apartment)|(unit)|(flat)", "", RegexOptions.IgnoreCase).ToLower().Trim();
                        if (!ner.numbers.Contains <string>(sub_building_number))
                        {
                            continue;
                        }
                    }

                    // Count the search tokens that match at least one address token.
                    int tokens_match = 0;
                    foreach (string user_search_token in user_search_normalized)
                    {
                        foreach (string address_token in address_split_normalized)
                        {
                            double score  = Fuzz.WeightedRatio(user_search_token, address_token, PreprocessMode.Full);
                            double score2 = dist.Distance(user_search_token, address_token);

                            // Either a high enough weighted ratio or a small enough edit distance counts as a match.
                            if (score >= MIN_SCORE * 100 || score2 <= MAX_DIST)
                            {
                                tokens_match += 1;
                                break;
                            }
                        }
                    }

                    // Keep the row only when every search token found a match.
                    if (user_search_normalized.Length == tokens_match)
                    {
                        top_addresses.Add(fulladdr);
                    }
                }
            }

            // Prefer matches that carry a building number or a sub-building name.
            List <FullAddressFields> best_addresses = new List <FullAddressFields>();

            foreach (FullAddressFields addr in top_addresses)
            {
                if (!addr.building_number.IsEmpty() || !addr.sub_building_name.IsEmpty())
                {
                    best_addresses.Add(addr);
                }
            }

            if (best_addresses.Count == 0)
            {
                return(top_addresses.ToList <FullAddressFields>());
            }

            return(best_addresses);
        }
Example #8
0
        /// <summary>
        /// Computes the weighted fuzzy ratio between the request's target and control strings
        /// and wraps it in a successful result.
        /// </summary>
        /// <param name="request">Command carrying <c>TargetString</c> and <c>ControlString</c>.</param>
        /// <param name="cancellationToken">Not consulted by this handler.</param>
        /// <returns>A successful result holding the ratio.</returns>
        public async Task <Result <int> > Handle(FuzzyStringComparisonCommand request, CancellationToken cancellationToken)
        {
            // The comparison itself is synchronous; it is wrapped in a completed task to fit the async signature.
            var pendingScore = Task.FromResult(Fuzz.WeightedRatio(request.TargetString, request.ControlString));
            var score        = await pendingScore;

            return(Result.Success(score));
        }