/// <summary>
/// Calls each FuzzySharp entry point once with sample inputs: the <c>Fuzz</c> ratio
/// functions, the <c>Process</c> extraction helpers and the <c>ScorerCache</c> lookups.
/// Every result is stored in a local and not used any further.
/// </summary>
public void Evaluate()
{
    // Candidate lists that several of the extraction calls below share.
    string[] teams = { "Atlanta Falcons", "New York Jets", "New York Giants", "Dallas Cowboys" };
    string[] sites = { "google", "bing", "facebook", "linkedin", "twitter", "googleplus", "bingnews", "plexoogl" };

    // Simple ratio.
    var simpleRatioLoose = Fuzz.Ratio("mysmilarstring", "myawfullysimilarstirng");
    var simpleRatioClose = Fuzz.Ratio("mysmilarstring", "mysimilarstring");

    // Partial ratio.
    var partialRatio = Fuzz.PartialRatio("similar", "somewhresimlrbetweenthisstring");

    // Token sort ratio.
    var tokenSortRatio = Fuzz.TokenSortRatio("order words out of", " words out of order");
    var partialTokenSortRatio = Fuzz.PartialTokenSortRatio("order words out of", " words out of order");

    // Token set ratio.
    var tokenSetRatio = Fuzz.TokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear");
    var partialTokenSetRatio = Fuzz.PartialTokenSetRatio("fuzzy was a bear", "fuzzy fuzzy fuzzy bear");

    // Weighted ratio.
    var weightedRatio = Fuzz.WeightedRatio(
        "The quick brown fox jimps ofver the small lazy dog",
        "the quick brown fox jumps over the small lazy dog");

    // Token initialism ratio.
    var initialismWithAnd = Fuzz.TokenInitialismRatio("NASA", "National Aeronautics and Space Administration");
    var initialismExact = Fuzz.TokenInitialismRatio("NASA", "National Aeronautics Space Administration");
    var initialismLong = Fuzz.TokenInitialismRatio(
        "NASA",
        "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899");
    var initialismLongPartial = Fuzz.PartialTokenInitialismRatio(
        "NASA",
        "National Aeronautics Space Administration, Kennedy Space Center, Cape Canaveral, Florida 32899");

    // Token abbreviation ratio.
    var abbreviationRatio = Fuzz.TokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full);
    var partialAbbreviationRatio = Fuzz.PartialTokenAbbreviationRatio("bl 420", "Baseline section 420", PreprocessMode.Full);

    // Extraction helpers over plain string candidates.
    var bestTeam = Process.ExtractOne("cowboys", teams);
    var topThreeSites = string.Join(", ", Process.ExtractTop("goolge", sites, limit: 3));
    var allSites = string.Join(", ", Process.ExtractAll("goolge", sites));
    var sitesAboveCutoff = string.Join(", ", Process.ExtractAll("goolge", sites, cutoff: 40));
    var sortedSites = string.Join(", ", Process.ExtractSorted("goolge", sites));

    // Extraction with an explicit string projection and scorer.
    var bestTeamWithScorer = Process.ExtractOne("cowboys", teams, s => s, ScorerCache.Get<DefaultRatioScorer>());

    // Extraction over string[] records, compared on their first element.
    var events = new[]
    {
        new[] { "chicago cubs vs new york mets", "CitiField", "2011-05-11", "8pm" },
        new[] { "new york yankees vs boston red sox", "Fenway Park", "2011-05-11", "8pm" },
        new[] { "atlanta braves vs pittsburgh pirates", "PNC Park", "2011-05-11", "8pm" },
    };
    var query = new[] { "new york mets vs chicago cubs", "CitiField", "2017-03-19", "8pm" };
    var bestEvent = Process.ExtractOne(query, events, strings => strings[0]);

    // Scorer instances fetched from the cache.
    var ratioScorer = ScorerCache.Get<DefaultRatioScorer>();
    var partialScorer = ScorerCache.Get<PartialRatioScorer>();
    var tokenSetScorer = ScorerCache.Get<TokenSetScorer>();
    var partialTokenSetScorer = ScorerCache.Get<PartialTokenSetScorer>();
    var tokenSortScorer = ScorerCache.Get<TokenSortScorer>();
    var partialTokenSortScorer = ScorerCache.Get<PartialTokenSortScorer>();
    var tokenAbbreviationScorer = ScorerCache.Get<TokenAbbreviationScorer>();
    var partialTokenAbbreviationScorer = ScorerCache.Get<PartialTokenAbbreviationScorer>();
    var weightedScorer = ScorerCache.Get<WeightedRatioScorer>();
}
/// <summary>
/// Runs <c>Fuzz.WeightedRatio</c> on non-ASCII inputs: one pair that must score 0
/// and two pairs (Cyrillic, Chinese) that must score something other than 0.
/// </summary>
public void TestWRatioUnicodeString()
{
    // U+00C1 against plain ASCII letters: expected score is exactly 0.
    _s1 = "\u00C1";
    _s2 = "ABCD";
    Assert.AreEqual(0, Fuzz.WeightedRatio(_s1, _s2));

    // Cyrillic.
    _s1 = "\u043f\u0441\u0438\u0445\u043e\u043b\u043e\u0433";
    _s2 = "\u043f\u0441\u0438\u0445\u043e\u0442\u0435\u0440\u0430\u043f\u0435\u0432\u0442";
    Assert.AreNotEqual(0, Fuzz.WeightedRatio(_s1, _s2));

    // Chinese.
    _s1 = "\u6211\u4e86\u89e3\u6570\u5b66";
    _s2 = "\u6211\u5b66\u6570\u5b66";
    Assert.AreNotEqual(0, Fuzz.WeightedRatio(_s1, _s2));
}
/// <summary>
/// Asserts that <c>Fuzz.WeightedRatio</c> scores <c>_s4</c> against <c>_s5</c> as 95.
/// </summary>
public void TestWeightedRatioMisorderedMatch()
{
    // Expected value first, actual second, so a failure message labels the two correctly.
    Assert.AreEqual(95, Fuzz.WeightedRatio(_s4, _s5));
}
/// <summary>
/// Asserts that <c>Fuzz.WeightedRatio</c> scores <c>_s1</c> against <c>_s3</c> as 90.
/// </summary>
public void TestWeightedRatioPartialMatch()
{
    // Expected value first, actual second, so a failure message labels the two correctly.
    Assert.AreEqual(90, Fuzz.WeightedRatio(_s1, _s3));
}
/// <summary>
/// Asserts that <c>Fuzz.WeightedRatio</c> with <c>PreprocessMode.Full</c> scores
/// <c>_s1</c> against <c>_s2</c> as 100.
/// </summary>
public void TestWeightedRatioCaseInsensitive()
{
    // Expected value first, actual second, so a failure message labels the two correctly.
    Assert.AreEqual(100, Fuzz.WeightedRatio(_s1, _s2, PreprocessMode.Full));
}
/// <summary>
/// Asserts that <c>Fuzz.WeightedRatio</c> scores <c>_s1</c> against <c>_s1A</c> as 100.
/// </summary>
public void TestWeightedRatioEqual()
{
    // Expected value first, actual second, so a failure message labels the two correctly.
    Assert.AreEqual(100, Fuzz.WeightedRatio(_s1, _s1A));
}
/// <summary>
/// Reads the comma-delimited address file at <c>addrPath</c> and returns the rows in
/// <paramref name="user_county"/> whose address matches every token of the user's search.
/// </summary>
/// <remarks>
/// The first record of the file is treated as the header and is used to locate each column
/// by name. For each data row in the requested county:
/// <list type="bullet">
/// <item>the text of the last comma-separated segment is removed from the address and the
/// remainder is passed to <c>ner.normalize</c>;</item>
/// <item>the row is skipped when it has a building number, or a sub-building name with
/// "apartment"/"unit"/"flat" removed, that is not contained in <c>ner.numbers</c>
/// (presumably the numbers found in the user's search; confirm against the caller);</item>
/// <item>the row is kept when every search token matches at least one address token.</item>
/// </list>
/// </remarks>
/// <param name="user_county">County to restrict the search to; compared with each row's county column using an ordinal, case-insensitive comparison.</param>
/// <param name="user_search_normalized">Search tokens; each one must fuzzily match some normalized address token.</param>
/// <returns>
/// The matching rows that have a building number or a sub-building name; when no matching row
/// has either, all matching rows. An empty list when the file contains no header record.
/// </returns>
public List<FullAddressFields> getBestAddresses(string user_county, string[] user_search_normalized)
{
    var topAddresses = new System.Collections.Generic.HashSet<FullAddressFields>();

    // Dispose the parser so the underlying file handle is released even if a row throws.
    using (TextFieldParser parser = new TextFieldParser(addrPath))
    {
        parser.TextFieldType = FieldType.Delimited;
        parser.SetDelimiters(",");

        string[] fields = parser.ReadFields();
        if (fields == null)
        {
            // ReadFields returns null when there is nothing to read: no header, no rows.
            return new List<FullAddressFields>();
        }

        int countyIndex = -1;
        int localityIndex = -1;
        int secLocalityIndex = -1;
        int thorofareIndex = -1;
        int addressIndex = -1;
        int buildingIdIndex = -1;
        int addressReferenceIndex = -1;
        int buildingNumberIndex = -1;
        int buildingGroupNameIndex = -1;
        int buildingNameIndex = -1;
        int subBuildingNameIndex = -1;
        int departmentIndex = -1;
        int organisationNameIndex = -1;

        // Locate each known column by its header name; columns not present stay at -1.
        for (int colIndex = 0; colIndex < fields.Length; colIndex++)
        {
            string value = fields.GetValueAt<string>(colIndex);
            if (string.Equals(county, value, StringComparison.OrdinalIgnoreCase))
            {
                countyIndex = colIndex;
            }
            else if (string.Equals(locality, value, StringComparison.OrdinalIgnoreCase))
            {
                localityIndex = colIndex;
            }
            else if (string.Equals(secLocality, value, StringComparison.OrdinalIgnoreCase))
            {
                secLocalityIndex = colIndex;
            }
            else if (string.Equals(thorofare, value, StringComparison.OrdinalIgnoreCase))
            {
                thorofareIndex = colIndex;
            }
            else if (string.Equals(address, value, StringComparison.OrdinalIgnoreCase))
            {
                addressIndex = colIndex;
            }
            else if (string.Equals(building, value, StringComparison.OrdinalIgnoreCase))
            {
                buildingIdIndex = colIndex;
            }
            else if (string.Equals(address_reference, value, StringComparison.OrdinalIgnoreCase))
            {
                addressReferenceIndex = colIndex;
            }
            else if (string.Equals(building_number, value, StringComparison.OrdinalIgnoreCase))
            {
                buildingNumberIndex = colIndex;
            }
            else if (string.Equals(building_group_name, value, StringComparison.OrdinalIgnoreCase))
            {
                buildingGroupNameIndex = colIndex;
            }
            else if (string.Equals(building_name, value, StringComparison.OrdinalIgnoreCase))
            {
                buildingNameIndex = colIndex;
            }
            else if (string.Equals(sub_building_name, value, StringComparison.OrdinalIgnoreCase))
            {
                subBuildingNameIndex = colIndex;
            }
            else if (string.Equals(department, value, StringComparison.OrdinalIgnoreCase))
            {
                departmentIndex = colIndex;
            }
            else if (string.Equals(organisation_name, value, StringComparison.OrdinalIgnoreCase))
            {
                organisationNameIndex = colIndex;
            }
        }

        // One distance calculator is enough for the whole scan.
        var damerau = new F23.StringSimilarity.Damerau();

        while (!parser.EndOfData)
        {
            fields = parser.ReadFields();

            // Build the full address record from the row.
            FullAddressFields fulladdr = new FullAddressFields();
            fulladdr.locality = fields.GetValueAt<string>(localityIndex);
            fulladdr.secondary_locality = fields.GetValueAt<string>(secLocalityIndex);
            fulladdr.thorofare = fields.GetValueAt<string>(thorofareIndex);
            fulladdr.building_number = fields.GetValueAt<string>(buildingNumberIndex);
            fulladdr.county = fields.GetValueAt<string>(countyIndex);
            fulladdr.address = fields.GetValueAt<string>(addressIndex);
            fulladdr.building = fields.GetValueAt<int>(buildingIdIndex);
            fulladdr.address_reference = fields.GetValueAt<long>(addressReferenceIndex);
            fulladdr.building_group_name = fields.GetValueAt<string>(buildingGroupNameIndex);
            fulladdr.building_name = fields.GetValueAt<string>(buildingNameIndex);
            fulladdr.sub_building_name = fields.GetValueAt<string>(subBuildingNameIndex);
            fulladdr.department = fields.GetValueAt<string>(departmentIndex);
            fulladdr.organisation_name = fields.GetValueAt<string>(organisationNameIndex);

            if (!string.Equals(user_county, fulladdr.county, StringComparison.OrdinalIgnoreCase))
            {
                continue;
            }

            // Remove the text of the last comma-separated segment from the address.
            // string.Replace throws on an empty search string, so an empty last segment
            // (empty address or trailing comma) leaves the address unchanged instead.
            string lastSegment = fulladdr.address.Split(",").Last();
            string addressWithoutLastSegment = lastSegment.Length == 0
                ? fulladdr.address
                : fulladdr.address.Replace(lastSegment, "");

            // Normalization stays ahead of the number checks, matching the original call order.
            string[] addressTokens = ner.normalize(addressWithoutLastSegment);

            if (!fulladdr.building_number.IsEmpty()
                && !ner.numbers.Contains<string>(fulladdr.building_number.ToLower().Trim()))
            {
                continue;
            }

            if (!fulladdr.sub_building_name.IsEmpty())
            {
                string subBuildingNumber = Regex
                    .Replace(fulladdr.sub_building_name, @"(apartment)|(unit)|(flat)", "", RegexOptions.IgnoreCase)
                    .ToLower()
                    .Trim();
                if (!ner.numbers.Contains<string>(subBuildingNumber))
                {
                    continue;
                }
            }

            if (EverySearchTokenMatches(user_search_normalized, addressTokens, damerau))
            {
                topAddresses.Add(fulladdr);
            }
        }
    }

    // Prefer matches that identify a specific building or unit; otherwise return every match.
    List<FullAddressFields> bestAddresses = topAddresses
        .Where(addr => !addr.building_number.IsEmpty() || !addr.sub_building_name.IsEmpty())
        .ToList();

    return bestAddresses.Count == 0 ? topAddresses.ToList<FullAddressFields>() : bestAddresses;
}

/// <summary>
/// Returns true when every search token matches at least one address token, where a match is
/// a weighted ratio of at least <c>MIN_SCORE * 100</c> or a Damerau distance of at most <c>MAX_DIST</c>.
/// </summary>
private bool EverySearchTokenMatches(string[] searchTokens, string[] addressTokens, F23.StringSimilarity.Damerau damerau)
{
    foreach (string searchToken in searchTokens)
    {
        bool matched = false;
        foreach (string addressToken in addressTokens)
        {
            double score = Fuzz.WeightedRatio(searchToken, addressToken, PreprocessMode.Full);
            if (score >= MIN_SCORE * 100 || damerau.Distance(searchToken, addressToken) <= MAX_DIST)
            {
                matched = true;
                break;
            }
        }

        if (!matched)
        {
            // One unmatched search token rules the address out; no need to test the rest.
            return false;
        }
    }

    return true;
}
/// <summary>
/// Computes the weighted fuzzy-match ratio between the request's target and control strings
/// and wraps it in a successful result.
/// </summary>
/// <param name="request">Command carrying <c>TargetString</c> and <c>ControlString</c>.</param>
/// <param name="cancellationToken">Not used by this handler.</param>
/// <returns>A successful result holding the weighted ratio.</returns>
public async Task<Result<int>> Handle(FuzzyStringComparisonCommand request, CancellationToken cancellationToken)
{
    // The comparison is synchronous; it is wrapped in a completed task and awaited.
    var completedComparison = Task.FromResult(
        Fuzz.WeightedRatio(request.TargetString, request.ControlString));

    var similarity = await completedComparison;

    return Result.Success(similarity);
}