/// <summary>
/// Performs a fuzzy search of the submitted team against the list of known team names
/// and returns the best match.
/// </summary>
/// <param name="submittedTeam">The team name to look up.</param>
/// <returns>
/// <paramref name="submittedTeam"/> itself when it appears verbatim in the list; otherwise the
/// approximately-equal list entry with the highest Ratcliff-Obershelp score (the earliest entry
/// wins a tie); or the literal string "NO MATCH" when the input is null/blank or no list entry
/// is approximately equal.
/// </returns>
public static string DoSearch(string submittedTeam)
{
    // A blank name cannot identify a team; answer directly instead of handing
    // null/whitespace to the matching helpers.
    if (string.IsNullOrWhiteSpace(submittedTeam))
    {
        return "NO MATCH";
    }

    // Read list of team names from CSV generated from the BigQuery Betfair dataset (one name per line).
    List<string> apiTeams = File.ReadLines(HttpContext.Current.Server.MapPath("soccer-teams-top500.txt")).ToList();

    // An exact (case-sensitive) hit needs no fuzzy scoring.
    if (apiTeams.Contains(submittedTeam))
    {
        return submittedTeam;
    }

    bool matchFound = false;
    string bestTeam = null;
    double bestScore = 0;

    foreach (var apiTeam in apiTeams)
    {
        // Only entries that pass the approximate-equality filter are candidates.
        if (!FuzzyMatching.IsApproxEqual(submittedTeam, apiTeam))
        {
            continue;
        }

        // Ratcliff-Obershelp was chosen by trial and error as the most suitable measure.
        // Despite the helper's name, a higher value is treated as a better match.
        double score = FuzzyMatching.GetRatOberDistance(submittedTeam, apiTeam);

        // Seed with the first candidate so that a candidate is always returned, even when
        // no score is strictly positive; afterwards only a strictly higher score replaces
        // the current best, so the earliest candidate wins ties.
        if (!matchFound || score > bestScore)
        {
            matchFound = true;
            bestTeam = apiTeam;
            bestScore = score;
        }
    }

    return matchFound ? bestTeam : "NO MATCH";
}
/// <summary>
/// Click handler for Button1: looks up the team name typed into <c>SourceTextBox</c>
/// and displays the search result in <c>MatchResult</c>.
/// </summary>
/// <param name="sender">The control that raised the event (not used).</param>
/// <param name="e">Event data (not used).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    // All matching logic lives in FuzzyMatching.DoSearch; this handler only wires the UI to it.
    string typedTeam = SourceTextBox.Text;
    string searchResult = FuzzyMatching.DoSearch(typedTeam);

    MatchResult.Text = searchResult;
}