/// <summary>
/// Decides whether <c>unifFirstString</c> and <c>unifSecondString</c> should be
/// treated as duplicates, using several fuzzy-matching metrics.
/// </summary>
/// <returns>
/// <c>true</c> when any one of the following holds, otherwise <c>false</c>:
/// the Levenshtein distance divided by the average length is below 0.2 and the
/// Dice coefficient is above 0.8; the Dice coefficient is above 0.9; the second
/// item of the longest-common-subsequence result is above 0.75; or the double
/// metaphone encodings of both strings have a Levenshtein distance of zero.
/// </returns>
public bool IsDuplicate()
{
    // Mean of the two lengths, used to normalise the raw edit distance.
    double meanLength = (unifFirstString.Length + unifSecondString.Length) / 2.0;

    // All metrics are computed up front, in this order, before any rule is checked.
    int editDistance = LevenshteinDistanceExtensions.LevenshteinDistance(unifSecondString, unifFirstString);
    double dice = DiceCoefficientExtensions.DiceCoefficient(unifFirstString, unifSecondString);
    Tuple<string, double> commonSubsequence =
        LongestCommonSubsequenceExtensions.LongestCommonSubsequence(unifFirstString, unifSecondString);

    string firstPhonetic = DoubleMetaphoneExtensions.ToDoubleMetaphone(unifFirstString);
    string secondPhonetic = DoubleMetaphoneExtensions.ToDoubleMetaphone(unifSecondString);
    int phoneticDistance = LevenshteinDistanceExtensions.LevenshteinDistance(firstPhonetic, secondPhonetic);

    bool closeEditAndDice = editDistance / meanLength < 0.2 && dice > 0.8;

    // Rules are evaluated left to right; the first one that holds decides the result.
    return closeEditAndDice
        || dice > 0.9
        || commonSubsequence.Item2 > 0.75 // NOTE(review): Item2 is presumably a similarity ratio - confirm against the library
        || phoneticDistance == 0;
}
/// <summary>
/// Returns the element of <paramref name="set"/> whose text, after removing every
/// parenthesised fragment and trimming, has the smallest Levenshtein distance to
/// <paramref name="test"/>.
/// </summary>
/// <param name="test">The string to match against.</param>
/// <param name="set">Candidate strings; must contain at least one element.</param>
/// <returns>
/// The original (uncleaned) candidate with the lowest distance. On a tie the
/// earliest candidate wins.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="set"/> is null.</exception>
/// <exception cref="System.InvalidOperationException"><paramref name="set"/> is empty.</exception>
public static string Match(string test, IEnumerable<string> set)
{
    // Materialise once: the sequence is needed both for the initial pick and for
    // the scan, and a deferred/one-shot sequence must not be enumerated twice.
    var candidates = set as IList<string> ?? set.ToList();

    var closest = candidates.First();
    var closestValue = int.MaxValue;

    foreach (var item in candidates)
    {
        // Strip "(...)" fragments so they do not count towards the distance.
        var cleanedItem = Regex.Replace(item, @"\([^)]+\)", "").Trim();
        var value = LevenshteinDistanceExtensions.LevenshteinDistance(cleanedItem, test);

        // Strict comparison keeps the first candidate on equal distances.
        if (value < closestValue)
        {
            closest = item;
            closestValue = value;
        }
    }

    return closest;
}
/// <summary>
/// Compares <paramref name="input"/> with <paramref name="comparedTo"/> using the
/// algorithm selected by <paramref name="fuzzyLogicType"/>.
/// </summary>
/// <param name="input">The first string.</param>
/// <param name="comparedTo">The second string.</param>
/// <param name="fuzzyLogicType">Which comparison algorithm to run.</param>
/// <returns>
/// The value produced by the selected algorithm (for the longest common
/// subsequence, the second item of its result). Returns 0 for an unrecognised
/// <paramref name="fuzzyLogicType"/> or when the algorithm throws.
/// </returns>
public static double Compare(this string input, string comparedTo, FuzzyLogicType fuzzyLogicType)
{
    try
    {
        switch (fuzzyLogicType)
        {
            case FuzzyLogicType.DiceCoefficient:
                return DiceCoefficientExtensions.DiceCoefficient(input, comparedTo);

            case FuzzyLogicType.LevenshteinDistance:
                return LevenshteinDistanceExtensions.LevenshteinDistance(input, comparedTo, true);

            case FuzzyLogicType.LongestCommonSubsequence:
                return LongestCommonSubsequenceExtensions.LongestCommonSubsequence(input, comparedTo, true).Item2;

            case FuzzyLogicType.SimpleCompare:
                return SimpleCompare.Compare(input, comparedTo);

            default:
                return 0;
        }
    }
    catch (Exception)
    {
        // Any failure in the underlying comparer is swallowed; the caller gets 0.
        return 0;
    }
}
/// <summary>
/// Keeps only the opinions whose <c>Expert</c> name is the closest match (by
/// Levenshtein distance) to <paramref name="expectedName"/>.
/// </summary>
/// <param name="opinions">The opinions to filter.</param>
/// <param name="expectedName">The name every expert name is scored against.</param>
/// <returns>
/// The opinions whose <c>Expert</c> equals the best-scoring name; when several
/// names share the lowest score, the one that appears first in
/// <paramref name="opinions"/> is used. An empty sequence when
/// <paramref name="opinions"/> is empty.
/// </returns>
/// <exception cref="System.ArgumentNullException"><paramref name="opinions"/> is null.</exception>
public static IEnumerable<Opinion> RemoveWeakNameMatches(IEnumerable<Opinion> opinions, string expectedName)
{
    // Materialise once so a deferred source is not re-evaluated by the scoring
    // pass and again by the returned filter.
    var opinionList = opinions as IList<Opinion> ?? opinions.ToList();
    if (opinionList.Count == 0)
    {
        // Nothing to score: return an empty result instead of failing on Min().
        return Enumerable.Empty<Opinion>();
    }

    // Distance per distinct expert name, so repeated names are scored only once.
    var scoreCache = new Dictionary<string, int>();
    string bestMatch = null;
    var bestScore = int.MaxValue;

    foreach (var name in opinionList.Select(o => o.Expert))
    {
        if (scoreCache.ContainsKey(name))
        {
            continue;
        }

        var score = LevenshteinDistanceExtensions.LevenshteinDistance(name, expectedName, false);
        scoreCache[name] = score;

        // Strict comparison keeps the earliest name on ties.
        if (bestMatch == null || score < bestScore)
        {
            bestMatch = name;
            bestScore = score;
        }
    }

    return opinionList.Where(o => o.Expert.Equals(bestMatch));
}