Example #1
0
        /// <summary>
        /// Decides whether the two unified strings held by this instance should be
        /// treated as duplicates of each other.
        /// </summary>
        /// <remarks>
        /// Every similarity metric is computed up front. The strings are reported as
        /// duplicates as soon as one of these rules holds, checked in this order:
        /// <list type="number">
        /// <item><description>the Levenshtein distance divided by the average length is below 0.2 and the Dice coefficient is above 0.8;</description></item>
        /// <item><description>the Dice coefficient is above 0.9;</description></item>
        /// <item><description>the score (Item2) returned by LongestCommonSubsequence is above 0.75;</description></item>
        /// <item><description>the Double Metaphone encodings of both strings have a Levenshtein distance of 0.</description></item>
        /// </list>
        /// </remarks>
        /// <returns><c>true</c> when at least one rule matches; otherwise <c>false</c>.</returns>
        public bool IsDuplicate()
        {
            double averageLength = (unifFirstString.Length + unifSecondString.Length) / 2.0;

            int editDistance = LevenshteinDistanceExtensions.LevenshteinDistance(unifSecondString, unifFirstString);
            double dice = DiceCoefficientExtensions.DiceCoefficient(unifFirstString, unifSecondString);
            Tuple<string, double> commonSubsequence = LongestCommonSubsequenceExtensions.LongestCommonSubsequence(unifFirstString, unifSecondString);
            string firstPhonetic = DoubleMetaphoneExtensions.ToDoubleMetaphone(unifFirstString);
            string secondPhonetic = DoubleMetaphoneExtensions.ToDoubleMetaphone(unifSecondString);
            int phoneticDistance = LevenshteinDistanceExtensions.LevenshteinDistance(firstPhonetic, secondPhonetic);

            // Short-circuit evaluation keeps the rules in their original priority order.
            return (editDistance / averageLength < 0.2 && dice > 0.8)
                   || dice > 0.9
                   || commonSubsequence.Item2 > 0.75
                   || phoneticDistance == 0;
        }
Example #2
0
        /// <summary>
        /// Returns the item of <paramref name="set"/> whose cleaned text has the smallest
        /// Levenshtein distance to <paramref name="test"/>.
        /// </summary>
        /// <remarks>
        /// Before comparison, every segment matching <c>\([^)]+\)</c> (a non-empty
        /// parenthesised group) is removed from the item and the remainder is trimmed.
        /// The value returned is the original, uncleaned item. When several items share
        /// the smallest distance, the earliest one wins. The sequence is enumerated
        /// exactly once, so deferred or non-repeatable sequences are safe to pass.
        /// </remarks>
        /// <param name="test">The text to match against.</param>
        /// <param name="set">The candidate strings.</param>
        /// <returns>The closest candidate, exactly as it appears in <paramref name="set"/>.</returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="set"/> is null.</exception>
        /// <exception cref="System.InvalidOperationException"><paramref name="set"/> contains no elements.</exception>
        public static string Match(string test, IEnumerable<string> set)
        {
            if (set == null)
            {
                throw new System.ArgumentNullException("set");
            }

            string closest = null;
            int closestValue = int.MaxValue;
            bool hasItems = false;

            foreach (var item in set)
            {
                if (!hasItems)
                {
                    // The first element is the fallback answer, as set.First() used to be.
                    closest = item;
                    hasItems = true;
                }

                var cleanedItem = Regex.Replace(item, @"\([^)]+\)", "").Trim();
                var value = LevenshteinDistanceExtensions.LevenshteinDistance(cleanedItem, test);

                // Strict comparison keeps the earliest item on ties.
                if (value < closestValue)
                {
                    closest = item;
                    closestValue = value;
                }
            }

            if (!hasItems)
            {
                // Same failure mode the previous set.First() call produced for an empty sequence.
                throw new System.InvalidOperationException("Sequence contains no elements");
            }

            return closest;
        }
Example #3
0
        /// <summary>
        /// Scores <paramref name="input"/> against <paramref name="comparedTo"/> using the
        /// comparer selected by <paramref name="fuzzyLogicType"/>.
        /// </summary>
        /// <param name="input">The string being compared.</param>
        /// <param name="comparedTo">The string to compare against.</param>
        /// <param name="fuzzyLogicType">Which comparer to run.</param>
        /// <returns>
        /// The value produced by the selected comparer, or 0 when the type is not handled
        /// or the comparer throws.
        /// </returns>
        public static double Compare(this string input, string comparedTo, FuzzyLogicType fuzzyLogicType)
        {
            try
            {
                switch (fuzzyLogicType)
                {
                    case FuzzyLogicType.DiceCoefficient:
                        return DiceCoefficientExtensions.DiceCoefficient(input, comparedTo);

                    case FuzzyLogicType.LevenshteinDistance:
                        return LevenshteinDistanceExtensions.LevenshteinDistance(input, comparedTo, true);

                    case FuzzyLogicType.LongestCommonSubsequence:
                        // Only the second tuple item (the numeric score) is reported.
                        return LongestCommonSubsequenceExtensions.LongestCommonSubsequence(input, comparedTo, true).Item2;

                    case FuzzyLogicType.SimpleCompare:
                        return SimpleCompare.Compare(input, comparedTo);

                    default:
                        return 0;
                }
            }
            catch (Exception)
            {
                // Best effort: any comparer failure is reported as a score of 0.
                return 0;
            }
        }
Example #4
0
        /// <summary>
        /// Keeps only the opinions whose <c>Expert</c> name is the closest match to
        /// <paramref name="expectedName"/> by Levenshtein distance.
        /// </summary>
        /// <remarks>
        /// Each distinct expert name is scored once. When several names share the smallest
        /// distance, the one encountered first in <paramref name="opinions"/> wins.
        /// The input is enumerated a single time; the returned sequence is a filter
        /// over that snapshot.
        /// </remarks>
        /// <param name="opinions">The opinions to filter.</param>
        /// <param name="expectedName">The name the experts are compared against.</param>
        /// <returns>
        /// The opinions attributed to the best-matching expert, or an empty sequence when
        /// <paramref name="opinions"/> is empty.
        /// </returns>
        /// <exception cref="System.ArgumentNullException"><paramref name="opinions"/> is null.</exception>
        public static IEnumerable<Opinion> RemoveWeakNameMatches(IEnumerable<Opinion> opinions, string expectedName)
        {
            if (opinions == null)
            {
                throw new System.ArgumentNullException("opinions");
            }

            // Snapshot once: the sequence is scored and then filtered, and a deferred
            // source must not be re-evaluated between those two steps.
            List<Opinion> snapshot = opinions.ToList();
            if (snapshot.Count == 0)
            {
                // Nothing to score; previously this case failed inside Min().
                return Enumerable.Empty<Opinion>();
            }

            var scoreCache = new Dictionary<string, int>();
            string bestMatch = null;
            int bestScore = int.MaxValue;

            foreach (var opinion in snapshot)
            {
                var name = opinion.Expert;
                if (scoreCache.ContainsKey(name))
                {
                    continue;
                }

                int score = LevenshteinDistanceExtensions.LevenshteinDistance(name, expectedName, false);
                scoreCache[name] = score;

                // Strict comparison: the first-seen name wins ties, independent of
                // dictionary enumeration order.
                if (bestMatch == null || score < bestScore)
                {
                    bestMatch = name;
                    bestScore = score;
                }
            }

            return snapshot.Where(o => o.Expert.Equals(bestMatch));
        }