Exemplo n.º 1
0
        /// <summary>
        /// Decides whether <c>unifFirstString</c> and <c>unifSecondString</c>
        /// should be treated as duplicates of each other.
        /// </summary>
        /// <remarks>
        /// Every similarity measure is computed up front. The strings are
        /// reported as duplicates as soon as one of these rules holds, checked
        /// in this order:
        /// 1. Levenshtein distance divided by the mean of the two lengths is
        ///    below 0.2 and the Dice coefficient is above 0.8.
        /// 2. The Dice coefficient is above 0.9.
        /// 3. The second item of the longest-common-subsequence result is
        ///    above 0.75.
        /// 4. The Double Metaphone encodings of the two strings have a
        ///    Levenshtein distance of 0.
        /// </remarks>
        /// <returns>
        /// <c>true</c> when at least one rule matches; otherwise <c>false</c>.
        /// </returns>
        public bool IsDuplicate()
        {
            // Mean length of the two inputs; dividing by 2.0 keeps the result floating-point.
            int    firstLength  = unifFirstString.Length;
            int    secondLength = unifSecondString.Length;
            double meanLength   = (firstLength + secondLength) / 2.0;

            // Raw similarity measures on the unified strings.
            int    editDistance = LevenshteinDistanceExtensions.LevenshteinDistance(unifSecondString, unifFirstString);
            double diceScore    = DiceCoefficientExtensions.DiceCoefficient(unifFirstString, unifSecondString);
            Tuple <string, double> commonSubsequence = LongestCommonSubsequenceExtensions.LongestCommonSubsequence(unifFirstString, unifSecondString);

            // Phonetic comparison: encode both strings, then measure how far apart the encodings are.
            string firstPhonetic    = DoubleMetaphoneExtensions.ToDoubleMetaphone(unifFirstString);
            string secondPhonetic   = DoubleMetaphoneExtensions.ToDoubleMetaphone(unifSecondString);
            int    phoneticDistance = LevenshteinDistanceExtensions.LevenshteinDistance(firstPhonetic, secondPhonetic);

            // Short-circuit evaluation keeps the rules in their original order.
            // The division is floating-point, so a mean length of 0 produces
            // NaN or infinity instead of throwing.
            return (editDistance / meanLength < 0.2 && diceScore > 0.8)
                   || diceScore > 0.9
                   || commonSubsequence.Item2 > 0.75
                   || phoneticDistance == 0;
        }
Exemplo n.º 2
0
        //---------------------------------------------------------------------
        /// <summary>
        /// Fuzzy-searches the values of a string-to-string dictionary and
        /// returns the entries whose value is similar enough to
        /// <paramref name="word"/>.
        /// </summary>
        /// <param name="word">
        /// The word to find.
        /// </param>
        /// <param name="wordList">
        /// The dictionary to search. Only the values are compared with
        /// <paramref name="word"/>; keys are carried over unchanged.
        /// </param>
        /// <param name="fuzzyness">
        /// Similarity threshold. For the named algorithms an entry is kept
        /// when its score is strictly greater than this value. With
        /// "Levenshtein Distance", a value of 0.8 means the edit distance is
        /// less than 20% of the longer string's length.
        /// </param>
        /// <param name="algorithm">
        /// Name of the similarity measure, matched exactly:
        /// "Levenshtein Distance" (default), "Dice Coefficient",
        /// "Longest Common Subsequence" or "Double Metaphone". Any other
        /// non-null value falls back to <c>word.FuzzyEquals</c>. A null value
        /// throws <see cref="NullReferenceException"/>.
        /// </param>
        /// <returns>
        /// A new dictionary containing the matching entries.
        /// </returns>
        public static Dictionary <string, string> Search_v3(string word, Dictionary <string, string> wordList, double fuzzyness, string algorithm = "Levenshtein Distance")
        {
            // Choose the per-value acceptance test once, then apply it in a single pass below.
            Func<string, bool> isMatch;

            if (algorithm.Equals("Levenshtein Distance"))
            {
                isMatch = candidate =>
                {
                    // Normalise the edit distance by the longer of the two strings.
                    var distance = LevenshteinDistanceExtensions.LevenshteinDistance(word, candidate);
                    var longest  = Math.Max(candidate.Length, word.Length);
                    var score    = 1.0 - (double)distance / longest;
                    return score > fuzzyness;
                };
            }
            else if (algorithm.Equals("Dice Coefficient"))
            {
                isMatch = candidate =>
                    DiceCoefficientExtensions.DiceCoefficient(word, candidate) > fuzzyness;
            }
            else if (algorithm.Equals("Longest Common Subsequence"))
            {
                // The second tuple item is the value compared against the threshold.
                isMatch = candidate =>
                    LongestCommonSubsequenceExtensions.LongestCommonSubsequence(word, candidate).Item2 > fuzzyness;
            }
            else if (algorithm.Equals("Double Metaphone"))
            {
                isMatch = candidate =>
                    DoubleMetaphoneExtensions.DoubleMetaphoneCoefficient(word, candidate) > fuzzyness;
            }
            else
            {
                // Unrecognised algorithm name: defer the decision to FuzzyEquals.
                isMatch = candidate => word.FuzzyEquals(candidate, fuzzyness);
            }

            return wordList
                   .Where(entry => isMatch(entry.Value))
                   .ToDictionary(entry => entry.Key, entry => entry.Value);
        }