public static int GetDistances(string s, string t, FuzzyAlgorithm algorithm = FuzzyAlgorithm.LevenshteinDistance)
        {
            // Dispatch to the distance implementation selected by the caller.
            if (algorithm == FuzzyAlgorithm.DamerauLevenshteinDistance)
            {
                return DamerauLevenshteinDistance.GetDamerauLevenshteinDistance(s, t);
            }

            if (algorithm == FuzzyAlgorithm.HammingDistance)
            {
                return HammingDistance.GetHammingDistance(s, t);
            }

            // Levenshtein is both the explicit choice and the fallback for any other value.
            return LevenshteinDistance.GetLevenshteinDistance(s, t);
        }
Ejemplo n.º 2
0
        public void DamerauLevenshteinTest()
        {
            // A four-character mixed Latin/Cyrillic word against a three-character Cyrillic word;
            // the test pins the reported distance at 3.
            const string first  = "fкул";
            const string second = "кит";

            Assert.AreEqual(3, DamerauLevenshteinDistance.GetDistance(first, second));
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Runs a fuzzy search: analyzes the search term into tokens, collects every index key
        /// whose <c>DamerauLevenshteinDistance.GetDistance</c> to any token is below 2, and loads
        /// the documents referenced by those keys.
        /// </summary>
        /// <param name="indexModel">Index to search; passed through to <c>DatabaseService</c>.</param>
        /// <param name="searchModel">Supplies the search <c>Term</c> and the index <c>Key</c>.</param>
        /// <returns>The documents found, one per distinct id, in first-seen order.</returns>
        public async Task <List <DocumentModel> > ExecuteSearch(IndexModel indexModel, SearchModel searchModel)
        {
            var tokens = await _analyzer.Anal(searchModel.Term);
            var idxs   = await DatabaseService.GetIndexes(indexModel, searchModel.Key);

            // 'keys' keeps the matching keys in first-seen order (this drives the order of the
            // result); 'keySet' mirrors it so membership checks are O(1) instead of a list scan.
            var keys   = new List <string>();
            var keySet = new HashSet <string>();

            foreach (var dict in idxs)
            {
                var fuzzyMatches = dict.Keys.Where(x => tokens
                                                   .Any(y => DamerauLevenshteinDistance
                                                        .GetDistance(x, y) < 2));

                foreach (var k in fuzzyMatches)
                {
                    // HashSet.Add returns false for a key that was already recorded
                    if (keySet.Add(k))
                    {
                        keys.Add(k);
                    }
                }
            }

            var ids = new List <Guid>();

            foreach (var dict in idxs)
            {
                // Only use an index dictionary that contains all but at most two of the matched keys
                if (dict.Keys.Count(x => keySet.Contains(x)) >= keys.Count - 2)
                {
                    foreach (var key in keys)
                    {
                        if (dict.ContainsKey(key))
                        {
                            ids.Add(dict[key]);
                        }
                    }
                }
            }

            var result = new List <DocumentModel>();

            // Documents are fetched one at a time, once per distinct id
            foreach (var id in ids.Distinct())
            {
                result.Add(await DatabaseService.FindById(indexModel, id));
            }
            return(result);
        }
Ejemplo n.º 4
0
        //private StringBuilder msg = new StringBuilder();

        /// <summary>
        /// Compares two city names. Each name is normalized and, if it contains "/" or
        /// (otherwise) "-", reduced via <c>GetLeftPart</c> before the fuzzy comparison.
        /// When either name was reduced, the similarity is scaled by 0.9.
        /// </summary>
        public override float Compare(string str1, string str2)
        {
            // normalize, e.g. "Wiesbaden-Dotzheim" -> "wiesbaden-dotzheim"
            string first  = this.Normalize(str1);
            string second = this.Normalize(str2);

            bool wasReduced = false;

            // "/" takes precedence over "-" (e.g. "Mainz-Bingen/Bingen" is split on "/").
            // NOTE(review): GetLeftPart is defined elsewhere; presumably it returns the text
            // before the separator - confirm.
            string firstSeparator = first.Contains("/") ? "/" : (first.Contains("-") ? "-" : null);
            if (firstSeparator != null)
            {
                first      = this.GetLeftPart(first, firstSeparator);
                wasReduced = true;
            }

            string secondSeparator = second.Contains("/") ? "/" : (second.Contains("-") ? "-" : null);
            if (secondSeparator != null)
            {
                second     = this.GetLeftPart(second, secondSeparator);
                wasReduced = true;
            }

            StringFuzzyComparer comparer = new DamerauLevenshteinDistance();
            float rawSimilarity          = comparer.Compare(first, second);

            // 100% cannot be reached when one city is only part of the other
            return(rawSimilarity * (wasReduced ? 0.9f : 1f));
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Compares two names. If either raw input ends with "." it is treated as shortened
        /// (e.g. "Müller" -> "M."): both normalized names are cut to the shorter length and
        /// the resulting similarity is scaled by 0.8. Otherwise the normalized names are
        /// compared in full.
        /// </summary>
        public override float Compare(string str1, string str2)
        {
            // The abbreviation check looks at the raw inputs, before normalization
            bool isShortened = str1.EndsWith(".") || str2.EndsWith(".");

            // normalize, e.g. "M.-Thurgau" -> "m thurgau"
            string left  = this.Normalize(str1);
            string right = this.Normalize(str2);

            if (isShortened)
            {
                // compare only as many characters as the shorter (abbreviated) name has
                int commonLength = Math.Min(left.Length, right.Length);
                left  = left.TrySubstring(commonLength);
                right = right.TrySubstring(commonLength);
            }

            StringFuzzyComparer comparer = new DamerauLevenshteinDistance();
            float score                  = comparer.Compare(left, right);

            if (isShortened)
            {
                // 100% cannot be reached when one name is shortened
                score = score * 0.8f;
            }

            return(score);
        }
Ejemplo n.º 6
0
        /// <summary>
        /// Compares two company names, also considering their initials
        /// (e.g. "International Business Machines" -> "IBM").
        /// Returns 0.9 when the initials of both normalized names are equal; otherwise the
        /// better of the initials similarity and the full-name similarity.
        /// </summary>
        public override float Compare(string str1, string str2)
        {
            string normalized1 = this.Normalize(str1);
            string normalized2 = this.Normalize(str2);

            // First characters of each word, concatenated
            string initials1 = this.GetFirstCharsFromWords(normalized1).Join("");
            string initials2 = this.GetFirstCharsFromWords(normalized2).Join("");

            if (initials1 != initials2)
            {
                StringFuzzyComparer comparer = new DamerauLevenshteinDistance();
                float byInitials             = comparer.Compare(initials1, initials2);
                float byFullName             = comparer.Compare(normalized1, normalized2);

                // whichever view matches better wins
                return(Math.Max(byInitials, byFullName));
            }

            // identical initials (e.g. "IBM" == "IBM") score a fixed 0.9, without comparing the full names
            return(0.9f);
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Writes, to the debug output, the score of <paramref name="input"/> against every
        /// entry of <paramref name="testCases"/> for each available fuzzy comparer.
        /// </summary>
        /// <param name="input">The string compared against each test case.</param>
        /// <param name="testCases">The strings to compare <paramref name="input"/> with.</param>
        private void StringCompareTest(string input, string[] testCases)
        {
            // Section headers are formatted explicitly: Debug.WriteLine(string, string) is the
            // (message, category) overload, so passing the format string and 'input' directly
            // would print the unformatted "{0}" text with 'input' as its category.
            Debug.WriteLine(string.Format("Dice Coefficient for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer dice = new DiceCoefficent();
                double diceValue         = dice.Compare(input, name);
                Debug.WriteLine("\t{0} against {1}", diceValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Jaccard Coefficient for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer jaccard = new Jaccard();
                double jaccardValue         = jaccard.Compare(input, name);
                Debug.WriteLine("\t{0} against {1}", jaccardValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("ExtendedJaccard Coefficient for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer exjaccard = new ExtendedJaccard();
                double exjaccardValue         = exjaccard.Compare(input, name);
                Debug.WriteLine("\t{0} against {1}", exjaccardValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("DamerauLevenshteinDistance for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer lev = new DamerauLevenshteinDistance();
                var levenStein          = lev.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", levenStein, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("JaroWinkler for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer jw = new JaroWinkler();
                var jwValue            = jw.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", jwValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Monge-Elkan for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer me = new MongeElkan();
                var meValue            = me.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", meValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("NGramDistance(2) for {0}:", input));
            foreach (var name in testCases)
            {
                // Configure the n-gram length at construction instead of through an unchecked 'as' cast
                StringFuzzyComparer ngram2 = new NGramDistance { NGramLength = 2 };
                var ngramValue2            = ngram2.Compare(input, name);

                Debug.WriteLine("\t{0}, against {1}", ngramValue2, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("SmithWaterman for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer sw = new SmithWaterman();
                var swValue            = sw.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", swValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Extended Editex for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer edx = new ExtendedEditex();
                var edxValue            = edx.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", edxValue, name);
            }

            Debug.WriteLine("");
            Debug.WriteLine(string.Format("Longest Common Subsequence for {0}:", input));
            foreach (var name in testCases)
            {
                StringFuzzyComparer lcs = new LongestCommonSubsequence();
                var lcsValue            = lcs.Compare(input, name);
                Debug.WriteLine("\t{0}, against {1}", lcsValue.ToString("###,###.00000"), name);
            }

            Debug.WriteLine("");
        }
Ejemplo n.º 8
0
            public void ShouldReturnDamerauLevenshteinDistanceOf0()
            {
                // Comparing an input with itself must report a distance of 0
                Assert.AreEqual(0, DamerauLevenshteinDistance.DamLev(InputArm, InputArm));
            }
Ejemplo n.º 9
0
            public void ShouldReturnDamerauLevenshteinDistanceOf2()
            {
                // InputRam vs. InputMom is expected to be two edits apart
                Assert.AreEqual(2, DamerauLevenshteinDistance.Calculate(InputRam, InputMom));
            }
Ejemplo n.º 10
0
            public void ShouldSetDamerauLevenshteinDistanceInput2()
            {
                // The second constructor argument must be exposed through Input2
                var created = new DamerauLevenshteinDistance(InputRam, InputArm);
                Algorithm = created;

                Assert.AreEqual(InputArm, Algorithm.Input2);
            }
Ejemplo n.º 11
0
            public void ShouldReturnDamerauLevenshteinDistanceInstance()
            {
                // Constructing with two inputs must yield a usable instance
                var created = new DamerauLevenshteinDistance(InputArm, InputRam);
                Algorithm = created;

                Assert.IsNotNull(Algorithm);
            }
Ejemplo n.º 12
0
            public void ShouldReturnDamerauLevenshteinDistanceOfNegative1()
            {
                // With a third argument of 1, DamLev is expected to report -1 for this pair.
                // NOTE(review): presumably the third argument is a maximum distance and -1 means
                // "exceeded" - confirm against DamLev.
                Assert.AreEqual(-1, DamerauLevenshteinDistance.DamLev(InputRam, InputMom, 1));
            }
Ejemplo n.º 13
0
            public void ShouldReturnDamerauLevenshteinDistanceOf1()
            {
                // InputRam vs. InputReam is expected to be a single edit apart
                Assert.AreEqual(1, DamerauLevenshteinDistance.DamLev(InputRam, InputReam));
            }
        /// <summary>
        /// Greedily pairs each comparison's PK file with one of the candidate files, based on
        /// the <c>DamerauLevenshteinDistance.Calculate</c> score between their lower-cased file
        /// names (without extension). The globally lowest score is matched first; each file is
        /// used at most once.
        /// </summary>
        /// <param name="comparisons">Comparisons whose <c>PkFile.Path</c> is matched against <paramref name="files"/>.</param>
        /// <param name="files">Candidate file paths.</param>
        /// <returns>
        /// One entry per comparison, in enumeration order: the paired file, or <c>null</c> when
        /// no candidate file was left for that comparison.
        /// </returns>
        private List <string> pairWithPk(IEnumerable <TreatmentComparison> comparisons, List <string> files)
        {
            // Materialize once: the sequence is needed both to size the result and to build the scores
            var comparisonList = comparisons.ToList();
            var pairedFiles    = Enumerable.Repeat <string>(null, comparisonList.Count).ToList();
            var distance       = new DamerauLevenshteinDistance();

            // Initialize scores array: comparison index -> (file index -> score)
            var scoresArray = comparisonList.Select((c, i) => new { c, i })
                              .ToDictionary(row => row.i, row =>
            {
                var pkFilename = Path.GetFileNameWithoutExtension(row.c.PkFile.Path).ToLower();
                return(files.Select((f, j) => new { f, j })
                       .ToDictionary(col => col.j, col =>
                                     distance.Calculate(
                                         Path.GetFileNameWithoutExtension(col.f).ToLower(), pkFilename)
                                     ));
            });

            // Iteratively extract the best matches while more than one (comparison, file) cell remains
            while (scoresArray.Any() && scoresArray.Count * scoresArray.First().Value.Count > 1)
            {
                // find the current best match (lowest score; the first one encountered wins ties)
                int currentScore = int.MaxValue, pkId = -1, fId = -1;
                foreach (var row in scoresArray)
                {
                    int rowScore = int.MaxValue, rowfId = -1;
                    foreach (var col in row.Value)
                    {
                        if (col.Value < rowScore)
                        {
                            rowfId   = col.Key;
                            rowScore = col.Value;
                        }
                    }
                    if (rowScore < currentScore)
                    {
                        pkId         = row.Key;
                        fId          = rowfId;
                        currentScore = rowScore;
                    }
                }

                // Save selected match
                pairedFiles[pkId] = files[fId];

                // Clean scores array for next iteration: drop the matched comparison and file
                scoresArray.Remove(pkId);
                foreach (var row in scoresArray)
                {
                    row.Value.Remove(fId);
                }
            }

            // If exactly one match remains, take it. When the files ran out before the
            // comparisons did (or there were no files at all), the remaining rows are empty
            // and those comparisons stay unpaired (null) instead of throwing.
            if (scoresArray.Any())
            {
                var lastRow = scoresArray.First();
                if (lastRow.Value.Any())
                {
                    pairedFiles[lastRow.Key] = files[lastRow.Value.First().Key];
                }
            }

            return(pairedFiles);
        }