예제 #1
0
        /// <summary>
        /// Scores two strings against each other with the string metric selected by
        /// <paramref name="simMetricType"/>.
        /// </summary>
        /// <param name="firstWord">First string handed to the metric.</param>
        /// <param name="secondWord">Second string handed to the metric.</param>
        /// <param name="simMetricType">
        /// Which metric to use. Any value that has no dedicated case below (including the
        /// default, <c>SimMetricType.Levenstein</c>) is scored with <c>Levenstein</c>.
        /// </param>
        /// <returns>The value returned by the selected metric's <c>GetSimilarity</c> call.</returns>
        public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
        {
            // A fresh metric instance is created on every call and used exactly once.
            switch (simMetricType)
            {
                case SimMetricType.BlockDistance:
                    return new BlockDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanLengthDeviation:
                    return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

                case SimMetricType.CosineSimilarity:
                    return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.DiceSimilarity:
                    return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.EuclideanDistance:
                    return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaccardSimilarity:
                    return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.Jaro:
                    return new Jaro().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaroWinkler:
                    return new JaroWinkler().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MatchingCoefficient:
                    return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MongeElkan:
                    return new MongeElkan().GetSimilarity(firstWord, secondWord);

                case SimMetricType.NeedlemanWunch:
                    return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

                case SimMetricType.OverlapCoefficient:
                    return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.QGramsDistance:
                    return new QGramsDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWaterman:
                    return new SmithWaterman().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotoh:
                    return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotohWindowedAffine:
                    return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanMeanLength:
                    return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

                default:
                    // Fallback for every value not listed above.
                    return new Levenstein().GetSimilarity(firstWord, secondWord);
            }
        }
예제 #2
0
        /// <summary>
        /// Demo entry point: runs each similarity/distance measure once on fixed sample
        /// strings, then waits for a key press. The computed values are only held in
        /// locals (nothing is printed), so they are meant to be inspected in a debugger.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        private static void Main(string[] args)
        {
            // ---------------------------------------------------------------
            // Bag-of-words similarity
            // ---------------------------------------------------------------

            // Original author's note: this is the recommended measure for
            // multi-word comparisons.
            var          bagSimilarity = new BagOfWordsSimilarity();
            const string query         = "John Smith";
            const string candidate     = "Mr. John Smith, Jr.";

            // Both inputs are wrapped in NormalizedString and then Tokenizer before scoring.
            // Original author's note: expected score is 1.0.
            var queryTokens     = new Tokenizer(new NormalizedString(query));
            var candidateTokens = new Tokenizer(new NormalizedString(candidate));
            var bagScore        = bagSimilarity.GetSimilarity(queryTokens, candidateTokens);


            // ---------------------------------------------------------------
            // Levenshtein
            // ---------------------------------------------------------------
            const string correctSpelling = "martha";
            const string swappedSpelling = "marhta";

            // The same Levenshtein object is used for both a distance and a similarity.
            var levenshtein = new Levenshtein();

            // Original author's note: expected edit distance is 2.
            var levenshteinDistance = levenshtein.GetDistance(correctSpelling, swappedSpelling);

            // Original author's note: NormalizedString strips special symbols and diacritics
            // and makes the comparison case-insensitive; expected score is 0.67.
            var normalizedCorrect = new NormalizedString(correctSpelling);
            var normalizedSwapped = new NormalizedString(swappedSpelling);
            var levenshteinScore  = levenshtein.GetSimilarity(normalizedCorrect, normalizedSwapped);

            // ---------------------------------------------------------------
            // Damerau-Levenshtein
            // ---------------------------------------------------------------
            // Used for both a distance and a similarity, like Levenshtein above.
            var damerauLevenshtein = new DamerauLevenshtein();

            // Original author's note: expected edit distance is 1.
            var damerauDistance = damerauLevenshtein.GetDistance(correctSpelling, swappedSpelling);

            // Original author's note: expected score is 0.83.
            var damerauScore = damerauLevenshtein.GetSimilarity(correctSpelling, swappedSpelling);

            // ---------------------------------------------------------------
            // Jaro, Jaro-Winkler
            // ---------------------------------------------------------------

            // Original author's note: these two only provide a similarity, not a distance.
            var firstToken        = new Token("dwayne");
            var secondToken       = new Token("duane");
            var jaroMetric        = new Jaro();          // constructed but never called below
            var jaroWinklerMetric = new JaroWinkler();

            // The returned score is discarded here.
            jaroWinklerMetric.GetSimilarity(firstToken, secondToken);


            // ---------------------------------------------------------------
            // Q-gram coefficients: Dice, Jaccard, Overlap
            // ---------------------------------------------------------------
            // NOTE(review): the original local names suggested unigram / bigram / trigram
            // variants, but all three coefficients are instantiated with Bigram.
            // Confirm which q-gram sizes were intended.
            var diceBigrams    = new DiceCoefficient <Bigram>();
            var jaccardBigrams = new JaccardCoefficient <Bigram>();
            var overlapBigrams = new OverlapCoefficient <Bigram>();

            // Original author's note: expected score is 0.5.
            var jaccardScore = jaccardBigrams.GetSimilarity(query, candidate);

            // Original author's note: expected score is 0.67.
            var diceScore = diceBigrams.GetSimilarity(query, candidate);

            // Original author's note: expected score is 1.0.
            var overlapScore = overlapBigrams.GetSimilarity(query, candidate);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }