コード例 #1
0
        /// <summary>
        /// Scores two strings against each other with the similarity metric selected by
        /// <paramref name="simMetricType"/>.
        /// </summary>
        /// <param name="firstWord">First string; handed to the metric unchanged.</param>
        /// <param name="secondWord">Second string; handed to the metric unchanged.</param>
        /// <param name="simMetricType">
        /// Which metric to instantiate. Any value without a dedicated case below
        /// (including the default, <c>SimMetricType.Levenstein</c>) is scored with <c>Levenstein</c>.
        /// </param>
        /// <returns>The result of the selected metric's <c>GetSimilarity</c> call.</returns>
        public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
        {
            // A new metric instance is created on every call; nothing is cached.
            switch (simMetricType)
            {
                case SimMetricType.BlockDistance:
                    return new BlockDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanLengthDeviation:
                    return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

                case SimMetricType.CosineSimilarity:
                    return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.DiceSimilarity:
                    return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.EuclideanDistance:
                    return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaccardSimilarity:
                    return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.Jaro:
                    return new Jaro().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaroWinkler:
                    return new JaroWinkler().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MatchingCoefficient:
                    return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MongeElkan:
                    return new MongeElkan().GetSimilarity(firstWord, secondWord);

                case SimMetricType.NeedlemanWunch:
                    return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

                case SimMetricType.OverlapCoefficient:
                    return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.QGramsDistance:
                    return new QGramsDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWaterman:
                    return new SmithWaterman().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotoh:
                    return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotohWindowedAffine:
                    return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanMeanLength:
                    return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

                default:
                    // Fallback metric for Levenstein and for any unrecognised enum value.
                    return new Levenstein().GetSimilarity(firstWord, secondWord);
            }
        }
コード例 #2
0
        /// <summary>
        /// Computes the similarity of two strings with the metric named by <paramref name="type"/>.
        /// Both inputs are trimmed before they are handed to the metric.
        /// </summary>
        /// <param name="str1">First string; <c>Trim()</c> is called on it, so it must not be null.</param>
        /// <param name="str2">Second string; <c>Trim()</c> is called on it, so it must not be null.</param>
        /// <param name="type">
        /// One of the <c>AlgorithmTypes</c> constants. Any other value falls back to
        /// <c>SmithWatermanGotoh</c>.
        /// </param>
        /// <returns>The value produced by the selected metric's <c>GetSimilarity</c>.</returns>
        public double GetSimilarity(string str1, string str2, string type)
        {
            // The metric is constructed first; the inputs are trimmed only afterwards.
            IStringMetric metric = ResolveMetric(type);

            return metric.GetSimilarity(str1.Trim(), str2.Trim());
        }

        /// <summary>
        /// Maps an <c>AlgorithmTypes</c> name to a freshly constructed metric instance.
        /// </summary>
        private static IStringMetric ResolveMetric(string type)
        {
            switch (type)
            {
                case AlgorithmTypes.BlockDistance:
                    return new BlockDistance();

                case AlgorithmTypes.ChapmanLengthDeviation:
                    return new ChapmanLengthDeviation();

                case AlgorithmTypes.ChapmanMeanLength:
                    return new ChapmanMeanLength();

                case AlgorithmTypes.CosineSimilarity:
                    return new CosineSimilarity();

                case AlgorithmTypes.DiceSimilarity:
                    return new DiceSimilarity();

                case AlgorithmTypes.EuclideanDistance:
                    return new EuclideanDistance();

                case AlgorithmTypes.JaccardSimilarity:
                    return new JaccardSimilarity();

                case AlgorithmTypes.Jaro:
                    return new Jaro();

                case AlgorithmTypes.JaroWinkler:
                    return new JaroWinkler();

                case AlgorithmTypes.Levenstein:
                    return new Levenstein();

                case AlgorithmTypes.MatchingCoefficient:
                    return new MatchingCoefficient();

                case AlgorithmTypes.MongeElkan:
                    return new MongeElkan();

                case AlgorithmTypes.NeedlemanWunch:
                    return new NeedlemanWunch();

                case AlgorithmTypes.OverlapCoefficient:
                    return new OverlapCoefficient();

                case AlgorithmTypes.QGramsDistance:
                    return new QGramsDistance();

                case AlgorithmTypes.SmithWaterman:
                    return new SmithWaterman();

                case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
                    return new SmithWatermanGotohWindowedAffine();

                // The explicit SmithWatermanGotoh name and every unrecognised name
                // resolve to the same metric.
                case AlgorithmTypes.SmithWatermanGotoh:
                default:
                    return new SmithWatermanGotoh();
            }
        }
コード例 #3
0
 // [SetUp]
 // Constructor; the [SetUp] attribute above is left commented out.
 // Calls LoadData() (defined outside this snippet) and then stores a new
 // OverlapCoefficient instance in _myOverlapCoefficient.
 public OverlapCoefficientUnitTests()
 {
     LoadData();
     _myOverlapCoefficient = new OverlapCoefficient();
 }
コード例 #4
0
ファイル: Program.cs プロジェクト: Rozinek/BlueSimilarity
        /// <summary>
        /// Demo entry point: walks through several similarity/distance classes
        /// (bag-of-words, Levenshtein, Damerau-Levenshtein, Jaro-Winkler and q-gram
        /// coefficients), storing each result in a local, then waits for a key press.
        /// None of the computed values are printed or otherwise used.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        private static void Main(string[] args)
        {
            /****************************************************/
            /*              BagOfTokensSimilarity               */
            /****************************************************/

            // the recommended method for complex similarity over multiple words
            var          bagOfTokens = new BagOfWordsSimilarity();
            const string pattern     = "John Smith";
            const string targetText  = "Mr. John Smith, Jr.";

            // using normalized string and tokenizer returns score 1.0
            var resultingSim = bagOfTokens.GetSimilarity(new Tokenizer(new NormalizedString(pattern)),
                                                         new Tokenizer(new NormalizedString(targetText)));


            /****************************************************/
            /*              Levenshtein                         */
            /****************************************************/
            const string nameCorrect = "martha";
            const string nameError   = "marhta";

            // Levenshtein distance (implements interface IDistance)
            // & similarity (implements interface ISimilarity)
            var lev = new Levenshtein();

            // returns edit distance 2
            var distLev = lev.GetDistance(nameCorrect, nameError);

            // a normalized string removes special symbols and diacritics and makes the comparison case-insensitive
            // returns score 0.67
            var simLev = lev.GetSimilarity(new NormalizedString(nameCorrect), new NormalizedString(nameError));

            /****************************************************/
            /*              Damerau-Levenshtein                 */
            /****************************************************/
            // DamerauLevenshtein implements IDistance and ISimilarity
            var damLev = new DamerauLevenshtein();

            // returns edit distance 1
            var distDamLev = damLev.GetDistance(nameCorrect, nameError);

            // returns score 0.83
            var simDamLev = damLev.GetSimilarity(nameCorrect, nameError);

            /****************************************************/
            /*              Jaro, Jaro-Winkler                  */
            /****************************************************/

            // Jaro, Jaro-Winkler implements only ISimilarity
            // NOTE(review): `jaro` is constructed but never used in this method.
            var nameFirst   = new Token("dwayne");
            var nameSecond  = new Token("duane");
            var jaro        = new Jaro();
            var jaroWinkler = new JaroWinkler();

            // the returned similarity is discarded here
            jaroWinkler.GetSimilarity(nameFirst, nameSecond);


            /****************************************************/
            /*              Q-grams coefficient                 */
            /****************************************************/
            // q-grams similarity coefficient - Dice, Jaccard, Overlap
            // with different q-grams type
            // NOTE(review): the variable names say unigrams/bigrams/trigrams, but all three
            // are instantiated with Bigram - confirm whether Unigram/Trigram were intended.
            var diceUnigrams    = new DiceCoefficient <Bigram>();
            var jaccardBigrams  = new JaccardCoefficient <Bigram>();
            var overlapTrigrams = new OverlapCoefficient <Bigram>();

            // returns score 0.5
            var jaccardSim = jaccardBigrams.GetSimilarity(pattern, targetText);

            // returns score 0.67
            var diceSim = diceUnigrams.GetSimilarity(pattern, targetText);

            // returns score 1.0
            var overlapSim = overlapTrigrams.GetSimilarity(pattern, targetText);

            // keep the console window open until a key is pressed
            Console.ReadKey();
        }
コード例 #5
0
 /// <summary>
 /// Creates one OverlapCoefficient instance per q-gram type (unigram, bigram, trigram)
 /// and stores each in its corresponding field.
 /// </summary>
 public void Initializate()
 {
     // Ordered by q-gram size; the three initialisations do not depend on each other.
     _overlapCoefUnigram = new OverlapCoefficient<Unigram>();
     _overlapdCoefBigram = new OverlapCoefficient<Bigram>();
     _overlapCoefTrigram = new OverlapCoefficient<Trigram>();
 }
コード例 #6
0
 // Calls LoadData() (defined outside this snippet) and then stores a new
 // OverlapCoefficient instance in myOverlapCoefficient.
 // NOTE(review): the name suggests a per-test setup hook, but no test-framework
 // attribute is visible here - confirm how it is invoked.
 public void SetUp() {
     LoadData();
     myOverlapCoefficient = new OverlapCoefficient();
 }