コード例 #1
0
 // Checks Jaro.Similarity against the expected score for several non-empty
 // word pairs; each comparison uses a precision of 7 decimal places.
 public void NonEmptyCases()
 {
     var samples = new[]
     {
         new { Expected = 0.8222222, Left = "DWAYNE", Right = "DUANE" },
         new { Expected = 0.9444444, Left = "MARTHA", Right = "MARHTA" },
         new { Expected = 0.7666667, Left = "DIXON", Right = "DICKSONX" },
         new { Expected = 0.8962963, Left = "JELLYFISH", Right = "SMELLYFISH" },
     };

     foreach (var sample in samples)
     {
         Assert.Equal(sample.Expected, Jaro.Similarity(sample.Left, sample.Right), 7);
     }
 }
コード例 #2
0
        /// <summary>
        /// Scores two strings against each other with the similarity metric selected
        /// by <paramref name="simMetricType"/>.
        /// </summary>
        /// <param name="firstWord">First string passed to the metric.</param>
        /// <param name="secondWord">Second string passed to the metric.</param>
        /// <param name="simMetricType">
        /// Metric to use. Any value without a dedicated case below is handled by
        /// <c>Levenstein</c>, which is also the default argument.
        /// </param>
        /// <returns>The result of the selected metric's <c>GetSimilarity</c> call.</returns>
        public static double ApproximatelyEquals(this string firstWord, string secondWord, SimMetricType simMetricType = SimMetricType.Levenstein)
        {
            switch (simMetricType)
            {
                case SimMetricType.BlockDistance:
                    return new BlockDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanLengthDeviation:
                    return new ChapmanLengthDeviation().GetSimilarity(firstWord, secondWord);

                case SimMetricType.ChapmanMeanLength:
                    return new ChapmanMeanLength().GetSimilarity(firstWord, secondWord);

                case SimMetricType.CosineSimilarity:
                    return new CosineSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.DiceSimilarity:
                    return new DiceSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.EuclideanDistance:
                    return new EuclideanDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaccardSimilarity:
                    return new JaccardSimilarity().GetSimilarity(firstWord, secondWord);

                case SimMetricType.Jaro:
                    return new Jaro().GetSimilarity(firstWord, secondWord);

                case SimMetricType.JaroWinkler:
                    return new JaroWinkler().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MatchingCoefficient:
                    return new MatchingCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.MongeElkan:
                    return new MongeElkan().GetSimilarity(firstWord, secondWord);

                case SimMetricType.NeedlemanWunch:
                    return new NeedlemanWunch().GetSimilarity(firstWord, secondWord);

                case SimMetricType.OverlapCoefficient:
                    return new OverlapCoefficient().GetSimilarity(firstWord, secondWord);

                case SimMetricType.QGramsDistance:
                    return new QGramsDistance().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWaterman:
                    return new SmithWaterman().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotoh:
                    return new SmithWatermanGotoh().GetSimilarity(firstWord, secondWord);

                case SimMetricType.SmithWatermanGotohWindowedAffine:
                    return new SmithWatermanGotohWindowedAffine().GetSimilarity(firstWord, secondWord);

                default:
                    // Levenstein is the fallback for every value not listed above.
                    return new Levenstein().GetSimilarity(firstWord, secondWord);
            }
        }
コード例 #3
0
 // Click handler: passes the text of JaroText1 and JaroText2 to Jaro.Func
 // and shows the result, converted to a string, in JaroRez.
 private void Jaro_button_Click(object sender, EventArgs e)
 {
     var similarity = Jaro.Func(JaroText1.Text, JaroText2.Text);

     JaroRez.Text = similarity.ToString();
 }
コード例 #4
0
        /// <summary>
        /// Comparison: for every two-word row, calls Jaro.Func, ExJaro.distance,
        /// JaroVincler.Func, ExJaroWincler.distance, Levenstein.Func and
        /// SimMetricsMetricUtilities.Levenstein.GetSimilarity, timing each call.
        /// </summary>
        /// <param name="data">
        /// Rows of words. Rows that are null or do not contain exactly two entries are skipped.
        /// </param>
        /// <returns>
        /// One 14-element array per processed row: for each of the six calls (in the order
        /// listed above) the score rounded to two decimals followed by the elapsed
        /// milliseconds rounded to two decimals, then the two input words.
        /// </returns>
        private List<string[]> Sravn(string[][] data)
        {
            List<string[]> results = new List<string[]>();

            foreach (string[] pair in data)
            {
                // Number of words in the row: only pairs can be compared.
                if (pair == null || pair.Length != 2)
                {
                    continue;
                }

                string first  = pair[0];
                string second = pair[1];

                // Created before any timer starts so construction is not measured.
                SimMetricsMetricUtilities.Levenstein simMetricsLevenstein = new SimMetricsMetricUtilities.Levenstein();

                Stopwatch timer = Stopwatch.StartNew();
                double jaroScore = Math.Round(Jaro.Func(first, second), 2);
                string jaroMs    = Math.Round(timer.Elapsed.TotalMilliseconds, 2).ToString();

                timer.Restart();
                double exJaroScore = Math.Round(ExJaro.distance(first, second), 2);
                string exJaroMs    = Math.Round(timer.Elapsed.TotalMilliseconds, 2).ToString();

                // -----

                timer.Restart();
                double jaroWinklerScore = Math.Round(JaroVincler.Func(first, second), 2);
                string jaroWinklerMs    = Math.Round(timer.Elapsed.TotalMilliseconds, 2).ToString();

                timer.Restart();
                double exJaroWinklerScore = Math.Round(ExJaroWincler.distance(first, second), 2);
                string exJaroWinklerMs    = Math.Round(timer.Elapsed.TotalMilliseconds, 2).ToString();

                // -----

                timer.Restart();
                double levenshteinScore = Math.Round(Levenstein.Func(first, second), 2);
                string levenshteinMs    = Math.Round(timer.Elapsed.TotalMilliseconds, 2).ToString();

                timer.Restart();
                double simMetricsScore = Math.Round(simMetricsLevenstein.GetSimilarity(first, second), 2);
                string simMetricsMs    = Math.Round(timer.Elapsed.TotalMilliseconds, 2).ToString();
                timer.Stop();

                results.Add(new string[]
                {
                    jaroScore.ToString(), jaroMs, exJaroScore.ToString(), exJaroMs,
                    jaroWinklerScore.ToString(), jaroWinklerMs, exJaroWinklerScore.ToString(), exJaroWinklerMs,
                    levenshteinScore.ToString(), levenshteinMs, simMetricsScore.ToString(), simMetricsMs,
                    first, second
                });
            }

            return results;
        }
コード例 #5
0
        /// <summary>
        /// Computes the similarity of two strings with the metric named by <paramref name="type"/>.
        /// Both strings are trimmed before they are handed to the metric.
        /// </summary>
        /// <param name="str1">First string to compare.</param>
        /// <param name="str2">Second string to compare.</param>
        /// <param name="type">
        /// One of the <c>AlgorithmTypes</c> constants; any other value selects
        /// <c>SmithWatermanGotoh</c>.
        /// </param>
        /// <returns>The value returned by the selected metric's <c>GetSimilarity</c>.</returns>
        public double GetSimilarity(string str1, string str2, string type)
        {
            IStringMetric metric = CreateStringMetric(type);

            return metric.GetSimilarity(str1.Trim(), str2.Trim());
        }

        /// <summary>
        /// Maps an <c>AlgorithmTypes</c> constant to a new instance of the matching metric.
        /// </summary>
        private static IStringMetric CreateStringMetric(string type)
        {
            switch (type)
            {
                case AlgorithmTypes.BlockDistance:
                    return new BlockDistance();

                case AlgorithmTypes.ChapmanLengthDeviation:
                    return new ChapmanLengthDeviation();

                case AlgorithmTypes.ChapmanMeanLength:
                    return new ChapmanMeanLength();

                case AlgorithmTypes.CosineSimilarity:
                    return new CosineSimilarity();

                case AlgorithmTypes.DiceSimilarity:
                    return new DiceSimilarity();

                case AlgorithmTypes.EuclideanDistance:
                    return new EuclideanDistance();

                case AlgorithmTypes.JaccardSimilarity:
                    return new JaccardSimilarity();

                case AlgorithmTypes.Jaro:
                    return new Jaro();

                case AlgorithmTypes.JaroWinkler:
                    return new JaroWinkler();

                case AlgorithmTypes.Levenstein:
                    return new Levenstein();

                case AlgorithmTypes.MatchingCoefficient:
                    return new MatchingCoefficient();

                case AlgorithmTypes.MongeElkan:
                    return new MongeElkan();

                case AlgorithmTypes.NeedlemanWunch:
                    return new NeedlemanWunch();

                case AlgorithmTypes.OverlapCoefficient:
                    return new OverlapCoefficient();

                case AlgorithmTypes.QGramsDistance:
                    return new QGramsDistance();

                case AlgorithmTypes.SmithWaterman:
                    return new SmithWaterman();

                case AlgorithmTypes.SmithWatermanGotohWindowedAffine:
                    return new SmithWatermanGotohWindowedAffine();

                // SmithWatermanGotoh is both an explicit choice and the fallback.
                case AlgorithmTypes.SmithWatermanGotoh:
                default:
                    return new SmithWatermanGotoh();
            }
        }
コード例 #6
0
 // Fixture constructor: calls LoadData() and then creates the Jaro and
 // JaroWinkler instances stored in _myJaro and _myJaroWinkler.
 // NOTE(review): the commented-out attribute below suggests this used to be a
 // setup method under a different test framework — confirm before deleting it.
 // [SetUp]
 public JaroAndJaroWinklerUnitTests()
 {
     // Runs before the metric fields are assigned; keep this order unless
     // LoadData() is confirmed not to depend on it.
     LoadData();
     _myJaro        = new Jaro();
     _myJaroWinkler = new JaroWinkler();
 }
コード例 #7
0
        /// <summary>
        /// Click handler: runs four string-matching methods over every word pair in
        /// "data.txt" and writes each method's per-line results, followed by the elapsed
        /// time of the pass, to "ex_m.txt", "m1.txt", "m2.txt" and "m3.txt".
        /// Does nothing when "data.txt" does not exist.
        /// </summary>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void button1_Click(object sender, EventArgs e)
        {
            Existing_Method existingMethod = new Existing_Method();

            if (!System.IO.File.Exists("data.txt"))
            {
                return;
            }

            string[] lines = System.IO.File.ReadAllLines("data.txt");

            // Existing test
            RunTimedComparison("ex_m.txt", lines,
                (first, second) => existingMethod.IndistinctMatching(4, first, second).ToString());

            // Method 1
            RunTimedComparison("m1.txt", lines,
                (first, second) => (new Jaro(first, second).Func() * 100).ToString());

            // Method 2: the boolean result is reported as "100" or "0".
            RunTimedComparison("m2.txt", lines,
                (first, second) => new JaroVincler(first, second).Func() ? "100" : "0");

            // Method 3
            RunTimedComparison("m3.txt", lines,
                (first, second) => (100 - new Levenstein(first, second).Func() * 100).ToString());

            MessageBox.Show("Выполнено");
        }

        /// <summary>
        /// Applies <paramref name="score"/> to the first two space-separated words of every
        /// line, timing the whole pass, then writes one result per line followed by the
        /// elapsed time to <paramref name="outputPath"/> (replacing any existing file).
        /// </summary>
        /// <param name="outputPath">File that receives the results.</param>
        /// <param name="lines">Input lines, each expected to hold two words separated by a space.</param>
        /// <param name="score">Produces the text written for one word pair.</param>
        private static void RunTimedComparison(string outputPath, string[] lines, System.Func<string, string, string> score)
        {
            // Results are buffered so the timed loop measures the comparison itself
            // rather than one file open/append/close per input line.
            System.Text.StringBuilder output = new System.Text.StringBuilder();
            Stopwatch timer = Stopwatch.StartNew();

            foreach (string line in lines)
            {
                string[] words = line.Split(' ');

                // Blank or single-word lines cannot be compared; skip them instead of
                // failing with an IndexOutOfRangeException on words[1].
                if (words.Length < 2)
                {
                    continue;
                }

                output.Append(score(words[0], words[1])).Append(Environment.NewLine);
            }

            timer.Stop();
            output.Append(timer.Elapsed.ToString());

            System.IO.File.WriteAllText(outputPath, output.ToString());
        }
コード例 #8
0
ファイル: Program.cs プロジェクト: Rozinek/BlueSimilarity
        /// <summary>
        /// Sample program that calls each similarity/distance class once and then waits
        /// for a key press. The computed values are only stored in locals; nothing is printed.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        private static void Main(string[] args)
        {
            // ---------------------------------------------------------------
            // Bag-of-words similarity
            // ---------------------------------------------------------------

            // Recommended method for complex similarity over several words.
            var bagOfWords = new BagOfWordsSimilarity();
            const string pattern = "John Smith";
            const string targetText = "Mr. John Smith, Jr.";

            // Original sample note: with normalized strings and a tokenizer the score is 1.0.
            var patternTokens = new Tokenizer(new NormalizedString(pattern));
            var targetTokens = new Tokenizer(new NormalizedString(targetText));
            var bagOfWordsScore = bagOfWords.GetSimilarity(patternTokens, targetTokens);

            // ---------------------------------------------------------------
            // Levenshtein
            // ---------------------------------------------------------------
            const string nameCorrect = "martha";
            const string nameError = "marhta";

            // Levenshtein exposes both a distance (IDistance) and a similarity (ISimilarity).
            var levenshtein = new Levenshtein();

            // Original sample note: edit distance 2.
            var levenshteinDistance = levenshtein.GetDistance(nameCorrect, nameError);

            // A normalized string removes special symbols and diacritics and ignores case.
            // Original sample note: score 0.67.
            var normalizedCorrect = new NormalizedString(nameCorrect);
            var normalizedError = new NormalizedString(nameError);
            var levenshteinScore = levenshtein.GetSimilarity(normalizedCorrect, normalizedError);

            // ---------------------------------------------------------------
            // Damerau-Levenshtein
            // ---------------------------------------------------------------
            // DamerauLevenshtein implements IDistance and ISimilarity.
            var damerauLevenshtein = new DamerauLevenshtein();

            // Original sample note: edit distance 1.
            var damerauDistance = damerauLevenshtein.GetDistance(nameCorrect, nameError);

            // Original sample note: score 0.83.
            var damerauScore = damerauLevenshtein.GetSimilarity(nameCorrect, nameError);

            // ---------------------------------------------------------------
            // Jaro, Jaro-Winkler
            // ---------------------------------------------------------------

            // Jaro and Jaro-Winkler implement only ISimilarity.
            var firstName = new Token("dwayne");
            var secondName = new Token("duane");
            var jaroMetric = new Jaro();          // created but not used further in this sample
            var jaroWinklerMetric = new JaroWinkler();

            // The result of this call is discarded.
            jaroWinklerMetric.GetSimilarity(firstName, secondName);

            // ---------------------------------------------------------------
            // Q-gram coefficients
            // ---------------------------------------------------------------
            // Dice, Jaccard and Overlap similarity coefficients over q-grams.
            // NOTE(review): all three are instantiated with Bigram although the original
            // comment spoke of "different q-grams type" — confirm which was intended.
            var diceCoefficient = new DiceCoefficient<Bigram>();
            var jaccardCoefficient = new JaccardCoefficient<Bigram>();
            var overlapCoefficient = new OverlapCoefficient<Bigram>();

            // Original sample note: score 0.5.
            var jaccardScore = jaccardCoefficient.GetSimilarity(pattern, targetText);

            // Original sample note: score 0.67.
            var diceScore = diceCoefficient.GetSimilarity(pattern, targetText);

            // Original sample note: score 1.0.
            var overlapScore = overlapCoefficient.GetSimilarity(pattern, targetText);

            // Keep the console window open until a key is pressed.
            Console.ReadKey();
        }
コード例 #9
0
 // Creates a new Jaro instance and stores it in _jaro.
 public void Initialize() => _jaro = new Jaro();
コード例 #10
0
 // Checks Jaro.Similarity when one or both inputs are the empty string.
 public void EmptyCases()
 {
     const string empty = "";
     const string nonEmpty = "AB";

     // Two empty strings are expected to score 1.
     Assert.Equal(1, Jaro.Similarity(empty, empty));

     // An empty string against a non-empty one is expected to score 0, in either order.
     Assert.Equal(0, Jaro.Similarity(empty, nonEmpty));
     Assert.Equal(0, Jaro.Similarity(nonEmpty, empty));
 }