コード例 #1
0
        public void TestSimilarity()
        {
            // With k = 2 the inputs are compared on their 2-character pieces:
            //
            //            AB BC CD DE DF
            //   "ABCDE"   1  1  1  1  0
            //   "ABCDF"   1  1  1  0  1
            //
            // 3 pieces are common to both out of 5 distinct ones => 3 / 5 = 0.6
            const double expectedSimilarity = 0.6;
            var jaccard = new Jaccard(k: 2);

            var actualSimilarity = jaccard.Similarity("ABCDE", "ABCDF");

            Assert.Equal(expected: expectedSimilarity, actual: actualSimilarity);
        }
コード例 #2
0
        ///// <summary>
        ///// Calculates the Levenshtein distance between two strings
        ///// </summary>
        ///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C.23
        ///// Explanation: https://en.wikipedia.org/wiki/Levenshtein_distance
        //private Int32 levenshtein(String a, String b)
        //{

        //    if (string.IsNullOrEmpty(a))
        //    {
        //        if (!string.IsNullOrEmpty(b))
        //        {
        //            return b.Length;
        //        }
        //        return 0;
        //    }

        //    if (string.IsNullOrEmpty(b))
        //    {
        //        if (!string.IsNullOrEmpty(a))
        //        {
        //            return a.Length;
        //        }
        //        return 0;
        //    }

        //    Int32 cost;
        //    Int32[,] d = new int[a.Length + 1, b.Length + 1];
        //    Int32 min1;
        //    Int32 min2;
        //    Int32 min3;

        //    for (Int32 i = 0; i <= d.GetUpperBound(0); i += 1)
        //    {
        //        d[i, 0] = i;
        //    }

        //    for (Int32 i = 0; i <= d.GetUpperBound(1); i += 1)
        //    {
        //        d[0, i] = i;
        //    }

        //    for (Int32 i = 1; i <= d.GetUpperBound(0); i += 1)
        //    {
        //        for (Int32 j = 1; j <= d.GetUpperBound(1); j += 1)
        //        {
        //            cost = Convert.ToInt32(!(a[i - 1] == b[j - 1]));

        //            min1 = d[i - 1, j] + 1;
        //            min2 = d[i, j - 1] + 1;
        //            min3 = d[i - 1, j - 1] + cost;
        //            d[i, j] = Math.Min(Math.Min(min1, min2), min3);
        //        }
        //    }

        //    return d[d.GetUpperBound(0), d.GetUpperBound(1)];

        //}

        ///// <summary>
        ///// String-similarity computed with levenshtein-distance
        ///// </summary>
        //private double similarityLevenshtein(string a, string b)
        //{
        //    if (a.Equals(b))
        //    {
        //        return 1.0;
        //    }
        //    else
        //    {
        //        if (!(a.Length == 0 || b.Length == 0))
        //        {
        //            double sim = 1 - (levenshtein(a, b) / Convert.ToDouble(Math.Min(a.Length, b.Length)));
        //            return sim;
        //        }
        //        else
        //            return 0.0;
        //    }
        //}

        ///// <summary>
        ///// String-similarity computed with Dice Coefficient
        ///// </summary>
        ///// Source: https://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Dice%27s_coefficient#C.23
        ///// Explanation: https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient
        //private double similarityDiceCoefficient(string a, string b)
        //{
        //    //Workaround for |a| == |b| == 1
        //    if (a.Length <= 1 && b.Length <= 1)
        //    {
        //        if (a.Equals(b))
        //            return 1.0;
        //        else
        //            return 0.0;
        //    }

        //    HashSet<string> setA = new HashSet<string>();
        //    HashSet<string> setB = new HashSet<string>();

        //    for (int i = 0; i < a.Length - 1; ++i)
        //        setA.Add(a.Substring(i, 2));

        //    for (int i = 0; i < b.Length - 1; ++i)
        //        setB.Add(b.Substring(i, 2));

        //    HashSet<string> intersection = new HashSet<string>(setA);
        //    intersection.IntersectWith(setB);

        //    return (2.0 * intersection.Count) / (setA.Count + setB.Count);
        //}

        /// <summary>
        /// Scores how alike two strings are by averaging three metrics with equal weight:
        /// normalized Levenshtein, Jaro-Winkler and Jaccard (each built with its default constructor).
        /// </summary>
        /// <param name="a">First string to compare.</param>
        /// <param name="b">Second string to compare.</param>
        /// <returns>The arithmetic mean of the three individual similarity scores.</returns>
        private double similarity(string a, string b)
        {
            // Element order matters only for reproducibility: the metrics are evaluated
            // and summed in exactly this sequence.
            double[] scores =
            {
                new NormalizedLevenshtein().Similarity(a, b),
                new JaroWinkler().Similarity(a, b),
                new Jaccard().Similarity(a, b),
            };

            double total = 0.0;

            for (int index = 0; index < scores.Length; index++)
            {
                total += scores[index];
            }

            return total / scores.Length;
        }
コード例 #3
0
 /// <summary>
 /// Averages the scores that the <c>cosine</c>, <c>jaccard</c> and <c>sorensenDice</c>
 /// members report for the given pair of strings.
 /// </summary>
 /// <param name="str1">First string to compare.</param>
 /// <param name="str2">Second string to compare.</param>
 /// <returns>The sum of the three similarity scores divided by 3.</returns>
 public static double GetSimilarityAverage(string str1, string str2)
 {
     var cosineScore = cosine.Similarity(str1, str2);
     var jaccardScore = jaccard.Similarity(str1, str2);
     var diceScore = sorensenDice.Similarity(str1, str2);

     return (cosineScore + jaccardScore + diceScore) / 3;
 }
コード例 #4
0
        /// <summary>
        /// Click handler for the OK button: reads the two selected columns of
        /// <c>mainWindow.gridData</c> as numbers, computes the selected similarity measure
        /// between them and appends a line with the result to <c>textBlock</c>.
        /// </summary>
        /// <remarks>
        /// Does nothing when either column picker or the measure picker has no selection,
        /// and appends nothing when the selected measure is not one of
        /// "Jaccard", "Korelacja Pearsona" or "Cosinus".
        /// </remarks>
        /// <param name="sender">Event source (unused).</param>
        /// <param name="e">Event data (unused).</param>
        private void Ok_Click(object sender, RoutedEventArgs e)
        {
            // SelectedItem is null while nothing is selected; calling ToString() on it
            // would throw, so bail out before touching any of the selections.
            if (comboBoxCol1.SelectedItem == null ||
                comboBoxCol2.SelectedItem == null ||
                similarityMeasures.SelectedItem == null)
            {
                return;
            }

            string firstColumn = comboBoxCol1.SelectedItem.ToString();
            string secondColumn = comboBoxCol2.SelectedItem.ToString();
            string measureName = similarityMeasures.SelectedItem.ToString();

            // Parse with an explicit format whose negative sign is the plain ASCII hyphen.
            var format = new NumberFormatInfo();
            format.NegativeSign = "-";

            // Reads every row's cell of the named column as a string, passes it through
            // ConvertString (defined elsewhere; presumably normalizes the text - confirm)
            // and converts the result to double.
            Func<string, double[]> readColumn = columnName =>
                (from row in mainWindow.gridData.AsEnumerable()
                 select Convert.ToDouble(ConvertString(row.Field<string>(mainWindow.gridData.Columns[columnName].ColumnName)), format)).ToArray();

            var valuesCol1 = readColumn(firstColumn);
            var valuesCol2 = readColumn(secondColumn);

            string similarityText;

            switch (measureName)
            {
                case "Jaccard":
                    similarityText = new Jaccard().Similarity(valuesCol1, valuesCol2).ToString();
                    break;

                case "Korelacja Pearsona":
                    similarityText = new PearsonCorrelation().Similarity(valuesCol1, valuesCol2).ToString();
                    break;

                case "Cosinus":
                    similarityText = new Cosine().Similarity(valuesCol1, valuesCol2).ToString();
                    break;

                default:
                    // Unknown measure: nothing to report.
                    return;
            }

            // The measure's display name is the same text that appears in the message.
            textBlock.Text += "\nPodobieństwo miary " + measureName + " pomiędzy kolumnami " + firstColumn + " i " + secondColumn + " wynosi: " + similarityText;
        }