Code example #1
File: MongeElkan.cs — Project: zekhan00/SimMetrics.Net
        /// <summary>
        /// gets the similarity of the two strings using Monge-Elkan: the average,
        /// over the first word's tokens, of the best score each token achieves
        /// against any token of the second word.
        /// </summary>
        /// <param name="firstWord">first word to compare</param>
        /// <param name="secondWord">second word to compare</param>
        /// <returns>a value between 0-1 of the similarity</returns>
        public override double GetSimilarity(string firstWord, string secondWord)
        {
            if (firstWord == null || secondWord == null)
            {
                return(DefaultMismatchScore);
            }

            Collection <string> firstTokens  = Tokeniser.Tokenize(firstWord);
            Collection <string> secondTokens = Tokeniser.Tokenize(secondWord);

            // guard: dividing by an empty token list below would yield NaN
            if (firstTokens.Count == 0)
            {
                return(DefaultMismatchScore);
            }

            double sumOfBestMatches = 0.0;
            for (int i = 0; i < firstTokens.Count; i++)
            {
                // best score this token reaches against any token of secondWord
                double bestMatch = 0.0;
                for (int j = 0; j < secondTokens.Count; j++)
                {
                    double similarity = _internalStringMetric.GetSimilarity(firstTokens[i], secondTokens[j]);
                    if (similarity > bestMatch)
                    {
                        bestMatch = similarity;
                    }
                }
                sumOfBestMatches += bestMatch;
            }
            return(sumOfBestMatches / firstTokens.Count);
        }
Code example #2
        /// <summary>
        /// gets the similarity of the two strings using Monge Elkan.
        /// </summary>
        /// <param name="firstWord">first word to compare</param>
        /// <param name="secondWord">second word to compare</param>
        /// <returns>a value between 0-1 of the similarity</returns>
        public override double GetSimilarity(string firstWord, string secondWord)
        {
            if ((firstWord != null) && (secondWord != null))
            {
                Collection <string> firstTokens  = tokeniser.Tokenize(firstWord);
                Collection <string> secondTokens = tokeniser.Tokenize(secondWord);

                // guard: an empty first-token list would make the division
                // below produce NaN instead of a mismatch score
                if (firstTokens.Count == 0)
                {
                    return(defaultMismatchScore);
                }

                double sumMatches = 0.0;
                for (int i = 0; i < firstTokens.Count; i++)
                {
                    string sToken   = firstTokens[i];
                    // best score this token achieves against any second-word token
                    double maxFound = 0.0;
                    for (int j = 0; j < secondTokens.Count; j++)
                    {
                        string tToken = secondTokens[j];
                        double found  = internalStringMetric.GetSimilarity(sToken, tToken);
                        if (found > maxFound)
                        {
                            maxFound = found;
                        }
                    }
                    sumMatches += maxFound;
                }
                return(sumMatches / firstTokens.Count);
            }
            return(defaultMismatchScore);
        }
Code example #3
        /// <summary>
        /// gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first word</param>
        /// <param name="secondWord">second word</param>
        /// <returns>returns the score of the similarity measure (un-normalised)</returns>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord)
        {
            var firstTokens  = tokeniser.Tokenize(firstWord);
            var secondTokens = tokeniser.Tokenize(secondWord);

            // refresh the shared token statistics before scoring
            tokenUtilities.CreateMergedList(firstTokens, secondTokens);
            return GetActualSimilarity(firstTokens, secondTokens);
        }
Code example #4
 /// <summary>
 /// gets the similarity of the two strings using the overlap coefficient:
 /// |q and r| / min(|q|, |r|).
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>a value between 0-1 of the similarity</returns>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord != null && secondWord != null)
     {
         _tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord));
         int smallerSetSize = Math.Min(_tokenUtilities.FirstSetTokenCount, _tokenUtilities.SecondSetTokenCount);
         // guard: a zero-sized token set would otherwise divide by zero (NaN)
         if (smallerSetSize > 0)
         {
             return(_tokenUtilities.CommonSetTerms() / (double)smallerSetSize);
         }
     }
     return(DefaultMismatchScore);
 }
Code example #5
 /// <summary>
 /// gets the actual (un-normalised) euclidean distance between the two words.
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>the actual euclidean distance, or 0.0 when either word is null</returns>
 public double GetEuclidDistance(string firstWord, string secondWord)
 {
     // null input carries no distance information
     if (firstWord == null || secondWord == null)
     {
         return 0.0;
     }
     return GetActualDistance(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord));
 }
Code example #6
 /// <summary>
 /// gets the estimated time in milliseconds it takes to perform a similarity timing.
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>the estimated time in milliseconds taken to perform the similarity measure</returns>
 public override double GetSimilarityTimingEstimated(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return DefaultMismatchScore;
     }
     // estimated cost grows with the product of the two token counts
     double firstCount  = _tokeniser.Tokenize(firstWord).Count;
     double secondCount = _tokeniser.Tokenize(secondWord).Count;
     return firstCount * secondCount * _estimatedTimingConstant;
 }
Code example #7
 /// <summary>
 /// gets the estimated time in milliseconds it takes to perform a similarity timing.
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>the estimated time in milliseconds taken to perform the similarity measure</returns>
 public override double GetSimilarityTimingEstimated(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return 0.0;
     }
     // the estimate scales with the product of the token counts
     double firstCount  = tokeniser.Tokenize(firstWord).Count;
     double secondCount = tokeniser.Tokenize(secondWord).Count;
     return firstCount * secondCount * estimatedTimingConstant;
 }
Code example #8
        /// <summary>
        /// gets the actual euclidean distance ie not the value between 0-1.
        /// </summary>
        /// <param name="firstWord">first word to compare</param>
        /// <param name="secondWord">second word to compare</param>
        /// <returns>the actual euclidean distance</returns>
        public double GetEuclidDistance(string firstWord, string secondWord)
        {
            // a null word cannot be tokenised, so report the mismatch score
            if (firstWord == null || secondWord == null)
            {
                return defaultMismatchScore;
            }
            return GetActualDistance(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord));
        }
Code example #9
 /// <summary>
 /// gets the similarity of the two strings as the ratio of common set terms
 /// to the merged token-set size (Jaccard-style: |X and Y| / |X or Y|).
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>a value between 0-1 of the similarity</returns>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return DefaultMismatchScore;
     }
     Collection <string> mergedTokens = _tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord));
     if (mergedTokens.Count == 0)
     {
         return DefaultMismatchScore;
     }
     return _tokenUtilities.CommonSetTerms() / (double)mergedTokens.Count;
 }
Code example #10
 /// <summary>
 /// gets the similarity of the two strings using OverlapCoefficient
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>a value between 0-1 of the similarity</returns>
 /// <remarks>overlap_coefficient(q,r) = ( | q and r | ) / min{ | q | , | r | }.</remarks>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if ((firstWord != null) && (secondWord != null))
     {
         // side effect: populates the shared set statistics read below
         tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord));
         int smallerSetSize = Math.Min(tokenUtilities.FirstSetTokenCount, tokenUtilities.SecondSetTokenCount);
         // guard: an empty token set would otherwise divide by zero (NaN)
         if (smallerSetSize > 0)
         {
             return(tokenUtilities.CommonSetTerms() / (double)smallerSetSize);
         }
     }
     return(defaultMismatchScore);
 }
Code example #11
 /// <summary>
 /// gets the similarity of the two strings using DiceSimilarity
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>a value between 0-1 of the similarity</returns>
 /// <remarks>Dices coefficient = (2*Common Terms) / (Number of terms in String1 + Number of terms in String2).</remarks>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return 0.0;
     }
     int mergedCount = tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord)).Count;
     if (mergedCount == 0)
     {
         return 0.0;
     }
     return (2.0 * tokenUtilities.CommonSetTerms()) /
            (tokenUtilities.FirstSetTokenCount + tokenUtilities.SecondSetTokenCount);
 }
Code example #12
 /// <summary>
 /// gets the similarity of the two strings using JaccardSimilarity.
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>a value between 0-1 of the similarity</returns>
 /// <remarks>Each instance is represented as a Jaccard vector similarity function. The Jaccard between two vectors X and Y is
 /// (X*Y) / (|X||Y|-(X*Y))
 /// where (X*Y) is the inner product of X and Y, and |X| = (X*X)^1/2, i.e. the Euclidean norm of X.
 /// This can more easily be described as ( |X and Y| ) / ( | X or Y | )</remarks>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return defaultMismatchScore;
     }
     var allTokens = tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord));
     return allTokens.Count == 0
         ? defaultMismatchScore
         : tokenUtilities.CommonSetTerms() / (double)allTokens.Count;
 }
Code example #13
        /// <summary>
        /// gets the similarity of the two strings using BlockDistance.
        /// </summary>
        /// <param name="firstWord">first word to compare</param>
        /// <param name="secondWord">second word to compare</param>
        /// <returns>a 0-1 similarity score</returns>
        public override double GetSimilarity(string firstWord, string secondWord)
        {
            Collection <string> firstTokens  = tokeniser.Tokenize(firstWord);
            Collection <string> secondTokens = tokeniser.Tokenize(secondWord);
            int totalPossible = firstTokens.Count + secondTokens.Count;

            // guard: with no tokens on either side the division below is 0/0 (NaN);
            // report no similarity information instead
            if (totalPossible == 0)
            {
                return 0.0;
            }

            double totalDistance = GetActualSimilarity(firstTokens, secondTokens);
            return((totalPossible - totalDistance) / totalPossible);
        }
Code example #14
        /// <summary>
        /// gets the similarity of the two strings using block (city-block) distance,
        /// normalised to a 0-1 score.
        /// </summary>
        /// <param name="firstWord">first word to compare</param>
        /// <param name="secondWord">second word to compare</param>
        /// <returns>a 0-1 similarity score</returns>
        public override double GetSimilarity(string firstWord, string secondWord)
        {
            Collection <string> firstTokens  = _tokeniser.Tokenize(firstWord);
            Collection <string> secondTokens = _tokeniser.Tokenize(secondWord);
            int totalPossible = firstTokens.Count + secondTokens.Count;

            // guard: zero tokens on both sides would make the division 0/0 (NaN)
            if (totalPossible == 0)
            {
                return 0.0;
            }

            double actualSimilarity = GetActualSimilarity(firstTokens, secondTokens);
            return((totalPossible - actualSimilarity) / totalPossible);
        }
Code example #15
 /// <summary>
 /// gets the similarity of the two strings as common set terms divided by the
 /// geometric mean of the two set sizes (cosine-style over token sets).
 /// </summary>
 /// <param name="firstWord">first word to compare</param>
 /// <param name="secondWord">second word to compare</param>
 /// <returns>a value between 0-1 of the similarity</returns>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return 0.0;
     }
     if (_tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord)).Count == 0)
     {
         return 0.0;
     }
     // x^0.5 * y^0.5 expressed via Sqrt — identical for non-negative counts
     double normalisation = Math.Sqrt(_tokenUtilities.FirstSetTokenCount) * Math.Sqrt(_tokenUtilities.SecondSetTokenCount);
     return _tokenUtilities.CommonSetTerms() / normalisation;
 }