예제 #1
0
        /// <summary>
        /// Accumulates, for every token in the collection returned by
        /// <c>_tokenUtilities.CreateMergedSet</c>, the absolute difference between how often that
        /// token occurs in <paramref name="firstTokens"/> and in <paramref name="secondTokens"/>.
        /// </summary>
        /// <param name="firstTokens">tokens of the first input</param>
        /// <param name="secondTokens">tokens of the second input</param>
        /// <returns>the summed per-token occurrence difference (0 when both token lists agree on every count)</returns>
        private double GetActualSimilarity(Collection <string> firstTokens, Collection <string> secondTokens)
        {
            int totalDifference = 0;

            foreach (string candidate in _tokenUtilities.CreateMergedSet(firstTokens, secondTokens))
            {
                // Count how many times the candidate appears on each side.
                int occurrencesInFirst = 0;
                foreach (string token in firstTokens)
                {
                    if (token.Equals(candidate))
                    {
                        occurrencesInFirst++;
                    }
                }

                int occurrencesInSecond = 0;
                foreach (string token in secondTokens)
                {
                    if (token.Equals(candidate))
                    {
                        occurrencesInSecond++;
                    }
                }

                // Add the magnitude of the gap, regardless of which side has more.
                int gap = occurrencesInFirst - occurrencesInSecond;
                totalDifference += gap < 0 ? -gap : gap;
            }

            return totalDifference;
        }
예제 #2
0
 /// <summary>
 /// Scores two strings as
 /// (common terms) / (sqrt(first token count) * sqrt(second token count)).
 /// </summary>
 /// <param name="firstWord">first string to compare</param>
 /// <param name="secondWord">second string to compare</param>
 /// <returns>
 /// the computed ratio, or 0.0 when either argument is null, the merged token set is empty,
 /// or either input produced no tokens (which would otherwise make the denominator zero)
 /// </returns>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return(0.0);
     }

     // The counts read below are presumably populated by this call; the returned set is only
     // needed for its size.
     if (_tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord)).Count <= 0)
     {
         return(0.0);
     }

     double commonTerms = _tokenUtilities.CommonSetTerms();
     double denominator = Math.Pow(_tokenUtilities.FirstSetTokenCount, 0.5) * Math.Pow(_tokenUtilities.SecondSetTokenCount, 0.5);

     // A non-empty merged set does not guarantee that BOTH inputs produced tokens. If one side
     // is empty the denominator is 0 and the division would yield NaN/Infinity, so fall back
     // to the same 0.0 used for the other "nothing to compare" cases.
     if (!(denominator > 0.0))
     {
         return(0.0);
     }

     return(commonTerms / denominator);
 }
예제 #3
0
 /// <summary>
 /// Scores two strings as (common terms) / min(first token count, second token count).
 /// </summary>
 /// <param name="firstWord">first string to compare</param>
 /// <param name="secondWord">second string to compare</param>
 /// <returns>
 /// the computed ratio, or <c>DefaultMismatchScore</c> when either argument is null or when
 /// either input produced no tokens (which would otherwise make the denominator zero)
 /// </returns>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return(DefaultMismatchScore);
     }

     // Called for its effect on _tokenUtilities (the counts read below are presumably
     // populated here); the returned merged set itself is not needed.
     _tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord));

     double commonTerms = _tokenUtilities.CommonSetTerms();
     double smallerCount = Math.Min(_tokenUtilities.FirstSetTokenCount, _tokenUtilities.SecondSetTokenCount);

     // If either input tokenises to nothing the smaller count is 0 and the division would
     // yield NaN/Infinity; report a mismatch instead.
     if (!(smallerCount > 0.0))
     {
         return(DefaultMismatchScore);
     }

     return(commonTerms / smallerCount);
 }
예제 #4
0
 /// <summary>
 /// Scores two strings as (common terms) / (size of the merged token set).
 /// </summary>
 /// <param name="firstWord">first string to compare</param>
 /// <param name="secondWord">second string to compare</param>
 /// <returns>
 /// the computed ratio, or <c>DefaultMismatchScore</c> when either argument is null or the
 /// merged token set is empty
 /// </returns>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if (firstWord == null || secondWord == null)
     {
         return DefaultMismatchScore;
     }

     Collection <string> mergedTokens = _tokenUtilities.CreateMergedSet(_tokeniser.Tokenize(firstWord), _tokeniser.Tokenize(secondWord));

     // An empty merged set means there is nothing to divide by.
     if (mergedTokens.Count <= 0)
     {
         return DefaultMismatchScore;
     }

     return _tokenUtilities.CommonSetTerms() / (double)mergedTokens.Count;
 }
예제 #5
0
 /// <summary>
 /// gets the similarity of the two strings using OverlapCoefficient
 /// </summary>
 /// <param name="firstWord">first string to compare</param>
 /// <param name="secondWord">second string to compare</param>
 /// <returns>
 /// the overlap coefficient of the two token sets, or <c>defaultMismatchScore</c> when either
 /// argument is null or when either input produced no tokens
 /// </returns>
 /// <remarks>overlap_coefficient(q,r) = ( | q and r | ) / min{ | q | , | r | }.</remarks>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if ((firstWord == null) || (secondWord == null))
     {
         return(defaultMismatchScore);
     }

     // Called for its effect on tokenUtilities (the counts read below are presumably
     // populated here); the returned merged set itself is not needed.
     tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord));

     double commonTerms = tokenUtilities.CommonSetTerms();
     double smallerCount = Math.Min(tokenUtilities.FirstSetTokenCount, tokenUtilities.SecondSetTokenCount);

     // min{|q|,|r|} is 0 when either input tokenises to nothing; dividing by it would yield
     // NaN/Infinity rather than a score, so report a mismatch instead.
     if (!(smallerCount > 0.0))
     {
         return(defaultMismatchScore);
     }

     return(commonTerms / smallerCount);
 }
예제 #6
0
 /// <summary>
 /// Computes the Dice similarity of the two strings from their token sets.
 /// </summary>
 /// <param name="firstWord">first string to compare</param>
 /// <param name="secondWord">second string to compare</param>
 /// <returns>
 /// the Dice coefficient, or 0.0 when either argument is null or the merged token set is empty
 /// </returns>
 /// <remarks>Dice coefficient = (2 * common terms) / (terms in first string + terms in second string).</remarks>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if ((firstWord == null) || (secondWord == null))
     {
         return 0.0;
     }

     bool hasTokens =
         tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord)).Count > 0;
     if (!hasTokens)
     {
         return 0.0;
     }

     double doubledCommonTerms = 2.0 * tokenUtilities.CommonSetTerms();
     return doubledCommonTerms / (tokenUtilities.FirstSetTokenCount + tokenUtilities.SecondSetTokenCount);
 }
 /// <summary>
 /// Computes the Jaccard similarity of the two strings from their token sets.
 /// </summary>
 /// <param name="firstWord">first string to compare</param>
 /// <param name="secondWord">second string to compare</param>
 /// <returns>
 /// the ratio described in the remarks, or <c>defaultMismatchScore</c> when either argument is
 /// null or the merged token set is empty
 /// </returns>
 /// <remarks>The value returned is ( |X and Y| ) / ( |X or Y| ): the number of common terms
 /// divided by the size of the merged token set.</remarks>
 public override double GetSimilarity(string firstWord, string secondWord)
 {
     if ((firstWord == null) || (secondWord == null))
     {
         return defaultMismatchScore;
     }

     Collection <string> mergedTokens =
         tokenUtilities.CreateMergedSet(tokeniser.Tokenize(firstWord), tokeniser.Tokenize(secondWord));

     // An empty merged set means there is nothing to divide by.
     if (mergedTokens.Count <= 0)
     {
         return defaultMismatchScore;
     }

     return (double)tokenUtilities.CommonSetTerms() / (double)mergedTokens.Count;
 }
예제 #8
0
        /// <summary>
        /// Walks every token in the collection returned by <c>tokenUtilities.CreateMergedSet</c>
        /// and sums the absolute difference between its occurrence count in
        /// <paramref name="firstTokens"/> and its occurrence count in <paramref name="secondTokens"/>.
        /// </summary>
        /// <param name="firstTokens">tokens of the first input</param>
        /// <param name="secondTokens">tokens of the second input</param>
        /// <returns>the summed per-token occurrence difference</returns>
        double GetActualSimilarity(Collection <string> firstTokens, Collection <string> secondTokens)
        {
            int runningTotal = 0;

            foreach (string current in tokenUtilities.CreateMergedSet(firstTokens, secondTokens))
            {
                int countInFirst = 0;
                foreach (string candidate in firstTokens)
                {
                    if (candidate.Equals(current))
                    {
                        countInFirst++;
                    }
                }

                int countInSecond = 0;
                foreach (string candidate in secondTokens)
                {
                    if (candidate.Equals(current))
                    {
                        countInSecond++;
                    }
                }

                // Only the size of the mismatch matters, not which side has the surplus.
                runningTotal += countInFirst > countInSecond
                    ? countInFirst - countInSecond
                    : countInSecond - countInFirst;
            }

            return runningTotal;
        }