Exemple #1
0
        /// <summary>
        /// Computes a weighted similarity score between two token arrays.
        /// For every non-null pattern token, the best-scoring non-null target token whose
        /// score exceeds <c>DefaultThreshold</c> is selected; its score, multiplied by the
        /// product of the two tokens' semantic weights, is added to the result.
        /// </summary>
        /// <param name="patternTokens">The pattern tokens; null entries are skipped.</param>
        /// <param name="targetTokens">The target tokens; null entries are skipped.</param>
        /// <returns>The sum of the weighted best-match scores over all pattern tokens.</returns>
        public double GetSimilarity(string[] patternTokens, string[] targetTokens)
        {
            var weightsOfPattern = Vocabulary.GetSemanticWeight(patternTokens);
            var weightsOfTarget  = Vocabulary.GetSemanticWeight(targetTokens);

            SimMetric tokenMetric = SimHelpers.GetSimMetric(InternalTokenSimilarity);

            // Both weight collections are passed through Utils.UnitVectorizing before use
            // (presumably scaling each to unit length - confirm against Utils).
            Utils.UnitVectorizing(weightsOfPattern);
            Utils.UnitVectorizing(weightsOfTarget);

            double total = 0;

            for (int i = 0; i < patternTokens.Length; i++)
            {
                string patternToken = patternTokens[i];

                if (patternToken != null)
                {
                    double bestScore  = SimHelpers.MinimumScore;
                    double bestWeight = 0;

                    for (int j = 0; j < targetTokens.Length; j++)
                    {
                        string targetToken = targetTokens[j];

                        if (targetToken != null)
                        {
                            double score = tokenMetric(patternToken, targetToken);

                            // Only scores above the threshold may become the best match.
                            if (score > DefaultThreshold)
                            {
                                if (score > bestScore)
                                {
                                    bestScore  = score;
                                    bestWeight = weightsOfPattern[i] * weightsOfTarget[j];
                                }
                            }

                            // Nothing can beat the maximum score, so stop scanning targets.
                            if (Utils.Equals(score, SimHelpers.MaximumScore))
                            {
                                break;
                            }
                        }
                    }

                    // bestWeight stays 0 when no target passed the threshold.
                    total += bestWeight * bestScore;
                }
            }

            return(total);
        }
Exemple #2
0
        /// <summary>
        ///     Gets the similarity of two token arrays as the average, over the pattern
        ///     tokens, of each pattern token's best score against any target token.
        /// </summary>
        /// <remarks>
        ///     When <c>IsSymmetric</c> is set and the pattern has more tokens than the
        ///     target, the two arrays are swapped so the shorter one acts as the pattern.
        ///     Null tokens are skipped; null pattern tokens contribute nothing to the sum
        ///     but still count in the divisor, which is the pattern array length.
        /// </remarks>
        /// <param name="patternTokens">The tokens pattern.</param>
        /// <param name="targetTokens">The tokens target.</param>
        /// <returns>
        ///     The summed best scores divided by the pattern token count, or 0 when the
        ///     (possibly swapped) pattern array is empty.
        /// </returns>
        public double GetSimilarity(string[] patternTokens, string[] targetTokens)
        {
            int m = patternTokens.Length;
            int n = targetTokens.Length;

            SimMetric refSimilarity = SimHelpers.GetSimMetric(InternalTokenSimilarity);

            // re-calculate similarity symmetric vs. not symmetric
            if (IsSymmetric && m > n)
            {
                Utils.Swap(ref patternTokens, ref targetTokens);
                Utils.Swap(ref m, ref n);
            }

            // An empty pattern would make the final division 0.0 / 0, i.e. NaN.
            if (m == 0)
            {
                return(0);
            }

            double sumOverTokens = 0;

            foreach (var pattern in patternTokens)
            {
                if (pattern == null)
                {
                    continue;
                }

                double maxOverToken = 0;
                foreach (var target in targetTokens)
                {
                    if (target == null)
                    {
                        continue;
                    }

                    double currentScore = refSimilarity(pattern, target);

                    // if score achieves maximum score then breaks the loop and increases the performance
                    if (Utils.Equals(currentScore, SimHelpers.MaximumScore))
                    {
                        maxOverToken = SimHelpers.MaximumScore;
                        break;
                    }

                    maxOverToken = Math.Max(maxOverToken, currentScore);
                }
                sumOverTokens += maxOverToken;
            }

            return(sumOverTokens / m);
        }
        /// <summary>
        /// Gets the semantic similarity between array of tokens. The position of token in array
        /// doesn't have an impact on resulting score.
        /// </summary>
        /// <remarks>
        /// For each non-null pattern token the best-scoring non-null target token is found.
        /// The result is the average of those best scores, each weighted by the product of
        /// the matched tokens' semantic weights (or by the squared pattern weight when no
        /// target scored above <c>SimHelpers.MinimumScore</c>). When <c>IsSymmetric</c> is
        /// set and the pattern has more tokens than the target, the arrays and their
        /// weights are swapped first.
        /// </remarks>
        /// <param name="tokensPattern">The tokens pattern</param>
        /// <param name="tokensTarget">The tokens target.</param>
        /// <returns>
        /// The weighted average of the best scores (documented range: 0 to 1), or
        /// <c>SimHelpers.MinimumScore</c> when the accumulated weight is zero, for example
        /// when the pattern is empty or contains only null tokens.
        /// </returns>
        public double GetSimilarity(string[] tokensPattern, string[] tokensTarget)
        {
            var patternWeights = Vocabulary.GetSemanticWeight(tokensPattern);
            var targetWeights  = Vocabulary.GetSemanticWeight(tokensTarget);

            SimMetric simMetric = SimHelpers.GetSimMetric(InternalTokenSimilarity);

            // re-calculate similarity symmetric vs. not symmetric
            if (IsSymmetric && tokensPattern.Length > tokensTarget.Length)
            {
                Utils.Swap(ref tokensPattern, ref tokensTarget);
                Utils.Swap(ref patternWeights, ref targetWeights);
            }

            double sumOverTokens = 0;
            double sumWeights    = 0;

            for (int p = 0; p < tokensPattern.Length; p++)
            {
                string pattern = tokensPattern[p];
                double pWeight = patternWeights[p];

                if (pattern == null)
                {
                    continue;
                }

                double maxOverToken    = SimHelpers.MinimumScore;
                double weightOverToken = Math.Pow(pWeight, 2.0);

                // The iterator must advance t; advancing p here would never move through
                // the targets and would corrupt the outer loop index.
                for (int t = 0; t < tokensTarget.Length; t++)
                {
                    string target  = tokensTarget[t];
                    double tWeight = targetWeights[t];

                    if (target == null)
                    {
                        continue;
                    }

                    double currentScore = simMetric(pattern, target);

                    if (currentScore > maxOverToken)
                    {
                        maxOverToken    = currentScore;
                        weightOverToken = pWeight * tWeight;
                    }

                    // if score achieves maximum score then breaks the loop and increases the performance
                    if (Utils.Equals(currentScore, SimHelpers.MaximumScore))
                    {
                        break;
                    }
                }
                sumOverTokens += weightOverToken * maxOverToken;
                sumWeights    += weightOverToken;
            }

            // With no accumulated weight the division below would be by zero (NaN).
            if (sumWeights == 0)
            {
                return(SimHelpers.MinimumScore);
            }

            return(sumOverTokens / sumWeights);
        }