Ejemplo n.º 1
0
        /// <summary>
        /// Computes the un-normalised score of this metric for two strings: the minimum accumulated
        /// edit cost of turning <paramref name="firstWord"/> into <paramref name="secondWord"/>.
        /// </summary>
        /// <param name="firstWord">first string to compare; may be null</param>
        /// <param name="secondWord">second string to compare; may be null</param>
        /// <returns>
        /// 0.0 when either argument is null, the length of the other string when one string is empty,
        /// otherwise the value in the last cell of the cost table.
        /// </returns>
        /// <remarks>
        /// <para>The table is filled with the recurrence</para>
        /// <code>
        /// D(i,j) = min( D(i-1,j)   + 1,            // skip a character of firstWord
        ///               D(i,j-1)   + 1,            // skip a character of secondWord
        ///               D(i-1,j-1) + d(s_i,t_j) )  // substitute or copy
        /// </code>
        /// <para>
        /// where d(s_i,t_j) is whatever <c>dCostFunction</c> returns for the two characters
        /// (the original notes describe it as 0 when the characters are equal and 1 otherwise).
        /// </para>
        /// </remarks>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord)
        {
            // Null input short-circuits to the fixed fallback score.
            if (firstWord == null || secondWord == null)
            {
                return 0.0;
            }

            int rows = firstWord.Length;
            int cols = secondWord.Length;

            // Against an empty string the score is simply the other string's length.
            if (rows == 0)
            {
                return cols;
            }
            if (cols == 0)
            {
                return rows;
            }

            // Allocate the (rows + 1) x (cols + 1) table and seed the first column as we go.
            double[][] table = new double[rows + 1][];
            for (int r = 0; r <= rows; r++)
            {
                table[r] = new double[cols + 1];
                table[r][0] = r;
            }

            // Seed the first row.
            for (int c = 0; c <= cols; c++)
            {
                table[0][c] = c;
            }

            // Fill the interior row by row; every cell depends on its upper, left and upper-left neighbours.
            for (int r = 1; r <= rows; r++)
            {
                for (int c = 1; c <= cols; c++)
                {
                    double charCost = dCostFunction.GetCost(firstWord, r - 1, secondWord, c - 1);

                    double fromAbove = table[r - 1][c] + 1.0;
                    double fromLeft = table[r][c - 1] + 1.0;
                    double fromDiagonal = table[r - 1][c - 1] + charCost;

                    table[r][c] = MathFunctions.MinOf3(fromAbove, fromLeft, fromDiagonal);
                }
            }

            // The bottom-right cell holds the cost for the complete strings.
            return table[rows][cols];
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Gets the un-normalised similarity measure of the metric for the given strings, computed as
        /// the minimum accumulated edit cost between them.
        /// </summary>
        /// <param name="firstWord">first string to compare; may be null</param>
        /// <param name="secondWord">second string to compare; may be null</param>
        /// <returns>
        /// <c>DefaultMismatchScore</c> when either argument is null, the length of the other string when
        /// one string is empty, otherwise the minimum cost where skipping a character of either string
        /// costs 1 and pairing two characters costs whatever <c>_dCostFunction</c> returns for them.
        /// </returns>
        /// <remarks>
        /// Each cell of the cost table depends only on the previous row and on the cell to its left,
        /// and only the final cell is returned, so two rolling rows are kept instead of the whole
        /// table. Cells are evaluated in the same order as a full-table fill.
        /// </remarks>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord)
        {
            if (firstWord == null || secondWord == null)
            {
                return DefaultMismatchScore;
            }

            int firstLength = firstWord.Length;
            int secondLength = secondWord.Length;

            // Against an empty string the score is simply the other string's length.
            if (firstLength == 0)
            {
                return secondLength;
            }

            if (secondLength == 0)
            {
                return firstLength;
            }

            // previousRow is row (i - 1) of the cost table, currentRow is row i.
            double[] previousRow = new double[secondLength + 1];
            double[] currentRow = new double[secondLength + 1];

            // Row 0: cost of reaching each prefix of secondWord from an empty prefix of firstWord.
            for (int j = 0; j <= secondLength; j++)
            {
                previousRow[j] = j;
            }

            for (int i = 1; i <= firstLength; i++)
            {
                // Column 0: cost of reaching an empty prefix of secondWord from i characters of firstWord.
                currentRow[0] = i;

                for (int j = 1; j <= secondLength; j++)
                {
                    double pairCost = _dCostFunction.GetCost(firstWord, i - 1, secondWord, j - 1);
                    currentRow[j] = MathFunctions.MinOf3(
                        previousRow[j] + 1.0,
                        currentRow[j - 1] + 1.0,
                        previousRow[j - 1] + pairCost);
                }

                // The row just filled becomes the "previous" row of the next iteration.
                double[] recycled = previousRow;
                previousRow = currentRow;
                currentRow = recycled;
            }

            // After the final swap previousRow is the last filled row.
            return previousRow[secondLength];
        }
        /// <summary>
        /// Gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first string to compare; may be null</param>
        /// <param name="secondWord">second string to compare; may be null</param>
        /// <returns>
        /// 0.0 when either argument is null, the length of the other string when one string is empty,
        /// otherwise the largest value written to the score table (never below the initial 0.0).
        /// </returns>
        /// <remarks>
        /// Every cell is the maximum of <c>defaultMismatchScore</c>, the best gap-adjusted value found
        /// in a window of earlier cells of the same column and/or row (gap penalties come from
        /// <c>gGapFunction</c>, the window is limited by <c>windowSize</c>), and a substitution term
        /// from <c>dCostFunction</c>.
        /// NOTE(review): for an empty input the method returns the other string's length, so a longer
        /// partner yields a larger score; confirm callers expect this.
        /// </remarks>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord)
        {
            if ((firstWord != null) && (secondWord != null))
            {
                int n = firstWord.Length;
                int m = secondWord.Length;
                // zero-length input: return the length of the other string
                if (n == 0)
                {
                    return(m);
                }
                if (m == 0)
                {
                    return(n);
                }
                // score table: one row per character of firstWord, one column per character of secondWord
                double[][] d = new double[n][];
                for (int i = 0; i < n; i++)
                {
                    d[i] = new double[m];
                }
                // process the first column and first row separately: they have no diagonal predecessor
                double maxSoFar = 0.0;
                // first column: every character of firstWord against secondWord[0]
                for (int i = 0; i < n; i++)
                {
                    // get the substitution cost
                    double cost = dCostFunction.GetCost(firstWord, i, secondWord, 0);
                    if (i == 0)
                    {
                        d[0][0] = Math.Max(defaultMismatchScore, cost);
                    }
                    else
                    {
                        // best gap-adjusted value among earlier cells of this column
                        double maxGapCost  = defaultMismatchScore;
                        int    windowStart = i - windowSize;
                        if (windowStart < 1)
                        {
                            windowStart = 1;
                        }
                        // NOTE(review): k is bounded like an absolute index (windowStart .. i - 1) but is
                        // used as an offset in d[i - k]; confirm this is the intended window.
                        for (int k = windowStart; k < i; k++)
                        {
                            maxGapCost = Math.Max(maxGapCost, d[i - k][0] - gGapFunction.GetCost(firstWord, i - k, i));
                        }

                        d[i][0] = MathFunctions.MaxOf3(defaultMismatchScore, maxGapCost, cost);
                    }
                    // track the largest value seen so far
                    if (d[i][0] > maxSoFar)
                    {
                        maxSoFar = d[i][0];
                    }
                }

                // first row: firstWord[0] against every character of secondWord
                for (int j = 0; j < m; j++)
                {
                    // get the substitution cost
                    double cost = dCostFunction.GetCost(firstWord, 0, secondWord, j);
                    if (j == 0)
                    {
                        // same expression as the i == 0 case of the column loop above
                        d[0][0] = Math.Max(defaultMismatchScore, cost);
                    }
                    else
                    {
                        // best gap-adjusted value among earlier cells of this row
                        double maxGapCost  = defaultMismatchScore;
                        int    windowStart = j - windowSize;
                        if (windowStart < 1)
                        {
                            windowStart = 1;
                        }
                        // same index scheme as the column loop: k bounds the window and offsets the read
                        for (int k = windowStart; k < j; k++)
                        {
                            maxGapCost = Math.Max(maxGapCost, d[0][j - k] - gGapFunction.GetCost(secondWord, j - k, j));
                        }

                        d[0][j] = MathFunctions.MaxOf3(defaultMismatchScore, maxGapCost, cost);
                    }
                    // track the largest value seen so far
                    if (d[0][j] > maxSoFar)
                    {
                        maxSoFar = d[0][j];
                    }
                }

                // fill the rest of the table: each cell takes the highest of four candidate scores
                for (int i = 1; i < n; i++)
                {
                    for (int j = 1; j < m; j++)
                    {
                        // get the substitution cost
                        double cost = dCostFunction.GetCost(firstWord, i, secondWord, j);
                        // maxGapCost1: best gap-adjusted value from earlier rows of column j
                        // maxGapCost2: best gap-adjusted value from earlier columns of row i
                        double maxGapCost1 = defaultMismatchScore;
                        double maxGapCost2 = defaultMismatchScore;
                        int    windowStart = i - windowSize;
                        if (windowStart < 1)
                        {
                            windowStart = 1;
                        }
                        for (int k = windowStart; k < i; k++)
                        {
                            maxGapCost1 = Math.Max(maxGapCost1, d[i - k][j] - gGapFunction.GetCost(firstWord, i - k, i));
                        }

                        // reuse windowStart for the row-wise window
                        windowStart = j - windowSize;
                        if (windowStart < 1)
                        {
                            windowStart = 1;
                        }
                        for (int k = windowStart; k < j; k++)
                        {
                            maxGapCost2 = Math.Max(maxGapCost2, d[i][j - k] - gGapFunction.GetCost(secondWord, j - k, j));
                        }

                        // candidates: the floor score, the two gap scores, and the diagonal plus substitution
                        d[i][j] = MathFunctions.MaxOf4(defaultMismatchScore, maxGapCost1, maxGapCost2, d[i - 1][j - 1] + cost);
                        if (d[i][j] > maxSoFar)
                        {
                            maxSoFar = d[i][j];
                        }
                    }
                }

                // the result is the largest value found anywhere in the table
                return(maxSoFar);
            }
            // null input: fixed fallback score
            return(0.0);
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Gets the un-normalised similarity measure of the metric for the given strings.
        /// </summary>
        /// <param name="firstWord">first string to compare; may be null</param>
        /// <param name="secondWord">second string to compare; may be null</param>
        /// <returns>
        /// 0.0 when either argument is null, the length of the other string when one string is empty,
        /// otherwise the largest value written to the score table (never below
        /// <c>defaultMismatchScore</c>).
        /// </returns>
        /// <remarks>
        /// Every cell is the maximum of <c>defaultMismatchScore</c>, a neighbouring cell minus
        /// <c>gapCost</c> (the cell above and/or to the left), and a substitution term from
        /// <c>dCostFunction</c> (added to the upper-left cell for interior cells).
        /// NOTE(review): for an empty input the method returns the other string's length, so a longer
        /// partner yields a larger score; confirm callers expect this.
        /// </remarks>
        public override double GetUnnormalisedSimilarity(string firstWord, string secondWord)
        {
            // Null input short-circuits to the fixed fallback score.
            if (firstWord == null || secondWord == null)
            {
                return 0.0;
            }

            int rows = firstWord.Length;
            int cols = secondWord.Length;
            if (rows == 0)
            {
                return cols;
            }
            if (cols == 0)
            {
                return rows;
            }

            // One row per character of firstWord, one column per character of secondWord.
            double[][] scores = new double[rows][];
            for (int r = 0; r < rows; r++)
            {
                scores[r] = new double[cols];
            }

            double best = defaultMismatchScore;

            // First column: each character of firstWord against secondWord[0].
            for (int r = 0; r < rows; r++)
            {
                double charCost = dCostFunction.GetCost(firstWord, r, secondWord, 0);

                // The corner has no predecessor, so only the bare gap penalty applies there.
                double gapped = (r == 0) ? -gapCost : scores[r - 1][0] - gapCost;

                scores[r][0] = MathFunctions.MaxOf3(defaultMismatchScore, gapped, charCost);
                if (scores[r][0] > best)
                {
                    best = scores[r][0];
                }
            }

            // First row: firstWord[0] against each character of secondWord
            // (the corner cell is recomputed here with the same expression).
            for (int c = 0; c < cols; c++)
            {
                double charCost = dCostFunction.GetCost(firstWord, 0, secondWord, c);
                double gapped = (c == 0) ? -gapCost : scores[0][c - 1] - gapCost;

                scores[0][c] = MathFunctions.MaxOf3(defaultMismatchScore, gapped, charCost);
                if (scores[0][c] > best)
                {
                    best = scores[0][c];
                }
            }

            // Interior cells: floor score, gap from above, gap from the left, or diagonal plus substitution.
            for (int r = 1; r < rows; r++)
            {
                for (int c = 1; c < cols; c++)
                {
                    double charCost = dCostFunction.GetCost(firstWord, r, secondWord, c);

                    double fromAbove = scores[r - 1][c] - gapCost;
                    double fromLeft = scores[r][c - 1] - gapCost;
                    double fromDiagonal = scores[r - 1][c - 1] + charCost;

                    scores[r][c] = MathFunctions.MaxOf4(defaultMismatchScore, fromAbove, fromLeft, fromDiagonal);
                    if (scores[r][c] > best)
                    {
                        best = scores[r][c];
                    }
                }
            }

            // The result is the largest value found anywhere in the table.
            return best;
        }