Example #1
0
        /// <summary>
        /// Longest Common Subsequence. A good value is greater than 0.33.
        /// </summary>
        /// <param name="input"></param>
        /// <param name="comparedTo"></param>
        /// <param name="caseSensitive"></param>
        /// <returns>Returns a Tuple of the sub sequence string and the match coefficient.</returns>
        public static Tuple <string, double> LongestCommonSubsequence(this string input, string comparedTo, bool caseSensitive = false)
        {
            if (string.IsNullOrWhiteSpace(input) || string.IsNullOrWhiteSpace(comparedTo))
            {
                return(new Tuple <string, double>(string.Empty, 0.0d));
            }
            if (!caseSensitive)
            {
                input      = input.ToLower();
                comparedTo = comparedTo.ToLower();
            }

            int inputLen      = input.Length;
            int comparedToLen = comparedTo.Length;

            int[,] lcs             = new int[inputLen + 1, comparedToLen + 1];
            LcsDirection[,] tracks = new LcsDirection[inputLen + 1, comparedToLen + 1];
            int[,] w = new int[inputLen + 1, comparedToLen + 1];
            int i, j;

            for (i = 0; i <= inputLen; ++i)
            {
                lcs[i, 0]    = 0;
                tracks[i, 0] = LcsDirection.North;
            }
            for (j = 0; j <= comparedToLen; ++j)
            {
                lcs[0, j]    = 0;
                tracks[0, j] = LcsDirection.West;
            }

            for (i = 1; i <= inputLen; ++i)
            {
                for (j = 1; j <= comparedToLen; ++j)
                {
                    if (input[i - 1].Equals(comparedTo[j - 1]))
                    {
                        int k = w[i - 1, j - 1];
                        //lcs[i,j] = lcs[i-1,j-1] + 1;
                        lcs[i, j]    = lcs[i - 1, j - 1] + Square(k + 1) - Square(k);
                        tracks[i, j] = LcsDirection.NorthWest;
                        w[i, j]      = k + 1;
                    }
                    else
                    {
                        lcs[i, j]    = lcs[i - 1, j - 1];
                        tracks[i, j] = LcsDirection.None;
                    }

                    if (lcs[i - 1, j] >= lcs[i, j])
                    {
                        lcs[i, j]    = lcs[i - 1, j];
                        tracks[i, j] = LcsDirection.North;
                        w[i, j]      = 0;
                    }

                    if (lcs[i, j - 1] >= lcs[i, j])
                    {
                        lcs[i, j]    = lcs[i, j - 1];
                        tracks[i, j] = LcsDirection.West;
                        w[i, j]      = 0;
                    }
                }
            }

            i = inputLen;
            j = comparedToLen;

            string subseq = "";
            double p      = lcs[i, j];

            //trace the backtracking matrix.
            while (i > 0 || j > 0)
            {
                if (tracks[i, j] == LcsDirection.NorthWest)
                {
                    i--;
                    j--;
                    subseq = input[i] + subseq;
                    //Trace.WriteLine(i + " " + input1[i] + " " + j);
                }

                else if (tracks[i, j] == LcsDirection.North)
                {
                    i--;
                }

                else if (tracks[i, j] == LcsDirection.West)
                {
                    j--;
                }
            }

            double coef = p / (inputLen * comparedToLen);

            Tuple <string, double> retval = new Tuple <string, double>(subseq, coef);

            return(retval);
        }
        /// <summary>
        /// Longest Common Subsequence. A good value is greater than 0.33.
        /// </summary>
        /// <param name="input"></param>
        /// <param name="comparedTo"></param>
        /// <param name="caseSensitive"></param>
        /// <returns>Returns a Tuple of the sub sequence string and the match coeficient.</returns>
        public static Tuple<string, double> LongestCommonSubsequence(this string input, string comparedTo, bool caseSensitive = false)
        {
            if (string.IsNullOrWhiteSpace(input) || string.IsNullOrWhiteSpace(comparedTo)) return new Tuple<string, double>(string.Empty, 0.0d);
            if (!caseSensitive)
            {
                input = input.ToLower();
                comparedTo = comparedTo.ToLower();
            }

            int inputLen = input.Length;
            int comparedToLen = comparedTo.Length;

            int[,] lcs = new int[inputLen + 1, comparedToLen + 1];
            LcsDirection[,] tracks = new LcsDirection[inputLen + 1, comparedToLen + 1];
            int[,] w = new int[inputLen + 1, comparedToLen + 1];
            int i, j;

            for (i = 0; i <= inputLen; ++i)
            {
                lcs[i, 0] = 0;
                tracks[i, 0] = LcsDirection.North;

            }
            for (j = 0; j <= comparedToLen; ++j)
            {
                lcs[0, j] = 0;
                tracks[0, j] = LcsDirection.West;
            }

            for (i = 1; i <= inputLen; ++i)
            {
                for (j = 1; j <= comparedToLen; ++j)
                {
                    if (input[i - 1].Equals(comparedTo[j - 1]))
                    {
                        int k = w[i - 1, j - 1];
                        //lcs[i,j] = lcs[i-1,j-1] + 1;
                        lcs[i, j] = lcs[i - 1, j - 1] + Square(k + 1) - Square(k);
                        tracks[i, j] = LcsDirection.NorthWest;
                        w[i, j] = k + 1;
                    }
                    else
                    {
                        lcs[i, j] = lcs[i - 1, j - 1];
                        tracks[i, j] = LcsDirection.None;
                    }

                    if (lcs[i - 1, j] >= lcs[i, j])
                    {
                        lcs[i, j] = lcs[i - 1, j];
                        tracks[i, j] = LcsDirection.North;
                        w[i, j] = 0;
                    }

                    if (lcs[i, j - 1] >= lcs[i, j])
                    {
                        lcs[i, j] = lcs[i, j - 1];
                        tracks[i, j] = LcsDirection.West;
                        w[i, j] = 0;
                    }
                }
            }

            i = inputLen;
            j = comparedToLen;

            string subseq = "";
            double p = lcs[i, j];

            //trace the backtracking matrix.
            while (i > 0 || j > 0)
            {
                if (tracks[i, j] == LcsDirection.NorthWest)
                {
                    i--;
                    j--;
                    subseq = input[i] + subseq;
                    //Trace.WriteLine(i + " " + input1[i] + " " + j);
                }

                else if (tracks[i, j] == LcsDirection.North)
                {
                    i--;
                }

                else if (tracks[i, j] == LcsDirection.West)
                {
                    j--;
                }
            }

            double coef = p / (inputLen * comparedToLen);

            Tuple<string, double> retval = new Tuple<string, double>(subseq, coef);
            return retval;
        }
        /// <summary>
        /// Longest Common Subsequence.
        /// </summary>
        /// <param name="input"></param>
        /// <param name="comparedTo"></param>
        /// <param name="caseSensitive"></param>
        /// <returns>Returns a Tuple of the sub sequence string and the match coefficient.</returns>
        public static Tuple<string, double> LongestCommonSubsequence(
            this string input, 
            string comparedTo, 
            bool caseSensitive = false)
        {
            if (string.IsNullOrWhiteSpace(input) || string.IsNullOrWhiteSpace(comparedTo))
            {
                return new Tuple<string, double>(string.Empty, 0.0d);
            }

            if (!caseSensitive)
            {
                input = input.ToLower();
                comparedTo = comparedTo.ToLower();
            }

            var inputLen = input.Length;
            var comparedToLen = comparedTo.Length;

            var lcs = new int[inputLen + 1, comparedToLen + 1];
            var tracks = new LcsDirection[inputLen + 1, comparedToLen + 1];
            var w = new int[inputLen + 1, comparedToLen + 1];
            int i, j;

            for (i = 0; i <= inputLen; ++i)
            {
                lcs[i, 0] = 0;
                tracks[i, 0] = LcsDirection.North;
            }

            for (j = 0; j <= comparedToLen; ++j)
            {
                lcs[0, j] = 0;
                tracks[0, j] = LcsDirection.West;
            }

            for (i = 1; i <= inputLen; ++i)
            {
                for (j = 1; j <= comparedToLen; ++j)
                {
                    if (input[i - 1].Equals(comparedTo[j - 1]))
                    {
                        var k = w[i - 1, j - 1];
                        lcs[i, j] = lcs[i - 1, j - 1] + Square(k + 1) - Square(k);
                        tracks[i, j] = LcsDirection.NorthWest;
                        w[i, j] = k + 1;
                    }
                    else
                    {
                        lcs[i, j] = lcs[i - 1, j - 1];
                        tracks[i, j] = LcsDirection.None;
                    }

                    if (lcs[i - 1, j] >= lcs[i, j])
                    {
                        lcs[i, j] = lcs[i - 1, j];
                        tracks[i, j] = LcsDirection.North;
                        w[i, j] = 0;
                    }

                    if (lcs[i, j - 1] >= lcs[i, j])
                    {
                        lcs[i, j] = lcs[i, j - 1];
                        tracks[i, j] = LcsDirection.West;
                        w[i, j] = 0;
                    }
                }
            }

            i = inputLen;
            j = comparedToLen;

            var subseq = string.Empty;
            double p = lcs[i, j];

            while (i > 0 || j > 0)
            {
                switch (tracks[i, j])
                {
                    case LcsDirection.NorthWest:
                        i--;
                        j--;
                        subseq = input[i] + subseq;
                        break;
                    case LcsDirection.North:
                        i--;
                        break;
                    case LcsDirection.West:
                        j--;
                        break;
                    case LcsDirection.None:
                        break;
                    default:
                        throw new ArgumentOutOfRangeException();
                }
            }

            var coef = p / (inputLen * comparedToLen);

            return new Tuple<string, double>(subseq, coef);
        }