Ejemplo n.º 1
0
        /// <summary>
        /// Calculate how well the given words match the string.
        /// The words are concatenated into one pattern and the best matching substring of
        /// <paramref name="text"/> is located with a substring variant of edit distance.
        /// Uses case insensitive matching. Extra text characters that fall exactly on a word
        /// border (after any word, including the last one) cost nothing.
        /// Specify custom costs for the operations.
        /// </summary>
        /// <param name="text">The text to search in.</param>
        /// <param name="words">The words to look for, in order. Must contain at least one entry and no null entries.</param>
        /// <param name="cReplace">Cost of a pattern character that differs from the text character it is aligned with.</param>
        /// <param name="cInsert">Cost of an extra text character that is not on a word border.</param>
        /// <param name="cDelete">Cost of a pattern character that has no counterpart in the text.</param>
        /// <returns>
        /// A <c>StrMatch</c> built from the matched substring, a distance score
        /// (best cost * 100 plus the start offset and the surplus length of the match),
        /// the start offset, the match length, the text length and <c>1 - best / pattern length</c>;
        /// or <c>StrMatch.Fail()</c> when no end position is cheaper than dropping the whole pattern.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// <paramref name="text"/> or <paramref name="words"/> is null, <paramref name="words"/> is empty,
        /// or <paramref name="words"/> contains a null entry.
        /// </exception>
        public static StrMatch WordMatch(string text, string[] words, int cReplace, int cInsert, int cDelete)
        {
            if (text == null || words == null || words.Length == 0)
            {
                throw new ArgumentException("Arguments cannot be null or empty");
            }

            // Find word borders: the cumulative length after each word is a pattern position
            // where extra text characters are free.
            var slack = new HashSet<int>();
            var index = 0;

            foreach (var word in words)
            {
                if (word == null)
                {
                    // Previously surfaced as a NullReferenceException on word.Length.
                    throw new ArgumentException("Words cannot contain null entries");
                }

                index += word.Length;
                slack.Add(index);
            }

            string pattern = string.Join("", words);

            // Start node tracker: startNode[x, y] is the text offset at which the alignment
            // that produced matrix[x, y] began. Row 0 begins wherever it currently is.
            var startNode = new int[pattern.Length + 1, text.Length + 1];

            for (int y = 0; y <= text.Length; y++)
            {
                startNode[0, y] = y;
            }

            // Calculate custom edit distance. Row 0 keeps the array default of 0, so a match
            // may begin at any text position for free; column 0 is the cost of dropping the
            // first x pattern characters.
            var matrix = new int[pattern.Length + 1, text.Length + 1];

            for (int x = 0; x <= pattern.Length; x++)
            {
                matrix[x, 0] = x * cDelete;
            }

            for (int y = 1; y <= text.Length; y++)
            {
                for (int x = 1; x <= pattern.Length; x++)
                {
                    // Invariant lower-casing keeps the comparison independent of the current culture.
                    bool isMatch = char.ToLowerInvariant(pattern[x - 1]) == char.ToLowerInvariant(text[y - 1]);
                    int replace = matrix[x - 1, y - 1] + (isMatch ? 0 : cReplace);
                    int insert = matrix[x, y - 1] + (slack.Contains(x) ? 0 : cInsert);
                    int delete = matrix[x - 1, y] + cDelete;

                    // Ties prefer replace, then insert, then delete.
                    if (replace <= insert && replace <= delete)
                    {
                        matrix[x, y] = replace;
                        startNode[x, y] = startNode[x - 1, y - 1];
                    }
                    else if (insert <= delete)
                    {
                        matrix[x, y] = insert;
                        startNode[x, y] = startNode[x, y - 1];
                    }
                    else
                    {
                        matrix[x, y] = delete;
                        startNode[x, y] = startNode[x - 1, y];
                    }
                }
            }

            // Find best match distance and end position. Only a strictly lower cost moves
            // the end, so the earliest best end position wins.
            int best = matrix[pattern.Length, 0];
            int end = 0;

            for (int i = 1; i <= text.Length; i++)
            {
                int value = matrix[pattern.Length, i];

                if (value < best)
                {
                    best = value;
                    end = i;
                }
            }

            // Match failed: no end position beats dropping the entire pattern. Testing 'end'
            // instead of 'best == pattern.Length' gives the same answer for a delete cost of 1
            // and stays correct for any other delete cost.
            if (end == 0)
            {
                return StrMatch.Fail();
            }

            // Final values
            int start = startNode[pattern.Length, end];
            int matchLength = end - start;
            int middleInserts = matchLength - pattern.Length;
            int startInserts = end - pattern.Length - middleInserts; // algebraically equal to start
            int distance = best * 100 + startInserts + middleInserts;

            // NOTE(review): the last argument divides by the pattern length only, so it is
            // not normalised for costs other than 1.
            return new StrMatch(text.Substring(start, matchLength), distance, start, matchLength, text.Length, 1 - best / (double)pattern.Length);
        }
Ejemplo n.º 2
0
        /// <summary>
        /// Find the best matching substring from a string. Substring version of Edit Distance:
        /// the match may begin at any position of <paramref name="text"/> at no cost.
        /// Characters are compared exactly (case sensitive).
        /// Specify custom costs for the operations.
        /// </summary>
        /// <param name="text">The text to search in.</param>
        /// <param name="pattern">The string to look for.</param>
        /// <param name="cReplace">Cost of a pattern character that differs from the text character it is aligned with.</param>
        /// <param name="cInsert">Cost of an extra text character inside the match.</param>
        /// <param name="cDelete">Cost of a pattern character that has no counterpart in the text.</param>
        /// <returns>
        /// A <c>StrMatch</c> built from the matched substring, the best cost, the start offset,
        /// the match length, the text length and <c>1 - best / pattern length</c>;
        /// or <c>StrMatch.Fail()</c> when no end position is cheaper than dropping the whole pattern.
        /// </returns>
        /// <exception cref="ArgumentException"><paramref name="text"/> or <paramref name="pattern"/> is null.</exception>
        public static StrMatch Find(string text, string pattern, int cReplace, int cInsert, int cDelete)
        {
            if (text == null || pattern == null)
            {
                throw new ArgumentException("Strings can't be null");
            }

            // matrix[x, y] holds the cheapest cost of aligning the first x pattern characters
            // with a stretch of text ending at position y. Row 0 keeps the array default of 0
            // (free start anywhere); column 0 is the cost of dropping x pattern characters.
            int[,] matrix = new int[pattern.Length + 1, text.Length + 1];

            for (int i = 0; i <= pattern.Length; i++)
            {
                matrix[i, 0] = i * cDelete;
            }

            for (int y = 1; y <= text.Length; y++)
            {
                for (int x = 1; x <= pattern.Length; x++)
                {
                    bool isMatch = pattern[x - 1] == text[y - 1];
                    var replace = matrix[x - 1, y - 1] + (isMatch ? 0 : cReplace);
                    var insert = matrix[x, y - 1] + cInsert;
                    var delete = matrix[x - 1, y] + cDelete;
                    matrix[x, y] = Math.Min(Math.Min(insert, delete), replace);
                }
            }

            // Pick the end position with the lowest cost; only a strictly lower cost moves
            // the end, so the earliest best end position wins.
            var best = matrix[pattern.Length, 0];
            var end = 0;

            for (int i = 1; i <= text.Length; i++)
            {
                var value = matrix[pattern.Length, i];
                if (value < best)
                {
                    best = value;
                    end = i;
                }
            }

            // Match failed: no end position beats dropping the entire pattern. Testing 'end'
            // instead of 'best == pattern.Length' gives the same answer for a delete cost of 1,
            // stops valid matches being rejected for other delete costs, and keeps the
            // back-trace below from starting in column 0, where it would index matrix[.., -1].
            if (end == 0)
            {
                return StrMatch.Fail();
            }

            // Walk back from the end cell towards row 0, always stepping to the cheapest
            // neighbouring cell (ties prefer left, then the diagonal, then up). The column
            // reached when the pattern is used up marks where the match begins.
            int row = pattern.Length;
            int col = end;

            while (row != 0)
            {
                var left = matrix[row - 1, col];
                var corner = matrix[row - 1, col - 1];
                var up = matrix[row, col - 1];

                if (left <= corner && left <= up)
                {
                    row--;
                }
                else if (corner <= up)
                {
                    row--;
                    col--;
                }
                else
                {
                    col--;
                }
            }

            // Matrix columns are 1-based text positions; convert to a 0-based start offset.
            int start = col - 1;
            int length = end - start;

            // NOTE(review): the last argument divides by the pattern length only, so it is
            // not normalised for costs other than 1.
            return new StrMatch(text.Substring(start, length), best, start, length, text.Length, 1 - best / (double)pattern.Length);
        }