Beispiel #1
0
        /// <summary>
        /// Finds the position in <paramref name="text"/> that best matches
        /// <paramref name="pattern"/> near <paramref name="loc"/>.  Exact hits are
        /// resolved directly; everything else is handed to <c>BitapAlgorithm.Match</c>.
        /// </summary>
        /// <param name="text">Text to search in.</param>
        /// <param name="pattern">Pattern to look for.</param>
        /// <param name="loc">Location to search around; clamped to the range 0..text.Length.</param>
        /// <param name="settings">Settings used to construct the <c>BitapAlgorithm</c> for the fuzzy search.</param>
        /// <returns>
        /// 0 when text and pattern are equal, -1 when text is empty, the clamped location
        /// when the pattern occurs exactly there, otherwise whatever the Bitap match returns.
        /// </returns>
        internal static int FindBestMatchIndex(this string text, string pattern, int loc, MatchSettings settings)
        {
            // Keep the search origin inside the text.
            var origin = Math.Min(loc, text.Length);
            if (origin < 0)
            {
                origin = 0;
            }

            // Identical strings: shortcut (potentially not guaranteed by the algorithm).
            if (text == pattern)
            {
                return 0;
            }

            // An empty text has nothing to match against.
            if (text.Length == 0)
            {
                return -1;
            }

            // Exact occurrence at the requested spot (an empty pattern also lands here).
            var fitsAtOrigin = origin + pattern.Length <= text.Length;
            if (fitsAtOrigin && text.Substring(origin, pattern.Length) == pattern)
            {
                return origin;
            }

            // No exact hit; fall back to the fuzzy comparison.
            return new BitapAlgorithm(settings).Match(text, pattern, origin);
        }
Beispiel #2
0
 /// <summary>
 /// Creates a matcher configured from the given settings.
 /// </summary>
 /// <param name="settings">Supplies the match threshold and match distance stored by this instance.</param>
 public BitapAlgorithm(MatchSettings settings)
 {
     // Read both values from the settings first, then store them in the fields.
     var threshold = settings.MatchThreshold;
     var distance  = settings.MatchDistance;

     _matchThreshold = threshold;
     _matchDistance  = distance;
 }
        /// <summary>
        /// Applies a list of patches to a text.  The patches are deep-copied, padded and
        /// split before use, so the caller's list is not modified by this method.
        /// </summary>
        /// <param name="patches">Patches to apply.  An empty list returns the text unchanged.</param>
        /// <param name="text">Old text.</param>
        /// <param name="matchSettings">Settings forwarded to every <c>FindBestMatchIndex</c> lookup.</param>
        /// <param name="settings">
        /// Supplies the patch margin and the patch delete threshold;
        /// <c>PatchSettings.Default</c> is used when this is null.
        /// </param>
        /// <returns>
        /// A tuple holding the patched text and an array of flags, one per patch of the
        /// padded and split working copy, telling whether that patch was applied.
        /// </returns>
        public static (string newText, bool[] results) Apply(this List <Patch> patches, string text,
                                                             MatchSettings matchSettings, PatchSettings settings = null
                                                             )
        {
            if (settings is null)
            {
                settings = PatchSettings.Default;
            }

            if (patches.Count == 0)
            {
                return (text, new bool[0]);
            }

            // Work on a deep copy so the caller's patches stay untouched.
            List<Patch> workingPatches = patches.DeepCopy();

            // Surround the text with padding so patches at the edges have context.
            var padding = workingPatches.AddPadding(settings.PatchMargin);
            var buffer  = padding + text + padding;
            workingPatches.SplitMax();

            var applied = new bool[workingPatches.Count];

            // drift is the offset between the expected and the actual location of the
            // previous patch.  With patches expected at positions 10 and 20, if the first
            // one was found at 12 then drift is 2 and the second one is expected at 22.
            var drift = 0;

            for (var patchIndex = 0; patchIndex < workingPatches.Count; patchIndex++)
            {
                var patch       = workingPatches[patchIndex];
                var anticipated = patch.Start2 + drift;
                var source      = patch.Diffs.Text1();
                var oversized   = source.Length > Constants.MatchMaxBits;

                int head;
                var tail = -1;
                if (!oversized)
                {
                    head = buffer.FindBestMatchIndex(source, anticipated, matchSettings);
                }
                else
                {
                    // An oversized pattern only shows up for a monster delete:
                    // match its leading and trailing windows separately.
                    head = buffer.FindBestMatchIndex(
                        source.Substring(0, Constants.MatchMaxBits), anticipated, matchSettings);
                    if (head != -1)
                    {
                        tail = buffer.FindBestMatchIndex(
                            source.Substring(source.Length - Constants.MatchMaxBits),
                            anticipated + source.Length - Constants.MatchMaxBits,
                            matchSettings);
                        if (tail == -1 || head >= tail)
                        {
                            // No valid trailing context: give up on this patch.
                            head = -1;
                        }
                    }
                }

                if (head == -1)
                {
                    // Nothing found.  Take this failed patch's length change out of the
                    // offset used for the patches that follow.
                    applied[patchIndex] = false;
                    drift -= patch.Length2 - patch.Length1;
                    continue;
                }

                // A location was found.
                applied[patchIndex] = true;
                drift = head - anticipated;

                var candidateEnd = tail == -1
                    ? head + source.Length
                    : tail + Constants.MatchMaxBits;
                var matchEnd = Math.Min(candidateEnd, buffer.Length);
                var found    = buffer.Substring(head, matchEnd - head);

                if (source == found)
                {
                    // Exact match: splice the replacement text straight in.
                    buffer = buffer.Substring(0, head)
                             + patch.Diffs.Text2()
                             + buffer.Substring(head + source.Length);
                    continue;
                }

                // Inexact match: diff expected against found text to map indices across.
                var alignment = Diff.Compute(source, found, 0f, false);
                var unacceptable = oversized
                                   && alignment.Levenshtein() / (float)source.Length
                                   > settings.PatchDeleteThreshold;
                if (unacceptable)
                {
                    // The end points line up but the content in between is too different.
                    applied[patchIndex] = false;
                    continue;
                }

                alignment.CleanupSemanticLossless();
                var sourceOffset = 0;
                foreach (var edit in patch.Diffs)
                {
                    var op = edit.Operation;
                    if (op == Operation.Equal)
                    {
                        sourceOffset += edit.Text.Length;
                        continue;
                    }

                    var target = alignment.FindEquivalentLocation2(sourceOffset);
                    if (op == Operation.Insert)
                    {
                        buffer = buffer.Insert(head + target, edit.Text);
                        sourceOffset += edit.Text.Length;
                    }
                    else if (op == Operation.Delete)
                    {
                        // Deleted text does not advance the offset into the patch source.
                        var deleteEnd = alignment.FindEquivalentLocation2(sourceOffset + edit.Text.Length);
                        buffer = buffer.Remove(head + target, deleteEnd - target);
                    }
                    else
                    {
                        sourceOffset += edit.Text.Length;
                    }
                }
            }

            // Remove the padding that was added on both sides.
            var unpadded = buffer.Substring(padding.Length, buffer.Length - 2 * padding.Length);
            return (unpadded, applied);
        }