/// <summary>
/// Finds the position in <paramref name="text"/> that best matches <paramref name="pattern"/>
/// near <paramref name="loc"/>. Cheap exact checks are tried first; only when they fail is a
/// <see cref="BitapAlgorithm"/> built to run a fuzzy search.
/// </summary>
/// <param name="text">Text to search in.</param>
/// <param name="pattern">Pattern to look for.</param>
/// <param name="loc">Expected position of the pattern; clamped to the range [0, text.Length].</param>
/// <param name="settings">Settings handed to the <see cref="BitapAlgorithm"/> constructor.</param>
/// <returns>
/// 0 when both strings are equal, -1 when <paramref name="text"/> is empty, the clamped
/// location when the pattern sits exactly there, otherwise whatever
/// <c>BitapAlgorithm.Match</c> returns.
/// </returns>
internal static int FindBestMatchIndex(this string text, string pattern, int loc, MatchSettings settings) {
    // Neither string is null-checked here; callers are expected to pass real strings.
    var anchor = Math.Min(loc, text.Length);
    if (anchor < 0) {
        anchor = 0;
    }

    // Identical strings: answer directly rather than relying on the fuzzy search.
    if (text == pattern) {
        return 0;
    }

    // An empty text cannot contain anything.
    if (text.Length == 0) {
        return -1;
    }

    // Exact hit at the expected position (an empty pattern always lands here).
    var patternLength = pattern.Length;
    var fitsAtAnchor = anchor + patternLength <= text.Length;
    if (fitsAtAnchor && string.CompareOrdinal(text, anchor, pattern, 0, patternLength) == 0) {
        return anchor;
    }

    // Nothing exact: fall back to the fuzzy matcher.
    return new BitapAlgorithm(settings).Match(text, pattern, anchor);
}
/// <summary>
/// Creates a matcher that takes its threshold and distance from <paramref name="settings"/>.
/// </summary>
/// <param name="settings">
/// Source of the <c>MatchThreshold</c> and <c>MatchDistance</c> values copied into this instance.
/// </param>
public BitapAlgorithm(MatchSettings settings) =>
    (_matchThreshold, _matchDistance) = (settings.MatchThreshold, settings.MatchDistance);
/// <summary>
/// Applies a list of patches to <paramref name="text"/> and reports, patch by patch,
/// whether each one could be applied. The caller's patch list is not modified: all work
/// is done on a deep copy.
/// </summary>
/// <param name="patches">Patches to apply, in order.</param>
/// <param name="text">The text to patch.</param>
/// <param name="matchSettings">Settings forwarded to every <c>FindBestMatchIndex</c> lookup.</param>
/// <param name="settings">
/// Patch settings (margin and delete threshold are read here); <c>PatchSettings.Default</c>
/// is used when null.
/// </param>
/// <returns>
/// A tuple of the patched text and one flag per processed patch (true = applied). The flags
/// are sized after <c>SplitMax</c> has run on the copy, so presumably their count can differ
/// from the number of patches passed in - verify against <c>SplitMax</c>.
/// </returns>
public static (string newText, bool[] results) Apply(this List<Patch> patches, string text,
    MatchSettings matchSettings, PatchSettings settings = null) {
    if (settings is null) {
        settings = PatchSettings.Default;
    }

    if (patches.Count == 0) {
        return (text, new bool[0]);
    }

    // Work on a private copy so the caller's patches stay untouched.
    patches = patches.DeepCopy();

    // Surround the text with padding so patches touching either edge have context to match.
    var padding = patches.AddPadding(settings.PatchMargin);
    text = padding + text + padding;
    patches.SplitMax();

    var results = new bool[patches.Count];

    // Running offset between where a patch was expected and where the previous one really
    // landed. Example: patches expected at 10 and 20, first found at 12 => offset is 2 and
    // the second patch is looked for at 22.
    var delta = 0;

    for (var i = 0; i < patches.Count; i++) {
        var patch = patches[i];
        var expectedLoc = patch.Start2 + delta;
        var text1 = patch.Diffs.Text1();
        var maxBits = Constants.MatchMaxBits;
        var isOversized = text1.Length > maxBits;

        int startLoc;
        var endLoc = -1;
        if (!isOversized) {
            startLoc = text.FindBestMatchIndex(text1, expectedLoc, matchSettings);
        } else {
            // The pattern is too long for a single match (the original notes this only
            // happens for a very large delete), so its head and tail are located separately.
            var head = text1.Substring(0, maxBits);
            startLoc = text.FindBestMatchIndex(head, expectedLoc, matchSettings);
            if (startLoc != -1) {
                var tail = text1.Substring(text1.Length - maxBits);
                endLoc = text.FindBestMatchIndex(tail, expectedLoc + text1.Length - maxBits, matchSettings);
                if (endLoc == -1 || startLoc >= endLoc) {
                    // No usable trailing context: treat the patch as unmatched.
                    startLoc = -1;
                }
            }
        }

        if (startLoc == -1) {
            // Patch could not be placed. Later patches must not expect the size change
            // this one would have caused.
            results[i] = false;
            delta -= patch.Length2 - patch.Length1;
            continue;
        }

        results[i] = true;
        delta = startLoc - expectedLoc;

        var matchEnd = endLoc == -1 ? startLoc + text1.Length : endLoc + maxBits;
        matchEnd = Math.Min(matchEnd, text.Length);
        var text2 = text.Substring(startLoc, matchEnd - startLoc);

        if (text1 == text2) {
            // Exact match: splice the replacement text straight in.
            var before = text.Substring(0, startLoc);
            var replacement = patch.Diffs.Text2();
            var after = text.Substring(startLoc + text1.Length);
            text = before + replacement + after;
            continue;
        }

        // Inexact match: diff expected vs. actual text to translate patch offsets.
        var diffs = Diff.Compute(text1, text2, 0f, false);
        if (isOversized && diffs.Levenshtein() / (float)text1.Length > settings.PatchDeleteThreshold) {
            // Both ends were found, but what lies between differs too much.
            results[i] = false;
            continue;
        }

        diffs.CleanupSemanticLossless();
        var index1 = 0;
        foreach (var edit in patch.Diffs) {
            var op = edit.Operation;
            if (op == Operation.Equal) {
                index1 += edit.Text.Length;
                continue;
            }

            var mappedStart = diffs.FindEquivalentLocation2(index1);
            if (op == Operation.Delete) {
                var mappedEnd = diffs.FindEquivalentLocation2(index1 + edit.Text.Length);
                text = text.Remove(startLoc + mappedStart, mappedEnd - mappedStart);
                // index1 is not advanced for a delete.
                continue;
            }

            if (op == Operation.Insert) {
                text = text.Insert(startLoc + mappedStart, edit.Text);
            }

            index1 += edit.Text.Length;
        }
    }

    // Remove the padding added before patching.
    var unpadded = text.Substring(padding.Length, text.Length - 2 * padding.Length);
    return (unpadded, results);
}