/// <summary>
/// Creates an edit operation that records a pair of characters and the kind of change between them.
/// </summary>
/// <param name="valueFrom">Character stored in <c>ValueFrom</c>.</param>
/// <param name="valueTo">Character stored in <c>ValueTo</c>.</param>
/// <param name="operation">
/// Requested operation kind. It is ignored when both characters are equal; in that case
/// <c>Operation</c> is set to <see cref="EditOperationKind.None"/>.
/// </param>
public EditOperation(char valueFrom, char valueTo, EditOperationKind operation)
{
    ValueFrom = valueFrom;
    ValueTo = valueTo;

    // Identical characters mean nothing changes, whatever kind the caller asked for.
    if (valueFrom != valueTo)
    {
        Operation = operation;
    }
    else
    {
        Operation = EditOperationKind.None;
    }
}
/// <summary>
/// Selects the operation to apply for a single step, given the set of candidate operations for that step.
/// </summary>
/// <param name="nextOperation">Flag set of the operations that are candidates for this step.</param>
/// <param name="insertCost">Cost of an insert; used to break ties between candidates.</param>
/// <param name="removeCost">Cost of a remove; used to break ties between candidates.</param>
/// <param name="editCost">Cost of an edit; used as the baseline when edit is a candidate and copy is not.</param>
/// <param name="copyCost">Cost of a copy; used as the baseline when copy is a candidate.</param>
/// <param name="previousOperation">
/// Operation chosen by the previous call. It is repeated when it is still a candidate, and it is
/// updated whenever a candidate is selected by cost.
/// </param>
/// <returns>The operation kind chosen for this step.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="previousOperation"/> is contained in the candidates but is not exactly one of
/// Insert, Remove, Edit or Copy.
/// </exception>
private static EditOperationKind GetNextOperation(Operations nextOperation, int insertCost, int removeCost, int editCost, int copyCost, ref Operations previousOperation)
{
    // Keep repeating the previous operation for as long as it remains one of the candidates.
    bool previousStillCandidate =
        (previousOperation != Operations.None) &&
        ((nextOperation & previousOperation) == previousOperation);

    if (previousStillCandidate)
    {
        if (previousOperation == Operations.Insert)
        {
            return EditOperationKind.Insert;
        }

        if (previousOperation == Operations.Remove)
        {
            return EditOperationKind.Remove;
        }

        if (previousOperation == Operations.Edit)
        {
            return EditOperationKind.Edit;
        }

        if (previousOperation == Operations.Copy)
        {
            return EditOperationKind.Copy;
        }

        throw new ArgumentOutOfRangeException(nameof(previousOperation));
    }

    bool copyAllowed = (nextOperation & Operations.Copy) != Operations.None;
    bool editAllowed = (nextOperation & Operations.Edit) != Operations.None;
    bool removeAllowed = (nextOperation & Operations.Remove) != Operations.None;
    bool insertAllowed = (nextOperation & Operations.Insert) != Operations.None;

    // Edit is the fallback result even when neither copy nor edit is a candidate.
    EditOperationKind chosen = EditOperationKind.Edit;
    int bestCost;

    if (copyAllowed)
    {
        bestCost = copyCost;
        chosen = EditOperationKind.Copy;
        previousOperation = Operations.Copy;
    }
    else if (editAllowed)
    {
        bestCost = editCost;
        previousOperation = Operations.Edit;
    }
    else
    {
        bestCost = int.MaxValue;
    }

    // A remove replaces the baseline when it is not more expensive than it.
    if (removeAllowed && (removeCost <= bestCost))
    {
        bestCost = removeCost;
        chosen = EditOperationKind.Remove;
        previousOperation = Operations.Remove;
    }

    // An insert is checked last, so it wins ties against everything selected above.
    if (insertAllowed && (insertCost <= bestCost))
    {
        chosen = EditOperationKind.Insert;
        previousOperation = Operations.Insert;
    }

    return chosen;
}
/// <summary>
/// Renders an edit sequence through a <see cref="VerySimpleRtfBuilder"/>, switching the style
/// every time the operation kind changes from one element to the next.
/// </summary>
/// <param name="sequence">Edit operations to render, in output order.</param>
/// <returns>The text produced by the builder's <c>ToString()</c>.</returns>
private static string GenerateRtf(IList<EditOperation> sequence)
{
    VerySimpleRtfBuilder builder = new VerySimpleRtfBuilder(sequence.Count + 2048);
    VerySimpleRtfBuilder activeStyle = null;

    // The output starts in the "copy" state, which has no style of its own.
    EditOperationKind currentKind = EditOperationKind.Copy;

    foreach (EditOperation step in sequence)
    {
        if (step.Operation != currentKind)
        {
            currentKind = step.Operation;

            // Close the style of the run that just ended before opening the next one.
            if (activeStyle != null)
            {
                activeStyle.CloseStyle();
            }

            switch (currentKind)
            {
                case EditOperationKind.Copy:
                    activeStyle = null;
                    break;

                case EditOperationKind.Edit:
                    activeStyle = builder.SetStyle(
                        VerySimpleRtfBuilder.Styles.BoldItalic | VerySimpleRtfBuilder.Styles.Underline,
                        Color.Empty,
                        Color.Gray);
                    break;

                case EditOperationKind.Insert:
                    activeStyle = builder.SetStyle(
                        VerySimpleRtfBuilder.Styles.Bold | VerySimpleRtfBuilder.Styles.Underline,
                        Color.Empty,
                        Color.LightGreen);
                    break;

                case EditOperationKind.Remove:
                    activeStyle = builder.SetStyle(
                        VerySimpleRtfBuilder.Styles.Strikeout,
                        Color.Empty,
                        Color.LightPink);
                    break;

                default:
                    // Unknown kinds fail in debug builds and are still rendered with distinct colours.
                    Debug.Fail("Incorrect operation type!");
                    activeStyle = builder.SetColours(Color.DarkRed, Color.LightPink);
                    break;
            }
        }

        // A removed line break is written as a pilcrow instead of the line break itself.
        bool removedLineBreak = (currentKind == EditOperationKind.Remove) && (step.Value == '\n');
        if (!removedLineBreak)
        {
            builder.Append(step.Value);
        }
        else
        {
            builder.Append('¶');
        }
    }

    return builder.ToString();
}
/// <summary>
/// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/>
/// into <paramref name="target"/>, using a full cost matrix and a full operation matrix.
/// </summary>
/// <param name="source">String to transform.</param>
/// <param name="target">String to obtain.</param>
/// <param name="insertCost">Cost of inserting one target character.</param>
/// <param name="removeCost">Cost of removing one source character.</param>
/// <param name="editCost">Cost of replacing a source character with a different target character.</param>
/// <returns>The operations in order from the start of the strings to their end.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="target"/> is null.
/// </exception>
public EditOperation[] EditSequence(
    string source,
    string target,
    int insertCost = 1,
    int removeCost = 1,
    int editCost = 1)
{
    if (source == null)
    {
        throw new ArgumentNullException(nameof(source));
    }

    if (target == null)
    {
        throw new ArgumentNullException(nameof(target));
    }

    int rowCount = source.Length + 1;
    int columnCount = target.Length + 1;

    // Forward pass: bestOperation[r][c] / lowestCost[r][c] describe turning the first r source
    // characters into the first c target characters.
    var bestOperation = new EditOperationKind[rowCount][];
    var lowestCost = new int[rowCount][];
    for (int row = 0; row < rowCount; row++)
    {
        bestOperation[row] = new EditOperationKind[columnCount];
        lowestCost[row] = new int[columnCount];
    }

    // First column: nothing left in the target, so every source character is removed.
    for (int row = 1; row < rowCount; row++)
    {
        bestOperation[row][0] = EditOperationKind.Remove;
        lowestCost[row][0] = removeCost * row;
    }

    // First row: nothing left in the source, so every target character is inserted.
    for (int column = 1; column < columnCount; column++)
    {
        bestOperation[0][column] = EditOperationKind.Add;
        lowestCost[0][column] = insertCost * column;
    }

    for (int row = 1; row < rowCount; row++)
    {
        char sourceChar = source[row - 1];

        for (int column = 1; column < columnCount; column++)
        {
            int viaInsert = lowestCost[row][column - 1] + insertCost;
            int viaRemove = lowestCost[row - 1][column] + removeCost;
            int viaEdit = lowestCost[row - 1][column - 1];
            if (sourceChar != target[column - 1])
            {
                viaEdit += editCost;
            }

            int cheapest = Math.Min(Math.Min(viaInsert, viaRemove), viaEdit);

            // Ties are resolved in the order insert, remove, edit.
            EditOperationKind choice;
            if (cheapest == viaInsert)
            {
                choice = EditOperationKind.Add;
            }
            else if (cheapest == viaRemove)
            {
                choice = EditOperationKind.Remove;
            }
            else
            {
                choice = EditOperationKind.Edit;
            }

            bestOperation[row][column] = choice;
            lowestCost[row][column] = cheapest;
        }
    }

    // Backward pass: walk from the bottom-right cell to the origin, filling the buffer from its
    // end so that the operations come out in forward order without a separate reversal.
    var buffer = new EditOperation[source.Length + target.Length];
    int firstUsed = buffer.Length;
    int x = target.Length;
    int y = source.Length;

    while ((x > 0) || (y > 0))
    {
        EditOperationKind op = bestOperation[y][x];
        EditOperation step;

        if (op == EditOperationKind.Add)
        {
            x--;
            step = new EditOperation('\0', target[x], op);
        }
        else if (op == EditOperationKind.Remove)
        {
            y--;
            step = new EditOperation(source[y], '\0', op);
        }
        else if (op == EditOperationKind.Edit)
        {
            x--;
            y--;
            step = new EditOperation(source[y], target[x], op);
        }
        else
        {
            // Start of the matching (EditOperationKind.None)
            break;
        }

        firstUsed--;
        buffer[firstUsed] = step;
    }

    return new Span<EditOperation>(buffer, firstUsed, buffer.Length - firstUsed).ToArray();
}
/// <summary>
/// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/>
/// into <paramref name="target"/> while keeping only two cost rows and a fixed window of
/// operation rows in memory.
/// </summary>
/// <remarks>
/// The forward pass is repeated for the remaining prefixes after every window of backtracking
/// steps, trading extra computation for a bounded number of stored rows.
/// </remarks>
/// <param name="source">String to transform.</param>
/// <param name="target">String to obtain.</param>
/// <param name="insertCost">Cost of inserting one target character.</param>
/// <param name="removeCost">Cost of removing one source character.</param>
/// <param name="editCost">Cost of replacing a source character with a different target character.</param>
/// <returns>The operations in order from the start of the strings to their end.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="target"/> is null.
/// </exception>
public EditOperation[] EditSequence(
    string source,
    string target,
    int insertCost = 1,
    int removeCost = 1,
    int editCost = 1)
{
    if (null == source)
    {
        throw new ArgumentNullException(nameof(source));
    }
    else if (null == target)
    {
        throw new ArgumentNullException(nameof(target));
    }

    // Number of operation rows kept in memory, and the maximum number of backtracking steps
    // taken before the forward pass is repeated for the remaining prefixes.
    const int opModValue = 128;

    // Best operation for interior cells only (row >= 1, column >= 1); row i lives in slot
    // i % opModValue. The borders are handled explicitly during backtracking.
    EditOperationKind[][] M = new EditOperationKind[opModValue][];
    for (int slot = 0; slot < opModValue; ++slot)
    {
        M[slot] = new EditOperationKind[target.Length + 1];
    }

    // Minimum cost so far: two rolling rows, row i lives in slot i % 2.
    int[][] D =
    {
        new int[target.Length + 1],
        new int[target.Length + 1],
    };

    List<EditOperation> result = new List<EditOperation>(source.Length + target.Length);

    for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
    {
        // Forward pass for the prefixes source[0..y) and target[0..x).
        // Cost row 0: build the first j target characters from nothing.
        D[0][0] = 0;
        for (int j = 1; j <= x; ++j)
        {
            D[0][j] = insertCost * j;
        }

        for (int i = 1; i <= y; ++i)
        {
            int[] current = D[i % 2];
            int[] previous = D[(i - 1) % 2];
            EditOperationKind[] operations = M[i % opModValue];

            // Column 0 must be refreshed for every row: the rolling buffer still holds the
            // value of row i - 2 there, which would make later rows look cheaper than they are.
            current[0] = removeCost * i;

            for (int j = 1; j <= x; ++j)
            {
                // here we choose the operation with the least cost
                int insert = current[j - 1] + insertCost;
                int delete = previous[j] + removeCost;
                int edit = previous[j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);

                int min = Math.Min(Math.Min(insert, delete), edit);

                if (min == insert)
                {
                    operations[j] = EditOperationKind.Add;
                }
                else if (min == delete)
                {
                    operations[j] = EditOperationKind.Remove;
                }
                else
                {
                    operations[j] = EditOperationKind.Edit;
                }

                current[j] = min;
            }
        }

        // Backward pass: at most opModValue steps, so only rows y - opModValue + 1 .. y are
        // read, and those are exactly the rows still present in M.
        bool outerBreak = false;

        for (int opCount = 0; opCount < opModValue && (x > 0 || y > 0); opCount++)
        {
            EditOperationKind op;

            if (y == 0)
            {
                // Source exhausted: only inserts remain.
                op = EditOperationKind.Add;
            }
            else if (x == 0)
            {
                // Target exhausted: only removes remain. This must not be read from M, because
                // rows that are multiples of opModValue share slot 0.
                op = EditOperationKind.Remove;
            }
            else
            {
                op = M[y % opModValue][x];
            }

            if (op == EditOperationKind.Add)
            {
                x -= 1;
                result.Add(new EditOperation('\0', target[x], op));
            }
            else if (op == EditOperationKind.Remove)
            {
                y -= 1;
                result.Add(new EditOperation(source[y], '\0', op));
            }
            else if (op == EditOperationKind.Edit)
            {
                x -= 1;
                y -= 1;
                result.Add(new EditOperation(source[y], target[x], op));
            }
            else
            {
                // Defensive: interior cells are always assigned above, so this is not expected.
                // Stop instead of looping forever on an unknown operation kind.
                outerBreak = true;
                break;
            }
        }

        if (outerBreak)
        {
            break;
        }
    }

    // Operations were collected from the end of the strings towards the start.
    result.Reverse();

    return result.ToArray();
}
/// <summary>
/// Creates an edit operation for a single character.
/// </summary>
/// <param name="value">Character stored in <c>Value</c>.</param>
/// <param name="operation">Operation kind stored in <c>Operation</c>.</param>
public EditOperation(char value, EditOperationKind operation)
{
    this.Value = value;
    this.Operation = operation;
}
/// <summary>
/// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/>
/// into <paramref name="target"/>, storing the full operation matrix but only two rows of costs.
/// </summary>
/// <remarks>
/// Costs and loop counters are plain <see cref="int"/> values. Narrowing them to 16 bits makes the
/// counters wrap for strings of 32768 or more characters and makes accumulated costs overflow
/// silently; two rolling rows of <see cref="int"/> need less memory than a full 16-bit matrix
/// and have neither problem.
/// </remarks>
/// <param name="source">String to transform.</param>
/// <param name="target">String to obtain.</param>
/// <param name="insertCost">Cost of inserting one target character.</param>
/// <param name="removeCost">Cost of removing one source character.</param>
/// <param name="editCost">Cost of replacing a source character with a different target character.</param>
/// <returns>The operations in order from the start of the strings to their end.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="target"/> is null.
/// </exception>
public EditOperation[] EditSequence(
    string source,
    string target,
    int insertCost = 1,
    int removeCost = 1,
    int editCost = 1)
{
    if (null == source)
    {
        throw new ArgumentNullException(nameof(source));
    }
    else if (null == target)
    {
        throw new ArgumentNullException(nameof(target));
    }

    int rows = source.Length + 1;
    int columns = target.Length + 1;

    // Forward: building score matrix
    // Best operation (among insert, update, delete) to perform
    EditOperationKind[][] M = new EditOperationKind[rows][];
    for (int i = 0; i < rows; ++i)
    {
        M[i] = new EditOperationKind[columns];
    }

    // Minimum cost so far. Only the previous row is ever read, so two rows are enough;
    // backtracking relies on M alone.
    int[] previousCosts = new int[columns];
    int[] currentCosts = new int[columns];

    // Edge: all inserts
    for (int j = 1; j < columns; ++j)
    {
        M[0][j] = EditOperationKind.Add;
        previousCosts[j] = insertCost * j;
    }

    // Having fit N - 1, K - 1 characters let's fit N, K
    for (int i = 1; i < rows; ++i)
    {
        // Edge: all removes
        M[i][0] = EditOperationKind.Remove;
        currentCosts[0] = removeCost * i;

        for (int j = 1; j < columns; ++j)
        {
            // here we choose the operation with the least cost
            int insert = currentCosts[j - 1] + insertCost;
            int remove = previousCosts[j] + removeCost;
            int edit = previousCosts[j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);

            int min = Math.Min(Math.Min(insert, remove), edit);

            // Ties are resolved in the order insert, remove, edit.
            if (min == insert)
            {
                M[i][j] = EditOperationKind.Add;
            }
            else if (min == remove)
            {
                M[i][j] = EditOperationKind.Remove;
            }
            else
            {
                M[i][j] = EditOperationKind.Edit;
            }

            currentCosts[j] = min;
        }

        // The row just filled becomes the "previous" row of the next iteration.
        int[] swap = previousCosts;
        previousCosts = currentCosts;
        currentCosts = swap;
    }

    // Backward: knowing the actions (M) let's build the edit sequence
    List<EditOperation> result = new List<EditOperation>(source.Length + target.Length);

    for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
    {
        EditOperationKind op = M[y][x];

        if (op == EditOperationKind.Add)
        {
            x -= 1;
            result.Add(new EditOperation('\0', target[x], op));
        }
        else if (op == EditOperationKind.Remove)
        {
            y -= 1;
            result.Add(new EditOperation(source[y], '\0', op));
        }
        else if (op == EditOperationKind.Edit)
        {
            x -= 1;
            y -= 1;
            result.Add(new EditOperation(source[y], target[x], op));
        }
        else
        {
            // Start of the matching (EditOperationKind.None)
            break;
        }
    }

    // Operations were collected from the end of the strings towards the start.
    result.Reverse();

    return result.ToArray();
}
/// <summary>
/// Creates an edit operation that relates a source subsection to a target subsection.
/// </summary>
/// <param name="source">Subsection stored in <c>Source</c>.</param>
/// <param name="target">Subsection stored in <c>Target</c>.</param>
/// <param name="operationKind">Operation kind stored in <c>OperationKind</c>.</param>
public EditOperation(SubsectionInfo source, SubsectionInfo target, EditOperationKind operationKind)
{
    this.Source = source;
    this.Target = target;
    this.OperationKind = operationKind;
}
private void CorePerform(T[] source, T[] target, Func <T, double> insertCost, Func <T, double> deleteCost, Func <T, T, double> editCost) { // Best operation (among insert, update, delete) to perform EditOperationKind[][] M = Enumerable .Range(0, source.Length + 1) .Select(line => new EditOperationKind[target.Length + 1]) .ToArray(); // Minimum cost so far double[][] D = Enumerable .Range(0, source.Length + 1) .Select(line => new double[target.Length + 1]) .ToArray(); // Edge: all removes double sum = 0.0; for (int i = 1; i <= source.Length; ++i) { M[i][0] = EditOperationKind.Delete; D[i][0] = (sum += deleteCost(source[i - 1])); } // Edge: all inserts sum = 0.0; for (int i = 1; i <= target.Length; ++i) { M[0][i] = EditOperationKind.Insert; D[0][i] = (sum += insertCost(target[i - 1])); } // Having fit N - 1, K - 1 characters let's fit N, K for (int i = 1; i <= source.Length; ++i) { for (int j = 1; j <= target.Length; ++j) { // here we choose the operation with the least cost double insert = D[i][j - 1] + insertCost(target[j - 1]); double delete = D[i - 1][j] + deleteCost(source[i - 1]); double edit = D[i - 1][j - 1] + editCost(source[i - 1], target[j - 1]); double min = Math.Min(Math.Min(insert, delete), edit); if (min == insert) { M[i][j] = EditOperationKind.Insert; } else if (min == delete) { M[i][j] = EditOperationKind.Delete; } else if (min == edit) { M[i][j] = object.Equals(source[i - 1], target[j - 1]) ? EditOperationKind.None : EditOperationKind.Edit; } D[i][j] = min; } } EditDistance = D[source.Length][target.Length]; // Backward: knowing scores (D) and actions (M) let's building edit sequence m_Sequence = new List <EditOperation <T> >(source.Length + target.Length); for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);) { EditOperationKind op = M[y][x]; if (op == EditOperationKind.Insert) { x -= 1; m_Sequence.Add(new EditOperation <T>(op, default, target[x], D[y][x + 1] - D[y][x]));
/// <summary>
/// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/>
/// into <paramref name="target"/>, using <c>LargeArrayPoolMatrix</c> instances for the cost and
/// operation matrices.
/// </summary>
/// <param name="source">String to transform.</param>
/// <param name="target">String to obtain.</param>
/// <param name="insertCost">Cost of inserting one target character.</param>
/// <param name="removeCost">Cost of removing one source character.</param>
/// <param name="editCost">Cost of replacing a source character with a different target character.</param>
/// <returns>The operations in order from the start of the strings to their end.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="target"/> is null.
/// </exception>
public EditOperation[] EditSequence(
    string source,
    string target,
    int insertCost = 1,
    int removeCost = 1,
    int editCost = 1)
{
    if (null == source)
    {
        throw new ArgumentNullException(nameof(source));
    }
    else if (null == target)
    {
        throw new ArgumentNullException(nameof(target));
    }

    // Forward: building score matrix
    var columns = target.Length + 1;
    var rows = source.Length + 1;

    // Both matrices are disposed in finally blocks so that they are released even when the
    // computation below throws. Disposal order matches the original: costs first, then operations.

    // Best operation (among insert, update, delete) to perform
    var M = new LargeArrayPoolMatrix<EditOperationKind>(columns, rows);
    try
    {
        // Minimum cost so far
        var D = new LargeArrayPoolMatrix<int>(columns, rows);
        try
        {
            // NOTE(review): the name suggests pooled storage, which may not be zeroed when it is
            // handed out — confirm against LargeArrayPoolMatrix. D[0, 0] is read by the first
            // interior cell, so the origin is initialised explicitly.
            M[0, 0] = EditOperationKind.None;
            D[0, 0] = 0;

            // Edge: all removes
            for (int i = 1; i <= source.Length; ++i)
            {
                M[i, 0] = EditOperationKind.Remove;
                D[i, 0] = removeCost * i;
            }

            // Edge: all inserts
            for (int i = 1; i <= target.Length; ++i)
            {
                M[0, i] = EditOperationKind.Add;
                D[0, i] = insertCost * i;
            }

            // Having fit N - 1, K - 1 characters let's fit N, K
            for (int i = 1; i <= source.Length; ++i)
            {
                for (int j = 1; j <= target.Length; ++j)
                {
                    // here we choose the operation with the least cost
                    int insert = D[i, j - 1] + insertCost;
                    int delete = D[i - 1, j] + removeCost;
                    int edit = D[i - 1, j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);

                    int min = Math.Min(Math.Min(insert, delete), edit);

                    // Ties are resolved in the order insert, remove, edit.
                    if (min == insert)
                    {
                        M[i, j] = EditOperationKind.Add;
                    }
                    else if (min == delete)
                    {
                        M[i, j] = EditOperationKind.Remove;
                    }
                    else
                    {
                        M[i, j] = EditOperationKind.Edit;
                    }

                    D[i, j] = min;
                }
            }

            // Backward: knowing scores (D) and actions (M) let's build the edit sequence
            List<EditOperation> result = new List<EditOperation>(source.Length + target.Length);

            for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
            {
                EditOperationKind op = M[y, x];

                if (op == EditOperationKind.Add)
                {
                    x -= 1;
                    result.Add(new EditOperation('\0', target[x], EditOperationKind.Add));
                }
                else if (op == EditOperationKind.Remove)
                {
                    y -= 1;
                    result.Add(new EditOperation(source[y], '\0', EditOperationKind.Remove));
                }
                else if (op == EditOperationKind.Edit)
                {
                    x -= 1;
                    y -= 1;
                    result.Add(new EditOperation(source[y], target[x], EditOperationKind.Edit));
                }
                else
                {
                    // Start of the matching (EditOperationKind.None)
                    break;
                }
            }

            // Operations were collected from the end of the strings towards the start.
            result.Reverse();

            return result.ToArray();
        }
        finally
        {
            D.Dispose();
        }
    }
    finally
    {
        M.Dispose();
    }
}
/// <summary>
/// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/>
/// into <paramref name="target"/>, keeping the cost and operation matrices in unmanaged memory
/// obtained from <see cref="Marshal.AllocHGlobal(IntPtr)"/>.
/// </summary>
/// <param name="source">String to transform.</param>
/// <param name="target">String to obtain.</param>
/// <param name="insertCost">Cost of inserting one target character.</param>
/// <param name="removeCost">Cost of removing one source character.</param>
/// <param name="editCost">Cost of replacing a source character with a different target character.</param>
/// <returns>The operations in order from the start of the strings to their end.</returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="source"/> or <paramref name="target"/> is null.
/// </exception>
/// <exception cref="OverflowException">
/// The matrix would need more cells than a span can address, or more bytes than a native
/// integer can hold.
/// </exception>
public unsafe EditOperation[] EditSequence(
    string source,
    string target,
    int insertCost = 1,
    int removeCost = 1,
    int editCost = 1)
{
    if (null == source)
    {
        throw new ArgumentNullException(nameof(source));
    }
    else if (null == target)
    {
        throw new ArgumentNullException(nameof(target));
    }

    // Forward: building score matrix
    List<EditOperation> result = new List<EditOperation>(source.Length + target.Length);

    var columns = target.Length + 1;
    var rows = source.Length + 1;

    // A span is indexed with int, so the cell count must fit; fail loudly instead of wrapping.
    int cellCount = checked(columns * rows);

    // Byte sizes are computed in 64 bits: an int product can wrap even when cellCount fits,
    // which would leave the spans below larger than the memory actually allocated.
    var underlyingType = Enum.GetUnderlyingType(typeof(EditOperationKind));
    long operationBytes = (long)Marshal.SizeOf(underlyingType) * cellCount;
    long costBytes = (long)Marshal.SizeOf<int>() * cellCount;

    IntPtr operationHandle = IntPtr.Zero;
    IntPtr costHandle = IntPtr.Zero;

    try
    {
        // Best operation (among insert, update, delete) to perform
        operationHandle = Marshal.AllocHGlobal(new IntPtr(operationBytes));
        var M = new Span<EditOperationKind>(operationHandle.ToPointer(), cellCount);

        // Minimum cost so far
        costHandle = Marshal.AllocHGlobal(new IntPtr(costBytes));
        var D = new Span<int>(costHandle.ToPointer(), cellCount);

        // The memory is not zeroed on allocation, so the origin is set explicitly.
        M[0] = EditOperationKind.None;
        D[0] = 0;

        // Edge: all removes
        for (int i = 1; i <= source.Length; ++i)
        {
            M[i * columns] = EditOperationKind.Remove;
            D[i * columns] = removeCost * i;
        }

        // Edge: all inserts
        for (int i = 1; i <= target.Length; ++i)
        {
            M[i] = EditOperationKind.Add;
            D[i] = insertCost * i;
        }

        // Having fit N - 1, K - 1 characters let's fit N, K
        for (int i = 1; i <= source.Length; ++i)
        {
            int rowStart = i * columns;
            int previousRowStart = rowStart - columns;

            for (int j = 1; j <= target.Length; ++j)
            {
                // here we choose the operation with the least cost
                int insert = D[rowStart + j - 1] + insertCost;
                int delete = D[previousRowStart + j] + removeCost;
                int edit = D[previousRowStart + j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);

                int min = Math.Min(Math.Min(insert, delete), edit);

                // Ties are resolved in the order insert, remove, edit.
                if (min == insert)
                {
                    M[rowStart + j] = EditOperationKind.Add;
                }
                else if (min == delete)
                {
                    M[rowStart + j] = EditOperationKind.Remove;
                }
                else
                {
                    M[rowStart + j] = EditOperationKind.Edit;
                }

                D[rowStart + j] = min;
            }
        }

        // Costs are not needed for backtracking; release them early to lower peak memory.
        // D must not be touched after this point.
        Marshal.FreeHGlobal(costHandle);
        costHandle = IntPtr.Zero;

        // Backward: knowing the actions (M) let's build the edit sequence
        for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
        {
            EditOperationKind op = M[y * columns + x];

            if (op == EditOperationKind.Add)
            {
                x -= 1;
                result.Add(new EditOperation('\0', target[x], op));
            }
            else if (op == EditOperationKind.Remove)
            {
                y -= 1;
                result.Add(new EditOperation(source[y], '\0', op));
            }
            else if (op == EditOperationKind.Edit)
            {
                x -= 1;
                y -= 1;
                result.Add(new EditOperation(source[y], target[x], op));
            }
            else
            {
                // Start of the matching (EditOperationKind.None)
                break;
            }
        }
    }
    finally
    {
        // Release whatever is still allocated, including when an exception is propagating.
        if (costHandle != IntPtr.Zero)
        {
            Marshal.FreeHGlobal(costHandle);
        }

        if (operationHandle != IntPtr.Zero)
        {
            Marshal.FreeHGlobal(operationHandle);
        }
    }

    // Operations were collected from the end of the strings towards the start.
    result.Reverse();

    return result.ToArray();
}
/// <summary>
/// Builds the edit sequence that turns <paramref name="source"/> into <paramref name="target"/>.
/// The forward pass records every operation that achieves the minimum cost in each cell; the
/// backward pass lets <c>GetNextOperation</c> pick one of them per step.
/// </summary>
/// <param name="source">String to transform.</param>
/// <param name="target">String to obtain.</param>
/// <param name="insertCost">Cost of inserting one target character.</param>
/// <param name="removeCost">Cost of removing one source character.</param>
/// <param name="editCost">Cost of replacing a source character with a different target character.</param>
/// <param name="copyCost">Cost of keeping a character that is equal in both strings.</param>
/// <param name="whiteSpacePreference">
/// Amount subtracted from all three candidate costs of a cell whose source and target characters
/// are both white space but differ from each other.
/// </param>
/// <returns>The operations in order from the start of the strings to their end.</returns>
private static List<EditOperation> ProcessEditSequence(string source, string target, int insertCost, int removeCost, int editCost, int copyCost, int whiteSpacePreference)
{
    int sourceLength = source.Length;
    int targetLength = target.Length;

    // Forward pass. candidates[r, c] holds every operation that reaches the minimum cost for
    // the first r source characters and the first c target characters; costs[r, c] is that cost.
    Operations[,] candidates = new Operations[sourceLength + 1, targetLength + 1];
    int[,] costs = new int[sourceLength + 1, targetLength + 1];

    // First column: only removes are possible.
    for (int row = 1; row <= sourceLength; row++)
    {
        candidates[row, 0] = Operations.Remove;
        costs[row, 0] = removeCost * row;
    }

    // First row: only inserts are possible.
    for (int column = 1; column <= targetLength; column++)
    {
        candidates[0, column] = Operations.Insert;
        costs[0, column] = insertCost * column;
    }

    for (int row = 1; row <= sourceLength; row++)
    {
        char from = source[row - 1];
        bool fromIsWhiteSpace = char.IsWhiteSpace(from);

        for (int column = 1; column <= targetLength; column++)
        {
            char to = target[column - 1];
            bool sameCharacter = from == to;

            int viaInsert = costs[row, column - 1] + insertCost;
            int viaRemove = costs[row - 1, column] + removeCost;
            int viaDiagonal = costs[row - 1, column - 1] + (sameCharacter ? copyCost : editCost);

            // Two different white-space characters: lower the cost of the whole cell.
            if (!sameCharacter && fromIsWhiteSpace && char.IsWhiteSpace(to))
            {
                viaInsert -= whiteSpacePreference;
                viaRemove -= whiteSpacePreference;
                viaDiagonal -= whiteSpacePreference;
            }

            int cheapest = Math.Min(viaDiagonal, Math.Min(viaInsert, viaRemove));

            // Record every operation that ties for the minimum, not just the first one.
            if (viaInsert == cheapest)
            {
                candidates[row, column] |= Operations.Insert;
            }

            if (viaRemove == cheapest)
            {
                candidates[row, column] |= Operations.Remove;
            }

            if (viaDiagonal == cheapest)
            {
                candidates[row, column] |= sameCharacter ? Operations.Copy : Operations.Edit;
            }

            costs[row, column] = cheapest;
        }
    }

    // Backward pass: walk from the end of both strings to the start, so the steps are
    // collected in reverse order and flipped at the end.
    List<EditOperation> steps = new List<EditOperation>(sourceLength + targetLength);
    Operations lastOperation = Operations.None;
    int x = targetLength;
    int y = sourceLength;

    while ((x > 0) || (y > 0))
    {
        EditOperationKind kind = GetNextOperation(candidates[y, x], insertCost, removeCost, editCost, copyCost, ref lastOperation);

        if (kind == EditOperationKind.Insert)
        {
            x--;
            steps.Add(new EditOperation(target[x], kind));
        }
        else if (kind == EditOperationKind.Remove)
        {
            y--;
            steps.Add(new EditOperation(source[y], kind));
        }
        else
        {
            // EditOperationKind.Edit, EditOperationKind.Copy: consume one character from each string.
            x--;
            y--;
            steps.Add(new EditOperation(target[x], kind));
            Debug.Assert((kind == EditOperationKind.Edit) || (kind == EditOperationKind.Copy));
        }
    }

    steps.Reverse();

    return steps;
}