Ejemplo n.º 1
0
            /// <summary>
            /// Initialises an edit operation from a pair of characters and a requested kind.
            /// </summary>
            /// <param name="valueFrom">Character stored in <c>ValueFrom</c>.</param>
            /// <param name="valueTo">Character stored in <c>ValueTo</c>.</param>
            /// <param name="operation">
            /// Requested kind; it is replaced by <see cref="EditOperationKind.None"/> when both
            /// characters are equal.
            /// </param>
            public EditOperation(char valueFrom, char valueTo, EditOperationKind operation)
            {
                ValueFrom = valueFrom;
                ValueTo   = valueTo;

                // Identical characters mean nothing changes, whatever kind the caller asked for.
                if (valueFrom == valueTo)
                {
                    Operation = EditOperationKind.None;
                }
                else
                {
                    Operation = operation;
                }
            }
Ejemplo n.º 2
0
        /// <summary>
        /// Selects one concrete operation out of the candidate set <paramref name="nextOperation"/>.
        /// </summary>
        /// <remarks>
        /// If the operation recorded in <paramref name="previousOperation"/> is still among the candidates it is
        /// repeated and <paramref name="previousOperation"/> is left untouched. Otherwise a candidate is chosen by
        /// cost: copy (or, failing that, edit) sets the baseline, a remove candidate replaces it when its cost is
        /// not higher, and an insert candidate replaces the current choice when its cost is not higher either.
        /// </remarks>
        /// <param name="nextOperation">Flag set of candidate operations.</param>
        /// <param name="insertCost">Cost compared against when insert is a candidate.</param>
        /// <param name="removeCost">Cost compared against when remove is a candidate.</param>
        /// <param name="editCost">Baseline cost when edit (and not copy) is a candidate.</param>
        /// <param name="copyCost">Baseline cost when copy is a candidate.</param>
        /// <param name="previousOperation">Last chosen operation; updated whenever a new choice is made.</param>
        /// <returns>The operation kind to apply.</returns>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="previousOperation"/> is being repeated but is not one of insert, remove, edit or copy.
        /// </exception>
        private static EditOperationKind GetNextOperation(Operations nextOperation, int insertCost, int removeCost,
                                                          int editCost, int copyCost, ref Operations previousOperation)
        {
            bool previousStillAllowed = (previousOperation != Operations.None) &&
                                        ((nextOperation & previousOperation) == previousOperation);

            if (previousStillAllowed)
            {
                // Keep runs of the same operation together.
                if (previousOperation == Operations.Insert)
                {
                    return EditOperationKind.Insert;
                }

                if (previousOperation == Operations.Remove)
                {
                    return EditOperationKind.Remove;
                }

                if (previousOperation == Operations.Edit)
                {
                    return EditOperationKind.Edit;
                }

                if (previousOperation == Operations.Copy)
                {
                    return EditOperationKind.Copy;
                }

                throw new ArgumentOutOfRangeException(nameof(previousOperation));
            }

            bool copyAllowed   = (nextOperation & Operations.Copy) != Operations.None;
            bool editAllowed   = (nextOperation & Operations.Edit) != Operations.None;
            bool removeAllowed = (nextOperation & Operations.Remove) != Operations.None;
            bool insertAllowed = (nextOperation & Operations.Insert) != Operations.None;

            int               bestCost = int.MaxValue;
            EditOperationKind chosen   = EditOperationKind.Edit;

            if (copyAllowed)
            {
                bestCost          = copyCost;
                chosen            = EditOperationKind.Copy;
                previousOperation = Operations.Copy;
            }
            else if (editAllowed)
            {
                // "chosen" already defaults to Edit.
                bestCost          = editCost;
                previousOperation = Operations.Edit;
            }

            if (removeAllowed && (removeCost <= bestCost))
            {
                bestCost          = removeCost;
                chosen            = EditOperationKind.Remove;
                previousOperation = Operations.Remove;
            }

            if (insertAllowed && (insertCost <= bestCost))
            {
                chosen            = EditOperationKind.Insert;
                previousOperation = Operations.Insert;
            }

            return chosen;
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Renders an edit sequence through <see cref="VerySimpleRtfBuilder"/>, switching the style whenever the
        /// operation kind changes from one element to the next.
        /// </summary>
        /// <param name="sequence">Edit operations to render, in display order.</param>
        /// <returns>The text produced by the builder's <c>ToString()</c>.</returns>
        private static string GenerateRtf(IList <EditOperation> sequence)
        {
            VerySimpleRtfBuilder rtf         = new VerySimpleRtfBuilder(sequence.Count + 2048);
            VerySimpleRtfBuilder activeStyle = null;
            EditOperationKind    currentKind = EditOperationKind.Copy;

            foreach (EditOperation item in sequence)
            {
                EditOperationKind kind = item.Operation;

                if (kind != currentKind)
                {
                    // A new run starts: close the style of the previous run (if any) and open the next one.
                    currentKind = kind;
                    activeStyle?.CloseStyle();

                    if (kind == EditOperationKind.Copy)
                    {
                        activeStyle = null;
                    }
                    else if (kind == EditOperationKind.Edit)
                    {
                        activeStyle = rtf.SetStyle(VerySimpleRtfBuilder.Styles.BoldItalic | VerySimpleRtfBuilder.Styles.Underline, Color.Empty, Color.Gray);
                    }
                    else if (kind == EditOperationKind.Insert)
                    {
                        activeStyle = rtf.SetStyle(VerySimpleRtfBuilder.Styles.Bold | VerySimpleRtfBuilder.Styles.Underline, Color.Empty, Color.LightGreen);
                    }
                    else if (kind == EditOperationKind.Remove)
                    {
                        activeStyle = rtf.SetStyle(VerySimpleRtfBuilder.Styles.Strikeout, Color.Empty, Color.LightPink);
                    }
                    else
                    {
                        Debug.Fail("Incorrect operation type!");
                        activeStyle = rtf.SetColours(Color.DarkRed, Color.LightPink);
                    }
                }

                // A removed line break is shown as a pilcrow instead of the line break itself.
                bool removedLineBreak = (currentKind == EditOperationKind.Remove) && (item.Value == '\n');

                if (!removedLineBreak)
                {
                    rtf.Append(item.Value);
                }
                else
                {
                    rtf.Append('¶');
                }
            }

            return rtf.ToString();
        }
Ejemplo n.º 4
0
        /// <summary>
        /// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/> into
        /// <paramref name="target"/> using a full dynamic-programming table.
        /// </summary>
        /// <param name="source">Original text.</param>
        /// <param name="target">Text to arrive at.</param>
        /// <param name="insertCost">Cost of inserting one character.</param>
        /// <param name="removeCost">Cost of removing one character.</param>
        /// <param name="editCost">Cost of replacing one character by a different one.</param>
        /// <returns>The operations ordered from the start of the texts to their end.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
        /// </exception>
        public EditOperation[] EditSequence(
            string source, string target,
            int insertCost = 1, int removeCost = 1, int editCost = 1)
        {
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            if (target == null)
            {
                throw new ArgumentNullException(nameof(target));
            }

            int rowCount    = source.Length + 1;
            int columnCount = target.Length + 1;

            // M: best operation per cell.
            EditOperationKind[][] M = new EditOperationKind[rowCount][];

            for (int row = 0; row < rowCount; ++row)
            {
                M[row] = new EditOperationKind[columnCount];
            }

            // D: minimum cost per cell.
            int[][] D = new int[rowCount][];

            for (int row = 0; row < rowCount; ++row)
            {
                D[row] = new int[columnCount];
            }

            // First column: only removes are possible.
            for (int row = 1; row < rowCount; ++row)
            {
                M[row][0] = EditOperationKind.Remove;
                D[row][0] = removeCost * row;
            }

            // First row: only inserts are possible.
            for (int column = 1; column < columnCount; ++column)
            {
                M[0][column] = EditOperationKind.Add;
                D[0][column] = insertCost * column;
            }

            // Forward pass: fill the remaining cells.
            for (int row = 1; row < rowCount; ++row)
            {
                for (int column = 1; column < columnCount; ++column)
                {
                    int viaInsert = D[row][column - 1] + insertCost;
                    int viaRemove = D[row - 1][column] + removeCost;
                    int viaEdit   = D[row - 1][column - 1];

                    if (source[row - 1] != target[column - 1])
                    {
                        viaEdit += editCost;
                    }

                    // Ties are resolved in favour of insert, then remove, then edit.
                    int               best     = viaInsert;
                    EditOperationKind bestKind = EditOperationKind.Add;

                    if (viaRemove < best)
                    {
                        best     = viaRemove;
                        bestKind = EditOperationKind.Remove;
                    }

                    if (viaEdit < best)
                    {
                        best     = viaEdit;
                        bestKind = EditOperationKind.Edit;
                    }

                    M[row][column] = bestKind;
                    D[row][column] = best;
                }
            }

            // Backward pass: walk from the last cell to the origin, filling the buffer from its end.
            EditOperation[] buffer = new EditOperation[source.Length + target.Length];
            int             cursor = buffer.Length - 1;
            int             x      = target.Length;
            int             y      = source.Length;

            while ((x > 0) || (y > 0))
            {
                EditOperationKind op = M[y][x];

                bool takesTarget = (op == EditOperationKind.Add) || (op == EditOperationKind.Edit);
                bool takesSource = (op == EditOperationKind.Remove) || (op == EditOperationKind.Edit);

                if (!takesTarget && !takesSource)
                {
                    // Any other kind (EditOperationKind.None) marks the start of the matching.
                    break;
                }

                char from = '\0';
                char to   = '\0';

                if (takesTarget)
                {
                    x -= 1;
                    to = target[x];
                }

                if (takesSource)
                {
                    y   -= 1;
                    from = source[y];
                }

                buffer[cursor] = new EditOperation(from, to, op);
                cursor--;
            }

            // Only the filled tail of the buffer is returned.
            return new Span <EditOperation>(buffer).Slice(cursor + 1).ToArray();
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/> into
        /// <paramref name="target"/> while keeping only a bounded window of the dynamic-programming table.
        /// </summary>
        /// <remarks>
        /// Costs are kept in two rolling rows and operations in a ring of <c>opModValue</c> rows. Each outer
        /// pass recomputes the table for the still-unprocessed prefixes and then backtracks at most
        /// <c>opModValue</c> steps, which never reach further back than the rows the ring still holds.
        /// </remarks>
        /// <param name="source">Original text.</param>
        /// <param name="target">Text to arrive at.</param>
        /// <param name="insertCost">Cost of inserting one character.</param>
        /// <param name="removeCost">Cost of removing one character.</param>
        /// <param name="editCost">Cost of replacing one character by a different one.</param>
        /// <returns>The operations ordered from the start of the texts to their end.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
        /// </exception>
        public EditOperation[] EditSequence(
            string source, string target,
            int insertCost = 1, int removeCost = 1, int editCost = 1)
        {
            if (null == source)
            {
                throw new ArgumentNullException(nameof(source));
            }
            else if (null == target)
            {
                throw new ArgumentNullException(nameof(target));
            }

            // Number of operation rows kept, and the maximum number of backtracking steps per pass.
            var opModValue = 128;

            EditOperationKind[][] M = Enumerable
                                      .Range(0, opModValue + 1)
                                      .Select(line => new EditOperationKind[target.Length + 1])
                                      .ToArray();

            List <EditOperation> result =
                new List <EditOperation>(source.Length + target.Length);

            // Minimum cost so far (two rolling rows)
            int[][] D = Enumerable
                        .Range(0, 2)
                        .Select(line => new int[target.Length + 1])
                        .ToArray();

            for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
            {
                // Row 0 (all inserts). It has to be rebuilt on every pass because both buffers are recycled
                // and a previous pass may have overwritten it with data of a later row.
                M[0][0] = EditOperationKind.None;
                D[0][0] = 0;
                for (int j = 1; j <= target.Length; ++j)
                {
                    M[0][j] = EditOperationKind.Add;
                    D[0][j] = insertCost * j;
                }

                for (int i = 1; i <= y; ++i)
                {
                    int[] current  = D[i % 2];
                    int[] previous = D[(i - 1) % 2];
                    EditOperationKind[] operations = M[i % opModValue];

                    // Column 0 of row i (all removes). This must be refreshed per row: the rolling buffers
                    // still contain the values of row i - 2 (costs) and row i - opModValue (operations).
                    current[0]    = removeCost * i;
                    operations[0] = EditOperationKind.Remove;

                    for (int j = 1; j <= x; ++j)
                    {
                        // here we choose the operation with the least cost
                        int insert = current[j - 1] + insertCost;
                        int delete = previous[j] + removeCost;
                        int edit   = previous[j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);

                        int min = Math.Min(Math.Min(insert, delete), edit);

                        if (min == insert)
                        {
                            operations[j] = EditOperationKind.Add;
                        }
                        else if (min == delete)
                        {
                            operations[j] = EditOperationKind.Remove;
                        }
                        else if (min == edit)
                        {
                            operations[j] = EditOperationKind.Edit;
                        }

                        current[j] = min;
                    }
                }

                // Backtrack at most opModValue steps; older rows are no longer available in the ring.
                var outerBreak = false;
                for (int opCount = 0; opCount < opModValue && (x > 0 || y > 0); opCount++)
                {
                    EditOperationKind op = M[y % opModValue][x];

                    if (op == EditOperationKind.Add)
                    {
                        x -= 1;
                        result.Add(new EditOperation('\0', target[x], op));
                    }
                    else if (op == EditOperationKind.Remove)
                    {
                        y -= 1;
                        result.Add(new EditOperation(source[y], '\0', op));
                    }
                    else if (op == EditOperationKind.Edit)
                    {
                        x -= 1;
                        y -= 1;
                        result.Add(new EditOperation(source[y], target[x], op));
                    }
                    else                     // Start of the matching (EditOperationKind.None)
                    {
                        outerBreak = true;
                        break;
                    }
                }

                if (outerBreak)
                {
                    break;
                }
            }

            // Operations were collected from the end towards the start.
            result.Reverse();

            return result.ToArray();
        }
Ejemplo n.º 6
0
 /// <summary>
 /// Initialises an edit operation for a single character.
 /// </summary>
 /// <param name="value">Character stored in <c>Value</c>.</param>
 /// <param name="operation">Kind stored in <c>Operation</c>.</param>
 public EditOperation(char value, EditOperationKind operation)
 {
     this.Value     = value;
     this.Operation = operation;
 }
        /// <summary>
        /// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/> into
        /// <paramref name="target"/>, storing the cost table as 16-bit integers.
        /// </summary>
        /// <remarks>
        /// All narrowing to <see cref="short"/> is done in a <c>checked</c> context, so inputs whose costs do not
        /// fit into 16 bits fail with <see cref="OverflowException"/> instead of silently wrapping around.
        /// </remarks>
        /// <param name="source">Original text.</param>
        /// <param name="target">Text to arrive at.</param>
        /// <param name="insertCost">Cost of inserting one character.</param>
        /// <param name="removeCost">Cost of removing one character.</param>
        /// <param name="editCost">Cost of replacing one character by a different one.</param>
        /// <returns>The operations ordered from the start of the texts to their end.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="OverflowException">
        /// A cost parameter or an accumulated cost does not fit into a <see cref="short"/>.
        /// </exception>
        public EditOperation[] EditSequence(
            string source, string target,
            int insertCost = 1, int removeCost = 1, int editCost = 1)
        {
            if (null == source)
            {
                throw new ArgumentNullException(nameof(source));
            }
            else if (null == target)
            {
                throw new ArgumentNullException(nameof(target));
            }

            var localInsertCost = checked((short)insertCost);
            var localRemoveCost = checked((short)removeCost);
            var localEditCost   = checked((short)editCost);

            // Forward: building score matrix

            // Best operation (among insert, update, delete) to perform
            EditOperationKind[][] M = Enumerable
                                      .Range(0, source.Length + 1)
                                      .Select(line => new EditOperationKind[target.Length + 1])
                                      .ToArray();

            // Minimum cost so far
            short[][] D = Enumerable
                          .Range(0, source.Length + 1)
                          .Select(line => new short[target.Length + 1])
                          .ToArray();

            // Loop counters are int: a short counter wraps to a negative index once a text is longer than
            // short.MaxValue characters.

            // Edge: all removes
            for (int i = 1; i <= source.Length; ++i)
            {
                M[i][0] = EditOperationKind.Remove;
                D[i][0] = checked((short)(localRemoveCost * i));
            }

            // Edge: all inserts
            for (int i = 1; i <= target.Length; ++i)
            {
                M[0][i] = EditOperationKind.Add;
                D[0][i] = checked((short)(localInsertCost * i));
            }

            // Having fit N - 1, K - 1 characters let's fit N, K
            for (int i = 1; i <= source.Length; ++i)
            {
                for (int j = 1; j <= target.Length; ++j)
                {
                    // here we choose the operation with the least cost
                    var insert = checked((short)(D[i][j - 1] + localInsertCost));
                    var delete = checked((short)(D[i - 1][j] + localRemoveCost));
                    var edit   = checked((short)(D[i - 1][j - 1] + (source[i - 1] == target[j - 1] ? 0 : localEditCost)));

                    var min = Math.Min(Math.Min(insert, delete), edit);

                    if (min == insert)
                    {
                        M[i][j] = EditOperationKind.Add;
                    }
                    else if (min == delete)
                    {
                        M[i][j] = EditOperationKind.Remove;
                    }
                    else if (min == edit)
                    {
                        M[i][j] = EditOperationKind.Edit;
                    }

                    D[i][j] = min;
                }
            }

            // Backward: knowing scores (D) and actions (M) let's build the edit sequence
            List <EditOperation> result =
                new List <EditOperation>(source.Length + target.Length);

            for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
            {
                EditOperationKind op = M[y][x];

                if (op == EditOperationKind.Add)
                {
                    x -= 1;
                    result.Add(new EditOperation('\0', target[x], op));
                }
                else if (op == EditOperationKind.Remove)
                {
                    y -= 1;
                    result.Add(new EditOperation(source[y], '\0', op));
                }
                else if (op == EditOperationKind.Edit)
                {
                    x -= 1;
                    y -= 1;
                    result.Add(new EditOperation(source[y], target[x], op));
                }
                else                 // Start of the matching (EditOperationKind.None)
                {
                    break;
                }
            }

            // Operations were collected from the end towards the start.
            result.Reverse();

            return result.ToArray();
        }
Ejemplo n.º 8
0
 /// <summary>
 /// Initialises an edit operation that relates a source subsection to a target subsection.
 /// </summary>
 /// <param name="source">Value stored in <c>Source</c>.</param>
 /// <param name="target">Value stored in <c>Target</c>.</param>
 /// <param name="operationKind">Value stored in <c>OperationKind</c>.</param>
 public EditOperation(SubsectionInfo source, SubsectionInfo target, EditOperationKind operationKind)
 {
     this.Source        = source;
     this.Target        = target;
     this.OperationKind = operationKind;
 }
Ejemplo n.º 9
0
        private void CorePerform(T[] source,
                                 T[] target,
                                 Func <T, double> insertCost,
                                 Func <T, double> deleteCost,
                                 Func <T, T, double> editCost)
        {
            // Best operation (among insert, update, delete) to perform
            EditOperationKind[][] M = Enumerable
                                      .Range(0, source.Length + 1)
                                      .Select(line => new EditOperationKind[target.Length + 1])
                                      .ToArray();

            // Minimum cost so far
            double[][] D = Enumerable
                           .Range(0, source.Length + 1)
                           .Select(line => new double[target.Length + 1])
                           .ToArray();

            // Edge: all removes
            double sum = 0.0;

            for (int i = 1; i <= source.Length; ++i)
            {
                M[i][0] = EditOperationKind.Delete;
                D[i][0] = (sum += deleteCost(source[i - 1]));
            }

            // Edge: all inserts
            sum = 0.0;

            for (int i = 1; i <= target.Length; ++i)
            {
                M[0][i] = EditOperationKind.Insert;
                D[0][i] = (sum += insertCost(target[i - 1]));
            }

            // Having fit N - 1, K - 1 characters let's fit N, K
            for (int i = 1; i <= source.Length; ++i)
            {
                for (int j = 1; j <= target.Length; ++j)
                {
                    // here we choose the operation with the least cost
                    double insert = D[i][j - 1] + insertCost(target[j - 1]);
                    double delete = D[i - 1][j] + deleteCost(source[i - 1]);
                    double edit   = D[i - 1][j - 1] + editCost(source[i - 1], target[j - 1]);

                    double min = Math.Min(Math.Min(insert, delete), edit);

                    if (min == insert)
                    {
                        M[i][j] = EditOperationKind.Insert;
                    }
                    else if (min == delete)
                    {
                        M[i][j] = EditOperationKind.Delete;
                    }
                    else if (min == edit)
                    {
                        M[i][j] = object.Equals(source[i - 1], target[j - 1])
              ? EditOperationKind.None
              : EditOperationKind.Edit;
                    }

                    D[i][j] = min;
                }
            }

            EditDistance = D[source.Length][target.Length];

            // Backward: knowing scores (D) and actions (M) let's building edit sequence
            m_Sequence =
                new List <EditOperation <T> >(source.Length + target.Length);

            for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
            {
                EditOperationKind op = M[y][x];

                if (op == EditOperationKind.Insert)
                {
                    x -= 1;
                    m_Sequence.Add(new EditOperation <T>(op, default, target[x], D[y][x + 1] - D[y][x]));
        /// <summary>
        /// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/> into
        /// <paramref name="target"/>, keeping both tables in <c>LargeArrayPoolMatrix</c> instances.
        /// </summary>
        /// <remarks>
        /// Both matrices are disposed in <c>finally</c> blocks so that they are released even when the
        /// computation throws.
        /// </remarks>
        /// <param name="source">Original text.</param>
        /// <param name="target">Text to arrive at.</param>
        /// <param name="insertCost">Cost of inserting one character.</param>
        /// <param name="removeCost">Cost of removing one character.</param>
        /// <param name="editCost">Cost of replacing one character by a different one.</param>
        /// <returns>The operations ordered from the start of the texts to their end.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
        /// </exception>
        public EditOperation[] EditSequence(
            string source, string target,
            int insertCost = 1, int removeCost = 1, int editCost = 1)
        {
            if (null == source)
            {
                throw new ArgumentNullException(nameof(source));
            }
            else if (null == target)
            {
                throw new ArgumentNullException(nameof(target));
            }

            List <EditOperation> result =
                new List <EditOperation>(source.Length + target.Length);

            // Forward: building score matrix

            var columns = target.Length + 1;
            var rows    = source.Length + 1;
            var mPool   = new LargeArrayPoolMatrix <EditOperationKind>(columns, rows);

            try
            {
                var dPool = new LargeArrayPoolMatrix <int>(columns, rows);

                try
                {
                    // Best operation (among insert, update, delete) to perform
                    var M = mPool;

                    // Minimum cost so far
                    var D = dPool;

                    // Origin cell. NOTE(review): pooled storage is presumably not guaranteed to be cleared,
                    // so the cell is set explicitly instead of relying on default values - confirm against
                    // LargeArrayPoolMatrix.
                    M[0, 0] = EditOperationKind.None;
                    D[0, 0] = 0;

                    // Edge: all removes
                    for (int i = 1; i <= source.Length; ++i)
                    {
                        M[i, 0] = EditOperationKind.Remove;
                        D[i, 0] = removeCost * i;
                    }

                    // Edge: all inserts
                    for (int i = 1; i <= target.Length; ++i)
                    {
                        M[0, i] = EditOperationKind.Add;
                        D[0, i] = insertCost * i;
                    }

                    // Having fit N - 1, K - 1 characters let's fit N, K
                    for (int i = 1; i <= source.Length; ++i)
                    {
                        for (int j = 1; j <= target.Length; ++j)
                        {
                            // here we choose the operation with the least cost
                            int insert = D[i, j - 1] + insertCost;
                            int delete = D[i - 1, j] + removeCost;
                            int edit   = D[i - 1, j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);

                            int min = Math.Min(Math.Min(insert, delete), edit);

                            if (min == insert)
                            {
                                M[i, j] = EditOperationKind.Add;
                            }
                            else if (min == delete)
                            {
                                M[i, j] = EditOperationKind.Remove;
                            }
                            else if (min == edit)
                            {
                                M[i, j] = EditOperationKind.Edit;
                            }

                            D[i, j] = min;
                        }
                    }

                    // Backward: knowing scores (D) and actions (M) let's build the edit sequence
                    for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
                    {
                        EditOperationKind op = M[y, x];

                        if (op == EditOperationKind.Add)
                        {
                            x -= 1;
                            result.Add(new EditOperation('\0', target[x], EditOperationKind.Add));
                        }
                        else if (op == EditOperationKind.Remove)
                        {
                            y -= 1;
                            result.Add(new EditOperation(source[y], '\0', EditOperationKind.Remove));
                        }
                        else if (op == EditOperationKind.Edit)
                        {
                            x -= 1;
                            y -= 1;
                            result.Add(new EditOperation(source[y], target[x], EditOperationKind.Edit));
                        }
                        else                 // Start of the matching (EditOperationKind.None)
                        {
                            break;
                        }
                    }
                }
                finally
                {
                    dPool.Dispose();
                }
            }
            finally
            {
                mPool.Dispose();
            }

            // Operations were collected from the end towards the start.
            result.Reverse();

            return result.ToArray();
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Computes a minimum-cost sequence of edit operations that turns <paramref name="source"/> into
        /// <paramref name="target"/>, keeping both tables in unmanaged memory obtained from
        /// <see cref="Marshal.AllocHGlobal(IntPtr)"/>.
        /// </summary>
        /// <remarks>
        /// The tables are flattened row by row: cell (row, column) lives at index <c>row * columns + column</c>.
        /// Both unmanaged buffers are released in a <c>finally</c> block, so they are freed even when the
        /// computation throws.
        /// </remarks>
        /// <param name="source">Original text.</param>
        /// <param name="target">Text to arrive at.</param>
        /// <param name="insertCost">Cost of inserting one character.</param>
        /// <param name="removeCost">Cost of removing one character.</param>
        /// <param name="editCost">Cost of replacing one character by a different one.</param>
        /// <returns>The operations ordered from the start of the texts to their end.</returns>
        /// <exception cref="ArgumentNullException">
        /// <paramref name="source"/> or <paramref name="target"/> is <c>null</c>.
        /// </exception>
        /// <exception cref="OverflowException">
        /// The number of table cells does not fit into an <see cref="int"/>, or a buffer size does not fit
        /// into a native-sized integer.
        /// </exception>
        public unsafe EditOperation[] EditSequence(
            string source, string target,
            int insertCost = 1, int removeCost = 1, int editCost = 1)
        {
            if (null == source)
            {
                throw new ArgumentNullException(nameof(source));
            }
            else if (null == target)
            {
                throw new ArgumentNullException(nameof(target));
            }

            List <EditOperation> result =
                new List <EditOperation>(source.Length + target.Length);

            var columns = target.Length + 1;
            var rows    = source.Length + 1;

            // A Span length is an int, so the cell count must fit; fail loudly instead of wrapping.
            int cellCount = checked(columns * rows);

            // Byte sizes are computed in 64 bits: a wrapped 32-bit product would allocate a buffer that is
            // smaller than the span laid over it.
            var  underlyingType = Enum.GetUnderlyingType(typeof(EditOperationKind));
            long operationBytes = (long)Marshal.SizeOf(underlyingType) * cellCount;
            long costBytes      = (long)Marshal.SizeOf <int>() * cellCount;

            IntPtr operationHandle = IntPtr.Zero;
            IntPtr costHandle      = IntPtr.Zero;

            try
            {
                // Best operation (among insert, update, delete) to perform
                operationHandle = Marshal.AllocHGlobal(new IntPtr(operationBytes));
                var M = new Span <EditOperationKind>(operationHandle.ToPointer(), cellCount);

                // Minimum cost so far
                costHandle = Marshal.AllocHGlobal(new IntPtr(costBytes));
                var D = new Span <int>(costHandle.ToPointer(), cellCount);

                // Forward: building score matrix. The memory is not initialised by the allocator, hence the
                // explicit origin cell.
                M[0] = EditOperationKind.None;
                D[0] = 0;

                // Edge: all removes
                for (int i = 1; i <= source.Length; ++i)
                {
                    M[i * columns] = EditOperationKind.Remove;
                    D[i * columns] = removeCost * i;
                }

                // Edge: all inserts
                for (int i = 1; i <= target.Length; ++i)
                {
                    M[i] = EditOperationKind.Add;
                    D[i] = insertCost * i;
                }

                // Having fit N - 1, K - 1 characters let's fit N, K
                for (int i = 1; i <= source.Length; ++i)
                {
                    for (int j = 1; j <= target.Length; ++j)
                    {
                        // here we choose the operation with the least cost
                        int insert = D[i * columns + j - 1] + insertCost;
                        int delete = D[(i - 1) * columns + j] + removeCost;
                        int edit   = D[(i - 1) * columns + j - 1] + (source[i - 1] == target[j - 1] ? 0 : editCost);

                        int min = Math.Min(Math.Min(insert, delete), edit);

                        if (min == insert)
                        {
                            M[i * columns + j] = EditOperationKind.Add;
                        }
                        else if (min == delete)
                        {
                            M[i * columns + j] = EditOperationKind.Remove;
                        }
                        else if (min == edit)
                        {
                            M[i * columns + j] = EditOperationKind.Edit;
                        }

                        D[i * columns + j] = min;
                    }
                }

                // The cost table is not needed for backtracking; release it early and clear the handle so
                // the finally block does not free it a second time.
                Marshal.FreeHGlobal(costHandle);
                costHandle = IntPtr.Zero;

                // Backward: knowing the actions (M) let's build the edit sequence
                for (int x = target.Length, y = source.Length; (x > 0) || (y > 0);)
                {
                    EditOperationKind op = M[y * columns + x];

                    if (op == EditOperationKind.Add)
                    {
                        x -= 1;
                        result.Add(new EditOperation('\0', target[x], op));
                    }
                    else if (op == EditOperationKind.Remove)
                    {
                        y -= 1;
                        result.Add(new EditOperation(source[y], '\0', op));
                    }
                    else if (op == EditOperationKind.Edit)
                    {
                        x -= 1;
                        y -= 1;
                        result.Add(new EditOperation(source[y], target[x], op));
                    }
                    else                 // Start of the matching (EditOperationKind.None)
                    {
                        break;
                    }
                }
            }
            finally
            {
                if (costHandle != IntPtr.Zero)
                {
                    Marshal.FreeHGlobal(costHandle);
                }

                if (operationHandle != IntPtr.Zero)
                {
                    Marshal.FreeHGlobal(operationHandle);
                }
            }

            // Operations were collected from the end towards the start.
            result.Reverse();

            return result.ToArray();
        }
Ejemplo n.º 12
0
        /// <summary>
        /// Builds the edit sequence that turns <paramref name="source"/> into <paramref name="target"/>.
        /// </summary>
        /// <remarks>
        /// The forward pass records, for every cell, all operations that reach the minimum cost. The backward
        /// pass then lets <c>GetNextOperation</c> pick one of them per step.
        /// </remarks>
        /// <param name="source">Original text.</param>
        /// <param name="target">Text to arrive at.</param>
        /// <param name="insertCost">Cost of inserting one character.</param>
        /// <param name="removeCost">Cost of removing one character.</param>
        /// <param name="editCost">Cost of replacing one character by a different one.</param>
        /// <param name="copyCost">Cost of keeping a character that is equal in both texts.</param>
        /// <param name="whiteSpacePreference">
        /// Amount subtracted from all three candidate costs of a cell whose source and target characters are
        /// both white space but not equal.
        /// </param>
        /// <returns>The operations ordered from the start of the texts to their end.</returns>
        private static List <EditOperation> ProcessEditSequence(string source, string target,
                                                                int insertCost, int removeCost, int editCost, int copyCost, int whiteSpacePreference)
        {
            int sourceLength = source.Length;
            int targetLength = target.Length;

            // Candidate operations (flags) and minimum cost for every cell
            Operations[,] candidates = new Operations[sourceLength + 1, targetLength + 1];
            int[,]        costs      = new int[sourceLength + 1, targetLength + 1];

            // First column: only removes are possible
            for (int row = 1; row <= sourceLength; row++)
            {
                candidates[row, 0] = Operations.Remove;
                costs[row, 0]      = removeCost * row;
            }

            // First row: only inserts are possible
            for (int column = 1; column <= targetLength; column++)
            {
                candidates[0, column] = Operations.Insert;
                costs[0, column]      = insertCost * column;
            }

            // Forward pass
            for (int row = 1; row <= sourceLength; row++)
            {
                char from             = source[row - 1];
                bool fromIsWhiteSpace = char.IsWhiteSpace(from);

                for (int column = 1; column <= targetLength; column++)
                {
                    char to   = target[column - 1];
                    bool same = (from == to);

                    int viaInsert = costs[row, column - 1] + insertCost;
                    int viaRemove = costs[row - 1, column] + removeCost;
                    int viaEdit   = costs[row - 1, column - 1];

                    if (same)
                    {
                        viaEdit += copyCost;
                    }
                    else
                    {
                        viaEdit += editCost;
                    }

                    if (!same && fromIsWhiteSpace && char.IsWhiteSpace(to))
                    {
                        viaInsert -= whiteSpacePreference;
                        viaRemove -= whiteSpacePreference;
                        viaEdit   -= whiteSpacePreference;
                    }

                    int best = Math.Min(Math.Min(viaInsert, viaRemove), viaEdit);

                    // Every operation that reaches the minimum stays a candidate.
                    Operations allowed = candidates[row, column];

                    if (viaInsert == best)
                    {
                        allowed |= Operations.Insert;
                    }

                    if (viaRemove == best)
                    {
                        allowed |= Operations.Remove;
                    }

                    if (viaEdit == best)
                    {
                        if (same)
                        {
                            allowed |= Operations.Copy;
                        }
                        else
                        {
                            allowed |= Operations.Edit;
                        }
                    }

                    candidates[row, column] = allowed;
                    costs[row, column]      = best;
                }
            }

            // Backward pass: collect the operations from the end of the texts towards their start
            List <EditOperation> sequence = new List <EditOperation>(sourceLength + targetLength);
            Operations           previous = Operations.None;

            int x = targetLength;
            int y = sourceLength;

            while ((x > 0) || (y > 0))
            {
                EditOperationKind op = GetNextOperation(candidates[y, x], insertCost, removeCost, editCost, copyCost, ref previous);

                if (op == EditOperationKind.Insert)
                {
                    x--;
                    sequence.Add(new EditOperation(target[x], op));
                }
                else if (op == EditOperationKind.Remove)
                {
                    y--;
                    sequence.Add(new EditOperation(source[y], op));
                }
                else
                {
                    // EditOperationKind.Edit, EditOperationKind.Copy
                    x--;
                    y--;
                    sequence.Add(new EditOperation(target[x], op));
                    Debug.Assert((op == EditOperationKind.Edit) || (op == EditOperationKind.Copy));
                }
            }

            sequence.Reverse();
            return sequence;
        }