/// <summary>
/// Aligns the source positions of <paramref name="srcRange"/> with the target positions of
/// <paramref name="trgRange"/> by filling a dynamic-programming cost matrix from the pairwise
/// similarities in <paramref name="sim"/> and then walking the cheapest path backwards.
/// </summary>
/// <param name="sim">
/// Similarity matrix, read at [source position, target position]. Each value used is asserted
/// (via Debug.Assert) to lie in [0, 1] or to be exactly -1; any negative value is charged a
/// flat penalty instead of a substitution cost.
/// </param>
/// <param name="srcRange">Source range; Start and Into are both treated as inclusive.</param>
/// <param name="trgRange">Target range; Start and Into are both treated as inclusive.</param>
/// <returns>
/// The edit distance object whose Distance has been increased by the score of the bottom-right
/// matrix cell, and to which one item per backtracking step has been handed via AddAtStart.
/// </returns>
private EditDistance ComputeEditDistance(double[,] sim, PositionRange srcRange, PositionRange trgRange)
{
    // Flat cost charged for pairing two objects whose similarity is negative.
    const double forbiddenPairCosts = 100000.0d;

    // TODO handle special cases (one/both of the ranges being empty/having no elements)
    // TODO use diagonal algorithm

    int sourceObjectsCount = srcRange.Into - srcRange.Start + 1;
    int targetObjectsCount = trgRange.Into - trgRange.Start + 1;

    EditDistance result = new EditDistance(sourceObjectsCount, targetObjectsCount, 0.0d);
    MatrixItem[,] cells = new MatrixItem[sourceObjectsCount + 1, targetObjectsCount + 1];

    // Borders: reaching (row, 0) takes 'row' deletions, reaching (0, col) takes 'col' insertions.
    cells[0, 0] = new MatrixItem(0.0d, EditOperation.Identity, 0.0d);
    for (int row = 1; row <= sourceObjectsCount; ++row)
    {
        cells[row, 0] = new MatrixItem((double)row * _InsertDeleteCosts, EditOperation.Delete, 0.0d);
    }
    for (int col = 1; col <= targetObjectsCount; ++col)
    {
        cells[0, col] = new MatrixItem((double)col * _InsertDeleteCosts, EditOperation.Insert, 0.0d);
    }

    // Interior: row-major order guarantees that the upper, left and upper-left neighbours
    // of a cell are final before the cell itself is computed.
    for (int row = 1; row <= sourceObjectsCount; ++row)
    {
        for (int col = 1; col <= targetObjectsCount; ++col)
        {
            double similarity = sim[srcRange.Start + row - 1, trgRange.Start + col - 1];
            System.Diagnostics.Debug.Assert(similarity == -1.0d || (similarity >= 0.0d && similarity <= 1.0d));

            // The less similar two objects are, the more expensive it is to map one onto the other.
            double changeCosts;
            if (similarity < 0)
            {
                changeCosts = forbiddenPairCosts;
            }
            else
            {
                changeCosts = cells[row - 1, col - 1].Score + (1.0d - similarity);
            }

            double insertCosts = cells[row, col - 1].Score + _InsertDeleteCosts;
            double deleteCosts = cells[row - 1, col].Score + _InsertDeleteCosts;

            double cheapest = Math.Min(Math.Min(changeCosts, deleteCosts), insertCosts);

            MatrixItem cell = new MatrixItem(0.0d, EditOperation.Identity, 0.0d);
            cell.Score = cheapest;
            cell.Similarity = similarity;

            // On ties, deletion wins over insertion, and insertion wins over change/identity.
            if (cheapest == deleteCosts)
            {
                cell.Operation = EditOperation.Delete;
            }
            else if (cheapest == insertCosts)
            {
                cell.Operation = EditOperation.Insert;
            }
            else if (cheapest == changeCosts)
            {
                if (similarity == 1.0d)
                {
                    cell.Operation = EditOperation.Identity;
                }
                else
                {
                    cell.Operation = EditOperation.Change;
                }
            }

            cells[row, col] = cell;
        }
    }

    // Backtrack from the bottom-right corner to the origin.
    int srcPos = sourceObjectsCount;
    int trgPos = targetObjectsCount;

    // TODO we may rather need to find the end point at the borders
    result.Distance += cells[srcPos, trgPos].Score;

    while (srcPos > 0 || trgPos > 0)
    {
        EditDistanceItem step = new EditDistanceItem();
        step.Resolution = EditDistanceResolution.None;

        MatrixItem current = cells[srcPos, trgPos];
        EditOperation operation = current.Operation;
        step.Operation = operation;

        switch (operation)
        {
            case EditOperation.Delete:
                step.Costs = _InsertDeleteCosts;
                --srcPos;
                break;
            case EditOperation.Insert:
                step.Costs = _InsertDeleteCosts;
                --trgPos;
                break;
            case EditOperation.Change:
                step.Costs = (1.0d - current.Similarity);
                --srcPos;
                --trgPos;
                break;
            case EditOperation.Identity:
                step.Costs = 0.0d;
                --srcPos;
                --trgPos;
                break;
        }

        System.Diagnostics.Debug.Assert(srcPos >= 0 && trgPos >= 0);
        System.Diagnostics.Debug.Assert(sourceObjectsCount == 0 || operation == EditOperation.Insert || srcPos < sourceObjectsCount);
        System.Diagnostics.Debug.Assert(targetObjectsCount == 0 || operation == EditOperation.Delete || trgPos < targetObjectsCount);

        // TODO shift only for certain ops?
        // Positions are recorded after the step, i.e. they name the cell the step leads to.
        step.Source = srcRange.Start + srcPos;
        step.Target = trgRange.Start + trgPos;
        result.AddAtStart(step);
    }

    return result;
}