/// <summary>
/// Builds a full dynamic-programming table of edit distances between the
/// prefixes of two token sequences and reports the value for the complete
/// sequences.
/// </summary>
/// <param name="firstDocument">Tokens indexed along the table's first dimension.</param>
/// <param name="secondDocument">Tokens indexed along the table's second dimension.</param>
/// <returns>
/// A <see cref="ComparisonResult"/> built from both documents and the
/// bottom-right cell of the table.
/// </returns>
private ComparisonResult GetDistanceBetween(DocumentTokens firstDocument, DocumentTokens secondDocument)
{
    var rowCount = firstDocument.Count + 1;
    var columnCount = secondDocument.Count + 1;
    var table = new double[rowCount, columnCount];

    // Border cells: the distance to an empty prefix is the prefix length.
    for (var column = 0; column < columnCount; column++)
    {
        table[0, column] = column;
    }

    for (var row = 0; row < rowCount; row++)
    {
        table[row, 0] = row;
    }

    for (var row = 1; row < rowCount; row++)
    {
        for (var column = 1; column < columnCount; column++)
        {
            var substitutionCost = TokenDistanceCalculator.GetTokenDistance(
                firstDocument[row - 1],
                secondDocument[column - 1]);
            var diagonal = table[row - 1, column - 1];

            if (substitutionCost == 0)
            {
                // Zero-cost pair: carry the diagonal value over unchanged.
                table[row, column] = diagonal;
                continue;
            }

            var removal = table[row - 1, column] + 1;
            var insertion = table[row, column - 1] + 1;
            table[row, column] = Math.Min(Math.Min(removal, insertion), diagonal + substitutionCost);
        }
    }

    return new ComparisonResult(firstDocument, secondDocument, table[rowCount - 1, columnCount - 1]);
}
/// <summary>
/// Computes the edit distance between two token sequences with a full
/// dynamic-programming table. Insertions and deletions cost 1; a
/// substitution costs whatever
/// <c>TokenDistanceCalculator.GetTokenDistance</c> reports for the pair.
/// </summary>
/// <param name="first">Tokens indexed along the table's first dimension.</param>
/// <param name="second">Tokens indexed along the table's second dimension.</param>
/// <returns>
/// A <see cref="ComparisonResult"/> built from both documents and the
/// distance between the complete sequences.
/// </returns>
public ComparisonResult GetLevensteinDistance(DocumentTokens first, DocumentTokens second)
{
    var opt = new double[first.Count + 1, second.Count + 1];

    // Border cells: the distance to an empty prefix is the prefix length.
    for (int i = 0; i <= first.Count; i++)
    {
        opt[i, 0] = i;
    }

    for (int i = 0; i <= second.Count; i++)
    {
        opt[0, i] = i;
    }

    for (int i = 1; i <= first.Count; i++)
    {
        for (int j = 1; j <= second.Count; j++)
        {
            var dist = TokenDistanceCalculator.GetTokenDistance(first[i - 1], second[j - 1]);
            if (dist == 0)
            {
                opt[i, j] = opt[i - 1, j - 1];
            }
            else
            {
                // Nested Math.Min avoids allocating a temporary array per cell.
                opt[i, j] = Math.Min(
                    Math.Min(opt[i - 1, j] + 1, opt[i, j - 1] + 1),
                    opt[i - 1, j - 1] + dist);
            }
        }
    }

    // NOTE(review): the result is assumed to be the bottom-right cell wrapped
    // in a ComparisonResult, as the declared return type suggests - confirm.
    return new ComparisonResult(first, second, opt[first.Count, second.Count]);
}
/// <summary>
/// Fills a full edit-distance table for two token sequences and returns the
/// value for the complete sequences.
/// </summary>
/// <param name="first">Tokens indexed along the table's first dimension.</param>
/// <param name="second">Tokens indexed along the table's second dimension.</param>
/// <returns>The bottom-right cell of the table.</returns>
private static double CalcLevenshteinDistance(DocumentTokens first, DocumentTokens second)
{
    var table = new double[first.Count + 1, second.Count + 1];

    // Border cells: the distance to an empty prefix is the prefix length.
    for (var row = 0; row <= first.Count; row++)
    {
        table[row, 0] = row;
    }

    for (var column = 0; column <= second.Count; column++)
    {
        table[0, column] = column;
    }

    for (var row = 1; row <= first.Count; row++)
    {
        for (var column = 1; column <= second.Count; column++)
        {
            var left = first[row - 1];
            var right = second[column - 1];

            if (left == right)
            {
                // Tokens that compare equal under == cost nothing.
                table[row, column] = table[row - 1, column - 1];
                continue;
            }

            var substitution = TokenDistanceCalculator.GetTokenDistance(left, right);
            var viaRemoval = 1 + table[row - 1, column];
            var viaSubstitution = substitution + table[row - 1, column - 1];
            var viaInsertion = 1 + table[row, column - 1];
            table[row, column] = Helper.GetMinValue(viaRemoval, viaSubstitution, viaInsertion);
        }
    }

    return table[first.Count, second.Count];
}
/// <summary>
/// Computes the edit distance between two token sequences using two rolling
/// rows instead of a full table. Insertions and deletions cost 1; a
/// substitution costs whatever
/// <c>TokenDistanceCalculator.GetTokenDistance</c> reports for the pair.
/// </summary>
/// <param name="first">Tokens consumed one per outer iteration (table rows).</param>
/// <param name="second">Tokens consumed one per inner iteration (table columns).</param>
/// <returns>
/// The distance between the complete sequences; <c>second.Count</c> when
/// <paramref name="first"/> is empty and <c>first.Count</c> when
/// <paramref name="second"/> is empty.
/// </returns>
public double ComputeLevenshteinDistance(DocumentTokens first, DocumentTokens second)
{
    var prevOpt = new double[second.Count + 1];
    var currentOpt = new double[second.Count + 1];

    // Row 0: turning an empty prefix into j tokens takes j insertions.
    for (var j = 0; j <= second.Count; ++j)
    {
        prevOpt[j] = j;
    }

    for (var i = 1; i <= first.Count; ++i)
    {
        // Column 0: turning i tokens into an empty prefix takes i deletions.
        currentOpt[0] = i;

        for (var j = 1; j <= second.Count; ++j)
        {
            if (first[i - 1] == second[j - 1])
            {
                currentOpt[j] = prevOpt[j - 1];
                continue;
            }

            var replaceCost = TokenDistanceCalculator.GetTokenDistance(first[i - 1], second[j - 1]);

            // Insertion and deletion each cost exactly 1; only the diagonal
            // step is charged the token-to-token distance.
            currentOpt[j] = Math.Min(
                Math.Min(1 + prevOpt[j], 1 + currentOpt[j - 1]),
                replaceCost + prevOpt[j - 1]);
        }

        // The finished row becomes the previous row. The other buffer is
        // fully overwritten on the next pass, so swapping references is safe.
        var finishedRow = currentOpt;
        currentOpt = prevOpt;
        prevOpt = finishedRow;
    }

    // prevOpt always holds the last completed row, including the row-0
    // initialisation when `first` is empty.
    return prevOpt[second.Count];
}
/// <summary>
/// Walks an optimisation table from its top-left corner and collects the
/// tokens of <paramref name="first"/> whose distance to the aligned token of
/// <paramref name="second"/> is zero.
/// </summary>
/// <param name="opt">
/// Table indexed as <c>[position in first, position in second]</c>; the walk
/// stops as soon as the current cell is 0. The walk reads
/// <c>opt[i + 1, j]</c>, so the table needs one more row than
/// <paramref name="first"/> has tokens.
/// </param>
/// <param name="first">Token list the result is drawn from.</param>
/// <param name="second">Token list compared against.</param>
/// <returns>The matched tokens in the order they were encountered.</returns>
private static List<string> RestoreAnswer(int[,] opt, List<string> first, List<string> second)
{
    var matched = new List<string>();
    var row = 0;
    var column = 0;

    while (true)
    {
        // Same evaluation order as a combined && condition: cell first, then bounds.
        if (opt[row, column] == 0 || row >= first.Count || column >= second.Count)
        {
            break;
        }

        if (TokenDistanceCalculator.GetTokenDistance(first[row], second[column]) == 0)
        {
            matched.Add(first[row]);
            row++;
            column++;
            continue;
        }

        // Step in the direction that keeps the table value unchanged.
        if (opt[row, column] == opt[row + 1, column])
        {
            row++;
        }
        else
        {
            column++;
        }
    }

    return matched;
}
/// <summary>
/// Compares every unordered pair of documents and reports the edit distance
/// of each pair.
/// </summary>
/// <param name="documents">Documents to compare; may contain empty documents.</param>
/// <returns>
/// One <see cref="ComparisonResult"/> per pair <c>(documents[i], documents[j])</c>
/// with <c>i &lt; j</c>, in increasing order of <c>i</c> and then <c>j</c>.
/// The list is empty when fewer than two documents are supplied.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="documents"/> is <c>null</c>.
/// </exception>
public List<ComparisonResult> CompareDocumentsPairwise(List<DocumentTokens> documents)
{
    if (documents == null)
    {
        throw new ArgumentNullException("documents");
    }

    var results = new List<ComparisonResult>();
    for (var i = 0; i < documents.Count; i++)
    {
        for (var j = i + 1; j < documents.Count; j++)
        {
            var distance = GetPairwiseEditDistance(documents[i], documents[j]);
            results.Add(new ComparisonResult(documents[i], documents[j], distance));
        }
    }

    return results;
}

/// <summary>
/// Edit distance between two token sequences: insertions and deletions cost 1,
/// a substitution costs <c>TokenDistanceCalculator.GetTokenDistance</c>.
/// </summary>
private static double GetPairwiseEditDistance(DocumentTokens first, DocumentTokens second)
{
    var previousRow = new double[second.Count + 1];
    var currentRow = new double[second.Count + 1];

    // Row 0: an empty prefix needs j insertions to become j tokens.
    for (var j = 0; j <= second.Count; j++)
    {
        previousRow[j] = j;
    }

    for (var i = 1; i <= first.Count; i++)
    {
        currentRow[0] = i;
        for (var j = 1; j <= second.Count; j++)
        {
            var substitution = TokenDistanceCalculator.GetTokenDistance(first[i - 1], second[j - 1]);
            currentRow[j] = Math.Min(
                Math.Min(previousRow[j] + 1, currentRow[j - 1] + 1),
                previousRow[j - 1] + substitution);
        }

        // The finished row becomes the previous row; the other buffer is
        // fully overwritten on the next pass.
        var finishedRow = currentRow;
        currentRow = previousRow;
        previousRow = finishedRow;
    }

    return previousRow[second.Count];
}
/// <summary>
/// Recursively computes the minimum edit distance between the remaining
/// tokens of two documents. Insertions and deletions cost 1; aligning the two
/// current tokens costs their token distance, capped at 1.
/// </summary>
/// <param name="minDocument">First document, positioned at its current token.</param>
/// <param name="maxDocument">Second document, positioned at its current token.</param>
/// <returns>
/// The cheapest cost of transforming the rest of
/// <paramref name="minDocument"/> into the rest of
/// <paramref name="maxDocument"/>.
/// </returns>
/// <remarks>
/// Each call branches into three recursive calls with no memoisation, so the
/// running time grows exponentially with document length.
/// </remarks>
private double GetMinDistance(Document minDocument, Document maxDocument)
{
    // Once one side is exhausted, every remaining token on the other side
    // costs one insertion or deletion.
    if (minDocument.IsEnd)
    {
        return(maxDocument.Length - maxDocument.Index);
    }

    if (maxDocument.IsEnd)
    {
        return(minDocument.Length - minDocument.Index);
    }

    var tokenDistance = TokenDistanceCalculator.GetTokenDistance(minDocument.Current, maxDocument.Current);

    // NOTE(review): assumes GetNext yields an advanced position without
    // mutating the receiver - confirm against the Document type.
    var nextMin = minDocument.GetNext;
    var nextMax = maxDocument.GetNext;

    // Align the two current tokens. Capping at 1 is the same as taking the
    // cheaper of "token distance" and "flat replace", with one recursive call.
    var alignDistance = GetMinDistance(nextMin, nextMax) + Math.Min(tokenDistance, 1);

    // Skip a token of maxDocument (insertion).
    var addTokenDistance = GetMinDistance(minDocument, nextMax) + 1;

    // Skip a token of minDocument (deletion). Without this case, tokens of
    // minDocument could only be dropped at its very end.
    var removeTokenDistance = GetMinDistance(nextMin, maxDocument) + 1;

    return(Math.Min(Math.Min(addTokenDistance, removeTokenDistance), alignDistance));
}
/// <summary>
/// Builds a suffix-based optimisation table: cell <c>[i, j]</c> counts the
/// longest chain of zero-distance token pairs obtainable from
/// <c>first[i..]</c> and <c>second[j..]</c>.
/// </summary>
/// <param name="first">Token list indexed along the first dimension.</param>
/// <param name="second">Token list indexed along the second dimension.</param>
/// <returns>
/// A table of size <c>(first.Count + 1) x (second.Count + 1)</c>; the extra
/// last row and column stay 0.
/// </returns>
private static int[,] CreateOptimizationTable(List<string> first, List<string> second)
{
    var table = new int[first.Count + 1, second.Count + 1];

    // Fill from the bottom-right so each cell's successors are already known.
    for (var row = first.Count - 1; row >= 0; row--)
    {
        for (var column = second.Count - 1; column >= 0; column--)
        {
            var tokensMatch =
                TokenDistanceCalculator.GetTokenDistance(first[row], second[column]) == 0;

            table[row, column] = tokensMatch
                ? 1 + table[row + 1, column + 1]
                : Math.Max(table[row + 1, column], table[row, column + 1]);
        }
    }

    return table;
}
/// <summary>
/// Computes the edit distance between two token sequences with a full
/// dynamic-programming table. Insertions and deletions cost 1; a
/// substitution costs whatever
/// <c>TokenDistanceCalculator.GetTokenDistance</c> reports for the pair.
/// </summary>
/// <param name="first">Tokens indexed along the table's first dimension.</param>
/// <param name="second">Tokens indexed along the table's second dimension.</param>
/// <returns>
/// A <see cref="ComparisonResult"/> built from both documents and the
/// bottom-right cell of the table.
/// </returns>
public ComparisonResult LevensteinDistance(DocumentTokens first, DocumentTokens second)
{
    var rows = first.Count;
    var columns = second.Count;
    var table = new double[rows + 1, columns + 1];

    // Border cells: the distance to an empty prefix is the prefix length.
    for (var row = 0; row <= rows; row++)
    {
        table[row, 0] = row;
    }

    for (var column = 0; column <= columns; column++)
    {
        table[0, column] = column;
    }

    for (var row = 1; row <= rows; row++)
    {
        for (var column = 1; column <= columns; column++)
        {
            var substitution = TokenDistanceCalculator.GetTokenDistance(first[row - 1], second[column - 1]);

            var viaRemoval = table[row - 1, column] + 1;
            var viaInsertion = table[row, column - 1] + 1;
            var viaSubstitution = table[row - 1, column - 1] + substitution;

            table[row, column] = Math.Min(Math.Min(viaRemoval, viaInsertion), viaSubstitution);
        }
    }

    return new ComparisonResult(first, second, table[rows, columns]);
}
/// <summary>
/// Runs the rolling-row edit-distance recurrence over two token sequences,
/// writing into the two caller-supplied row buffers. Insertions and
/// deletions cost 1; a substitution costs whatever
/// <c>TokenDistanceCalculator.GetTokenDistance</c> reports for the pair.
/// </summary>
/// <param name="firstDoc">Tokens consumed one per outer iteration (table rows).</param>
/// <param name="secondDoc">Tokens consumed one per inner iteration (table columns).</param>
/// <param name="optOld">
/// Previous-row buffer. After each outer iteration it receives a copy of the
/// row just computed.
/// NOTE(review): assumed to be pre-filled with 0..secondDoc.Count by the
/// caller - confirm.
/// </param>
/// <param name="optNew">
/// Current-row buffer. Index 0 is set to <c>i + 1</c> at the end of outer
/// iteration <c>i</c>.
/// NOTE(review): assumed to have <c>optNew[0] == 1</c> on entry - confirm.
/// </param>
private static void FindLevenshteinDistanse(DocumentTokens firstDoc, DocumentTokens secondDoc, double[] optOld, double[] optNew)
{
    for (var i = 1; i <= firstDoc.Count; ++i)
    {
        for (var j = 1; j <= secondDoc.Count; ++j)
        {
            if (firstDoc[i - 1] == secondDoc[j - 1])
            {
                optNew[j] = optOld[j - 1];
                continue;
            }

            // Computed once per cell; only the diagonal step uses it.
            var replaceCost = TokenDistanceCalculator.GetTokenDistance(firstDoc[i - 1], secondDoc[j - 1]);

            optNew[j] = GetMinOfThree(
                optOld[j] + 1,                // deletion
                optOld[j - 1] + replaceCost,  // substitution
                optNew[j - 1] + 1);           // insertion costs 1, not the token distance
        }

        // Publish the finished row, then seed column 0 of the next row.
        optNew.CopyTo(optOld, 0);
        optNew[0] = i + 1;
    }
}
/// <summary>
/// Computes the edit distance between two token sequences with a full
/// dynamic-programming table. Insertions and deletions cost 1; a pair of
/// tokens that compare equal costs 0, any other pair costs whatever
/// <c>TokenDistanceCalculator.GetTokenDistance</c> reports.
/// </summary>
/// <param name="first">Tokens indexed along the table's first dimension.</param>
/// <param name="second">Tokens indexed along the table's second dimension.</param>
/// <returns>
/// A <see cref="ComparisonResult"/> built from both documents and the
/// bottom-right cell of the table.
/// </returns>
/// <exception cref="ArgumentNullException">
/// <paramref name="first"/> or <paramref name="second"/> is <c>null</c>.
/// </exception>
ComparisonResult CompareDocuments(DocumentTokens first, DocumentTokens second)
{
    if (first == null)
    {
        throw new ArgumentNullException("first");
    }

    if (second == null)
    {
        throw new ArgumentNullException("second");
    }

    var table = new double[first.Count + 1, second.Count + 1];

    // Border cells: the distance to an empty prefix is the prefix length.
    for (var row = 0; row <= first.Count; row++)
    {
        table[row, 0] = row;
    }

    for (var column = 0; column <= second.Count; column++)
    {
        table[0, column] = column;
    }

    for (var row = 1; row <= first.Count; row++)
    {
        for (var column = 1; column <= second.Count; column++)
        {
            double substitution;
            if (first[row - 1] == second[column - 1])
            {
                // Equal tokens skip the distance calculator entirely.
                substitution = 0;
            }
            else
            {
                substitution = TokenDistanceCalculator.GetTokenDistance(first[row - 1], second[column - 1]);
            }

            var cheapestIndel = Math.Min(table[row - 1, column] + 1, table[row, column - 1] + 1);
            table[row, column] = Math.Min(cheapestIndel, table[row - 1, column - 1] + substitution);
        }
    }

    return new ComparisonResult(first, second, table[first.Count, second.Count]);
}