Beispiel #1
0
        /// <summary>
        /// Fills a (firstDocument.Count + 1) x (secondDocument.Count + 1) edit-distance table
        /// and wraps its bottom-right cell in a <see cref="ComparisonResult"/>.
        /// </summary>
        /// <param name="firstDocument">Token sequence indexed by the table rows.</param>
        /// <param name="secondDocument">Token sequence indexed by the table columns.</param>
        /// <returns>
        /// A result built from both documents and the accumulated distance, where skipping a token
        /// on either side costs 1 and pairing two tokens costs whatever
        /// TokenDistanceCalculator.GetTokenDistance reports for them.
        /// </returns>
        private ComparisonResult GetDistanceBetween(DocumentTokens firstDocument, DocumentTokens secondDocument)
        {
            var rowCount = firstDocument.Count + 1;
            var columnCount = secondDocument.Count + 1;
            var table = new double[rowCount, columnCount];

            // Border cells: reaching a prefix of length k from an empty prefix costs k.
            for (var row = 0; row < rowCount; row++)
            {
                table[row, 0] = row;
            }
            for (var column = 0; column < columnCount; column++)
            {
                table[0, column] = column;
            }

            for (var row = 1; row < rowCount; row++)
            {
                for (var column = 1; column < columnCount; column++)
                {
                    var pairCost = TokenDistanceCalculator.GetTokenDistance(firstDocument[row - 1], secondDocument[column - 1]);
                    var diagonal = table[row - 1, column - 1];

                    // A zero-cost pair simply carries the diagonal value forward.
                    if (pairCost == 0)
                    {
                        table[row, column] = diagonal;
                        continue;
                    }

                    var skipInFirst = table[row - 1, column] + 1;
                    var skipInSecond = table[row, column - 1] + 1;
                    table[row, column] = Math.Min(Math.Min(skipInFirst, skipInSecond), diagonal + pairCost);
                }
            }

            var total = table[firstDocument.Count, secondDocument.Count];
            return new ComparisonResult(firstDocument, secondDocument, total);
        }
        /// <summary>
        /// Fills a (first.Count + 1) x (second.Count + 1) edit-distance table for two token sequences.
        /// </summary>
        // NOTE(review): this excerpt is cut off before the method ends. The closing braces of the
        // loops and of the method, and the return statement, are not visible here.
        public ComparisonResult GetLevensteinDistance(DocumentTokens first, DocumentTokens second)
        {
            var opt = new double[first.Count + 1, second.Count + 1];

            // First column: a prefix of `first` of length i against an empty prefix of `second` costs i.
            for (int i = 0; i <= first.Count; i++)
            {
                opt[i, 0] = i;
            }
            // First row: an empty prefix of `first` against a prefix of `second` of length i costs i.
            for (int i = 0; i <= second.Count; i++)
            {
                opt[0, i] = i;
            }
            for (int i = 1; i <= first.Count; i++)
            {
                for (int j = 1; j <= second.Count; j++)
                {
                    var dist = TokenDistanceCalculator.GetTokenDistance(first[i - 1], second[j - 1]);
                    if (dist == 0)
                    {
                        // Zero-distance tokens: carry the diagonal cell over unchanged.
                        opt[i, j] = opt[i - 1, j - 1];
                    }
                    else
                    {
                        // Candidates: skip a token of `first` (+1), skip a token of `second` (+1),
                        // or pair the two tokens (+dist).
                        // NOTE(review): as written this does not parse. The `.Min()` call below sits
                        // after the else-block's closing brace instead of directly after the array
                        // initializer; presumably the intent is `new[] { ... }.Min();` -- confirm
                        // against the original source.
                        opt[i, j] = new[]
                        {
                            opt[i - 1, j] + 1,
                            opt[i, j - 1] + 1,
                            opt[i - 1, j - 1] + dist
                        }
                    }.Min();
        /// <summary>
        /// Computes the value in the bottom-right cell of a full edit-distance table built over
        /// the tokens of <paramref name="first"/> (rows) and <paramref name="second"/> (columns).
        /// </summary>
        /// <param name="first">Token sequence indexed by the table rows.</param>
        /// <param name="second">Token sequence indexed by the table columns.</param>
        /// <returns>The accumulated distance stored at [first.Count, second.Count].</returns>
        private static double CalcLevenshteinDistance(DocumentTokens first, DocumentTokens second)
        {
            var table = new double[first.Count + 1, second.Count + 1];

            // Border cells: a prefix of length k against an empty prefix costs k.
            for (var column = 0; column <= second.Count; column++)
            {
                table[0, column] = column;
            }
            for (var row = 0; row <= first.Count; row++)
            {
                table[row, 0] = row;
            }

            for (var row = 1; row <= first.Count; row++)
            {
                for (var column = 1; column <= second.Count; column++)
                {
                    var rowToken = first[row - 1];
                    var columnToken = second[column - 1];

                    // Equal tokens carry the diagonal value over without any extra cost.
                    if (rowToken == columnToken)
                    {
                        table[row, column] = table[row - 1, column - 1];
                        continue;
                    }

                    // The token distance is only requested for tokens that differ.
                    var pairCost = TokenDistanceCalculator.GetTokenDistance(rowToken, columnToken);
                    var skipRowToken = 1 + table[row - 1, column];
                    var pairTokens = pairCost + table[row - 1, column - 1];
                    var skipColumnToken = 1 + table[row, column - 1];

                    // Helper.GetMinValue presumably returns the smallest of its arguments.
                    table[row, column] = Helper.GetMinValue(skipRowToken, pairTokens, skipColumnToken);
                }
            }

            return table[first.Count, second.Count];
        }
Beispiel #4
0
        /// <summary>
        /// Computes the token-level Levenshtein distance between two documents while keeping
        /// only two rows of the dynamic-programming table in memory.
        /// </summary>
        /// <param name="first">Token sequence whose prefixes correspond to table rows.</param>
        /// <param name="second">Token sequence whose prefixes correspond to table columns.</param>
        /// <returns>
        /// The minimal total cost of turning <paramref name="first"/> into <paramref name="second"/>,
        /// where removing or inserting a token costs 1 and replacing a token costs
        /// TokenDistanceCalculator.GetTokenDistance for the two tokens. When one document is empty
        /// the result is the token count of the other one.
        /// </returns>
        public double ComputeLevenshteinDistance(DocumentTokens first, DocumentTokens second)
        {
            var previousRow = new double[second.Count + 1];
            var currentRow = new double[second.Count + 1];

            // Row 0: an empty prefix of `first` needs j insertions to become j tokens of `second`.
            for (var j = 0; j <= second.Count; ++j)
            {
                previousRow[j] = j;
            }

            for (var i = 1; i <= first.Count; ++i)
            {
                // Column 0: i tokens of `first` need i removals to become an empty prefix.
                currentRow[0] = i;

                for (var j = 1; j <= second.Count; ++j)
                {
                    var firstToken = first[i - 1];
                    var secondToken = second[j - 1];

                    if (firstToken == secondToken)
                    {
                        currentRow[j] = previousRow[j - 1];
                    }
                    else
                    {
                        var replaceCost = TokenDistanceCalculator.GetTokenDistance(firstToken, secondToken);

                        // Removal and insertion both cost exactly 1; only the replacement
                        // (diagonal) step is weighted by the token distance.
                        currentRow[j] = Math.Min(
                            Math.Min(1 + previousRow[j], 1 + currentRow[j - 1]),
                            replaceCost + previousRow[j - 1]);
                    }
                }

                // Swap the buffers instead of copying: every cell of the stale row is rewritten
                // before it is read during the next iteration.
                var finishedRow = currentRow;
                currentRow = previousRow;
                previousRow = finishedRow;
            }

            // The last completed row always lives in previousRow, including the case where
            // `first` is empty and the loop above never ran.
            return previousRow[second.Count];
        }
        /// <summary>
        /// Walks the table <paramref name="opt"/> from cell [0, 0] and collects the tokens of
        /// <paramref name="first"/> whose token distance to the current token of
        /// <paramref name="second"/> is zero.
        /// </summary>
        /// <param name="opt">
        /// Table that is read at [i, j] and [i + 1, j]; presumably the one produced by
        /// CreateOptimizationTable for the same two lists -- verify against the caller.
        /// </param>
        /// <param name="first">Token list whose matching tokens are copied into the result.</param>
        /// <param name="second">Token list compared against <paramref name="first"/>.</param>
        /// <returns>The collected tokens, in the order they were encountered.</returns>
        private static List <string> RestoreAnswer(int[,] opt, List <string> first, List <string> second)
        {
            var collected = new List <string>();

            // The walk stops on a zero cell or when either list is exhausted.
            for (int row = 0, column = 0;
                 opt[row, column] != 0 && row < first.Count && column < second.Count;)
            {
                if (TokenDistanceCalculator.GetTokenDistance(first[row], second[column]) == 0)
                {
                    // Zero distance: keep the token and advance in both lists.
                    collected.Add(first[row]);
                    row++;
                    column++;
                    continue;
                }

                // Otherwise advance in `first` when the cell below holds the same value,
                // and in `second` when it does not.
                if (opt[row, column] == opt[row + 1, column])
                {
                    row++;
                    continue;
                }

                column++;
            }

            return collected;
        }
Beispiel #6
0
 /// <summary>
 /// Produces a single-element list that compares the first two entries of
 /// <paramref name="documents"/>, using the token distance between their first tokens.
 /// </summary>
 /// <param name="documents">Documents to compare; only indexes 0 and 1 are read.</param>
 /// <returns>
 /// A list holding one <see cref="ComparisonResult"/> built from documents[0], documents[1]
 /// and the distance between documents[0][0] and documents[1][0].
 /// </returns>
 public List <ComparisonResult> CompareDocumentsPairwise(List <DocumentTokens> documents)
 {
     var firstDocument = documents[0];
     var secondDocument = documents[1];

     // Only the leading token of each document takes part in the comparison.
     var leadingTokenDistance = TokenDistanceCalculator.GetTokenDistance(firstDocument[0], secondDocument[0]);

     var results = new List <ComparisonResult>();
     results.Add(new ComparisonResult(firstDocument, secondDocument, leadingTokenDistance));
     return results;
 }
Beispiel #7
0
        /// <summary>
        /// Recursively computes the cheapest way to consume the remaining tokens of both documents.
        /// </summary>
        /// <param name="minDocument">Document whose IsEnd, Length, Index, Current and GetNext members are read.</param>
        /// <param name="maxDocument">Second document, read through the same members.</param>
        /// <returns>
        /// The number of tokens left in the other document when one document is at its end;
        /// otherwise the minimum over the three recursive alternatives below.
        /// </returns>
        // NOTE(review): the (minDocument.GetNext, maxDocument.GetNext) subproblem is evaluated
        // twice per call, and no alternative advances only minDocument -- confirm both are intended.
        private double GetMinDistance(Document minDocument, Document maxDocument)
        {
            // Base cases: whatever is left in the other document has to be paid for in full.
            if (minDocument.IsEnd)
            {
                return maxDocument.Length - maxDocument.Index;
            }
            if (maxDocument.IsEnd)
            {
                return minDocument.Length - minDocument.Index;
            }

            var pairCost = TokenDistanceCalculator.GetTokenDistance(minDocument.Current, maxDocument.Current);

            // Advance both documents and pay the token distance of the current pair.
            var viaPairing = GetMinDistance(minDocument.GetNext, maxDocument.GetNext) + pairCost;
            // Advance only maxDocument at a flat cost of 1.
            var viaAdding = GetMinDistance(minDocument, maxDocument.GetNext) + 1;
            // Advance both documents at a flat cost of 1.
            var viaReplacing = GetMinDistance(minDocument.GetNext, maxDocument.GetNext) + 1;

            var cheapestFlatStep = Math.Min(viaAdding, viaReplacing);
            return Math.Min(cheapestFlatStep, viaPairing);
        }
        /// <summary>
        /// Builds a (first.Count + 1) x (second.Count + 1) table, filled from the bottom-right
        /// corner towards [0, 0], that counts zero-distance token pairings between the suffixes
        /// of the two lists.
        /// </summary>
        /// <param name="first">Token list indexed by the table rows.</param>
        /// <param name="second">Token list indexed by the table columns.</param>
        /// <returns>
        /// The filled table; the last row and last column are never written and keep their
        /// default value of 0.
        /// </returns>
        private static int[,] CreateOptimizationTable(List <string> first, List <string> second)
        {
            var table = new int[first.Count + 1, second.Count + 1];

            var row = first.Count;
            while (--row >= 0)
            {
                var column = second.Count;
                while (--column >= 0)
                {
                    var tokensMatch = TokenDistanceCalculator.GetTokenDistance(first[row], second[column]) == 0;

                    // A match extends the diagonal count; otherwise take the better of the
                    // cell below and the cell to the right.
                    table[row, column] = tokensMatch
                        ? 1 + table[row + 1, column + 1]
                        : Math.Max(table[row + 1, column], table[row, column + 1]);
                }
            }

            return table;
        }
        /// <summary>
        /// Computes a full edit-distance table over the tokens of both documents and returns
        /// the final cell wrapped in a <see cref="ComparisonResult"/>.
        /// </summary>
        /// <param name="first">Token sequence indexed by the table rows.</param>
        /// <param name="second">Token sequence indexed by the table columns.</param>
        /// <returns>
        /// A result built from both documents and the value at [first.Count, second.Count];
        /// skipping a token on either side costs 1, pairing two tokens costs their token distance.
        /// </returns>
        public ComparisonResult LevensteinDistance(DocumentTokens first, DocumentTokens second)
        {
            var table = new double[first.Count + 1, second.Count + 1];

            // Border cells: a prefix of length k against an empty prefix costs k.
            for (var column = 0; column <= second.Count; column++)
            {
                table[0, column] = column;
            }
            for (var row = 0; row <= first.Count; row++)
            {
                table[row, 0] = row;
            }

            for (var row = 1; row <= first.Count; row++)
            {
                for (var column = 1; column <= second.Count; column++)
                {
                    var pairCost = TokenDistanceCalculator.GetTokenDistance(first[row - 1], second[column - 1]);

                    var skipInFirst = table[row - 1, column] + 1;
                    var skipInSecond = table[row, column - 1] + 1;
                    var pairTokens = table[row - 1, column - 1] + pairCost;

                    table[row, column] = Math.Min(Math.Min(skipInFirst, skipInSecond), pairTokens);
                }
            }

            var total = table[first.Count, second.Count];
            return new ComparisonResult(first, second, total);
        }
 /// <summary>
 /// Runs the row-by-row Levenshtein recurrence over two token sequences, using the two
 /// caller-supplied buffers as the previous and the current row.
 /// </summary>
 /// <param name="firstDoc">Token sequence whose prefixes correspond to rows.</param>
 /// <param name="secondDoc">Token sequence whose prefixes correspond to columns.</param>
 /// <param name="optOld">
 /// Previous-row buffer. Assumes the caller pre-filled it with optOld[j] = j -- TODO confirm.
 /// After each row it receives a copy of <paramref name="optNew"/>.
 /// </param>
 /// <param name="optNew">
 /// Current-row buffer. Assumes the caller set optNew[0] = 1 before the call -- TODO confirm.
 /// After each row its element 0 is set to the next row's index.
 /// </param>
 private static void FindLevenshteinDistanse(DocumentTokens firstDoc, DocumentTokens secondDoc, double[] optOld, double[] optNew)
 {
     for (var i = 1; i <= firstDoc.Count; ++i)
     {
         for (var j = 1; j <= secondDoc.Count; ++j)
         {
             var firstToken = firstDoc[i - 1];
             var secondToken = secondDoc[j - 1];

             if (firstToken == secondToken)
             {
                 optNew[j] = optOld[j - 1];
             }
             else
             {
                 // Looked up once per cell; only the replacement step is weighted by it.
                 var replaceCost = TokenDistanceCalculator.GetTokenDistance(firstToken, secondToken);

                 // Removal (cell above) and insertion (cell to the left) both cost exactly 1.
                 optNew[j] = GetMinOfThree(optOld[j] + 1,
                                           optOld[j - 1] + replaceCost,
                                           optNew[j - 1] + 1);
             }
         }

         // Publish the finished row as the previous row and seed column 0 of the next one.
         optNew.CopyTo(optOld, 0);
         optNew[0] = i + 1;
     }
 }
Beispiel #11
0
        /// <summary>
        /// Compares two documents by filling a full edit-distance table over their tokens.
        /// </summary>
        /// <param name="first">Token sequence indexed by the table rows; must not be null.</param>
        /// <param name="second">Token sequence indexed by the table columns; must not be null.</param>
        /// <returns>
        /// A <see cref="ComparisonResult"/> built from both documents and the value at
        /// [first.Count, second.Count].
        /// </returns>
        /// <exception cref="ArgumentNullException">
        /// Thrown when <paramref name="first"/> or <paramref name="second"/> is null.
        /// </exception>
        ComparisonResult CompareDocuments(DocumentTokens first, DocumentTokens second)
        {
            if (first == null)
            {
                throw new ArgumentNullException("first");
            }
            if (second == null)
            {
                throw new ArgumentNullException("second");
            }

            var table = new double[first.Count + 1, second.Count + 1];

            // Border cells: a prefix of length k against an empty prefix costs k.
            for (var column = 0; column <= second.Count; column++)
            {
                table[0, column] = column;
            }
            for (var row = 0; row <= first.Count; row++)
            {
                table[row, 0] = row;
            }

            for (var row = 1; row <= first.Count; row++)
            {
                for (var column = 1; column <= second.Count; column++)
                {
                    // Equal tokens pair for free; the token distance is only requested otherwise.
                    double pairCost;
                    if (first[row - 1] == second[column - 1])
                    {
                        pairCost = 0;
                    }
                    else
                    {
                        pairCost = TokenDistanceCalculator.GetTokenDistance(first[row - 1], second[column - 1]);
                    }

                    var skipInFirst = table[row - 1, column] + 1;
                    var skipInSecond = table[row, column - 1] + 1;
                    var pairTokens = table[row - 1, column - 1] + pairCost;

                    table[row, column] = Math.Min(Math.Min(skipInFirst, skipInSecond), pairTokens);
                }
            }

            return new ComparisonResult(first, second, table[first.Count, second.Count]);
        }