Exemple #1
0
        /// <summary>
        /// Diffs two collections with a longest-common-subsequence (LCS) table.
        /// Matching elements at the head and at the tail of both collections are consumed
        /// first, so the table only has to cover the middle section that actually differs.
        /// </summary>
        /// <param name="first">
        /// First collection. Its <c>Differences</c> and <c>CommonElements</c> are updated.
        /// </param>
        /// <param name="second">
        /// Second collection. Its <c>Differences</c> is updated and its <c>CommonElements</c>
        /// is replaced with the instance owned by <paramref name="first"/>.
        /// </param>
        /// <returns>The value produced by <c>ExtractDiffs</c> for the computed table.</returns>
        private double LCS(ref IDiffObjectsCollection first, ref IDiffObjectsCollection second)
        {
            int head = 0;
            int firstTail = first.Count;
            int secondTail = second.Count;

            IDiffObjectsCollection partialDiffsFirst = null;
            IDiffObjectsCollection partialDiffsSecond = null;
            IDiffObjectsCollection partialCommon = null;

            // Both collections report their common elements through one shared list.
            IDiffObjectsCollection shared = first.CommonElements;
            second.CommonElements = shared;

            // Consume the matching run at the beginning of both collections.
            for (; head < firstTail && head < secondTail; head++)
            {
                if (!first[head].ValueMatches(second[head], out partialDiffsFirst, out partialDiffsSecond, out partialCommon))
                {
                    break;
                }

                // A match may still carry granular differences; record them in order.
                first.Differences.AddRange(partialDiffsFirst);
                second.Differences.AddRange(partialDiffsSecond);

                if (partialDiffsFirst != null || partialDiffsSecond != null)
                {
                    // Partial match: only the granular common parts are shared.
                    shared.AddRange(partialCommon);
                }
                else
                {
                    // Clean match: the whole element is shared.
                    shared.Add(first[head]);
                }
            }

            // Consume the matching run at the end of both collections.
            while (firstTail > head && secondTail > head)
            {
                if (!first[firstTail - 1].ValueMatches(second[secondTail - 1], out partialDiffsFirst, out partialDiffsSecond, out partialCommon))
                {
                    break;
                }

                firstTail--;
                secondTail--;

                if (partialDiffsFirst != null || partialDiffsSecond != null)
                {
                    shared.AddRange(partialCommon);
                }
                else
                {
                    shared.Add(first[firstTail]);
                }

                // Walking backwards, so granular differences are pushed rather than appended.
                first.Differences.PushRange(partialDiffsFirst);
                second.Differences.PushRange(partialDiffsSecond);
            }

            // One extra row/column for the empty-prefix border; both dimensions are
            // capped at MAX_MATRIX_LENGTH to prevent running out of memory.
            int rows = Math.Min(firstTail - head + 1, MAX_MATRIX_LENGTH);
            int cols = Math.Min(secondTail - head + 1, MAX_MATRIX_LENGTH);

            ushort[,] table = new ushort[rows, cols];

            // Classic LCS fill: extend the diagonal on a match, otherwise carry the best neighbour.
            for (int row = 1; row < rows; row++)
            {
                for (int col = 1; col < cols; col++)
                {
                    table[row, col] = first[head + row - 1].ValueMatches(second[head + col - 1])
                        ? (ushort)(table[row - 1, col - 1] + 1)
                        : Math.Max(table[row, col - 1], table[row - 1, col]);
                }
            }

            // Walk the table back to populate the differences.
            return ExtractDiffs(table, first, second, head, rows, cols);
        }
Exemple #2
0
        /// <summary>
        /// Compares two collections in a single forward merge pass, recording the elements
        /// found on only one side as differences and the matching ones as common elements.
        /// </summary>
        /// <remarks>
        /// The pass uses <c>CompareValues</c> to decide which side to advance, so it presumably
        /// expects both collections to be ordered consistently with that comparison
        /// (TODO confirm against callers).
        /// </remarks>
        /// <param name="first">
        /// First collection. Its <c>Differences</c> and <c>CommonElements</c> are updated.
        /// </param>
        /// <param name="second">
        /// Second collection. Its <c>Differences</c> is updated and its <c>CommonElements</c>
        /// is replaced with the instance owned by <paramref name="first"/>.
        /// </param>
        /// <returns>
        /// The smaller of the two per-collection ratios <c>1 - (whole-element differences / Count)</c>.
        /// An empty collection contributes a ratio of 0.
        /// </returns>
        public double Diff(ref IDiffObjectsCollection first, ref IDiffObjectsCollection second)
        {
            int i = 0, j = 0, m = first.Count, n = second.Count;

            double diffCount1 = 0;
            double diffCount2 = 0;

            IDiffObjectsCollection granularDiffs1 = null;
            IDiffObjectsCollection granularDiffs2 = null;
            IDiffObjectsCollection granularCommon = null;

            // Both collections report their common elements through one shared list.
            IDiffObjectsCollection currCommonElements = first.CommonElements;

            second.CommonElements = currCommonElements;

            while (i < m || j < n)
            {
                if (i < m && j < n)
                {
                    if (first[i].ValueMatches(second[j], out granularDiffs1, out granularDiffs2, out granularCommon))
                    {
                        // A match may still carry granular differences; those are recorded
                        // but do not count as whole-element differences in the ratio.
                        first.Differences.AddRange(granularDiffs1);
                        second.Differences.AddRange(granularDiffs2);
                        if (granularDiffs1 == null && granularDiffs2 == null)
                        {
                            currCommonElements.Add(first[i]);
                        }
                        else
                        {
                            currCommonElements.AddRange(granularCommon);
                        }
                        i++;
                        j++;
                    }
                    else
                    {
                        // Evaluate the ordering once and dispatch on it.
                        CompareResult order = first[i].CompareValues(second[j], _ignoreCase);

                        if (order == CompareResult.Lower)
                        {
                            first.Differences.Add(first[i]);
                            diffCount1++;
                            i++;
                        }
                        else if (order == CompareResult.Greater)
                        {
                            second.Differences.Add(second[j]);
                            diffCount2++;
                            j++;
                        }
                        else
                        {
                            // The elements do not match, yet the comparison ranks neither one
                            // before the other. Without this branch neither cursor would move
                            // and the loop would never terminate, so treat both elements as
                            // differences and step past them.
                            first.Differences.Add(first[i]);
                            second.Differences.Add(second[j]);
                            diffCount1++;
                            diffCount2++;
                            i++;
                            j++;
                        }
                    }
                }
                else if (i < m)
                {
                    // second is exhausted: everything left in first is a difference.
                    first.Differences.Add(first[i]);
                    diffCount1++;
                    i++;
                }
                else
                {
                    // first is exhausted: everything left in second is a difference.
                    second.Differences.Add(second[j]);
                    diffCount2++;
                    j++;
                }
            }

            //calculate diff ratio, or similarity factor
            double ratio1 = 0;

            if (first.Count > 0)
            {
                ratio1 = 1 - diffCount1 / first.Count;
            }

            double ratio2 = 0;

            if (second.Count > 0)
            {
                ratio2 = 1 - diffCount2 / second.Count;
            }

            return Math.Min(ratio1, ratio2);
        }
Exemple #3
0
        /// <summary>
        /// Walks an LCS table backwards from its bottom-right cell and records, for the
        /// window of both collections covered by the table, which elements are common and
        /// which exist on only one side.
        /// </summary>
        /// <param name="C">
        /// LCS table; row/column 0 is the empty-prefix border, so cell (i, j) with i, j &gt; 0
        /// corresponds to <c>first[start + i - 1]</c> and <c>second[start + j - 1]</c>.
        /// </param>
        /// <param name="first">
        /// First collection. Its <c>Differences</c> and <c>CommonElements</c> are updated.
        /// </param>
        /// <param name="second">
        /// Second collection. Its <c>Differences</c> is updated and its <c>CommonElements</c>
        /// is replaced with the instance owned by <paramref name="first"/>.
        /// </param>
        /// <param name="start">Index in both collections where the table's window begins.</param>
        /// <param name="m">Number of rows of <paramref name="C"/> to walk.</param>
        /// <param name="n">Number of columns of <paramref name="C"/> to walk.</param>
        /// <returns>
        /// The smaller of the two per-collection ratios <c>1 - (whole-element differences / Count)</c>.
        /// An empty collection contributes a ratio of 0.
        /// </returns>
        private double ExtractDiffs(
            ushort[, ] C, IDiffObjectsCollection first, IDiffObjectsCollection second, int start, int m, int n)
        {
            int i = m - 1, j = n - 1;

            double diffCount1 = 0;
            double diffCount2 = 0;

            IDiffObjectsCollection currFirstDiffs     = first.Differences;
            IDiffObjectsCollection currSecondDiffs    = second.Differences;
            IDiffObjectsCollection currCommonElements = first.CommonElements;

            second.CommonElements = currCommonElements;

            // A plain while (not do-while): an empty window (i == 0 and j == 0) has nothing to extract.
            while (i > 0 || j > 0)
            {
                IDiffObject currFirst  = null;
                IDiffObject currSecond = null;

                IDiffObjectsCollection granularDiffsForFirst  = null;
                IDiffObjectsCollection granularDiffsForSecond = null;
                IDiffObjectsCollection granularCommonElements = null;

                // Only a positive row/column maps to an element inside the window. Index
                // start + 0 - 1 would point at the element just before the window, which
                // must not be compared or recorded here.
                if (i > 0)
                {
                    int iIndex = start + i - 1;

                    if (iIndex >= 0 && iIndex < first.Count)
                    {
                        currFirst = first[iIndex];
                    }
                }

                if (j > 0)
                {
                    int jIndex = start + j - 1;

                    if (jIndex >= 0 && jIndex < second.Count)
                    {
                        currSecond = second[jIndex];
                    }
                }

                if (i > 0 && j > 0 && currFirst != null &&
                    currFirst.ValueMatches(currSecond, out granularDiffsForFirst, out granularDiffsForSecond, out granularCommonElements))
                {
                    //ValueMatches can return true if the objects are very similar
                    //In that case we need to capture granular differences
                    currFirstDiffs.PushRange(granularDiffsForFirst);
                    currSecondDiffs.PushRange(granularDiffsForSecond);

                    if (granularDiffsForFirst == null && granularDiffsForSecond == null)
                    {
                        currCommonElements.Add(currFirst);
                    }
                    else
                    {
                        currCommonElements.AddRange(granularCommonElements);
                    }

                    i--; j--;
                }
                else if (j > 0 && (i <= 0 || C[i, j - 1] >= C[i - 1, j]))
                {
                    // Moving left keeps the LCS length: the element exists only in second.
                    currSecondDiffs.Push(currSecond);
                    diffCount2++;
                    j--;
                }
                else if (i > 0 && (j <= 0 || C[i, j - 1] < C[i - 1, j]))
                {
                    // Moving up keeps the LCS length: the element exists only in first.
                    currFirstDiffs.Push(currFirst);
                    diffCount1++;
                    i--;
                }
            }

            //calculate diff ratio, or similarity factor
            double ratio1 = 0;

            if (first.Count > 0)
            {
                ratio1 = 1 - diffCount1 / first.Count;
            }

            double ratio2 = 0;

            if (second.Count > 0)
            {
                ratio2 = 1 - diffCount2 / second.Count;
            }

            return Math.Min(ratio1, ratio2);
        }