Пример #1
0
        /// <summary>
        /// Diffs two sequences of texts: both inputs are converted with <c>ConvertSentences</c>,
        /// compared with <c>GetDiffResultDic</c>, and the per-side results are turned into views
        /// with <c>ConvertResultToViews</c>.
        /// </summary>
        /// <param name="mainTexts">Texts of the main side.</param>
        /// <param name="subTexts">Texts of the sub side.</param>
        /// <param name="resultDicMain">Receives the diff results produced for the main side.</param>
        /// <param name="resultDicSub">Receives the diff results produced for the sub side.</param>
        /// <param name="similarThreshold">Forwarded to <c>GetDiffResultDic</c> as its similarity limit.</param>
        /// <returns>The views built from the values of both result dictionaries.</returns>
        public static DiffResultViews DiffTexts(IEnumerable <string> mainTexts, IEnumerable <string> subTexts, out DiffResultDic resultDicMain, out DiffResultDic resultDicSub, double similarThreshold = SIMILAR_THRESHOLD)
        {
            // Both out parameters get an empty dictionary before any conversion work runs.
            resultDicMain = new DiffResultDic();
            resultDicSub  = new DiffResultDic();

            Sentences convertedMain = ConvertSentences(mainTexts);
            Sentences convertedSub  = ConvertSentences(subTexts);

            // The comparison replaces both dictionaries with the real results.
            GetDiffResultDic(convertedMain, convertedSub, similarThreshold, out resultDicMain, out resultDicSub);

            var mainResults = resultDicMain.GetValues();
            var subResults  = resultDicSub.GetValues();

            return ConvertResultToViews(mains: mainResults, subs: subResults);
        }
Пример #2
0
        /// <summary>
        /// Wraps every input text in a <c>Sentence</c> that carries its zero-based position in the
        /// input and a lower-cased copy of the text.
        /// </summary>
        /// <param name="texts">Texts to convert, in the order they should be indexed.</param>
        /// <returns>A <c>Sentences</c> collection with one entry per input text, in input order.</returns>
        static Sentences ConvertSentences(IEnumerable <string> texts)
        {
            Sentences sentences = new Sentences();

            int index = 0;

            foreach (string text in texts)
            {
                // Lower-case with the invariant culture: the culture-sensitive ToLower() makes the
                // normalized text depend on the current thread culture (e.g. Turkish 'I' -> 'ı').
                sentences.Add(new Sentence(index: index++, text: text.ToLowerInvariant()));
            }

            return(sentences);
        }
Пример #3
0
        /// <summary>
        /// Pairs main sentences with sub sentences in a single forward pass and records a
        /// <c>DiffResult</c> for every sentence on both sides.
        /// </summary>
        /// <remarks>
        /// For each main sentence the sub sentences after the last paired one are scanned in order.
        /// The first one whose text is equal yields <c>DiffType.Same</c>; otherwise the first one whose
        /// token overlap ratio exceeds <paramref name="similarLimit"/> yields <c>DiffType.Modified</c>.
        /// A main sentence with no partner is recorded as <c>DiffType.Removed</c>, and every sub
        /// sentence left without a result is recorded as <c>DiffType.Added</c>.
        /// </remarks>
        /// <param name="mainSentences">Sentences of the main side.</param>
        /// <param name="subSentences">Sentences of the sub side.</param>
        /// <param name="similarLimit">Overlap ratio that must be exceeded for two sentences to count as modified versions of each other.</param>
        /// <param name="resultDicMain">Receives the results keyed by main sentence index.</param>
        /// <param name="resultDicSub">Receives the results keyed by sub sentence index, rebuilt in ascending key order.</param>
        static void GetDiffResultDic(Sentences mainSentences, Sentences subSentences, double similarLimit, out DiffResultDic resultDicMain, out DiffResultDic resultDicSub)
        {
            resultDicMain = new DiffResultDic();
            resultDicSub  = new DiffResultDic();

            // Position of the most recently paired sub sentence; scanning always resumes after it.
            int lastPaired = -1;

            foreach (Sentence main in mainSentences)
            {
                bool paired = false;

                for (int cursor = lastPaired + 1; cursor < subSentences.Count; cursor++)
                {
                    Sentence candidate = subSentences[cursor];

                    // First check for plain equality of the two texts.
                    if (string.Equals(main.Text, candidate.Text))
                    {
                        resultDicMain.Add(main.Index, new DiffResult(index: main.Index, diffType: DiffType.Same, main: main, sub: candidate, sameTexts: main.Texts.ToList(), modifiedTexts: new List <string>()));
                        resultDicSub.Add(candidate.Index, new DiffResult(index: candidate.Index, diffType: DiffType.Same, main: candidate, sub: main, sameTexts: candidate.Texts.ToList(), modifiedTexts: new List <string>()));
                    }
                    else
                    {
                        // Not equal, so check similarity. The count uses a set intersection, but the actual
                        // same/modified split below is order-sensitive and therefore does not use
                        // intersection or difference.
                        int    sharedCount = main.Texts.Intersect(candidate.Texts).Count();
                        double ratio       = (double)(sharedCount * 2) / (double)(main.Texts.Length + candidate.Texts.Length);

                        // Written as a negated '>' so a NaN ratio (both token arrays empty) is treated as "not similar".
                        if (!(ratio > similarLimit))
                        {
                            continue;
                        }

                        List <string> sameForMain, modifiedForMain;
                        GetSameAndModifiedTexts(mainTexts: main.Texts, subTexts: candidate.Texts, sameTexts: out sameForMain, modifiedTexts: out modifiedForMain);
                        resultDicMain.Add(main.Index, new DiffResult(index: main.Index, diffType: DiffType.Modified, main: main, sub: candidate, sameTexts: sameForMain, modifiedTexts: modifiedForMain));

                        List <string> sameForSub, modifiedForSub;
                        GetSameAndModifiedTexts(mainTexts: candidate.Texts, subTexts: main.Texts, sameTexts: out sameForSub, modifiedTexts: out modifiedForSub);
                        resultDicSub.Add(candidate.Index, new DiffResult(index: candidate.Index, diffType: DiffType.Modified, main: candidate, sub: main, sameTexts: sameForSub, modifiedTexts: modifiedForSub));
                    }

                    // Reached only when one of the two branches above recorded a pair.
                    paired     = true;
                    lastPaired = cursor;
                    break;
                }

                if (paired)
                {
                    continue;
                }

                // No partner found: the main sentence counts as removed.
                resultDicMain.Add(main.Index, new DiffResult(index: main.Index, diffType: DiffType.Removed, main: main, sub: new Sentence(), sameTexts: new List <string>(), modifiedTexts: main.Texts.ToList()));
            }

            // Sub sentences that were not paired above are recorded as added.
            foreach (Sentence leftover in subSentences)
            {
                if (resultDicSub.ContainsKey(leftover.Index))
                {
                    continue;
                }

                resultDicSub.Add(leftover.Index, new DiffResult(index: leftover.Index, diffType: DiffType.Added, main: leftover, sub: new Sentence(), sameTexts: new List <string>(), modifiedTexts: leftover.Texts.ToList()));
            }

            // Entries were inserted out of order, so rebuild the dictionary sorted by key.
            resultDicSub = new DiffResultDic(resultDicSub.OrderBy(entry => entry.Key));
        }