/// <summary>
/// Scores how strongly <paramref name="data"/> repeats itself when it is cut into consecutive
/// groups of <paramref name="lines"/> entries.
/// </summary>
/// <remarks>
/// Every group after the first is compared, entry by entry, with the first group. Each comparison
/// contributes the number of LCS elements whose first component is <c>LCS.MatchType.Match</c>.
/// </remarks>
/// <param name="data">The entries to examine.</param>
/// <param name="lines">The candidate group size.</param>
/// <returns>
/// The accumulated match count, or 0 when <paramref name="lines"/> is not positive or the number
/// of entries is not an exact multiple of <paramref name="lines"/>.
/// </returns>
static int GetRepetitionScore(List <string> data, int lines)
        {
            // A non-positive group size can never tile the data. Checking it first also keeps
            // lines == 0 from raising DivideByZeroException in the arithmetic below.
            if ((lines <= 0) || (data.Count % lines != 0))
            {
                return(0);
            }

            var count = data.Count / lines;
            var score = 0;

            // Group 0 is the reference; compare each later group against it position by position.
            for (var repetition = 1; repetition < count; ++repetition)
            {
                var groupStart = repetition * lines;
                for (var index = 0; index < lines; ++index)
                {
                    score += LCS.GetLCS(data[index], data[groupStart + index]).Count(val => val[0] == LCS.MatchType.Match);
                }
            }
            return(score);
        }
Пример #2
0
        /// <summary>
        /// Collects the edits needed to make the differing parts of <paramref name="dest"/> match
        /// <paramref name="src"/>.
        /// </summary>
        /// <remarks>
        /// Lines of both texts (excluding lines already marked as gaps by existing diff data) are aligned
        /// using a <c>DiffParams</c> in which every unspecified flag defaults to "ignore". Each aligned pair
        /// is then compared column by column using a second <c>DiffParams</c> in which unspecified flags
        /// default to "do not ignore" (line endings: ignored only when <c>src.OnlyEnding</c> is non-null).
        /// Every differing run yields one replacement string taken from <paramref name="src"/> and one
        /// range taken from <paramref name="dest"/>.
        /// When <paramref name="ignoreLineEndings"/> is null and <c>src.OnlyEnding</c> is non-null, each
        /// non-empty line ending in <paramref name="dest"/> that differs from <c>src.OnlyEnding</c> is
        /// queued for replacement as well.
        /// </remarks>
        /// <param name="src">Text supplying the replacement strings.</param>
        /// <param name="dest">Text whose offsets make up the returned ranges.</param>
        /// <param name="lineStartTabStop">Forwarded to the <c>DiffParams</c> used for line alignment.</param>
        /// <param name="ignoreWhitespace">Whitespace flag; null selects the per-phase default.</param>
        /// <param name="ignoreCase">Case flag; null selects the per-phase default.</param>
        /// <param name="ignoreNumbers">Numbers flag; null selects the per-phase default.</param>
        /// <param name="ignoreLineEndings">Line-ending flag; null selects the per-phase default and enables line-ending fixes.</param>
        /// <param name="ignoreCharacters">Forwarded unchanged to both <c>DiffParams</c> instances.</param>
        /// <returns>
        /// A tuple whose first item holds (begin, end) offset pairs in <paramref name="dest"/> and whose
        /// second item holds the strings taken from <paramref name="src"/>.
        /// </returns>
        public static Tuple <List <Tuple <int, int> >, List <string> > GetDiffFixes(TextData src, TextData dest, int lineStartTabStop, bool? ignoreWhitespace, bool? ignoreCase, bool? ignoreNumbers, bool? ignoreLineEndings, string ignoreCharacters)
        {
            var textData   = new TextData[] { src, dest };
            var lineMap    = new Dictionary <int, int> [2];
            var lines      = new List <string> [2];
            var textLines  = new List <string> [2];
            var diffParams = new DiffParams(ignoreWhitespace ?? true, ignoreCase ?? true, ignoreNumbers ?? true, ignoreLineEndings ?? true, ignoreCharacters, lineStartTabStop);

            for (var pass = 0; pass < 2; ++pass)
            {
                // Maps the n-th retained line to its line number in the text. Lines whose existing
                // diffData marks them as Gap are left out (a null diffData retains every line).
                // NOTE(review): Indexes is a project extension; presumably it yields the indexes that satisfy the predicate.
                lineMap[pass] = Enumerable.Range(0, textData[pass].NumLines).Indexes(line => textData[pass].diffData?.LineCompare[line] != LCS.MatchType.Gap).Select((index1, index2) => new { index1, index2 }).ToDictionary(obj => obj.index2, obj => obj.index1);

                // lines[pass][n] must belong to lineMap[pass][n]. Walk the keys 0..Count-1 explicitly,
                // because the enumeration order of Dictionary.Values is not specified.
                lines[pass]     = Enumerable.Range(0, lineMap[pass].Count).Select(index => textData[pass].GetLine(lineMap[pass][index], true)).ToList();
                textLines[pass] = lines[pass].Select(line => diffParams.FormatLine(line).Item1).ToList();
            }

            // NOTE(review): the third argument is presumably a "similar enough to pair" predicate; here
            // two lines qualify when both are blank or both are non-blank.
            var linesLCS = LCS.GetLCS(textLines[0], textLines[1], (str1, str2) => string.IsNullOrWhiteSpace(str1) == string.IsNullOrWhiteSpace(str2));

            var ranges  = new List <Tuple <int, int> >();
            var strs    = new List <string>();
            var curLine = new int[] { -1, -1 };

            // Second phase: stricter parameters decide what counts as a difference inside aligned lines.
            diffParams = new DiffParams(ignoreWhitespace ?? false, ignoreCase ?? false, ignoreNumbers ?? false, ignoreLineEndings ?? src.OnlyEnding != null, ignoreCharacters);
            for (var line = 0; line < linesLCS.Count; ++line)
            {
                // Advance each side's position in its retained-line list and translate it to a text line number.
                var mappedCurLine = new int[2];
                for (var pass = 0; pass < 2; ++pass)
                {
                    if (linesLCS[line][pass] != LCS.MatchType.Gap)
                    {
                        ++curLine[pass];
                        mappedCurLine[pass] = lineMap[pass][curLine[pass]];
                    }
                }

                if (linesLCS[line].IsMatch)
                {
                    var colLines = new string[2];
                    var map      = new List <int> [2];
                    for (var pass = 0; pass < 2; ++pass)
                    {
                        var formatDiffLine = diffParams.FormatLine(lines[pass][curLine[pass]]);
                        colLines[pass] = formatDiffLine.Item1;
                        map[pass]      = formatDiffLine.Item2;
                    }

                    if (colLines[0] != colLines[1])
                    {
                        var colsLCS = LCS.GetLCS(colLines[0], colLines[1]);
                        for (var pass = 0; pass < 2; ++pass)
                        {
                            var start = default(int?);
                            var pos   = -1;

                            // The extra iteration (ctr == Count) flushes a run that reaches the end of the line.
                            for (var ctr = 0; ctr <= colsLCS.Count; ++ctr)
                            {
                                if ((ctr == colsLCS.Count) || (colsLCS[ctr][pass] != LCS.MatchType.Gap))
                                {
                                    ++pos;
                                }

                                if ((ctr == colsLCS.Count) || (colsLCS[ctr].IsMatch))
                                {
                                    if (start.HasValue)
                                    {
                                        // NOTE(review): map[pass] is indexed with formatted-line positions, including
                                        // one past the last character; it presumably maps them back to
                                        // columns of the unformatted line - confirm against FormatLine.
                                        var lineOffset = textData[pass].GetOffset(mappedCurLine[pass], 0);
                                        var begin      = lineOffset + map[pass][start.Value];
                                        var end        = lineOffset + map[pass][pos];
                                        if (pass == 0)
                                        {
                                            // src side: remember the replacement text.
                                            strs.Add(textData[pass].GetString(begin, end - begin));
                                        }
                                        else
                                        {
                                            // dest side: remember the range to be replaced.
                                            ranges.Add(Tuple.Create(begin, end));
                                        }
                                        start = null;
                                    }
                                    continue;
                                }

                                // A run begins at the current position, or at the next one when this side has a
                                // gap here (pos was not advanced for this element).
                                start = start ?? pos + (colsLCS[ctr][pass] == LCS.MatchType.Gap ? 1 : 0);
                            }
                        }
                    }
                }

                // Line-ending fixes apply only when the caller left ignoreLineEndings unspecified, src has
                // a single ending (OnlyEnding), and dest actually has a line at this position.
                if ((ignoreLineEndings == null) && (src.OnlyEnding != null) && (linesLCS[line][1] != LCS.MatchType.Gap))
                {
                    var endingStart = dest.endingOffset[mappedCurLine[1]];
                    var endingEnd   = dest.lineOffset[mappedCurLine[1] + 1];

                    // Lines without any ending (endingStart == endingEnd) are left alone.
                    if ((endingStart != endingEnd) && (dest.Data.Substring(endingStart, endingEnd - endingStart) != src.OnlyEnding))
                    {
                        ranges.Add(Tuple.Create(endingStart, endingEnd));
                        strs.Add(src.OnlyEnding);
                    }
                }
            }

            return(Tuple.Create(ranges, strs));
        }
Пример #3
0
        /// <summary>
        /// Computes line-level and column-level comparison data for two texts and stores it in each
        /// text's <c>diffData</c>.
        /// </summary>
        /// <remarks>
        /// Returns immediately when both texts already hold diff data built from their current
        /// <c>Data</c> with parameters equal to the requested ones. Otherwise each text's diff is cleared
        /// and rebuilt: <c>LineCompare</c>, <c>LineMap</c>, <c>LineRevMap</c> and <c>ColCompare</c> are
        /// filled in, and an entry is inserted into <c>lineOffset</c> and <c>endingOffset</c> for every
        /// row where that text has a gap.
        /// </remarks>
        /// <param name="textData0">First text; updated in place.</param>
        /// <param name="textData1">Second text; updated in place.</param>
        /// <param name="ignoreWhitespace">Forwarded to <c>DiffParams</c>.</param>
        /// <param name="ignoreCase">Forwarded to <c>DiffParams</c>.</param>
        /// <param name="ignoreNumbers">Forwarded to <c>DiffParams</c>.</param>
        /// <param name="ignoreLineEndings">Forwarded to <c>DiffParams</c>.</param>
        /// <param name="ignoreCharacters">Forwarded to <c>DiffParams</c>.</param>
        public static void CalculateDiff(TextData textData0, TextData textData1, bool ignoreWhitespace, bool ignoreCase, bool ignoreNumbers, bool ignoreLineEndings, string ignoreCharacters)
        {
            var diffParams = new DiffParams(ignoreWhitespace, ignoreCase, ignoreNumbers, ignoreLineEndings, ignoreCharacters);

            // Skip the work when both sides already carry a diff for the same data and settings.
            var upToDate = (textData0.diffData != null)
                           && (textData1.diffData != null)
                           && (textData0.diffData.Data == textData0.Data)
                           && (textData1.diffData.Data == textData1.Data)
                           && (textData0.diffData.DiffParams.Equals(diffParams))
                           && (textData1.diffData.DiffParams.Equals(diffParams));
            if (upToDate)
            {
                return;
            }

            var sides      = new TextData[] { textData0, textData1 };
            var formatted  = new List <string> [2];
            var columnMaps = new List <List <int> > [2];

            // Reset each side and collect its formatted lines together with their column maps.
            for (var side = 0; side < 2; ++side)
            {
                var current = sides[side];
                current.ClearDiff();
                current.diffData = new DiffData(current.Data, diffParams);

                formatted[side]  = new List <string>();
                columnMaps[side] = new List <List <int> >();
                foreach (var lineIndex in Enumerable.Range(0, current.NumLines))
                {
                    var formattedLine = diffParams.FormatLine(current.GetLine(lineIndex, true));
                    formatted[side].Add(formattedLine.Item1);
                    columnMaps[side].Add(formattedLine.Item2);
                }
            }

            // NOTE(review): the third argument is presumably a "similar enough to pair" predicate; here
            // two lines qualify when both are blank or both are non-blank.
            var lineMatches = LCS.GetLCS(formatted[0], formatted[1], (left, right) => (string.IsNullOrWhiteSpace(left) == string.IsNullOrWhiteSpace(right)));

            for (var side = 0; side < 2; ++side)
            {
                var current = sides[side];
                var diff    = current.diffData;

                diff.LineCompare = lineMatches.Select(entry => entry[side]).ToList();

                // Duplicate the offset entry at every row where this side has a gap.
                for (var row = 0; row < lineMatches.Count; ++row)
                {
                    if (lineMatches[row][side] != LCS.MatchType.Gap)
                    {
                        continue;
                    }
                    current.lineOffset.Insert(row, current.lineOffset[row]);
                    current.endingOffset.Insert(row, current.lineOffset[row]);
                }

                // LineMap: row -> index of the latest non-gap line seen on this side (-1 before the first).
                diff.LineMap = new Dictionary <int, int>();
                var lastRealLine = -1;
                for (var row = 0; row < lineMatches.Count; ++row)
                {
                    if (lineMatches[row][side] != LCS.MatchType.Gap)
                    {
                        ++lastRealLine;
                    }
                    diff.LineMap[row] = lastRealLine;
                }

                // LineRevMap: line index -> smallest row that maps to it.
                diff.LineRevMap = diff.LineMap
                                  .GroupBy(entry => entry.Value)
                                  .ToDictionary(rows => rows.Key, rows => rows.Min(entry => entry.Key));
                diff.ColCompare = new List <Tuple <int, int> > [lineMatches.Count];
            }

            var cursor   = new int[] { -1, -1 };
            var rowCount = textData0.diffData.ColCompare.Length;

            for (var row = 0; row < rowCount; ++row)
            {
                // Advance both sides optimistically; a side with a gap is stepped back below.
                ++cursor[0];
                ++cursor[1];

                if (lineMatches[row].IsMatch)
                {
                    continue;
                }

                var hasGap = false;
                for (var side = 0; side < 2; ++side)
                {
                    if (lineMatches[row][side] != LCS.MatchType.Gap)
                    {
                        continue;
                    }

                    // This side has no line here, so the opposite side's whole line is marked.
                    --cursor[side];
                    sides[1 - side].diffData.ColCompare[row] = new List <Tuple <int, int> > {
                        Tuple.Create(0, int.MaxValue)
                    };
                    hasGap = true;
                }
                if (hasGap)
                {
                    continue;
                }

                // NOTE(review): the predicate presumably lets characters of the same class
                // (letter/digit or whitespace) be paired as mismatches rather than gaps.
                var columnMatches = LCS.GetLCS(formatted[0][cursor[0]], formatted[1][cursor[1]], (left, right) => (char.IsLetterOrDigit(left) && char.IsLetterOrDigit(right)) || (char.IsWhiteSpace(left) && char.IsWhiteSpace(right)));

                for (var side = 0; side < 2; ++side)
                {
                    var spans     = new List <Tuple <int, int> >();
                    var columnMap = columnMaps[side][cursor[side]];
                    sides[side].diffData.ColCompare[row] = spans;

                    int? runStart = null;
                    var  column   = -1;

                    // The extra iteration (step == Count) flushes a run that reaches the end of the line.
                    for (var step = 0; step <= columnMatches.Count; ++step)
                    {
                        var atEnd = step == columnMatches.Count;

                        if (atEnd || (columnMatches[step][side] != LCS.MatchType.Gap))
                        {
                            ++column;
                        }

                        if (atEnd || columnMatches[step].IsMatch)
                        {
                            if (runStart.HasValue)
                            {
                                spans.Add(Tuple.Create(columnMap[runStart.Value], columnMap[column]));
                            }
                            runStart = null;
                            continue;
                        }

                        // Only a mismatch on this side opens a run; gaps on this side do not.
                        if ((columnMatches[step][side] == LCS.MatchType.Mismatch) && (!runStart.HasValue))
                        {
                            runStart = column;
                        }
                    }
                }
            }
        }