/// <summary>
/// Scores how closely the repeated groups in <paramref name="data"/> resemble the first group.
/// </summary>
/// <remarks>
/// <paramref name="data"/> is treated as consecutive groups of <paramref name="lines"/> entries each.
/// Every entry of every group after the first is passed to <c>LCS.GetLCS</c> together with the entry
/// at the same position in the first group; the number of result items whose first element equals
/// <c>LCS.MatchType.Match</c> is added to the score.
/// </remarks>
/// <param name="data">The entries to examine.</param>
/// <param name="lines">The number of entries in each group.</param>
/// <returns>
/// The accumulated match count, or 0 when <paramref name="lines"/> is not positive or
/// <c>data.Count</c> is not an exact multiple of <paramref name="lines"/>.
/// </returns>
static int GetRepetitionScore(List<string> data, int lines)
{
    // A non-positive group size cannot partition the data. Bail out before dividing:
    // lines == 0 would otherwise raise DivideByZeroException.
    if (lines <= 0)
    {
        return 0;
    }

    // Only whole groups can be compared position by position.
    if (data.Count % lines != 0)
    {
        return 0;
    }

    var count = data.Count / lines;
    var score = 0;
    for (var repetition = 1; repetition < count; ++repetition)
    {
        for (var index = 0; index < lines; ++index)
        {
            score += LCS.GetLCS(data[index], data[repetition * lines + index]).Count(val => val[0] == LCS.MatchType.Match);
        }
    }

    return score;
}
/// <summary>
/// Compares <paramref name="src"/> with <paramref name="dest"/> and collects the differing pieces
/// as a list of offset ranges in <paramref name="dest"/> and a list of strings taken from
/// <paramref name="src"/>.
/// </summary>
/// <remarks>
/// Lines already marked as gaps by an existing diff (<c>diffData.LineCompare</c>) are skipped.
/// The remaining lines are aligned with a line-level LCS using parameters in which every
/// unspecified (null) option is treated as "ignore". Aligned lines reported as a match are then
/// re-formatted with parameters in which unspecified options are treated as "do not ignore"
/// (line endings: ignored when <c>src.OnlyEnding</c> is non-null) and, if the formatted text
/// differs, compared character by character.
/// NOTE(review): presumably the caller replaces each returned range with the string at the same
/// list position - confirm against callers.
/// </remarks>
/// <param name="src">Text whose content supplies the strings in the result.</param>
/// <param name="dest">Text whose offsets supply the ranges in the result.</param>
/// <param name="lineStartTabStop">Passed to the <c>DiffParams</c> used for line alignment only.</param>
/// <param name="ignoreWhitespace">Whitespace option; null means "ignore for alignment, respect for comparison".</param>
/// <param name="ignoreCase">Case option; null means "ignore for alignment, respect for comparison".</param>
/// <param name="ignoreNumbers">Numbers option; null means "ignore for alignment, respect for comparison".</param>
/// <param name="ignoreLineEndings">
/// Line-ending option; when null and <c>src.OnlyEnding</c> is non-null, differing line endings in
/// <paramref name="dest"/> are additionally reported with <c>src.OnlyEnding</c> as the string.
/// </param>
/// <param name="ignoreCharacters">Passed through to both <c>DiffParams</c> instances.</param>
/// <returns>A tuple of (ranges in <paramref name="dest"/>, strings from <paramref name="src"/>).</returns>
static public Tuple<List<Tuple<int, int>>, List<string>> GetDiffFixes(TextData src, TextData dest, int lineStartTabStop, bool? ignoreWhitespace, bool? ignoreCase, bool? ignoreNumbers, bool? ignoreLineEndings, string ignoreCharacters)
{
    var sides = new TextData[] { src, dest };
    var positionToLine = new Dictionary<int, int>[2];
    var rawLines = new List<string>[2];
    var alignText = new List<string>[2];

    // Alignment pass: anything the caller left unspecified is ignored.
    var alignParams = new DiffParams(ignoreWhitespace ?? true, ignoreCase ?? true, ignoreNumbers ?? true, ignoreLineEndings ?? true, ignoreCharacters, lineStartTabStop);

    for (var side = 0; side < 2; ++side)
    {
        var current = sides[side];

        // Sequential position -> line index, for every line not marked as a gap.
        // With no diffData the null-conditional yields null, which is != Gap, so every line is kept.
        var kept = new Dictionary<int, int>();
        foreach (var lineIndex in Enumerable.Range(0, current.NumLines).Indexes(line => current.diffData?.LineCompare[line] != LCS.MatchType.Gap))
        {
            kept.Add(kept.Count, lineIndex);
        }

        positionToLine[side] = kept;
        rawLines[side] = kept.Values.Select(line => current.GetLine(line, true)).ToList();
        alignText[side] = rawLines[side].Select(text => alignParams.FormatLine(text).Item1).ToList();
    }

    // NOTE(review): the lambda is handed to LCS.GetLCS; it is true when both lines are blank or
    // both are non-blank. Its exact role inside GetLCS is not visible here.
    var lineMatches = LCS.GetLCS(alignText[0], alignText[1], (str1, str2) => string.IsNullOrWhiteSpace(str1) == string.IsNullOrWhiteSpace(str2));

    var ranges = new List<Tuple<int, int>>();
    var replacements = new List<string>();
    var cursor = new int[] { -1, -1 };

    // Comparison pass: anything the caller left unspecified is respected, except line endings,
    // which are ignored whenever src has a single known ending.
    var compareParams = new DiffParams(ignoreWhitespace ?? false, ignoreCase ?? false, ignoreNumbers ?? false, ignoreLineEndings ?? (src.OnlyEnding != null), ignoreCharacters);

    for (var row = 0; row < lineMatches.Count; ++row)
    {
        var entry = lineMatches[row];

        // Advance each side that has a real line on this row and look up its original line index.
        var realLine = new int[2];
        for (var side = 0; side < 2; ++side)
        {
            if (entry[side] == LCS.MatchType.Gap)
            {
                continue;
            }

            ++cursor[side];
            realLine[side] = positionToLine[side][cursor[side]];
        }

        if (entry.IsMatch)
        {
            var texts = new string[2];
            var columnMaps = new List<int>[2];
            for (var side = 0; side < 2; ++side)
            {
                var formatted = compareParams.FormatLine(rawLines[side][cursor[side]]);
                texts[side] = formatted.Item1;
                columnMaps[side] = formatted.Item2;
            }

            if (texts[0] != texts[1])
            {
                var columnMatches = LCS.GetLCS(texts[0], texts[1]);
                for (var side = 0; side < 2; ++side)
                {
                    var runStart = default(int?);
                    var column = -1;

                    // One extra step past the end flushes a run that reaches the end of the line.
                    for (var step = 0; step <= columnMatches.Count; ++step)
                    {
                        var atEnd = step == columnMatches.Count;
                        if (atEnd || (columnMatches[step][side] != LCS.MatchType.Gap))
                        {
                            ++column;
                        }

                        if (!atEnd && !columnMatches[step].IsMatch)
                        {
                            // Inside a differing run; remember where it began. A gap on this side
                            // did not advance the column, so the run begins one past it.
                            if (!runStart.HasValue)
                            {
                                runStart = column + (columnMatches[step][side] == LCS.MatchType.Gap ? 1 : 0);
                            }

                            continue;
                        }

                        if (!runStart.HasValue)
                        {
                            continue;
                        }

                        // Translate formatted columns back to offsets in the underlying text.
                        var lineOffset = sides[side].GetOffset(realLine[side], 0);
                        var begin = lineOffset + columnMaps[side][runStart.Value];
                        var end = lineOffset + columnMaps[side][column];
                        if (side == 0)
                        {
                            replacements.Add(sides[side].GetString(begin, end - begin));
                        }
                        else
                        {
                            ranges.Add(Tuple.Create(begin, end));
                        }

                        runStart = null;
                    }
                }
            }
        }

        // Line-ending fix-up: only when the caller left the option unspecified, src has a single
        // known ending, and dest has a real line on this row.
        if (!ignoreLineEndings.HasValue && (src.OnlyEnding != null) && (entry[1] != LCS.MatchType.Gap))
        {
            var endingStart = dest.endingOffset[realLine[1]];
            var endingEnd = dest.lineOffset[realLine[1] + 1];

            // A line without an ending (zero-length span) is left alone.
            if ((endingStart != endingEnd) && (dest.Data.Substring(endingStart, endingEnd - endingStart) != src.OnlyEnding))
            {
                ranges.Add(Tuple.Create(endingStart, endingEnd));
                replacements.Add(src.OnlyEnding);
            }
        }
    }

    return Tuple.Create(ranges, replacements);
}
/// <summary>
/// Computes line-level and column-level diff information for two texts and stores it in each
/// text's <c>diffData</c>.
/// </summary>
/// <remarks>
/// Does nothing when both texts already carry <c>diffData</c> built from their current
/// <c>Data</c> with equal parameters. Otherwise each text's diff is cleared and rebuilt:
/// <list type="bullet">
/// <item><c>LineCompare</c>: this side's match type for every aligned row.</item>
/// <item><c>lineOffset</c>/<c>endingOffset</c>: an entry is inserted at every row where this side is a gap.</item>
/// <item><c>LineMap</c>: aligned row to the index of the last real line at or before it (-1 if none).</item>
/// <item><c>LineRevMap</c>: each <c>LineMap</c> value to the first aligned row that has it.</item>
/// <item><c>ColCompare</c>: per aligned row, the differing column ranges; left unset for rows reported as a match.</item>
/// </list>
/// </remarks>
/// <param name="textData0">First text to compare.</param>
/// <param name="textData1">Second text to compare.</param>
/// <param name="ignoreWhitespace">Passed to <c>DiffParams</c>.</param>
/// <param name="ignoreCase">Passed to <c>DiffParams</c>.</param>
/// <param name="ignoreNumbers">Passed to <c>DiffParams</c>.</param>
/// <param name="ignoreLineEndings">Passed to <c>DiffParams</c>.</param>
/// <param name="ignoreCharacters">Passed to <c>DiffParams</c>.</param>
public static void CalculateDiff(TextData textData0, TextData textData1, bool ignoreWhitespace, bool ignoreCase, bool ignoreNumbers, bool ignoreLineEndings, string ignoreCharacters)
{
    var diffParams = new DiffParams(ignoreWhitespace, ignoreCase, ignoreNumbers, ignoreLineEndings, ignoreCharacters);
    var sides = new TextData[] { textData0, textData1 };

    // Skip the work when both sides already hold a diff for the same data and parameters.
    var upToDate = sides.All(td => td.diffData != null)
        && sides.All(td => td.diffData.Data == td.Data)
        && sides.All(td => td.diffData.DiffParams.Equals(diffParams));
    if (upToDate)
    {
        return;
    }

    var formattedText = new List<string>[2];
    var columnMaps = new List<List<int>>[2];
    for (var which = 0; which < 2; ++which)
    {
        var current = sides[which];
        current.ClearDiff();
        current.diffData = new DiffData(current.Data, diffParams);

        // Format every line once, keeping both the comparable text and its column map.
        var text = new List<string>();
        var columns = new List<List<int>>();
        foreach (var lineIndex in Enumerable.Range(0, current.NumLines))
        {
            var formatted = diffParams.FormatLine(current.GetLine(lineIndex, true));
            text.Add(formatted.Item1);
            columns.Add(formatted.Item2);
        }

        formattedText[which] = text;
        columnMaps[which] = columns;
    }

    // NOTE(review): the lambda is handed to LCS.GetLCS; it is true when both lines are blank or
    // both are non-blank. Its exact role inside GetLCS is not visible here.
    var lineMatches = LCS.GetLCS(formattedText[0], formattedText[1], (str1, str2) => string.IsNullOrWhiteSpace(str1) == string.IsNullOrWhiteSpace(str2));

    for (var which = 0; which < 2; ++which)
    {
        var current = sides[which];
        var diff = current.diffData;
        diff.LineCompare = lineMatches.Select(match => match[which]).ToList();

        // Wherever this side has a gap, insert a placeholder whose start and ending offsets are
        // both the offset of the line currently at that row.
        for (var row = 0; row < lineMatches.Count; ++row)
        {
            if (lineMatches[row][which] != LCS.MatchType.Gap)
            {
                continue;
            }

            var offset = current.lineOffset[row];
            current.lineOffset.Insert(row, offset);
            current.endingOffset.Insert(row, offset);
        }

        diff.LineMap = new Dictionary<int, int>();
        var firstRowForLine = new Dictionary<int, int>();
        var lastRealLine = -1;
        for (var row = 0; row < lineMatches.Count; ++row)
        {
            if (lineMatches[row][which] != LCS.MatchType.Gap)
            {
                ++lastRealLine;
            }

            diff.LineMap[row] = lastRealLine;

            // Rows are visited in increasing order, so the first row seen is the smallest one.
            if (!firstRowForLine.ContainsKey(lastRealLine))
            {
                firstRowForLine.Add(lastRealLine, row);
            }
        }

        diff.LineRevMap = firstRowForLine;
        diff.ColCompare = new List<Tuple<int, int>>[lineMatches.Count];
    }

    var cursor = new int[] { -1, -1 };
    for (var row = 0; row < lineMatches.Count; ++row)
    {
        var entry = lineMatches[row];
        if (entry.IsMatch)
        {
            // Both sides consume a line; nothing to record for a matching row.
            ++cursor[0];
            ++cursor[1];
            continue;
        }

        var hasGap = false;
        for (var which = 0; which < 2; ++which)
        {
            if (entry[which] != LCS.MatchType.Gap)
            {
                ++cursor[which];
                continue;
            }

            // This side has no line here, so the opposite side's whole line is marked as different.
            sides[1 - which].diffData.ColCompare[row] = new List<Tuple<int, int>> { Tuple.Create(0, int.MaxValue) };
            hasGap = true;
        }

        if (hasGap)
        {
            continue;
        }

        // NOTE(review): the lambda is true for two alphanumeric or two whitespace characters;
        // how LCS.GetLCS uses it is not visible here.
        var columnMatches = LCS.GetLCS(formattedText[0][cursor[0]], formattedText[1][cursor[1]], (ch1, ch2) => (char.IsLetterOrDigit(ch1) && char.IsLetterOrDigit(ch2)) || (char.IsWhiteSpace(ch1) && char.IsWhiteSpace(ch2)));

        for (var which = 0; which < 2; ++which)
        {
            var lineColumns = columnMaps[which][cursor[which]];
            var spans = new List<Tuple<int, int>>();
            sides[which].diffData.ColCompare[row] = spans;

            var runStart = default(int?);
            var column = -1;

            // One extra step past the end flushes a run that reaches the end of the line.
            for (var step = 0; step <= columnMatches.Count; ++step)
            {
                var atEnd = step == columnMatches.Count;
                if (atEnd || (columnMatches[step][which] != LCS.MatchType.Gap))
                {
                    ++column;
                }

                if (!atEnd && !columnMatches[step].IsMatch)
                {
                    // Only a Mismatch on this side opens a run; a gap on this side does not.
                    if ((columnMatches[step][which] == LCS.MatchType.Mismatch) && !runStart.HasValue)
                    {
                        runStart = column;
                    }

                    continue;
                }

                if (runStart.HasValue)
                {
                    // Translate formatted columns back through the line's column map.
                    spans.Add(Tuple.Create(lineColumns[runStart.Value], lineColumns[column]));
                    runStart = null;
                }
            }
        }
    }
}