/// <summary>
/// Finds the differences between two texts, comparing them line by line.
/// Every line is first translated into a number by DiffCodes, using one hashtable
/// shared by both texts so that equal lines receive equal numbers; the comparison
/// itself then only works on the two resulting number sequences.
/// </summary>
/// <param name="a">A-version of the text (usually the old one).</param>
/// <param name="b">B-version of the text (usually the new one).</param>
/// <param name="options">Comparison options handed on to DiffCodes; may be null.</param>
/// <returns>An array of Items that describe the differences.</returns>
static Item[] DiffText(LineCollection a, LineCollection b, DiffOptions options)
{
    // One table for both texts, pre-sized for the case that every line is unique.
    Hashtable lineCodes = new Hashtable(a.Count + b.Count);

    // The A-version of the data (original data) to be compared.
    DiffData dataA = new DiffData(DiffCodes(a, lineCodes, options));

    // The B-version of the data (modified data) to be compared.
    DiffData dataB = new DiffData(DiffCodes(b, lineCodes, options));

    // The table is not needed past this point; drop the reference so it may be collected.
    lineCodes = null;

    int maxLength = dataA.Length + dataB.Length + 1;
    int vectorSize = 2 * maxLength + 2;

    // vector for the (0,0) to (x,y) search
    int[] downVector = new int[vectorSize];
    // vector for the (u,v) to (N,M) search
    int[] upVector = new int[vectorSize];

    // Search over the complete range of both sequences.
    LCS(dataA, 0, dataA.Length, dataB, 0, dataB.Length, downVector, upVector);

    Optimize(dataA);
    Optimize(dataB);

    return CreateDiffs(dataA, dataB);
} // DiffText
} // Diff

/// <summary>
/// Converts all lines of the text into numbers, one number per line, so that further
/// work only has to compare simple integers. Lines that are equal (after the
/// normalization requested by <paramref name="options"/>) receive the same number;
/// a line that is not yet in <paramref name="h"/> gets the next unused number and is
/// added to the table.
/// </summary>
/// <param name="lines">The input text; the Text of each entry is used.</param>
/// <param name="h">Externally initialized hashtable that stores every line text seen so far
/// together with its number. Passing the same table for both texts makes equal lines
/// in either text map to the same number.</param>
/// <param name="options">Comparison options. May be null, in which case the line text
/// is used unmodified. IgnoreWhitespace collapses every run of whitespace into a single
/// space; IgnoreCase lowercases the text before it is looked up.</param>
/// <returns>An array of integers with one entry per line.</returns>
private static int[] DiffCodes(LineCollection lines, Hashtable h, DiffOptions options)
{
    // The option flags do not change while the lines are processed; read them once.
    bool ignoreWhitespace = options != null && options.IgnoreWhitespace;
    bool ignoreCase = options != null && options.IgnoreCase;

    // Numbers already handed out are 1..h.Count, so continue after the last one.
    int lastUsedCode = h.Count;
    int[] codes = new int[lines.Count];

    for (int i = 0; i < codes.Length; ++i)
    {
        string s = lines[i].Text;

        if (ignoreWhitespace)
        {
            s = Regex.Replace(s, "\\s+", " "); // TODO: optimization: faster blank removal.
        }

        if (ignoreCase)
        {
            // Invariant lowercasing: the result must not depend on the current culture
            // (e.g. under tr-TR "I".ToLower() is a dotless i and would not match "i").
            s = s.ToLowerInvariant();
        }

        object aCode = h[s];
        if (aCode == null)
        {
            // First occurrence of this line: assign and remember a new number.
            lastUsedCode++;
            h[s] = lastUsedCode;
            codes[i] = lastUsedCode;
        }
        else
        {
            codes[i] = (int)aCode;
        } // if
    } // for

    return codes;
} // DiffCodes
/// <summary>
/// Command-line entry point: opens the two files named by the first two arguments,
/// diffs them with DiffText (no options) and writes the differences to standard output.
/// </summary>
/// <param name="args">
/// args[0] is the path of the A (original) file, args[1] the path of the B (modified) file.
/// An optional third argument "--normal" selects the classic listing written by
/// WriteNormalDiff; otherwise the hunks are written by WriteUnifiedDiff with three
/// lines of leading context.
/// </param>
public static void Main(string[] args)
{
    // Without two paths there is nothing to compare; report it instead of
    // failing with an IndexOutOfRangeException.
    if (args == null || args.Length < 2)
    {
        Console.Error.WriteLine("usage: <fileA> <fileB> [--normal]");
        Environment.ExitCode = 1;
        return;
    }

    LineCollection l1 = new LineCollection();
    l1.Open(args[0]);
    LineCollection l2 = new LineCollection();
    l2.Open(args[1]);

    Item[] diffs = DiffText(l1, l2, null);

    bool normalFormat = args.Length > 2 && args[2] == "--normal";
    if (normalFormat)
    {
        WriteNormalDiff(diffs, l1, l2);
    }
    else
    {
        WriteUnifiedDiff(diffs, l1, l2, 3);
    }
}

/// <summary>
/// Writes each difference as a hunk in unified-diff style: an "@@ -start,count +start,count @@"
/// header, up to <paramref name="context"/> unchanged lines before the change (prefixed
/// with a space), then the deleted A lines (prefixed "-") and the inserted B lines (prefixed "+").
/// No trailing context and no file header lines are written.
/// </summary>
private static void WriteUnifiedDiff(Item[] diffs, LineCollection a, LineCollection b, int context)
{
    // Index of the first A line after the previous hunk. Context must not reach
    // back into it, otherwise lines deleted there would be shown as unchanged.
    // NOTE(review): assumes DiffText returns the items ordered by ascending StartA - confirm.
    int previousEndA = 0;

    for (int i = 0; i < diffs.Length; ++i)
    {
        Item diff = diffs[i];

        int firstContext = diff.StartA - context;
        if (firstContext < previousEndA)
            firstContext = previousEndA;
        if (firstContext > diff.StartA)
            firstContext = diff.StartA; // defensive: never a negative context count

        // The context lines are common to both files, so they count for both sides
        // and sit directly in front of StartB in the B file.
        int leading = diff.StartA - firstContext;
        int lengthA = leading + diff.DeletedA;
        int lengthB = leading + diff.InsertedB;
        int firstB = diff.StartB - leading;

        Console.WriteLine("@@ -{0},{1} +{2},{3} @@",
            HunkStart(firstContext, lengthA), lengthA,
            HunkStart(firstB, lengthB), lengthB);

        for (int line = firstContext; line < diff.StartA; ++line)
        {
            Console.Write(" ");
            Console.WriteLine(a[line].Text);
        }

        for (int j = 0; j < diff.DeletedA; ++j)
        {
            Console.Write("-");
            Console.WriteLine(a[diff.StartA + j].Text);
        }

        for (int j = 0; j < diff.InsertedB; ++j)
        {
            Console.Write("+");
            Console.WriteLine(b[diff.StartB + j].Text);
        }

        previousEndA = diff.StartA + diff.DeletedA;
    }
}

/// <summary>
/// Converts a 0-based first-line index into the start number of a unified hunk header:
/// 1-based for a non-empty range; for an empty range the number of the line preceding it.
/// </summary>
private static int HunkStart(int zeroBasedStart, int length)
{
    return length == 0 ? zeroBasedStart : zeroBasedStart + 1;
}

/// <summary>
/// Writes each difference as the item itself followed by the deleted A lines
/// (prefixed "&lt; "), a "---" separator and the inserted B lines (prefixed "&gt; ").
/// </summary>
private static void WriteNormalDiff(Item[] diffs, LineCollection a, LineCollection b)
{
    for (int i = 0; i < diffs.Length; ++i)
    {
        Console.WriteLine(diffs[i]);

        for (int j = 0; j < diffs[i].DeletedA; ++j)
        {
            Console.Write("< ");
            Console.WriteLine(a[diffs[i].StartA + j].Text);
        }

        Console.WriteLine("---");

        for (int j = 0; j < diffs[i].InsertedB; ++j)
        {
            Console.Write("> ");
            Console.WriteLine(b[diffs[i].StartB + j].Text);
        }
    }
}