/// <summary>
/// Initializes a new instance of the <see cref="StringMap" /> class, building the mapped form of
/// <paramref name="input" /> together with a map back to the original character positions.
/// </summary>
/// <param name="input">The input.</param>
/// <param name="options">The options.</param>
/// <remarks>
/// <para>
/// The map is a flat array of triples: the start index of a run in the original string, the length of
/// that run, and the running total of all run lengths up to and including that run.
/// </para>
/// <para>
/// An empty <paramref name="input" /> yields an empty mapped string and the shared empty map;
/// <see cref="TextOptions.None" /> yields the input unchanged with a single run covering all of it.
/// </para>
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="input" /> is <see langword="null" />.</exception>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="options" /> combines <see cref="TextOptions.Trim" /> with
/// <see cref="TextOptions.CollapseWhiteSpace" />, or (for a non-empty <paramref name="input" />) is any other
/// value that is not handled by the switch below.
/// </exception>
internal StringMap(
    [NotNull] string input,
    TextOptions options = TextOptions.Default)
{
    if (input == null)
    {
        throw new ArgumentNullException(nameof(input));
    }

    if (options == (TextOptions.Trim | TextOptions.CollapseWhiteSpace))
    {
        throw new ArgumentOutOfRangeException(nameof(options));
    }

    Original = input;
    Options = options;

    int length = input.Length;
    if (length < 1)
    {
        Mapped = string.Empty;
        Map = _emptyMap;
        return;
    }

    if (options == TextOptions.None)
    {
        // Nothing is removed, so a single run covers the whole input.
        Mapped = input;
        Map = new[] { 0, length, length };
        return;
    }

    StringBuilder builder = new StringBuilder(Original.Length);
    List<int> map = new List<int>(4);

    // i is always one past the character currently being examined (it is incremented as the character is read).
    int i = 0;

    // Start (in the original string) of the run that has not yet been written to the map.
    int offset = 0;

    // Running total of the lengths of all runs added to the map so far.
    int count = 0;
    char c;
    bool skip;

    // Adds the run [s, e) of the original string to the map as a (start, length, cumulative length) triple.
    Action<int, int> addMap = (s, e) =>
    {
        int len = e - s;
        count += len;
        map.Add(s);
        map.Add(len);
        map.Add(count);
    };

    // ReSharper disable EventExceptionNotDocumented
    switch (options)
    {
        /*
         * Simplest case is to skip all white space - this is less useful in that it effectively ignores word
         * boundaries. Note this implicitly 'trims', 'normalizes line endings' and 'collapses white space' as
         * ALL white space is ignored.
         */
        case TextOptions.IgnoreWhiteSpace:
            while (i < length)
            {
                c = Original[i++];
                if (char.IsWhiteSpace(c))
                {
                    // Close the pending run (if it holds at least one kept character) just before this
                    // white space character, then restart the run after it.
                    if (i - offset > 1)
                    {
                        addMap(offset, i - 1);
                    }

                    offset = i;
                    continue;
                }

                builder.Append(c);
            }

            // Flush the final run, if any.
            if (i - offset > 0)
            {
                addMap(offset, i);
            }

            break;

        /*
         * Collapse white space ignores all but the first white space character, effectively preserving the
         * word boundaries. Note this implicitly 'normalizes line endings' as only the first character of
         * white space is preserved, so only one line ending character will be kept.
         *
         * It makes no sense to allow this in conjunction with trim as line breaks are effectively treated the
         * same as any white space.
         */
        case TextOptions.CollapseWhiteSpace:
            // Here skip is true while inside a run of white space whose first character has already been kept.
            skip = false;
            while (i < length)
            {
                c = Original[i++];
                if (char.IsWhiteSpace(c))
                {
                    if (!skip)
                    {
                        // First white space character of a run is kept.
                        skip = true;
                        builder.Append(c);
                        continue;
                    }

                    // Subsequent white space characters are dropped; close the pending run before this one.
                    if (i - offset > 1)
                    {
                        addMap(offset, i - 1);
                    }

                    offset = i;
                    continue;
                }

                builder.Append(c);
                skip = false;
            }

            // Flush the final run, if any.
            if (i - offset > 0)
            {
                addMap(offset, i);
            }

            break;

        /*
         * Trim and NormalizeLineEndings both require line detection and so we treat together, they can also
         * be used in combination.
         */
        case TextOptions.Trim:
        case TextOptions.NormalizeLineEndings:
        case TextOptions.Trim | TextOptions.NormalizeLineEndings:
            bool normalizeLineEndings = options.HasFlag(TextOptions.NormalizeLineEndings);
            bool trim = options.HasFlag(TextOptions.Trim);

            // Here skip is true while at the start of a line, i.e. after a preserved line ending or after
            // skipped leading white space; it is reset to false once a character has been kept on the line.
            skip = false;

            // Buffers white space seen after kept content on the current line (only filled when trimming).
            // It is flushed to the output if more content follows, and discarded at a line ending or at the
            // end of the input, which is what trims trailing white space.
            StringBuilder trailingWhiteSpace = new StringBuilder(16);

            while (i < length)
            {
                c = Original[i++];
                if (char.IsWhiteSpace(c))
                {
                    bool isNewLine = c == '\n';

                    // Detect line endings
                    if (isNewLine || c == '\r')
                    {
                        // NOTE(review): when normalizing, a '\n' is dropped whenever skip is already true,
                        // which covers "\r\n" but also a '\n' following another '\n' (a blank line) or
                        // following skipped leading white space - confirm this is intended.
                        if (i < 2 || !skip || !normalizeLineEndings || !isNewLine)
                        {
                            if (!skip && (i - offset > 1))
                            {
                                int end = i;

                                // Trim trailing white space if any
                                if (trim && trailingWhiteSpace.Length > 0)
                                {
                                    end -= trailingWhiteSpace.Length;
                                    trailingWhiteSpace.Clear();
                                }

                                if (end - offset > 1)
                                {
                                    addMap(offset, end - 1);
                                }

                                // The next run starts at the line ending character itself.
                                offset = i - 1;
                            }

                            // Preserve first line ending when normalizing or all line endings.
                            skip = true;
                            builder.Append(c);
                            continue;
                        }

                        // Skip second line ending character if == '\n'
                        if (i - offset > 1)
                        {
                            addMap(offset, i - 1);
                        }

                        offset = i;
                        continue;
                    }

                    if (trim)
                    {
                        if (i < 2 || skip)
                        {
                            skip = true;

                            // White space at start of line.
                            if (i - offset > 1)
                            {
                                addMap(offset, i - 1);
                            }

                            offset = i;
                            continue;
                        }

                        // Hold whitespace in temporary builder.
                        trailingWhiteSpace.Append(c);
                        continue;
                    }
                }

                // A character is being kept: any buffered white space turned out to be interior, so emit it.
                if (trailingWhiteSpace.Length > 0)
                {
                    builder.Append(trailingWhiteSpace);
                    trailingWhiteSpace.Clear();
                }

                builder.Append(c);
                skip = false;
            }

            // White space still buffered at the end of the input is trailing; exclude it from the final run.
            i -= trailingWhiteSpace.Length;
            trailingWhiteSpace.Clear();
            if (i - offset > 0)
            {
                addMap(offset, i);
            }

            break;

        default:
            throw new ArgumentOutOfRangeException(nameof(options), options, null);
    }
    // ReSharper restore EventExceptionNotDocumented

    if (map.Count < 1)
    {
        // No run survived the mapping.
        Mapped = string.Empty;
        Map = _emptyMap;
        return;
    }

    // Store mapped string and the map.
    Mapped = builder.ToString();
    Map = map.ToArray();
}
/// <summary>
/// Initializes a new instance of the <see cref="StringDifferences" /> class by diffing the mapped forms of
/// two strings and projecting the resulting chunks back onto the original strings.
/// </summary>
/// <param name="a">The 'A' string.</param>
/// <param name="offsetA">The offset to the start of a window in the first string.</param>
/// <param name="lengthA">The length of the window in the first string.</param>
/// <param name="b">The 'B' string.</param>
/// <param name="offsetB">The offset to the start of a window in the second string.</param>
/// <param name="lengthB">The length of the window in the second string.</param>
/// <param name="textOptions">The text options used to map both strings and to adapt the comparer.</param>
/// <param name="comparer">The character comparer.</param>
/// <remarks>
/// NOTE(review): <paramref name="offsetA" />, <paramref name="lengthA" />, <paramref name="offsetB" /> and
/// <paramref name="lengthB" /> are not read anywhere in this constructor body, so no range validation happens
/// here and the whole of both strings is mapped and diffed - confirm whether windowing is handled elsewhere.
/// </remarks>
/// <exception cref="ArgumentNullException"><paramref name="a" /> is <see langword="null" />.</exception>
/// <exception cref="ArgumentNullException"><paramref name="b" /> is <see langword="null" />.</exception>
/// <exception cref="ArgumentNullException"><paramref name="comparer" /> is <see langword="null" />.</exception>
/// <exception cref="Exception">
/// Presumably propagated when the <paramref name="comparer" /> throws while the diff is computed.
/// </exception>
internal StringDifferences(
    [NotNull] string a,
    int offsetA,
    int lengthA,
    [NotNull] string b,
    int offsetB,
    int lengthB,
    TextOptions textOptions,
    [NotNull] Func<char, char, bool> comparer)
{
    if (a == null)
    {
        throw new ArgumentNullException(nameof(a));
    }

    if (b == null)
    {
        throw new ArgumentNullException(nameof(b));
    }

    if (comparer == null)
    {
        throw new ArgumentNullException(nameof(comparer));
    }

    A = a;
    B = b;

    // Decorate the supplied comparer, where the options require it, so special characters compare as equal.
    Func<char, char, bool> effectiveComparer = comparer;
    if (textOptions != TextOptions.None)
    {
        if (textOptions.HasFlag(TextOptions.IgnoreWhiteSpace))
        {
            // Any white space character matches any other white space character (line endings included).
            effectiveComparer = (left, right) =>
            {
                if (char.IsWhiteSpace(left))
                {
                    return char.IsWhiteSpace(right);
                }

                return comparer(left, right);
            };
        }
        else if (textOptions.HasFlag(TextOptions.NormalizeLineEndings))
        {
            // '\r' and '\n' are interchangeable; everything else goes to the supplied comparer.
            effectiveComparer = (left, right) =>
            {
                if (left == '\r' || left == '\n')
                {
                    return right == '\r' || right == '\n';
                }

                return comparer(left, right);
            };
        }
    }

    // Reduce both strings according to the text options, then diff the reduced forms.
    StringMap mappedA = a.ToMapped(textOptions);
    StringMap mappedB = b.ToMapped(textOptions);
    Differences<char> diff = mappedA.Diff(mappedB, effectiveComparer);
    int total = diff.Count;

    // Fewer than two chunks: the single chunk is reported against the complete original strings.
    if (total < 2)
    {
        Chunk<char> only = diff.Single();

        // ReSharper disable once PossibleNullReferenceException
        _chunks = new[] { new StringChunk(only.AreEqual, a, 0, b, 0) };
        return;
    }

    // Pass 1: for every chunk work out where it ends in each original string (-1 when the chunk has no
    // window on that side), and remember the last chunk that has a window on each side.
    int[] endsInA = new int[total];
    int[] endsInB = new int[total];
    int finalWithA = 0;
    int finalWithB = 0;
    for (int index = 0; index < total; index++)
    {
        Chunk<char> current = diff[index];
        ReadOnlyWindow<char> windowA = current.A;
        ReadOnlyWindow<char> windowB = current.B;

        if (windowA == null)
        {
            endsInA[index] = -1;
        }
        else
        {
            // Original index of the window's last mapped character, plus one to make the end exclusive.
            endsInA[index] = mappedA.GetOriginalIndex(windowA.Offset + windowA.Count - 1) + 1;
            finalWithA = index;
        }

        if (windowB == null)
        {
            endsInB[index] = -1;
        }
        else
        {
            endsInB[index] = mappedB.GetOriginalIndex(windowB.Offset + windowB.Count - 1) + 1;
            finalWithB = index;
        }
    }

    // Pass 2: cut the original strings at the split points to build the final chunks.
    StringChunk[] result = new StringChunk[total];
    int cursorA = 0;
    int cursorB = 0;
    for (int index = 0; index < total; index++)
    {
        // The last chunk with a window on a side is extended to that map's OriginalCount.
        int endA = index == finalWithA ? mappedA.OriginalCount : endsInA[index];
        int endB = index == finalWithB ? mappedB.OriginalCount : endsInB[index];

        string textA = null;
        int startA = -1;
        if (endA >= 0)
        {
            startA = cursorA;
            textA = a.Substring(startA, endA - startA);
            cursorA = endA;
        }

        string textB = null;
        int startB = -1;
        if (endB >= 0)
        {
            startB = cursorB;
            textB = b.Substring(startB, endB - startB);
            cursorB = endB;
        }

        result[index] = new StringChunk(diff[index].AreEqual, textA, startA, textB, startB);
    }

    _chunks = result;
}