Example #1
0
        /// <summary>
        /// Initializes a new instance of the <see cref="StringMap" /> class, producing the mapped string for
        /// <paramref name="input" /> and the index map that relates it back to the original string.
        /// </summary>
        /// <remarks>
        /// The map is a flat array of triples, one triple per contiguous run of the original string: the start
        /// index of the run in the original string, the length of the run, and the running total of all run
        /// lengths added so far (including this run).
        /// </remarks>
        /// <param name="input">The input.</param>
        /// <param name="options">The options.</param>
        /// <exception cref="ArgumentNullException">
        ///   <paramref name="input" /> is <see langword="null" />.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        ///   <paramref name="options" /> is <c>Trim | CollapseWhiteSpace</c>, or <paramref name="input" /> is not
        ///   empty and <paramref name="options" /> is a combination that the constructor does not handle.</exception>
        internal StringMap(
            [NotNull] string input,
            TextOptions options = TextOptions.Default)
        {
            if (input == null)
            {
                throw new ArgumentNullException(nameof(input));
            }
            // Trim together with collapse is rejected up front, even for empty input.
            if (options == (TextOptions.Trim | TextOptions.CollapseWhiteSpace))
            {
                throw new ArgumentOutOfRangeException(nameof(options));
            }
            Original = input;
            Options  = options;
            int length = input.Length;

            // Empty input maps to the empty string regardless of the remaining options.
            if (length < 1)
            {
                Mapped = string.Empty;
                Map    = _emptyMap;
                return;
            }
            // No options: the mapped string is the input itself, described by a single run covering everything.
            if (options == TextOptions.None)
            {
                Mapped = input;
                Map    = new[] { 0, length, length };
                return;
            }

            StringBuilder builder = new StringBuilder(Original.Length);
            List <int>    map     = new List <int>(4);

            // i      - index of the NEXT character to read (it is incremented as each character is read).
            // offset - start index, in the original string, of the run currently being accumulated.
            // count  - running total of the run lengths recorded so far.
            int  i      = 0;
            int  offset = 0;
            int  count  = 0;
            char c;
            bool skip;

            // Records the run [s, e) of the original string as a (start, length, running total) triple.
            Action <int, int> addMap = (s, e) =>
            {
                int len = e - s;
                count += len;
                map.Add(s);
                map.Add(len);
                map.Add(count);
            };

            // ReSharper disable EventExceptionNotDocumented
            switch (options)
            {
            /*
             * Simplest case is to skip all white space - this is less useful in that it effectively ignores word
             * boundaries.  Note this implicitly 'trims', 'normalizes line endings' and 'collapses white space' as
             * ALL white space is ignored.
             */
            case TextOptions.IgnoreWhiteSpace:
                while (i < length)
                {
                    c = Original[i++];
                    if (char.IsWhiteSpace(c))
                    {
                        // Close the run that ended just before this white space character (if it is not empty)
                        // and start the next run after it.
                        if (i - offset > 1)
                        {
                            addMap(offset, i - 1);
                        }
                        offset = i;
                        continue;
                    }
                    builder.Append(c);
                }
                // Close the final run, if any.
                if (i - offset > 0)
                {
                    addMap(offset, i);
                }
                break;

            /*
             * Collapse white space ignores all but the first white space character, effectively preserving the
             * word boundaries.  Note this implicitly 'normalizes line endings' as only the first character of
             * white space is preserved, so only one line ending character will be kept.
             *
             * It makes no sense to allow this in conjunction with trim as line breaks are effectively treated the
             * same as any white space.
             */
            case TextOptions.CollapseWhiteSpace:
                // Here skip is true while we are inside a run of white space whose first character was kept.
                skip = false;
                while (i < length)
                {
                    c = Original[i++];
                    if (char.IsWhiteSpace(c))
                    {
                        if (!skip)
                        {
                            // First white space character of a run: keep it as part of the current run.
                            skip = true;
                            builder.Append(c);
                            continue;
                        }
                        // Subsequent white space: close the current run before it and restart after it.
                        if (i - offset > 1)
                        {
                            addMap(offset, i - 1);
                        }
                        offset = i;
                        continue;
                    }
                    builder.Append(c);
                    skip = false;
                }
                // Close the final run, if any.
                if (i - offset > 0)
                {
                    addMap(offset, i);
                }
                break;

            /*
             * Trim and NormalizeLineEndings both require line detection and so we treat together, they can also
             * be used in combination.
             */
            case TextOptions.Trim:
            case TextOptions.NormalizeLineEndings:
            case TextOptions.Trim | TextOptions.NormalizeLineEndings:
                bool normalizeLineEndings = options.HasFlag(TextOptions.NormalizeLineEndings);
                bool trim = options.HasFlag(TextOptions.Trim);

                // Here skip is true while no non-white space character has been seen since the last line ending
                // character (or since leading white space was trimmed).  It starts out false, which is why the
                // very first character is detected separately with 'i < 2'.
                skip = false;

                // When trimming, holds white space seen after content on the current line.  It is written to the
                // output if more content follows, and dropped at a line ending or at the end of the input.
                StringBuilder trailingWhiteSpace = new StringBuilder(16);
                while (i < length)
                {
                    c = Original[i++];
                    if (char.IsWhiteSpace(c))
                    {
                        bool isNewLine = c == '\n';
                        // Detect line endings
                        if (isNewLine || c == '\r')
                        {
                            // The line ending character is kept unless ALL of the following hold: it is not the
                            // first character of the input, skip is set, we are normalizing and it is a '\n'.
                            if (i < 2 || !skip || !normalizeLineEndings || !isNewLine)
                            {
                                if (!skip && (i - offset > 1))
                                {
                                    int end = i;

                                    // Trim trailing white space if any
                                    if (trim && trailingWhiteSpace.Length > 0)
                                    {
                                        end -= trailingWhiteSpace.Length;
                                        trailingWhiteSpace.Clear();
                                    }
                                    // Close the run holding the line's content (excluding the line ending).
                                    if (end - offset > 1)
                                    {
                                        addMap(offset, end - 1);
                                    }
                                    // The next run starts at the line ending character itself.
                                    offset = i - 1;
                                }
                                // Preserve first line ending when normalizing or all line endings.
                                skip = true;
                                builder.Append(c);
                                continue;
                            }

                            // Skip second line ending character if == '\n'
                            // NOTE(review): this branch is reached for any '\n' seen while skip is set, i.e. after
                            // any line ending character or trimmed leading white space, not only after a '\r' -
                            // so "\n\n" is reduced to a single '\n'.  Confirm that this is intended.
                            if (i - offset > 1)
                            {
                                addMap(offset, i - 1);
                            }
                            offset = i;
                            continue;
                        }
                        if (trim)
                        {
                            if (i < 2 || skip)
                            {
                                skip = true;
                                // White space at start of line.
                                if (i - offset > 1)
                                {
                                    addMap(offset, i - 1);
                                }
                                offset = i;
                                continue;
                            }

                            // Hold whitespace in temporary builder.
                            trailingWhiteSpace.Append(c);
                            continue;
                        }
                    }

                    // Content (or white space that is not being trimmed): any held white space turned out not to
                    // be trailing, so emit it before the current character.
                    if (trailingWhiteSpace.Length > 0)
                    {
                        builder.Append(trailingWhiteSpace);
                        trailingWhiteSpace.Clear();
                    }
                    builder.Append(c);
                    skip = false;
                }

                // White space still held at the end of the input is trailing: exclude it from the final run.
                i -= trailingWhiteSpace.Length;
                trailingWhiteSpace.Clear();

                if (i - offset > 0)
                {
                    addMap(offset, i);
                }
                break;

            default:
                throw new ArgumentOutOfRangeException(nameof(options), options, null);
            }
            // ReSharper restore EventExceptionNotDocumented

            // No runs were recorded: treat the result as empty.
            if (map.Count < 1)
            {
                Mapped = string.Empty;
                Map    = _emptyMap;
                return;
            }
            // Store mapped string and the map.
            Mapped = builder.ToString();
            Map    = map.ToArray();
        }
        /// <summary>
        /// Initializes a new instance of the <see cref="StringDifferences" /> class by mapping both strings
        /// according to <paramref name="textOptions" />, diffing the mapped strings, and translating the
        /// resulting chunks back onto the original strings.
        /// </summary>
        /// <remarks>
        /// NOTE(review): <paramref name="offsetA" />, <paramref name="lengthA" />, <paramref name="offsetB" /> and
        /// <paramref name="lengthB" /> are not read by this constructor, so the whole of each string is compared
        /// and no range validation is performed here.  Confirm whether windowing is meant to be applied.
        /// </remarks>
        /// <param name="a">The 'A' string.</param>
        /// <param name="offsetA">The offset to the start of a window in the first string.</param>
        /// <param name="lengthA">The length of the window in the first string.</param>
        /// <param name="b">The 'B' string.</param>
        /// <param name="offsetB">The offset to the start of a window in the second string.</param>
        /// <param name="lengthB">The length of the window in the second string.</param>
        /// <param name="textOptions">The text options.</param>
        /// <param name="comparer">The character comparer.</param>
        /// <exception cref="ArgumentNullException"><paramref name="a" /> is <see langword="null" />.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="b" /> is <see langword="null" />.</exception>
        /// <exception cref="ArgumentNullException"><paramref name="comparer" /> is <see langword="null" />.</exception>
        /// <exception cref="Exception">The <paramref name="comparer" /> throws an exception.</exception>
        internal StringDifferences(
            [NotNull] string a,
            int offsetA,
            int lengthA,
            [NotNull] string b,
            int offsetB,
            int lengthB,
            TextOptions textOptions,
            [NotNull] Func <char, char, bool> comparer)
        {
            if (a == null)
            {
                throw new ArgumentNullException(nameof(a));
            }
            if (b == null)
            {
                throw new ArgumentNullException(nameof(b));
            }
            if (comparer == null)
            {
                throw new ArgumentNullException(nameof(comparer));
            }
            A = a;
            B = b;

            // Keep hold of the caller's comparer; the wrappers below defer to it for ordinary characters.
            Func <char, char, bool> suppliedComparer = comparer;
            if (textOptions != TextOptions.None)
            {
                if (textOptions.HasFlag(TextOptions.IgnoreWhiteSpace))
                {
                    // Any white space character matches any other white space character (this covers line
                    // endings as well).
                    comparer = (left, right) =>
                    {
                        if (char.IsWhiteSpace(left))
                        {
                            return char.IsWhiteSpace(right);
                        }
                        return suppliedComparer(left, right);
                    };
                }
                else if (textOptions.HasFlag(TextOptions.NormalizeLineEndings))
                {
                    // Only line endings are special: '\r' and '\n' are treated as the same character.
                    comparer = (left, right) =>
                    {
                        if (left == '\r' || left == '\n')
                        {
                            return right == '\r' || right == '\n';
                        }
                        return suppliedComparer(left, right);
                    };
                }
            }

            // Apply the text options to each string, then diff the mapped forms.
            StringMap          mappedA = a.ToMapped(textOptions);
            StringMap          mappedB = b.ToMapped(textOptions);
            Differences <char> diff    = mappedA.Diff(mappedB, comparer);
            int                total   = diff.Count;

            // With fewer than two chunks there is nothing to split: report the two original strings as one chunk.
            if (total < 2)
            {
                Chunk <char> only = diff.Single();
                // ReSharper disable once PossibleNullReferenceException
                _chunks = new[] { new StringChunk(only.AreEqual, a, 0, b, 0) };
                return;
            }

            // First pass: for every chunk work out where it ends in each ORIGINAL string (-1 when the chunk has
            // no content on that side), and remember the last chunk that has content on each side.
            int[] endsInA    = new int[total];
            int[] endsInB    = new int[total];
            int   finalWithA = 0;
            int   finalWithB = 0;

            for (int n = 0; n < total; n++)
            {
                Chunk <char>          current = diff[n];
                ReadOnlyWindow <char> windowA = current.A;
                ReadOnlyWindow <char> windowB = current.B;

                bool hasA = windowA != null;
                endsInA[n] = hasA
                    ? mappedA.GetOriginalIndex(windowA.Offset + windowA.Count - 1) + 1
                    : -1;
                if (hasA)
                {
                    finalWithA = n;
                }

                bool hasB = windowB != null;
                endsInB[n] = hasB
                    ? mappedB.GetOriginalIndex(windowB.Offset + windowB.Count - 1) + 1
                    : -1;
                if (hasB)
                {
                    finalWithB = n;
                }
            }

            // Second pass: cut the original strings at those split points to build the final chunks.
            StringChunk[] rebuilt = new StringChunk[total];
            int           cursorA = 0;
            int           cursorB = 0;

            for (int n = 0; n < total; n++)
            {
                // The last chunk with content on a side is extended to the end of that original string.
                int stopA = n == finalWithA ? mappedA.OriginalCount : endsInA[n];
                int stopB = n == finalWithB ? mappedB.OriginalCount : endsInB[n];

                bool sideA = stopA >= 0;
                bool sideB = stopB >= 0;

                string textA = sideA ? a.Substring(cursorA, stopA - cursorA) : null;
                string textB = sideB ? b.Substring(cursorB, stopB - cursorB) : null;

                rebuilt[n] = new StringChunk(
                    diff[n].AreEqual,
                    textA,
                    sideA ? cursorA : -1,
                    textB,
                    sideB ? cursorB : -1);

                // Advance only on the sides that actually contributed text to this chunk.
                if (sideA)
                {
                    cursorA = stopA;
                }
                if (sideB)
                {
                    cursorB = stopB;
                }
            }

            _chunks = rebuilt;
        }