Exemplo n.º 1
0
        /// <summary>
        /// Compares substrings of two specified strings using the specified comparison rules,
        /// and returns an integer that indicates their relative position in the sort order.
        /// </summary>
        /// <param name="strA">The first string to use in the comparison.</param>
        /// <param name="indexA">The zero-based starting character position of the substring within <paramref name="strA"/>.</param>
        /// <param name="lengthA">The number of characters constituting the substring from <paramref name="strA"/>.</param>
        /// <param name="strB">The second string to use in the comparison.</param>
        /// <param name="indexB">The zero-based starting character position of the substring within <paramref name="strB"/>.</param>
        /// <param name="lengthB">The number of characters constituting the substring from <paramref name="strB"/>.</param>
        /// <param name="comparisonType">One of the enumeration values that specifies the rules to use in the comparison.</param>
        /// <returns>
        /// A signed integer that indicates the lexical relationship between the two comparands.
        /// <list type="table">
        /// <listheader>
        /// <term>Value</term>
        /// <term>Condition</term>
        /// </listheader>
        /// <item>
        /// <term>Less than zero</term>
        /// <term>The substring in <paramref name="strA"/> is less than the substring in <paramref name="strB"/>.</term>
        /// </item>
        /// <item>
        /// <term>Zero</term>
        /// <term>The substrings are equal, or <paramref name="lengthA"/> and <paramref name="lengthB"/> are both zero.</term>
        /// </item>
        /// <item>
        /// <term>Greater than zero</term>
        /// <term>The substring in <paramref name="strA"/> is greater than the substring in <paramref name="strB"/>.</term>
        /// </item>
        /// </list>
        /// </returns>
        /// <remarks>
        /// <para>
        /// This method is similar to the <see cref="string.Compare(string, int, string, int, int, StringComparison)"/> method
        /// in the .NET Framework Class Library, but allows different lengths to be specified for the two substrings.
        /// It is implemented by calling the <see cref="CompareInfo.Compare(string, int, int, string, int, int, CompareOptions)"/> method
        /// on the appropriate <see cref="CompareInfo"/> instance with the appropriate <see cref="CompareOptions"/> value
        /// for each known value of <paramref name="comparisonType"/>.
        /// For performance, substring instantiation is avoided, working with the start indexes and lengths instead.
        /// </para>
        /// <para>
        /// The implementation of this method is adapted from the internal implementations for
        /// <see cref="string.Compare(string, int, string, int, int, StringComparison)"/>
        /// (<see href="https://referencesource.microsoft.com/#mscorlib/system/string.cs,1ae4d07b01230bb6">source</see>)
        /// and <see cref="string.IndexOf(string, int, int, StringComparison)"/>
        /// (<see href="https://referencesource.microsoft.com/#mscorlib/system/string.cs,ef82268cfee756fe">source</see>).
        /// </para>
        /// </remarks>
        public static int Compare(string strA, int indexA, int lengthA, string strB, int indexB, int lengthB, StringComparison comparisonType)
        {
            // The enumeration argument is checked first, so this check runs even when a string is null.
            ArgumentValidate.EnumDefined(comparisonType, nameof(comparisonType));

            // Null handling precedes index/length validation: a null string compares
            // less than any non-null string, and two nulls compare as equal.
            if (strA == null || strB == null)
            {
                if (strA != null)
                {
                    return 1;
                }

                return strB == null ? 0 : -1;
            }

            // Both strings are non-null from here on; validate each index/length pair against its string.
            ArgumentValidate.StringIndexLength(strA, nameof(strA), indexA, nameof(indexA), lengthA, nameof(lengthA));
            ArgumentValidate.StringIndexLength(strB, nameof(strB), indexB, nameof(indexB), lengthB, nameof(lengthB));

            // Shortcuts that need no character inspection: two empty substrings,
            // or the very same span of the very same string instance.
            bool bothEmpty = lengthA == 0 && lengthB == 0;
            bool sameSpan = string.ReferenceEquals(strA, strB) && indexA == indexB && lengthA == lengthB;
            if (bothEmpty || sameSpan)
            {
                return 0;
            }

            // Everything else is delegated to CompareInner.
            return CompareInner(strA, indexA, lengthA, strB, indexB, lengthB, comparisonType);
        }
Exemplo n.º 2
0
        /// <summary>
        /// Reports the zero-based index and length of the first occurrence of the specified substring in the source string.
        /// </summary>
        /// <param name="source">The source string in which to search.</param>
        /// <param name="substring">The substring to seek.</param>
        /// <param name="searchIndex">The zero-based starting character position in <paramref name="source"/> to search from.</param>
        /// <param name="searchLength">The number of character positions in <paramref name="source"/> to search through.</param>
        /// <param name="comparisonType">One of the enumeration values that specifies the rules for the search.</param>
        /// <param name="matchIndex">
        /// When this method returns, contains the zero-based starting character position of the match, if found;
        /// or -1 if no match is found.
        /// If <paramref name="substring"/> is the empty string (<c>""</c>), the value will be <paramref name="searchIndex"/>.
        /// </param>
        /// <param name="matchLength">
        /// When this method returns, contains the length (in characters) of the match, if found;
        /// or -1 if no match is found.
        /// If <paramref name="substring"/> is the empty string (<c>""</c>), the value will be 0.
        /// </param>
        /// <remarks>
        /// <para>
        /// This method builds upon the <see cref="string.IndexOf(string, int, int, StringComparison)"/> method
        /// from the .NET Framework Class Library, but extends it to also return the <i>length</i> of the match,
        /// allowing string manipulation operations to subsequently be performed correctly.
        /// </para>
        /// <para>
        /// Culture-sensitive comparisons can result in a match that has a different length from the specified <paramref name="substring"/> parameter.
        /// For example, under the en-US culture, <c>"æ"</c> and <c>"ae"</c> are considered equal.
        /// <c>"Encyclopædia".IndexOf("aedia")</c> evaluates to 8, indicating a match.
        /// However, the length of the matched substring, <c>"ædia"</c>, is 4, whilst the length of the searched-for parameter, <c>"aedia"</c>, is 5.
        /// This can lead to subtle bugs. Consider the following code for removing the first occurrence of substring from a string,
        /// taken from a <see href="http://stackoverflow.com/a/2201648/1149773">highly-upvoted answer</see> on Stack Overflow:
        /// <code>
        /// int index = sourceString.IndexOf(removeString);
        /// string cleanPath = index &lt; 0 ? sourceString : sourceString.Remove(index, removeString.Length);
        /// </code>
        /// If one were to run the above code snippet with <c>sourceString = "Encyclopædia"</c> and <c>removeString = "aedia"</c>,
        /// then it would throw an <see cref="ArgumentOutOfRangeException"/>.
        /// On the other hand, one would get correct results by using the current extension method:
        /// <code>
        /// int index, length;
        /// sourceString.Find(removeString, StringComparison.CurrentCulture, out index, out length);
        /// string cleanPath = index &lt; 0 ? sourceString : sourceString.Remove(index, length);
        /// </code>
        /// </para>
        /// <para>
        /// There is no public functionality provided in the .NET Framework Class Library that performs such substring searches.
        /// The current method first calls <see cref="string.IndexOf(string, int, int, StringComparison)"/> to get the
        /// starting position of the match, then iteratively attempts to identify its length.
        /// It begins with the most likely case (hot path) of the match having the same length as <paramref name="substring"/>,
        /// verifying this through a call to <see cref="SubstringCompare.Compare(string, int, int, string, int, int, StringComparison)"/>.
        /// If not equal, it would attempt to decrement and increment the length of the match by one character each time,
        /// calling the aforementioned method until equality is confirmed.
        /// </para>
        /// <para>
        /// The approach of iterating over the substring's length is endorsed by
        /// <see href="http://stackoverflow.com/questions/15980310/how-can-i-perform-a-culture-sensitive-starts-with-operation-from-the-middle-of/16001302?noredirect=1#comment22956089_16062528">usr</see>:
        /// </para>
        /// <blockquote>
        /// I have solved a similar problem once like this (search-string highlighting in HTML). I did it similarly.
        /// You can tune the loop and search strategy in a way that makes it completed very quickly by checking the likely cases first.
        /// The nice thing about this is that it seems to be totally correct and no Unicode details leak into your code.
        /// </blockquote>
        /// <para>
        /// An alternative to this approach sacrifices portability for performance by executing a P/Invoke call to the
        /// <see href="https://msdn.microsoft.com/en-us/library/dd318056%28v=vs.85%29.aspx"><c>FindNLSString</c></see> function
        /// (or related), as is done internally within the <see cref="string"/> class implementation.
        /// This approach is described under <see href="http://stackoverflow.com/a/20484094/1149773">this Stack Overflow answer</see>.
        /// </para>
        /// <para>
        /// Another alternative approach involves subjecting the strings to Unicode normalization
        /// (through the <see cref="string.Normalize(NormalizationForm)"/> method) before comparison,
        /// as suggested in <see href="http://stackoverflow.com/a/16001302/1149773">this Stack Overflow answer</see>.
        /// However, this approach is undesirable since the returned results would only apply to the <i>normalized</i> forms
        /// of <paramref name="source"/> and <paramref name="substring"/>, requiring the original strings to be discarded
        /// and replaced by their normalized forms for all subsequent processing and storage.
        /// </para>
        /// <para>
        /// Furthermore, Unicode normalization would not always yield results consistent with <see cref="string.Compare(string, string)"/>
        /// or <see cref="string.Equals(string, string, StringComparison)"/> under <see cref="StringComparison.CurrentCulture"/>.
        /// As discussed under <see href="http://unicode.org/reports/tr15/">Unicode Normalization Forms</see>,
        /// <see cref="NormalizationForm.FormC"/> and <see cref="NormalizationForm.FormD"/> only support <i>canonical</i> mappings,
        /// such as between precomposed characters and combining character sequences – for example, <c>"é"</c> and <c>"e\u0301"</c>.
        /// However, the said forms do not perform <i>compatibility</i> mappings, as is required for ligatures.
        /// For example, <c>"æ"</c> is not decomposed to <c>"ae"</c>, nor <c>"\uFB03"</c> (the "ffi" ligature) to <c>"ffi"</c>, even though
        /// the said ligatures are considered to be equal to their corresponding character sequences under the en-US culture.
        /// <see cref="NormalizationForm.FormKC"/> and <see cref="NormalizationForm.FormKD"/> handle compatibility mappings,
        /// and can decompose some ligatures, such as <c>"\uFB03"</c>, but miss others, such as <c>"æ"</c>.
        /// (A <see href="http://stackoverflow.com/a/15485970/1149773">Stack Overflow answer</see> mentions that
        /// “Unicode 6.2 doesn't appear to contain a normative mapping from Æ to AE.”)
        /// The issue is made worse by the discrepancies between cultures – <c>"æ"</c> is equal to <c>"ae"</c> under en-US,
        /// but not under da-DK, as discussed under the MSDN documentation for
        /// <see href="https://msdn.microsoft.com/en-us/library/system.string%28v=vs.110%29.aspx#comparison">string comparison</see>.
        /// Thus, normalization (to any form) would not give results that are consistent with <see cref="StringComparison.CurrentCulture"/> comparisons.
        /// </para>
        /// <para>
        /// Yet another alternative involves iterating over the strings as a sequence of <i>text elements</i>,
        /// rather than UTF-16 code units, using the <see cref="StringInfo.GetNextTextElement(string, int)"/> method,
        /// as presented in <see href="http://stackoverflow.com/a/22513015/1149773">this Stack Overflow answer</see>.
        /// Results would be similar to those obtained from Unicode normalization: canonical mappings are honored,
        /// but compatibility mappings are not.
        /// </para>
        /// <list type="bullet">
        /// <listheader>References</listheader>
        /// <item><see href="http://stackoverflow.com/q/35485677/1149773">Get substring from string using culture-sensitive comparison</see>, <i>Stack Overflow</i></item>
        /// <item><see href="http://stackoverflow.com/q/20480016/1149773">Length of substring matched by culture-sensitive String.IndexOf method</see>, <i>Stack Overflow</i></item>
        /// <item><see href="http://stackoverflow.com/q/15980310/1149773">How can I perform a culture-sensitive “starts-with” operation from the middle of a string?</see> by Jon Skeet, <i>Stack Overflow</i></item>
        /// <item><see href="http://stackoverflow.com/q/9376621/1149773">Folding/Normalizing Ligatures (e.g. Æ to ae) Using (Core)Foundation</see>, <i>Stack Overflow</i></item>
        /// </list>
        /// </remarks>
        public static void Find(this string source, string substring, int searchIndex, int searchLength, StringComparison comparisonType, out int matchIndex, out int matchLength)
        {
            // Argument checks run in this fixed order: both strings, then the
            // search window within the source string, then the comparison type.
            ArgumentValidate.NotNull(source, nameof(source));
            ArgumentValidate.NotNull(substring, nameof(substring));
            ArgumentValidate.StringIndexLength(
                source, nameof(source),
                searchIndex, nameof(searchIndex),
                searchLength, nameof(searchLength));
            ArgumentValidate.EnumDefined(comparisonType, nameof(comparisonType));

            // The search itself, including assignment of both out parameters, is performed by FindInner.
            FindInner(
                source, substring,
                searchIndex, searchLength,
                comparisonType,
                out matchIndex, out matchLength);
        }
        /// <summary>
        /// Reports the zero-based index and length of the first occurrence of the specified substring in the source string.
        /// </summary>
        /// <param name="source">The source string in which to search.</param>
        /// <param name="searchValue">The substring to seek.</param>
        /// <param name="searchIndex">The zero-based starting character position in <paramref name="source"/> to search from.</param>
        /// <param name="searchLength">The number of character positions in <paramref name="source"/> to search through.</param>
        /// <param name="comparisonType">One of the enumeration values that specifies the rules for the search.</param>
        /// <param name="matchIndex">
        /// When this method returns, contains the zero-based starting character position of the match, if found;
        /// or -1 if no match is found.
        /// If <paramref name="searchValue"/> is the empty string (<c>""</c>),
        /// the value will be <paramref name="searchIndex"/>.
        /// </param>
        /// <param name="matchLength">
        /// When this method returns, contains the length (in characters) of the match, if found;
        /// or -1 if no match is found.
        /// If <paramref name="searchValue"/> is the empty string (<c>""</c>), the value will be 0.
        /// </param>
        /// <returns>
        /// <see langword="true"/> if a match for <paramref name="searchValue"/> is found in the source string;
        /// otherwise, <see langword="false"/>.
        /// </returns>
        /// <remarks>
        /// <para>
        /// This method builds upon the <see cref="string.IndexOf(string, int, int, StringComparison)"/> method
        /// from the .NET Framework Class Library, but extends it to also return the <i>length</i> of the match,
        /// allowing string manipulation operations to subsequently be performed correctly.
        /// </para>
        /// <para>
        /// Culture-sensitive comparisons can result in a match that has a different length
        /// than the specified <paramref name="searchValue"/> argument.
        /// For example, under the en-US culture, <c>"æ"</c> and <c>"ae"</c> are considered equal.
        /// <c>"Encyclopædia".IndexOf("aedia")</c> evaluates to 8, indicating a match.
        /// However, the length of the matched substring, <c>"ædia"</c>, is 4,
        /// whilst the length of the searched-for parameter, <c>"aedia"</c>, is 5.
        /// This can lead to subtle bugs.
        /// Consider the following code for removing the first occurrence of substring from a string,
        /// taken from a <see href="https://stackoverflow.com/a/2201648/1149773">highly-upvoted answer</see> on Stack Overflow:
        /// <code>
        /// int index = sourceString.IndexOf(removeString);
        /// string cleanPath = index &lt; 0 ? sourceString : sourceString.Remove(index, removeString.Length);
        /// </code>
        /// If one were to run the above code snippet with <c>sourceString = "Encyclopædia"</c> and <c>removeString = "aedia"</c>,
        /// then it would throw an <see cref="ArgumentOutOfRangeException"/>.
        /// On the other hand, one would get correct results by using the current extension method:
        /// <code>
        /// int index, length;
        /// sourceString.Find(removeString, StringComparison.CurrentCulture, out index, out length);
        /// string cleanPath = index &lt; 0 ? sourceString : sourceString.Remove(index, length);
        /// </code>
        /// </para>
        /// <para>
        /// There is no public functionality provided in the .NET Framework Class Library that performs such substring searches.
        /// The current method first calls <see cref="string.IndexOf(string, int, int, StringComparison)"/> to get the
        /// starting position of the match, then iteratively attempts to identify its length.
        /// It begins with the most likely case (hot path) of the match having the same length as <paramref name="searchValue"/>,
        /// verifying this through a call to <see cref="Substring.Compare(string, int, int, string, int, int, StringComparison)"/>.
        /// If not equal, it would attempt to decrement and increment the length of the match by one character each time,
        /// calling the aforementioned method until equality is confirmed.
        /// </para>
        /// <para>
        /// The approach of iterating over the substring's length is endorsed by
        /// <see href="https://stackoverflow.com/q/15980310/1149773#comment22956089_16062528">usr</see>:
        /// </para>
        /// <blockquote>
        /// I have solved a similar problem once like this (search-string highlighting in HTML). I did it similarly.
        /// You can tune the loop and search strategy in a way that makes it completed very quickly by checking the likely cases first.
        /// The nice thing about this is that it seems to be totally correct and no Unicode details leak into your code.
        /// </blockquote>
        /// <para>
        /// An alternative to this approach sacrifices portability for performance by executing a P/Invoke call to the
        /// <see href="https://docs.microsoft.com/en-us/windows/desktop/api/winnls/nf-winnls-findnlsstring"><c>FindNLSString</c></see> function
        /// (or related), as is done internally within the <see cref="string"/> class implementation.
        /// This approach is described under <see href="https://stackoverflow.com/a/20484094/1149773">this Stack Overflow answer</see>.
        /// </para>
        /// <para>
        /// Another alternative approach involves subjecting the strings to Unicode normalization
        /// (through the <see cref="string.Normalize(NormalizationForm)"/> method) before comparison,
        /// as suggested in <see href="https://stackoverflow.com/a/16001302/1149773">this Stack Overflow answer</see>.
        /// However, this approach is undesirable since the returned results would only apply to the <i>normalized</i> forms
        /// of <paramref name="source"/> and <paramref name="searchValue"/>, requiring the original strings to be discarded
        /// and replaced by their normalized forms for all subsequent processing and storage.
        /// </para>
        /// <para>
        /// Furthermore, Unicode normalization would not always yield results consistent with
        /// culture-sensitive comparisons in .NET (such as <see cref="string.Compare(string, string)"/>
        /// or <see cref="string.Equals(string, string, StringComparison)"/>
        /// with <see cref="StringComparison.CurrentCulture"/>).
        /// As mentioned in the <see href="https://unicode.org/reports/tr15/">Unicode Normalization Forms</see> annex,
        /// <see cref="NormalizationForm.FormC"/> and <see cref="NormalizationForm.FormD"/> only support <i>canonical</i> mappings,
        /// such as between precomposed characters and combining character sequences – for example, <c>"é"</c> and <c>"e\u0301"</c>.
        /// However, the said forms do not perform <i>compatibility</i> mappings, as is required for ligatures.
        /// For example, <c>"æ"</c> is not decomposed to <c>"ae"</c>, nor <c>"\uFB03"</c> (the "ffi" ligature) to <c>"ffi"</c>, even though
        /// the said ligatures are considered to be equal to their corresponding character sequences under the en-US culture.
        /// <see cref="NormalizationForm.FormKC"/> and <see cref="NormalizationForm.FormKD"/> handle compatibility mappings,
        /// and can decompose some ligatures, such as <c>"\uFB03"</c>, but miss others, such as <c>"æ"</c>.
        /// (A <see href="https://stackoverflow.com/a/15485970/1149773">Stack Overflow answer</see> mentions that
        /// “Unicode 6.2 doesn't appear to contain a normative mapping from Æ to AE.”)
        /// The issue is made worse by the discrepancies between cultures – <c>"æ"</c> is equal to <c>"ae"</c> under en-US,
        /// but not under da-DK, as discussed under the MSDN documentation for
        /// <see href="https://docs.microsoft.com/en-us/dotnet/api/system.string?view=netframework-4.7#comparison">string comparison</see>.
        /// Thus, normalization (to any form) would not give results that are consistent with <see cref="StringComparison.CurrentCulture"/> comparisons.
        /// </para>
        /// <para>
        /// Yet another alternative involves iterating over the strings as a sequence of <i>text elements</i>,
        /// rather than UTF-16 code units, using the <see cref="StringInfo.GetNextTextElement(string, int)"/> method,
        /// as presented in <see href="https://stackoverflow.com/a/22513015/1149773">this Stack Overflow answer</see>.
        /// Results would be similar to those obtained from Unicode normalization: canonical mappings are honored,
        /// but compatibility mappings are not.
        /// </para>
        /// <list type="bullet">
        /// <listheader>References</listheader>
        /// <item><see href="https://stackoverflow.com/q/35485677/1149773">Get substring from string using culture-sensitive comparison</see>, <i>Stack Overflow</i></item>
        /// <item><see href="https://stackoverflow.com/q/20480016/1149773">Length of substring matched by culture-sensitive String.IndexOf method</see>, <i>Stack Overflow</i></item>
        /// <item><see href="https://stackoverflow.com/q/15980310/1149773">How can I perform a culture-sensitive “starts-with” operation from the middle of a string?</see> by Jon Skeet, <i>Stack Overflow</i></item>
        /// <item><see href="https://stackoverflow.com/q/9376621/1149773">Folding/Normalizing Ligatures (e.g. Æ to ae) Using (Core)Foundation</see>, <i>Stack Overflow</i></item>
        /// </list>
        /// </remarks>
        public static bool Find(this string source, string searchValue, int searchIndex, int searchLength, StringComparison comparisonType, out int matchIndex, out int matchLength)
        {
            // Argument checks run in this fixed order: both strings, then the
            // search window within the source string, then the comparison type.
            ArgumentValidate.NotNull(source, nameof(source));
            ArgumentValidate.NotNull(searchValue, nameof(searchValue));
            ArgumentValidate.StringIndexLength(
                source, nameof(source),
                searchIndex, nameof(searchIndex),
                searchLength, nameof(searchLength));
            ArgumentValidate.EnumDefined(comparisonType, nameof(comparisonType));

            // FindInner performs the search, assigns both out parameters,
            // and its result is returned to the caller unchanged.
            bool found = FindInner(
                source, searchValue,
                searchIndex, searchLength,
                comparisonType,
                out matchIndex, out matchLength);

            return found;
        }
Exemplo n.º 4
0
        /// <summary>
        /// Asserts that <c>ArgumentValidate.StringIndexLength</c> throws <typeparamref name="TException"/>
        /// for the given arguments, and that the exception's <see cref="ArgumentException.ParamName"/>
        /// names the parameter flagged as the expected culprit.
        /// </summary>
        /// <typeparam name="TException">The exception type expected to be thrown.</typeparam>
        /// <param name="str">The string argument passed to the validation method.</param>
        /// <param name="idx">The index argument passed to the validation method.</param>
        /// <param name="len">The length argument passed to the validation method.</param>
        /// <param name="strFail">Whether <paramref name="str"/> is the parameter expected to be reported.</param>
        /// <param name="idxFail">Whether <paramref name="idx"/> is the parameter expected to be reported.</param>
        /// <param name="lenFail">Whether <paramref name="len"/> is the parameter expected to be reported.</param>
        private static void StringIndexLengthFail<TException>(
            string str, int idx, int len,
            bool strFail = false, bool idxFail = false, bool lenFail = false)
            where TException : ArgumentException
        {
            // Work out which parameter name the exception should carry.
            // The flags are consulted in str, idx, len order; with no flag set, null is expected.
            string expectedParamName = null;
            if (strFail)
            {
                expectedParamName = nameof(str);
            }
            else if (idxFail)
            {
                expectedParamName = nameof(idx);
            }
            else if (lenFail)
            {
                expectedParamName = nameof(len);
            }

            var thrown = ExceptionAssert.Throws<TException>(
                () => ArgumentValidate.StringIndexLength(str, nameof(str), idx, nameof(idx), len, nameof(len)));

            Assert.AreEqual(expectedParamName, thrown.ParamName);
        }
Exemplo n.º 5
0
        public void StringIndexLength()
        {
            // These locals are never assigned; they exist only so that nameof()
            // can supply the parameter names passed to the validation method.
            string str;
            int    idx;
            int    len;

            // Index/length combinations that must be accepted without throwing,
            // including the empty string and a zero-length window at the very end.
            var accepted = new[]
            {
                new { Text = "",    Index = 0, Length = 0 },
                new { Text = "abc", Index = 0, Length = 0 },
                new { Text = "abc", Index = 0, Length = 2 },
                new { Text = "abc", Index = 0, Length = 3 },
                new { Text = "abc", Index = 1, Length = 2 },
                new { Text = "abc", Index = 2, Length = 1 },
                new { Text = "abc", Index = 3, Length = 0 },
            };

            foreach (var valid in accepted)
            {
                ArgumentValidate.StringIndexLength(valid.Text, nameof(str), valid.Index, nameof(idx), valid.Length, nameof(len));
            }

            // A null string is reported against the string parameter.
            StringIndexLengthFail<ArgumentNullException>(null, 0, 0, strFail: true);

            // An index beyond the end of the string is reported against the index parameter.
            StringIndexLengthFail<ArgumentOutOfRangeException>("abc", 4, 0, idxFail: true);
            StringIndexLengthFail<ArgumentOutOfRangeException>("", 1, 0, idxFail: true);

            // A length that runs past the end of the string is reported against the length parameter.
            StringIndexLengthFail<ArgumentOutOfRangeException>("abc", 0, 4, lenFail: true);
            StringIndexLengthFail<ArgumentOutOfRangeException>("abc", 2, 2, lenFail: true);
            StringIndexLengthFail<ArgumentOutOfRangeException>("", 0, 1, lenFail: true);
        }