InternalGetUnicodeCategory() static private method

static private InternalGetUnicodeCategory ( String value, int index ) : UnicodeCategory
value String
index int
return UnicodeCategory
Example #1
0
        /// <summary>
        /// Returns the start index of every text element found in <paramref name="str"/>.
        /// </summary>
        /// <param name="str">The string to scan.</param>
        /// <returns>
        /// An array of start indices, one per text element. An empty input yields an empty array.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int[] ParseCombiningCharacters(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            int totalLength = str.Length;

            // Worst case is one text element per char, so size the scratch buffer to the string length.
            int[] starts = new int[totalLength];
            if (totalLength == 0)
            {
                return starts;
            }

            int charLen;
            UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, 0, out charLen);

            int elementCount = 0;
            for (int pos = 0; pos < totalLength; elementCount++)
            {
                starts[elementCount] = pos;
                pos += StringInfo.GetCurrentTextElementLen(str, pos, totalLength, ref category, ref charLen);
            }

            if (elementCount >= totalLength)
            {
                // Buffer is exactly full; hand it back as is.
                return starts;
            }

            // Fewer elements than chars: trim the buffer to the used prefix.
            int[] trimmed = new int[elementCount];
            Array.Copy(starts, trimmed, elementCount);
            return trimmed;
        }
Example #2
0
        // Token: 0x06003054 RID: 12372 RVA: 0x000B935C File Offset: 0x000B755C
        /// <summary>
        /// Computes the length, in chars, of the text element that starts at <paramref name="index"/>.
        /// </summary>
        /// <param name="str">The string being scanned.</param>
        /// <param name="index">Start of the current text element.</param>
        /// <param name="len">Upper bound of the scan.</param>
        /// <param name="ucCurrent">
        /// In: category of the character at <paramref name="index"/>.
        /// Out: category of the first character after the element, when there is one.
        /// </param>
        /// <param name="currentCharCount">
        /// In: char count of the character at <paramref name="index"/>.
        /// Out: char count of the first character after the element, when there is one.
        /// </param>
        /// <returns>The number of chars in the current text element.</returns>
        internal static int GetCurrentTextElementLen(string str, int index, int len, ref UnicodeCategory ucCurrent, ref int currentCharCount)
        {
            if (index + currentCharCount == len)
            {
                // Nothing follows the current character.
                return currentCharCount;
            }

            int followingLen;
            UnicodeCategory following = CharUnicodeInfo.InternalGetUnicodeCategory(str, index + currentCharCount, out followingLen);

            // The current character forms an element by itself when either the next character is not
            // combining, or the current character is not allowed to act as a base.
            bool standsAlone = !CharUnicodeInfo.IsCombiningCategory(following)
                               || CharUnicodeInfo.IsCombiningCategory(ucCurrent)
                               || ucCurrent == UnicodeCategory.Format
                               || ucCurrent == UnicodeCategory.Control
                               || ucCurrent == UnicodeCategory.OtherNotAssigned
                               || ucCurrent == UnicodeCategory.Surrogate;
            if (standsAlone)
            {
                int baseLen = currentCharCount;
                ucCurrent        = following;
                currentCharCount = followingLen;
                return baseLen;
            }

            // Base character followed by at least one combining character: absorb all of them.
            int elementStart = index;
            index += currentCharCount + followingLen;
            while (index < len)
            {
                following = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out followingLen);
                if (!CharUnicodeInfo.IsCombiningCategory(following))
                {
                    ucCurrent        = following;
                    currentCharCount = followingLen;
                    break;
                }
                index += followingLen;
            }
            return index - elementStart;
        }
        /// <summary>
        /// Measures the text element beginning at <paramref name="index"/> and primes the
        /// by-ref state for the element that follows it.
        /// </summary>
        /// <param name="str">The string being scanned.</param>
        /// <param name="index">Start of the current text element.</param>
        /// <param name="len">Upper bound of the scan.</param>
        /// <param name="ucCurrent">Category of the current character; replaced by the category of the next element's first character when one is found.</param>
        /// <param name="currentCharCount">Char count of the current character; replaced by that of the next element's first character when one is found.</param>
        /// <returns>Length of the current text element in chars.</returns>
        internal static int GetCurrentTextElementLen(string str, int index, int len, ref UnicodeCategory ucCurrent, ref int currentCharCount)
        {
            if ((index + currentCharCount) == len)
            {
                // The current character is the last thing in the scanned range.
                return currentCharCount;
            }

            int nextLen;
            UnicodeCategory nextCategory = CharUnicodeInfo.InternalGetUnicodeCategory(str, index + currentCharCount, out nextLen);

            // Only a non-combining, non-format, non-control, assigned, non-surrogate character
            // can serve as the base for following combining characters.
            if (CharUnicodeInfo.IsCombiningCategory(nextCategory)
                && !CharUnicodeInfo.IsCombiningCategory(ucCurrent)
                && ucCurrent != UnicodeCategory.Format
                && ucCurrent != UnicodeCategory.Control
                && ucCurrent != UnicodeCategory.OtherNotAssigned
                && ucCurrent != UnicodeCategory.Surrogate)
            {
                int start = index;
                for (index += currentCharCount + nextLen; index < len; index += nextLen)
                {
                    nextCategory = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out nextLen);
                    if (!CharUnicodeInfo.IsCombiningCategory(nextCategory))
                    {
                        // First character of the following element: remember it for the next call.
                        ucCurrent        = nextCategory;
                        currentCharCount = nextLen;
                        break;
                    }
                }
                return index - start;
            }

            // Single-character element: report its length and advance the state.
            int consumed = currentCharCount;
            ucCurrent        = nextCategory;
            currentCharCount = nextLen;
            return consumed;
        }
Example #4
0
        // Returns the text element of str that begins at the given index: a single
        // character, or a base character followed by its combining characters, as
        // measured by GetCurrentTextElementLen().  An index equal to the string
        // length yields the empty string; any other out-of-range index throws.
        public static string GetNextTextElement(string str, int index)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
            Contract.EndContractBlock();

            int len = str.Length;

            // One-past-the-end is tolerated and simply produces no element.
            if (index == len)
            {
                return(String.Empty);
            }

            if (index < 0 || index > len)
            {
                throw new ArgumentOutOfRangeException("index", SR.ArgumentOutOfRange_Index);
            }

            int             firstCharLen;
            UnicodeCategory firstCategory = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out firstCharLen);
            int             elementLen    = GetCurrentTextElementLen(str, index, len, ref firstCategory, ref firstCharLen);

            return(str.Substring(index, elementLen));
        }
Example #5
0
        /// <summary>
        /// Splits <paramref name="str"/> into text elements and reports where each one starts.
        /// </summary>
        /// <param name="str">The string to scan.</param>
        /// <returns>The start indices of the text elements, in ascending order.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int[] ParseCombiningCharacters(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            int charTotal = str.Length;
            int[] buffer = new int[charTotal];
            if (charTotal == 0)
            {
                return buffer;
            }

            int used = 0;
            int position = 0;
            int charLength;
            UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, 0, out charLength);

            // The string is non-empty here, so the body always runs at least once.
            do
            {
                buffer[used] = position;
                used++;
                position += StringInfo.GetCurrentTextElementLen(str, position, charTotal, ref category, ref charLength);
            }
            while (position < charTotal);

            if (used < charTotal)
            {
                // Some elements spanned several chars; return only the filled prefix.
                int[] exact = new int[used];
                Array.Copy(buffer, exact, used);
                return exact;
            }
            return buffer;
        }
        /*
         * Returns the start index of every text element in str.  A text element is a
         * base character together with any combining characters that follow it, or a
         * properly formed surrogate pair; the reported index is that of the base
         * character or high surrogate.  The length of an element is the difference
         * between successive indices, and the returned array is never longer than str.
         * For example, for \u4f00\u302a\ud800\udc00\u4f01 the result is 0, 2, 4.
         */

        public static int[] ParseCombiningCharacters(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }

            int len = str.Length;

            // Scratch buffer sized for the worst case of one element per char.
            int[] indices = new int[len];
            if (len == 0)
            {
                return indices;
            }

            int charLen;
            UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, 0, out charLen);

            int found  = 0;
            int cursor = 0;
            while (cursor < len)
            {
                indices[found] = cursor;
                found++;

                int elementLen = GetCurrentTextElementLen(str, cursor, len, ref category, ref charLen);
                cursor += elementLen;
            }

            if (found >= len)
            {
                return indices;
            }

            // Shrink to the number of elements actually found.
            int[] exact = new int[found];
            Array.Copy(indices, exact, found);
            return exact;
        }
        /// <summary>
        /// Lists the index at which each text element of <paramref name="str"/> begins.
        /// </summary>
        /// <param name="str">The string to scan.</param>
        /// <returns>Start indices of the text elements; empty for an empty string.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public static int[] ParseCombiningCharacters(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            int length = str.Length;
            int[] all = new int[length];
            if (length == 0)
            {
                return all;
            }

            int charLen;
            UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, 0, out charLen);

            int count = 0;
            for (int at = 0; at < length; )
            {
                all[count++] = at;
                at += GetCurrentTextElementLen(str, at, length, ref category, ref charLen);
            }

            if (count >= length)
            {
                return all;
            }

            // Copy out just the populated part of the buffer.
            int[] populated = new int[count];
            Array.Copy(all, populated, count);
            return populated;
        }
Example #8
0
        /// <summary>
        /// Returns how many chars the text element at <paramref name="index"/> occupies.
        /// </summary>
        /// <param name="str">The string being scanned.</param>
        /// <param name="index">Start of the current text element.</param>
        /// <param name="len">Upper bound of the scan.</param>
        /// <param name="ucCurrent">Category of the current character on entry; on exit, the category of the character that starts the next element, if any.</param>
        /// <param name="currentCharCount">Char count of the current character on entry; on exit, that of the character that starts the next element, if any.</param>
        /// <returns>Length of the current text element in chars.</returns>
        internal static int GetCurrentTextElementLen(string str, int index, int len, ref UnicodeCategory ucCurrent, ref int currentCharCount)
        {
            if (index + currentCharCount == len)
            {
                return currentCharCount;
            }

            int peekLen;
            UnicodeCategory peek = CharUnicodeInfo.InternalGetUnicodeCategory(str, index + currentCharCount, out peekLen);

            // No combining character follows, or the current character cannot be a base:
            // the element is just the current character.
            if (!CharUnicodeInfo.IsCombiningCategory(peek)
                || CharUnicodeInfo.IsCombiningCategory(ucCurrent)
                || ucCurrent == UnicodeCategory.Format
                || ucCurrent == UnicodeCategory.Control
                || ucCurrent == UnicodeCategory.OtherNotAssigned
                || ucCurrent == UnicodeCategory.Surrogate)
            {
                int single = currentCharCount;
                ucCurrent        = peek;
                currentCharCount = peekLen;
                return single;
            }

            int origin = index;
            for (index += currentCharCount + peekLen; index < len; index += peekLen)
            {
                peek = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out peekLen);
                if (!CharUnicodeInfo.IsCombiningCategory(peek))
                {
                    // Found the start of the next element; stash its details for the caller.
                    ucCurrent        = peek;
                    currentCharCount = peekLen;
                    break;
                }
            }
            return index - origin;
        }
Example #9
0
        ////////////////////////////////////////////////////////////////////////
        //
        // Get the length of the current text element.
        //
        // The combining class consists of characters/surrogates whose Unicode category is:
        //      * NonSpacingMark (e.g. U+0300 COMBINING GRAVE ACCENT)
        //      * SpacingCombiningMark (e.g. U+0903 DEVANAGARI SIGN VISARGA)
        //      * EnclosingMark (e.g. U+20DD COMBINING ENCLOSING CIRCLE)
        //
        // For GetNextTextElement() and ParseCombiningCharacters(), a text element is:
        //
        //  1. A character/surrogate that is a text element by itself and can NOT take
        //     following combining characters, when its category is one of:
        //      * any category in the combining class
        //      * UnicodeCategory.Format
        //      * UnicodeCategory.Control
        //      * UnicodeCategory.OtherNotAssigned
        //      * UnicodeCategory.Surrogate (an unpaired high or low surrogate)
        //  2. Otherwise, the character/surrogate plus every combining character that
        //     immediately follows it.
        //
        //  Return:
        //      The length of the current text element
        //
        //  Parameters:
        //      str                 The string being scanned
        //      index               The starting index
        //      len                 The total length of str (the upper boundary)
        //      ucCurrent           Unicode category of the character at index.  Updated to the
        //                          category of the next character if this is not the last text element.
        //      currentCharCount    Char count of the character at index.  Updated to the char
        //                          count of the next character if this is not the last text element.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int GetCurrentTextElementLen(string str, int index, int len, ref UnicodeCategory ucCurrent, ref int currentCharCount)
        {
            Contract.Assert(index >= 0 && len >= 0, "StringInfo.GetCurrentTextElementLen() : index = " + index + ", len = " + len);
            Contract.Assert(index < len, "StringInfo.GetCurrentTextElementLen() : index = " + index + ", len = " + len);

            if (index + currentCharCount == len)
            {
                // Last character/surrogate in the string: it is the whole element.
                return currentCharCount;
            }

            // The internal lookup reports the category and also how many chars the character occupies.
            int peekCharCount;
            UnicodeCategory ucPeek = CharUnicodeInfo.InternalGetUnicodeCategory(str, index + currentCharCount, out peekCharCount);

            bool peekIsCombining = CharUnicodeInfo.IsCombiningCategory(ucPeek);

            // Either nothing combining follows, or the current character is not a valid base
            // (combining, format, control, unassigned, or an unpaired surrogate).
            if (!peekIsCombining
                || CharUnicodeInfo.IsCombiningCategory(ucCurrent)
                || ucCurrent == UnicodeCategory.Format
                || ucCurrent == UnicodeCategory.Control
                || ucCurrent == UnicodeCategory.OtherNotAssigned
                || ucCurrent == UnicodeCategory.Surrogate)
            {
                int singleLen = currentCharCount;
                ucCurrent        = ucPeek;
                currentCharCount = peekCharCount;
                return singleLen;
            }

            // Valid base followed by a combining character: keep consuming combining characters.
            int elementStart = index;
            for (index += currentCharCount + peekCharCount; index < len; index += peekCharCount)
            {
                ucPeek = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out peekCharCount);
                if (!CharUnicodeInfo.IsCombiningCategory(ucPeek))
                {
                    ucCurrent        = ucPeek;
                    currentCharCount = peekCharCount;
                    break;
                }
            }
            return index - elementStart;
        }
Example #10
0
 /// <summary>
 /// Rewinds the current position to the start index and, when that position lies
 /// inside the string, loads the category and char length of the character there.
 /// </summary>
 public void Reset()
 {
     index = startIndex;
     if (index >= strLen)
     {
         // Nothing to look at; leave the cached category untouched.
         return;
     }

     uc = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);
 }
 /// <summary>
 /// Moves back to the starting index. If at least one character remains from there,
 /// the category and char length of that character are cached for the next step.
 /// </summary>
 public void Reset()
 {
     this.index = this.startIndex;

     bool hasCurrentChar = this.index < this.strLen;
     if (hasCurrentChar)
     {
         this.uc = CharUnicodeInfo.InternalGetUnicodeCategory(this.str, this.index, out this.charLen);
     }
 }
Example #12
0
 /// <summary>
 /// Rebuilds derived fields: the string length from the end index, the current element
 /// length from the stored next-element length, and, when the char length holds the
 /// sentinel -1, the category and char length of the character at the current index.
 /// Presumably invoked as a post-deserialization callback (the attribute is not visible here).
 /// </summary>
 /// <param name="ctx">Streaming context; not used by this method.</param>
 private void OnDeserialized(StreamingContext ctx)
 {
     strLen             = endIndex + 1;
     currTextElementLen = nextTextElementLen;

     if (charLen != -1)
     {
         return;
     }

     uc = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);
 }
 /// <summary>
 /// Returns to the start index and, if a character exists at that position,
 /// refreshes the cached category and char length for it.
 /// </summary>
 public void Reset()
 {
     _index = _startIndex;
     if (_index >= _strLen)
     {
         // Start position is at or beyond the end; there is no current character to inspect.
         return;
     }

     _uc = CharUnicodeInfo.InternalGetUnicodeCategory(_str, _index, out _charLen);
 }
Example #14
0
        /// <summary>
        /// Recomputes derived state: <c>_strLen</c> from <c>_endIndex</c>, the current element
        /// length from <c>_nextTextElementLen</c>, and, if <c>_charLen</c> is still the sentinel -1,
        /// the category and char length of the character at <c>_index</c>.
        /// Presumably a post-deserialization hook (the attribute is not visible here).
        /// </summary>
        /// <param name="ctx">Streaming context; not used by this method.</param>
        private void OnDeserialized(StreamingContext ctx)
        {
            _strLen             = _endIndex + 1;
            _currTextElementLen = _nextTextElementLen;

            bool charLenMissing = _charLen == -1;
            if (charLenMissing)
            {
                _uc = CharUnicodeInfo.InternalGetUnicodeCategory(_str, _index, out _charLen);
            }
        }
        /// <summary>
        /// Restores fields derived from stored ones: string length, current text element
        /// length, and, when the char length is the sentinel -1, the category and char
        /// length of the character at the current index.
        /// Presumably runs after deserialization (the attribute is not visible here).
        /// </summary>
        /// <param name="ctx">Streaming context; not used by this method.</param>
        private void OnDeserialized(StreamingContext ctx)
        {
            this.strLen             = this.endIndex + 1;
            this.currTextElementLen = this.nextTextElementLen;

            if (this.charLen != -1)
            {
                // Char length was already available; nothing to recompute.
                return;
            }

            this.uc = CharUnicodeInfo.InternalGetUnicodeCategory(this.str, this.index, out this.charLen);
        }
Example #16
0
 /// <summary>
 /// Gets the Unicode category of the character at <paramref name="index"/> in <paramref name="s"/>.
 /// </summary>
 /// <param name="s">The string to inspect.</param>
 /// <param name="index">Zero-based position within <paramref name="s"/>.</param>
 /// <returns>The category reported by the internal lookup.</returns>
 /// <exception cref="ArgumentNullException"><paramref name="s"/> is null.</exception>
 /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or not less than the length of <paramref name="s"/>.</exception>
 public static UnicodeCategory GetUnicodeCategory(string s, int index)
 {
     if (s == null)
     {
         throw new ArgumentNullException("s");
     }

     bool outOfRange = index < 0 || index >= s.Length;
     if (outOfRange)
     {
         throw new ArgumentOutOfRangeException("index");
     }

     UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(s, index);
     return category;
 }
Example #17
0
        /// <summary>
        /// Extracts the text element of <paramref name="str"/> that starts at <paramref name="index"/>.
        /// </summary>
        /// <param name="str">The string to read from.</param>
        /// <param name="index">Start of the wanted text element.</param>
        /// <returns>The text element, or the empty string when <paramref name="index"/> equals the string length.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or greater than the string length.</exception>
        public static string GetNextTextElement(string str, int index)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            int length = str.Length;

            if (index < 0 || index > length)
            {
                throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
            }

            if (index == length)
            {
                // One-past-the-end is allowed and yields no text.
                return string.Empty;
            }

            int charLen;
            UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);
            int elementLen = StringInfo.GetCurrentTextElementLen(str, index, length, ref category, ref charLen);
            return str.Substring(index, elementLen);
        }
Example #18
0
        /// <summary>
        /// Checks whether every character in the buffer is acceptable for sorting: no unpaired
        /// surrogates, and no character whose category is PrivateUse or OtherNotAssigned.
        /// </summary>
        /// <param name="text">Pointer to the first char of the text.</param>
        /// <param name="length">Number of chars to examine.</param>
        /// <returns>true when no offending character is found; otherwise false.</returns>
        private unsafe static bool IsSortable(char *text, int length)
        {
            Debug.Assert(!GlobalizationMode.Invariant);

            int pos = 0;
            while (pos < length)
            {
                char            current = text[pos];
                UnicodeCategory category;
                int             advance;

                if (Char.IsHighSurrogate(current))
                {
                    // A high surrogate must be immediately followed by a low surrogate.
                    bool hasLowPartner = pos != length - 1 && Char.IsLowSurrogate(text[pos + 1]);
                    if (!hasLowPartner)
                    {
                        return false;
                    }

                    category = CharUnicodeInfo.InternalGetUnicodeCategory(Char.ConvertToUtf32(current, text[pos + 1]));
                    advance  = 2;
                }
                else if (Char.IsLowSurrogate(current))
                {
                    // A low surrogate with no preceding high surrogate.
                    return false;
                }
                else
                {
                    category = CharUnicodeInfo.GetUnicodeCategory(current);
                    advance  = 1;
                }

                if (category == UnicodeCategory.PrivateUse || category == UnicodeCategory.OtherNotAssigned)
                {
                    return false;
                }

                pos += advance;
            }

            return true;
        }
        /// <summary>
        /// Returns the text element of <paramref name="str"/> located at <paramref name="index"/>.
        /// </summary>
        /// <param name="str">The string to read from.</param>
        /// <param name="index">Start of the wanted text element.</param>
        /// <returns>The text element, or the empty string when <paramref name="index"/> equals the string length.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="index"/> is negative or greater than the string length.</exception>
        public static string GetNextTextElement(string str, int index)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            int length = str.Length;

            if ((index >= 0) && (index < length))
            {
                int charLen;
                UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);
                int elementLen = GetCurrentTextElementLen(str, index, length, ref category, ref charLen);
                return str.Substring(index, elementLen);
            }

            // Out of the indexable range: only the one-past-the-end position is tolerated.
            if (index == length)
            {
                return string.Empty;
            }

            throw new ArgumentOutOfRangeException("index", Environment.GetResourceString("ArgumentOutOfRange_Index"));
        }
Example #20
0
        /// <summary>
        /// Converts <paramref name="str"/> to title case. The first letter of each word is
        /// title-cased; the rest of the word is lower-cased only if the word contains at
        /// least one lowercase letter, so all-uppercase words (acronyms) are left alone.
        /// </summary>
        /// <param name="str">The string to convert.</param>
        /// <returns>The converted string; an empty input is returned unchanged.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        public unsafe String ToTitleCase(String str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }

            int len = str.Length;
            if (len == 0)
            {
                return str;
            }

            StringBuilder builder = new StringBuilder();

            // Lower-cased copy of the whole input, created lazily the first time a word needs it.
            String lowered = null;

            for (int pos = 0; pos < len; pos++)
            {
                int             charLen;
                UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, pos, out charLen);

                if (!Char.CheckLetter(category))
                {
                    // Not a letter: copy it through unchanged.
                    builder.Append(str[pos]);
                    if (charLen != 1)
                    {
                        // Surrogate pair: copy the second half too; the loop increment steps past it.
                        pos++;
                        builder.Append(str[pos]);
                    }
                    continue;
                }

                // First letter of a word: emit its title-case form.
                if (charLen == 1)
                {
                    builder.Append(nativeGetTitleCaseChar(m_pNativeTextInfo, str[pos]));
                }
                else
                {
                    // ASSUMPTION (carried over from the original): no surrogate has a distinct titlecase form.
                    char titleHigh;
                    char titleLow;

                    ChangeCaseSurrogate(str[pos], str[pos + 1], out titleHigh, out titleLow, true);
                    builder.Append(titleHigh);
                    builder.Append(titleLow);
                }
                pos += charLen;

                // Start of the not-yet-emitted remainder of the current word.
                int segmentStart = pos;

                // Tracks whether the word contains a lowercase letter; words without one
                // (e.g. "URT", "USA") are treated as acronyms and not lower-cased.
                bool sawLowerCase = category == UnicodeCategory.LowercaseLetter;

                while (pos < len)
                {
                    category = CharUnicodeInfo.InternalGetUnicodeCategory(str, pos, out charLen);

                    if (IsLetterCategory(category))
                    {
                        sawLowerCase |= category == UnicodeCategory.LowercaseLetter;
                        pos += charLen;
                        continue;
                    }

                    if (str[pos] == '\'')
                    {
                        // An apostrophe counts as part of the word (e.g. "can't"): flush the
                        // segment up to and including it, then start a new segment.
                        pos++;
                        String wordSource = sawLowerCase ? (lowered ?? (lowered = this.ToLower(str))) : str;
                        builder.Append(wordSource, segmentStart, pos - segmentStart);
                        segmentStart = pos;
                        sawLowerCase = true;
                        continue;
                    }

                    if (IsWordSeparator(category))
                    {
                        // End of the word.
                        break;
                    }

                    // Any other category is considered part of the word.
                    pos += charLen;
                }

                int pending = pos - segmentStart;
                if (pending > 0)
                {
                    String tailSource = sawLowerCase ? (lowered ?? (lowered = this.ToLower(str))) : str;
                    builder.Append(tailSource, segmentStart, pending);
                }

                if (pos < len)
                {
                    // Emit the separator that ended the word.
                    builder.Append(str[pos]);
                    if (charLen != 1)
                    {
                        // Surrogate pair separator: copy its second half as well.
                        pos++;
                        builder.Append(str[pos]);
                    }
                }
            }

            return builder.ToString();
        }
        /// <summary>Converts the specified string to title case, leaving words that are entirely uppercase (treated as acronyms) unchanged.</summary>
        /// <param name="str">The string to convert to title case.</param>
        /// <returns>The specified string converted to title case; an empty string is returned as is.</returns>
        /// <exception cref="T:System.ArgumentNullException">
        ///         <paramref name="str" /> is <see langword="null" />.</exception>
        // Token: 0x060030A3 RID: 12451 RVA: 0x000B9EFC File Offset: 0x000B80FC
        public string ToTitleCase(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
            if (str.Length == 0)
            {
                return str;
            }

            StringBuilder builder = new StringBuilder();

            // Lower-cased copy of the input, produced on first use.
            string lowered = null;

            // str is handed to helpers by ref, so its length is re-read on every check.
            int pos = 0;
            while (pos < str.Length)
            {
                int             charLen;
                UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, pos, out charLen);

                if (!char.CheckLetter(category))
                {
                    pos = TextInfo.AddNonLetter(ref builder, ref str, pos, charLen);
                }
                else
                {
                    // Emit the title-cased first letter, then continue just past the index the helper returns.
                    pos = this.AddTitlecaseLetter(ref builder, ref str, pos, charLen) + 1;

                    int  segmentStart = pos;
                    bool sawLowerCase = category == UnicodeCategory.LowercaseLetter;

                    while (pos < str.Length)
                    {
                        category = CharUnicodeInfo.InternalGetUnicodeCategory(str, pos, out charLen);

                        if (TextInfo.IsLetterCategory(category))
                        {
                            sawLowerCase |= category == UnicodeCategory.LowercaseLetter;
                            pos += charLen;
                        }
                        else if (str[pos] == '\'')
                        {
                            // Apostrophe stays inside the word: flush through it and open a new segment.
                            pos++;
                            if (sawLowerCase)
                            {
                                if (lowered == null)
                                {
                                    lowered = this.ToLower(str);
                                }
                                builder.Append(lowered, segmentStart, pos - segmentStart);
                            }
                            else
                            {
                                builder.Append(str, segmentStart, pos - segmentStart);
                            }
                            segmentStart = pos;
                            sawLowerCase = true;
                        }
                        else if (!TextInfo.IsWordSeparator(category))
                        {
                            // Non-letter that does not separate words: still part of the word.
                            pos += charLen;
                        }
                        else
                        {
                            break;
                        }
                    }

                    int pending = pos - segmentStart;
                    if (pending > 0)
                    {
                        if (sawLowerCase)
                        {
                            if (lowered == null)
                            {
                                lowered = this.ToLower(str);
                            }
                            builder.Append(lowered, segmentStart, pending);
                        }
                        else
                        {
                            // No lowercase letter seen: keep the original casing (acronym).
                            builder.Append(str, segmentStart, pending);
                        }
                    }

                    if (pos < str.Length)
                    {
                        // Emit the separator that terminated the word.
                        pos = TextInfo.AddNonLetter(ref builder, ref str, pos, charLen);
                    }
                }

                pos++;
            }

            return builder.ToString();
        }
Example #22
0
        /// <summary>Converts the specified string to title case (except for words that are entirely in uppercase, which are considered to be acronyms).</summary>
        /// <returns>The specified string converted to title case.</returns>
        /// <param name="str">The string to convert to title case.</param>
        /// <exception cref="T:System.ArgumentNullException">
        /// <paramref name="str" /> is null. </exception>
        public string ToTitleCase(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
            if (str.Length == 0)
            {
                return(str);
            }

            StringBuilder output  = new StringBuilder();
            string        lowered = null; // lowercase copy of str, created on first use
            int           pos     = 0;

            // str is passed to the helpers by ref, so str.Length is re-read on every test.
            while (pos < str.Length)
            {
                int             charLength;
                UnicodeCategory leadCategory = CharUnicodeInfo.InternalGetUnicodeCategory(str, pos, out charLength);

                if (!char.CheckLetter(leadCategory))
                {
                    // Not the start of a word: hand the character to AddNonLetter and move on.
                    pos = TextInfo.AddNonLetter(ref output, ref str, pos, charLength) + 1;
                    continue;
                }

                // First letter of a word.
                pos = this.AddTitlecaseLetter(ref output, ref str, pos, charLength) + 1;

                // [runStart, pos) is the part of the word that has been scanned but not yet appended.
                int  runStart     = pos;
                bool sawLowercase = leadCategory == UnicodeCategory.LowercaseLetter;

                while (pos < str.Length)
                {
                    UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, pos, out charLength);

                    if (TextInfo.IsLetterCategory(category))
                    {
                        if (category == UnicodeCategory.LowercaseLetter)
                        {
                            sawLowercase = true;
                        }
                        pos += charLength;
                        continue;
                    }

                    if (str[pos] == '\'')
                    {
                        // The apostrophe stays in the word: flush everything up to and including it.
                        pos++;
                        string apostropheSource = str;
                        if (sawLowercase)
                        {
                            if (lowered == null)
                            {
                                lowered = this.ToLower(str);
                            }
                            apostropheSource = lowered;
                        }
                        output.Append(apostropheSource, runStart, pos - runStart);
                        runStart     = pos;
                        sawLowercase = true; // text after the apostrophe is always taken from the lowercase copy
                        continue;
                    }

                    if (TextInfo.IsWordSeparator(category))
                    {
                        break;
                    }

                    // Neither a letter nor a separator: treated as part of the word.
                    pos += charLength;
                }

                int runLength = pos - runStart;
                if (runLength > 0)
                {
                    // Words without any lowercase letter are copied unchanged from str.
                    string tailSource = str;
                    if (sawLowercase)
                    {
                        if (lowered == null)
                        {
                            lowered = this.ToLower(str);
                        }
                        tailSource = lowered;
                    }
                    output.Append(tailSource, runStart, runLength);
                }

                if (pos < str.Length)
                {
                    // The scan stopped on a word separator; append it as a non-letter.
                    pos = TextInfo.AddNonLetter(ref output, ref str, pos, charLength);
                }
                pos++;
            }
            return(output.ToString());
        }
        /// <summary>
        /// Builds a title-cased copy of <paramref name="str" />, processing it one word at a time.
        /// The first letter of each word goes through AddTitlecaseLetter; the rest of the word is
        /// copied from a lowercased version of the input when the word contains a lowercase letter,
        /// and copied unchanged otherwise.
        /// </summary>
        /// <param name="str">The string to convert.</param>
        /// <returns>The converted string; an empty input is returned as-is.</returns>
        /// <exception cref="T:System.ArgumentNullException"><paramref name="str" /> is null.</exception>
        public string ToTitleCase(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
            if (str.Length == 0)
            {
                return(str);
            }

            StringBuilder builder   = new StringBuilder();
            string        lowerCopy = null; // filled in lazily by ToLower(str)

            for (int cursor = 0; cursor < str.Length; cursor++)
            {
                int             width;
                UnicodeCategory category = CharUnicodeInfo.InternalGetUnicodeCategory(str, cursor, out width);

                if (char.CheckLetter(category))
                {
                    cursor = this.AddTitlecaseLetter(ref builder, ref str, cursor, width) + 1;

                    // Start of the not-yet-appended remainder of the current word.
                    int  segmentStart = cursor;
                    bool useLowered   = category == UnicodeCategory.LowercaseLetter;

                    while (cursor < str.Length)
                    {
                        category = CharUnicodeInfo.InternalGetUnicodeCategory(str, cursor, out width);

                        if (IsLetterCategory(category))
                        {
                            if (category == UnicodeCategory.LowercaseLetter)
                            {
                                useLowered = true;
                            }
                            cursor += width;
                        }
                        else if (str[cursor] == '\'')
                        {
                            // Keep the apostrophe inside the word and flush what has been scanned so far.
                            cursor++;
                            builder.Append(
                                useLowered ? (lowerCopy ?? (lowerCopy = this.ToLower(str))) : str,
                                segmentStart,
                                cursor - segmentStart);
                            segmentStart = cursor;
                            useLowered   = true;
                        }
                        else if (IsWordSeparator(category))
                        {
                            break;
                        }
                        else
                        {
                            // Not a letter, not a separator: still part of the word.
                            cursor += width;
                        }
                    }

                    int remaining = cursor - segmentStart;
                    if (remaining > 0)
                    {
                        builder.Append(
                            useLowered ? (lowerCopy ?? (lowerCopy = this.ToLower(str))) : str,
                            segmentStart,
                            remaining);
                    }

                    if (cursor < str.Length)
                    {
                        // Stopped on a separator; append it via the non-letter path.
                        cursor = AddNonLetter(ref builder, ref str, cursor, width);
                    }
                }
                else
                {
                    cursor = AddNonLetter(ref builder, ref str, cursor, width);
                }
            }
            return(builder.ToString());
        }
Example #24
0
 /// <summary>Returns the Unicode category reported by the integer-based lookup for <paramref name="ch" />.</summary>
 /// <param name="ch">The character to classify.</param>
 public static UnicodeCategory GetUnicodeCategory(char ch)
 {
     // Widen the char to its numeric value and delegate to the int overload.
     int codeUnit = ch;
     return CharUnicodeInfo.InternalGetUnicodeCategory(codeUnit);
 }
Example #25
0
        //
        // Titlecasing:
        // -----------
        // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
        // and the rest of the letters are lowercase.  The choice of which words to titlecase in headings
        // and titles is dependent on language and local conventions.  For example, "The Merry Wives of Windsor"
        // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
        // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
        // are not titlecased.  In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
        //
        // Moreover, the determination of what actually constitutes a word is language dependent, and this can
        // influence which letter or letters of a "word" are uppercased when titlecasing strings.  For example
        // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
        //
        /// <summary>
        /// Converts <paramref name="str" /> to title case word by word. Words that contain no
        /// lowercase letter (acronyms such as "USA") keep their remaining letters unchanged.
        /// When the culture name starts with "nl-", a word-initial "ij" (in any casing) is
        /// emitted as "IJ".
        /// </summary>
        /// <param name="str">The string to convert.</param>
        /// <returns>The converted string; an empty input is returned as-is.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="str" /> is null.</exception>
        public unsafe string ToTitleCase(string str)
        {
            if (str == null)
            {
                throw new ArgumentNullException(nameof(str));
            }
            if (str.Length == 0)
            {
                return(str);
            }

            StringBuilder titled    = new StringBuilder();
            string        lowerText = null; // lowercase copy of str, created on first use

            // Dutch is special-cased for the "IJ" digraph at the start of a word.
            bool dutch = CultureName.StartsWith("nl-", StringComparison.OrdinalIgnoreCase);

            for (int index = 0; index < str.Length; index++)
            {
                int             charLen;
                UnicodeCategory charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);

                if (!char.CheckLetter(charType))
                {
                    // Not a letter, just append it.
                    index = AddNonLetter(ref titled, ref str, index, charLen);
                    continue;
                }

                // Acronym guard: the rest of the word is lowercased only if a lowercase
                // letter shows up somewhere in it (in line with Word 2000 titlecasing).
                bool hasLowerCase = charType == UnicodeCategory.LowercaseLetter;

                // The length check comes first so str[index + 1] is never read past the end.
                bool startsWithDutchIJ = dutch
                                         && index < str.Length - 1
                                         && (str[index] == 'i' || str[index] == 'I')
                                         && (str[index + 1] == 'j' || str[index + 1] == 'J');
                if (startsWithDutchIJ)
                {
                    titled.Append("IJ");
                    index += 2;
                }
                else
                {
                    // Titlecase the first character of the word.
                    index = AddTitlecaseLetter(ref titled, ref str, index, charLen) + 1;
                }

                // Everything from here to the end of the word is appended in bulk later.
                int pendingStart = index;

                // Scan the remaining characters of this word.
                while (index < str.Length)
                {
                    charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);

                    if (IsLetterCategory(charType))
                    {
                        if (charType == UnicodeCategory.LowercaseLetter)
                        {
                            hasLowerCase = true;
                        }
                        index += charLen;
                        continue;
                    }

                    if (str[index] == '\'')
                    {
                        // The apostrophe stays inside the word: flush up to and including it.
                        index++;
                        string flushSource = str;
                        if (hasLowerCase)
                        {
                            if (lowerText == null)
                            {
                                lowerText = ToLower(str);
                            }
                            flushSource = lowerText;
                        }
                        titled.Append(flushSource, pendingStart, index - pendingStart);
                        pendingStart = index;
                        hasLowerCase = true;
                        continue;
                    }

                    if (IsWordSeparator(charType))
                    {
                        // A word separator ends the word.
                        break;
                    }

                    // Any category IsWordSeparator does not flag is considered part of the word.
                    index += charLen;
                }

                int pendingLength = index - pendingStart;
                if (pendingLength > 0)
                {
                    string tailSource = str;
                    if (hasLowerCase)
                    {
                        if (lowerText == null)
                        {
                            lowerText = ToLower(str);
                        }
                        tailSource = lowerText;
                    }
                    titled.Append(tailSource, pendingStart, pendingLength);
                }

                if (index < str.Length)
                {
                    // The separator that ended the word is not a letter, just append it.
                    index = AddNonLetter(ref titled, ref str, index, charLen);
                }
            }
            return(titled.ToString());
        }
Example #26
0
 /// <summary>Returns the Unicode category for the character of <paramref name="value" /> at <paramref name="index" />.</summary>
 /// <param name="value">The string containing the character.</param>
 /// <param name="index">Position within <paramref name="value" /> to classify.</param>
 internal static UnicodeCategory InternalGetUnicodeCategory(string value, int index)
 {
     // Convert first, then classify the converted value.
     var utf32 = CharUnicodeInfo.InternalConvertToUtf32(value, index);
     return CharUnicodeInfo.InternalGetUnicodeCategory(utf32);
 }
        //
        // Titlecasing:
        // -----------
        // Titlecasing refers to a casing practice wherein the first letter of a word is an uppercase letter
        // and the rest of the letters are lowercase.  The choice of which words to titlecase in headings
        // and titles is dependent on language and local conventions.  For example, "The Merry Wives of Windsor"
        // is the appropriate titlecasing of that play's name in English, with the word "of" not titlecased.
        // In German, however, the title is "Die lustigen Weiber von Windsor," and both "lustigen" and "von"
        // are not titlecased.  In French even fewer words are titlecased: "Les joyeuses commeres de Windsor."
        //
        // Moreover, the determination of what actually constitutes a word is language dependent, and this can
        // influence which letter or letters of a "word" are uppercased when titlecasing strings.  For example
        // "l'arbre" is considered two words in French, whereas "can't" is considered one word in English.
        //
        //
        // Differences between UNICODE 5.0 and the .NET Framework (



#if !FEATURE_CORECLR
        /// <summary>
        /// Converts <paramref name="str" /> to title case, one word at a time. The first letter of
        /// each word is passed to AddTitlecaseLetter; the rest of the word is copied from a
        /// lowercased version of the input when the word contains a lowercase letter, and copied
        /// unchanged otherwise (so acronyms such as "USA" are preserved).
        /// </summary>
        /// <param name="str">The string to convert.</param>
        /// <returns>The converted string; an empty input is returned as-is.</returns>
        /// <exception cref="T:System.ArgumentNullException"><paramref name="str" /> is null.</exception>
        public unsafe String ToTitleCase(String str)
        {
            if (str == null)
            {
                throw new ArgumentNullException("str");
            }
            // Code Contracts marker: the null check above forms the method's precondition block.
            Contract.EndContractBlock();
            if (str.Length == 0)
            {
                return(str);
            }

            StringBuilder result        = new StringBuilder();
            // Lowercased copy of str; only created (via ToLower) the first time it is needed.
            String        lowercaseData = null;

            // str is passed to the helpers by ref, so str.Length is re-read on every iteration.
            for (int i = 0; i < str.Length; i++)
            {
                UnicodeCategory charType;
                int             charLen;

                charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
                if (Char.CheckLetter(charType))
                {
                    // Do the titlecasing for the first character of the word.
                    i = AddTitlecaseLetter(ref result, ref str, i, charLen) + 1;

                    //
                    // Convert the characters until the end of this word
                    // to lowercase.
                    //
                    int lowercaseStart = i;

                    //
                    // Use hasLowerCase flag to prevent from lowercasing acronyms (like "URT", "USA", etc)
                    // This is in line with Word 2000 behavior of titlecasing.
                    //
                    bool hasLowerCase = (charType == UnicodeCategory.LowercaseLetter);
                    // Use a loop to find all of the other letters following this letter.
                    while (i < str.Length)
                    {
                        charType = CharUnicodeInfo.InternalGetUnicodeCategory(str, i, out charLen);
                        if (IsLetterCategory(charType))
                        {
                            if (charType == UnicodeCategory.LowercaseLetter)
                            {
                                hasLowerCase = true;
                            }
                            i += charLen;
                        }
                        else if (str[i] == '\'')
                        {
                            //
                            // An apostrophe is kept inside the word: flush everything scanned so far
                            // (including the apostrophe), then force hasLowerCase so the text that
                            // follows the apostrophe is taken from the lowercased copy.
                            //
                            i++;
                            if (hasLowerCase)
                            {
                                if (lowercaseData == null)
                                {
                                    lowercaseData = this.ToLower(str);
                                }
                                result.Append(lowercaseData, lowercaseStart, i - lowercaseStart);
                            }
                            else
                            {
                                result.Append(str, lowercaseStart, i - lowercaseStart);
                            }
                            lowercaseStart = i;
                            hasLowerCase   = true;
                        }
                        else if (!IsWordSeparator(charType))
                        {
                            // This category is considered to be part of the word.
                            // This is any category that IsWordSeparator does not report as a separator.
                            i += charLen;
                        }
                        else
                        {
                            // A word separator. Break out of the loop.
                            break;
                        }
                    }

                    // Number of scanned characters of this word that have not been appended yet.
                    int count = i - lowercaseStart;

                    if (count > 0)
                    {
                        if (hasLowerCase)
                        {
                            if (lowercaseData == null)
                            {
                                lowercaseData = this.ToLower(str);
                            }
                            result.Append(lowercaseData, lowercaseStart, count);
                        }
                        else
                        {
                            // No lowercase letter was seen in this word: copy it unchanged.
                            result.Append(str, lowercaseStart, count);
                        }
                    }

                    if (i < str.Length)
                    {
                        // not a letter, just append it
                        i = AddNonLetter(ref result, ref str, i, charLen);
                    }
                }
                else
                {
                    // not a letter, just append it
                    i = AddNonLetter(ref result, ref str, i, charLen);
                }
            }
            return(result.ToString());
        }
Example #28
0
 /// <summary>Returns the Unicode category for the character of <paramref name="str" /> at <paramref name="index" />.</summary>
 /// <param name="str">The string containing the character.</param>
 /// <param name="index">Position within <paramref name="str" /> to classify.</param>
 /// <param name="charLength">Set by InternalConvertToUtf32 during the conversion.</param>
 internal static UnicodeCategory InternalGetUnicodeCategory(string str, int index, out int charLength)
 {
     // Convert first (this also fills charLength), then classify the converted value.
     var utf32 = CharUnicodeInfo.InternalConvertToUtf32(str, index, out charLength);
     return CharUnicodeInfo.InternalGetUnicodeCategory(utf32);
 }