/// <summary>
/// Creates a charset for a Unicode category and records a representative character for it.
/// </summary>
/// <param name="cat">The Unicode category stored in <c>m_cat</c>.</param>
internal Charset(UnicodeCategory cat)
 {
     m_cat = cat;

     // Walk upward from U+0000 until the first character of the requested category is reached.
     m_generic = char.MinValue;
     while (Char.GetUnicodeCategory(m_generic) != cat)
     {
         m_generic++;
     }

     // Mark that first matching character as a member of this set.
     m_chars[m_generic] = true;
 }
Example #2
0
 /// <summary>
 /// Checks that <see cref="CharUnicodeInfo.GetUnicodeCategory(string, int)"/> returns the expected
 /// category at every index covered by <paramref name="expected"/>.
 /// </summary>
 /// <param name="s">The string whose characters are classified.</param>
 /// <param name="expected">Expected category for each index of <paramref name="s"/>, in order.</param>
 public void GetUnicodeCategory(string s, UnicodeCategory[] expected)
 {
     int index = 0;
     foreach (UnicodeCategory expectedCategory in expected)
     {
         UnicodeCategory actual = CharUnicodeInfo.GetUnicodeCategory(s, index);
         Assert.Equal(expectedCategory, actual);
         index++;
     }
 }
 /// <summary>
 /// Returns a copy of <paramref name="value"/> with characters kept or removed according to their Unicode category.
 /// </summary>
 /// <param name="value">The string to filter.</param>
 /// <param name="matchRule">
 /// <c>FilterAction.Keep</c> keeps only characters of the given category; any other value removes them.
 /// </param>
 /// <param name="categories">The Unicode category to match.</param>
 /// <returns>A new string containing the characters that passed the filter, in their original order.</returns>
 public static string Filter(this string value, FilterAction matchRule, UnicodeCategory categories)
 {
     // UnicodeCategory is a sequentially numbered enum, not a [Flags] enum, so Enum.HasFlag gives wrong
     // answers on it: UppercaseLetter (0) is "contained" in every value, and any category whose bits are
     // a subset of the argument matches as well. A plain equality test is the only meaningful comparison.
     bool keepMatches = matchRule == FilterAction.Keep;

     return new string(value
         .Where(c => (Char.GetUnicodeCategory(c) == categories) == keepMatches)
         .ToArray());
 }
Example #4
0
		/// <summary>
		/// Creates a lexer that stores the supplied error handler and creates a <c>Tfactory</c> named "TOKEN".
		/// </summary>
		/// <param name="eh">Error handler stored in <c>erh</c>.</param>
		public YyLexer(ErrorHandler eh) 
		{
			erh = eh;
#if (GENTIME)
			// Only compiled when GENTIME is defined: the lexer starts out with the
			// OtherPunctuation category passed to UsingCat and stored in m_gencat.
			UsingCat(UnicodeCategory.OtherPunctuation);
			m_gencat = UnicodeCategory.OtherPunctuation;
#endif
			// The created Tfactory is not stored here; presumably its constructor registers
			// itself with this lexer under the name "TOKEN" — TODO confirm.
			new Tfactory(this,"TOKEN",new TCreator(Tokenfactory));
		}
 /// <summary>
 /// Recomputes fields derived from the deserialized state.
 /// </summary>
 /// <param name="ctx">Streaming context; not used.</param>
 private void OnDeserialized(StreamingContext ctx)
 {
     strLen = endIndex + 1;
     currTextElementLen = nextTextElementLen;

     // A charLen of -1 means the category and length of the current character still have to be computed.
     if (charLen != -1)
     {
         return;
     }

     uc = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);
 }
        /// <summary>
        /// Rebuilds the fields that are derived from other deserialized fields.
        /// </summary>
        /// <param name="ctx">Streaming context; not used.</param>
        private void OnDeserialized(StreamingContext ctx)
        {
            this.strLen = this.endIndex + 1;
            this.currTextElementLen = this.nextTextElementLen;

            // -1 marks a character length that has not been computed yet.
            bool charLenMissing = this.charLen == -1;
            if (charLenMissing)
            {
                this.uc = CharUnicodeInfo.InternalGetUnicodeCategory(this.str, this.index, out this.charLen);
            }
        }
Example #7
0
        /// <summary>
        /// Restores the derived length fields after deserialization and, when needed,
        /// the Unicode category and length of the current character.
        /// </summary>
        /// <param name="ctx">Streaming context; not used.</param>
        private void OnDeserialized(StreamingContext ctx)
        {
            _strLen = _endIndex + 1;
            _currTextElementLen = _nextTextElementLen;

            // Nothing more to do unless the character length is still the -1 placeholder.
            if (_charLen != -1)
            {
                return;
            }

            _uc = CharUnicodeInfo.InternalGetUnicodeCategory(_str, _index, out _charLen);
        }
Example #8
0
 // Returns true iff the category lies in the inclusive range
 // UnicodeCategory.SpaceSeparator .. UnicodeCategory.ParagraphSeparator.
 private static bool IsCategorySeparator(UnicodeCategory category)
 {
     uint value = (uint)category;
     uint firstSeparator = (uint)UnicodeCategory.SpaceSeparator;
     uint lastSeparator = (uint)UnicodeCategory.ParagraphSeparator;

     return UnicodeUtility.IsInRangeInclusive(value, firstSeparator, lastSeparator);
 }
        /// <summary>
        /// Returns <paramref name="text"/> without the characters that belong to <paramref name="category"/>.
        /// </summary>
        /// <param name="text">The text to filter.</param>
        /// <param name="category">The Unicode category whose characters are dropped.</param>
        /// <param name="builder">Scratch builder; it is cleared first and left holding the result.</param>
        /// <returns>The filtered text.</returns>
        private static string RemoveCharacters(string text, UnicodeCategory category, StringBuilder builder)
        {
            builder.Clear();

            for (int index = 0; index < text.Length; index++)
            {
                char current = text[index];
                if (Char.GetUnicodeCategory(current) == category)
                {
                    continue;
                }

                builder.Append(current);
            }

            return builder.ToString();
        }
 /// <summary>Gets a <see cref="BDD"/> that represents the specified <see cref="UnicodeCategory"/>.</summary>
 /// <remarks>
 /// The result is cached in <c>s_categories</c>, indexed by the numeric value of the category.
 /// </remarks>
 public static BDD GetCategory(UnicodeCategory category)
 {
     int slot = (int)category;

     // Use the cached instance when one is already stored.
     var result = Volatile.Read(ref s_categories[slot]);
     if (result is null)
     {
         // Try to store a freshly deserialized instance; CompareExchange returns the previous slot value,
         // which is non-null only when another instance was stored in the meantime.
         result = Interlocked.CompareExchange(ref s_categories[slot], BDD.Deserialize(UnicodeCategoryRanges.GetSerializedCategory(category)), null);
         if (result is null)
         {
             // The slot was empty, so it now holds the instance created above.
             result = s_categories[slot] !;
         }
     }

     return result;
 }
Example #11
0
		/// <summary>
		/// Verifies that the characters returned for <paramref name="category"/> do not include <paramref name="character"/>.
		/// </summary>
		public void GetAllCharactersInUnicodeCategory_CategoryShouldNotContainCharacter_CategoryDoesNotContainCharacter(UnicodeCategory category, char character)
		{
			var charactersInCategory = CharacterUtils.GetAllCharactersInUnicodeCategory(category);

			bool containsCharacter = charactersInCategory.Contains(character);

			Assert.False(containsCharacter);
		}
Example #12
0
        /// <summary>
        /// Checks <c>char.IsWhiteSpace(string, int)</c> against the separator categories and the
        /// control characters that are also treated as white space.
        /// </summary>
        public static void IsWhiteSpace_String_Int()
        {
            UnicodeCategory[] categories =
            {
                UnicodeCategory.SpaceSeparator,
                UnicodeCategory.LineSeparator,
                UnicodeCategory.ParagraphSeparator
            };

            foreach (char c in GetTestChars(categories))
            {
                Assert.True(char.IsWhiteSpace(c.ToString(), 0));
            }

            // Some control chars (U+0009..U+000D and U+0085) are also considered whitespace for legacy reasons.
            Assert.True(char.IsWhiteSpace("\u000b", 0));
            Assert.True(char.IsWhiteSpace("\u0085", 0));

            foreach (char c in GetTestCharsNotInCategory(categories))
            {
                // Those legacy control chars are outside the separator categories but still whitespace,
                // so they are excluded from the negative check.
                bool isLegacyWhiteSpace = (c >= '\x0009' && c <= '\x000d') || c == '\x0085';
                if (!isLegacyWhiteSpace)
                {
                    Assert.False(char.IsWhiteSpace(c.ToString(), 0));
                }
            }
        }
Example #13
0
 // Returns true iff the category lies in the inclusive range
 // UnicodeCategory.ConnectorPunctuation .. UnicodeCategory.OtherPunctuation.
 private static bool IsCategoryPunctuation(UnicodeCategory category)
 {
     uint value = (uint)category;
     uint firstPunctuation = (uint)UnicodeCategory.ConnectorPunctuation;
     uint lastPunctuation = (uint)UnicodeCategory.OtherPunctuation;

     return UnicodeUtility.IsInRangeInclusive(value, firstPunctuation, lastPunctuation);
 }
 /// <summary>
 /// Returns true when <paramref name="uc"/> is one of the five letter categories
 /// (uppercase, lowercase, titlecase, modifier or other letter).
 /// </summary>
 private bool IsLetterCategory(UnicodeCategory uc) {
     switch (uc) {
         case UnicodeCategory.UppercaseLetter:
         case UnicodeCategory.LowercaseLetter:
         case UnicodeCategory.TitlecaseLetter:
         case UnicodeCategory.ModifierLetter:
         case UnicodeCategory.OtherLetter:
             return true;
         default:
             return false;
     }
 }
 /// <summary>Initializes the category, short name and long name of this instance.</summary>
 /// <param name="category">Value assigned to <c>Category</c>.</param>
 /// <param name="shortName">Value assigned to <c>ShortName</c>.</param>
 /// <param name="longName">Value assigned to <c>LongName</c>.</param>
 private UnicodeCategoryInfo(UnicodeCategory category, string shortName, string longName)
 {
     Category = category;
     ShortName = shortName;
     LongName = longName;
 }
Example #16
0
        /// <summary>
        /// Returns a valid C# identifier for the specified name.
        /// </summary>
        /// <param name="name">
        /// The original entity name specified in the model.
        /// </param>
        /// <returns>
        /// A valid C# identifier based on rules described at
        /// http://msdn.microsoft.com/en-us/library/aa664670.aspx, or an empty string when
        /// <paramref name="name"/> contains no character that may start an identifier.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="name"/> is null.</exception>
        public static string Identifier(string name)
        {
            if (name == null)
            {
                throw new ArgumentNullException("name");
            }

            StringBuilder builder = new StringBuilder();

            int i = 0;            // current index within name

            // Skip invalid characters from the beginning of the name.
            // The start character is appended only when one is actually found; otherwise the
            // last skipped (invalid) character, or '\0' for an empty name, would leak into the result.
            while (i < name.Length)
            {
                char first = name[i++];

                // First character must be a letter or _
                if (Char.IsLetter(first) || first == '_')
                {
                    builder.Append(first);
                    break;
                }
            }

            bool capitalizeNext = false;

            // Strip invalid characters from the remainder of the name; the character that follows
            // a stripped run is upper-cased so word boundaries stay visible.
            while (i < name.Length)
            {
                char c = name[i++];

                // Subsequent character can be a letter, a digit, combining, connecting or formatting character
                UnicodeCategory category = Char.GetUnicodeCategory(c);
                if (!Char.IsLetterOrDigit(c) &&
                    category != UnicodeCategory.SpacingCombiningMark &&
                    category != UnicodeCategory.ConnectorPunctuation &&
                    category != UnicodeCategory.Format)
                {
                    capitalizeNext = true;
                    continue;
                }

                if (capitalizeNext)
                {
                    c = Char.ToUpperInvariant(c);
                    capitalizeNext = false;
                }

                builder.Append(c);
            }

            string identifier = builder.ToString();

            // If identifier is a reserved C# keyword
            if (Array.BinarySearch(reservedKeywords, identifier) >= 0)
            {
                // Convert it to a verbatim identifier
                return("@" + identifier);
            }

            return(identifier);
        }
Example #17
0
        /// <summary>
        /// Processes the text: walks <c>m_text</c> character by character, buffers runs of characters
        /// and hands each run to <c>ProcessEnglish</c> or <c>ProcessChinese</c>.
        /// </summary>
        private void Process()
        {
            // Reset the token sequence
            m_tokens = new List<Token>();
            // Reset the backward-segmentation result list
            m_list_backward = new List<string>();
            // Reset the forward-segmentation result list
            m_list_forward = new List<string>();
            // Reset the final result list
            m_list_final = new List<string>();
            // Reset the sentence buffer
            // NOTE(review): m_buf_len is not reset here; presumably ProcessEnglish/ProcessChinese
            // leave it at 0 after flushing — confirm.
            m_buf_char = new char[m_buf_size];
            // Start processing at the beginning of the text
            m_pos = 0;
            // Length of the text
            int len = m_text.Length;
            // Start in the "inside an English run" state
            bool in_english = true;
            // Current character and its Unicode category
            char            chr;
            UnicodeCategory cat;

            // Process the text one character at a time
            while (m_pos < len)
            {
                // Current character
                chr = m_text[m_pos];
                // Unicode category of the current character
                cat = Char.GetUnicodeCategory(chr);
                Debug("m_pos = {0}, chr = {1} ({2})", m_pos, chr, cat);
                // Dispatch on the character's category
                switch (cat)
                {
                /************ CJK characters ************/
                case UnicodeCategory.OtherLetter:     // a letter that is not uppercase, lowercase, titlecase or a modifier letter
                    if (in_english)
                    {
                        // Currently inside an English run: process that run first
                        this.ProcessEnglish();
                        in_english = false;
                    }
                    // Append the current character to the sentence buffer
                    m_buf_char[m_buf_len++] = chr;
                    // If the sentence buffer is full
                    if (m_buf_len == m_buf_size)
                    {
                        // Process the current run
                        this.ProcessChinese();
                    }
                    break;

                /*********** Meaningful characters ***********/
                case UnicodeCategory.LowercaseLetter:    // lowercase letter
                case UnicodeCategory.UppercaseLetter:    // uppercase letter
                case UnicodeCategory.TitlecaseLetter:    // titlecase letter
                case UnicodeCategory.DecimalDigitNumber: // decimal digit
                case UnicodeCategory.LetterNumber:       // number represented by a letter
                    // (ConnectorPunctuation is deliberately not included in this group.)
                    if (!in_english)
                    {
                        // Currently inside a Chinese run: process that run first
                        this.ProcessChinese();
                        in_english = true;
                    }
                    // Append the current character to the sentence buffer
                    m_buf_char[m_buf_len++] = chr;
                    // If the sentence buffer is full
                    if (m_buf_len == m_buf_size)
                    {
                        // Process the current run
                        this.ProcessEnglish();
                    }
                    break;

                /*********** Characters without meaning (separators) ***********/
                default:
                    // Any other character ends the current run; it is not buffered itself
                    if (in_english)
                    {
                        this.ProcessEnglish();
                    }
                    else
                    {
                        this.ProcessChinese();
                    }
                    break;
                }
                // Advance by one character
                m_pos++;
            }
            // Process whatever is left in the sentence buffer
            if (in_english)
            {
                this.ProcessEnglish();
            }
            else
            {
                this.ProcessChinese();
            }
#if DEBUG
            Console.Write("Result: ");
            Token[] arr = m_tokens.ToArray();
            for (int i = 0; i < arr.Length; i++)
            {
                Console.Write(arr[i].Text);
                Console.Write('/');
            }
            Console.WriteLine();
#endif
        }
Example #18
0
        /// <summary>
        /// Returns true when the Unicode category of <paramref name="c"/> is contained in
        /// <c>s_firstIdentifierCharCategories</c>.
        /// </summary>
        private static bool IsValidFirstIdentifierChar(char c)
        {
            return s_firstIdentifierCharCategories.Contains(CharUnicodeInfo.GetUnicodeCategory(c));
        }
        /// <summary>
        /// Maps a Unicode category to its two-letter abbreviation wrapped in colons (for example ":Lu:").
        /// </summary>
        /// <param name="target">The category to translate.</param>
        /// <returns>The colon-delimited abbreviation, or null for a value that is not handled.</returns>
        internal static string GetUnicodeCategoryString(UnicodeCategory target)
        {
            string abbreviation;

            switch (target)
            {
            // Other (C*)
            case UnicodeCategory.Control:                 abbreviation = ":Cc:"; break;
            case UnicodeCategory.Format:                  abbreviation = ":Cf:"; break;
            case UnicodeCategory.OtherNotAssigned:        abbreviation = ":Cn:"; break;
            case UnicodeCategory.PrivateUse:              abbreviation = ":Co:"; break;
            case UnicodeCategory.Surrogate:               abbreviation = ":Cs:"; break;

            // Letters (L*)
            case UnicodeCategory.LowercaseLetter:         abbreviation = ":Ll:"; break;
            case UnicodeCategory.ModifierLetter:          abbreviation = ":Lm:"; break;
            case UnicodeCategory.OtherLetter:             abbreviation = ":Lo:"; break;
            case UnicodeCategory.TitlecaseLetter:         abbreviation = ":Lt:"; break;
            case UnicodeCategory.UppercaseLetter:         abbreviation = ":Lu:"; break;

            // Marks (M*)
            case UnicodeCategory.EnclosingMark:           abbreviation = ":Me:"; break;
            case UnicodeCategory.NonSpacingMark:          abbreviation = ":Mn:"; break;
            case UnicodeCategory.SpacingCombiningMark:    abbreviation = ":Mc:"; break;

            // Numbers (N*)
            case UnicodeCategory.DecimalDigitNumber:      abbreviation = ":Nd:"; break;
            case UnicodeCategory.LetterNumber:            abbreviation = ":Nl:"; break;
            case UnicodeCategory.OtherNumber:             abbreviation = ":No:"; break;

            // Punctuation (P*)
            case UnicodeCategory.ConnectorPunctuation:    abbreviation = ":Pc:"; break;
            case UnicodeCategory.ClosePunctuation:        abbreviation = ":Pe:"; break;
            case UnicodeCategory.DashPunctuation:         abbreviation = ":Pd:"; break;
            case UnicodeCategory.FinalQuotePunctuation:   abbreviation = ":Pf:"; break;
            case UnicodeCategory.OpenPunctuation:         abbreviation = ":Ps:"; break;
            case UnicodeCategory.OtherPunctuation:        abbreviation = ":Po:"; break;
            case UnicodeCategory.InitialQuotePunctuation: abbreviation = ":Pi:"; break;

            // Symbols (S*)
            case UnicodeCategory.CurrencySymbol:          abbreviation = ":Sc:"; break;
            case UnicodeCategory.ModifierSymbol:          abbreviation = ":Sk:"; break;
            case UnicodeCategory.MathSymbol:              abbreviation = ":Sm:"; break;
            case UnicodeCategory.OtherSymbol:             abbreviation = ":So:"; break;

            // Separators (Z*)
            case UnicodeCategory.LineSeparator:           abbreviation = ":Zl:"; break;
            case UnicodeCategory.ParagraphSeparator:      abbreviation = ":Zp:"; break;
            case UnicodeCategory.SpaceSeparator:          abbreviation = ":Zs:"; break;

            default:                                      abbreviation = null; break;
            }

            return abbreviation;
        }
 /// <summary>Gets the <see cref="UnicodeCategoryInfo"/> entry for the specified Unicode category.</summary>
 /// <param name="category">The category to look up; its numeric value is used as the index into the table.</param>
 /// <returns>The entry stored for <paramref name="category"/>.</returns>
 public static UnicodeCategoryInfo Get(UnicodeCategory category)
 {
     int index = (int)category;
     return categories[index];
 }
 // Tests chcategory against a group of category codes stored in the string `category`.
 //
 // The group starts at category[i + 1] and ends at the first entry equal to 0; on return, i
 // (passed by ref) is the index of that terminating 0. Each entry is read as a signed 16-bit value:
 //   * if the first entry is positive, every entry is (category value + 1) and the result is
 //     true when chcategory equals any of them;
 //   * otherwise every entry is -(category value + 1) and the result is true when chcategory
 //     equals none of them (an empty group therefore yields true).
 // The ch parameter is not used by this method.
 private static bool CharInCategoryGroup(char ch, UnicodeCategory chcategory, string category, ref int i)
 {
     // Step onto the first entry of the group.
     i++;
     int num = (short) category[i];
     if (num > 0)
     {
         // Positive group: look for any match.
         bool flag = false;
         while (num != 0)
         {
             if (!flag)
             {
                 // Entries are stored offset by one; undo the offset before comparing.
                 num--;
                 if ((int)chcategory == num)
                 {
                     flag = true;
                 }
             }
             // Keep advancing even after a match so that i ends on the terminator.
             i++;
             num = (short) category[i];
         }
         return flag;
     }
     // Negated group: true unless some entry matches.
     bool flag2 = true;
     while (num != 0)
     {
         if (flag2)
         {
             // Entries are stored as -(value + 1); recover the category value.
             num = -num;
             num--;
             if ((int)chcategory == num)
             {
                 flag2 = false;
             }
         }
         // Keep advancing even after a match so that i ends on the terminator.
         i++;
         num = (short) category[i];
     }
     return flag2;
 }
Example #22
0
        /// <summary>
        /// Checks that <c>char.IsPunctuation(string, int)</c> is true exactly for the seven punctuation categories.
        /// </summary>
        public static void IsPunctuation_String_Int()
        {
            UnicodeCategory[] categories =
            {
                UnicodeCategory.ConnectorPunctuation,
                UnicodeCategory.DashPunctuation,
                UnicodeCategory.OpenPunctuation,
                UnicodeCategory.ClosePunctuation,
                UnicodeCategory.InitialQuotePunctuation,
                UnicodeCategory.FinalQuotePunctuation,
                UnicodeCategory.OtherPunctuation
            };

            // Characters from the listed categories must be reported as punctuation...
            foreach (char c in GetTestChars(categories))
            {
                Assert.True(char.IsPunctuation(c.ToString(), 0));
            }

            // ...and characters from every other category must not.
            foreach (char c in GetTestCharsNotInCategory(categories))
            {
                Assert.False(char.IsPunctuation(c.ToString(), 0));
            }
        }
    /// <summary>
    /// Translates a two-letter Unicode general category abbreviation (for example "Lu")
    /// into the corresponding <see cref="UnicodeCategory"/> value.
    /// </summary>
    /// <param name="strCatAbbrev">The category abbreviation; matched case-sensitively.</param>
    /// <returns>The matching category.</returns>
    /// <exception cref="ArgumentException"><paramref name="strCatAbbrev"/> is null or empty.</exception>
    /// <exception cref="NotSupportedException">The abbreviation is not one of the handled values.</exception>
    protected UnicodeCategory TranslateUnicodeCategory(string strCatAbbrev)
    {
        if (string.IsNullOrEmpty(strCatAbbrev))
        {
            throw new ArgumentException("String null/empty", "strCatAbbrev");
        }

        switch (strCatAbbrev)
        {
        // Letters
        case "Lu": return UnicodeCategory.UppercaseLetter;
        case "Ll": return UnicodeCategory.LowercaseLetter;
        case "Lt": return UnicodeCategory.TitlecaseLetter;
        case "Lm": return UnicodeCategory.ModifierLetter;
        case "Lo": return UnicodeCategory.OtherLetter;

        // Marks
        case "Mn": return UnicodeCategory.NonSpacingMark;
        case "Mc": return UnicodeCategory.SpacingCombiningMark;
        case "Me": return UnicodeCategory.EnclosingMark;

        // Numbers
        case "Nd": return UnicodeCategory.DecimalDigitNumber;
        case "Nl": return UnicodeCategory.LetterNumber;
        case "No": return UnicodeCategory.OtherNumber;

        // Separators
        case "Zs": return UnicodeCategory.SpaceSeparator;
        case "Zl": return UnicodeCategory.LineSeparator;
        case "Zp": return UnicodeCategory.ParagraphSeparator;

        // Other
        case "Cc": return UnicodeCategory.Control;
        case "Cf": return UnicodeCategory.Format;
        case "Cs": return UnicodeCategory.Surrogate;
        case "Co": return UnicodeCategory.PrivateUse;
        case "Cn": return UnicodeCategory.OtherNotAssigned;

        // Punctuation
        case "Pc": return UnicodeCategory.ConnectorPunctuation;
        case "Pd": return UnicodeCategory.DashPunctuation;
        case "Ps": return UnicodeCategory.OpenPunctuation;
        case "Pe": return UnicodeCategory.ClosePunctuation;
        case "Pi": return UnicodeCategory.InitialQuotePunctuation;
        case "Pf": return UnicodeCategory.FinalQuotePunctuation;
        case "Po": return UnicodeCategory.OtherPunctuation;

        // Symbols
        case "Sm": return UnicodeCategory.MathSymbol;
        case "Sc": return UnicodeCategory.CurrencySymbol;
        case "Sk": return UnicodeCategory.ModifierSymbol;
        case "So": return UnicodeCategory.OtherSymbol;

        default:
            throw new NotSupportedException(UNSUPPORTED_CATEGORY + strCatAbbrev);
        }
    }
Example #24
0
        /// <summary>
        /// Fills <c>slugContext.Slug</c> from <c>slugContext.Title</c> unless a handler has already adjusted it.
        /// </summary>
        /// <remarks>
        /// The title is lower-cased and decomposed (FormKD); lowercase letters, other letters, decimal digits
        /// and the symbols '-', '_' and '~' are kept, non-spacing marks are dropped and every other
        /// character becomes a dash. Runs of dashes are collapsed, the result is cut to 1000 characters
        /// and leading/trailing '-', '_' and '.' are trimmed.
        /// </remarks>
        /// <param name="slugContext">Context carrying the title and receiving the slug.</param>
        /// <returns>The value of <c>slugContext.Slug</c> after the "filled" handlers have run.</returns>
        private string Slugify(FillSlugContext slugContext)
        {
            _slugEventHandler.FillingSlugFromTitle(slugContext);

            if (!slugContext.Adjusted)
            {
                // Lower-case with the invariant culture so the same title always produces the same slug,
                // whatever the current thread culture is (e.g. Turkish dotted/dotless i).
                string stFormKD = slugContext.Title.ToLowerInvariant().Normalize(NormalizationForm.FormKD);
                var    sb       = new StringBuilder(stFormKD.Length);

                foreach (char t in stFormKD)
                {
                    // Allowed symbols
                    if (t == '-' || t == '_' || t == '~')
                    {
                        sb.Append(t);
                        continue;
                    }

                    UnicodeCategory uc = CharUnicodeInfo.GetUnicodeCategory(t);
                    switch (uc)
                    {
                    case UnicodeCategory.LowercaseLetter:
                    case UnicodeCategory.OtherLetter:
                    case UnicodeCategory.DecimalDigitNumber:
                        // Keep letters and digits
                        sb.Append(t);
                        break;

                    case UnicodeCategory.NonSpacingMark:
                        // Remove diacritics
                        break;

                    default:
                        // Replace all other chars with dash
                        sb.Append('-');
                        break;
                    }
                }

                string normalized = sb.ToString().Normalize(NormalizationForm.FormC);

                // Collapse each run of dashes into a single dash in one pass.
                var collapsed = new StringBuilder(normalized.Length);
                foreach (char ch in normalized)
                {
                    if (ch == '-' && collapsed.Length > 0 && collapsed[collapsed.Length - 1] == '-')
                    {
                        continue;
                    }
                    collapsed.Append(ch);
                }

                string slug = collapsed.ToString();

                if (slug.Length > 1000)
                {
                    slug = slug.Substring(0, 1000);
                }

                slugContext.Slug = slug.Trim('-', '_', '.');
            }

            _slugEventHandler.FilledSlugFromTitle(slugContext);

            return(slugContext.Slug);
        }
Example #25
0
        /// <summary>
        /// Checks that <c>char.IsLetterOrDigit(string, int)</c> is true exactly for the letter categories
        /// and <c>DecimalDigitNumber</c>.
        /// </summary>
        public static void IsLetterOrDigit_String_Int()
        {
            UnicodeCategory[] categories =
            {
                UnicodeCategory.UppercaseLetter,
                UnicodeCategory.LowercaseLetter,
                UnicodeCategory.TitlecaseLetter,
                UnicodeCategory.ModifierLetter,
                UnicodeCategory.OtherLetter,
                UnicodeCategory.DecimalDigitNumber
            };

            // Characters from the listed categories must be reported as letter-or-digit...
            foreach (char c in GetTestChars(categories))
            {
                Assert.True(char.IsLetterOrDigit(c.ToString(), 0));
            }

            // ...and characters from every other category must not.
            foreach (char c in GetTestCharsNotInCategory(categories))
            {
                Assert.False(char.IsLetterOrDigit(c.ToString(), 0));
            }
        }
Example #26
0
        /// <summary>
        /// Checks that <see cref="CharUnicodeInfo.GetUnicodeCategory(char)"/> returns the expected category.
        /// </summary>
        /// <param name="ch">The character to classify.</param>
        /// <param name="expected">The category <paramref name="ch"/> is expected to have.</param>
        public void GetUnicodeCategory(char ch, UnicodeCategory expected)
        {
            UnicodeCategory actual = CharUnicodeInfo.GetUnicodeCategory(ch);
            bool categoriesMatch = actual == expected;

            Assert.True(categoriesMatch, ErrorMessage(ch, expected, actual));
        }
Example #27
0
        /// <summary>
        /// Checks that <c>char.IsSymbol(string, int)</c> is true exactly for the four symbol categories.
        /// </summary>
        public static void IsSymbol_String_Int()
        {
            UnicodeCategory[] categories =
            {
                UnicodeCategory.MathSymbol,
                UnicodeCategory.ModifierSymbol,
                UnicodeCategory.CurrencySymbol,
                UnicodeCategory.OtherSymbol
            };

            // Characters from the listed categories must be reported as symbols...
            foreach (char c in GetTestChars(categories))
            {
                Assert.True(char.IsSymbol(c.ToString(), 0));
            }

            // ...and characters from every other category must not.
            foreach (char c in GetTestCharsNotInCategory(categories))
            {
                Assert.False(char.IsSymbol(c.ToString(), 0));
            }
        }
Example #28
0
 // Returns true iff the category lies in the inclusive range
 // UnicodeCategory.MathSymbol .. UnicodeCategory.OtherSymbol.
 private static bool IsCategorySymbol(UnicodeCategory category)
 {
     uint value = (uint)category;
     uint firstSymbol = (uint)UnicodeCategory.MathSymbol;
     uint lastSymbol = (uint)UnicodeCategory.OtherSymbol;

     return UnicodeUtility.IsInRangeInclusive(value, firstSymbol, lastSymbol);
 }
 /// <summary>
 /// Returns true when <paramref name="uc"/> is one of the three mark categories
 /// (non-spacing, spacing combining or enclosing).
 /// </summary>
 internal static bool IsCombiningCategory(UnicodeCategory uc) {
     Contract.Assert(uc >= 0, "uc >= 0");

     switch (uc) {
         case UnicodeCategory.NonSpacingMark:
         case UnicodeCategory.SpacingCombiningMark:
         case UnicodeCategory.EnclosingMark:
             return true;
         default:
             return false;
     }
 }
Example #30
0
		/// <summary>
		/// Maps a Unicode category onto the coarser <c>CharacterClass</c> grouping.
		/// </summary>
		/// <param name="c">The Unicode category to classify.</param>
		/// <returns>
		/// <c>Whitespace</c> for separators and control characters, <c>IdentifierPart</c> for letters and
		/// decimal digits, <c>CombiningMark</c> for the mark categories, otherwise <c>Other</c>.
		/// </returns>
		static CharacterClass GetCharacterClass(UnicodeCategory c)
		{
			if (c == UnicodeCategory.SpaceSeparator ||
			    c == UnicodeCategory.LineSeparator ||
			    c == UnicodeCategory.ParagraphSeparator ||
			    c == UnicodeCategory.Control) {
				return CharacterClass.Whitespace;
			}

			if (c == UnicodeCategory.UppercaseLetter ||
			    c == UnicodeCategory.LowercaseLetter ||
			    c == UnicodeCategory.TitlecaseLetter ||
			    c == UnicodeCategory.ModifierLetter ||
			    c == UnicodeCategory.OtherLetter ||
			    c == UnicodeCategory.DecimalDigitNumber) {
				return CharacterClass.IdentifierPart;
			}

			if (c == UnicodeCategory.NonSpacingMark ||
			    c == UnicodeCategory.SpacingCombiningMark ||
			    c == UnicodeCategory.EnclosingMark) {
				return CharacterClass.CombiningMark;
			}

			return CharacterClass.Other;
		}
Example #31
0
            /// <summary>
            /// Verifies that the <c>Category</c> of <paramref name="codepoint"/> equals <paramref name="expected"/>.
            /// </summary>
            public void ShouldReturnExpectedCategory_ForCodePoint(CodePoint codepoint, UnicodeCategory expected)
            {
                // Act + Assert
                Assert.Equal(expected, codepoint.Category);
            }
        // input: a string like loadByWhateverStuff
        // output: a string like Load By Whatever Stuff
        // BBKing -> BBKing
        // BBOKing -> BboKing
        // LoadBy25Years -> Load By 25 Years
        // SoftFluent.PetShop -> Soft Fluent. Pet Shop
        // Data2_FileName -> Data 2 File Name
        // _WhatIs -> _What is
        // __WhatIs -> __What is
        // __What__Is -> __What is
        // MyParam1 -> My Param 1
        // MyParam1Baby -> My Param1 Baby (if DontDecamelizeNumbers)
        /// <summary>
        /// Splits a camel-cased identifier into separate words, inserting a space where the Unicode
        /// category of the characters changes (subject to <c>IsNewCategory</c> and the supplied options).
        /// </summary>
        /// <param name="text">The text to decamelize; null or empty text is returned unchanged.</param>
        /// <param name="options">Options controlling the conversion; a default instance is used when null.</param>
        /// <returns>The decamelized text.</returns>
        public virtual string Decamelize(string text, DecamelizeOptions options)
        {
            if (string.IsNullOrEmpty(text))
            {
                return(text);
            }

            if (options == null)
            {
                options = new DecamelizeOptions();
            }

            StringBuilder sb = new StringBuilder(text.Length);

            // lastCategory: category that starts the current word (an uppercase start is tracked as lowercase);
            // prevCategory: category of the previously handled character.
            UnicodeCategory lastCategory = CharUnicodeInfo.GetUnicodeCategory(text[0]);
            UnicodeCategory prevCategory = lastCategory;

            if (lastCategory == UnicodeCategory.UppercaseLetter)
            {
                lastCategory = UnicodeCategory.LowercaseLetter;
            }

            int  i = 0;
            bool firstIsStillUnderscore = (text[0] == '_');

            // First character: either an escape sequence to decode, or the character itself.
            if (((options.TextOptions & DecamelizeTextOptions.UnescapeUnicode) == DecamelizeTextOptions.UnescapeUnicode) && (CanUnicodeEscape(text, 0)))
            {
                sb.Append(GetUnicodeEscape(text, ref i));
            }
            else if (((options.TextOptions & DecamelizeTextOptions.UnescapeHexadecimal) == DecamelizeTextOptions.UnescapeHexadecimal) && (CanHexadecimalEscape(text, 0)))
            {
                sb.Append(GetHexadecimalEscape(text, ref i));
            }
            else
            {
                if ((options.TextOptions & DecamelizeTextOptions.ForceFirstUpper) == DecamelizeTextOptions.ForceFirstUpper)
                {
                    sb.Append(Char.ToUpper(text[0]));
                }
                else
                {
                    sb.Append(text[0]);
                }
            }
            // separated: true when the last thing written already acts as a word boundary.
            bool separated  = false;
            bool keepFormat = (options.TextOptions & DecamelizeTextOptions.KeepFormattingIndices) == DecamelizeTextOptions.KeepFormattingIndices;

            for (i++; i < text.Length; i++)
            {
                char c = text[i];
                if (((options.TextOptions & DecamelizeTextOptions.UnescapeUnicode) == DecamelizeTextOptions.UnescapeUnicode) && (CanUnicodeEscape(text, i)))
                {
                    sb.Append(GetUnicodeEscape(text, ref i));
                    separated = true;
                }
                else if (((options.TextOptions & DecamelizeTextOptions.UnescapeHexadecimal) == DecamelizeTextOptions.UnescapeHexadecimal) && (CanHexadecimalEscape(text, i)))
                {
                    sb.Append(GetHexadecimalEscape(text, ref i));
                    separated = true;
                }
                else if (c == '_')
                {
                    // Underscores become spaces, except leading ones when KeepFirstUnderscores is set.
                    if ((!firstIsStillUnderscore) || ((options.TextOptions & DecamelizeTextOptions.KeepFirstUnderscores) != DecamelizeTextOptions.KeepFirstUnderscores))
                    {
                        sb.Append(' ');
                        separated = true;
                    }
                    else
                    {
                        sb.Append(c);
                    }
                }
                else
                {
                    UnicodeCategory category = CharUnicodeInfo.GetUnicodeCategory(c);
                    switch (category)
                    {
                    // Punctuation-like characters: written as-is and followed by a space.
                    case UnicodeCategory.ClosePunctuation:
                    case UnicodeCategory.ConnectorPunctuation:
                    case UnicodeCategory.DashPunctuation:
                    case UnicodeCategory.EnclosingMark:
                    case UnicodeCategory.FinalQuotePunctuation:
                    case UnicodeCategory.Format:
                    case UnicodeCategory.InitialQuotePunctuation:
                    case UnicodeCategory.LineSeparator:
                    case UnicodeCategory.OpenPunctuation:
                    case UnicodeCategory.OtherPunctuation:
                    case UnicodeCategory.ParagraphSeparator:
                    case UnicodeCategory.SpaceSeparator:
                    case UnicodeCategory.SpacingCombiningMark:
                        if ((keepFormat) && (c == '{'))
                        {
                            // Copy a "{...}" formatting index verbatim. The bound check stops at the end of
                            // the text so an unterminated '{' no longer reads past the last character.
                            while ((c != '}') && (i < text.Length))
                            {
                                c = text[i++];
                                sb.Append(c);
                            }
                            // Step back onto the last copied character; the for loop advances past it.
                            i--;
                            separated = true;
                            break;
                        }

                        if ((options.TextOptions & DecamelizeTextOptions.ForceRestLower) == DecamelizeTextOptions.ForceRestLower)
                        {
                            sb.Append(Char.ToLower(c));
                        }
                        else
                        {
                            sb.Append(c);
                        }
                        sb.Append(' ');
                        separated = true;
                        break;

                    // Word characters: a change of category may start a new word.
                    case UnicodeCategory.LetterNumber:
                    case UnicodeCategory.DecimalDigitNumber:
                    case UnicodeCategory.OtherNumber:

                    case UnicodeCategory.CurrencySymbol:
                    case UnicodeCategory.LowercaseLetter:
                    case UnicodeCategory.MathSymbol:
                    case UnicodeCategory.ModifierLetter:
                    case UnicodeCategory.ModifierSymbol:
                    case UnicodeCategory.NonSpacingMark:
                    case UnicodeCategory.OtherLetter:
                    case UnicodeCategory.OtherNotAssigned:
                    case UnicodeCategory.Control:
                    case UnicodeCategory.OtherSymbol:
                    case UnicodeCategory.Surrogate:
                    case UnicodeCategory.PrivateUse:
                    case UnicodeCategory.TitlecaseLetter:
                    case UnicodeCategory.UppercaseLetter:
                        if (((category != lastCategory) && (c != ' ')) && (IsNewCategory(category, options)))
                        {
                            // Insert the word separator unless one was just written, the previous
                            // character was uppercase, or leading underscores are being preserved.
                            if ((!separated) && (prevCategory != UnicodeCategory.UppercaseLetter) &&
                                ((!firstIsStillUnderscore) || ((options.TextOptions & DecamelizeTextOptions.KeepFirstUnderscores) != DecamelizeTextOptions.KeepFirstUnderscores)))
                            {
                                sb.Append(' ');
                            }

                            if ((options.TextOptions & DecamelizeTextOptions.ForceRestLower) != 0)
                            {
                                sb.Append(Char.ToLower(c));
                            }
                            else
                            {
                                sb.Append(Char.ToUpper(c));
                            }

                            // Track the new word's category from its upper-cased first character.
                            char upper = Char.ToUpper(c);
                            category     = CharUnicodeInfo.GetUnicodeCategory(upper);
                            lastCategory = category == UnicodeCategory.UppercaseLetter ? UnicodeCategory.LowercaseLetter : category;
                        }
                        else
                        {
                            if ((options.TextOptions & DecamelizeTextOptions.ForceRestLower) == DecamelizeTextOptions.ForceRestLower)
                            {
                                sb.Append(Char.ToLower(c));
                            }
                            else
                            {
                                sb.Append(c);
                            }
                        }
                        separated = false;
                        break;
                    }
                    firstIsStillUnderscore = firstIsStillUnderscore && (c == '_');
                    prevCategory           = category;
                }
            }

            // Optionally replace the inserted spaces; the first matching option wins.
            if ((options.TextOptions & DecamelizeTextOptions.ReplaceSpacesByUnderscore) == DecamelizeTextOptions.ReplaceSpacesByUnderscore)
            {
                return(sb.Replace(' ', '_').ToString());
            }

            if ((options.TextOptions & DecamelizeTextOptions.ReplaceSpacesByMinus) == DecamelizeTextOptions.ReplaceSpacesByMinus)
            {
                return(sb.Replace(' ', '-').ToString());
            }

            if ((options.TextOptions & DecamelizeTextOptions.ReplaceSpacesByDot) == DecamelizeTextOptions.ReplaceSpacesByDot)
            {
                return(sb.Replace(' ', '.').ToString());
            }

            return(sb.ToString());
        }
Example #33
0
        /// <summary>
        /// Returns the number of UTF-16 code units occupied by the text element that
        /// starts at <paramref name="index"/> in <paramref name="str"/>.
        /// </summary>
        /// <param name="str">The string to inspect.</param>
        /// <param name="index">Zero-based position of the first code unit of the element.</param>
        /// <returns>
        /// 2 for a high surrogate immediately followed by a low surrogate; 1 for a lone
        /// surrogate or a combining mark that starts the element; otherwise 1 plus the number
        /// of combining marks (NonSpacingMark, SpacingCombiningMark, EnclosingMark) that
        /// directly follow the base character. When NET_2_0 is defined, 0 is returned for an
        /// index at or past the end of the string.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="str"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="index"/> is negative, or (when NET_2_0 is not defined) not less than
        /// the length of <paramref name="str"/>.
        /// </exception>
        static int GetNextTextElementLength(string str, int index)
        {
            if (str == null)
            {
                // The single-string constructor takes the parameter NAME, not a message.
                throw new ArgumentNullException("str");
            }

#if NET_2_0
            if (index >= str.Length)
            {
                return(0);
            }
            if (index < 0)
#else
            if (index < 0 || index >= str.Length)
#endif
            { throw new ArgumentOutOfRangeException("index", "Index is not valid"); }

            char            ch  = str[index];
            UnicodeCategory cat = char.GetUnicodeCategory(ch);

            if (cat == UnicodeCategory.Surrogate)
            {
                /* Only a high surrogate directly followed by a low
                 * surrogate forms a pair; anything else stands alone.
                 */
                bool isValidPair = ch >= 0xD800 && ch <= 0xDBFF &&
                                   (index + 1) < str.Length &&
                                   str[index + 1] >= 0xDC00 &&
                                   str[index + 1] <= 0xDFFF;

                return(isValidPair ? 2 : 1);
            }

            if (cat == UnicodeCategory.NonSpacingMark ||
                cat == UnicodeCategory.SpacingCombiningMark ||
                cat == UnicodeCategory.EnclosingMark)
            {
                /* A combining mark with no base character in front of it */
                return(1);
            }

            /* A base character, which may or may not be followed
             * by a series of combining characters
             */
            int count = 1;

            while (index + count < str.Length)
            {
                cat = char.GetUnicodeCategory(str[index + count]);
                if (cat != UnicodeCategory.NonSpacingMark &&
                    cat != UnicodeCategory.SpacingCombiningMark &&
                    cat != UnicodeCategory.EnclosingMark)
                {
                    /* Finished the sequence */
                    break;
                }
                count++;
            }

            return(count);
        }
 // Token: 0x06002CDF RID: 11487 RVA: 0x000AB4FE File Offset: 0x000A96FE
 /// <summary>
 /// Tells whether <paramref name="uc"/> is one of the three mark categories:
 /// NonSpacingMark, SpacingCombiningMark or EnclosingMark.
 /// </summary>
 /// <param name="uc">The Unicode category to classify.</param>
 /// <returns>true for a mark category; otherwise false.</returns>
 internal static bool IsCombiningCategory(UnicodeCategory uc)
 {
     switch (uc)
     {
         case UnicodeCategory.NonSpacingMark:
         case UnicodeCategory.SpacingCombiningMark:
         case UnicodeCategory.EnclosingMark:
             return true;

         default:
             return false;
     }
 }
Example #35
0
		// Returns true when the Unicode category of `c` is exactly `uc`.
		private static bool IsCategory (UnicodeCategory uc, char c) {
			UnicodeCategory actual = Char.GetUnicodeCategory (c);
			return actual == uc;
		}
        // Token: 0x06002CCC RID: 11468 RVA: 0x000AB234 File Offset: 0x000A9434
        /// <summary>
        /// Tells whether the character at <paramref name="index"/> in <paramref name="s"/>
        /// belongs to one of the separator categories: SpaceSeparator, LineSeparator or
        /// ParagraphSeparator.
        /// </summary>
        /// <param name="s">The string containing the character.</param>
        /// <param name="index">Position of the character inside <paramref name="s"/>.</param>
        /// <returns>true for a separator category; otherwise false.</returns>
        internal static bool IsWhiteSpace(string s, int index)
        {
            UnicodeCategory unicodeCategory = CharUnicodeInfo.GetUnicodeCategory(s, index);

            // Compare explicitly. Subtracting SpaceSeparator and testing "<= 2" is a signed
            // comparison, so every category declared before SpaceSeparator (letters, digits,
            // marks, ...) yields a negative difference and would wrongly pass.
            return(unicodeCategory == UnicodeCategory.SpaceSeparator ||
                   unicodeCategory == UnicodeCategory.LineSeparator ||
                   unicodeCategory == UnicodeCategory.ParagraphSeparator);
        }
Example #37
0
		// Verifies that the set returned for `category` includes `character`.
		public void GetAllCharactersInUnicodeCategory_CategoryShouldContainCharacter_CategoryContainsCharacter(UnicodeCategory category, char character)
		{
			var charactersInCategory = CharacterUtils.GetAllCharactersInUnicodeCategory(category);
			bool containsCharacter = charactersInCategory.Contains(character);

			Assert.True(containsCharacter);
		}
        // Token: 0x06002CCD RID: 11469 RVA: 0x000AB254 File Offset: 0x000A9454
        /// <summary>
        /// Tells whether <paramref name="c"/> belongs to one of the separator categories:
        /// SpaceSeparator, LineSeparator or ParagraphSeparator.
        /// </summary>
        /// <param name="c">The character to classify.</param>
        /// <returns>true for a separator category; otherwise false.</returns>
        internal static bool IsWhiteSpace(char c)
        {
            UnicodeCategory unicodeCategory = CharUnicodeInfo.GetUnicodeCategory(c);

            // Compare explicitly. Subtracting SpaceSeparator and testing "<= 2" is a signed
            // comparison, so every category declared before SpaceSeparator (letters, digits,
            // marks, ...) yields a negative difference and would wrongly pass.
            return(unicodeCategory == UnicodeCategory.SpaceSeparator ||
                   unicodeCategory == UnicodeCategory.LineSeparator ||
                   unicodeCategory == UnicodeCategory.ParagraphSeparator);
        }
Example #39
0
		// Returns true when `category` is a separator, control, format or
		// punctuation category; every other category yields false.
		static bool IsSeparator (UnicodeCategory category)
		{
			bool isSpacing =
				category == UnicodeCategory.SpaceSeparator ||
				category == UnicodeCategory.LineSeparator ||
				category == UnicodeCategory.ParagraphSeparator;

			bool isControlOrFormat =
				category == UnicodeCategory.Control ||
				category == UnicodeCategory.Format;

			bool isPunctuation =
				category == UnicodeCategory.ConnectorPunctuation ||
				category == UnicodeCategory.DashPunctuation ||
				category == UnicodeCategory.OpenPunctuation ||
				category == UnicodeCategory.ClosePunctuation ||
				category == UnicodeCategory.InitialQuotePunctuation ||
				category == UnicodeCategory.FinalQuotePunctuation ||
				category == UnicodeCategory.OtherPunctuation;

			return isSpacing || isControlOrFormat || isPunctuation;
		}
Example #40
0
            /* false */ (0 << 29);  // OtherNotAssigned = 29;

        // Tests the bit of wordSeparatorMask whose position equals the numeric
        // value of `category`; a set bit means the category separates words.
        private static bool IsWordSeparator(UnicodeCategory category)
        {
            int categoryBit = 1 << (int)category;

            return((wordSeparatorMask & categoryBit) != 0);
        }
Example #41
0
 /// <summary>
 /// Moves the current position back to the start index and, when that position is
 /// inside the string, refreshes the cached category and char length for it.
 /// </summary>
 public void Reset()
 {
     _index = _startIndex;
     if (_index >= _strLen)
     {
         // Start position is at or past the end: there is no character to classify.
         return;
     }

     // Get the category (and char count) of the character at the start position.
     _uc = CharUnicodeInfo.InternalGetUnicodeCategory(_str, _index, out _charLen);
 }
Example #42
0
 /// <summary>
 /// Tells whether <paramref name="c"/> lies between <paramref name="min"/> and
 /// <paramref name="max"/> (both inclusive) using a single unsigned comparison.
 /// </summary>
 private static bool IsInRange(UnicodeCategory c, UnicodeCategory min, UnicodeCategory max)
 {
     // A value below min produces a negative difference, which becomes a large
     // unsigned number after the cast and therefore fails the comparison.
     uint offsetFromMin = (uint)(c - min);
     uint rangeWidth = (uint)(max - min);

     return offsetFromMin <= rangeWidth;
 }
 /// <summary>Builds an AST node for the code points of a single <see cref="UnicodeCategory"/>.</summary>
 /// <param name="cat">The general category; stored in the Category member.</param>
 private CategoryAST(UnicodeCategory cat)
 {
     // The parameterless base constructor is invoked implicitly.
     Category = cat;
 }
Example #44
0
 /// <summary>
 /// Checks whether the specified category belongs to the letter categories, by
 /// delegating to IsInRange with UppercaseLetter and OtherLetter as the bounds.
 /// </summary>
 /// <param name="uc">The Unicode category to classify.</param>
 internal static bool CheckLetter(UnicodeCategory uc)
     => IsInRange(uc, UnicodeCategory.UppercaseLetter, UnicodeCategory.OtherLetter);
 /// <summary>
 /// Moves the current position back to the start index and, when that position is
 /// inside the string, refreshes the cached category and char length for it.
 /// </summary>
 [System.Security.SecuritySafeCritical]  // auto-generated
 public void Reset()
 {
     index = startIndex;
     if (index >= strLen)
     {
         // Start position is at or past the end: there is no character to classify.
         return;
     }

     // Get the category (and char count) of the character at the start position.
     uc = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out charLen);
 }
Example #46
0
 /// <summary>
 /// Checks whether the specified category belongs to the punctuation categories, by
 /// delegating to IsInRange with ConnectorPunctuation and OtherPunctuation as the bounds.
 /// </summary>
 /// <param name="uc">The Unicode category to classify.</param>
 internal static bool CheckPunctuation(UnicodeCategory uc)
     => IsInRange(uc, UnicodeCategory.ConnectorPunctuation, UnicodeCategory.OtherPunctuation);
            /* false */ (0 << 29);  // OtherNotAssigned = 29;

        // Tests the bit of wordSeparatorMask whose position equals the numeric
        // value of `category`; a set bit means the category separates words.
        private bool IsWordSeparator(UnicodeCategory category) {
            int categoryBit = 1 << (int)category;

            return (wordSeparatorMask & categoryBit) != 0;
        }
Example #48
0
 /// <summary>
 /// Checks whether the specified category belongs to the letter or digit categories:
 /// anything CheckLetter accepts, plus DecimalDigitNumber.
 /// </summary>
 /// <param name="uc">The Unicode category to classify.</param>
 internal static bool CheckLetterOrDigit(UnicodeCategory uc)
 {
     if (CheckLetter(uc))
     {
         return true;
     }

     return uc == UnicodeCategory.DecimalDigitNumber;
 }
Example #49
0
        // char.IsLetter(char) must accept every test character from the five letter
        // categories and reject the test characters from all other categories.
        public static void IsLetter_Char()
        {
            UnicodeCategory[] letterCategories =
            {
                UnicodeCategory.UppercaseLetter,
                UnicodeCategory.LowercaseLetter,
                UnicodeCategory.TitlecaseLetter,
                UnicodeCategory.ModifierLetter,
                UnicodeCategory.OtherLetter
            };

            foreach (char letter in GetTestChars(letterCategories))
            {
                Assert.True(char.IsLetter(letter));
            }

            foreach (char nonLetter in GetTestCharsNotInCategory(letterCategories))
            {
                Assert.False(char.IsLetter(nonLetter));
            }
        }
Example #50
0
        /// <summary>
        /// Checks whether the specified category belongs to the number categories, by
        /// delegating to IsInRange with DecimalDigitNumber and OtherNumber as the bounds.
        /// </summary>
        /// <param name="uc">The Unicode category to classify.</param>
        internal static bool CheckNumber(UnicodeCategory uc)
            => IsInRange(uc, UnicodeCategory.DecimalDigitNumber, UnicodeCategory.OtherNumber);
Example #51
0
        // char.IsNumber(string, int) must accept every test character from the three
        // number categories and reject the test characters from all other categories.
        public static void IsNumber_String_Int()
        {
            UnicodeCategory[] numberCategories =
            {
                UnicodeCategory.DecimalDigitNumber,
                UnicodeCategory.LetterNumber,
                UnicodeCategory.OtherNumber
            };

            foreach (char number in GetTestChars(numberCategories))
            {
                string text = number.ToString();
                Assert.True(char.IsNumber(text, 0));
            }

            foreach (char nonNumber in GetTestCharsNotInCategory(numberCategories))
            {
                string text = nonNumber.ToString();
                Assert.False(char.IsNumber(text, 0));
            }
        }
Example #52
0
        /// <summary>
        /// Checks whether the specified category belongs to the separator categories, by
        /// delegating to IsInRange with SpaceSeparator and ParagraphSeparator as the bounds.
        /// </summary>
        /// <param name="uc">The Unicode category to classify.</param>
        internal static bool CheckSeparator(UnicodeCategory uc)
            => IsInRange(uc, UnicodeCategory.SpaceSeparator, UnicodeCategory.ParagraphSeparator);
Example #53
0
        // char.IsSeparator(string, int) must accept every test character from the three
        // separator categories and reject the test characters from all other categories.
        public static void IsSeparator_String_Int()
        {
            UnicodeCategory[] separatorCategories =
            {
                UnicodeCategory.SpaceSeparator,
                UnicodeCategory.LineSeparator,
                UnicodeCategory.ParagraphSeparator
            };

            foreach (char separator in GetTestChars(separatorCategories))
            {
                string text = separator.ToString();
                Assert.True(char.IsSeparator(text, 0));
            }

            foreach (char nonSeparator in GetTestCharsNotInCategory(separatorCategories))
            {
                string text = nonSeparator.ToString();
                Assert.False(char.IsSeparator(text, 0));
            }
        }
Example #54
0
        /// <summary>
        /// Checks whether the specified category belongs to the symbol categories, by
        /// delegating to IsInRange with MathSymbol and OtherSymbol as the bounds.
        /// </summary>
        /// <param name="uc">The Unicode category to classify.</param>
        internal static bool CheckSymbol(UnicodeCategory uc)
            => IsInRange(uc, UnicodeCategory.MathSymbol, UnicodeCategory.OtherSymbol);
Example #55
0
        // char.IsWhiteSpace(char) must accept every test character from the three
        // separator categories and reject the rest, apart from a handful of control
        // characters that are also treated as whitespace.
        public static void IsWhitespace_Char()
        {
            UnicodeCategory[] whitespaceCategories =
            {
                UnicodeCategory.SpaceSeparator,
                UnicodeCategory.LineSeparator,
                UnicodeCategory.ParagraphSeparator
            };

            foreach (char space in GetTestChars(whitespaceCategories))
            {
                Assert.True(char.IsWhiteSpace(space));
            }

            foreach (char other in GetTestCharsNotInCategory(whitespaceCategories))
            {
                // U+0009..U+000D and U+0085 are control chars that count as whitespace,
                // so they are excluded from the negative check.
                bool isWhitespaceControl = (other >= '\x0009' && other <= '\x000d') || other == '\x0085';
                if (!isWhitespaceControl)
                {
                    Assert.False(char.IsWhiteSpace(other));
                }
            }
        }
Example #56
0
 /// <summary>
 /// Tells whether <paramref name="c"/> is SpaceSeparator, Control, LineSeparator
 /// or ParagraphSeparator.
 /// </summary>
 private static bool IsSpaceCategory(UnicodeCategory c)
 {
     switch (c)
     {
         case UnicodeCategory.SpaceSeparator:
         case UnicodeCategory.Control:
         case UnicodeCategory.LineSeparator:
         case UnicodeCategory.ParagraphSeparator:
             return true;

         default:
             return false;
     }
 }
Example #57
0
 // Creates an empty record: NUL character, category -1 (not a defined
 // UnicodeCategory member) and numeric value -1.
 public CharInfo()
 {
     this.chChar = '\0';
     this.eCategory = (UnicodeCategory)(-1);
     this.dNumericValue = -1;
 }
 // Creates an empty record: NUL character, category -1 (not a defined
 // UnicodeCategory member) and numeric value -1.
 public CharInfo()
 {
     this.chChar = '\0';
     this.eCategory = (UnicodeCategory)(-1);
     this.dNumericValue = -1;
 }
Example #59
0
        ////////////////////////////////////////////////////////////////////////
        //
        // Get the length (in chars) of the current text element.
        //
        // A combining class is defined as:
        //      A character/surrogate that has one of the following Unicode categories:
        //      * NonSpacingMark (e.g. U+0300 COMBINING GRAVE ACCENT)
        //      * SpacingCombiningMark (e.g. U+0903 DEVANAGARI SIGN VISARGA)
        //      * EnclosingMark (e.g. U+20DD COMBINING ENCLOSING CIRCLE)
        //
        // In the context of GetNextTextElement() and ParseCombiningCharacters(), a text element is defined as:
        //
        //  1. If a character/surrogate is in one of the following categories, it is a text element on its own.
        //     It can NOT further combine with characters in the combining class to form a text element.
        //      * one of the Unicode categories in the combining class
        //      * UnicodeCategory.Format
        //      * UnicodeCategory.Control
        //      * UnicodeCategory.OtherNotAssigned
        //      * UnicodeCategory.Surrogate (an unpaired high or low surrogate)
        //  2. Otherwise, the character/surrogate can be combined with characters in the combining class to form a text element.
        //
        //  Return:
        //      The length of the current text element
        //
        //  Parameters:
        //      String str
        //      index   The starting index
        //      len     The total length of str (to define the upper boundary)
        //      ucCurrent   The Unicode category pointed to by index.  It will be updated to the uc of the next character if this is not the last text element.
        //      currentCharCount    The char count of the abstract char pointed to by index.  It will be updated to the char count of the next abstract character if this is not the last text element.
        //
        ////////////////////////////////////////////////////////////////////////

        internal static int GetCurrentTextElementLen(string str, int index, int len, ref UnicodeCategory ucCurrent, ref int currentCharCount)
        {
            Debug.Assert(index >= 0 && len >= 0, "StringInfo.GetCurrentTextElementLen() : index = " + index + ", len = " + len);
            Debug.Assert(index < len, "StringInfo.GetCurrentTextElementLen() : index = " + index + ", len = " + len);
            if (index + currentCharCount == len)
            {
                // This is the last character/surrogate in the string.
                // The ref parameters are left untouched in this case.
                return (currentCharCount);
            }

            // Call an internal GetUnicodeCategory, which will tell us both the unicode category, and also tell us if it is a surrogate pair or not.
            int nextCharCount;
            UnicodeCategory ucNext = CharUnicodeInfo.InternalGetUnicodeCategory(str, index + currentCharCount, out nextCharCount);
            if (CharUnicodeInfo.IsCombiningCategory(ucNext))
            {
                // The next element is a combining class.
                // Check the current text element to see if it is a valid base category (i.e. it should not be a combining category,
                // not a format character, not a control character, not unassigned, and not an unpaired surrogate).

                if (CharUnicodeInfo.IsCombiningCategory(ucCurrent)
                    || (ucCurrent == UnicodeCategory.Format)
                    || (ucCurrent == UnicodeCategory.Control)
                    || (ucCurrent == UnicodeCategory.OtherNotAssigned)
                    || (ucCurrent == UnicodeCategory.Surrogate))    // An unpaired high surrogate or low surrogate
                {
                    // Will fall through and return the currentCharCount
                }
                else
                {
                    int startIndex = index; // Remember the current index.

                    // We have a valid base character, and we have a character (or surrogate) that is combining.
                    // Skip past the base character and the first combining character, then
                    // check if there are more combining characters to follow.
                    index += currentCharCount + nextCharCount;

                    while (index < len)
                    {
                        ucNext = CharUnicodeInfo.InternalGetUnicodeCategory(str, index, out nextCharCount);
                        if (!CharUnicodeInfo.IsCombiningCategory(ucNext))
                        {
                            // First non-combining character: it starts the next text element,
                            // so hand its category and char count back to the caller.
                            ucCurrent = ucNext;
                            currentCharCount = nextCharCount;
                            break;
                        }
                        index += nextCharCount;
                    }
                    // If the loop ran off the end of the string, the ref parameters keep their old values.
                    return (index - startIndex);
                }
            }
            // The return value will be the currentCharCount.
            int ret = currentCharCount;
            // Advance the caller's state to the next character.
            ucCurrent = ucNext;
            // Update currentCharCount.
            currentCharCount = nextCharCount;
            return (ret);
        }
Example #60
0
 // Returns true iff `category` falls between DecimalDigitNumber and OtherNumber
 // (bounds passed to UnicodeUtility.IsInRangeInclusive).
 private static bool IsCategoryNumber(UnicodeCategory category)
 {
     uint value = (uint)category;
     uint firstNumberCategory = (uint)UnicodeCategory.DecimalDigitNumber;
     uint lastNumberCategory = (uint)UnicodeCategory.OtherNumber;

     return UnicodeUtility.IsInRangeInclusive(value, firstNumberCategory, lastNumberCategory);
 }