コード例 #1
0
        /// <summary>
        /// Populates the static <see cref="CodePoint"/> tables from the "UnicodeData.txt" and
        /// "SpecialCasing.txt" data files.
        /// </summary>
        /// <remarks>
        /// Every non-empty line of "UnicodeData.txt" that does not start with '#' becomes one
        /// <see cref="CodePoint"/> entry keyed by its code point value; fields 5 through 8 of each
        /// record are not read. Entries of "SpecialCasing.txt" whose condition field is empty are
        /// handed to ProcessTokenStringForCodePoints for the lowercase, titlecase and uppercase
        /// tables; entries that carry a condition are skipped.
        /// </remarks>
        /// <exception cref="FormatException">
        /// A data line has fewer fields than required, or a numeric field cannot be parsed.
        /// </exception>
        public static void LoadUnicodeData()
        {
            const string UnicodeDataFileName   = "UnicodeData.txt";
            const string SpecialCasingFileName = "SpecialCasing.txt";

            // GetFile is defined elsewhere.
            // NOTE(review): presumably it makes each file available at the relative path read below - confirm.
            GetFile(UnicodeDataFileName);
            GetFile(SpecialCasingFileName);

            // Capture the destination tables up front, before any file is parsed.
            SortedList<uint, CodePoint>   codePointsByValue = CodePoint.codePointsByValue;
            Dictionary<uint, CodePoint[]> uppercaseMappings = CodePoint.uppercaseMappings;
            Dictionary<uint, CodePoint[]> lowercaseMappings = CodePoint.lowercaseMappings;
            Dictionary<uint, CodePoint[]> titlecaseMappings = CodePoint.titlecaseMappings;

            LoadUnicodeDataFile(UnicodeDataFileName, codePointsByValue);
            LoadSpecialCasingFile(SpecialCasingFileName, lowercaseMappings, titlecaseMappings, uppercaseMappings);
        }

        /// <summary>
        /// Parses a file in the UnicodeData.txt layout and stores one <see cref="CodePoint"/> per record.
        /// </summary>
        /// <param name="fileName">Path of the semicolon-separated data file, read as UTF-8.</param>
        /// <param name="codePointsByValue">Table that receives the parsed entries, keyed by code point value.</param>
        /// <exception cref="FormatException">A record has fewer than 15 fields or a malformed number.</exception>
        private static void LoadUnicodeDataFile(string fileName, SortedList<uint, CodePoint> codePointsByValue)
        {
            const int MinimumFieldCount = 15;

            const int ValueIndex                   = 0;
            const int NameIndex                    = 1;
            const int GeneralCategoryIndex         = 2;
            const int CanonicalCombiningClassIndex = 3;
            const int BidiClassIndex               = 4;
            // 5, 6, 7, 8 omitted for the moment
            const int BidiMirroredIndex            = 9;
            const int Unicode1NameIndex            = 10;
            const int IsoCommentIndex              = 11;
            const int SimpleUppercaseMappingIndex  = 12;
            const int SimpleLowercaseMappingIndex  = 13;
            const int SimpleTitlecaseMappingIndex  = 14;

            // The file spells categories and bidi classes exactly like the enum member names.
            Dictionary<string, GeneralCategory> generalCategoryLookup = BuildEnumNameLookup<GeneralCategory>();
            Dictionary<string, BidiClass>       bidiClassLookup       = BuildEnumNameLookup<BidiClass>();

            char[] semicolonArray = new char[] { ';' };

            string[] lines = File.ReadAllLines(fileName, Encoding.UTF8);
            for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
            {
                string line = lines[lineIndex];
                if (string.IsNullOrEmpty(line) || line[0] == '#')
                {
                    continue;
                }

                string[] fields = line.Split(semicolonArray);
                RequireMinimumFieldCount(fields, MinimumFieldCount, fileName, lineIndex + 1);

                uint value = uint.Parse(fields[ValueIndex], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

                string name = fields[NameIndex];
                if (string.IsNullOrEmpty(name))
                {
                    name = NameDefault;
                }

                // An empty category field is stored as Cn.
                string generalCategoryField = fields[GeneralCategoryIndex];
                GeneralCategory generalCategory = string.IsNullOrEmpty(generalCategoryField)
                    ? GeneralCategory.Cn
                    : generalCategoryLookup[generalCategoryField];

                // An empty combining class field is stored as 0.
                string canonicalCombiningClassField = fields[CanonicalCombiningClassIndex];
                byte canonicalCombiningClass = string.IsNullOrEmpty(canonicalCombiningClassField)
                    ? (byte)0
                    : byte.Parse(canonicalCombiningClassField, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);

                string bidiClassField = fields[BidiClassIndex];
                BidiClass bidiClass = string.IsNullOrEmpty(bidiClassField)
                    ? BidiClass.Invalid
                    : bidiClassLookup[bidiClassField];

                // 5, 6, 7, 8 omitted for the moment

                bool bidiMirrored = (fields[BidiMirroredIndex] == "Y");

                string unicode1Name = NullIfEmpty(fields[Unicode1NameIndex]);
                string isoComment   = NullIfEmpty(fields[IsoCommentIndex]);

                uint? simpleUppercaseMapping = ParseSimpleCaseMapping(fields[SimpleUppercaseMappingIndex], value);
                uint? simpleLowercaseMapping = ParseSimpleCaseMapping(fields[SimpleLowercaseMappingIndex], value);
                uint? simpleTitlecaseMapping = ParseSimpleCaseMapping(fields[SimpleTitlecaseMappingIndex], value);

                codePointsByValue[value] = new CodePoint(value, name, generalCategory, canonicalCombiningClass, bidiClass, bidiMirrored, unicode1Name, isoComment, simpleUppercaseMapping, simpleLowercaseMapping, simpleTitlecaseMapping);
            }
        }

        /// <summary>
        /// Parses a file in the SpecialCasing.txt layout and forwards every entry without a condition
        /// to ProcessTokenStringForCodePoints, once per case mapping table.
        /// </summary>
        /// <param name="fileName">Path of the semicolon-separated data file, read as UTF-8.</param>
        /// <param name="lowercaseMappings">Table passed along with the lowercase mapping field.</param>
        /// <param name="titlecaseMappings">Table passed along with the titlecase mapping field.</param>
        /// <param name="uppercaseMappings">Table passed along with the uppercase mapping field.</param>
        /// <exception cref="FormatException">An entry has fewer than 4 fields or a malformed code point value.</exception>
        private static void LoadSpecialCasingFile(string fileName, Dictionary<uint, CodePoint[]> lowercaseMappings, Dictionary<uint, CodePoint[]> titlecaseMappings, Dictionary<uint, CodePoint[]> uppercaseMappings)
        {
            const int MinimumFieldCount = 4;

            const int ValueIndex        = 0;
            const int LowerMappingIndex = 1;
            const int TitleMappingIndex = 2;
            const int UpperMappingIndex = 3;
            const int ConditionIndex    = 4;

            char[] spaceArray     = new char[] { ' ' };
            char[] semicolonArray = new char[] { ';' };

            // One list instance is handed to every ProcessTokenStringForCodePoints call.
            // NOTE(review): presumably the helper uses it as a reusable scratch buffer - confirm.
            List<CodePoint> mappingCodePoints = new List<CodePoint>();

            string[] lines = File.ReadAllLines(fileName, Encoding.UTF8);
            for (int lineIndex = 0; lineIndex < lines.Length; lineIndex++)
            {
                string line = lines[lineIndex];
                if (string.IsNullOrEmpty(line))
                {
                    continue;
                }

                // Drop a trailing '#' comment; a line that held nothing but a comment is skipped.
                int commentStartIndex = line.IndexOf('#');
                if (commentStartIndex >= 0)
                {
                    line = line.Remove(commentStartIndex).Trim(spaceArray);
                    if (string.IsNullOrEmpty(line))
                    {
                        continue;
                    }
                }

                string[] fields = line.Split(semicolonArray);
                RequireMinimumFieldCount(fields, MinimumFieldCount, fileName, lineIndex + 1);

                uint value = uint.Parse(fields[ValueIndex], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

                bool hasCondition = fields.Length > ConditionIndex
                    && !string.IsNullOrEmpty(fields[ConditionIndex].Trim(spaceArray));
                if (hasCondition)
                {
                    // We don't want any conditional mappings
                    continue;
                }

                ProcessTokenStringForCodePoints(value, fields[LowerMappingIndex], spaceArray, mappingCodePoints, lowercaseMappings);
                ProcessTokenStringForCodePoints(value, fields[TitleMappingIndex], spaceArray, mappingCodePoints, titlecaseMappings);
                ProcessTokenStringForCodePoints(value, fields[UpperMappingIndex], spaceArray, mappingCodePoints, uppercaseMappings);
            }
        }

        /// <summary>
        /// Builds an ordinal, case-sensitive lookup from enum member name to enum value.
        /// </summary>
        /// <typeparam name="TEnum">The enum type whose members are indexed.</typeparam>
        /// <returns>A dictionary keyed by the name each value formats to.</returns>
        private static Dictionary<string, TEnum> BuildEnumNameLookup<TEnum>() where TEnum : struct
        {
            TEnum[] values = (TEnum[])Enum.GetValues(typeof(TEnum));
            Dictionary<string, TEnum> lookup = new Dictionary<string, TEnum>(values.Length, StringComparer.Ordinal);
            for (int i = 0; i < values.Length; i++)
            {
                // For an enum value, ToString() uses the general ("G") format, i.e. the member name.
                lookup[values[i].ToString()] = values[i];
            }
            return lookup;
        }

        /// <summary>
        /// Parses one simple case mapping field of a UnicodeData record.
        /// </summary>
        /// <param name="mappingField">Hexadecimal code point text; may be null or empty.</param>
        /// <param name="sourceValue">The code point the record describes.</param>
        /// <returns>
        /// The mapped code point, or null when the field is empty or maps the code point to itself.
        /// </returns>
        private static uint? ParseSimpleCaseMapping(string mappingField, uint sourceValue)
        {
            if (string.IsNullOrEmpty(mappingField))
            {
                return null;
            }

            uint mappedValue = uint.Parse(mappingField, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
            if (mappedValue == sourceValue)
            {
                return null;
            }
            return mappedValue;
        }

        /// <summary>
        /// Returns null for a null or empty string and the string itself otherwise.
        /// </summary>
        private static string NullIfEmpty(string text)
        {
            return string.IsNullOrEmpty(text) ? null : text;
        }

        /// <summary>
        /// Rejects a data line that has fewer fields than the parser is about to index.
        /// </summary>
        /// <remarks>
        /// Done with an exception rather than Debug.Assert so that release builds also fail
        /// with the file name and line number instead of an IndexOutOfRangeException.
        /// </remarks>
        /// <param name="fields">The fields produced by splitting the line.</param>
        /// <param name="minimumCount">Smallest acceptable number of fields.</param>
        /// <param name="fileName">File name used in the error message.</param>
        /// <param name="lineNumber">1-based line number used in the error message.</param>
        /// <exception cref="FormatException"><paramref name="fields"/> is too short.</exception>
        private static void RequireMinimumFieldCount(string[] fields, int minimumCount, string fileName, int lineNumber)
        {
            if (fields.Length < minimumCount)
            {
                throw new FormatException(string.Format(CultureInfo.InvariantCulture, "{0}({1}): expected at least {2} fields separated by ';' but found {3}.", fileName, lineNumber, minimumCount, fields.Length));
            }
        }
コード例 #2
0
 /// <summary>
 /// Returns a human-readable description for a Unicode general category.
 /// </summary>
 /// <param name="category">The category, or combined category group, to describe.</param>
 /// <returns>
 /// The description text. If <paramref name="category"/> is not one of the handled values,
 /// a debug assertion is raised and an empty string is returned.
 /// </returns>
 public static string GetCategoryDescription(GeneralCategory category)
 {
     string description;

     switch (category)
     {
         // Combined groups
         case GeneralCategory.AllControlCharacters:
             description = "All control characters. This includes the Cc, Cf, Cs, Co, and Cn categories.";
             break;
         case GeneralCategory.AllDiacriticMarks:
             description = "All diacritic marks. This includes the Mn, Mc, and Me categories.";
             break;
         case GeneralCategory.AllLetterCharacters:
             description = "All letter characters. This includes the Lu, Ll, Lt, Lm, and Lo characters.";
             break;
         case GeneralCategory.AllNumbers:
             description = "All numbers. This includes the Nd, Nl, and No categories.";
             break;
         case GeneralCategory.AllPunctuationCharacters:
             description = "All punctuation characters. This includes the Pc, Pd, Ps, Pe, Pi, Pf, and Po categories.";
             break;
         case GeneralCategory.AllSeparatorCharacters:
             description = "All separator characters. This includes the Zs, Zl, and Zp categories.";
             break;
         case GeneralCategory.AllSymbols:
             description = "All symbols. This includes the Sm, Sc, Sk, and So categories.";
             break;

         // Letters
         case GeneralCategory.LetterLowercase:
             description = "Letter, Lowercase";
             break;
         case GeneralCategory.LetterModifier:
             description = "Letter, Modifier";
             break;
         case GeneralCategory.LetterOther:
             description = "Letter, Other";
             break;
         case GeneralCategory.LetterTitlecase:
             description = "Letter, Titlecase";
             break;
         case GeneralCategory.LetterUppercase:
             description = "Letter, Uppercase";
             break;

         // Marks
         case GeneralCategory.MarkEnclosing:
             description = "Mark, Enclosing";
             break;
         case GeneralCategory.MarkNonspacing:
             description = "Mark, Nonspacing";
             break;
         case GeneralCategory.MarkSpacingCombining:
             description = "Mark, Spacing Combining";
             break;

         // Numbers
         case GeneralCategory.NumberDecimalDigit:
             description = "Number, Decimal Digit";
             break;
         case GeneralCategory.NumberLetter:
             description = "Number, Letter";
             break;
         case GeneralCategory.NumberOther:
             description = "Number, Other";
             break;

         // Other
         case GeneralCategory.OtherControl:
             description = "Other, Control";
             break;
         case GeneralCategory.OtherFormat:
             description = "Other, Format";
             break;
         case GeneralCategory.OtherNotAssigned:
             description = "Other, Not Assigned (no characters have this property)";
             break;
         case GeneralCategory.OtherPrivateUse:
             description = "Other, Private Use";
             break;
         case GeneralCategory.OtherSurrogate:
             description = "Other, Surrogate";
             break;

         // Punctuation
         case GeneralCategory.PunctuationClose:
             description = "Punctuation, Close";
             break;
         case GeneralCategory.PunctuationConnector:
             description = "Punctuation, Connector";
             break;
         case GeneralCategory.PunctuationDash:
             description = "Punctuation, Dash";
             break;
         case GeneralCategory.PunctuationFinalQuote:
             description = "Punctuation, Final quote (may behave like Ps or Pe depending on usage)";
             break;
         case GeneralCategory.PunctuationInitialQuote:
             description = "Punctuation, Initial quote (may behave like Ps or Pe depending on usage)";
             break;
         case GeneralCategory.PunctuationOpen:
             description = "Punctuation, Open";
             break;
         case GeneralCategory.PunctuationOther:
             description = "Punctuation, Other";
             break;

         // Separators
         case GeneralCategory.SeparatorLine:
             description = "Separator, Line";
             break;
         case GeneralCategory.SeparatorParagraph:
             description = "Separator, Paragraph";
             break;
         case GeneralCategory.SeparatorSpace:
             description = "Separator, Space";
             break;

         // Symbols
         case GeneralCategory.SymbolCurrency:
             description = "Symbol, Currency";
             break;
         case GeneralCategory.SymbolMath:
             description = "Symbol, Math";
             break;
         case GeneralCategory.SymbolModifier:
             description = "Symbol, Modifier";
             break;
         case GeneralCategory.SymbolOther:
             description = "Symbol, Other";
             break;

         default:
             // Unhandled value: flag it in debug builds, then fall back to an empty description.
             Debug.Assert(false, category.ToString());
             description = "";
             break;
     }

     return description;
 }