Ejemplo n.º 1
0
		/// <summary>
		/// Loads Unicode character data from the <c>UnicodeData.txt</c> and <c>SpecialCasing.txt</c> files.
		/// </summary>
		/// <remarks>
		/// <c>GetFile</c> is called for each file name before anything is read (presumably to make the file
		/// available locally; confirm against its definition). Every non-empty line of <c>UnicodeData.txt</c>
		/// that does not start with <c>#</c> becomes a <see cref="CodePoint"/> stored in
		/// <c>CodePoint.codePointsByValue</c>, replacing any earlier entry with the same value. Entries of
		/// <c>SpecialCasing.txt</c> that carry no condition are handed to <c>ProcessTokenStringForCodePoints</c>
		/// together with the lowercase, titlecase and uppercase mapping dictionaries.
		/// </remarks>
		public static void LoadUnicodeData()
		{
			const string UnicodeDataFileName = "UnicodeData.txt";
			const string SpecialCasingFileName = "SpecialCasing.txt";

			GetFile(UnicodeDataFileName);
			GetFile(SpecialCasingFileName);

			SortedList<uint, CodePoint> codePointTable = CodePoint.codePointsByValue;
			Dictionary<uint, CodePoint[]> upperTable = CodePoint.uppercaseMappings;
			Dictionary<uint, CodePoint[]> lowerTable = CodePoint.lowercaseMappings;
			Dictionary<uint, CodePoint[]> titleTable = CodePoint.titlecaseMappings;

			char[] spaceSeparator = { ' ' };
			char[] fieldSeparator = { ';' };

			#region Process UnicodeData file
			{
				// Positions of the fields within one semicolon-separated record.
				const int ValueIndex = 0;
				const int NameIndex = 1;
				const int GeneralCategoryIndex = 2;
				const int CanonicalCombiningClassIndex = 3;
				const int BidiClassIndex = 4;
				// 5, 6, 7, 8 omitted for the moment
				const int BidiMirroredIndex = 9;
				const int Unicode1NameIndex = 10;
				const int IsoCommentIndex = 11;
				const int SimpleUppercaseMappingIndex = 12;
				const int SimpleLowercaseMappingIndex = 13;
				const int SimpleTitlecaseMappingIndex = 14;

				#region Lookup dictionaries
				// Enum member name -> enum value, so the category strings read from the file
				// can be resolved with an ordinal dictionary lookup.
				GeneralCategory[] allGeneralCategories = (GeneralCategory[])Enum.GetValues(typeof(GeneralCategory));
				Dictionary<string, GeneralCategory> generalCategoryByName = new Dictionary<string, GeneralCategory>(allGeneralCategories.Length, StringComparer.Ordinal);
				foreach (GeneralCategory category in allGeneralCategories)
				{
					generalCategoryByName[category.ToString("G")] = category;
				}

				BidiClass[] allBidiClasses = (BidiClass[])Enum.GetValues(typeof(BidiClass));
				Dictionary<string, BidiClass> bidiClassByName = new Dictionary<string, BidiClass>(allBidiClasses.Length, StringComparer.Ordinal);
				foreach (BidiClass bidi in allBidiClasses)
				{
					bidiClassByName[bidi.ToString("G")] = bidi;
				}
				#endregion // Lookup dictionaries

				foreach (string record in File.ReadAllLines(UnicodeDataFileName, Encoding.UTF8))
				{
					// Skip blank lines and lines that start with a comment marker.
					if (string.IsNullOrEmpty(record) || record[0] == '#')
					{
						continue;
					}

					string[] fields = record.Split(fieldSeparator);
					Debug.Assert(fields.Length >= 15);

					uint value = uint.Parse(fields[ValueIndex], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

					string nameField = fields[NameIndex];
					string name = string.IsNullOrEmpty(nameField) ? NameDefault : nameField;

					// An empty category field falls back to Cn.
					string generalCategoryField = fields[GeneralCategoryIndex];
					GeneralCategory generalCategory = string.IsNullOrEmpty(generalCategoryField)
						? GeneralCategory.Cn
						: generalCategoryByName[generalCategoryField];

					// An empty combining class field is treated as 0.
					string combiningClassField = fields[CanonicalCombiningClassIndex];
					byte canonicalCombiningClass = string.IsNullOrEmpty(combiningClassField)
						? (byte)0
						: byte.Parse(combiningClassField, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);

					string bidiClassField = fields[BidiClassIndex];
					BidiClass bidiClass = string.IsNullOrEmpty(bidiClassField)
						? BidiClass.Invalid
						: bidiClassByName[bidiClassField];

					// 5, 6, 7, 8 omitted for the moment

					bool bidiMirrored = string.Equals(fields[BidiMirroredIndex], "Y");

					// Empty optional text fields are stored as null rather than as empty strings.
					string unicode1NameField = fields[Unicode1NameIndex];
					string unicode1Name = string.IsNullOrEmpty(unicode1NameField) ? null : unicode1NameField;

					string isoCommentField = fields[IsoCommentIndex];
					string isoComment = string.IsNullOrEmpty(isoCommentField) ? null : isoCommentField;

					// A simple case mapping is kept only when it is present and differs from the code point itself.
					uint? simpleUppercaseMapping = null;
					string upperField = fields[SimpleUppercaseMappingIndex];
					if (!string.IsNullOrEmpty(upperField))
					{
						uint upperValue = uint.Parse(upperField, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
						if (upperValue != value)
						{
							simpleUppercaseMapping = upperValue;
						}
					}

					uint? simpleLowercaseMapping = null;
					string lowerField = fields[SimpleLowercaseMappingIndex];
					if (!string.IsNullOrEmpty(lowerField))
					{
						uint lowerValue = uint.Parse(lowerField, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
						if (lowerValue != value)
						{
							simpleLowercaseMapping = lowerValue;
						}
					}

					uint? simpleTitlecaseMapping = null;
					string titleField = fields[SimpleTitlecaseMappingIndex];
					if (!string.IsNullOrEmpty(titleField))
					{
						uint titleValue = uint.Parse(titleField, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
						if (titleValue != value)
						{
							simpleTitlecaseMapping = titleValue;
						}
					}

					codePointTable[value] = new CodePoint(value, name, generalCategory, canonicalCombiningClass, bidiClass, bidiMirrored, unicode1Name, isoComment, simpleUppercaseMapping, simpleLowercaseMapping, simpleTitlecaseMapping);
				}
			}
			#endregion // Process UnicodeData file

			#region Process SpecialCasing file
			{
				// Positions of the fields within one semicolon-separated record.
				const int ValueIndex = 0;
				const int LowerMappingIndex = 1;
				const int TitleMappingIndex = 2;
				const int UpperMappingIndex = 3;
				const int ConditionIndex = 4;

				// Single list instance passed to every ProcessTokenStringForCodePoints call.
				List<CodePoint> scratchCodePoints = new List<CodePoint>();

				foreach (string rawLine in File.ReadAllLines(SpecialCasingFileName, Encoding.UTF8))
				{
					if (string.IsNullOrEmpty(rawLine))
					{
						continue;
					}

					// Cut off a trailing comment; a line that held nothing but a comment is skipped.
					string entry = rawLine;
					int hashIndex = entry.IndexOf('#');
					if (hashIndex >= 0)
					{
						entry = entry.Remove(hashIndex).Trim(spaceSeparator);
						if (string.IsNullOrEmpty(entry))
						{
							continue;
						}
					}

					string[] fields = entry.Split(fieldSeparator);
					Debug.Assert(fields.Length >= 4);

					uint value = uint.Parse(fields[ValueIndex], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

					// We don't want any conditional mappings
					bool hasCondition = fields.Length > ConditionIndex
						&& !string.IsNullOrEmpty(fields[ConditionIndex].Trim(spaceSeparator));
					if (hasCondition)
					{
						continue;
					}

					ProcessTokenStringForCodePoints(value, fields[LowerMappingIndex], spaceSeparator, scratchCodePoints, lowerTable);
					ProcessTokenStringForCodePoints(value, fields[TitleMappingIndex], spaceSeparator, scratchCodePoints, titleTable);
					ProcessTokenStringForCodePoints(value, fields[UpperMappingIndex], spaceSeparator, scratchCodePoints, upperTable);
				}
			}
			#endregion // Process SpecialCasing file
		}
Ejemplo n.º 2
0
        /// <summary>
        /// Loads Unicode character data from the <c>UnicodeData.txt</c> and <c>SpecialCasing.txt</c> files.
        /// </summary>
        /// <remarks>
        /// <c>GetFile</c> is called for each file name before anything is read (presumably to make the file
        /// available locally; confirm against its definition). Every non-empty line of <c>UnicodeData.txt</c>
        /// that does not start with <c>#</c> becomes a <see cref="CodePoint"/> stored in
        /// <c>CodePoint.codePointsByValue</c>, replacing any earlier entry with the same value. Entries of
        /// <c>SpecialCasing.txt</c> that carry no condition are handed to <c>ProcessTokenStringForCodePoints</c>
        /// together with the lowercase, titlecase and uppercase mapping dictionaries.
        /// </remarks>
        public static void LoadUnicodeData()
        {
            const string UnicodeDataFileName = "UnicodeData.txt";
            const string SpecialCasingFileName = "SpecialCasing.txt";

            GetFile(UnicodeDataFileName);
            GetFile(SpecialCasingFileName);

            SortedList<uint, CodePoint> codePointTable = CodePoint.codePointsByValue;
            Dictionary<uint, CodePoint[]> upperTable = CodePoint.uppercaseMappings;
            Dictionary<uint, CodePoint[]> lowerTable = CodePoint.lowercaseMappings;
            Dictionary<uint, CodePoint[]> titleTable = CodePoint.titlecaseMappings;

            char[] spaceSeparator = { ' ' };
            char[] fieldSeparator = { ';' };

            #region Process UnicodeData file
            {
                // Positions of the fields within one semicolon-separated record.
                const int ValueIndex = 0;
                const int NameIndex = 1;
                const int GeneralCategoryIndex = 2;
                const int CanonicalCombiningClassIndex = 3;
                const int BidiClassIndex = 4;
                // 5, 6, 7, 8 omitted for the moment
                const int BidiMirroredIndex = 9;
                const int Unicode1NameIndex = 10;
                const int IsoCommentIndex = 11;
                const int SimpleUppercaseMappingIndex = 12;
                const int SimpleLowercaseMappingIndex = 13;
                const int SimpleTitlecaseMappingIndex = 14;

                #region Lookup dictionaries
                // Enum member name -> enum value, so the category strings read from the file
                // can be resolved with an ordinal dictionary lookup.
                GeneralCategory[] allGeneralCategories = (GeneralCategory[])Enum.GetValues(typeof(GeneralCategory));
                Dictionary<string, GeneralCategory> generalCategoryByName = new Dictionary<string, GeneralCategory>(allGeneralCategories.Length, StringComparer.Ordinal);
                foreach (GeneralCategory category in allGeneralCategories)
                {
                    generalCategoryByName[category.ToString("G")] = category;
                }

                BidiClass[] allBidiClasses = (BidiClass[])Enum.GetValues(typeof(BidiClass));
                Dictionary<string, BidiClass> bidiClassByName = new Dictionary<string, BidiClass>(allBidiClasses.Length, StringComparer.Ordinal);
                foreach (BidiClass bidi in allBidiClasses)
                {
                    bidiClassByName[bidi.ToString("G")] = bidi;
                }
                #endregion // Lookup dictionaries

                foreach (string record in File.ReadAllLines(UnicodeDataFileName, Encoding.UTF8))
                {
                    // Skip blank lines and lines that start with a comment marker.
                    if (string.IsNullOrEmpty(record) || record[0] == '#')
                    {
                        continue;
                    }

                    string[] fields = record.Split(fieldSeparator);
                    Debug.Assert(fields.Length >= 15);

                    uint value = uint.Parse(fields[ValueIndex], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

                    string nameField = fields[NameIndex];
                    string name = string.IsNullOrEmpty(nameField) ? NameDefault : nameField;

                    // An empty category field falls back to Cn.
                    string generalCategoryField = fields[GeneralCategoryIndex];
                    GeneralCategory generalCategory = string.IsNullOrEmpty(generalCategoryField)
                        ? GeneralCategory.Cn
                        : generalCategoryByName[generalCategoryField];

                    // An empty combining class field is treated as 0.
                    string combiningClassField = fields[CanonicalCombiningClassIndex];
                    byte canonicalCombiningClass = string.IsNullOrEmpty(combiningClassField)
                        ? (byte)0
                        : byte.Parse(combiningClassField, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);

                    string bidiClassField = fields[BidiClassIndex];
                    BidiClass bidiClass = string.IsNullOrEmpty(bidiClassField)
                        ? BidiClass.Invalid
                        : bidiClassByName[bidiClassField];

                    // 5, 6, 7, 8 omitted for the moment

                    bool bidiMirrored = string.Equals(fields[BidiMirroredIndex], "Y");

                    // Empty optional text fields are stored as null rather than as empty strings.
                    string unicode1NameField = fields[Unicode1NameIndex];
                    string unicode1Name = string.IsNullOrEmpty(unicode1NameField) ? null : unicode1NameField;

                    string isoCommentField = fields[IsoCommentIndex];
                    string isoComment = string.IsNullOrEmpty(isoCommentField) ? null : isoCommentField;

                    // A simple case mapping is kept only when it is present and differs from the code point itself.
                    uint? simpleUppercaseMapping = null;
                    string upperField = fields[SimpleUppercaseMappingIndex];
                    if (!string.IsNullOrEmpty(upperField))
                    {
                        uint upperValue = uint.Parse(upperField, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
                        if (upperValue != value)
                        {
                            simpleUppercaseMapping = upperValue;
                        }
                    }

                    uint? simpleLowercaseMapping = null;
                    string lowerField = fields[SimpleLowercaseMappingIndex];
                    if (!string.IsNullOrEmpty(lowerField))
                    {
                        uint lowerValue = uint.Parse(lowerField, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
                        if (lowerValue != value)
                        {
                            simpleLowercaseMapping = lowerValue;
                        }
                    }

                    uint? simpleTitlecaseMapping = null;
                    string titleField = fields[SimpleTitlecaseMappingIndex];
                    if (!string.IsNullOrEmpty(titleField))
                    {
                        uint titleValue = uint.Parse(titleField, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
                        if (titleValue != value)
                        {
                            simpleTitlecaseMapping = titleValue;
                        }
                    }

                    codePointTable[value] = new CodePoint(value, name, generalCategory, canonicalCombiningClass, bidiClass, bidiMirrored, unicode1Name, isoComment, simpleUppercaseMapping, simpleLowercaseMapping, simpleTitlecaseMapping);
                }
            }
            #endregion // Process UnicodeData file

            #region Process SpecialCasing file
            {
                // Positions of the fields within one semicolon-separated record.
                const int ValueIndex = 0;
                const int LowerMappingIndex = 1;
                const int TitleMappingIndex = 2;
                const int UpperMappingIndex = 3;
                const int ConditionIndex = 4;

                // Single list instance passed to every ProcessTokenStringForCodePoints call.
                List<CodePoint> scratchCodePoints = new List<CodePoint>();

                foreach (string rawLine in File.ReadAllLines(SpecialCasingFileName, Encoding.UTF8))
                {
                    if (string.IsNullOrEmpty(rawLine))
                    {
                        continue;
                    }

                    // Cut off a trailing comment; a line that held nothing but a comment is skipped.
                    string entry = rawLine;
                    int hashIndex = entry.IndexOf('#');
                    if (hashIndex >= 0)
                    {
                        entry = entry.Remove(hashIndex).Trim(spaceSeparator);
                        if (string.IsNullOrEmpty(entry))
                        {
                            continue;
                        }
                    }

                    string[] fields = entry.Split(fieldSeparator);
                    Debug.Assert(fields.Length >= 4);

                    uint value = uint.Parse(fields[ValueIndex], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

                    // We don't want any conditional mappings
                    bool hasCondition = fields.Length > ConditionIndex
                        && !string.IsNullOrEmpty(fields[ConditionIndex].Trim(spaceSeparator));
                    if (hasCondition)
                    {
                        continue;
                    }

                    ProcessTokenStringForCodePoints(value, fields[LowerMappingIndex], spaceSeparator, scratchCodePoints, lowerTable);
                    ProcessTokenStringForCodePoints(value, fields[TitleMappingIndex], spaceSeparator, scratchCodePoints, titleTable);
                    ProcessTokenStringForCodePoints(value, fields[UpperMappingIndex], spaceSeparator, scratchCodePoints, upperTable);
                }
            }
            #endregion // Process SpecialCasing file
        }
			/// <summary>
			/// Creates a mapping entry for a code point whose uppercase form is given as an array of code points.
			/// </summary>
			/// <param name="codePointValue">Value of the code point being mapped.</param>
			/// <param name="uppercaseMapping">
			/// Code points making up the uppercase form; the <c>Value</c> of each element is copied, in order,
			/// into <c>UppercaseMappingValues</c>.
			/// </param>
			public StringCaseMapping(uint codePointValue, CodePoint[] uppercaseMapping)
			{
				// Collect the raw values first, then publish them to the members in one go.
				uint[] mappedValues = new uint[uppercaseMapping.Length];
				int position = 0;
				foreach (CodePoint mappedCodePoint in uppercaseMapping)
				{
					mappedValues[position] = mappedCodePoint.Value;
					position++;
				}

				this.CodePointValue = codePointValue;
				this.UppercaseMappingValues = mappedValues;
				// The single-value member is set to 0 here (presumably unused for multi-value mappings; confirm).
				this.UppercaseMappingValue = 0;
			}