/// <summary>
/// Reads "UnicodeData.txt" and "SpecialCasing.txt" and fills the static <c>CodePoint</c> tables.
/// </summary>
/// <remarks>
/// <para>
/// Each usable record of "UnicodeData.txt" becomes one <c>CodePoint</c> stored in
/// <c>CodePoint.codePointsByValue</c> under its code point value. Columns 5 through 8 of a
/// record are not read.
/// </para>
/// <para>
/// For each unconditional record of "SpecialCasing.txt" the lower, title and upper mapping columns
/// are handed, in that order, to <c>ProcessTokenStringForCodePoints</c> together with the matching
/// mapping dictionary. Records that carry a non-blank condition column are skipped.
/// </para>
/// <para>
/// <c>GetFile</c> is invoked for both file names before anything is read.
/// NOTE(review): presumably it makes the file available in the working directory - confirm against its definition.
/// </para>
/// <para>
/// The column count of a record is only checked through <c>Debug.Assert</c>.
/// </para>
/// </remarks>
public static void LoadUnicodeData()
{
    const string UnicodeDataFileName = "UnicodeData.txt";
    const string SpecialCasingFileName = "SpecialCasing.txt";

    // Column positions inside a semicolon-separated UnicodeData record.
    // 5, 6, 7, 8 omitted for the moment
    const int CodeColumn = 0;
    const int NameColumn = 1;
    const int CategoryColumn = 2;
    const int CombiningClassColumn = 3;
    const int BidiClassColumn = 4;
    const int MirroredColumn = 9;
    const int OldNameColumn = 10;
    const int CommentColumn = 11;
    const int UpperColumn = 12;
    const int LowerColumn = 13;
    const int TitleColumn = 14;

    // Column positions inside a semicolon-separated SpecialCasing record.
    const int SpecialCodeColumn = 0;
    const int SpecialLowerColumn = 1;
    const int SpecialTitleColumn = 2;
    const int SpecialUpperColumn = 3;
    const int SpecialConditionColumn = 4;

    GetFile(UnicodeDataFileName);
    GetFile(SpecialCasingFileName);

    SortedList<uint, CodePoint> table = CodePoint.codePointsByValue;
    Dictionary<uint, CodePoint[]> upperTable = CodePoint.uppercaseMappings;
    Dictionary<uint, CodePoint[]> lowerTable = CodePoint.lowercaseMappings;
    Dictionary<uint, CodePoint[]> titleTable = CodePoint.titlecaseMappings;

    char[] blank = { ' ' };
    char[] fieldSeparator = { ';' };

    // ---- UnicodeData ----

    // Name -> enum member lookups, keyed by the member's "G" formatted name (ordinal comparison).
    GeneralCategory[] categoryMembers = (GeneralCategory[])Enum.GetValues(typeof(GeneralCategory));
    Dictionary<string, GeneralCategory> categoriesByName =
        new Dictionary<string, GeneralCategory>(categoryMembers.Length, StringComparer.Ordinal);
    foreach (GeneralCategory member in categoryMembers)
    {
        categoriesByName[member.ToString("G")] = member;
    }

    BidiClass[] bidiMembers = (BidiClass[])Enum.GetValues(typeof(BidiClass));
    Dictionary<string, BidiClass> bidiClassesByName =
        new Dictionary<string, BidiClass>(bidiMembers.Length, StringComparer.Ordinal);
    foreach (BidiClass member in bidiMembers)
    {
        bidiClassesByName[member.ToString("G")] = member;
    }

    foreach (string record in File.ReadAllLines(UnicodeDataFileName, Encoding.UTF8))
    {
        // Blank lines and lines starting with '#' carry no data.
        if (string.IsNullOrEmpty(record) || record[0] == '#')
        {
            continue;
        }

        string[] columns = record.Split(fieldSeparator);
        Debug.Assert(columns.Length >= 15);

        uint code = uint.Parse(columns[CodeColumn], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

        string name = string.IsNullOrEmpty(columns[NameColumn]) ? NameDefault : columns[NameColumn];

        // An empty category column falls back to Cn; any other text must be a known member name.
        string categoryText = columns[CategoryColumn];
        GeneralCategory category = string.IsNullOrEmpty(categoryText)
            ? GeneralCategory.Cn
            : categoriesByName[categoryText];

        byte combiningClass = 0;
        string combiningText = columns[CombiningClassColumn];
        if (!string.IsNullOrEmpty(combiningText))
        {
            combiningClass = byte.Parse(combiningText, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);
        }

        // An empty bidi column falls back to Invalid.
        string bidiText = columns[BidiClassColumn];
        BidiClass bidi = string.IsNullOrEmpty(bidiText)
            ? BidiClass.Invalid
            : bidiClassesByName[bidiText];

        bool mirrored = columns[MirroredColumn] == "Y";

        // Empty optional text columns are stored as null rather than "".
        string oldName = string.IsNullOrEmpty(columns[OldNameColumn]) ? null : columns[OldNameColumn];
        string comment = string.IsNullOrEmpty(columns[CommentColumn]) ? null : columns[CommentColumn];

        // A simple case mapping is kept only when it is present and differs from the code point itself.
        uint? simpleUpper = null;
        string upperText = columns[UpperColumn];
        if (!string.IsNullOrEmpty(upperText))
        {
            uint parsed = uint.Parse(upperText, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
            if (parsed != code)
            {
                simpleUpper = parsed;
            }
        }

        uint? simpleLower = null;
        string lowerText = columns[LowerColumn];
        if (!string.IsNullOrEmpty(lowerText))
        {
            uint parsed = uint.Parse(lowerText, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
            if (parsed != code)
            {
                simpleLower = parsed;
            }
        }

        uint? simpleTitle = null;
        string titleText = columns[TitleColumn];
        if (!string.IsNullOrEmpty(titleText))
        {
            uint parsed = uint.Parse(titleText, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
            if (parsed != code)
            {
                simpleTitle = parsed;
            }
        }

        table[code] = new CodePoint(
            code,
            name,
            category,
            combiningClass,
            bidi,
            mirrored,
            oldName,
            comment,
            simpleUpper,
            simpleLower,
            simpleTitle);
    }

    // ---- SpecialCasing ----

    // Single list instance handed to every ProcessTokenStringForCodePoints call.
    List<CodePoint> scratch = new List<CodePoint>();

    foreach (string rawLine in File.ReadAllLines(SpecialCasingFileName, Encoding.UTF8))
    {
        if (string.IsNullOrEmpty(rawLine))
        {
            continue;
        }

        // Drop everything from the first '#' onward; a line that was only a comment is skipped.
        string entry = rawLine;
        int hashIndex = entry.IndexOf('#');
        if (hashIndex >= 0)
        {
            entry = entry.Substring(0, hashIndex).Trim(blank);
            if (string.IsNullOrEmpty(entry))
            {
                continue;
            }
        }

        string[] columns = entry.Split(fieldSeparator);
        Debug.Assert(columns.Length >= 4);

        uint code = uint.Parse(columns[SpecialCodeColumn], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

        // We don't want any conditional mappings
        bool hasCondition = columns.Length >= 5
            && !string.IsNullOrEmpty(columns[SpecialConditionColumn].Trim(blank));
        if (hasCondition)
        {
            continue;
        }

        ProcessTokenStringForCodePoints(code, columns[SpecialLowerColumn], blank, scratch, lowerTable);
        ProcessTokenStringForCodePoints(code, columns[SpecialTitleColumn], blank, scratch, titleTable);
        ProcessTokenStringForCodePoints(code, columns[SpecialUpperColumn], blank, scratch, upperTable);
    }
}
/// <summary>
/// Reads "UnicodeData.txt" and "SpecialCasing.txt" and fills the static <c>CodePoint</c> tables.
/// </summary>
/// <remarks>
/// <para>
/// Each usable record of "UnicodeData.txt" becomes one <c>CodePoint</c> stored in
/// <c>CodePoint.codePointsByValue</c> under its code point value. Columns 5 through 8 of a
/// record are not read.
/// </para>
/// <para>
/// For each unconditional record of "SpecialCasing.txt" the lower, title and upper mapping columns
/// are handed, in that order, to <c>ProcessTokenStringForCodePoints</c> together with the matching
/// mapping dictionary. Records that carry a non-blank condition column are skipped.
/// </para>
/// <para>
/// <c>GetFile</c> is invoked for both file names before anything is read.
/// NOTE(review): presumably it makes the file available in the working directory - confirm against its definition.
/// </para>
/// <para>
/// The column count of a record is only checked through <c>Debug.Assert</c>.
/// </para>
/// </remarks>
public static void LoadUnicodeData()
{
    const string UnicodeDataFileName = "UnicodeData.txt";
    const string SpecialCasingFileName = "SpecialCasing.txt";

    // Column positions inside a semicolon-separated UnicodeData record.
    // 5, 6, 7, 8 omitted for the moment
    const int CodeColumn = 0;
    const int NameColumn = 1;
    const int CategoryColumn = 2;
    const int CombiningClassColumn = 3;
    const int BidiClassColumn = 4;
    const int MirroredColumn = 9;
    const int OldNameColumn = 10;
    const int CommentColumn = 11;
    const int UpperColumn = 12;
    const int LowerColumn = 13;
    const int TitleColumn = 14;

    // Column positions inside a semicolon-separated SpecialCasing record.
    const int SpecialCodeColumn = 0;
    const int SpecialLowerColumn = 1;
    const int SpecialTitleColumn = 2;
    const int SpecialUpperColumn = 3;
    const int SpecialConditionColumn = 4;

    GetFile(UnicodeDataFileName);
    GetFile(SpecialCasingFileName);

    SortedList<uint, CodePoint> table = CodePoint.codePointsByValue;
    Dictionary<uint, CodePoint[]> upperTable = CodePoint.uppercaseMappings;
    Dictionary<uint, CodePoint[]> lowerTable = CodePoint.lowercaseMappings;
    Dictionary<uint, CodePoint[]> titleTable = CodePoint.titlecaseMappings;

    char[] blank = { ' ' };
    char[] fieldSeparator = { ';' };

    // ---- UnicodeData ----

    // Name -> enum member lookups, keyed by the member's "G" formatted name (ordinal comparison).
    GeneralCategory[] categoryMembers = (GeneralCategory[])Enum.GetValues(typeof(GeneralCategory));
    Dictionary<string, GeneralCategory> categoriesByName =
        new Dictionary<string, GeneralCategory>(categoryMembers.Length, StringComparer.Ordinal);
    foreach (GeneralCategory member in categoryMembers)
    {
        categoriesByName[member.ToString("G")] = member;
    }

    BidiClass[] bidiMembers = (BidiClass[])Enum.GetValues(typeof(BidiClass));
    Dictionary<string, BidiClass> bidiClassesByName =
        new Dictionary<string, BidiClass>(bidiMembers.Length, StringComparer.Ordinal);
    foreach (BidiClass member in bidiMembers)
    {
        bidiClassesByName[member.ToString("G")] = member;
    }

    foreach (string record in File.ReadAllLines(UnicodeDataFileName, Encoding.UTF8))
    {
        // Blank lines and lines starting with '#' carry no data.
        if (string.IsNullOrEmpty(record) || record[0] == '#')
        {
            continue;
        }

        string[] columns = record.Split(fieldSeparator);
        Debug.Assert(columns.Length >= 15);

        uint code = uint.Parse(columns[CodeColumn], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

        string name = string.IsNullOrEmpty(columns[NameColumn]) ? NameDefault : columns[NameColumn];

        // An empty category column falls back to Cn; any other text must be a known member name.
        string categoryText = columns[CategoryColumn];
        GeneralCategory category = string.IsNullOrEmpty(categoryText)
            ? GeneralCategory.Cn
            : categoriesByName[categoryText];

        byte combiningClass = 0;
        string combiningText = columns[CombiningClassColumn];
        if (!string.IsNullOrEmpty(combiningText))
        {
            combiningClass = byte.Parse(combiningText, NumberStyles.Integer, NumberFormatInfo.InvariantInfo);
        }

        // An empty bidi column falls back to Invalid.
        string bidiText = columns[BidiClassColumn];
        BidiClass bidi = string.IsNullOrEmpty(bidiText)
            ? BidiClass.Invalid
            : bidiClassesByName[bidiText];

        bool mirrored = columns[MirroredColumn] == "Y";

        // Empty optional text columns are stored as null rather than "".
        string oldName = string.IsNullOrEmpty(columns[OldNameColumn]) ? null : columns[OldNameColumn];
        string comment = string.IsNullOrEmpty(columns[CommentColumn]) ? null : columns[CommentColumn];

        // A simple case mapping is kept only when it is present and differs from the code point itself.
        uint? simpleUpper = null;
        string upperText = columns[UpperColumn];
        if (!string.IsNullOrEmpty(upperText))
        {
            uint parsed = uint.Parse(upperText, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
            if (parsed != code)
            {
                simpleUpper = parsed;
            }
        }

        uint? simpleLower = null;
        string lowerText = columns[LowerColumn];
        if (!string.IsNullOrEmpty(lowerText))
        {
            uint parsed = uint.Parse(lowerText, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
            if (parsed != code)
            {
                simpleLower = parsed;
            }
        }

        uint? simpleTitle = null;
        string titleText = columns[TitleColumn];
        if (!string.IsNullOrEmpty(titleText))
        {
            uint parsed = uint.Parse(titleText, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
            if (parsed != code)
            {
                simpleTitle = parsed;
            }
        }

        table[code] = new CodePoint(
            code,
            name,
            category,
            combiningClass,
            bidi,
            mirrored,
            oldName,
            comment,
            simpleUpper,
            simpleLower,
            simpleTitle);
    }

    // ---- SpecialCasing ----

    // Single list instance handed to every ProcessTokenStringForCodePoints call.
    List<CodePoint> scratch = new List<CodePoint>();

    foreach (string rawLine in File.ReadAllLines(SpecialCasingFileName, Encoding.UTF8))
    {
        if (string.IsNullOrEmpty(rawLine))
        {
            continue;
        }

        // Drop everything from the first '#' onward; a line that was only a comment is skipped.
        string entry = rawLine;
        int hashIndex = entry.IndexOf('#');
        if (hashIndex >= 0)
        {
            entry = entry.Substring(0, hashIndex).Trim(blank);
            if (string.IsNullOrEmpty(entry))
            {
                continue;
            }
        }

        string[] columns = entry.Split(fieldSeparator);
        Debug.Assert(columns.Length >= 4);

        uint code = uint.Parse(columns[SpecialCodeColumn], NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);

        // We don't want any conditional mappings
        bool hasCondition = columns.Length >= 5
            && !string.IsNullOrEmpty(columns[SpecialConditionColumn].Trim(blank));
        if (hasCondition)
        {
            continue;
        }

        ProcessTokenStringForCodePoints(code, columns[SpecialLowerColumn], blank, scratch, lowerTable);
        ProcessTokenStringForCodePoints(code, columns[SpecialTitleColumn], blank, scratch, titleTable);
        ProcessTokenStringForCodePoints(code, columns[SpecialUpperColumn], blank, scratch, upperTable);
    }
}
/// <summary>
/// Creates a mapping entry for <paramref name="codePointValue"/> whose uppercase form is a
/// sequence of code points.
/// </summary>
/// <param name="codePointValue">Value stored in <c>CodePointValue</c>.</param>
/// <param name="uppercaseMapping">
/// Code points whose <c>Value</c> members are copied, in order, into <c>UppercaseMappingValues</c>.
/// </param>
/// <remarks>
/// <c>UppercaseMappingValue</c> is set to 0 by this constructor.
/// </remarks>
public StringCaseMapping(uint codePointValue, CodePoint[] uppercaseMapping)
{
    this.CodePointValue = codePointValue;

    // Collect the raw values first, then publish the finished array on the instance.
    uint[] mappedValues = new uint[uppercaseMapping.Length];
    int slot = 0;
    foreach (CodePoint mapped in uppercaseMapping)
    {
        mappedValues[slot] = mapped.Value;
        slot++;
    }

    this.UppercaseMappingValues = mappedValues;
    this.UppercaseMappingValue = 0;
}