// Generates the Unicode data trie and the BiDi trie, then checks that every
// generated item can be read back from its trie with the masks and shifts
// declared on UnicodeData. Finishes by emitting the property value alias helper.
public void Should_Generate_Data()
{
    const string outputFolder = "Generated";

    // The generators write their .trie files into this folder.
    if (!Directory.Exists(outputFolder))
    {
        Directory.CreateDirectory(outputFolder);
    }

    var dataTrie = UnicodeDataGenerator.GenerateUnicodeDataTrie(out var dataEntries, out var expectedData);

    foreach (var expected in expectedData.Values)
    {
        var packed = dataTrie.Get(expected.Codepoint);

        // The general category is stored unshifted, hence the shift of 0.
        Assert.Equal(expected.GeneralCategory, GetValue(packed, 0, UnicodeData.CATEGORY_MASK));
        Assert.Equal(expected.Script, GetValue(packed, UnicodeData.SCRIPT_SHIFT, UnicodeData.SCRIPT_MASK));
        Assert.Equal(expected.LineBreakClass, GetValue(packed, UnicodeData.LINEBREAK_SHIFT, UnicodeData.LINEBREAK_MASK));
    }

    var bidiTrie = UnicodeDataGenerator.GenerateBiDiTrie(out var bidiEntries, out var expectedBiDi);

    foreach (var expected in expectedBiDi.Values)
    {
        var packed = bidiTrie.Get(expected.Codepoint);

        // The paired bracket is stored unshifted, hence the shift of 0.
        Assert.Equal(expected.Bracket, GetValue(packed, 0, UnicodeData.BIDIPAIREDBRACKED_MASK));
        Assert.Equal(expected.BracketType, GetValue(packed, UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT, UnicodeData.BIDIPAIREDBRACKEDTYPE_MASK));
        Assert.Equal(expected.BiDiClass, GetValue(packed, UnicodeData.BIDICLASS_SHIFT, UnicodeData.BIDICLASS_MASK));
    }

    UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(dataEntries, bidiEntries);
}
/// <summary>
/// Builds the BiDi trie from the BiDi class data and the paired bracket data,
/// saves it to <c>Generated/BiDi.trie</c> and returns the frozen trie.
/// </summary>
/// <param name="biDiDataEntries">
/// Receives the enum entries created for the paired bracket types and the BiDi classes.
/// </param>
/// <param name="biDiData">
/// Receives the dictionary of items that were packed into the trie
/// (presumably keyed by code point; the keys are assigned by the Add* helpers).
/// </param>
/// <returns>The frozen trie that was written to disk.</returns>
public static UnicodeTrie GenerateBiDiTrie(out BiDiDataEntries biDiDataEntries, out Dictionary<int, BiDiDataItem> biDiData)
{
    const string outputDirectory = "Generated";

    biDiData = new Dictionary<int, BiDiDataItem>();

    var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();
    var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);
    var biDiClassData = ReadBiDiData();

    foreach (var (range, name) in biDiClassData)
    {
        var biDiClass = biDiClassMappings[name];

        AddBiDiClassRange(biDiData, range, biDiClass);
    }

    var biDiPairedBracketTypeEntries = UnicodeEnumsGenerator.CreateBiDiPairedBracketTypeEnum();
    var biDiPairedBracketTypeMappings = CreateTagToIndexMappings(biDiPairedBracketTypeEntries);
    var biDiPairedBracketData = ReadBiDiPairedBracketData();

    foreach (var (range, name) in biDiPairedBracketData)
    {
        var bracketType = biDiPairedBracketTypeMappings[name];

        AddBiDiBracket(biDiData, range, bracketType);
    }

    var biDiTrieBuilder = new UnicodeTrieBuilder(/*initialValue*/);

    foreach (var properties in biDiData.Values)
    {
        // Packed value: Bracket is stored unshifted; BracketType and BiDiClass
        // are moved to their own bit ranges by the UnicodeData shift constants.
        var value = (properties.BiDiClass << UnicodeData.BIDICLASS_SHIFT) |
                    (properties.BracketType << UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT) |
                    properties.Bracket;

        biDiTrieBuilder.Set(properties.Codepoint, (uint)value);
    }

    biDiDataEntries = new BiDiDataEntries()
    {
        PairedBracketTypes = biDiPairedBracketTypeEntries,
        BiDiClasses = biDiClassEntries
    };

    // Freeze before opening the file so the output is only created once the
    // trie has been built.
    var trie = biDiTrieBuilder.Freeze();

    // File.Create throws when the target directory is missing, and a literal
    // "\\" separator is only understood as a directory separator on Windows.
    Directory.CreateDirectory(outputDirectory);

    using (var stream = File.Create(Path.Combine(outputDirectory, "BiDi.trie")))
    {
        trie.Save(stream);
    }

    return trie;
}
/// <summary>
/// Builds a trie that maps code points to the index of their grapheme cluster
/// break class, using the grapheme break property file followed by the emoji
/// data file.
/// </summary>
/// <remarks>
/// The stored value is the position of the class name in the list of
/// "Grapheme_Cluster_Break (GCB)" property value aliases. Entries whose class
/// name is not in that list are skipped. The emoji file is applied second, so
/// its entries overwrite earlier ones for the same code points.
/// </remarks>
/// <returns>The frozen trie.</returns>
private static UnicodeTrie GenerateBreakTypeTrie()
{
    var graphemeBreakClassValues = UnicodeEnumsGenerator.GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)");

    var graphemeBreakClassMapping = graphemeBreakClassValues.Select(x => x.name).ToList();

    var trieBuilder = new UnicodeTrieBuilder();

    // Both files are processed with identical rules; order matters because
    // later writes replace earlier ones.
    var breakDataSources = new[]
    {
        "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
        "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
    };

    foreach (var source in breakDataSources)
    {
        foreach (var (start, end, graphemeBreakType) in ReadBreakData(source))
        {
            // A single IndexOf replaces the former Contains + IndexOf pair;
            // a negative index means the class is unknown and is skipped.
            var classIndex = graphemeBreakClassMapping.IndexOf(graphemeBreakType);

            if (classIndex < 0)
            {
                continue;
            }

            if (start == end)
            {
                trieBuilder.Set(start, (uint)classIndex);
            }
            else
            {
                trieBuilder.SetRange(start, end, (uint)classIndex);
            }
        }
    }

    return trieBuilder.Freeze();
}
/// <summary>
/// Builds the Unicode data trie holding general category, script and line
/// break class per code point, saves it to <c>Generated/UnicodeData.trie</c>
/// and returns the frozen trie.
/// </summary>
/// <param name="dataEntries">
/// Receives the enum entries created for scripts, general categories and line break classes.
/// </param>
/// <param name="unicodeData">
/// Receives the items returned by <c>GetUnicodeData</c> that were packed into the trie.
/// </param>
/// <returns>The frozen trie that was written to disk.</returns>
public static UnicodeTrie GenerateUnicodeDataTrie(out UnicodeDataEntries dataEntries, out Dictionary<int, UnicodeDataItem> unicodeData)
{
    const string outputDirectory = "Generated";

    var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();
    var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

    // Scripts are looked up by name, the other two properties by tag.
    var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();
    var scriptMappings = CreateNameToIndexMappings(scriptEntries);

    var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();
    var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

    unicodeData = GetUnicodeData(generalCategoryMappings, scriptMappings, lineBreakClassMappings);

    var unicodeDataTrieBuilder = new UnicodeTrieBuilder(/*initialValue*/);

    foreach (var properties in unicodeData.Values)
    {
        // Packed value: GeneralCategory is stored unshifted; Script and
        // LineBreakClass are moved to their own bit ranges by the UnicodeData
        // shift constants. No BiDi field is packed into this trie.
        var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                    (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                    properties.GeneralCategory;

        unicodeDataTrieBuilder.Set(properties.Codepoint, (uint)value);
    }

    dataEntries = new UnicodeDataEntries
    {
        Scripts = scriptEntries,
        GeneralCategories = generalCategoryEntries,
        LineBreakClasses = lineBreakClassEntries
    };

    // Freeze before opening the file so the output is only created once the
    // trie has been built.
    var trie = unicodeDataTrieBuilder.Freeze();

    // File.Create throws when the target directory is missing, and a literal
    // "\\" separator is only understood as a directory separator on Windows.
    Directory.CreateDirectory(outputDirectory);

    using (var stream = File.Create(Path.Combine(outputDirectory, "UnicodeData.trie")))
    {
        trie.Save(stream);
    }

    return trie;
}
/// <summary>
/// Reads the general category, script, BiDi class and line break class data,
/// packs them per code point into a single trie, saves it to
/// <c>Generated/UnicodeData.trie</c> and emits the property value alias helper.
/// </summary>
public static void Execute()
{
    const string outputDirectory = "Generated";

    var codepoints = new Dictionary<int, UnicodeDataItem>();

    var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();
    var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);
    var generalCategoryData = ReadGeneralCategoryData();

    foreach (var (range, name) in generalCategoryData)
    {
        var generalCategory = generalCategoryMappings[name];

        AddGeneralCategoryRange(codepoints, range, generalCategory);
    }

    // Scripts are looked up by name, the other properties by tag.
    var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();
    var scriptMappings = CreateNameToIndexMappings(scriptEntries);
    var scriptData = ReadScriptData();

    foreach (var (range, name) in scriptData)
    {
        var script = scriptMappings[name];

        AddScriptRange(codepoints, range, script);
    }

    var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();
    var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);
    var biDiData = ReadBiDiData();

    foreach (var (range, name) in biDiData)
    {
        var biDiClass = biDiClassMappings[name];

        AddBiDiClassRange(codepoints, range, biDiClass);
    }

    var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();
    var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);
    var lineBreakClassData = ReadLineBreakClassData();

    foreach (var (range, name) in lineBreakClassData)
    {
        var lineBreakClass = lineBreakClassMappings[name];

        AddLineBreakClassRange(codepoints, range, lineBreakClass);
    }

    // No explicit initial value is passed to the builder, so code points that
    // are never set keep the builder's own default.
    var builder = new UnicodeTrieBuilder(/*initialValue*/);

    foreach (var properties in codepoints.Values)
    {
        // Packed value: GeneralCategory is stored unshifted; Script, BiDiClass
        // and LineBreakClass are moved to their own bit ranges by the
        // UnicodeData shift constants.
        var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                    (properties.BiDiClass << UnicodeData.BIDI_SHIFT) |
                    (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                    properties.GeneralCategory;

        builder.Set(properties.Codepoint, (uint)value);
    }

    // Freeze before opening the file so the output is only created once the
    // trie has been built.
    var trie = builder.Freeze();

    // File.Create throws when the target directory is missing, and a literal
    // "\\" separator is only understood as a directory separator on Windows.
    Directory.CreateDirectory(outputDirectory);

    using (var stream = File.Create(Path.Combine(outputDirectory, "UnicodeData.trie")))
    {
        trie.Save(stream);
    }

    UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(scriptEntries, generalCategoryEntries,
        biDiClassEntries, lineBreakClassEntries);
}