/// <summary>
/// Builds the grapheme break trie from the Unicode grapheme break property file
/// and the emoji data file.
/// </summary>
/// <remarks>
/// Every range read from the data files is stored under the zero-based position of its
/// break type within the alias list returned for "# Grapheme_Cluster_Break (GCB)".
/// Entries whose break type does not appear in that list are skipped.
/// The emoji file is processed after the grapheme break file.
/// </remarks>
/// <returns>The frozen trie produced by the builder.</returns>
private static UnicodeTrie GenerateBreakTypeTrie()
{
    var graphemeBreakClassMapping = UnicodeEnumsGenerator
        .GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)")
        .Select(x => x.name)
        .ToList();

    var trieBuilder = new UnicodeTrieBuilder();

    // NOTE(review): the UCD file tracks "latest" while the emoji file is pinned to 12.0 —
    // confirm that this version mismatch is intended.
    var sources = new[]
    {
        "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
        "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
    };

    // Both files share the same entry shape, so one loop handles them in order.
    foreach (var source in sources)
    {
        foreach (var (start, end, graphemeBreakType) in ReadBreakData(source))
        {
            // A single scan answers both "is it known?" and "at which position?".
            var index = graphemeBreakClassMapping.IndexOf(graphemeBreakType);

            if (index < 0)
            {
                continue;
            }

            if (start == end)
            {
                trieBuilder.Set(start, (uint)index);
            }
            else
            {
                trieBuilder.SetRange(start, end, (uint)index);
            }
        }
    }

    return trieBuilder.Freeze();
}
/// <summary>
/// Builds the grapheme break trie from the local copies of the grapheme break property
/// file and the emoji data file found under <c>UnicodeDataGenerator.Ucd</c>.
/// </summary>
/// <remarks>
/// Entries whose break type cannot be parsed as a <c>GraphemeBreakClass</c> are skipped.
/// The emoji entries are applied after the grapheme break entries.
/// </remarks>
/// <returns>The frozen trie produced by the builder.</returns>
private static UnicodeTrie GenerateBreakTypeTrie()
{
    var builder = new UnicodeTrieBuilder();

    var graphemeEntries = ReadBreakData(
        Path.Combine(UnicodeDataGenerator.Ucd, "auxiliary/GraphemeBreakProperty.txt"));
    var emojiEntries = ReadBreakData(
        Path.Combine(UnicodeDataGenerator.Ucd, "emoji/emoji-data.txt"));

    foreach (var entries in new[] { graphemeEntries, emojiEntries })
    {
        foreach (var (first, last, className) in entries)
        {
            if (Enum.TryParse<GraphemeBreakClass>(className, out var breakClass))
            {
                var encoded = (uint)breakClass;

                if (first != last)
                {
                    builder.SetRange(first, last, encoded);
                }
                else
                {
                    builder.Set(first, encoded);
                }
            }
        }
    }

    return builder.Freeze();
}
/// <summary>
/// Collects BiDi class and paired bracket data per code point, packs it into a trie,
/// emits the generated trie class and writes the trie to <c>Generated/BiDi.trie</c>.
/// </summary>
/// <param name="biDiDataEntries">Receives the BiDi class and paired bracket type enum entries.</param>
/// <param name="biDiData">Receives the collected BiDi items that were written into the trie.</param>
/// <returns>The frozen trie that was saved to disk.</returns>
public static UnicodeTrie GenerateBiDiTrie(out BiDiDataEntries biDiDataEntries, out Dictionary<int, BiDiDataItem> biDiData)
{
    biDiData = new Dictionary<int, BiDiDataItem>();

    var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();
    var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);
    var biDiClassData = ReadBiDiData();

    foreach (var (range, name) in biDiClassData)
    {
        var biDiClass = biDiClassMappings[name];

        AddBiDiClassRange(biDiData, range, biDiClass);
    }

    var biDiPairedBracketTypeEntries = UnicodeEnumsGenerator.CreateBiDiPairedBracketTypeEnum();
    var biDiPairedBracketTypeMappings = CreateTagToIndexMappings(biDiPairedBracketTypeEntries);
    var biDiPairedBracketData = ReadBiDiPairedBracketData();

    foreach (var (range, name) in biDiPairedBracketData)
    {
        var bracketType = biDiPairedBracketTypeMappings[name];

        AddBiDiBracket(biDiData, range, bracketType);
    }

    // No initial value is passed, so the builder's own default is used.
    var biDiTrieBuilder = new UnicodeTrieBuilder(/*initialValue*/);

    foreach (var properties in biDiData.Values)
    {
        //[bracket]|[bracketType]|[biDiClass]
        var value = (properties.BiDiClass << UnicodeData.BIDICLASS_SHIFT) |
                    (properties.BracketType << UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT) |
                    properties.Bracket;

        biDiTrieBuilder.Set(properties.Codepoint, (uint)value);
    }

    biDiDataEntries = new BiDiDataEntries()
    {
        PairedBracketTypes = biDiPairedBracketTypeEntries,
        BiDiClasses = biDiClassEntries
    };

    var trie = biDiTrieBuilder.Freeze();

    GenerateTrieClass("BiDi", trie);

    // Path.Combine picks the platform's separator; a literal backslash would become
    // part of the file name on non-Windows systems.
    using (var stream = File.Create(Path.Combine("Generated", "BiDi.trie")))
    {
        trie.Save(stream);
    }

    return trie;
}
// Checks reads from a builder that has not been frozen: the written code point returns
// its value, its untouched neighbour is expected to return 10, and out-of-range lookups
// (-1 and 0x110000) are expected to return 666.
public void Set()
{
    const int target = 0x4567;

    var builder = new UnicodeTrieBuilder(10, 666);

    builder.Set(target, 99);

    Assert.Equal(10u, builder.Get(target - 1));
    Assert.Equal(99u, builder.Get(target));

    Assert.Equal(666u, builder.Get(-1));
    Assert.Equal(666u, builder.Get(0x110000));
}
// Same expectations as the builder-level test, but read from the frozen trie:
// the written code point returns its value, its untouched neighbour is expected to
// return 10, and out-of-range lookups (-1 and 0x110000) are expected to return 666.
public void SetCompacted()
{
    const int target = 0x4567;

    var source = new UnicodeTrieBuilder(10, 666);

    source.Set(target, 99);

    var frozen = source.Freeze();

    Assert.Equal(10u, frozen.Get(target - 1));
    Assert.Equal(99u, frozen.Get(target));

    Assert.Equal(666u, frozen.Get(-1));
    Assert.Equal(666u, frozen.Get(0x110000));
}
/// <summary>
/// Packs general category, script and line break class of every collected code point
/// into a trie, emits the generated trie class and writes the trie to
/// <c>Generated/UnicodeData.trie</c>.
/// </summary>
/// <param name="dataEntries">Receives the script, general category and line break class enum entries.</param>
/// <param name="unicodeData">Receives the per-code-point items that were written into the trie.</param>
/// <returns>The frozen trie that was saved to disk.</returns>
public static UnicodeTrie GenerateUnicodeDataTrie(out UnicodeDataEntries dataEntries, out Dictionary<int, UnicodeDataItem> unicodeData)
{
    var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();
    var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

    // Scripts are looked up by name, the other two properties by tag.
    var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();
    var scriptMappings = CreateNameToIndexMappings(scriptEntries);

    var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();
    var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

    unicodeData = GetUnicodeData(generalCategoryMappings, scriptMappings, lineBreakClassMappings);

    // No initial value is passed, so the builder's own default is used.
    var unicodeDataTrieBuilder = new UnicodeTrieBuilder(/*initialValue*/);

    foreach (var properties in unicodeData.Values)
    {
        //[line break]|[script]|[category]
        var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                    (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                    properties.GeneralCategory;

        unicodeDataTrieBuilder.Set(properties.Codepoint, (uint)value);
    }

    dataEntries = new UnicodeDataEntries
    {
        Scripts = scriptEntries,
        GeneralCategories = generalCategoryEntries,
        LineBreakClasses = lineBreakClassEntries
    };

    var trie = unicodeDataTrieBuilder.Freeze();

    GenerateTrieClass("UnicodeData", trie);

    // Path.Combine picks the platform's separator; a literal backslash would become
    // part of the file name on non-Windows systems.
    using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
    {
        trie.Save(stream);
    }

    return trie;
}
/// <summary>
/// Reads general category, script, BiDi class and line break class data, packs the
/// four properties of every code point into a trie, writes it to
/// <c>Generated/UnicodeData.trie</c> and finally generates the property value alias helper.
/// </summary>
public static void Execute()
{
    var codepoints = new Dictionary<int, UnicodeDataItem>();

    var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();
    var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);
    var generalCategoryData = ReadGeneralCategoryData();

    foreach (var (range, name) in generalCategoryData)
    {
        var generalCategory = generalCategoryMappings[name];

        AddGeneralCategoryRange(codepoints, range, generalCategory);
    }

    // Scripts are looked up by name, the other properties by tag.
    var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();
    var scriptMappings = CreateNameToIndexMappings(scriptEntries);
    var scriptData = ReadScriptData();

    foreach (var (range, name) in scriptData)
    {
        var script = scriptMappings[name];

        AddScriptRange(codepoints, range, script);
    }

    var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();
    var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);
    var biDiData = ReadBiDiData();

    foreach (var (range, name) in biDiData)
    {
        var biDiClass = biDiClassMappings[name];

        AddBiDiClassRange(codepoints, range, biDiClass);
    }

    var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();
    var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);
    var lineBreakClassData = ReadLineBreakClassData();

    foreach (var (range, name) in lineBreakClassData)
    {
        var lineBreakClass = lineBreakClassMappings[name];

        AddLineBreakClassRange(codepoints, range, lineBreakClass);
    }

    // No initial value is passed, so the builder's own default is used.
    var builder = new UnicodeTrieBuilder(/*initialValue*/);

    foreach (var properties in codepoints.Values)
    {
        //[line break]|[biDi]|[script]|[category]
        var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                    (properties.BiDiClass << UnicodeData.BIDI_SHIFT) |
                    (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                    properties.GeneralCategory;

        builder.Set(properties.Codepoint, (uint)value);
    }

    // Freeze before touching the file system so a failing freeze does not leave an
    // empty output file behind.
    var trie = builder.Freeze();

    // Path.Combine picks the platform's separator; a literal backslash would become
    // part of the file name on non-Windows systems.
    using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
    {
        trie.Save(stream);
    }

    UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(scriptEntries, generalCategoryEntries,
        biDiClassEntries, lineBreakClassEntries);
}