/// <summary>
/// Builds a trie of <see cref="GraphemeBreakClass"/> values from the grapheme break
/// property file and the emoji data file located under <c>UnicodeDataGenerator.Ucd</c>.
/// </summary>
/// <returns>The frozen trie produced by the builder.</returns>
private static UnicodeTrie GenerateBreakTypeTrie()
{
    var builder = new UnicodeTrieBuilder();

    var graphemeEntries = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "auxiliary/GraphemeBreakProperty.txt"));
    var emojiEntries = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "emoji/emoji-data.txt"));

    // The grapheme entries are applied first, the emoji entries second.
    foreach (var entries in new[] { graphemeEntries, emojiEntries })
    {
        foreach (var (first, last, className) in entries)
        {
            // Entries whose name does not parse as a GraphemeBreakClass are ignored.
            if (Enum.TryParse<GraphemeBreakClass>(className, out var breakClass))
            {
                var encoded = (uint)breakClass;

                if (first != last)
                {
                    builder.SetRange(first, last, encoded);
                }
                else
                {
                    builder.Set(first, encoded);
                }
            }
        }
    }

    return builder.Freeze();
}
/// <summary>
/// Generates the UnicodeTrie for the LineBreak code point ranges.
/// </summary>
/// <remarks>
/// Reads "LineBreak.txt" line by line through <c>GetStreamReader</c>. Every line matching
/// <c>START[..END] ; CLASS #</c> (hexadecimal code points) is written into a builder whose
/// initial value is <see cref="LineBreakClass.XX"/>. The frozen trie is then saved to the
/// stream returned by <c>GetStreamWriter("LineBreak.trie")</c>. Lines that do not match the
/// pattern are skipped.
/// </remarks>
public static void GenerateLineBreakTrie()
{
    var regex = new Regex(@"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(.*?)\s*#");
    var builder = new UnicodeTrieBuilder((uint)LineBreakClass.XX);

    using (StreamReader sr = GetStreamReader("LineBreak.txt"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            Match match = regex.Match(line);
            if (!match.Success)
            {
                continue;
            }

            string start = match.Groups[1].Value;

            // The "..END" part is optional; a single code point is a range of length one.
            Group endGroup = match.Groups[2];
            string end = endGroup.Success ? endGroup.Value : start;

            string point = match.Groups[3].Value;

            // The data file is machine-readable, so parse with the invariant culture
            // rather than whatever culture the generator happens to run under.
            builder.SetRange(
                int.Parse(start, NumberStyles.HexNumber, CultureInfo.InvariantCulture),
                int.Parse(end, NumberStyles.HexNumber, CultureInfo.InvariantCulture),
                (uint)Enum.Parse<LineBreakClass>(point),
                true);
        }
    }

    UnicodeTrie trie = builder.Freeze();

    using FileStream stream = GetStreamWriter("LineBreak.trie");
    trie.Save(stream);
}
/// <summary>
/// Builds a trie from <paramref name="testRanges"/> and asserts that both the builder and
/// the frozen trie return the values listed in <paramref name="checkValues"/>.
/// </summary>
/// <param name="testRanges">
/// The first element supplies only the builder's initial value. Each later element is
/// applied with <c>SetRange(start, end - 1, value, overwrite)</c>.
/// </param>
/// <param name="checkValues">
/// Each element's <c>value</c> is expected for every code point from the current position
/// up to, but not including, that element's <c>codePoint</c>. The position starts at 0 and
/// carries over from one element to the next.
/// </param>
public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
{
    uint fallback = 0x0bad;
    var trieBuilder = new UnicodeTrieBuilder(testRanges[0].value, fallback);

    // Index 0 was consumed above as the initial value, so start at 1.
    int rangeIndex = 1;
    while (rangeIndex < testRanges.Length)
    {
        var range = testRanges[rangeIndex];
        trieBuilder.SetRange(range.start, range.end - 1, range.value, range.overwrite);
        rangeIndex++;
    }

    var trie = trieBuilder.Freeze();

    int codePoint = 0;
    foreach (var expected in checkValues)
    {
        while (codePoint < expected.codePoint)
        {
            Assert.Equal(expected.value, trieBuilder.Get(codePoint));
            Assert.Equal(expected.value, trie.Get(codePoint));
            codePoint++;
        }
    }
}
/// <summary>
/// Populates a <see cref="UnicodeTrieBuilder"/> from <paramref name="testRanges"/>, freezes
/// it, and asserts that builder and frozen trie agree with <paramref name="checkValues"/>.
/// </summary>
/// <param name="testRanges">
/// Element 0 provides the initial value only. Every following element is applied through
/// <c>SetRange(Start, End - 1, Value, Overwrite)</c>.
/// </param>
/// <param name="checkValues">
/// For each element, all code points from the running position up to, but excluding,
/// <c>CodePoint</c> must yield <c>Value</c>. The running position starts at 0.
/// </param>
public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
{
    const uint ErrorValue = 0x0bad;
    var trieBuilder = new UnicodeTrieBuilder(testRanges[0].Value, ErrorValue);

    // Skip element 0: it was only used for the initial value.
    int next = 1;
    while (next < testRanges.Length)
    {
        TestRange range = testRanges[next];
        trieBuilder.SetRange(range.Start, range.End - 1, range.Value, range.Overwrite);
        next++;
    }

    UnicodeTrie trie = trieBuilder.Freeze();

    int codePoint = 0;
    foreach (CheckValue check in checkValues)
    {
        for (; codePoint < check.CodePoint; codePoint++)
        {
            Assert.Equal(check.Value, trieBuilder.Get(codePoint));
            Assert.Equal(check.Value, trie.Get(codePoint));
        }
    }
}
/// <summary>
/// Collects BiDi class and paired-bracket data per code point, packs it into a trie,
/// emits the generated trie class and writes the trie to <c>Generated/BiDi.trie</c>.
/// </summary>
/// <param name="biDiDataEntries">
/// Receives the BiDi class and paired bracket type enum entries used for the mappings.
/// </param>
/// <param name="biDiData">
/// Receives the dictionary that was filled by <c>AddBiDiClassRange</c> and
/// <c>AddBiDiBracket</c>; its values are the items written into the trie.
/// </param>
/// <returns>The frozen trie that was also saved to disk.</returns>
public static UnicodeTrie GenerateBiDiTrie(out BiDiDataEntries biDiDataEntries, out Dictionary<int, BiDiDataItem> biDiData)
{
    biDiData = new Dictionary<int, BiDiDataItem>();

    var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();
    var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);
    var biDiClassData = ReadBiDiData();

    foreach (var (range, name) in biDiClassData)
    {
        var biDiClass = biDiClassMappings[name];
        AddBiDiClassRange(biDiData, range, biDiClass);
    }

    var biDiPairedBracketTypeEntries = UnicodeEnumsGenerator.CreateBiDiPairedBracketTypeEnum();
    var biDiPairedBracketTypeMappings = CreateTagToIndexMappings(biDiPairedBracketTypeEntries);
    var biDiPairedBracketData = ReadBiDiPairedBracketData();

    foreach (var (range, name) in biDiPairedBracketData)
    {
        var bracketType = biDiPairedBracketTypeMappings[name];
        AddBiDiBracket(biDiData, range, bracketType);
    }

    var biDiTrieBuilder = new UnicodeTrieBuilder();

    foreach (var properties in biDiData.Values)
    {
        // BiDiClass and BracketType are shifted by their respective constants and
        // OR-ed with the unshifted Bracket value.
        var value = (properties.BiDiClass << UnicodeData.BIDICLASS_SHIFT) |
                    (properties.BracketType << UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT) |
                    properties.Bracket;

        biDiTrieBuilder.Set(properties.Codepoint, (uint)value);
    }

    biDiDataEntries = new BiDiDataEntries()
    {
        PairedBracketTypes = biDiPairedBracketTypeEntries,
        BiDiClasses = biDiClassEntries
    };

    var trie = biDiTrieBuilder.Freeze();

    GenerateTrieClass("BiDi", trie);

    // Path.Combine instead of a literal '\\' so the file lands inside the "Generated"
    // directory on every platform, not in a file literally named "Generated\BiDi.trie".
    using (var stream = File.Create(Path.Combine("Generated", "BiDi.trie")))
    {
        trie.Save(stream);
    }

    return trie;
}
/// <summary>
/// Verifies that a frozen trie returns the single stored value, the initial value for a
/// neighbouring code point, and the error value for -1 and 0x110000.
/// </summary>
public void SetCompacted()
{
    const uint InitialValue = 10;
    const uint ErrorValue = 666;
    const uint StoredValue = 99;
    const int StoredAt = 0x4567;

    var trieBuilder = new UnicodeTrieBuilder(InitialValue, ErrorValue);
    trieBuilder.Set(StoredAt, StoredValue);

    var frozen = trieBuilder.Freeze();

    Assert.Equal(InitialValue, frozen.Get(StoredAt - 1));
    Assert.Equal(StoredValue, frozen.Get(StoredAt));
    Assert.Equal(ErrorValue, frozen.Get(-1));
    Assert.Equal(ErrorValue, frozen.Get(0x110000));
}
/// <summary>
/// Builds a trie that maps code points to the index of their grapheme cluster break class.
/// </summary>
/// <remarks>
/// The class names come from the "# Grapheme_Cluster_Break (GCB)" property value aliases;
/// the stored value is the position of the name in that list. Break data is read first from
/// the grapheme break property file and then from the emoji data file. Entries whose name
/// is not in the alias list are skipped.
/// NOTE(review): the emoji URL is pinned to 12.0 while the UCD URL tracks "latest" —
/// confirm this is intentional.
/// </remarks>
/// <returns>The frozen trie produced by the builder.</returns>
private static UnicodeTrie GenerateBreakTypeTrie()
{
    var graphemeBreakClassValues = UnicodeEnumsGenerator.GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)");
    var graphemeBreakClassMapping = graphemeBreakClassValues.Select(x => x.name).ToList();

    var trieBuilder = new UnicodeTrieBuilder();

    // Processed in order; each source is read only when its turn comes.
    var sources = new[]
    {
        "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
        "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
    };

    foreach (var source in sources)
    {
        foreach (var (start, end, graphemeBreakType) in ReadBreakData(source))
        {
            // One lookup serves as both the membership test and the stored value.
            var classIndex = graphemeBreakClassMapping.IndexOf(graphemeBreakType);
            if (classIndex < 0)
            {
                continue;
            }

            if (start == end)
            {
                trieBuilder.Set(start, (uint)classIndex);
            }
            else
            {
                trieBuilder.SetRange(start, end, (uint)classIndex);
            }
        }
    }

    return trieBuilder.Freeze();
}
/// <summary>
/// Packs general category, script and line break class of every collected code point into
/// a trie, emits the generated trie class and writes the trie to
/// <c>Generated/UnicodeData.trie</c>.
/// </summary>
/// <param name="dataEntries">
/// Receives the script, general category and line break class enum entries used for the
/// mappings.
/// </param>
/// <param name="unicodeData">
/// Receives the dictionary returned by <c>GetUnicodeData</c>; its values are the items
/// written into the trie.
/// </param>
/// <returns>The frozen trie that was also saved to disk.</returns>
public static UnicodeTrie GenerateUnicodeDataTrie(out UnicodeDataEntries dataEntries, out Dictionary<int, UnicodeDataItem> unicodeData)
{
    var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();
    var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

    var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();
    var scriptMappings = CreateNameToIndexMappings(scriptEntries);

    var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();
    var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

    unicodeData = GetUnicodeData(generalCategoryMappings, scriptMappings, lineBreakClassMappings);

    var unicodeDataTrieBuilder = new UnicodeTrieBuilder();

    foreach (var properties in unicodeData.Values)
    {
        // [line break]|[script]|[category] — GeneralCategory is stored unshifted.
        var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                    (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                    properties.GeneralCategory;

        unicodeDataTrieBuilder.Set(properties.Codepoint, (uint)value);
    }

    dataEntries = new UnicodeDataEntries
    {
        Scripts = scriptEntries,
        GeneralCategories = generalCategoryEntries,
        LineBreakClasses = lineBreakClassEntries
    };

    var trie = unicodeDataTrieBuilder.Freeze();

    GenerateTrieClass("UnicodeData", trie);

    // Path.Combine instead of a literal '\\' so the file lands inside the "Generated"
    // directory on every platform, not in a file literally named "Generated\UnicodeData.trie".
    using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
    {
        trie.Save(stream);
    }

    return trie;
}
/// <summary>
/// Verifies the values returned by a frozen trie after a non-overwriting range
/// (13..6666 = 7788) followed by an overwriting range (6000..7000 = 9900).
/// </summary>
public void SetRangeCompacted()
{
    var trieBuilder = new UnicodeTrieBuilder(10, 666);
    trieBuilder.SetRange(13, 6666, 7788, false);
    trieBuilder.SetRange(6000, 7000, 9900, true);

    var frozen = trieBuilder.Freeze();

    // Checked in this order; the last entry lies outside the Unicode range.
    (int CodePoint, uint Expected)[] expectations =
    {
        (12, 10u),
        (13, 7788u),
        (5999, 7788u),
        (6000, 9900u),
        (7000, 9900u),
        (7001, 10u),
        (0x110000, 666u)
    };

    foreach (var (codePoint, expected) in expectations)
    {
        Assert.Equal(expected, frozen.Get(codePoint));
    }
}
/// <summary>
/// Verifies that a trie saved to a stream and loaded back through the
/// <see cref="UnicodeTrie"/> stream constructor returns the expected values.
/// </summary>
public void SetRangeSerialized()
{
    var trieBuilder = new UnicodeTrieBuilder(10, 666);
    trieBuilder.SetRange(13, 6666, 7788, false);
    trieBuilder.SetRange(6000, 7000, 9900, true);

    using var buffer = new MemoryStream();
    trieBuilder.Freeze().Save(buffer);

    // Rewind so the trie is read from the beginning of the saved data.
    buffer.Seek(0, SeekOrigin.Begin);
    var loaded = new UnicodeTrie(buffer);

    (int CodePoint, uint Expected)[] expectations =
    {
        (12, 10u),
        (13, 7788u),
        (5999, 7788u),
        (6000, 9900u),
        (7000, 9900u),
        (7001, 10u),
        (0x110000, 666u)
    };

    foreach (var (codePoint, expected) in expectations)
    {
        Assert.Equal(expected, loaded.Get(codePoint));
    }
}
/// <summary>
/// Collects general category, script, BiDi class and line break class for each code point,
/// packs them into a trie saved to <c>Generated/UnicodeData.trie</c>, and then creates the
/// property value alias helper from the enum entries.
/// </summary>
public static void Execute()
{
    var codepoints = new Dictionary<int, UnicodeDataItem>();

    var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();
    var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);
    var generalCategoryData = ReadGeneralCategoryData();

    foreach (var (range, name) in generalCategoryData)
    {
        var generalCategory = generalCategoryMappings[name];
        AddGeneralCategoryRange(codepoints, range, generalCategory);
    }

    // Scripts are mapped by name; the other properties are mapped by tag.
    var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();
    var scriptMappings = CreateNameToIndexMappings(scriptEntries);
    var scriptData = ReadScriptData();

    foreach (var (range, name) in scriptData)
    {
        var script = scriptMappings[name];
        AddScriptRange(codepoints, range, script);
    }

    var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();
    var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);
    var biDiData = ReadBiDiData();

    foreach (var (range, name) in biDiData)
    {
        var biDiClass = biDiClassMappings[name];
        AddBiDiClassRange(codepoints, range, biDiClass);
    }

    var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();
    var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);
    var lineBreakClassData = ReadLineBreakClassData();

    foreach (var (range, name) in lineBreakClassData)
    {
        var lineBreakClass = lineBreakClassMappings[name];
        AddLineBreakClassRange(codepoints, range, lineBreakClass);
    }

    // The builder is created with its default arguments.
    var builder = new UnicodeTrieBuilder();

    foreach (var properties in codepoints.Values)
    {
        // [line break]|[biDi]|[script]|[category] — GeneralCategory is stored unshifted.
        var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                    (properties.BiDiClass << UnicodeData.BIDI_SHIFT) |
                    (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                    properties.GeneralCategory;

        builder.Set(properties.Codepoint, (uint)value);
    }

    // Path.Combine instead of a literal '\\' so the file lands inside the "Generated"
    // directory on every platform, not in a file literally named "Generated\UnicodeData.trie".
    using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
    {
        var trie = builder.Freeze();
        trie.Save(stream);
    }

    UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(scriptEntries, generalCategoryEntries, biDiClassEntries, lineBreakClassEntries);
}