/// <summary>
/// Builds the trie that maps code points to grapheme break class indices.
/// </summary>
/// <remarks>
/// The value stored for a code point is the position of its break type name in the
/// list of names returned for "# Grapheme_Cluster_Break (GCB)". Entries whose type
/// name is not in that list are skipped. The grapheme break data is read and applied
/// first, then the emoji data.
/// </remarks>
/// <returns>The frozen trie produced by the builder.</returns>
private static UnicodeTrie GenerateBreakTypeTrie()
{
    var graphemeBreakClassValues = UnicodeEnumsGenerator.GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)");

    var graphemeBreakClassMapping = graphemeBreakClassValues.Select(x => x.name).ToList();

    var trieBuilder = new UnicodeTrieBuilder();

    // Both files share the same handling; each one is read only when its turn comes,
    // so the read/apply order is the same as with two separate loops.
    var breakDataSources = new[]
    {
        "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
        "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
    };

    foreach (var breakDataSource in breakDataSources)
    {
        foreach (var (start, end, graphemeBreakType) in ReadBreakData(breakDataSource))
        {
            // A single lookup serves as both the membership test and the stored value.
            var classIndex = graphemeBreakClassMapping.IndexOf(graphemeBreakType);

            if (classIndex < 0)
            {
                continue;
            }

            if (start == end)
            {
                trieBuilder.Set(start, (uint)classIndex);
            }
            else
            {
                trieBuilder.SetRange(start, end, (uint)classIndex);
            }
        }
    }

    return trieBuilder.Freeze();
}
/// <summary>
/// Writes two overlapping ranges into a builder and checks the values read back
/// directly from that builder at the range boundaries.
/// </summary>
public void SetRange()
{
    var builder = new UnicodeTrieBuilder(10, 666);

    // First range is written with the final argument false, the second with true.
    builder.SetRange(13, 6666, 7788, false);
    builder.SetRange(6000, 7000, 9900, true);

    // Code points to probe, paired by index with the value expected at each.
    int[] codePoints = { 12, 13, 5999, 6000, 7000, 7001, 0x110000 };
    uint[] expectedValues = { 10u, 7788u, 7788u, 9900u, 9900u, 10u, 666u };

    for (int i = 0; i < codePoints.Length; i++)
    {
        Assert.Equal(expectedValues[i], builder.Get(codePoints[i]));
    }
}
/// <summary>
/// Generates the UnicodeTrie for the LineBreak code point ranges.
/// </summary>
/// <remarks>
/// Reads "LineBreak.txt" line by line. Every line matching
/// <c>START[..END] ; CLASS #</c> (hexadecimal code points) is written to the builder
/// as a range with the overwrite flag set; all other lines are ignored. The frozen
/// trie is then saved to "LineBreak.trie".
/// </remarks>
public static void GenerateLineBreakTrie()
{
    var regex = new Regex(@"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(.*?)\s*#");
    var builder = new UnicodeTrieBuilder((uint)LineBreakClass.XX);

    using (StreamReader sr = GetStreamReader("LineBreak.txt"))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            Match match = regex.Match(line);
            if (!match.Success)
            {
                continue;
            }

            // The data file is machine-readable, so parse with the invariant culture.
            int start = int.Parse(match.Groups[1].Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture);

            // The end group is optional; a line with a single code point is a one-element range.
            Group endGroup = match.Groups[2];
            int end = endGroup.Success
                ? int.Parse(endGroup.Value, NumberStyles.HexNumber, CultureInfo.InvariantCulture)
                : start;

            string point = match.Groups[3].Value;

            builder.SetRange(start, end, (uint)Enum.Parse<LineBreakClass>(point), true);
        }
    }

    UnicodeTrie trie = builder.Freeze();

    using FileStream stream = GetStreamWriter("LineBreak.trie");
    trie.Save(stream);
}
/// <summary>
/// Builds a trie from <paramref name="testRanges"/> and verifies the values of both
/// the builder and its frozen trie against <paramref name="checkValues"/>.
/// </summary>
/// <param name="testRanges">
/// Element 0 supplies the initial value passed to the builder; every later element is
/// written as a range whose <c>end</c> is exclusive (the builder receives <c>end - 1</c>).
/// </param>
/// <param name="checkValues">
/// Each entry gives the value expected for every code point from the current position
/// up to, but not including, its <c>codePoint</c>.
/// </param>
public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
{
    uint defaultValue = testRanges[0].value;
    uint invalidValue = 0x0bad;

    var builder = new UnicodeTrieBuilder(defaultValue, invalidValue);

    // Element 0 only carries the initial value, so range writing starts at index 1.
    int rangeIndex = 1;
    while (rangeIndex < testRanges.Length)
    {
        var range = testRanges[rangeIndex];
        builder.SetRange(range.start, range.end - 1, range.value, range.overwrite);
        rangeIndex++;
    }

    var frozen = builder.Freeze();

    // The position carries over between check entries, so each code point is visited once.
    int position = 0;
    foreach (var check in checkValues)
    {
        while (position < check.codePoint)
        {
            Assert.Equal(check.value, builder.Get(position));
            Assert.Equal(check.value, frozen.Get(position));
            position++;
        }
    }
}
/// <summary>
/// Builds the trie that maps code points to <c>GraphemeBreakClass</c> values.
/// </summary>
/// <remarks>
/// Both data files are read before anything is written. The grapheme break data is
/// applied first, then the emoji data. Entries whose type name does not parse as a
/// <c>GraphemeBreakClass</c> are ignored.
/// </remarks>
/// <returns>The frozen trie produced by the builder.</returns>
private static UnicodeTrie GenerateBreakTypeTrie()
{
    var builder = new UnicodeTrieBuilder();

    var graphemeEntries = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "auxiliary/GraphemeBreakProperty.txt"));
    var emojiEntries = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "emoji/emoji-data.txt"));

    foreach (var entries in new[] { graphemeEntries, emojiEntries })
    {
        foreach (var (first, last, typeName) in entries)
        {
            if (Enum.TryParse<GraphemeBreakClass>(typeName, out var breakClass))
            {
                if (first != last)
                {
                    builder.SetRange(first, last, (uint)breakClass);
                }
                else
                {
                    // Single code point: use the point setter rather than a one-element range.
                    builder.Set(first, (uint)breakClass);
                }
            }
        }
    }

    return builder.Freeze();
}
/// <summary>
/// Builds a trie from <paramref name="testRanges"/> and verifies the values of both
/// the builder and its frozen trie against <paramref name="checkValues"/>.
/// </summary>
/// <param name="testRanges">
/// Element 0 supplies the initial value passed to the builder; every later element is
/// written as a range whose <c>End</c> is exclusive (the builder receives <c>End - 1</c>).
/// </param>
/// <param name="checkValues">
/// Each entry gives the value expected for every code point from the current position
/// up to, but not including, its <c>CodePoint</c>.
/// </param>
public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
{
    const uint errorValue = 0x0bad;
    uint initialValue = testRanges[0].Value;

    var builder = new UnicodeTrieBuilder(initialValue, errorValue);

    // Element 0 only carries the initial value, so range writing starts at index 1.
    int rangeIndex = 1;
    while (rangeIndex < testRanges.Length)
    {
        TestRange range = testRanges[rangeIndex];
        builder.SetRange(range.Start, range.End - 1, range.Value, range.Overwrite);
        rangeIndex++;
    }

    UnicodeTrie frozen = builder.Freeze();

    // The code point counter is shared by all check entries and never resets.
    int codePoint = 0;
    foreach (CheckValue expected in checkValues)
    {
        for (; codePoint < expected.CodePoint; codePoint++)
        {
            Assert.Equal(expected.Value, builder.Get(codePoint));
            Assert.Equal(expected.Value, frozen.Get(codePoint));
        }
    }
}
/// <summary>
/// Writes two overlapping ranges into a builder, freezes it, and checks the values
/// read from the frozen trie at the range boundaries.
/// </summary>
public void SetRangeCompacted()
{
    var trieBuilder = new UnicodeTrieBuilder(10, 666);

    // First range is written with the final argument false, the second with true.
    trieBuilder.SetRange(13, 6666, 7788, false);
    trieBuilder.SetRange(6000, 7000, 9900, true);

    UnicodeTrie frozen = trieBuilder.Freeze();

    void Expect(uint expected, int codePoint) => Assert.Equal(expected, frozen.Get(codePoint));

    // Just before and at the start of the first range.
    Expect(10u, 12);
    Expect(7788u, 13);

    // Either side of the point where the second range begins.
    Expect(7788u, 5999);
    Expect(9900u, 6000);

    // End of the second range and the first code point after it.
    Expect(9900u, 7000);
    Expect(10u, 7001);

    // Value expected for 0x110000.
    Expect(666u, 0x110000);
}
/// <summary>
/// Writes two overlapping ranges into a builder, converts it with <c>ToBuffer</c>,
/// constructs a trie from the result, and checks the values at the range boundaries.
/// </summary>
public void SetRangeSerialized()
{
    var trieBuilder = new UnicodeTrieBuilder(10, 666);

    // First range is written with the final argument false, the second with true.
    trieBuilder.SetRange(13, 6666, 7788, false);
    trieBuilder.SetRange(6000, 7000, 9900, true);

    var restored = new UnicodeTrie(trieBuilder.ToBuffer());

    // Each pair is { code point, expected value }.
    int[][] checks =
    {
        new[] { 12, 10 },
        new[] { 13, 7788 },
        new[] { 5999, 7788 },
        new[] { 6000, 9900 },
        new[] { 7000, 9900 },
        new[] { 7001, 10 },
        new[] { 0x110000, 666 }
    };

    foreach (int[] check in checks)
    {
        Assert.Equal((uint)check[1], restored.Get(check[0]));
    }
}
/// <summary>
/// Writes two overlapping ranges into a builder, saves the frozen trie to a memory
/// stream, constructs a new trie from that stream, and checks the values at the
/// range boundaries.
/// </summary>
public void SetRangeSerialized()
{
    var trieBuilder = new UnicodeTrieBuilder(10, 666);

    // First range is written with the final argument false, the second with true.
    trieBuilder.SetRange(13, 6666, 7788, false);
    trieBuilder.SetRange(6000, 7000, 9900, true);

    using (var buffer = new MemoryStream())
    {
        var frozen = trieBuilder.Freeze();
        frozen.Save(buffer);

        // Rewind so the new trie is constructed from the start of the saved data.
        buffer.Position = 0;

        var restored = new UnicodeTrie(buffer);

        // Just before and at the start of the first range.
        Assert.Equal(10u, restored.Get(12));
        Assert.Equal(7788u, restored.Get(13));

        // Either side of the point where the second range begins.
        Assert.Equal(7788u, restored.Get(5999));
        Assert.Equal(9900u, restored.Get(6000));

        // End of the second range and the first code point after it.
        Assert.Equal(9900u, restored.Get(7000));
        Assert.Equal(10u, restored.Get(7001));

        // Value expected for 0x110000.
        Assert.Equal(666u, restored.Get(0x110000));
    }
}