Example #1
0
        /// <summary>
        /// Builds the trie that maps code points to their grapheme cluster break class.
        /// </summary>
        /// <remarks>
        /// The value stored for a code point is the index of its break class name within the
        /// list of names returned for the "# Grapheme_Cluster_Break (GCB)" property aliases.
        /// Entries whose class name is not part of that list are skipped.
        /// </remarks>
        /// <returns>The frozen trie produced by the builder.</returns>
        private static UnicodeTrie GenerateBreakTypeTrie()
        {
            var graphemeBreakClassValues = UnicodeEnumsGenerator.GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)");

            var graphemeBreakClassMapping = graphemeBreakClassValues.Select(x => x.name).ToList();

            var trieBuilder = new UnicodeTrieBuilder();

            // Both files are handled identically; the emoji data is applied second, so its
            // entries are written after the grapheme break entries.
            // NOTE(review): the emoji file is pinned to 12.0 while the UCD file uses "latest" - confirm this is intended.
            var breakDataSources = new[]
            {
                "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
                "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
            };

            foreach (var breakDataSource in breakDataSources)
            {
                foreach (var (start, end, graphemeBreakType) in ReadBreakData(breakDataSource))
                {
                    // A single lookup both tests for membership and yields the value to store.
                    var classIndex = graphemeBreakClassMapping.IndexOf(graphemeBreakType);

                    if (classIndex < 0)
                    {
                        continue;
                    }

                    if (start == end)
                    {
                        trieBuilder.Set(start, (uint)classIndex);
                    }
                    else
                    {
                        trieBuilder.SetRange(start, end, (uint)classIndex);
                    }
                }
            }

            return trieBuilder.Freeze();
        }
Example #2
0
        public void SetRange()
        {
            var builder = new UnicodeTrieBuilder(10, 666);

            // The second range overlaps the tail of the first one and is written with the last flag set to true.
            builder.SetRange(13, 6666, 7788, false);
            builder.SetRange(6000, 7000, 9900, true);

            // Code point -> value the builder is expected to report, checked in order.
            var expectations = new (int CodePoint, uint Expected)[]
            {
                (12, 10u),
                (13, 7788u),
                (5999, 7788u),
                (6000, 9900u),
                (7000, 9900u),
                (7001, 10u),
                (0x110000, 666u),
            };

            foreach (var (codePoint, expected) in expectations)
            {
                Assert.Equal(expected, builder.Get(codePoint));
            }
        }
Example #3
0
        /// <summary>
        /// Generates the UnicodeTrie for the LineBreak code point ranges.
        /// </summary>
        /// <remarks>
        /// Reads "LineBreak.txt" line by line, stores the <see cref="LineBreakClass"/> of every
        /// line that matches the range pattern, and saves the frozen trie to "LineBreak.trie".
        /// Lines that do not match the pattern are ignored.
        /// </remarks>
        public static void GenerateLineBreakTrie()
        {
            // Captures: 1 = first code point (hex), 2 = optional last code point (hex), 3 = class name.
            var regex   = new Regex(@"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(.*?)\s*#");
            var builder = new UnicodeTrieBuilder((uint)LineBreakClass.XX);

            // The reader is scoped to this block so the input is closed before the output is written.
            using (StreamReader sr = GetStreamReader("LineBreak.txt"))
            {
                string line;
                while ((line = sr.ReadLine()) != null)
                {
                    Match match = regex.Match(line);

                    if (!match.Success)
                    {
                        continue;
                    }

                    string start = match.Groups[1].Value;

                    // A line describing a single code point has no ".." part; treat it as a one-element range.
                    string end   = match.Groups[2].Success ? match.Groups[2].Value : start;
                    string point = match.Groups[3].Value;

                    // The data file is machine readable, so parse with the invariant culture.
                    int first = int.Parse(start, NumberStyles.HexNumber, CultureInfo.InvariantCulture);
                    int last  = int.Parse(end, NumberStyles.HexNumber, CultureInfo.InvariantCulture);

                    builder.SetRange(first, last, (uint)Enum.Parse<LineBreakClass>(point), true);
                }
            }

            UnicodeTrie trie = builder.Freeze();

            using FileStream stream = GetStreamWriter("LineBreak.trie");
            trie.Save(stream);
        }
Example #4
0
        /// <summary>
        /// Builds a trie from <paramref name="testRanges"/> and verifies the values listed in
        /// <paramref name="checkValues"/> against both the builder and the frozen trie.
        /// </summary>
        /// <param name="testRanges">Entry 0 supplies the initial value; the remaining entries are applied as ranges.</param>
        /// <param name="checkValues">Each entry gives the value expected for all code points below its code point that were not checked yet.</param>
        public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
        {
            uint defaultValue = testRanges[0].value;
            uint invalidValue = 0x0bad;

            var trieBuilder = new UnicodeTrieBuilder(defaultValue, invalidValue);

            // Index 0 was consumed above, so the ranges start at index 1.
            for (int rangeIndex = 1; rangeIndex < testRanges.Length; rangeIndex++)
            {
                var range = testRanges[rangeIndex];

                // The builder is given end - 1, i.e. the test data's end is treated as one past the last code point.
                trieBuilder.SetRange(range.start, range.end - 1, range.value, range.overwrite);
            }

            var frozenTrie = trieBuilder.Freeze();

            // The position carries over between check entries, so every code point is verified once.
            int codePoint = 0;

            foreach (var check in checkValues)
            {
                while (codePoint < check.codePoint)
                {
                    Assert.Equal(check.value, trieBuilder.Get(codePoint));
                    Assert.Equal(check.value, frozenTrie.Get(codePoint));
                    codePoint++;
                }
            }
        }
        /// <summary>
        /// Builds the trie that maps code points to their <see cref="GraphemeBreakClass"/> value.
        /// </summary>
        /// <remarks>
        /// Entries whose class name cannot be parsed as a <see cref="GraphemeBreakClass"/> are skipped.
        /// The emoji data is applied after the grapheme break data.
        /// </remarks>
        /// <returns>The frozen trie produced by the builder.</returns>
        private static UnicodeTrie GenerateBreakTypeTrie()
        {
            var builder = new UnicodeTrieBuilder();

            var graphemeData = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "auxiliary/GraphemeBreakProperty.txt"));

            var emojiData = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "emoji/emoji-data.txt"));

            foreach (var dataSet in new [] { graphemeData, emojiData })
            {
                foreach (var (first, last, className) in dataSet)
                {
                    if (Enum.TryParse<GraphemeBreakClass>(className, out var breakClass))
                    {
                        if (first != last)
                        {
                            builder.SetRange(first, last, (uint)breakClass);
                        }
                        else
                        {
                            // Start and end coincide, so a single code point is set.
                            builder.Set(first, (uint)breakClass);
                        }
                    }
                }
            }

            return builder.Freeze();
        }
Example #6
0
        /// <summary>
        /// Builds a trie from <paramref name="testRanges"/> and verifies the values listed in
        /// <paramref name="checkValues"/> against both the builder and the frozen trie.
        /// </summary>
        /// <param name="testRanges">Entry 0 supplies the initial value; the remaining entries are applied as ranges.</param>
        /// <param name="checkValues">Each entry gives the value expected for all code points below its code point that were not checked yet.</param>
        public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
        {
            const uint invalidValue = 0x0bad;
            uint       defaultValue = testRanges[0].Value;

            var trieBuilder = new UnicodeTrieBuilder(defaultValue, invalidValue);

            // Index 0 was consumed above, so the ranges start at index 1.
            int rangeIndex = 1;

            while (rangeIndex < testRanges.Length)
            {
                TestRange range = testRanges[rangeIndex];

                // The builder is given End - 1, i.e. the test data's End is treated as one past the last code point.
                trieBuilder.SetRange(range.Start, range.End - 1, range.Value, range.Overwrite);
                rangeIndex++;
            }

            UnicodeTrie frozenTrie = trieBuilder.Freeze();

            // The position carries over between check entries, so every code point is verified once.
            int codePoint = 0;

            foreach (CheckValue check in checkValues)
            {
                for (; codePoint < check.CodePoint; codePoint++)
                {
                    Assert.Equal(check.Value, trieBuilder.Get(codePoint));
                    Assert.Equal(check.Value, frozenTrie.Get(codePoint));
                }
            }
        }
Example #7
0
        public void SetRangeCompacted()
        {
            var trieBuilder = new UnicodeTrieBuilder(10, 666);

            // The second range overlaps the tail of the first one and is written with the last flag set to true.
            trieBuilder.SetRange(13, 6666, 7788, false);
            trieBuilder.SetRange(6000, 7000, 9900, true);

            // The assertions run against the frozen trie rather than the builder.
            UnicodeTrie frozen = trieBuilder.Freeze();

            // Code point -> value the frozen trie is expected to report, checked in order.
            var expectations = new (int CodePoint, uint Expected)[]
            {
                (12, 10u),
                (13, 7788u),
                (5999, 7788u),
                (6000, 9900u),
                (7000, 9900u),
                (7001, 10u),
                (0x110000, 666u),
            };

            foreach (var (codePoint, expected) in expectations)
            {
                Assert.Equal(expected, frozen.Get(codePoint));
            }
        }
Example #8
0
        public void SetRangeSerialized()
        {
            var trieBuilder = new UnicodeTrieBuilder(10, 666);

            // The second range overlaps the tail of the first one and is written with the last flag set to true.
            trieBuilder.SetRange(13, 6666, 7788, false);
            trieBuilder.SetRange(6000, 7000, 9900, true);

            // Round-trip through the buffer form: the trie under test is rebuilt from the builder's buffer.
            var restored = new UnicodeTrie(trieBuilder.ToBuffer());

            void ExpectValue(uint expected, int codePoint) => Assert.Equal(expected, restored.Get(codePoint));

            ExpectValue(10u, 12);
            ExpectValue(7788u, 13);
            ExpectValue(7788u, 5999);
            ExpectValue(9900u, 6000);
            ExpectValue(9900u, 7000);
            ExpectValue(10u, 7001);
            ExpectValue(666u, 0x110000);
        }
Example #9
0
        public void SetRangeSerialized()
        {
            var trieBuilder = new UnicodeTrieBuilder(10, 666);

            // The second range overlaps the tail of the first one and is written with the last flag set to true.
            trieBuilder.SetRange(13, 6666, 7788, false);
            trieBuilder.SetRange(6000, 7000, 9900, true);

            using (var stream = new MemoryStream())
            {
                // Round-trip through a stream: save the frozen trie, rewind, and load it again.
                trieBuilder.Freeze().Save(stream);
                stream.Seek(0, SeekOrigin.Begin);

                var restored = new UnicodeTrie(stream);

                void ExpectValue(uint expected, int codePoint) => Assert.Equal(expected, restored.Get(codePoint));

                ExpectValue(10u, 12);
                ExpectValue(7788u, 13);
                ExpectValue(7788u, 5999);
                ExpectValue(9900u, 6000);
                ExpectValue(9900u, 7000);
                ExpectValue(10u, 7001);
                ExpectValue(666u, 0x110000);
            }
        }