コード例 #1
0
        /// <summary>
        /// Builds the trie that stores a <c>GraphemeBreakClass</c> value per code point, fed from the
        /// grapheme break property file and then the emoji data file.
        /// </summary>
        /// <returns>The frozen trie produced by the builder.</returns>
        private static UnicodeTrie GenerateBreakTypeTrie()
        {
            var builder = new UnicodeTrieBuilder();

            var graphemeSource = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "auxiliary/GraphemeBreakProperty.txt"));
            var emojiSource = ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "emoji/emoji-data.txt"));

            // The grapheme data is applied first, the emoji data second.
            foreach (var source in new [] { graphemeSource, emojiSource })
            {
                foreach (var(first, last, className) in source)
                {
                    // Entries whose name does not parse as a GraphemeBreakClass are ignored.
                    if (Enum.TryParse <GraphemeBreakClass>(className, out var breakClass))
                    {
                        var encoded = (uint)breakClass;

                        if (first != last)
                        {
                            builder.SetRange(first, last, encoded);
                        }
                        else
                        {
                            builder.Set(first, encoded);
                        }
                    }
                }
            }

            return builder.Freeze();
        }
コード例 #2
0
        /// <summary>
        /// Reads "LineBreak.txt", builds a UnicodeTrie holding the LineBreakClass of the listed
        /// code point ranges and saves the result as "LineBreak.trie".
        /// </summary>
        public static void GenerateLineBreakTrie()
        {
            // Captures: 1 = first code point (hex), 2 = optional last code point (hex), 3 = class name.
            var linePattern = new Regex(@"^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*(.*?)\s*#");
            var trieBuilder = new UnicodeTrieBuilder((uint)LineBreakClass.XX);

            using (StreamReader reader = GetStreamReader("LineBreak.txt"))
            {
                for (string text = reader.ReadLine(); text != null; text = reader.ReadLine())
                {
                    Match parsed = linePattern.Match(text);

                    // Lines that do not match the pattern carry no range data.
                    if (!parsed.Success)
                    {
                        continue;
                    }

                    string firstHex = parsed.Groups[1].Value;

                    // Without a ".." part the entry is a single code point, so the range ends where it starts.
                    string lastHex = parsed.Groups[2].Success ? parsed.Groups[2].Value : firstHex;
                    string className = parsed.Groups[3].Value;

                    int first = int.Parse(firstHex, NumberStyles.HexNumber);
                    int last = int.Parse(lastHex, NumberStyles.HexNumber);
                    var lineBreakClass = Enum.Parse <LineBreakClass>(className);

                    trieBuilder.SetRange(first, last, (uint)lineBreakClass, true);
                }
            }

            UnicodeTrie frozen = trieBuilder.Freeze();

            using FileStream output = GetStreamWriter("LineBreak.trie");
            frozen.Save(output);
        }
コード例 #3
0
ファイル: TrieBuilderTest.cs プロジェクト: ywscr/RichTextKit
        /// <summary>
        /// Builds a trie from <paramref name="testRanges"/> and asserts that the builder and the frozen
        /// trie both return the expected value for every code point covered by <paramref name="checkValues"/>.
        /// </summary>
        /// <param name="testRanges">Element 0 supplies the initial value; each later element is applied with SetRange, passing <c>end - 1</c> as the range end.</param>
        /// <param name="checkValues">Expected values; each entry is checked for the code points from the current position up to, but not including, its <c>codePoint</c>.</param>
        public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
        {
            const uint errorValue = 0x0bad;
            uint initialValue = testRanges[0].value;

            var trieBuilder = new UnicodeTrieBuilder(initialValue, errorValue);

            // Entry 0 only carries the initial value, so range application starts at index 1.
            int rangeIndex = 1;
            while (rangeIndex < testRanges.Length)
            {
                var range = testRanges[rangeIndex];
                trieBuilder.SetRange(range.start, range.end - 1, range.value, range.overwrite);
                rangeIndex++;
            }

            var frozenTrie = trieBuilder.Freeze();

            // The position carries over between entries, so every code point is checked once.
            int codePoint = 0;

            foreach (var expected in checkValues)
            {
                while (codePoint < expected.codePoint)
                {
                    Assert.Equal(expected.value, trieBuilder.Get(codePoint));
                    Assert.Equal(expected.value, frozenTrie.Get(codePoint));
                    codePoint++;
                }
            }
        }
コード例 #4
0
        /// <summary>
        /// Applies the given ranges to a trie builder, freezes it and asserts that the builder and the
        /// frozen trie agree with the expected values for all checked code points.
        /// </summary>
        /// <param name="testRanges">Element 0 supplies the initial value; each later element is applied with SetRange, passing <c>End - 1</c> as the range end.</param>
        /// <param name="checkValues">Expected values; each entry is checked for the code points from the current position up to, but not including, its <c>CodePoint</c>.</param>
        public void RunRangeChecks(TestRange[] testRanges, CheckValue[] checkValues)
        {
            const uint errorValue = 0x0bad;
            uint startValue = testRanges[0].Value;

            var trieBuilder = new UnicodeTrieBuilder(startValue, errorValue);

            // Entry 0 only carries the initial value, so range application starts at index 1.
            for (int rangeIndex = 1; rangeIndex < testRanges.Length; rangeIndex++)
            {
                TestRange range = testRanges[rangeIndex];
                trieBuilder.SetRange(range.Start, range.End - 1, range.Value, range.Overwrite);
            }

            UnicodeTrie frozenTrie = trieBuilder.Freeze();

            // The position carries over between entries, so every code point is checked once.
            int position = 0;

            foreach (CheckValue expected in checkValues)
            {
                for (; position < expected.CodePoint; position++)
                {
                    Assert.Equal(expected.Value, trieBuilder.Get(position));
                    Assert.Equal(expected.Value, frozenTrie.Get(position));
                }
            }
        }
コード例 #5
0
        /// <summary>
        /// Builds the BiDi trie, generates the "BiDi" trie class and saves the trie as
        /// "BiDi.trie" inside the "Generated" directory.
        /// </summary>
        /// <param name="biDiDataEntries">Receives the BiDi class and paired bracket type enum entries.</param>
        /// <param name="biDiData">Receives the items collected by AddBiDiClassRange and AddBiDiBracket, from which the trie is filled.</param>
        /// <returns>The frozen trie.</returns>
        public static UnicodeTrie GenerateBiDiTrie(out BiDiDataEntries biDiDataEntries, out Dictionary <int, BiDiDataItem> biDiData)
        {
            biDiData = new Dictionary <int, BiDiDataItem>();

            // BiDi classes: map each tag to its enum index and record it per range.
            var biDiClassEntries =
                UnicodeEnumsGenerator.CreateBiDiClassEnum();

            var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);

            var biDiClassData = ReadBiDiData();

            foreach (var(range, name) in biDiClassData)
            {
                var biDiClass = biDiClassMappings[name];

                AddBiDiClassRange(biDiData, range, biDiClass);
            }

            // Paired bracket types: same procedure with the bracket data.
            var biDiPairedBracketTypeEntries = UnicodeEnumsGenerator.CreateBiDiPairedBracketTypeEnum();

            var biDiPairedBracketTypeMappings = CreateTagToIndexMappings(biDiPairedBracketTypeEntries);

            var biDiPairedBracketData = ReadBiDiPairedBracketData();

            foreach (var(range, name) in biDiPairedBracketData)
            {
                var bracketType = biDiPairedBracketTypeMappings[name];

                AddBiDiBracket(biDiData, range, bracketType);
            }

            var biDiTrieBuilder = new UnicodeTrieBuilder(/*initialValue*/);

            foreach (var properties in biDiData.Values)
            {
                // Packed value: BiDi class and bracket type are shifted into place, the bracket is OR-ed in unshifted.
                var value = (properties.BiDiClass << UnicodeData.BIDICLASS_SHIFT) |
                            (properties.BracketType << UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT) | properties.Bracket;

                biDiTrieBuilder.Set(properties.Codepoint, (uint)value);
            }

            biDiDataEntries = new BiDiDataEntries()
            {
                PairedBracketTypes = biDiPairedBracketTypeEntries, BiDiClasses = biDiClassEntries
            };

            var trie = biDiTrieBuilder.Freeze();

            GenerateTrieClass("BiDi", trie);

            // Path.Combine keeps the output location valid on platforms where '\' is not a directory separator.
            using (var stream = File.Create(Path.Combine("Generated", "BiDi.trie")))
            {
                trie.Save(stream);
            }

            return trie;
        }
コード例 #6
0
ファイル: TrieBuilderTest.cs プロジェクト: ywscr/RichTextKit
        /// <summary>
        /// Verifies that a frozen trie returns the set value, the initial value for untouched
        /// code points and the error value for code points -1 and 0x110000.
        /// </summary>
        public void SetCompacted()
        {
            var trieBuilder = new UnicodeTrieBuilder(10, 666);
            trieBuilder.Set(0x4567, 99);

            var frozen = trieBuilder.Freeze();

            // Neighbour of the set code point keeps the initial value.
            uint untouched = frozen.Get(0x4566);
            Assert.Equal(10u, untouched);

            uint assigned = frozen.Get(0x4567);
            Assert.Equal(99u, assigned);

            // Both out-of-range lookups yield the error value.
            uint belowRange = frozen.Get(-1);
            Assert.Equal(666u, belowRange);

            uint aboveRange = frozen.Get(0x110000);
            Assert.Equal(666u, aboveRange);
        }
コード例 #7
0
        /// <summary>
        /// Builds the grapheme break trie. The stored value for a code point is the index of its
        /// break type name in the Grapheme_Cluster_Break property value alias list.
        /// </summary>
        /// <returns>The frozen trie produced by the builder.</returns>
        private static UnicodeTrie GenerateBreakTypeTrie()
        {
            var graphemeBreakClassValues = UnicodeEnumsGenerator.GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)");

            var graphemeBreakClassMapping = graphemeBreakClassValues.Select(x => x.name).ToList();

            var trieBuilder = new UnicodeTrieBuilder();

            // Sources are read and applied in this order: grapheme break properties first, emoji data second.
            var sources = new[]
            {
                "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
                "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
            };

            foreach (var source in sources)
            {
                foreach (var(start, end, graphemeBreakType) in ReadBreakData(source))
                {
                    // One lookup serves as both the membership test and the value to store.
                    var classIndex = graphemeBreakClassMapping.IndexOf(graphemeBreakType);

                    if (classIndex < 0)
                    {
                        // Not a known grapheme break class name: skip the entry.
                        continue;
                    }

                    if (start == end)
                    {
                        trieBuilder.Set(start, (uint)classIndex);
                    }
                    else
                    {
                        trieBuilder.SetRange(start, end, (uint)classIndex);
                    }
                }
            }

            return trieBuilder.Freeze();
        }
コード例 #8
0
        /// <summary>
        /// Builds the Unicode data trie, generates the "UnicodeData" trie class and saves the trie as
        /// "UnicodeData.trie" inside the "Generated" directory.
        /// </summary>
        /// <param name="dataEntries">Receives the script, general category and line break class enum entries.</param>
        /// <param name="unicodeData">Receives the items returned by GetUnicodeData, from which the trie is filled.</param>
        /// <returns>The frozen trie.</returns>
        public static UnicodeTrie GenerateUnicodeDataTrie(out UnicodeDataEntries dataEntries, out Dictionary <int, UnicodeDataItem> unicodeData)
        {
            var generalCategoryEntries =
                UnicodeEnumsGenerator.CreateGeneralCategoryEnum();

            var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

            var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();

            // Scripts are mapped by name, the other two properties by tag.
            var scriptMappings = CreateNameToIndexMappings(scriptEntries);

            var lineBreakClassEntries =
                UnicodeEnumsGenerator.CreateLineBreakClassEnum();

            var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

            unicodeData = GetUnicodeData(generalCategoryMappings, scriptMappings, lineBreakClassMappings);

            var unicodeDataTrieBuilder = new UnicodeTrieBuilder(/*initialValue*/);

            foreach (var properties in unicodeData.Values)
            {
                // Packed value layout: [line break]|[script]|[category]
                var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                            (properties.Script << UnicodeData.SCRIPT_SHIFT) | properties.GeneralCategory;

                unicodeDataTrieBuilder.Set(properties.Codepoint, (uint)value);
            }

            dataEntries = new UnicodeDataEntries
            {
                Scripts           = scriptEntries,
                GeneralCategories = generalCategoryEntries,
                LineBreakClasses  = lineBreakClassEntries
            };

            var trie = unicodeDataTrieBuilder.Freeze();

            GenerateTrieClass("UnicodeData", trie);

            // Path.Combine keeps the output location valid on platforms where '\' is not a directory separator.
            using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
            {
                trie.Save(stream);
            }

            return trie;
        }
コード例 #9
0
ファイル: TrieBuilderTest.cs プロジェクト: ywscr/RichTextKit
        /// <summary>
        /// Verifies range assignment on a frozen trie: a non-overwriting range followed by an
        /// overlapping overwriting range, plus the initial and error values outside of them.
        /// </summary>
        public void SetRangeCompacted()
        {
            var trieBuilder = new UnicodeTrieBuilder(10, 666);

            trieBuilder.SetRange(13, 6666, 7788, false);
            trieBuilder.SetRange(6000, 7000, 9900, true);

            var frozen = trieBuilder.Freeze();

            // Expected value per probed code point, checked in this order.
            var expectations = new (uint Expected, int CodePoint)[]
            {
                (10u, 12),
                (7788u, 13),
                (7788u, 5999),
                (9900u, 6000),
                (9900u, 7000),
                (10u, 7001),
                (666u, 0x110000)
            };

            foreach (var (expected, codePoint) in expectations)
            {
                Assert.Equal(expected, frozen.Get(codePoint));
            }
        }
コード例 #10
0
        /// <summary>
        /// Verifies that a trie saved to a stream and loaded back returns the same values as
        /// expected from the ranges set on the builder.
        /// </summary>
        public void SetRangeSerialized()
        {
            var trieBuilder = new UnicodeTrieBuilder(10, 666);

            trieBuilder.SetRange(13, 6666, 7788, false);
            trieBuilder.SetRange(6000, 7000, 9900, true);

            using (var buffer = new MemoryStream())
            {
                var frozen = trieBuilder.Freeze();
                frozen.Save(buffer);

                // Rewind so the trie is read from the start of the saved data.
                buffer.Position = 0;

                var loaded = new UnicodeTrie(buffer);

                void Expect(uint expected, int codePoint) => Assert.Equal(expected, loaded.Get(codePoint));

                Expect(10u, 12);
                Expect(7788u, 13);
                Expect(7788u, 5999);
                Expect(9900u, 6000);
                Expect(9900u, 7000);
                Expect(10u, 7001);
                Expect(666u, 0x110000);
            }
        }
コード例 #11
0
        /// <summary>
        /// Collects general category, script, BiDi class and line break class data per code point,
        /// packs them into a trie saved as "UnicodeData.trie" inside the "Generated" directory and
        /// then creates the property value alias helper.
        /// </summary>
        public static void Execute()
        {
            var codepoints = new Dictionary <int, UnicodeDataItem>();

            // General categories (mapped by tag).
            var generalCategoryEntries =
                UnicodeEnumsGenerator.CreateGeneralCategoryEnum();

            var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

            var generalCategoryData = ReadGeneralCategoryData();

            foreach (var(range, name) in generalCategoryData)
            {
                var generalCategory = generalCategoryMappings[name];

                AddGeneralCategoryRange(codepoints, range, generalCategory);
            }

            // Scripts (mapped by name).
            var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();

            var scriptMappings = CreateNameToIndexMappings(scriptEntries);

            var scriptData = ReadScriptData();

            foreach (var(range, name) in scriptData)
            {
                var script = scriptMappings[name];

                AddScriptRange(codepoints, range, script);
            }

            // BiDi classes (mapped by tag).
            var biDiClassEntries =
                UnicodeEnumsGenerator.CreateBiDiClassEnum();

            var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);

            var biDiData = ReadBiDiData();

            foreach (var(range, name) in biDiData)
            {
                var biDiClass = biDiClassMappings[name];

                AddBiDiClassRange(codepoints, range, biDiClass);
            }

            // Line break classes (mapped by tag).
            var lineBreakClassEntries =
                UnicodeEnumsGenerator.CreateLineBreakClassEnum();

            var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

            var lineBreakClassData = ReadLineBreakClassData();

            foreach (var(range, name) in lineBreakClassData)
            {
                var lineBreakClass = lineBreakClassMappings[name];

                AddLineBreakClassRange(codepoints, range, lineBreakClass);
            }

            //const int initialValue = (0 << UnicodeData.LINEBREAK_SHIFT) |
            //                          (0 << UnicodeData.BIDI_SHIFT) |
            //                          (0 << UnicodeData.SCRIPT_SHIFT) | (int)GeneralCategory.Other;

            var builder = new UnicodeTrieBuilder(/*initialValue*/);

            foreach (var properties in codepoints.Values)
            {
                // Packed value layout: [line break]|[biDi]|[script]|[category]
                var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                            (properties.BiDiClass << UnicodeData.BIDI_SHIFT) |
                            (properties.Script << UnicodeData.SCRIPT_SHIFT) | properties.GeneralCategory;

                builder.Set(properties.Codepoint, (uint)value);
            }

            // Freeze before creating the file so a failure here does not leave an empty output file behind.
            var trie = builder.Freeze();

            // Path.Combine keeps the output location valid on platforms where '\' is not a directory separator.
            using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
            {
                trie.Save(stream);
            }

            UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(scriptEntries, generalCategoryEntries,
                                                                 biDiClassEntries, lineBreakClassEntries);
        }