Esempio n. 1
0
        /// <summary>
        /// Builds a trie that maps code points to the index of their grapheme cluster break class.
        /// </summary>
        /// <remarks>
        /// The value stored for a code point is the position of its class name in the list of names
        /// returned by <c>UnicodeEnumsGenerator.GetPropertyValueAliases</c> for the
        /// Grapheme_Cluster_Break section. Records whose class name is not in that list are skipped.
        /// The grapheme break data is applied first and the emoji data second.
        /// </remarks>
        /// <returns>The frozen trie produced by the builder.</returns>
        private static UnicodeTrie GenerateBreakTypeTrie()
        {
            var graphemeBreakClassValues = UnicodeEnumsGenerator.GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)");

            var graphemeBreakClassMapping = graphemeBreakClassValues.Select(x => x.name).ToList();

            var trieBuilder = new UnicodeTrieBuilder();

            // NOTE(review): the first source tracks "latest" while the emoji data is pinned to 12.0;
            // confirm that mixing versions is intended.
            var sources = new[]
            {
                "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
                "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
            };

            // Each source is read and applied before the next one is read, in the order listed above.
            foreach (var source in sources)
            {
                foreach (var (start, end, graphemeBreakType) in ReadBreakData(source))
                {
                    // A single lookup doubles as the membership test: IndexOf yields -1 for unknown names.
                    var classIndex = graphemeBreakClassMapping.IndexOf(graphemeBreakType);

                    if (classIndex < 0)
                    {
                        continue;
                    }

                    if (start == end)
                    {
                        trieBuilder.Set(start, (uint)classIndex);
                    }
                    else
                    {
                        trieBuilder.SetRange(start, end, (uint)classIndex);
                    }
                }
            }

            return trieBuilder.Freeze();
        }
        /// <summary>
        /// Builds a trie that maps code points to their <c>GraphemeBreakClass</c> value.
        /// </summary>
        /// <remarks>
        /// The grapheme break property file and the emoji data file, both located under
        /// <c>UnicodeDataGenerator.Ucd</c>, are read up front and then applied in that order.
        /// Records whose class name cannot be parsed as a <c>GraphemeBreakClass</c> are ignored.
        /// </remarks>
        /// <returns>The frozen trie produced by the builder.</returns>
        private static UnicodeTrie GenerateBreakTypeTrie()
        {
            var builder = new UnicodeTrieBuilder();

            // Array elements are evaluated in order, so the grapheme file is read before the emoji file.
            var sources = new []
            {
                ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "auxiliary/GraphemeBreakProperty.txt")),
                ReadBreakData(Path.Combine(UnicodeDataGenerator.Ucd, "emoji/emoji-data.txt"))
            };

            foreach (var records in sources)
            {
                foreach (var (first, last, className) in records)
                {
                    if (Enum.TryParse<GraphemeBreakClass>(className, out var breakClass))
                    {
                        var packed = (uint)breakClass;

                        if (first != last)
                        {
                            builder.SetRange(first, last, packed);
                        }
                        else
                        {
                            builder.Set(first, packed);
                        }
                    }
                }
            }

            var frozen = builder.Freeze();

            return frozen;
        }
Esempio n. 3
0
        /// <summary>
        /// Builds the BiDi trie, emits its generated class and writes it to the "Generated" directory.
        /// </summary>
        /// <param name="biDiDataEntries">
        /// Receives the BiDi class entries and the paired bracket type entries created by
        /// <c>UnicodeEnumsGenerator</c>.
        /// </param>
        /// <param name="biDiData">
        /// Receives the dictionary of <c>BiDiDataItem</c> values filled from the BiDi class data and the
        /// paired bracket data (presumably keyed by code point; confirm against the Add* helpers).
        /// </param>
        /// <returns>The frozen trie that was also saved to disk.</returns>
        public static UnicodeTrie GenerateBiDiTrie(out BiDiDataEntries biDiDataEntries, out Dictionary<int, BiDiDataItem> biDiData)
        {
            biDiData = new Dictionary<int, BiDiDataItem>();

            var biDiClassEntries =
                UnicodeEnumsGenerator.CreateBiDiClassEnum();

            var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);

            var biDiClassData = ReadBiDiData();

            foreach (var (range, name) in biDiClassData)
            {
                var biDiClass = biDiClassMappings[name];

                AddBiDiClassRange(biDiData, range, biDiClass);
            }

            var biDiPairedBracketTypeEntries = UnicodeEnumsGenerator.CreateBiDiPairedBracketTypeEnum();

            var biDiPairedBracketTypeMappings = CreateTagToIndexMappings(biDiPairedBracketTypeEntries);

            var biDiPairedBracketData = ReadBiDiPairedBracketData();

            foreach (var (range, name) in biDiPairedBracketData)
            {
                var bracketType = biDiPairedBracketTypeMappings[name];

                AddBiDiBracket(biDiData, range, bracketType);
            }

            var biDiTrieBuilder = new UnicodeTrieBuilder();

            foreach (var properties in biDiData.Values)
            {
                // Pack all three fields into one value: the bracket stays unshifted in the low bits,
                // the bracket type and the BiDi class are shifted by their UnicodeData constants.
                var value = (properties.BiDiClass << UnicodeData.BIDICLASS_SHIFT) |
                            (properties.BracketType << UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT) | properties.Bracket;

                biDiTrieBuilder.Set(properties.Codepoint, (uint)value);
            }

            biDiDataEntries = new BiDiDataEntries()
            {
                PairedBracketTypes = biDiPairedBracketTypeEntries, BiDiClasses = biDiClassEntries
            };

            var trie = biDiTrieBuilder.Freeze();

            GenerateTrieClass("BiDi", trie);

            // Path.Combine picks the platform's separator; a literal backslash would become part of
            // the file name on non-Windows systems.
            using (var stream = File.Create(Path.Combine("Generated", "BiDi.trie")))
            {
                trie.Save(stream);
            }

            return trie;
        }
Esempio n. 4
0
        /// <summary>
        /// Verifies lookups on an unfrozen builder after setting a single code point: the set code
        /// point returns the new value, its neighbour returns 10, and out-of-range inputs return 666.
        /// </summary>
        public void Set()
        {
            const int codepoint = 0x4567;

            var builder = new UnicodeTrieBuilder(10, 666);

            builder.Set(codepoint, 99);

            // Neighbouring code point that was never set.
            Assert.Equal(10u, builder.Get(codepoint - 1));

            // The code point that was just set.
            Assert.Equal(99u, builder.Get(codepoint));

            // Below and above the valid code point range.
            Assert.Equal(666u, builder.Get(-1));
            Assert.Equal(666u, builder.Get(0x110000));
        }
Esempio n. 5
0
        /// <summary>
        /// Verifies lookups on a frozen trie after setting a single code point on its builder: the set
        /// code point returns the new value, its neighbour returns 10, and out-of-range inputs return 666.
        /// </summary>
        public void SetCompacted()
        {
            const int codepoint = 0x4567;

            var trieBuilder = new UnicodeTrieBuilder(10, 666);

            trieBuilder.Set(codepoint, 99);

            var frozen = trieBuilder.Freeze();

            // Neighbouring code point that was never set.
            Assert.Equal(10u, frozen.Get(codepoint - 1));

            // The code point that was set before freezing.
            Assert.Equal(99u, frozen.Get(codepoint));

            // Below and above the valid code point range.
            Assert.Equal(666u, frozen.Get(-1));
            Assert.Equal(666u, frozen.Get(0x110000));
        }
Esempio n. 6
0
        /// <summary>
        /// Builds the Unicode data trie, emits its generated class and writes it to the "Generated"
        /// directory.
        /// </summary>
        /// <param name="dataEntries">
        /// Receives the script, general category and line break class entries created by
        /// <c>UnicodeEnumsGenerator</c>.
        /// </param>
        /// <param name="unicodeData">
        /// Receives the dictionary of <c>UnicodeDataItem</c> values returned by <c>GetUnicodeData</c>.
        /// </param>
        /// <returns>The frozen trie that was also saved to disk.</returns>
        public static UnicodeTrie GenerateUnicodeDataTrie(out UnicodeDataEntries dataEntries, out Dictionary<int, UnicodeDataItem> unicodeData)
        {
            var generalCategoryEntries =
                UnicodeEnumsGenerator.CreateGeneralCategoryEnum();

            var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

            var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();

            var scriptMappings = CreateNameToIndexMappings(scriptEntries);

            var lineBreakClassEntries =
                UnicodeEnumsGenerator.CreateLineBreakClassEnum();

            var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

            unicodeData = GetUnicodeData(generalCategoryMappings, scriptMappings, lineBreakClassMappings);

            var unicodeDataTrieBuilder = new UnicodeTrieBuilder();

            foreach (var properties in unicodeData.Values)
            {
                // Packed layout: [line break]|[script]|[category]; the category stays unshifted
                // in the low bits.
                var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                            (properties.Script << UnicodeData.SCRIPT_SHIFT) | properties.GeneralCategory;

                unicodeDataTrieBuilder.Set(properties.Codepoint, (uint)value);
            }

            dataEntries = new UnicodeDataEntries
            {
                Scripts           = scriptEntries,
                GeneralCategories = generalCategoryEntries,
                LineBreakClasses  = lineBreakClassEntries
            };

            var trie = unicodeDataTrieBuilder.Freeze();

            GenerateTrieClass("UnicodeData", trie);

            // Path.Combine picks the platform's separator; a literal backslash would become part of
            // the file name on non-Windows systems.
            using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
            {
                trie.Save(stream);
            }

            return trie;
        }
        /// <summary>
        /// Collects general category, script, BiDi class and line break class data per code point,
        /// packs it into a trie, writes the trie to the "Generated" directory and then creates the
        /// property value alias helper.
        /// </summary>
        public static void Execute()
        {
            var codepoints = new Dictionary<int, UnicodeDataItem>();

            var generalCategoryEntries =
                UnicodeEnumsGenerator.CreateGeneralCategoryEnum();

            var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

            var generalCategoryData = ReadGeneralCategoryData();

            foreach (var (range, name) in generalCategoryData)
            {
                var generalCategory = generalCategoryMappings[name];

                AddGeneralCategoryRange(codepoints, range, generalCategory);
            }

            var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();

            var scriptMappings = CreateNameToIndexMappings(scriptEntries);

            var scriptData = ReadScriptData();

            foreach (var (range, name) in scriptData)
            {
                var script = scriptMappings[name];

                AddScriptRange(codepoints, range, script);
            }

            var biDiClassEntries =
                UnicodeEnumsGenerator.CreateBiDiClassEnum();

            var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);

            var biDiData = ReadBiDiData();

            foreach (var (range, name) in biDiData)
            {
                var biDiClass = biDiClassMappings[name];

                AddBiDiClassRange(codepoints, range, biDiClass);
            }

            var lineBreakClassEntries =
                UnicodeEnumsGenerator.CreateLineBreakClassEnum();

            var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

            var lineBreakClassData = ReadLineBreakClassData();

            foreach (var (range, name) in lineBreakClassData)
            {
                var lineBreakClass = lineBreakClassMappings[name];

                AddLineBreakClassRange(codepoints, range, lineBreakClass);
            }

            // NOTE(review): the builder is created with its default initial value; an explicit
            // initial value based on GeneralCategory.Other was previously sketched here but never
            // enabled. Confirm the default is what unassigned code points should map to.
            var builder = new UnicodeTrieBuilder();

            foreach (var properties in codepoints.Values)
            {
                // Packed layout: [line break]|[biDi]|[script]|[category]; the category stays
                // unshifted in the low bits.
                var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                            (properties.BiDiClass << UnicodeData.BIDI_SHIFT) |
                            (properties.Script << UnicodeData.SCRIPT_SHIFT) | properties.GeneralCategory;

                builder.Set(properties.Codepoint, (uint)value);
            }

            // Freeze before touching the file system so a failure here does not leave an empty file.
            var trie = builder.Freeze();

            // Path.Combine picks the platform's separator; a literal backslash would become part of
            // the file name on non-Windows systems.
            using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
            {
                trie.Save(stream);
            }

            UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(scriptEntries, generalCategoryEntries,
                                                                 biDiClassEntries, lineBreakClassEntries);
        }