示例#1
0
        /// <summary>
        /// Generates the Unicode data trie and the BiDi trie, then checks that each
        /// source item can be read back from its trie: every packed field, as
        /// extracted by <c>GetValue</c> with the matching shift and mask, must equal
        /// the value stored on the item. Finally emits the property value alias helper.
        /// </summary>
        public void Should_Generate_Data()
        {
            // The generators write their output below "Generated". CreateDirectory is a
            // no-op when the directory already exists, so no prior Exists check is needed.
            Directory.CreateDirectory("Generated");

            var unicodeDataTrie = UnicodeDataGenerator.GenerateUnicodeDataTrie(out var unicodeDataEntries, out var unicodeData);

            foreach (var item in unicodeData.Values)
            {
                var packed = unicodeDataTrie.Get(item.Codepoint);

                // The general category is read with a shift of 0, i.e. from the low bits.
                Assert.Equal(item.GeneralCategory, GetValue(packed, 0, UnicodeData.CATEGORY_MASK));

                Assert.Equal(item.Script, GetValue(packed, UnicodeData.SCRIPT_SHIFT, UnicodeData.SCRIPT_MASK));

                Assert.Equal(item.LineBreakClass, GetValue(packed, UnicodeData.LINEBREAK_SHIFT, UnicodeData.LINEBREAK_MASK));
            }

            var biDiTrie = UnicodeDataGenerator.GenerateBiDiTrie(out var biDiDataEntries, out var biDiData);

            foreach (var item in biDiData.Values)
            {
                var packed = biDiTrie.Get(item.Codepoint);

                // The paired bracket is read with a shift of 0, i.e. from the low bits.
                Assert.Equal(item.Bracket, GetValue(packed, 0, UnicodeData.BIDIPAIREDBRACKED_MASK));

                Assert.Equal(item.BracketType, GetValue(packed, UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT, UnicodeData.BIDIPAIREDBRACKEDTYPE_MASK));

                Assert.Equal(item.BiDiClass, GetValue(packed, UnicodeData.BIDICLASS_SHIFT, UnicodeData.BIDICLASS_MASK));
            }

            UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(unicodeDataEntries, biDiDataEntries);
        }
示例#2
0
        /// <summary>
        /// Collects the BiDi class and paired bracket data per code point, packs each
        /// item into a single trie value, writes the frozen trie to
        /// <c>Generated/BiDi.trie</c> and returns it.
        /// </summary>
        /// <param name="biDiDataEntries">
        /// Receives the generated BiDi class and paired bracket type enum entries.
        /// </param>
        /// <param name="biDiData">
        /// Receives the collected BiDi items; each item is stored in the trie under its
        /// <c>Codepoint</c>.
        /// </param>
        /// <returns>The frozen trie that was saved to disk.</returns>
        public static UnicodeTrie GenerateBiDiTrie(out BiDiDataEntries biDiDataEntries, out Dictionary<int, BiDiDataItem> biDiData)
        {
            biDiData = new Dictionary<int, BiDiDataItem>();

            var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();

            var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);

            foreach (var (range, name) in ReadBiDiData())
            {
                AddBiDiClassRange(biDiData, range, biDiClassMappings[name]);
            }

            var biDiPairedBracketTypeEntries = UnicodeEnumsGenerator.CreateBiDiPairedBracketTypeEnum();

            var biDiPairedBracketTypeMappings = CreateTagToIndexMappings(biDiPairedBracketTypeEntries);

            foreach (var (range, name) in ReadBiDiPairedBracketData())
            {
                AddBiDiBracket(biDiData, range, biDiPairedBracketTypeMappings[name]);
            }

            // The builder is created with its default initial value.
            var biDiTrieBuilder = new UnicodeTrieBuilder();

            foreach (var properties in biDiData.Values)
            {
                // Pack the three properties into one value: BiDiClass and BracketType are
                // shifted into their slots, Bracket is stored unshifted in the low bits.
                var value = (properties.BiDiClass << UnicodeData.BIDICLASS_SHIFT) |
                            (properties.BracketType << UnicodeData.BIDIPAIREDBRACKEDTYPE_SHIFT) |
                            properties.Bracket;

                biDiTrieBuilder.Set(properties.Codepoint, (uint)value);
            }

            biDiDataEntries = new BiDiDataEntries
            {
                PairedBracketTypes = biDiPairedBracketTypeEntries,
                BiDiClasses = biDiClassEntries
            };

            // Freeze first so a failure while building does not leave an empty file behind.
            var trie = biDiTrieBuilder.Freeze();

            // Make the generator usable on its own: File.Create throws when the target
            // directory is missing, and CreateDirectory is a no-op when it exists.
            Directory.CreateDirectory("Generated");

            // Path.Combine uses the platform's separator. A literal backslash is an
            // ordinary file-name character outside Windows, so "Generated\\BiDi.trie"
            // would create a oddly named file in the working directory there.
            using (var stream = File.Create(Path.Combine("Generated", "BiDi.trie")))
            {
                trie.Save(stream);
            }

            return trie;
        }
示例#3
0
        /// <summary>
        /// Builds a trie that stores, for each code point (or code point range) read
        /// from the break data files, the index of its break type name within the
        /// "Grapheme_Cluster_Break (GCB)" property value aliases.
        /// </summary>
        /// <remarks>
        /// Entries whose break type name is not among those aliases are skipped. The
        /// grapheme break file is applied first and the emoji data file second.
        /// </remarks>
        /// <returns>The frozen trie.</returns>
        private static UnicodeTrie GenerateBreakTypeTrie()
        {
            var graphemeBreakClassValues = UnicodeEnumsGenerator.GetPropertyValueAliases("# Grapheme_Cluster_Break (GCB)");

            var graphemeBreakClassMapping = graphemeBreakClassValues.Select(x => x.name).ToList();

            var trieBuilder = new UnicodeTrieBuilder();

            // Both files are processed by the same rules; the order is significant
            // because the second file is written into the builder after the first.
            var breakDataSources = new[]
            {
                "https://www.unicode.org/Public/UCD/latest/ucd/auxiliary/GraphemeBreakProperty.txt",
                "https://unicode.org/Public/emoji/12.0/emoji-data.txt"
            };

            foreach (var source in breakDataSources)
            {
                foreach (var (start, end, graphemeBreakType) in ReadBreakData(source))
                {
                    // One scan answers both "is it known?" and "which index?".
                    var classIndex = graphemeBreakClassMapping.IndexOf(graphemeBreakType);

                    if (classIndex < 0)
                    {
                        continue;
                    }

                    if (start == end)
                    {
                        trieBuilder.Set(start, (uint)classIndex);
                    }
                    else
                    {
                        trieBuilder.SetRange(start, end, (uint)classIndex);
                    }
                }
            }

            return trieBuilder.Freeze();
        }
示例#4
0
        /// <summary>
        /// Generates the general category, script and line break class enums, collects
        /// the per-code-point data, packs each item into a single trie value, writes
        /// the frozen trie to <c>Generated/UnicodeData.trie</c> and returns it.
        /// </summary>
        /// <param name="dataEntries">
        /// Receives the generated script, general category and line break class enum entries.
        /// </param>
        /// <param name="unicodeData">
        /// Receives the collected items; each item is stored in the trie under its
        /// <c>Codepoint</c>.
        /// </param>
        /// <returns>The frozen trie that was saved to disk.</returns>
        public static UnicodeTrie GenerateUnicodeDataTrie(out UnicodeDataEntries dataEntries, out Dictionary<int, UnicodeDataItem> unicodeData)
        {
            var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();

            var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

            var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();

            // Scripts are looked up by name, the other two properties by tag.
            var scriptMappings = CreateNameToIndexMappings(scriptEntries);

            var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();

            var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

            unicodeData = GetUnicodeData(generalCategoryMappings, scriptMappings, lineBreakClassMappings);

            // The builder is created with its default initial value.
            var unicodeDataTrieBuilder = new UnicodeTrieBuilder();

            foreach (var properties in unicodeData.Values)
            {
                // [line break]|[script]|[category] - the category is stored unshifted
                // in the low bits.
                var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                            (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                            properties.GeneralCategory;

                unicodeDataTrieBuilder.Set(properties.Codepoint, (uint)value);
            }

            dataEntries = new UnicodeDataEntries
            {
                Scripts           = scriptEntries,
                GeneralCategories = generalCategoryEntries,
                LineBreakClasses  = lineBreakClassEntries
            };

            // Freeze first so a failure while building does not leave an empty file behind.
            var trie = unicodeDataTrieBuilder.Freeze();

            // Make the generator usable on its own: File.Create throws when the target
            // directory is missing, and CreateDirectory is a no-op when it exists.
            Directory.CreateDirectory("Generated");

            // Path.Combine uses the platform's separator. A literal backslash is an
            // ordinary file-name character outside Windows.
            using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
            {
                trie.Save(stream);
            }

            return trie;
        }
        /// <summary>
        /// Reads the general category, script, BiDi class and line break class data,
        /// merges them per code point, packs each item into a single trie value and
        /// writes the frozen trie to <c>Generated/UnicodeData.trie</c>. Afterwards the
        /// property value alias helper is generated from the four enum entry sets.
        /// </summary>
        public static void Execute()
        {
            var codepoints = new Dictionary<int, UnicodeDataItem>();

            var generalCategoryEntries = UnicodeEnumsGenerator.CreateGeneralCategoryEnum();

            var generalCategoryMappings = CreateTagToIndexMappings(generalCategoryEntries);

            foreach (var (range, name) in ReadGeneralCategoryData())
            {
                AddGeneralCategoryRange(codepoints, range, generalCategoryMappings[name]);
            }

            var scriptEntries = UnicodeEnumsGenerator.CreateScriptEnum();

            // Scripts are looked up by name, the other properties by tag.
            var scriptMappings = CreateNameToIndexMappings(scriptEntries);

            foreach (var (range, name) in ReadScriptData())
            {
                AddScriptRange(codepoints, range, scriptMappings[name]);
            }

            var biDiClassEntries = UnicodeEnumsGenerator.CreateBiDiClassEnum();

            var biDiClassMappings = CreateTagToIndexMappings(biDiClassEntries);

            foreach (var (range, name) in ReadBiDiData())
            {
                AddBiDiClassRange(codepoints, range, biDiClassMappings[name]);
            }

            var lineBreakClassEntries = UnicodeEnumsGenerator.CreateLineBreakClassEnum();

            var lineBreakClassMappings = CreateTagToIndexMappings(lineBreakClassEntries);

            foreach (var (range, name) in ReadLineBreakClassData())
            {
                AddLineBreakClassRange(codepoints, range, lineBreakClassMappings[name]);
            }

            // NOTE: an explicit initial value (all fields 0 except the category set to
            // GeneralCategory.Other) was previously sketched here but is not in use;
            // the builder is created with its default initial value.
            var builder = new UnicodeTrieBuilder();

            foreach (var properties in codepoints.Values)
            {
                // [line break]|[biDi]|[script]|[category] - the category is stored
                // unshifted in the low bits.
                var value = (properties.LineBreakClass << UnicodeData.LINEBREAK_SHIFT) |
                            (properties.BiDiClass << UnicodeData.BIDI_SHIFT) |
                            (properties.Script << UnicodeData.SCRIPT_SHIFT) |
                            properties.GeneralCategory;

                builder.Set(properties.Codepoint, (uint)value);
            }

            // Freeze first so a failure while building does not leave an empty file behind.
            var trie = builder.Freeze();

            // File.Create throws when the target directory is missing; CreateDirectory
            // is a no-op when it already exists.
            Directory.CreateDirectory("Generated");

            // Path.Combine uses the platform's separator. A literal backslash is an
            // ordinary file-name character outside Windows.
            using (var stream = File.Create(Path.Combine("Generated", "UnicodeData.trie")))
            {
                trie.Save(stream);
            }

            UnicodeEnumsGenerator.CreatePropertyValueAliasHelper(scriptEntries, generalCategoryEntries,
                                                                 biDiClassEntries, lineBreakClassEntries);
        }