Beispiel #1
0
        /// <summary>
        /// Walks the lines returned by <c>EmojiDescriptionLines()</c> and rebuilds the
        /// static emoji tables: <c>AllGroups</c>, <c>Lookup</c>, <c>MatchOne</c> and
        /// <c>MatchMultiple</c>.
        /// </summary>
        /// <remarks>
        /// Three kinds of line are recognised: <c># group: …</c> starts a new group,
        /// <c># subgroup: …</c> starts a new subgroup inside the current group, and a
        /// line starting with space-separated hexadecimal code points describes one emoji.
        /// Every other line is ignored.
        /// </remarks>
        private static void ParseEmojiList()
        {
            // Line classifiers.
            var groupRegex    = new Regex(@"^# group: (.*)");
            var subgroupRegex = new Regex(@"^# subgroup: (.*)");
            var sequenceRegex = new Regex(@"^([0-9a-fA-F ]+[0-9a-fA-F]).*; *([-a-z]*) *# [^ ]* (E[0-9.]* )?(.*)");

            // Alternations over every skin tone / hair style component. The pattern
            // strings are kept around because they are also spliced into the big regex.
            var skinTonePattern  = "(" + string.Join("|", SkinToneComponents.ToArray()) + ")";
            var skinToneRegex    = new Regex(skinTonePattern);
            var hairStylePattern = "(" + string.Join("|", HairStyleComponents.ToArray()) + ")";
            var hairStyleRegex   = new Regex(hairStylePattern);

            // A family is one or more adults followed by one or more children, all
            // joined by U+200D, each with an optional skin tone.
            var adultPattern  = "(👨|👩)(🏻|🏼|🏽|🏾|🏿)?";
            var childPattern  = "(👦|👧|👶)(🏻|🏼|🏽|🏾|🏿)?";
            var familyPattern = adultPattern + "(\u200d" + adultPattern + ")*(\u200d" + childPattern + ")+";
            var familyRegex   = new Regex(familyPattern);

            var groups   = new List<Group>();
            var byText   = new Dictionary<string, Emoji>();
            var byName   = new Dictionary<string, Emoji>();
            var patterns = new List<string>();

            Group    currentGroup    = null;
            SubGroup currentSubgroup = null;

            foreach (var line in EmojiDescriptionLines())
            {
                var groupMatch = groupRegex.Match(line);
                if (groupMatch.Success)
                {
                    currentGroup = new Group() { Name = groupMatch.Groups[1].Value };
                    groups.Add(currentGroup);
                    continue;
                }

                var subgroupMatch = subgroupRegex.Match(line);
                if (subgroupMatch.Success)
                {
                    currentSubgroup = new SubGroup()
                    {
                        Name  = subgroupMatch.Groups[1].Value,
                        Group = currentGroup,
                    };
                    currentGroup.SubGroups.Add(currentSubgroup);
                    continue;
                }

                var sequenceMatch = sequenceRegex.Match(line);
                if (!sequenceMatch.Success)
                {
                    // Neither a header nor an emoji description: nothing to do.
                    continue;
                }

                string codepoints = sequenceMatch.Groups[1].Value;
                string name       = sequenceMatch.Groups[4].Value;

                // Turn "1F468 200D 1F469" into the actual string of characters.
                string text = string.Concat(
                    codepoints.Split(' ').Select(hex => char.ConvertFromUtf32(Convert.ToInt32(hex, 16))));

                bool hasModifier = false;

                // Family emoji are already covered by the family regex, which is part of
                // the final pattern, so only non-family sequences contribute an alternative.
                if (!familyRegex.IsMatch(text))
                {
                    // Replace e.g. "🏻" with "(🏻|🏼|🏽|🏾|🏿)" so that the big regex matches
                    // all variations of this emoji even if they are not in the standard.
                    bool hasNonFirstModifier = false;

                    // Hair styles are substituted first, skin tones second.
                    string withHairStyles = hairStyleRegex.Replace(text, hit =>
                    {
                        hasModifier          = true;
                        hasNonFirstModifier |= hit.Value != HairStyleComponents[0];
                        return hairStylePattern;
                    });
                    string generalized = skinToneRegex.Replace(withHairStyles, hit =>
                    {
                        hasModifier          = true;
                        hasNonFirstModifier |= hit.Value != SkinToneComponents[0];
                        return skinTonePattern;
                    });

                    // Sequences using a non-first component would yield the same
                    // generalized pattern as their first-component sibling; skip them.
                    if (!hasNonFirstModifier)
                    {
                        patterns.Add(hasModifier ? generalized : text);
                    }
                }

                // Only add fully-qualified characters to the groups, or we will end up
                // with a lot of dupes: skip a partially qualified entry when a fully
                // qualified version of it has already been recorded.
                bool partiallyQualified = line.Contains("unqualified") || line.Contains("minimally-qualified");
                if (partiallyQualified
                    && (byText.ContainsKey(text + "\ufe0f")
                        || byText.ContainsKey(text.Replace("\u20e3", "\ufe0f\u20e3"))))
                {
                    continue;
                }

                var emoji = new Emoji()
                {
                    Name       = name,
                    Text       = text,
                    SubGroup   = currentSubgroup,
                    Renderable = Typeface.CanRender(text),
                };
                byText[text] = emoji;
                byName[name] = emoji;

                // The part of the name before the first ':' identifies the generic emoji.
                // If this entry carries a modifier and that generic emoji is known, file it
                // as a variation; otherwise it goes into the current subgroup.
                // FIXME: does not work properly because variations can appear before the generic emoji
                string baseName = name.Split(':')[0];
                if (hasModifier && byName.TryGetValue(baseName, out var parent))
                {
                    parent.VariationList.Add(emoji);
                }
                else
                {
                    currentSubgroup.EmojiList.Add(emoji);
                }
            }

            // Remove empty groups, for instance the Components
            groups.RemoveAll(g => g.EmojiCount == 0);

            AllGroups = groups;
            Lookup    = byText;

            // Build a regex that matches any emoji. Longer alternatives are listed first;
            // a literal '*' in a sequence is wrapped in a character class.
            var alternatives    = string.Join("|", patterns.OrderByDescending(p => p.Length)).Replace("*", "[*]");
            var anyEmojiPattern = "(" + familyPattern + "|" + alternatives + ")";

            MatchOne      = new Regex(anyEmojiPattern);
            MatchMultiple = new Regex(anyEmojiPattern + "+");
        }
Beispiel #2
0
        /// <summary>
        /// Parses the lines returned by <c>EmojiDescriptionLines()</c> and populates
        /// <c>AllGroups</c>, <c>Lookup</c>, <c>MatchOne</c> and <c>MatchMultiple</c>.
        /// </summary>
        /// <remarks>
        /// Three kinds of line are recognised: <c># group: …</c> starts a new group,
        /// <c># subgroup: …</c> starts a new subgroup inside the current group, and a
        /// line starting with space-separated hexadecimal code points followed by a
        /// qualification status describes one emoji. Emoji for which
        /// <c>Typeface.CanRender</c> returns false are dropped entirely.
        /// </remarks>
        private static void ParseEmojiList()
        {
            var modifiers_list   = new string[] { "🏻", "🏼", "🏽", "🏾", "🏿" };
            var modifiers_string = "(" + string.Join("|", modifiers_list) + ")";

            var match_group    = new Regex(@"^# group: (.*)");
            var match_subgroup = new Regex(@"^# subgroup: (.*)");
            var match_sequence = new Regex(@"^([0-9a-fA-F ]+[0-9a-fA-F]).*; (fully-|minimally-|un)qualified.*# [^ ]* (.*)");
            var match_modifier = new Regex(modifiers_string);
            var list           = new List <Group>();
            var lookup         = new Dictionary <string, Emoji>();
            var alltext        = new List <string>();

            Group    last_group    = null;
            SubGroup last_subgroup = null;
            Emoji    last_emoji    = null;

            foreach (var line in EmojiDescriptionLines())
            {
                var m = match_group.Match(line);
                if (m.Success)
                {
                    last_group = new Group()
                    {
                        Name = m.Groups[1].ToString()
                    };
                    list.Add(last_group);
                    continue;
                }

                m = match_subgroup.Match(line);
                if (m.Success)
                {
                    last_subgroup = new SubGroup()
                    {
                        Name = m.Groups[1].ToString(), Group = last_group
                    };
                    last_group.SubGroups.Add(last_subgroup);
                    continue;
                }

                m = match_sequence.Match(line);
                if (m.Success)
                {
                    string sequence = m.Groups[1].ToString();
                    string name     = m.Groups[3].ToString();

                    // Turn the hexadecimal code point list into the actual string.
                    string text = "";
                    foreach (var item in sequence.Split(' '))
                    {
                        int codepoint = Convert.ToInt32(item, 16);
                        text += char.ConvertFromUtf32(codepoint);
                    }

                    // Only include emojis that we know how to render
                    if (!Typeface.CanRender(text))
                    {
                        continue;
                    }

                    // Replace each skin tone modifier with an alternation over all of
                    // them, so that one regex alternative covers every tone.
                    bool has_modifier      = false;
                    bool has_high_modifier = false;
                    var  regex_text        = match_modifier.Replace(text, (x) =>
                    {
                        has_modifier       = true;
                        has_high_modifier |= x.Value != modifiers_list[0];
                        return(modifiers_string);
                    });

                    // A sequence using any modifier other than the first one would yield
                    // the same generalized pattern as its first-modifier sibling; skip it.
                    if (!has_high_modifier)
                    {
                        alltext.Add(has_modifier ? regex_text : text);
                    }

                    // Only add fully-qualified characters to the groups, or we will
                    // end with a lot of dupes.
                    if (line.Contains("unqualified") || line.Contains("minimally-qualified"))
                    {
                        // Skip this if there is already a fully qualified version
                        if (lookup.ContainsKey(text + "\ufe0f"))
                        {
                            continue;
                        }
                        if (lookup.ContainsKey(text.Replace("\u20e3", "\ufe0f\u20e3")))
                        {
                            continue;
                        }
                    }

                    var emoji = new Emoji()
                    {
                        Name = name, Text = text, SubGroup = last_subgroup
                    };
                    lookup[text] = emoji;

                    // A modified emoji is assumed to be a variation of the previous base
                    // emoji. If no base emoji has been seen yet there is nothing to attach
                    // it to, so it is treated as a base emoji instead of dereferencing null.
                    if (has_modifier && last_emoji != null)
                    {
                        // The base emoji is itself the first entry of its variation list.
                        if (last_emoji.VariationList.Count == 0)
                        {
                            last_emoji.VariationList.Add(last_emoji);
                        }
                        last_emoji.VariationList.Add(emoji);
                    }
                    else
                    {
                        last_emoji = emoji;
                        last_subgroup.EmojiList.Add(emoji);
                    }
                }
            }

            // Remove empty groups, for instance the Components. RemoveAll inspects every
            // group, including the first one, which the previous hand-written reverse
            // loop (condition "--i > 0") never reached.
            list.RemoveAll(g => g.EmojiCount == 0);

            AllGroups = list;
            Lookup    = lookup;

            // Build a regex that matches any Emoji. Longer alternatives come first; a
            // literal '*' in a sequence is wrapped in a character class.
            var textarray = alltext.ToArray();

            Array.Sort(textarray, (a, b) => b.Length - a.Length);
            var regextext = "(" + string.Join("|", textarray).Replace("*", "[*]") + ")";

            MatchOne      = new Regex(regextext);
            MatchMultiple = new Regex(regextext + "+");
        }