/// <summary>
/// Walks the lines returned by <c>EmojiDescriptionLines()</c> and rebuilds
/// <c>AllGroups</c>, <c>Lookup</c>, <c>MatchOne</c> and <c>MatchMultiple</c> from them.
/// </summary>
/// <remarks>
/// Three kinds of lines are recognised: "# group:" headers, "# subgroup:" headers and
/// code point sequence lines. Every other line is ignored.
/// </remarks>
private static void ParseEmojiList()
{
    var groupPattern = new Regex(@"^# group: (.*)");
    var subgroupPattern = new Regex(@"^# subgroup: (.*)");
    var sequencePattern = new Regex(@"^([0-9a-fA-F ]+[0-9a-fA-F]).*; *([-a-z]*) *# [^ ]* (E[0-9.]* )?(.*)");

    // Alternations over every known skin tone / hair style component.
    var skinToneAlternation = string.Join("|", SkinToneComponents.ToArray());
    var hairStyleAlternation = string.Join("|", HairStyleComponents.ToArray());
    var skinTonePattern = new Regex("(" + skinToneAlternation + ")");
    var hairStylePattern = new Regex("(" + hairStyleAlternation + ")");

    // Families are matched structurally: one or more adults followed by one or
    // more children, joined by ZWJ, each with an optional skin tone.
    var adult = "(👨|👩)(🏻|🏼|🏽|🏾|🏿)?";
    var child = "(👦|👧|👶)(🏻|🏼|🏽|🏾|🏿)?";
    var familyPattern = new Regex($"{adult}(\u200d{adult})*(\u200d{child})+");

    var groups = new List<Group>();
    var emojiByText = new Dictionary<string, Emoji>();
    var emojiByName = new Dictionary<string, Emoji>();
    var alternatives = new List<string>();

    Group activeGroup = null;
    SubGroup activeSubGroup = null;

    foreach (var line in EmojiDescriptionLines())
    {
        var groupMatch = groupPattern.Match(line);
        if (groupMatch.Success)
        {
            activeGroup = new Group() { Name = groupMatch.Groups[1].Value };
            groups.Add(activeGroup);
            continue;
        }

        var subgroupMatch = subgroupPattern.Match(line);
        if (subgroupMatch.Success)
        {
            activeSubGroup = new SubGroup()
            {
                Name = subgroupMatch.Groups[1].Value,
                Group = activeGroup,
            };
            activeGroup.SubGroups.Add(activeSubGroup);
            continue;
        }

        var sequenceMatch = sequencePattern.Match(line);
        if (!sequenceMatch.Success)
        {
            continue;
        }

        string name = sequenceMatch.Groups[4].Value;

        // Turn the space-separated hexadecimal code points into the actual string.
        string[] codepoints = sequenceMatch.Groups[1].Value.Split(' ');
        string text = string.Join("",
            codepoints.Select(hex => char.ConvertFromUtf32(Convert.ToInt32(hex, 16))));

        bool hasModifier = false;

        // Family emoji are already covered by familyPattern, which is part of the
        // final regex, so only non-family sequences contribute an alternative.
        if (!familyPattern.Match(text).Success)
        {
            // Swap each concrete modifier (e.g. "🏻") for the full alternation
            // (e.g. "(🏻|🏼|🏽|🏾|🏿)") so that every variation of this emoji is
            // matched, including ones that are not listed in the standard.
            bool hasNonFirstModifier = false;

            string withHairStyles = hairStylePattern.Replace(text, hit =>
            {
                hasModifier = true;
                hasNonFirstModifier |= hit.Value != HairStyleComponents[0];
                return hairStylePattern.ToString();
            });

            string generalized = skinTonePattern.Replace(withHairStyles, hit =>
            {
                hasModifier = true;
                hasNonFirstModifier |= hit.Value != SkinToneComponents[0];
                return skinTonePattern.ToString();
            });

            // A sequence using a non-first modifier yields the same generalized
            // pattern as its first-modifier sibling, so it is recorded only once.
            if (!hasNonFirstModifier)
            {
                alternatives.Add(hasModifier ? generalized : text);
            }
        }

        // Keep the groups free of duplicates: drop an unqualified or
        // minimally-qualified entry when its fully-qualified form is already known.
        bool notFullyQualified = line.Contains("unqualified")
                              || line.Contains("minimally-qualified");
        if (notFullyQualified
            && (emojiByText.ContainsKey(text + "\ufe0f")
                || emojiByText.ContainsKey(text.Replace("\u20e3", "\ufe0f\u20e3"))))
        {
            continue;
        }

        var emoji = new Emoji()
        {
            Name = name,
            Text = text,
            SubGroup = activeSubGroup,
            Renderable = Typeface.CanRender(text),
        };
        emojiByText[text] = emoji;
        emojiByName[name] = emoji;

        // The part of the name before the first ':' identifies the generic emoji.
        // A modified emoji whose generic name is already registered becomes one of
        // its variations; everything else goes into the current subgroup.
        // FIXME: does not work properly because variations can appear before the generic emoji
        string genericName = name.Split(':')[0];
        if (hasModifier && emojiByName.TryGetValue(genericName, out var parent))
        {
            parent.VariationList.Add(emoji);
        }
        else
        {
            activeSubGroup.EmojiList.Add(emoji);
        }
    }

    // Drop groups that ended up without any emoji, for instance the Components.
    groups.RemoveAll(g => g.EmojiCount == 0);

    AllGroups = groups;
    Lookup = emojiByText;

    // Build a regex that matches any emoji. Longer alternatives come first so the
    // alternation prefers the longest sequence. Only the collected alternatives
    // have '*' neutralised; the family pattern keeps its quantifiers.
    var longestFirst = alternatives.OrderByDescending(x => x.Length);
    var joinedAlternatives = string.Join("|", longestFirst).Replace("*", "[*]");
    var anyEmoji = "(" + familyPattern.ToString() + "|" + joinedAlternatives + ")";

    MatchOne = new Regex(anyEmoji);
    MatchMultiple = new Regex(anyEmoji + "+");
}
/// <summary>
/// Walks the lines returned by <c>EmojiDescriptionLines()</c> and rebuilds
/// <c>AllGroups</c>, <c>Lookup</c>, <c>MatchOne</c> and <c>MatchMultiple</c> from them.
/// </summary>
/// <remarks>
/// Three kinds of lines are recognised: "# group:" headers, "# subgroup:" headers and
/// code point sequence lines. Sequences that <c>Typeface.CanRender</c> rejects are
/// skipped entirely. An emoji containing a skin tone modifier is attached as a
/// variation of the most recent emoji without one.
/// </remarks>
private static void ParseEmojiList()
{
    var modifiers_list = new string[] { "🏻", "🏼", "🏽", "🏾", "🏿" };
    var modifiers_string = "(" + string.Join("|", modifiers_list) + ")";

    var match_group = new Regex(@"^# group: (.*)");
    var match_subgroup = new Regex(@"^# subgroup: (.*)");
    var match_sequence = new Regex(@"^([0-9a-fA-F ]+[0-9a-fA-F]).*; (fully-|minimally-|un)qualified.*# [^ ]* (.*)");
    var match_modifier = new Regex(modifiers_string);

    var list = new List<Group>();
    var lookup = new Dictionary<string, Emoji>();
    var alltext = new List<string>();

    Group last_group = null;
    SubGroup last_subgroup = null;
    Emoji last_emoji = null;

    foreach (var line in EmojiDescriptionLines())
    {
        var m = match_group.Match(line);
        if (m.Success)
        {
            last_group = new Group() { Name = m.Groups[1].ToString() };
            list.Add(last_group);
            continue;
        }

        m = match_subgroup.Match(line);
        if (m.Success)
        {
            last_subgroup = new SubGroup() { Name = m.Groups[1].ToString(), Group = last_group };
            last_group.SubGroups.Add(last_subgroup);
            continue;
        }

        m = match_sequence.Match(line);
        if (!m.Success)
        {
            continue;
        }

        string sequence = m.Groups[1].ToString();
        string name = m.Groups[3].ToString();

        // Turn the space-separated hexadecimal code points into the actual string.
        string text = string.Concat(Array.ConvertAll(
            sequence.Split(' '),
            item => char.ConvertFromUtf32(Convert.ToInt32(item, 16))));

        // Only include emojis that we know how to render
        if (!Typeface.CanRender(text))
        {
            continue;
        }

        // Swap each concrete modifier for the full alternation so that one regex
        // entry covers all skin tones. Sequences using a modifier other than the
        // first one would yield the same entry again, so they are not added.
        bool has_modifier = false;
        bool has_high_modifier = false;
        var regex_text = match_modifier.Replace(text, (x) =>
        {
            has_modifier = true;
            has_high_modifier |= x.Value != modifiers_list[0];
            return modifiers_string;
        });

        if (!has_high_modifier)
        {
            alltext.Add(has_modifier ? regex_text : text);
        }

        // Only add fully-qualified characters to the groups, or we will
        // end with a lot of dupes.
        if (line.Contains("unqualified") || line.Contains("minimally-qualified"))
        {
            // Skip this if there is already a fully qualified version
            if (lookup.ContainsKey(text + "\ufe0f"))
            {
                continue;
            }

            if (lookup.ContainsKey(text.Replace("\u20e3", "\ufe0f\u20e3")))
            {
                continue;
            }
        }

        var emoji = new Emoji() { Name = name, Text = text, SubGroup = last_subgroup };
        lookup[text] = emoji;

        // We assume a modified emoji is a variation of the previous base emoji.
        // If there is no previous base emoji yet (e.g. it was filtered out as
        // non-renderable), fall back to treating this one as a base emoji instead
        // of dereferencing null.
        if (has_modifier && last_emoji != null)
        {
            // The base emoji is listed as the first entry of its own variations.
            if (last_emoji.VariationList.Count == 0)
            {
                last_emoji.VariationList.Add(last_emoji);
            }

            last_emoji.VariationList.Add(emoji);
        }
        else
        {
            last_emoji = emoji;
            last_subgroup.EmojiList.Add(emoji);
        }
    }

    // Remove empty groups, for instance the Components. RemoveAll inspects every
    // entry, including index 0, which the previous countdown loop never reached.
    list.RemoveAll(g => g.EmojiCount == 0);

    AllGroups = list;
    Lookup = lookup;

    // Build a regex that matches any Emoji; longer alternatives go first so the
    // alternation prefers the longest sequence.
    var textarray = alltext.ToArray();
    Array.Sort(textarray, (a, b) => b.Length - a.Length);
    var regextext = "(" + string.Join("|", textarray).Replace("*", "[*]") + ")";

    MatchOne = new Regex(regextext);
    MatchMultiple = new Regex(regextext + "+");
}