Пример #1
0
        static EmojiAccessor()
        {
            var assembly     = Assembly.GetAssembly(typeof(EmojiAccessor));
            var resourceName = "EmojiSharp.emoji.json";

            using (var stream = assembly.GetManifestResourceStream(resourceName))
                using (var reader = new StreamReader(stream))
                {
                    var result = reader.ReadToEnd();
                    Emojis             = JsonConvert.DeserializeObject <Emojis>(result);
                    GroupsAndSubGroups = new Dictionary <string, IList <string> >();

                    foreach (var g in Emojis.Groups)
                    {
                        GroupsAndSubGroups.Add(g.Name, g.SubGroups.Select(sg => sg.Name).ToList());
                    }
                }
        }
Пример #2
0
        static Emojis LoadEmojis(bool forceDownload = false)
        {
            var emojiList = new Emojis()
            {
                LastUpdate = DateTime.UtcNow
            };

            var url          = "https://unicode.org/emoji/charts/emoji-list.html";
            var htmlFilePath = Path.Combine(AppContext.BaseDirectory, "emoji.html");

            if (forceDownload || !File.Exists(htmlFilePath))
            {
                Console.WriteLine($"Downloading '{url}' (~25MB might take a while!)");
                var sw = new System.Diagnostics.Stopwatch();

                using (var client = new HttpClient())
                {
                    var rawHtml = client.GetStringAsync(url).Result;
                    File.WriteAllText(htmlFilePath, rawHtml);
                }

                sw.Stop();
                Console.WriteLine($"Document downloaded ({sw.ElapsedMilliseconds}ms), processing...");
            }

            var doc = new HtmlDocument();

            using (var fs = new FileStream(htmlFilePath, FileMode.Open, FileAccess.Read))
            {
                doc.Load(fs);
            }

            var header = doc.DocumentNode.SelectSingleNode("//h1");

            emojiList.Version = header?.InnerText ?? string.Empty;

            foreach (var row in doc.DocumentNode.SelectNodes("//tr"))
            {
                if (row.ChildNodes.Count == 1)
                {
                    var categoryNode = row.FirstChild;
                    var categoryName = HtmlEntity.DeEntitize(categoryNode.InnerText);
                    if (!string.IsNullOrEmpty(categoryName))
                    {
                        if (categoryNode.HasClass("bighead"))
                        {
                            Console.WriteLine($"#{categoryName}#");
                            emojiList.Groups.Add(new Group(categoryName));
                        }
                        else if (categoryNode.HasClass("mediumhead"))
                        {
                            Console.WriteLine($"  ##{categoryName}##");
                            emojiList.Groups
                            .Last()
                            .SubGroups
                            .Add(new SubGroup(categoryName));
                        }
                    }
                }
                else
                {
                    var emojiRow = row.SelectNodes("td");
                    if (emojiRow?.Count == 5)
                    {
                        var id          = int.Parse(emojiRow[0].InnerText);
                        var codes       = emojiRow[1].InnerText.Split(" ").Select(c => c.Remove(0, 2));
                        var imgNode     = emojiRow[2].SelectSingleNode("a/img");
                        var emoji       = imgNode.GetAttributeValue("alt", string.Empty);
                        var imageBase64 = imgNode.GetAttributeValue("src", string.Empty)
                                          .Replace("data:image/png;base64,", string.Empty);
                        var cldr     = HtmlEntity.DeEntitize(emojiRow[3].InnerText).TrimStart("⊛ ".ToArray());
                        var keywords = emojiRow[4].InnerText
                                       .Split(" | ")
                                       .Select(s => HtmlEntity.DeEntitize(s));
                        var isNew = emojiRow[3].InnerText.StartsWith("⊛");

                        var newEmoji = new Symbol()
                        {
                            Id          = id,
                            Codes       = codes.ToList(),
                            Emoji       = emoji,
                            ImageBase64 = imageBase64,
                            Cldr        = cldr,
                            Keywords    = keywords.ToList(),
                            IsNew       = isNew
                        };
                        Console.WriteLine($"    {newEmoji}");
                        emojiList.Groups.Last().SubGroups.Last().Emojis.Add(newEmoji);
                    }
                }
            }

            var serializerSettings = new JsonSerializerSettings();

            serializerSettings.ContractResolver = new CamelCasePropertyNamesContractResolver();

            var prettySerializerSettings = new JsonSerializerSettings();

            prettySerializerSettings.ContractResolver = new CamelCasePropertyNamesContractResolver();
            prettySerializerSettings.Formatting       = Formatting.Indented;

            var baseDirectory = Path.Combine(AppContext.BaseDirectory, "../../../../EmojiSharp/");

            File.WriteAllText(
                Path.Combine(baseDirectory, "emoji.json"),
                JsonConvert.SerializeObject(emojiList, serializerSettings));

            File.WriteAllText(
                Path.Combine(baseDirectory, "emoji.pretty.json"),
                JsonConvert.SerializeObject(emojiList, prettySerializerSettings));

            var moji = emojiList.Groups.Select(g =>
                                               string.Join(string.Empty,
                                                           g.SubGroups.SelectMany(sg => sg.Emojis.Select(e => e.Emoji))));

            File.WriteAllLines(
                Path.Combine(baseDirectory, "emoji.palette.txt"),
                moji);

            var emojiLookup = (from g in emojiList.Groups
                               from sg in g.SubGroups
                               from e in sg.Emojis
                               let groupId = emojiList.Groups.IndexOf(g)
                                             let emojiId = e.Id
                                                           select new { Emoji = e.Emoji, GroupId = groupId, EmojiId = emojiId });

            File.WriteAllLines(
                Path.Combine(baseDirectory, "emojiLookup.txt"),
                emojiLookup.Select(i => $"{{@\"{i.Emoji}\",({i.GroupId}, {i.EmojiId})}},"));

            var groupsAndIdRange = (from g in emojiList.Groups
                                    let ids = g.SubGroups.SelectMany(sg => sg.Emojis.Select(e => e.Id))
                                              let groupName = g.Name.Replace(" & ", "-").ToLower()
                                                              let minId = ids.Min()
                                                                          let maxId = ids.Max()
                                                                                      select new { GroupName = groupName, Range = $"{minId}:{maxId}" });

            File.WriteAllLines(
                Path.Combine(baseDirectory, "groups.txt"),
                groupsAndIdRange.Select(g => g.GroupName));

            File.WriteAllLines(
                Path.Combine(baseDirectory, "groupsWithRange.txt"),
                groupsAndIdRange.Select(g => $"{g.GroupName},{g.Range}"));

            File.WriteAllLines(
                Path.Combine(baseDirectory, "subGroups.txt"),
                emojiList.Groups.SelectMany(g => g.SubGroups.Select(sg => sg.Name)));

            return(emojiList);
        }
Пример #3
0
        async static Task CreateTableAndPopulate(Emojis emojisToLoad)
        {
            var table    = EmojiTable.Get();
            var imgTable = EmojiTable.GetImg();

            // Drop and recreate table
            await table.DeleteIfExistsAsync();

            await table.CreateIfNotExistsAsync();

            await imgTable.DeleteIfExistsAsync();

            await imgTable.CreateIfNotExistsAsync();

            //Entities
            var emojis = new List <EmojiEntity>();
            //Create the batch operation
            var batchOps    = new List <TableBatchOperation>();
            var imgBatchOps = new List <TableBatchOperation>();

            foreach (var g in emojisToLoad.Groups)
            {
                var batchOp    = new TableBatchOperation();
                var imgBatchOp = new TableBatchOperation();
                var groupId    = emojisToLoad.Groups.IndexOf(g).ToString(EmojiMetadata.IdFormat);

                foreach (var sg in g.SubGroups)
                {
                    foreach (var e in sg.Emojis)
                    {
                        var entity = new EmojiEntity(groupId, e.Id.ToString(EmojiMetadata.IdFormat))
                        {
                            Group    = g.Name,
                            SubGroup = sg.Name,
                            Code     = e.CodeString,
                            Emoji    = e.Emoji,
                            Cldr     = e.Cldr,
                            Keywords = e.Haystack,
                            IsNew    = e.IsNew
                        };
                        emojis.Add(entity);
                        batchOp.Insert(entity);

                        if (!string.IsNullOrWhiteSpace(e.ImageBase64))
                        {
                            var imgEntity = new EmojiImageEntity(groupId, e.Id.ToString(EmojiMetadata.IdFormat))
                            {
                                Emoji       = e.Emoji,
                                ImageBase64 = e.ImageBase64
                            };

                            imgBatchOp.Insert(imgEntity);
                        }

                        // Maximum operations in a batch
                        if (batchOp.Count == 100)
                        {
                            batchOps.Add(batchOp);
                            batchOp = new TableBatchOperation();

                            imgBatchOps.Add(imgBatchOp);
                            imgBatchOp = new TableBatchOperation();
                        }
                    }
                }

                // Batch can only contain operations in the same partition
                if (batchOp.Count > 0)
                {
                    batchOps.Add(batchOp);
                }
            }

            var sw             = new System.Diagnostics.Stopwatch();
            var processedCount = 0;

            foreach (var bo in batchOps)
            {
                sw.Reset();
                Console.WriteLine($"Inserting batch for group {bo.First().Entity.PartitionKey}, {bo.Count} entries");
                sw.Start();
                var result = await table.ExecuteBatchAsync(bo);

                processedCount += bo.Count;
                sw.Stop();
                Console.WriteLine($"  Insert complete: {sw.ElapsedMilliseconds}ms");
                Console.WriteLine($"  Processed: {processedCount} emojis");
            }

            processedCount = 0;
            Console.WriteLine($"###IMAGES###");
            foreach (var bo in imgBatchOps)
            {
                sw.Reset();
                Console.WriteLine($"Inserting image batch for group {bo.First().Entity.PartitionKey}, {bo.Count} entries");
                sw.Start();
                var result = await imgTable.ExecuteBatchAsync(bo);

                sw.Stop();
                processedCount += bo.Count;
                Console.WriteLine($"  Insert complete: {sw.ElapsedMilliseconds}ms");
                Console.WriteLine($"  Processed: {processedCount} emojis");
            }
        }