// Parses the "text" field of every entry under "data" in ./sample_timeline.json with an
// EmojiParser configured from ./emoji.json, then checks the emoji count of the first four tweets.
public void Can_Parse_Emojis_From_Text()
        {
            // Arrange: point the parser at the local emoji data file via in-memory configuration.
            var config = new ConfigurationBuilder()
                         .AddInMemoryCollection(new[]
            {
                new KeyValuePair<string, string>("EmojiData:Url", "./emoji.json")
            }).Build();

            var emojiParser = new EmojiParser(config);
            emojiParser.Initialize();

            var sampleTweetJson = File.ReadAllText("./sample_timeline.json");
            var sampleTweetObj  = JObject.Parse(sampleTweetJson);
            var sampleTweets    = sampleTweetObj["data"]?.Children();

            // Fail with a readable message when the fixture has no "data" node, instead of
            // letting the loop below blow up on a missing value.
            Assert.IsNotNull(sampleTweets, "sample_timeline.json does not contain a \"data\" node.");

            // Act: parse every tweet's text and keep the results in fixture order.
            var actual = new List<IEnumerable<Emoji>>();

            foreach (var sampleTweet in sampleTweets)
            {
                var text         = sampleTweet.Value<string>("text");
                var actualResult = emojiParser.Parse(text);
                Assert.IsNotNull(actualResult, "Parse returned null for a sample tweet.");
                actual.Add(actualResult);
            }

            // Assert: the checks below index the first four tweets, so make that precondition explicit.
            Assert.IsTrue(actual.Count >= 4, "Expected at least four sample tweets in the fixture.");

            // AreEqual reports expected vs. actual on failure, unlike IsTrue(a == b).
            Assert.AreEqual(1, actual[0].Count(), "Emoji count of tweet #0");
            Assert.AreEqual(1, actual[1].Count(), "Emoji count of tweet #1");
            // NOTE: arguably this should be 3, because one emoji appears twice in this tweet.
            Assert.AreEqual(2, actual[2].Count(), "Emoji count of tweet #2");
            Assert.AreEqual(3, actual[3].Count(), "Emoji count of tweet #3");
        }
Example #2
0
        /// <summary>
        /// Runs the emoji, goods, e-mail, URL, IP, Arabic-numeral, Chinese-number, English and
        /// Chinese parsers over <paramref name="text"/> (in that order) and returns all symbols
        /// they produce, ordered by start index and then by length.
        /// </summary>
        /// <param name="text">The text to split into symbols.</param>
        /// <param name="distinct">
        /// When <c>true</c>, every character that survives <see cref="string.Trim()"/> is also
        /// emitted as its own one-character symbol, and symbols that share both start index and
        /// length are collapsed into one. The symbol that was added first is the one kept, so a
        /// parser-produced symbol always wins over the per-character filler.
        /// </param>
        /// <returns>The ordered list of symbols; ties keep their insertion order.</returns>
        public static List<PlainTextSymbol> ParsePlainTextSymbols(string text, bool distinct = false)
        {
            var symbols = new List<PlainTextSymbol>();
            List<IndexRange> ranges;

            // Each parser is handed the ranges reported by the previous one and reports a new set.
            // NOTE(review): presumably these are the spans already claimed by earlier parsers — confirm.
            symbols.AddRange(_emojiParser.Parse(text, out ranges));
            symbols.AddRange(_goodsParser.Parse(text, ranges, out ranges));
            symbols.AddRange(_emailParser.Parse(text, ranges, out ranges));
            symbols.AddRange(_urlParser.Parse(text, ranges, out ranges));
            symbols.AddRange(_ipParser.Parse(text, ranges, out ranges));
            symbols.AddRange(_arabParser.Parse(text, ranges, out ranges));
            symbols.AddRange(_chNumberParser.Parse(text, ranges, out ranges));
            symbols.AddRange(_englishParser.Parse(text, ranges, out ranges));
            symbols.AddRange(_chineseParser.Parse(text, ranges, out ranges));

            if (distinct)
            {
                // NOTE(review): Substring(i, 1) walks UTF-16 code units, so a surrogate pair yields
                // two separate one-unit symbols — confirm that this is intended.
                for (int i = 0; i < text.Length; i++)
                {
                    string single = text.Substring(i, 1).Trim();
                    if (single.Length == 1)
                    {
                        symbols.Add(new PlainTextSymbol(single, i, null));
                    }
                }
            }

            // List<T>.Sort is not stable, so sorting the symbols directly would make the winner of
            // a (start, length) tie arbitrary. Sort insertion indices instead and use the index as
            // the final tie-breaker, which yields a stable, deterministic order.
            var order = new List<int>(symbols.Count);
            for (int i = 0; i < symbols.Count; i++)
            {
                order.Add(i);
            }

            order.Sort((l, r) =>
            {
                int diff = symbols[l].StartIndex.CompareTo(symbols[r].StartIndex);
                if (diff == 0)
                {
                    diff = symbols[l].Length.CompareTo(symbols[r].Length);
                }
                if (diff == 0)
                {
                    diff = l.CompareTo(r);
                }
                return diff;
            });

            var sorted = new List<PlainTextSymbol>(order.Count);
            foreach (int index in order)
            {
                sorted.Add(symbols[index]);
            }

            if (distinct && sorted.Count > 1)
            {
                // Single-pass compaction: keep the first symbol of every (start, length) run and
                // drop the rest, without the repeated shifting that RemoveAt in a loop causes.
                int kept = 1;
                for (int read = 1; read < sorted.Count; read++)
                {
                    var previous = sorted[kept - 1];
                    var current  = sorted[read];
                    if (previous.StartIndex == current.StartIndex && previous.Length == current.Length)
                    {
                        continue;
                    }

                    sorted[kept] = current;
                    kept++;
                }

                sorted.RemoveRange(kept, sorted.Count - kept);
            }

            return sorted;
        }