/// <summary>
/// Parses the text of every tweet in <c>./sample_timeline.json</c> with an
/// <c>EmojiParser</c> configured from <c>./emoji.json</c> and checks the number
/// of emojis reported for the first four tweets.
/// </summary>
public void Can_Parse_Emojis_From_Text()
{
    // Arrange: the parser reads the location of its emoji data from configuration.
    var config = new ConfigurationBuilder()
        .AddInMemoryCollection(new[]
        {
            new KeyValuePair<string, string>("EmojiData:Url", "./emoji.json")
        })
        .Build();

    var emojiParser = new EmojiParser(config);
    emojiParser.Initialize();

    var sampleTweetJson = File.ReadAllText("./sample_timeline.json");
    var sampleTweetObj = JObject.Parse(sampleTweetJson);
    var sampleTweets = sampleTweetObj["data"]?.Children();

    // Fail with a readable message when the fixture has no "data" node,
    // instead of an opaque exception from the loop below.
    Assert.IsNotNull(sampleTweets, "sample_timeline.json does not contain a 'data' node.");

    // Act: parse every tweet and keep the per-tweet results in order.
    var actual = new List<IEnumerable<Emoji>>();
    foreach (var sampleTweet in sampleTweets)
    {
        var text = sampleTweet.Value<string>("text");
        var actualResult = emojiParser.Parse(text);

        Assert.IsNotNull(actualResult);
        actual.Add(actualResult);
    }

    // Assert: the checks below index the first four tweets, so make sure they exist.
    Assert.IsTrue(actual.Count >= 4, "Expected at least four tweets in sample_timeline.json.");

    // AreEqual reports the actual count on failure, unlike IsTrue(x == y).
    Assert.AreEqual(1, actual[0].Count());
    Assert.AreEqual(1, actual[1].Count());
    Assert.AreEqual(2, actual[2].Count());
    // Should be 3 as one appears twice...
    Assert.AreEqual(3, actual[3].Count());
}
/// <summary>
/// Runs every configured symbol parser over <paramref name="text"/> and returns
/// the collected symbols ordered by start index, then by length.
/// </summary>
/// <param name="text">The text to parse.</param>
/// <param name="distinct">
/// When <c>true</c>, every non-whitespace character of <paramref name="text"/> is
/// additionally emitted as its own one-character symbol, and symbols that share
/// the same start index and length are then collapsed to a single entry.
/// </param>
/// <returns>
/// The symbols sorted by <c>StartIndex</c> and then <c>Length</c>. Symbols that
/// tie on both keep the order in which they were produced.
/// </returns>
public static List<PlainTextSymbol> ParsePlainTextSymbols(string text, bool distinct = false)
{
    var symbols = new List<PlainTextSymbol>();
    List<IndexRange> ranges;

    // Each parser is handed the index ranges returned by the previous one and
    // returns an updated set (presumably the spans already claimed - confirm
    // against the parser implementations). The call order is therefore significant.
    symbols.AddRange(_emojiParser.Parse(text, out ranges));
    symbols.AddRange(_goodsParser.Parse(text, ranges, out ranges));
    symbols.AddRange(_emailParser.Parse(text, ranges, out ranges));
    symbols.AddRange(_urlParser.Parse(text, ranges, out ranges));
    symbols.AddRange(_ipParser.Parse(text, ranges, out ranges));
    symbols.AddRange(_arabParser.Parse(text, ranges, out ranges));
    symbols.AddRange(_chNumberParser.Parse(text, ranges, out ranges));
    symbols.AddRange(_englishParser.Parse(text, ranges, out ranges));
    symbols.AddRange(_chineseParser.Parse(text, ranges, out ranges));

    if (distinct)
    {
        // Append one symbol per non-whitespace character. These come after the
        // parser results, which matters for the duplicate removal below.
        // NOTE(review): this walks UTF-16 code units, so a surrogate pair (e.g. an
        // emoji) yields two lone-surrogate symbols - confirm whether that is intended.
        for (int i = 0; i < text.Length; i++)
        {
            char current = text[i];
            if (!char.IsWhiteSpace(current))
            {
                symbols.Add(new PlainTextSymbol(current.ToString(), i, null));
            }
        }
    }

    // List<T>.Sort is not stable, so remember each symbol's insertion position
    // and use it as the final tiebreaker. Without this, which of two symbols with
    // the same start index and length survives the duplicate removal is undefined.
    var ordered = new List<KeyValuePair<int, PlainTextSymbol>>(symbols.Count);
    for (int i = 0; i < symbols.Count; i++)
    {
        ordered.Add(new KeyValuePair<int, PlainTextSymbol>(i, symbols[i]));
    }

    ordered.Sort((l, r) =>
    {
        int diff = l.Value.StartIndex.CompareTo(r.Value.StartIndex);
        if (diff == 0)
        {
            diff = l.Value.Length.CompareTo(r.Value.Length);
        }
        if (diff == 0)
        {
            diff = l.Key.CompareTo(r.Key);
        }
        return diff;
    });

    // Single pass: copy in sorted order and, in distinct mode, keep only the
    // first symbol of each run that shares start index and length.
    var result = new List<PlainTextSymbol>(ordered.Count);
    foreach (var entry in ordered)
    {
        var candidate = entry.Value;
        if (distinct && result.Count > 0)
        {
            var previous = result[result.Count - 1];
            if (previous.StartIndex == candidate.StartIndex && previous.Length == candidate.Length)
            {
                continue;
            }
        }
        result.Add(candidate);
    }

    return result;
}