/// <summary>
/// Adds the terms "a" and "b" to an empty <c>Frequencies&lt;string&gt;</c> and
/// asserts that <c>Terms()</c> compares equal to the sequence { "a", "b" }.
/// </summary>
/// <remarks>
/// NOTE(review): how two sequences are compared depends on the Assert
/// implementation in use (not visible from here) - confirm it compares
/// element by element rather than by reference.
/// </remarks>
public void test_terms()
{
    // The sequence Terms() is expected to produce, declared up front.
    IEnumerable<string> expectedTerms = new List<string> { "a", "b" };

    // Populate a fresh frequency table with one occurrence of each term.
    var counter = new Frequencies<string>();
    counter.Add("a");
    counter.Add("b");

    Assert.AreEqual(expectedTerms, counter.Terms());
}
/// <summary>
/// Builds a <c>Frequencies&lt;string&gt;</c> from the tokens of the Emma sample
/// text and checks the overall count, the value stored for "and", and the
/// number of terms against known values.
/// </summary>
public void test_create_frequency_object_from_text()
{
    var text = TextExamples.emma();
    var freq = new Frequencies<string>();

    // The pattern wraps \W+ in a capturing group, so Regex.Split also returns
    // the separator runs themselves; they are added to the table like any
    // other token.
    foreach (var token in Regex.Split(text, @"(\W+)"))
    {
        freq.Add(token);
    }

    // Assert.AreEqual takes (expected, actual). Keeping the constants in the
    // first position makes a failure message report the right values.
    Assert.AreEqual(2227, freq.Count());
    Assert.AreEqual(47.0, freq.Get("and"));
    Assert.AreEqual(479, freq.Terms().Count());
}
/// <summary>
/// Splits the Emma sample text into tokens, groups the filtered tokens with
/// <c>NGram(3)</c>, counts each group (joined with single spaces) in a
/// <c>Frequencies&lt;string&gt;</c>, and writes to the console: the ten entries
/// with the highest <c>Value</c>, then <c>Count()</c>, then the number of terms.
/// </summary>
private static void frequencies_of_ngrams_in_emma_sample()
{
    var sample = TextExamples.emma();
    var counts = new Frequencies<string>();

    // Drop the literal ", " separator and anything the not_whitespace
    // pattern does not match before forming the groups.
    var words = Regex.Split(sample, @"(\W+)")
        .Where(piece => piece != ", ")
        .Where(piece => TextTools.not_whitespace.IsMatch(piece));

    foreach (var gram in words.NGram(3))
    {
        var phrase = gram.Aggregate((left, right) => left + " " + right);
        counts.Add(phrase);
    }

    // Ascending sort followed by Reverse puts the largest values first.
    var topTen = counts.Generate()
        .OrderBy(pair => pair.Value)
        .Reverse()
        .Take(10);

    foreach (var entry in topTen)
    {
        var line = String.Format(@"{0}: {1}", entry.Key, entry.Value);
        Console.WriteLine(line);
    }

    Console.WriteLine(counts.Count());
    Console.WriteLine(counts.Terms().Count());
}