/// <summary>
/// Lazily yields one <see cref="Type_Classification"/> for every entry of
/// <paramref name="period_final_bigram_frequencies"/>. For each entry the
/// log-likelihood ratio is computed and then scaled by three factors
/// (length penalty, internal-periods factor and the "occurrences without
/// final period" penalty); the scaled value decides the label.
/// </summary>
/// <remarks>
/// This is an iterator method: nothing is evaluated until the returned
/// sequence is enumerated, and it is re-evaluated on every enumeration.
/// Labels produced: <c>"&lt;S&gt;"</c> when the scaled ratio is below 0.3;
/// otherwise <c>"&lt;E&gt;"</c> when the candidate contains no non-period
/// characters, else <c>"&lt;A&gt;"</c>.
/// NOTE(review): the labels presumably stand for sentence boundary,
/// ellipsis and abbreviation respectively - confirm against the consumer.
/// </remarks>
/// <param name="period_final_bigram_frequencies">
/// Frequencies keyed by the candidate token including its trailing
/// character (the last character of each key is stripped to obtain the
/// bare token); the value is used as the joint count c(w0, ".").
/// </param>
/// <param name="unigram_freqs">Frequencies used to look up the count of the bare token.</param>
/// <param name="N">Forwarded as the first argument of <c>LikelihoodRatio.logLambda</c> (presumably the total token count - confirm).</param>
/// <param name="cw1">Count assigned to <c>c_w1</c>, i.e. the count associated with the "." token.</param>
/// <param name="p">Forwarded unchanged to <c>LikelihoodRatio.logLambda</c>.</param>
/// <param name="p0">Forwarded unchanged to <c>LikelihoodRatio.logLambda</c>.</param>
/// <param name="p1">Forwarded unchanged to <c>LikelihoodRatio.logLambda</c>.</param>
/// <returns>A deferred sequence with one classification (and its statistics) per bigram.</returns>
public static IEnumerable<Type_Classification> enumerate_type_based_classification(
    Frequencies<string> period_final_bigram_frequencies,
    Frequencies<string> unigram_freqs,
    double N,
    double cw1,
    double p,
    double p0,
    double p1)
{
    // Scaled log-likelihood values below this cut-off are labelled "<S>".
    const double classification_threshold = 0.3;

    foreach (var bigram in period_final_bigram_frequencies.Generate())
    {
        var s = new Type_Classifier_Statistics();
        s.c_w1 = cw1;
        s.p = p;
        s.p0 = p0;
        s.p1 = p1;
        s.w1 = ".";
        s.w01 = bigram.Key;

        // Split off the ending period.
        s.w0 = bigram.Key.Substring(0, bigram.Key.Length - 1);
        s.c_w01 = bigram.Value;

        // Find the unigram frequency of this term.
        s.c_w0 = unigram_freqs.Get(s.w0);

        s.llr = LikelihoodRatio.logLambda(N, s.c_w01, s.c_w0, s.c_w1, s.p, s.p0, s.p1);

        // Count the internal periods once; every penalty below and the
        // final label depend on it.
        var internal_periods = count_internal_periods(s.w0);
        var non_period_length = s.w0.Length - internal_periods;

        // Length penalty: decays exponentially with the number of
        // non-period characters.
        s.length_penalty = 1 / Math.Exp(non_period_length);

        // Internal periods factor: grows with the number of internal periods.
        s.internal_periods_penalty = internal_periods + 1;

        // Penalty for occurrences without the final period. A zero length
        // is replaced by 1 so that the base of the power is never 0.
        // Note: for large exponents Math.Pow can overflow to infinity, in
        // which case this penalty evaluates to 0.
        var penalty_base = (non_period_length == 0) ? 1 : non_period_length;
        s.with_final_period_penalty = 1 / Math.Pow(penalty_base, (s.c_w0 - s.c_w01));

        // Scaled log likelihood.
        s.scaled_llr = s.llr * s.length_penalty * s.internal_periods_penalty * s.with_final_period_penalty;

        // Build the final classification for this period-final bigram.
        var c = new Type_Classification();
        c.statistics = s;

        // Annotate with the classification.
        if (s.scaled_llr < classification_threshold)
        {
            c.classification = "<S>";
        }
        else if (non_period_length == 0)
        {
            // The candidate is made up of periods only (or is empty).
            c.classification = "<E>";
        }
        else
        {
            c.classification = "<A>";
        }

        yield return c;
    }
}
/// <summary>
/// Builds a frequency table from the tokens of the sample text and checks
/// the total count, the count of one term and the number of terms.
/// </summary>
public void test_create_frequency_object_from_text()
{
    var text = TextExamples.emma();
    var freq = new Frequencies<string>();

    // The pattern has a capturing group, so Regex.Split also returns the
    // separator runs; they are added to the table like any other token.
    foreach (var token in Regex.Split(text, @"(\W+)"))
    {
        freq.Add(token);
    }

    // Expected value first, actual value second, so that a failure
    // message reports the two the right way round.
    Assert.AreEqual(2227, freq.Count());
    Assert.AreEqual(47.0, freq.Get("and"));
    Assert.AreEqual(479, freq.Terms().Count());
}
/// <summary>
/// Adding a term once to an empty table must give that term a count of 1.
/// </summary>
public void test_add_to_frequencies()
{
    // Arrange: start from an empty table.
    Frequencies<string> table = new Frequencies<string>();

    // Act: record a single occurrence.
    table.Add("a");
    var observed = table.Get("a");

    // Assert
    Assert.AreEqual(1, observed);
}