Exemplo n.º 1
0
 /// <summary>
 /// Walks the Austen corpus as word bigrams, joins each pair into a single term, records every
 /// term that begins with "Mr" in a <c>Frequencies&lt;string&gt;</c>, and prints each generated
 /// key/value entry to the console.
 /// </summary>
 /// <remarks>
 /// The corpus is read from <c>Data\Austen</c> under the current user's "My Documents" folder.
 /// </remarks>
 private static void get_instances_of_Mr_from_austen()
 {
     var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments), @"Data\Austen");
     var austen = new TextCorpusReader(path);
     var freq = new Frequencies<string>();

     foreach (var w in austen.words().NGram(2))
     {
         // Append is not a BCL string method, so it is presumably a project extension that
         // concatenates the two words -- TODO confirm whether it inserts a separator.
         var term = w.First().Trim().Append(w.Last()).Trim();

         // Stricter variant kept for reference (would skip "Mr." and "Mrs"):
         //if (term.Length > 1 && term.Substring(0, 2) == "Mr" && term.Substring(2, 1) != "." && term.Substring(2, 1) != "s")

         // Ordinal prefix test; equivalent to the former Length/Substring comparison.
         if (term.StartsWith("Mr", StringComparison.Ordinal))
         {
             freq.Add(term);
         }
     }

     foreach (var t in freq.Generate())
     {
         Console.WriteLine("{0} => {1}", t.Key, t.Value);
     }
 }
Exemplo n.º 2
0
        /// <summary>
        /// Runs <c>Punkt.process</c> over the words of a corpus and writes one line per type
        /// classifier result to <c>statistics.log</c>, ordered by ascending <c>scaled_llr</c>.
        /// </summary>
        /// <remarks>
        /// Despite the method name, the Treebank-3 reader is commented out and the Austen corpus
        /// (<c>Data\Austen</c> under "My Documents") is what is actually read.
        /// </remarks>
        private static void get_punkt_statistics_from_treebank3()
        {
            // Original Treebank-3 input, currently disabled:
            //var path = Path.Combine(Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments), @"Data\Treebank-3");
            //var treebank = new Treebank3CorpusReader(path);
            var documents = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
            var corpusPath = Path.Combine(documents, @"Data\Austen");
            var corpus = new TextCorpusReader(corpusPath);

            // Drop tokens longer than seven characters that start with "Speaker".
            // NOTE(review): a token that is exactly "Speaker" passes the filter -- confirm this is intended.
            var tokens = corpus.words()
                .Where(word => word.Length <= 7 || !word.StartsWith("Speaker", StringComparison.Ordinal));
            var stats = Punkt.process(tokens);

            using (var log = new StreamWriter("statistics.log"))
            {
                var ranked = stats.type_classifier_results.OrderBy(result => result.statistics.scaled_llr);
                foreach (var entry in ranked)
                {
                    // One record per classifier result, written to the log for later review.
                    log.WriteLine(
                        @"{0}, {1}, {2}, {3} {4} {5} {6} {7}",
                        entry.statistics.scaled_llr,
                        entry.statistics.c_w0,
                        entry.statistics.c_w1,
                        entry.statistics.c_w01,
                        entry.statistics.length_penalty,
                        entry.statistics.with_final_period_penalty,
                        entry.statistics.w0,
                        entry.classification);
                    // Console variant kept for reference:
                    //Console.WriteLine(@"{0}, c(w,.)={1},  c(w,~.)={2} -> {3}", stat.statistics.scaled_llr, stat.statistics.c_w01, stat.statistics.c_w0 - stat.statistics.c_w01, stat.statistics.w01);
                }
            }
        }
Exemplo n.º 3
0
 /// <summary>
 /// Reads <c>1789-Washington.txt</c> from the inaugural corpus via
 /// <c>TextCorpusReader.read_raw</c> and prints every item it yields to the console.
 /// </summary>
 /// <remarks>
 /// The corpus is expected at <c>Data\inaugural</c> under the current user's "My Documents" folder.
 /// </remarks>
 private static void read_raw_text_from_washingtons_first_inagural_address()
 {
     var documents = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
     var corpusDir = Path.Combine(documents, @"Data\inaugural");
     var reader = new TextCorpusReader(corpusDir);
     var addressFile = Path.Combine(corpusDir, "1789-Washington.txt");

     foreach (var chunk in reader.read_raw(addressFile))
     {
         Console.WriteLine(chunk);
     }
 }
Exemplo n.º 4
0
 /// <summary>
 /// Builds space-joined word trigrams from the inaugural corpus, records them in a
 /// <c>Frequencies&lt;string&gt;</c>, and prints the ten entries with the highest values.
 /// </summary>
 /// <remarks>
 /// The corpus is expected at <c>Data\inaugural</c> under the current user's "My Documents" folder.
 /// </remarks>
 private static void read_trigram_frequencies_from_inaugural_addresses()
 {
     var documents = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
     var corpusDir = Path.Combine(documents, @"Data\inaugural");
     var reader = new TextCorpusReader(corpusDir);
     var counts = new Frequencies<string>();

     // Words equal to ", " (comma followed by a space) are skipped before n-grams are formed.
     // NOTE(review): if the reader emits bare "," tokens this filter never matches -- confirm.
     var trigrams = reader.words().Where(word => word != ", ").NGram(3);
     foreach (var trigram in trigrams)
     {
         // An empty n-gram collapses to the empty string; otherwise words are joined by one space.
         var joined = trigram.DefaultIfEmpty("").Aggregate((left, right) => left + " " + right);
         counts.Add(joined);
     }

     // Ascending sort followed by Reverse yields descending order by value.
     var topTen = counts.Generate().OrderBy(entry => entry.Value).Reverse().Take(10);
     foreach (var entry in topTen)
     {
         Console.WriteLine(@"{0}: {1}", entry.Key, entry.Value);
     }
 }
Exemplo n.º 5
0
        /// <summary>
        /// Feeds every word trigram of the inaugural corpus into a <c>ConditionalFrequencies</c>
        /// and prints each generated key whose value reports a count above one, followed by a
        /// tab-indented line per entry generated from that value.
        /// </summary>
        /// <remarks>
        /// The corpus is expected at <c>Data\inaugural</c> under the current user's "My Documents" folder.
        /// </remarks>
        private static void inaugural_ngram_conditional_frequencies()
        {
            var documents = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments);
            var corpusDir = Path.Combine(documents, @"Data\inaugural");
            var reader = new TextCorpusReader(corpusDir);
            var conditional = new ConditionalFrequencies();

            // Words equal to ", " (comma followed by a space) are skipped before n-grams are formed.
            var trigrams = reader.words().Where(word => word != ", ").NGram(3);
            foreach (var trigram in trigrams)
            {
                conditional.Add(trigram);
            }

            foreach (var condition in conditional.Generate())
            {
                // Only conditions whose value reports a count greater than one are printed.
                if (condition.Value.Count() <= 1)
                {
                    continue;
                }

                Console.WriteLine(condition.Key);
                foreach (var outcome in condition.Value.Generate())
                {
                    Console.WriteLine("\t{0} -> {1}", outcome.Key, outcome.Value);
                }
            }
        }