/// <summary>
/// Demonstrates treebank transformation: loads the path given as the first
/// argument and prints the treebank before and after transformation.
/// </summary>
/// <remarks>
/// The path is loaded into both a DiskTreebank and a MemoryTreebank, which are
/// joined in a CompositeTreebank. Three tree transformers are then applied in
/// three different ways (chained <c>Transform</c> calls, nested
/// TransformingTreebank constructors, and a single CompositeTreeTransformer)
/// and every result is printed to standard output.
/// Just a demonstration of functionality.
/// </remarks>
/// <param name="args">
/// array of command-line arguments; <c>args[0]</c> is the treebank files path
/// </param>
public static void Main(string[] args)
{
    // Without a path there is nothing to load; report the problem instead of
    // failing with an index-out-of-range error on args[0].
    if (args == null || args.Length == 0)
    {
        System.Console.Error.WriteLine("Expected the treebank files path as the first argument.");
        return;
    }

    // Load the same path twice (disk-backed and in-memory) and time the loading.
    Timing.StartTime();
    Treebank treebank = new DiskTreebank(null);
    Treebank treebank2 = new MemoryTreebank(null);
    treebank.LoadPath(args[0]);
    treebank2.LoadPath(args[0]);
    CompositeTreebank c = new CompositeTreebank(treebank, treebank2);
    Timing.EndTime();

    ITreeTransformer myTransformer = new TransformingTreebank.MyTreeTransformer();
    ITreeTransformer myTransformer2 = new TransformingTreebank.MyTreeTransformer2();
    ITreeTransformer myTransformer3 = new TransformingTreebank.MyTreeTransformer3();

    // Variant 1: chained Transform() calls.
    Treebank tf1 = c.Transform(myTransformer).Transform(myTransformer2).Transform(myTransformer3);

    // Variant 2: explicitly nested TransformingTreebank constructors.
    Treebank tf2 = new Edu.Stanford.Nlp.Trees.TransformingTreebank(
        new Edu.Stanford.Nlp.Trees.TransformingTreebank(
            new Edu.Stanford.Nlp.Trees.TransformingTreebank(c, myTransformer),
            myTransformer2),
        myTransformer3);

    // Variant 3: one composite transformer wrapping all three.
    ITreeTransformer[] tta = new ITreeTransformer[] { myTransformer, myTransformer2, myTransformer3 };
    ITreeTransformer tt3 = new CompositeTreeTransformer(Arrays.AsList(tta));
    Treebank tf3 = c.Transform(tt3);

    System.Console.Out.WriteLine("-------------------------");
    System.Console.Out.WriteLine("COMPOSITE (DISK THEN MEMORY REPEATED VERSION OF) INPUT TREEBANK");
    System.Console.Out.WriteLine(c);

    // Apply the transformers one layer at a time, printing after each layer.
    System.Console.Out.WriteLine("-------------------------");
    System.Console.Out.WriteLine("SLOWLY TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
    Treebank tx1 = new Edu.Stanford.Nlp.Trees.TransformingTreebank(c, myTransformer);
    System.Console.Out.WriteLine(tx1);
    System.Console.Out.WriteLine("-----");
    Treebank tx2 = new Edu.Stanford.Nlp.Trees.TransformingTreebank(tx1, myTransformer2);
    System.Console.Out.WriteLine(tx2);
    System.Console.Out.WriteLine("-----");
    Treebank tx3 = new Edu.Stanford.Nlp.Trees.TransformingTreebank(tx2, myTransformer3);
    System.Console.Out.WriteLine(tx3);

    System.Console.Out.WriteLine("-------------------------");
    System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING Treebank.transform()");
    System.Console.Out.WriteLine(tf1);

    // tf1 is deliberately printed a second time.
    System.Console.Out.WriteLine("-------------------------");
    System.Console.Out.WriteLine("PRINTING AGAIN TRANSFORMED TREEBANK, USING Treebank.transform()");
    System.Console.Out.WriteLine(tf1);

    System.Console.Out.WriteLine("-------------------------");
    System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
    System.Console.Out.WriteLine(tf2);

    System.Console.Out.WriteLine("-------------------------");
    System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING CompositeTreeTransformer");
    System.Console.Out.WriteLine(tf3);

    // Print the untransformed input once more after all the transformed views.
    System.Console.Out.WriteLine("-------------------------");
    System.Console.Out.WriteLine("COMPOSITE (DISK THEN MEMORY REPEATED VERSION OF) INPUT TREEBANK");
    System.Console.Out.WriteLine(c);
    System.Console.Out.WriteLine("-------------------------");
}
/// <summary>
/// Counts how often each word occurs with each tag in the treebank and
/// writes one tab-separated line per word.
/// </summary>
/// <remarks>
/// Each output line is the word, a tab, and then a <c>tag TAB count TAB</c>
/// group for every tag recorded for that word.
/// </remarks>
/// <param name="tb">The treebank whose tagged yields are counted.</param>
/// <param name="pw">The writer that receives the tab-separated table.</param>
private static void CountTaggings(Treebank tb, PrintWriter pw)
{
    TwoDimensionalCounter<string, string> wtc = new TwoDimensionalCounter<string, string>();

    // The previous code called tb.Apply(null), i.e. it passed no visitor, so
    // the counter was never filled and nothing was printed. Tally the
    // word/tag pairs directly by iterating the treebank instead.
    foreach (var tree in tb)
    {
        foreach (var taggedWord in tree.TaggedYield())
        {
            wtc.IncrementCount(taggedWord.Word(), taggedWord.Tag());
        }
    }

    foreach (string key in wtc.FirstKeySet())
    {
        pw.Print(key);
        pw.Print('\t');
        ICounter<string> ctr = wtc.GetCounter(key);
        foreach (string k2 in ctr.KeySet())
        {
            pw.Print(k2 + '\t' + ctr.GetCount(k2) + '\t');
        }
        pw.Println();
    }
}
/// <summary>
/// Times a few traversals of the treebank and reports the measurements
/// together with word and tree counts.
/// </summary>
/// <param name="treebank">The treebank to traverse and measure.</param>
private static void RunTiming(Treebank treebank)
{
    System.Console.Out.WriteLine();
    Timing.StartTime();

    // First pass: walk the trees with the iterator and add up the yield sizes.
    int wordTotal = 0;
    foreach (Tree tree in treebank)
    {
        wordTotal += tree.Yield().Count;
    }
    Timing.EndTime("traversing corpus, counting words with iterator");
    string wordReport = "There were " + wordTotal + " words in the treebank.";
    log.Info(wordReport);

    // Second pass: traverse again through a tree visitor. The local word
    // total is not touched by this pass, so the same figure is reported.
    treebank.Apply(new _ITreeVisitor_352());
    log.Info();
    Timing.EndTime("traversing corpus, counting words with TreeVisitor");
    log.Info(wordReport);
    log.Info();

    // Finally, time how long it takes to ask the treebank for its size.
    Timing.StartTime();
    string sizeReport = "This treebank contains " + treebank.Count + " trees.";
    log.Info(sizeReport);
    Timing.EndTime("size of corpus");
}
/// <summary>
/// Prints every tagged word of the treebank whose tag is accepted by the
/// language pack's punctuation tag filter.
/// </summary>
/// <param name="treebank">The treebank whose tagged yields are scanned.</param>
/// <param name="tlp">
/// The language pack supplying the punctuation tag filter; when null, only a
/// message is logged and nothing is printed.
/// </param>
/// <param name="pw">The writer that receives one matching tagged word per line.</param>
private static void PrintPunct(Treebank treebank, ITreebankLanguagePack tlp, PrintWriter pw)
{
    // No language pack means no filter to test tags against.
    if (tlp == null)
    {
        log.Info("The -punct option requires you to specify -tlp");
        return;
    }

    IPredicate<string> isPunctuationTag = tlp.PunctuationTagAcceptFilter();
    foreach (Tree tree in treebank)
    {
        foreach (TaggedWord taggedWord in tree.TaggedYield())
        {
            if (isPunctuationTag.Test(taggedWord.Tag()))
            {
                pw.Println(taggedWord);
            }
        }
    }
}
/// <summary>
/// Creates a FilteringTreebank that keeps a reference to the given base
/// treebank and the given tree filter.
/// </summary>
/// <param name="treebank">The underlying treebank.</param>
/// <param name="filter">The predicate over trees stored for this treebank.</param>
public FilteringTreebank(Treebank treebank, IPredicate<Tree> filter)
{
    this.treebank = treebank;
    this.filter = filter;
}
/// <summary>
/// Prints a histogram of sentence lengths for the treebank, followed by the
/// average length, the median length and the length of the longest sentence.
/// </summary>
/// <remarks>
/// Lengths 0 through 150 get one histogram bucket each; all longer sentences
/// share a single overflow bucket. The median is searched for only among the
/// individual buckets, so it stays at 0.0 if it would fall into the overflow
/// bucket. Statistics go to standard output; the text of the longest sentence
/// is written to <paramref name="pw"/>.
/// </remarks>
/// <param name="treebank">The treebank whose sentence lengths are measured.</param>
/// <param name="name">Label printed after "Files " in the header line.</param>
/// <param name="range">Optional range label for the header; skipped when null.</param>
/// <param name="pw">The writer that receives the longest sentence.</param>
private static void SentenceLengths(Treebank treebank, string name, string range, PrintWriter pw)
{
    int maxleng = 150;
    // Index maxleng + 1 is the overflow bucket for sentences longer than maxleng.
    int[] lengthCounts = new int[maxleng + 2];
    int numSents = 0;
    int longestSeen = 0;
    int totalWords = 0;
    string longSent = string.Empty;
    double median = 0.0;
    NumberFormat nf = new DecimalFormat("0.0");
    bool foundMedian = false;

    foreach (Tree t in treebank)
    {
        numSents++;
        int len = t.Yield().Count;
        if (len <= maxleng)
        {
            lengthCounts[len]++;
        }
        else
        {
            lengthCounts[maxleng + 1]++;
        }
        totalWords += len;
        if (len > longestSeen)
        {
            longestSeen = len;
            longSent = t.ToString();
        }
    }

    System.Console.Out.Write("Files " + name + ' ');
    if (range != null)
    {
        System.Console.Out.Write(range + ' ');
    }
    System.Console.Out.WriteLine("consists of " + numSents + " sentences");

    int runningTotal = 0;
    for (int i = 0; i <= maxleng; i++)
    {
        int totalBefore = runningTotal;
        runningTotal += lengthCounts[i];
        System.Console.Out.WriteLine(" " + lengthCounts[i] + " of length " + i + " (running total: " + runningTotal + ')');
        if (!foundMedian && runningTotal > numSents / 2)
        {
            // With an even number of sentences the median averages the two
            // middle sentences. They sit in different buckets only when the
            // buckets below i hold exactly half of the sentences. The former
            // test (runningTotal == numSents / 2 + 1) got this wrong whenever
            // bucket i held more than one sentence, e.g. lengths {3, 3}
            // produced 1.5 and {1, 2, 3, 3} produced 3.
            if (numSents % 2 == 0 && totalBefore == numSents / 2)
            {
                // The lower middle sentence is in the nearest non-empty
                // bucket below i; one must exist because totalBefore > 0.
                int j = i - 1;
                while (j > 0 && lengthCounts[j] == 0)
                {
                    j--;
                }
                median = ((double)i + j) / 2;
            }
            else
            {
                median = i;
            }
            foundMedian = true;
        }
    }

    if (lengthCounts[maxleng + 1] > 0)
    {
        runningTotal += lengthCounts[maxleng + 1];
        System.Console.Out.WriteLine(" " + lengthCounts[maxleng + 1] + " of length " + (maxleng + 1) + " to " + longestSeen + " (running total: " + runningTotal + ')');
    }
    System.Console.Out.WriteLine("Average length: " + nf.Format(((double)totalWords) / numSents) + "; median length: " + nf.Format(median));
    System.Console.Out.WriteLine("Longest sentence is of length: " + longestSeen);
    pw.Println(longSent);
}
/// <summary>
/// Builds a TransformingTreebank on top of an existing Treebank, pairing it
/// with the TreeTransformer to be used on its trees.
/// </summary>
/// <remarks>This is the constructor that you should use.</remarks>
/// <param name="tb">The base Treebank</param>
/// <param name="transformer">The TreeTransformer applied to each Tree.</param>
public TransformingTreebank(Treebank tb, ITreeTransformer transformer)
{
    this.transformer = transformer;
    this.tb = tb;
}
/// <summary>
/// Creates a CompositeTreebank that holds references to the two given
/// treebanks.
/// </summary>
/// <param name="t1">The first treebank.</param>
/// <param name="t2">The second treebank.</param>
public CompositeTreebank(Treebank t1, Treebank t2)
{
    this.t2 = t2;
    this.t1 = t1;
}