Example #1
0
        /// <summary>
        /// Demonstration entry point: loads the path given as the first argument into a
        /// disk-backed and a memory-backed treebank, combines them, and prints the
        /// combination before and after applying three tree transformers.
        /// </summary>
        /// <remarks>
        /// The same three transformers are applied in three ways so the printed results
        /// can be compared: chained <c>Transform</c> calls, nested
        /// <c>TransformingTreebank</c> constructors, and a single
        /// <c>CompositeTreeTransformer</c>. <br />
        /// Expected arguments: <c>treebankFilesPath</c>
        /// </remarks>
        /// <param name="args">array of command-line arguments; the first one is the path to load</param>
        public static void Main(string[] args)
        {
            // Fail with a readable message instead of an index exception when no path is given.
            if (args == null || args.Length < 1)
            {
                System.Console.Error.WriteLine("Missing required argument: treebankFilesPath");
                System.Environment.ExitCode = 1;
                return;
            }

            // Load the same path twice and time the loading.
            Timing.StartTime();
            Treebank treebank  = new DiskTreebank(null);
            Treebank treebank2 = new MemoryTreebank(null);

            treebank.LoadPath(args[0]);
            treebank2.LoadPath(args[0]);
            CompositeTreebank c = new CompositeTreebank(treebank, treebank2);

            Timing.EndTime();

            // Three equivalent-looking ways of stacking the three transformers.
            ITreeTransformer myTransformer  = new TransformingTreebank.MyTreeTransformer();
            ITreeTransformer myTransformer2 = new TransformingTreebank.MyTreeTransformer2();
            ITreeTransformer myTransformer3 = new TransformingTreebank.MyTreeTransformer3();
            Treebank         tf1            = c.Transform(myTransformer).Transform(myTransformer2).Transform(myTransformer3);
            Treebank         tf2            = new Edu.Stanford.Nlp.Trees.TransformingTreebank(new Edu.Stanford.Nlp.Trees.TransformingTreebank(new Edu.Stanford.Nlp.Trees.TransformingTreebank(c, myTransformer), myTransformer2), myTransformer3);

            ITreeTransformer[] tta = new ITreeTransformer[] { myTransformer, myTransformer2, myTransformer3 };
            ITreeTransformer   tt3 = new CompositeTreeTransformer(Arrays.AsList(tta));
            Treebank           tf3 = c.Transform(tt3);

            System.Console.Out.WriteLine("-------------------------");
            System.Console.Out.WriteLine("COMPOSITE (DISK THEN MEMORY REPEATED VERSION OF) INPUT TREEBANK");
            System.Console.Out.WriteLine(c);
            System.Console.Out.WriteLine("-------------------------");

            // Apply the transformers one layer at a time, printing after each layer.
            System.Console.Out.WriteLine("SLOWLY TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
            Treebank tx1 = new Edu.Stanford.Nlp.Trees.TransformingTreebank(c, myTransformer);

            System.Console.Out.WriteLine(tx1);
            System.Console.Out.WriteLine("-----");
            Treebank tx2 = new Edu.Stanford.Nlp.Trees.TransformingTreebank(tx1, myTransformer2);

            System.Console.Out.WriteLine(tx2);
            System.Console.Out.WriteLine("-----");
            Treebank tx3 = new Edu.Stanford.Nlp.Trees.TransformingTreebank(tx2, myTransformer3);

            System.Console.Out.WriteLine(tx3);
            System.Console.Out.WriteLine("-------------------------");
            System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING Treebank.transform()");
            System.Console.Out.WriteLine(tf1);
            System.Console.Out.WriteLine("-------------------------");

            // tf1 is printed a second time on purpose, per the heading below.
            System.Console.Out.WriteLine("PRINTING AGAIN TRANSFORMED TREEBANK, USING Treebank.transform()");
            System.Console.Out.WriteLine(tf1);
            System.Console.Out.WriteLine("-------------------------");
            System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
            System.Console.Out.WriteLine(tf2);
            System.Console.Out.WriteLine("-------------------------");
            System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING CompositeTreeTransformer");
            System.Console.Out.WriteLine(tf3);
            System.Console.Out.WriteLine("-------------------------");

            // Print the untransformed composite once more at the end.
            System.Console.Out.WriteLine("COMPOSITE (DISK THEN MEMORY REPEATED VERSION OF) INPUT TREEBANK");
            System.Console.Out.WriteLine(c);
            System.Console.Out.WriteLine("-------------------------");
        }
        /// <summary>
        /// Tallies how often each word occurs with each tag in <paramref name="tb"/> and
        /// writes one tab-separated line per word to <paramref name="pw"/>.
        /// </summary>
        /// <remarks>
        /// Each output line is the word, a tab, and then <c>tag TAB count TAB</c> repeated
        /// for every tag recorded for that word.
        /// </remarks>
        /// <param name="tb">Treebank whose tagged yields are counted.</param>
        /// <param name="pw">Writer that receives the table.</param>
        private static void CountTaggings(Treebank tb, PrintWriter pw)
        {
            TwoDimensionalCounter <string, string> wtc = new TwoDimensionalCounter <string, string>();

            // Fill the counter by walking every tagged word; without this pass wtc stays
            // empty and the loop below prints nothing.
            // NOTE(review): Word() and IncrementCount follow the naming of the other
            // converted calls in this file - confirm against TaggedWord / TwoDimensionalCounter.
            foreach (Tree t in tb)
            {
                foreach (TaggedWord tw in t.TaggedYield())
                {
                    wtc.IncrementCount(tw.Word(), tw.Tag());
                }
            }

            foreach (string key in wtc.FirstKeySet())
            {
                pw.Print(key);
                pw.Print('\t');
                ICounter <string> ctr = wtc.GetCounter(key);
                foreach (string k2 in ctr.KeySet())
                {
                    pw.Print(k2 + '\t' + ctr.GetCount(k2) + '\t');
                }
                pw.Println();
            }
        }
        /// <summary>
        /// Times three passes over <paramref name="treebank"/>: enumerating it and summing
        /// yield sizes, applying a tree visitor, and reading its tree count.
        /// </summary>
        /// <remarks>
        /// The word total is computed only by the enumeration pass; the same total is
        /// logged again after the visitor pass because the visitor's result is not read here.
        /// No <c>Timing.StartTime()</c> call is made between the first and second
        /// <c>Timing.EndTime</c> calls.
        /// </remarks>
        /// <param name="treebank">Treebank to traverse.</param>
        private static void RunTiming(Treebank treebank)
        {
            System.Console.Out.WriteLine();

            // Pass 1: enumerate the trees and add up the size of each yield.
            Timing.StartTime();
            int wordCount = 0;
            foreach (Tree tree in treebank)
            {
                wordCount = wordCount + tree.Yield().Count;
            }
            Timing.EndTime("traversing corpus, counting words with iterator");
            string wordReport = "There were " + wordCount + " words in the treebank.";
            log.Info(wordReport);

            // Pass 2: let a visitor walk the treebank, then repeat the report from pass 1.
            treebank.Apply(new _ITreeVisitor_352());
            log.Info();
            Timing.EndTime("traversing corpus, counting words with TreeVisitor");
            log.Info(wordReport);
            log.Info();

            // Pass 3: time the lookup of the number of trees.
            Timing.StartTime();
            string sizeReport = "This treebank contains " + treebank.Count + " trees.";
            log.Info(sizeReport);
            Timing.EndTime("size of corpus");
        }
 // end RunTiming()
 /// <summary>
 /// Writes to <paramref name="pw"/> every tagged word in <paramref name="treebank"/>
 /// whose tag is accepted by the language pack's punctuation tag filter.
 /// </summary>
 /// <param name="treebank">Treebank whose tagged yields are scanned.</param>
 /// <param name="tlp">Language pack supplying the filter; when null a message is logged and nothing is written.</param>
 /// <param name="pw">Writer that receives one line per matching tagged word.</param>
 private static void PrintPunct(Treebank treebank, ITreebankLanguagePack tlp, PrintWriter pw)
 {
     // Without a language pack there is no filter to apply.
     if (tlp == null)
     {
         log.Info("The -punct option requires you to specify -tlp");
         return;
     }

     IPredicate <string> isPunctTag = tlp.PunctuationTagAcceptFilter();
     foreach (Tree tree in treebank)
     {
         foreach (TaggedWord taggedWord in tree.TaggedYield())
         {
             if (!isPunctTag.Test(taggedWord.Tag()))
             {
                 continue;
             }
             pw.Println(taggedWord);
         }
     }
 }
Example #5
0
 /// <summary>
 /// Creates a FilteringTreebank that keeps a reference to the given treebank and
 /// the given tree predicate.
 /// </summary>
 /// <param name="treebank">Treebank stored in the <c>treebank</c> field.</param>
 /// <param name="filter">
 /// Predicate stored in the <c>filter</c> field; presumably decides which trees of
 /// the wrapped treebank are exposed - confirm against the rest of the class.
 /// </param>
 public FilteringTreebank(Treebank treebank, IPredicate <Tree> filter)
 {
     this.treebank = treebank;
     this.filter   = filter;
 }
        /// <summary>
        /// Prints a histogram of sentence lengths for <paramref name="treebank"/> to the
        /// console, followed by the average, median and maximum length, and writes the
        /// longest tree to <paramref name="pw"/>.
        /// </summary>
        /// <remarks>
        /// Lengths 0..150 get one histogram row each; anything longer is pooled into one
        /// overflow row. The median is only located inside the 0..150 rows, so it stays
        /// 0.0 when the middle sentence falls in the overflow row. With an empty treebank
        /// the average is the result of a floating-point division by zero.
        /// </remarks>
        /// <param name="treebank">Treebank whose trees are measured by yield size.</param>
        /// <param name="name">Label printed after "Files" in the header line.</param>
        /// <param name="range">Optional extra label for the header line; skipped when null.</param>
        /// <param name="pw">Writer that receives the string form of the longest tree.</param>
        private static void SentenceLengths(Treebank treebank, string name, string range, PrintWriter pw)
        {
            int maxleng = 150;

            // Slots 0..maxleng count exact lengths; slot maxleng + 1 pools everything longer.
            int[]        lengthCounts = new int[maxleng + 2];
            int          numSents     = 0;
            int          longestSeen  = 0;
            int          totalWords   = 0;
            string       longSent     = string.Empty;
            double       median       = 0.0;
            NumberFormat nf           = new DecimalFormat("0.0");
            bool         foundMedian  = false;

            foreach (Tree t in treebank)
            {
                numSents++;
                int len = t.Yield().Count;
                if (len <= maxleng)
                {
                    lengthCounts[len]++;
                }
                else
                {
                    lengthCounts[maxleng + 1]++;
                }
                totalWords += len;
                if (len > longestSeen)
                {
                    longestSeen = len;
                    longSent    = t.ToString();
                }
            }
            System.Console.Out.Write("Files " + name + ' ');
            if (range != null)
            {
                System.Console.Out.Write(range + ' ');
            }
            System.Console.Out.WriteLine("consists of " + numSents + " sentences");
            int runningTotal = 0;

            for (int i = 0; i <= maxleng; i++)
            {
                // Number of sentences strictly shorter than i.
                int shorterThanI = runningTotal;
                runningTotal += lengthCounts[i];
                System.Console.Out.WriteLine("  " + lengthCounts[i] + " of length " + i + " (running total: " + runningTotal + ')');
                if (!foundMedian && runningTotal > numSents / 2)
                {
                    // Length i holds the upper middle sentence. For an even count the lower
                    // middle sentence is shorter only when exactly half of the sentences lie
                    // below i; otherwise both middle sentences have length i.
                    if (numSents % 2 == 0 && shorterThanI == numSents / 2)
                    {
                        // right on the boundary: average i with the nearest shorter length in use
                        int j = i - 1;
                        while (j > 0 && lengthCounts[j] == 0)
                        {
                            j--;
                        }
                        median = ((double)i + j) / 2;
                    }
                    else
                    {
                        median = i;
                    }
                    foundMedian = true;
                }
            }
            if (lengthCounts[maxleng + 1] > 0)
            {
                runningTotal += lengthCounts[maxleng + 1];
                System.Console.Out.WriteLine("  " + lengthCounts[maxleng + 1] + " of length " + (maxleng + 1) + " to " + longestSeen + " (running total: " + runningTotal + ')');
            }
            System.Console.Out.WriteLine("Average length: " + nf.Format(((double)totalWords) / numSents) + "; median length: " + nf.Format(median));
            System.Console.Out.WriteLine("Longest sentence is of length: " + longestSeen);
            pw.Println(longSent);
        }
Example #7
0
 /// <summary>
 /// Builds a TransformingTreebank on top of an existing Treebank, pairing it with
 /// the TreeTransformer to be used on its trees.
 /// </summary>
 /// <remarks>
 /// This is the constructor that you should use. It only records the two
 /// arguments; no tree is touched here.
 /// </remarks>
 /// <param name="tb">The base Treebank</param>
 /// <param name="transformer">The TreeTransformer applied to each Tree.</param>
 public TransformingTreebank(Treebank tb, ITreeTransformer transformer)
 {
     this.transformer = transformer;
     this.tb          = tb;
 }
Example #8
0
 /// <summary>
 /// Creates a CompositeTreebank that keeps references to two treebanks.
 /// </summary>
 /// <param name="t1">Treebank stored in the <c>t1</c> field; presumably the one visited first - confirm.</param>
 /// <param name="t2">Treebank stored in the <c>t2</c> field; presumably the one visited second - confirm.</param>
 public CompositeTreebank(Treebank t1, Treebank t2)
 {
     this.t2 = t2;
     this.t1 = t1;
 }