/// <summary>Debugging entry point: loads a treebank from disk and applies a visitor to it.</summary>
        /// <remarks>
        /// Meant to go through the trees, determine their heads and print them. <br />
        /// Usage: <code>
        /// java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath
        /// </code>
        /// NOTE(review): the head finder created here is never handed to anything and
        /// <c>Apply</c> receives <c>null</c>; presumably the visitor that printed the
        /// heads is missing -- confirm before relying on this method.
        /// </remarks>
        /// <param name="args">Command-line arguments; <c>args[0]</c> is the treebank file path.</param>
        public static void Main(string[] args)
        {
            Treebank trees = new DiskTreebank();

            // Terminal details are switched off before any tree is loaded.
            CategoryWordTag.suppressTerminalDetails = true;

            string treebankPath = args[0];
            trees.LoadPath(treebankPath);

            IHeadFinder headFinder = new Edu.Stanford.Nlp.Trees.CollinsHeadFinder();

            trees.Apply(null);
        }
예제 #2
0
        /// <summary>Loads a treebank from the first argument and prints it under several transformations.</summary>
        /// <remarks>
        /// Just a demonstration of functionality. The path is loaded twice, once into a
        /// <c>DiskTreebank</c> and once into a <c>MemoryTreebank</c>, and both are joined
        /// in a <c>CompositeTreebank</c>. Three transformers are then applied to it in
        /// three ways: chained <c>Transform</c> calls, nested <c>TransformingTreebank</c>
        /// constructors, and a single <c>CompositeTreeTransformer</c>. <br />
        /// <code>usage: java MemoryTreebank treebankFilesPath</code>
        /// </remarks>
        /// <param name="args">Array of command-line arguments; <c>args[0]</c> is the path to load.</param>
        public static void Main(string[] args)
        {
            const string Rule         = "-------------------------";
            const string MinorRule    = "-----";
            const string InputHeading = "COMPOSITE (DISK THEN MEMORY REPEATED VERSION OF) INPUT TREEBANK";

            // Loading of both treebanks happens between StartTime and EndTime.
            Timing.StartTime();
            Treebank onDisk   = new DiskTreebank(null);
            Treebank inMemory = new MemoryTreebank(null);

            onDisk.LoadPath(args[0]);
            inMemory.LoadPath(args[0]);
            CompositeTreebank combined = new CompositeTreebank(onDisk, inMemory);

            Timing.EndTime();

            ITreeTransformer first  = new TransformingTreebank.MyTreeTransformer();
            ITreeTransformer second = new TransformingTreebank.MyTreeTransformer2();
            ITreeTransformer third  = new TransformingTreebank.MyTreeTransformer3();

            // Variant 1: chained Transform() calls.
            Treebank viaTransform = combined.Transform(first).Transform(second).Transform(third);

            // Variant 2: explicitly nested TransformingTreebank wrappers, innermost first.
            Edu.Stanford.Nlp.Trees.TransformingTreebank innerWrap  = new Edu.Stanford.Nlp.Trees.TransformingTreebank(combined, first);
            Edu.Stanford.Nlp.Trees.TransformingTreebank middleWrap = new Edu.Stanford.Nlp.Trees.TransformingTreebank(innerWrap, second);
            Treebank viaConstructors = new Edu.Stanford.Nlp.Trees.TransformingTreebank(middleWrap, third);

            // Variant 3: the three transformers bundled into one composite transformer.
            ITreeTransformer[] pipeline      = { first, second, third };
            ITreeTransformer   bundled       = new CompositeTreeTransformer(Arrays.AsList(pipeline));
            Treebank           viaComposite  = combined.Transform(bundled);

            System.Console.Out.WriteLine(Rule);
            System.Console.Out.WriteLine(InputHeading);
            System.Console.Out.WriteLine(combined);

            // Step-by-step wrapping, printing the treebank after each additional transformer.
            System.Console.Out.WriteLine(Rule);
            System.Console.Out.WriteLine("SLOWLY TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
            Treebank stepOne = new Edu.Stanford.Nlp.Trees.TransformingTreebank(combined, first);
            System.Console.Out.WriteLine(stepOne);

            System.Console.Out.WriteLine(MinorRule);
            Treebank stepTwo = new Edu.Stanford.Nlp.Trees.TransformingTreebank(stepOne, second);
            System.Console.Out.WriteLine(stepTwo);

            System.Console.Out.WriteLine(MinorRule);
            Treebank stepThree = new Edu.Stanford.Nlp.Trees.TransformingTreebank(stepTwo, third);
            System.Console.Out.WriteLine(stepThree);

            System.Console.Out.WriteLine(Rule);
            System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING Treebank.transform()");
            System.Console.Out.WriteLine(viaTransform);

            // The same treebank is deliberately printed a second time.
            System.Console.Out.WriteLine(Rule);
            System.Console.Out.WriteLine("PRINTING AGAIN TRANSFORMED TREEBANK, USING Treebank.transform()");
            System.Console.Out.WriteLine(viaTransform);

            System.Console.Out.WriteLine(Rule);
            System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
            System.Console.Out.WriteLine(viaConstructors);

            System.Console.Out.WriteLine(Rule);
            System.Console.Out.WriteLine("TRANSFORMED TREEBANK, USING CompositeTreeTransformer");
            System.Console.Out.WriteLine(viaComposite);

            // Finish by printing the untransformed input once more.
            System.Console.Out.WriteLine(Rule);
            System.Console.Out.WriteLine(InputHeading);
            System.Console.Out.WriteLine(combined);
            System.Console.Out.WriteLine(Rule);
        }
예제 #3
0
        /// <summary>Loads the trees found at a path, runs the word stemmer over each, and prints it.</summary>
        /// <param name="args">Usage: WordStemmer file (<c>args[0]</c> is the path handed to <c>LoadPath</c>).</param>
        public static void Main(string[] args)
        {
            Treebank trees = new DiskTreebank();
            trees.LoadPath(args[0]);

            Edu.Stanford.Nlp.Trees.WordStemmer stemmer = new Edu.Stanford.Nlp.Trees.WordStemmer();

            foreach (Tree current in trees)
            {
                // Visit first, then print: the output shows the tree after the visit.
                stemmer.VisitTree(current);
                System.Console.Out.WriteLine(current);
            }
        }
 /// <summary>Creates an iterator over the given treebank and tries to prime its first tree.</summary>
 /// <param name="_enclosing">The treebank whose file paths and file filters are copied for iteration.</param>
 private DiskTreebankIterator(DiskTreebank _enclosing)
 {
     this._enclosing = _enclosing;

     // Work on private copies of the path and filter lists: later loadPath()
     // calls on the treebank then cannot cause exceptions here, and this
     // iterator stays valid for the treebank state at creation time.
     localPathList   = new List <File>(_enclosing.filePaths);
     localFilterList = new List <IFileFilter>(_enclosing.fileFilters);

     // storedTree is only assigned when both a path and a file could be primed
     // (the file is not tried unless a path was primed). Per the original note,
     // null means the iterator is exhausted (or not yet constructed).
     if (PrimeNextPath())
     {
         if (PrimeNextFile())
         {
             storedTree = PrimeNextTree();
         }
     }
 }
예제 #5
0
        /// <summary>Searches treebank paths for trees whose sentence matches a given string.</summary>
        /// <remarks>
        /// The options <c>-tagSeparator</c>, <c>-encoding</c> and <c>-fileRegex</c>
        /// (single or double dash, case-insensitive) each consume the argument that
        /// follows them. Of the remaining arguments, the first is the sentence to look
        /// for (trimmed) and all others are paths to search. Every match is reported on
        /// standard output with the file name and the tree's position within that file.
        /// <para>
        /// NOTE(review): <c>-fileRegex</c> currently has no effect on which files are
        /// read. The pattern is compiled, but the file filter passed to
        /// <c>LoadPath</c> is always <c>null</c>; presumably the filter implementation
        /// is missing and needs to be restored.
        /// </para>
        /// </remarks>
        /// <param name="args">Options, then the sentence to find, then one or more paths.</param>
        public static void Main(string[] args)
        {
            string         needle       = string.Empty;
            string         tagSeparator = "_";
            string         encoding     = "utf-8";
            string         fileRegex    = string.Empty;
            IList <string> paths        = new List <string>();

            for (int i = 0; i < args.Length; ++i)
            {
                string arg = args[i];
                // An option is only honoured when a value follows it; otherwise the
                // argument falls through and is treated as the needle or a path.
                bool hasValue = i + 1 < args.Length;

                if ((Sharpen.Runtime.EqualsIgnoreCase(arg, "-tagSeparator") || Sharpen.Runtime.EqualsIgnoreCase(arg, "--tagSeparator")) && hasValue)
                {
                    tagSeparator = args[++i];
                }
                else if ((Sharpen.Runtime.EqualsIgnoreCase(arg, "-encoding") || Sharpen.Runtime.EqualsIgnoreCase(arg, "--encoding")) && hasValue)
                {
                    encoding = args[++i];
                }
                else if ((Sharpen.Runtime.EqualsIgnoreCase(arg, "-fileRegex") || Sharpen.Runtime.EqualsIgnoreCase(arg, "--fileRegex")) && hasValue)
                {
                    fileRegex = args[++i];
                }
                else if (needle.Equals(string.Empty))
                {
                    needle = arg.Trim();
                }
                else
                {
                    paths.Add(arg);
                }
            }

            ITreeReaderFactory trf = new LabeledScoredTreeReaderFactory();

            // NOTE(review): no filter is ever built from the regex, so this stays null.
            IFileFilter filter = null;

            if (!fileRegex.Equals(string.Empty))
            {
                // The pattern is compiled but not used yet; it is what a restored
                // filter would presumably match file paths against.
                Pattern filePattern = Pattern.Compile(fileRegex);
            }

            foreach (string path in paths)
            {
                // Start a new treebank with the given path, encoding, filter, etc
                DiskTreebank treebank = new DiskTreebank(trf, encoding);
                treebank.LoadPath(path, filter);

                int    treeCount   = 0;
                string currentFile = string.Empty;

                // foreach (rather than driving the enumerator by hand) guarantees the
                // enumerator is disposed once the treebank has been walked.
                foreach (Tree tree in treebank)
                {
                    // The treebank might be a directory, not a single file, so keep
                    // track of which file we are currently looking at and restart
                    // the per-file tree counter whenever it changes.
                    string fileName = treebank.GetCurrentFilename();
                    if (!string.Equals(currentFile, fileName))
                    {
                        currentFile = fileName;
                        treeCount   = 0;
                    }
                    ++treeCount;

                    IList <TaggedWord> sentence = tree.TaggedYield();

                    // The tree can match in one of three ways, each produced below:
                    // the first ListToString form, that form with all spaces removed
                    // (unsegmented, which is useful for Chinese, for example), or the
                    // second ListToString form, which is given the tag separator.
                    string spaced      = SentenceUtils.ListToString(sentence, true);
                    string unsegmented = spaced.ReplaceAll(" ", string.Empty);
                    string separated   = SentenceUtils.ListToString(sentence, false, tagSeparator);

                    bool found = needle.Equals(spaced) || needle.Equals(unsegmented) || needle.Equals(separated);
                    if (found)
                    {
                        System.Console.Out.WriteLine("needle found in " + currentFile + " tree " + treeCount);
                    }
                }
            }
        }
예제 #6
0
        /// <summary>Loads a treebank and runs the requested reports or conversions over it.</summary>
        /// <remarks>
        /// All files below the designated <c>filePath</c> within the given number range,
        /// if any, are loaded. You can normalize the trees or not (English-specific) and
        /// print trees one per line up to a certain length (for EVALB).
        /// <para>
        /// Usage:
        /// <c>java edu.stanford.nlp.trees.Treebanks [-maxLength n|-normalized|-treeReaderFactory class] filePath [numberRanges]</c>
        /// </para>
        /// <para>
        /// Leading arguments starting with <c>-</c> are parsed as options. Options that
        /// take a value (<c>-maxLength</c>, <c>-minLength</c>, <c>-tlp</c>,
        /// <c>-treeReaderFactory</c>/<c>-trf</c>, <c>-suffix</c>, <c>-decimate</c>,
        /// <c>-encoding</c>, <c>-annotate</c>, <c>-filter</c>) are only recognized when
        /// another argument follows them; otherwise they are logged as unknown and
        /// skipped. <c>-h</c>/<c>-help</c> prints the usage and parsing continues.
        /// </para>
        /// <para>
        /// NOTE(review): several calls below pass <c>null</c> to <c>Treebank.Apply</c>,
        /// and the default reader factory assignment is a no-op; presumably the
        /// callbacks these should use are missing -- confirm and restore them.
        /// </para>
        /// </remarks>
        /// <param name="args">Array of command-line arguments</param>
        /// <exception cref="System.IO.IOException">If there is a treebank file access problem</exception>
        public static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                PrintUsage();
                return;
            }

            int                        i = 0;
            int                        maxLength;
            int                        minLength;
            int                        maxL              = int.MaxValue;
            int                        minL              = -1;
            bool                       normalized        = false;
            bool                       decimate          = false;
            bool                       pennPrintTrees    = false;
            bool                       oneLinePrint      = false;
            bool                       printTaggedWords  = false;
            bool                       printWords        = false;
            bool                       correct           = false;
            string                     annotationOptions = null;
            bool                       summary           = false;
            bool                       timing            = false;
            bool                       yield             = false;
            bool                       punct             = false;
            bool                       sentenceLengths   = false;
            bool                       countTaggings     = false;
            bool                       removeCodeTrees   = false;
            string                     decimatePrefix    = null;
            string                     encoding          = TreebankLanguagePackConstants.DefaultEncoding;
            string                     suffix            = Treebank.DefaultTreeFileSuffix;
            ITreeReaderFactory         trf     = null;
            ITreebankLanguagePack      tlp     = null;
            IList <IPredicate <Tree> > filters = new List <IPredicate <Tree> >();

            while (i < args.Length && args[i].StartsWith("-"))
            {
                // True when the current option is followed by at least one more
                // argument. Every value-taking option checks this so that a trailing
                // option without its value ends up in the "Unknown option" branch
                // instead of indexing past the end of args.
                bool hasValue = i + 1 < args.Length;

                if (args[i].Equals("-maxLength") && hasValue)
                {
                    maxL = System.Convert.ToInt32(args[i + 1]);
                    i   += 2;
                }
                else if (args[i].Equals("-minLength") && hasValue)
                {
                    minL = System.Convert.ToInt32(args[i + 1]);
                    i   += 2;
                }
                else if (args[i].Equals("-h") || args[i].Equals("-help"))
                {
                    PrintUsage();
                    i++;
                }
                else if (args[i].Equals("-normalized"))
                {
                    normalized = true;
                    i         += 1;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tlp") && hasValue)
                {
                    // Instantiate the named language pack reflectively; it also
                    // supplies the tree reader factory.
                    try
                    {
                        object o = Sharpen.Runtime.GetType(args[i + 1]).GetDeclaredConstructor().NewInstance();
                        tlp = (ITreebankLanguagePack)o;
                        trf = tlp.TreeReaderFactory();
                    }
                    catch (Exception)
                    {
                        log.Info("Couldn't instantiate as TreebankLanguagePack: " + args[i + 1]);
                        return;
                    }
                    i += 2;
                }
                else if ((args[i].Equals("-treeReaderFactory") || args[i].Equals("-trf")) && hasValue)
                {
                    try
                    {
                        object o = Sharpen.Runtime.GetType(args[i + 1]).GetDeclaredConstructor().NewInstance();
                        trf = (ITreeReaderFactory)o;
                    }
                    catch (Exception)
                    {
                        log.Info("Couldn't instantiate as TreeReaderFactory: " + args[i + 1]);
                        return;
                    }
                    i += 2;
                }
                else if (args[i].Equals("-suffix") && hasValue)
                {
                    suffix = args[i + 1];
                    i     += 2;
                }
                else if (args[i].Equals("-decimate") && hasValue)
                {
                    decimate       = true;
                    decimatePrefix = args[i + 1];
                    i += 2;
                }
                else if (args[i].Equals("-encoding") && hasValue)
                {
                    encoding = args[i + 1];
                    i       += 2;
                }
                else if (args[i].Equals("-correct"))
                {
                    correct = true;
                    i      += 1;
                }
                else if (args[i].Equals("-summary"))
                {
                    summary = true;
                    i      += 1;
                }
                else if (args[i].Equals("-yield"))
                {
                    yield = true;
                    i    += 1;
                }
                else if (args[i].Equals("-punct"))
                {
                    punct = true;
                    i    += 1;
                }
                else if (args[i].Equals("-pennPrint"))
                {
                    pennPrintTrees = true;
                    i++;
                }
                else if (args[i].Equals("-oneLine"))
                {
                    oneLinePrint = true;
                    i++;
                }
                else if (args[i].Equals("-taggedWords"))
                {
                    printTaggedWords = true;
                    i++;
                }
                else if (args[i].Equals("-words"))
                {
                    printWords = true;
                    i++;
                }
                else if (args[i].Equals("-annotate") && hasValue)
                {
                    annotationOptions = args[i + 1];
                    i += 2;
                }
                else if (args[i].Equals("-timing"))
                {
                    timing = true;
                    i++;
                }
                else if (args[i].Equals("-countTaggings"))
                {
                    countTaggings = true;
                    i++;
                }
                else if (args[i].Equals("-sentenceLengths"))
                {
                    sentenceLengths = true;
                    i++;
                }
                else if (args[i].Equals("-removeCodeTrees"))
                {
                    removeCodeTrees = true;
                    i++;
                }
                else if (args[i].Equals("-filter") && hasValue)
                {
                    IPredicate <Tree> filter = ReflectionLoading.LoadByReflection(args[i + 1]);
                    filters.Add(filter);
                    i += 2;
                }
                else
                {
                    log.Info("Unknown option: " + args[i]);
                    i++;
                }
            }

            // NOTE(review): these copies are not read anywhere in this method;
            // presumably they were meant for the length-limited printing callbacks.
            maxLength = maxL;
            minLength = minL;
            Treebank treebank;

            if (trf == null)
            {
                // NOTE(review): no-op as written -- a default tree reader factory
                // was presumably meant to be created here. Confirm.
                trf = null;
            }
            if (normalized)
            {
                treebank = new DiskTreebank();
            }
            else
            {
                treebank = new DiskTreebank(trf, encoding);
            }
            // Each filter wraps the treebank built so far, in command-line order.
            foreach (IPredicate <Tree> filter_1 in filters)
            {
                treebank = new FilteringTreebank(treebank, filter_1);
            }
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.Console.Out, encoding), true);

            // After the options: args[i] is the path, args[i + 1] (optional) the
            // number ranges used to select files.
            if (i + 1 < args.Length)
            {
                treebank.LoadPath(args[i], new NumberRangesFileFilter(args[i + 1], true));
            }
            else if (i < args.Length)
            {
                treebank.LoadPath(args[i], suffix, true);
            }
            else
            {
                PrintUsage();
                return;
            }
            // log.info("Loaded " + treebank.size() + " trees from " + args[i]);
            if (annotationOptions != null)
            {
                // todo Not yet implemented
                log.Info("annotationOptions not yet implemented");
            }
            if (summary)
            {
                System.Console.Out.WriteLine(treebank.TextualSummary());
            }
            if (sentenceLengths)
            {
                SentenceLengths(treebank, args[i], ((i + 1) < args.Length ? args[i + 1] : null), pw);
            }
            if (punct)
            {
                PrintPunct(treebank, tlp, pw);
            }
            if (correct)
            {
                treebank = new EnglishPTBTreebankCorrector().TransformTrees(treebank);
            }
            // NOTE(review): every Apply call below passes null as the visitor.
            if (pennPrintTrees)
            {
                treebank.Apply(null);
            }
            if (oneLinePrint)
            {
                treebank.Apply(null);
            }
            if (printWords)
            {
                TreeNormalizer tn = new BobChrisTreeNormalizer();
                treebank.Apply(null);
            }
            if (printTaggedWords)
            {
                TreeNormalizer tn = new BobChrisTreeNormalizer();
                treebank.Apply(null);
            }
            if (countTaggings)
            {
                CountTaggings(treebank, pw);
            }
            if (yield)
            {
                treebank.Apply(null);
            }
            if (decimate)
            {
                // Each writer is closed even if opening a later one or the decimation
                // itself fails, so no file handle or buffered output is left behind.
                TextWriter w1 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-train.txt"), encoding));
                try
                {
                    TextWriter w2 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-dev.txt"), encoding));
                    try
                    {
                        TextWriter w3 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-test.txt"), encoding));
                        try
                        {
                            treebank.Decimate(w1, w2, w3);
                        }
                        finally
                        {
                            w3.Close();
                        }
                    }
                    finally
                    {
                        w2.Close();
                    }
                }
                finally
                {
                    w1.Close();
                }
            }
            if (timing)
            {
                RunTiming(treebank);
            }
            if (removeCodeTrees)
            {
                // this is a bit of a hack. It only works on an individual file
                if (new File(args[i]).IsDirectory())
                {
                    throw new Exception("-removeCodeTrees only works on a single file");
                }
                // The whole file is read, the matching CODE trees are stripped, and
                // the result is written back over the same file.
                string treebankStr = IOUtils.SlurpFile(args[i]);
                treebankStr = treebankStr.ReplaceAll("\\( \\(CODE <[^>]+>\\)\\)", string.Empty);
                TextWriter w = new OutputStreamWriter(new FileOutputStream(args[i]), encoding);
                try
                {
                    w.Write(treebankStr);
                }
                finally
                {
                    w.Close();
                }
            }
        }