/// <summary>Go through trees, determine their heads, and print them.</summary>
/// <remarks>
/// Debugging aid. Loads the treebank at the given path, percolates heads through
/// every tree with a <c>CollinsHeadFinder</c>, and prints each tree followed by
/// a blank line.
/// <br />
/// Usage:
/// <code>
/// java edu.stanford.nlp.trees.CollinsHeadFinder treebankFilePath
/// </code>
/// </remarks>
/// <param name="args">The treebankFilePath</param>
public static void Main(string[] args)
{
    // Without a path there is nothing to load; report usage instead of
    // failing on args[0].
    if (args == null || args.Length == 0)
    {
        System.Console.Error.WriteLine("Usage: CollinsHeadFinder treebankFilePath");
        return;
    }

    Treebank treebank = new DiskTreebank();
    CategoryWordTag.suppressTerminalDetails = true;
    treebank.LoadPath(args[0]);
    IHeadFinder chf = new Edu.Stanford.Nlp.Trees.CollinsHeadFinder();

    // Walk the trees directly: handing Apply() a null visitor performs no
    // head finding and never uses the head finder created above.
    // NOTE(review): PercolateHeads/PennPrint follow the naming of the other
    // Tree members used in this code base; confirm against the Tree class.
    foreach (Tree tree in treebank)
    {
        tree.PercolateHeads(chf);
        tree.PennPrint();
        System.Console.Out.WriteLine();
    }
}
/// <summary>Loads a treebank from the first argument, transforms it several ways, and prints each result.</summary>
/// <remarks>
/// Demonstration only. The same path is loaded into a disk-backed and a
/// memory-backed treebank, the two are combined into a
/// <c>CompositeTreebank</c>, and three tree transformers are then applied via
/// chained <c>Transform</c> calls, via nested <c>TransformingTreebank</c>
/// constructors, and via a single <c>CompositeTreeTransformer</c>.
/// <br />
/// <code>usage: java MemoryTreebank treebankFilesPath</code>
/// </remarks>
/// <param name="args">array of command-line arguments; <c>args[0]</c> is the treebank path</param>
public static void Main(string[] args)
{
    const string Rule = "-------------------------";
    const string ShortRule = "-----";
    System.IO.TextWriter stdout = System.Console.Out;

    // Load the same path twice and time the loading.
    Timing.StartTime();
    Treebank onDisk = new DiskTreebank(null);
    Treebank inMemory = new MemoryTreebank(null);
    onDisk.LoadPath(args[0]);
    inMemory.LoadPath(args[0]);
    CompositeTreebank combined = new CompositeTreebank(onDisk, inMemory);
    Timing.EndTime();

    ITreeTransformer first = new TransformingTreebank.MyTreeTransformer();
    ITreeTransformer second = new TransformingTreebank.MyTreeTransformer2();
    ITreeTransformer third = new TransformingTreebank.MyTreeTransformer3();

    // Route 1: chained Transform() calls.
    Treebank viaTransform = combined.Transform(first).Transform(second).Transform(third);

    // Route 2: explicitly nested TransformingTreebank wrappers.
    Edu.Stanford.Nlp.Trees.TransformingTreebank innermost =
        new Edu.Stanford.Nlp.Trees.TransformingTreebank(combined, first);
    Edu.Stanford.Nlp.Trees.TransformingTreebank middle =
        new Edu.Stanford.Nlp.Trees.TransformingTreebank(innermost, second);
    Treebank viaConstructors = new Edu.Stanford.Nlp.Trees.TransformingTreebank(middle, third);

    // Route 3: one composite transformer applied once.
    ITreeTransformer[] pipeline = new ITreeTransformer[] { first, second, third };
    ITreeTransformer composite = new CompositeTreeTransformer(Arrays.AsList(pipeline));
    Treebank viaComposite = combined.Transform(composite);

    stdout.WriteLine(Rule);
    stdout.WriteLine("COMPOSITE (DISK THEN MEMORY REPEATED VERSION OF) INPUT TREEBANK");
    stdout.WriteLine(combined);

    // Step-by-step wrapping, printing the intermediate treebank after each stage.
    stdout.WriteLine(Rule);
    stdout.WriteLine("SLOWLY TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
    Treebank stageOne = new Edu.Stanford.Nlp.Trees.TransformingTreebank(combined, first);
    stdout.WriteLine(stageOne);
    stdout.WriteLine(ShortRule);
    Treebank stageTwo = new Edu.Stanford.Nlp.Trees.TransformingTreebank(stageOne, second);
    stdout.WriteLine(stageTwo);
    stdout.WriteLine(ShortRule);
    Treebank stageThree = new Edu.Stanford.Nlp.Trees.TransformingTreebank(stageTwo, third);
    stdout.WriteLine(stageThree);

    stdout.WriteLine(Rule);
    stdout.WriteLine("TRANSFORMED TREEBANK, USING Treebank.transform()");
    stdout.WriteLine(viaTransform);

    // Printed a second time on purpose, as the heading says.
    stdout.WriteLine(Rule);
    stdout.WriteLine("PRINTING AGAIN TRANSFORMED TREEBANK, USING Treebank.transform()");
    stdout.WriteLine(viaTransform);

    stdout.WriteLine(Rule);
    stdout.WriteLine("TRANSFORMED TREEBANK, USING TransformingTreebank() CONSTRUCTOR");
    stdout.WriteLine(viaConstructors);

    stdout.WriteLine(Rule);
    stdout.WriteLine("TRANSFORMED TREEBANK, USING CompositeTreeTransformer");
    stdout.WriteLine(viaComposite);

    // Print the input once more after all the transformed views.
    stdout.WriteLine(Rule);
    stdout.WriteLine("COMPOSITE (DISK THEN MEMORY REPEATED VERSION OF) INPUT TREEBANK");
    stdout.WriteLine(combined);
    stdout.WriteLine(Rule);
}
/// <summary>Reads, stems, and prints the trees in the file.</summary>
/// <remarks>
/// Each tree loaded from the given path is passed to a <c>WordStemmer</c> and
/// then written to standard output. When no path is supplied a usage message
/// is written to standard error and nothing else happens.
/// </remarks>
/// <param name="args">Usage: WordStemmer file</param>
public static void Main(string[] args)
{
    // Guard against a missing path instead of failing on args[0].
    if (args == null || args.Length == 0)
    {
        System.Console.Error.WriteLine("Usage: WordStemmer file");
        return;
    }

    Treebank treebank = new DiskTreebank();
    treebank.LoadPath(args[0]);
    Edu.Stanford.Nlp.Trees.WordStemmer stemmer = new Edu.Stanford.Nlp.Trees.WordStemmer();
    foreach (Tree tree in treebank)
    {
        // The tree is printed after the stemmer has visited it.
        stemmer.VisitTree(tree);
        System.Console.Out.WriteLine(tree);
    }
}
/// <summary>Creates an iterator over the enclosing treebank as it is configured at construction time.</summary>
/// <param name="_enclosing">The <c>DiskTreebank</c> whose file paths and file filters are iterated.</param>
private DiskTreebankIterator(DiskTreebank _enclosing)
{
    this._enclosing = _enclosing;

    // Work on private copies of the path and filter lists so that later
    // loadPath() calls on the treebank cannot disturb an iteration that is
    // already in progress.
    localFilterList = new List<IFileFilter>(_enclosing.fileFilters);
    localPathList = new List<File>(_enclosing.filePaths);

    // storedTree is only filled in when a path and then a file can be primed;
    // otherwise it is left unassigned (null marks an exhausted iterator).
    if (!PrimeNextPath())
    {
        return;
    }
    if (!PrimeNextFile())
    {
        return;
    }
    storedTree = PrimeNextTree();
}
/// <summary>Searches one or more treebanks for a sentence given on the command line.</summary>
/// <remarks>
/// The options <c>-tagSeparator</c>, <c>-encoding</c> and <c>-fileRegex</c>
/// (one or two leading dashes, matched case-insensitively) each consume the
/// argument that follows them. The first remaining non-empty argument is the
/// sentence to look for; every other argument is a path to search. Each match
/// is reported with the file name and the tree's position within that file.
/// </remarks>
/// <param name="args">Options, the sentence to find, and the paths to search.</param>
public static void Main(string[] args)
{
    string needle = string.Empty;
    string tagSeparator = "_";
    string encoding = "utf-8";
    string fileRegex = string.Empty;
    IList<string> paths = new List<string>();

    for (int i = 0; i < args.Length; ++i)
    {
        string arg = args[i];
        // An option is only honoured when a value follows it; otherwise the
        // argument falls through and is treated as needle/path.
        bool hasValue = i + 1 < args.Length;

        if (hasValue && (Sharpen.Runtime.EqualsIgnoreCase(arg, "-tagSeparator") || Sharpen.Runtime.EqualsIgnoreCase(arg, "--tagSeparator")))
        {
            tagSeparator = args[++i];
            continue;
        }
        if (hasValue && (Sharpen.Runtime.EqualsIgnoreCase(arg, "-encoding") || Sharpen.Runtime.EqualsIgnoreCase(arg, "--encoding")))
        {
            encoding = args[++i];
            continue;
        }
        if (hasValue && (Sharpen.Runtime.EqualsIgnoreCase(arg, "-fileRegex") || Sharpen.Runtime.EqualsIgnoreCase(arg, "--fileRegex")))
        {
            fileRegex = args[++i];
            continue;
        }

        if (needle.Length == 0)
        {
            needle = arg.Trim();
        }
        else
        {
            paths.Add(arg);
        }
    }

    ITreeReaderFactory trf = new LabeledScoredTreeReaderFactory();

    IFileFilter filter = null;
    if (fileRegex.Length != 0)
    {
        // The expression is compiled, but no filter is derived from it, so
        // filter is still null when handed to LoadPath below.
        // NOTE(review): a file filter built from filePattern presumably
        // belongs here - confirm against the intended behaviour.
        Pattern filePattern = Pattern.Compile(fileRegex);
    }

    foreach (string path in paths)
    {
        // A fresh treebank per path, using the requested encoding and filter.
        DiskTreebank treebank = new DiskTreebank(trf, encoding);
        treebank.LoadPath(path, filter);

        IEnumerator<Tree> trees = treebank.GetEnumerator();
        string currentFile = string.Empty;
        int treeCount = 0;
        while (trees.MoveNext())
        {
            // A path may cover several files; restart the per-file tree
            // counter whenever the treebank moves on to another file.
            string fileNow = treebank.GetCurrentFilename();
            if (!currentFile.Equals(fileNow))
            {
                currentFile = fileNow;
                treeCount = 0;
            }
            ++treeCount;

            IList<TaggedWord> sentence = trees.Current.TaggedYield();

            // Three renderings of the sentence are compared with the needle:
            // the two-argument ListToString form, that same text with all
            // spaces removed (useful for unsegmented text such as Chinese),
            // and the form that uses the tag separator.
            string plain = SentenceUtils.ListToString(sentence, true);
            string plainNoSpaces = plain.ReplaceAll(" ", string.Empty);
            string withSeparator = SentenceUtils.ListToString(sentence, false, tagSeparator);

            bool found = needle.Equals(plain) || needle.Equals(plainNoSpaces) || needle.Equals(withSeparator);
            if (found)
            {
                System.Console.Out.WriteLine("needle found in " + currentFile + " tree " + treeCount);
            }
        }
    }
}
/// <summary>Loads treebank and prints it.</summary>
/// <remarks>
/// Loads a treebank and runs the requested reports on it.
/// All files below the designated
/// <c>filePath</c>
/// within the given
/// number range, if any, are loaded. Leading arguments that start with
/// <c>-</c> are parsed as options; the first remaining argument is the path
/// and an optional second one is the number-range specification.
/// <p>
/// Options that take a value (<c>-maxLength</c>, <c>-minLength</c>,
/// <c>-tlp</c>, <c>-treeReaderFactory</c>/<c>-trf</c>, <c>-suffix</c>,
/// <c>-decimate</c>, <c>-encoding</c>, <c>-annotate</c>, <c>-filter</c>) are
/// only recognised when a value actually follows them; otherwise they are
/// reported as unknown options.
/// <p>
/// Usage:
/// <c>java edu.stanford.nlp.trees.Treebanks [-maxLength n|-normalized|-treeReaderFactory class] filePath [numberRanges]</c>
/// </remarks>
/// <param name="args">Array of command-line arguments</param>
/// <exception cref="System.IO.IOException">If there is a treebank file access problem</exception>
public static void Main(string[] args)
{
    if (args.Length == 0)
    {
        PrintUsage();
        return;
    }

    int i = 0;
    int maxLength;
    int minLength;
    int maxL = int.MaxValue;
    int minL = -1;
    bool normalized = false;
    bool decimate = false;
    bool pennPrintTrees = false;
    bool oneLinePrint = false;
    bool printTaggedWords = false;
    bool printWords = false;
    bool correct = false;
    string annotationOptions = null;
    bool summary = false;
    bool timing = false;
    bool yield = false;
    bool punct = false;
    bool sentenceLengths = false;
    bool countTaggings = false;
    bool removeCodeTrees = false;
    string decimatePrefix = null;
    string encoding = TreebankLanguagePackConstants.DefaultEncoding;
    string suffix = Treebank.DefaultTreeFileSuffix;
    ITreeReaderFactory trf = null;
    ITreebankLanguagePack tlp = null;
    IList<IPredicate<Tree>> filters = new List<IPredicate<Tree>>();

    // ---- option parsing -------------------------------------------------
    while (i < args.Length && args[i].StartsWith("-"))
    {
        string option = args[i];
        // Every value-taking option requires a following argument. Without
        // one it falls through to the "Unknown option" branch instead of
        // indexing past the end of args.
        bool hasValue = i + 1 < args.Length;

        if (option.Equals("-maxLength") && hasValue)
        {
            maxL = System.Convert.ToInt32(args[i + 1]);
            i += 2;
        }
        else if (option.Equals("-minLength") && hasValue)
        {
            minL = System.Convert.ToInt32(args[i + 1]);
            i += 2;
        }
        else if (option.Equals("-h") || option.Equals("-help"))
        {
            // Usage is printed but parsing continues with the next argument.
            PrintUsage();
            i++;
        }
        else if (option.Equals("-normalized"))
        {
            normalized = true;
            i += 1;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(option, "-tlp") && hasValue)
        {
            // Instantiate the language pack by class name and take its
            // tree reader factory.
            try
            {
                object o = Sharpen.Runtime.GetType(args[i + 1]).GetDeclaredConstructor().NewInstance();
                tlp = (ITreebankLanguagePack)o;
                trf = tlp.TreeReaderFactory();
            }
            catch (Exception)
            {
                log.Info("Couldn't instantiate as TreebankLanguagePack: " + args[i + 1]);
                return;
            }
            i += 2;
        }
        else if ((option.Equals("-treeReaderFactory") || option.Equals("-trf")) && hasValue)
        {
            try
            {
                object o = Sharpen.Runtime.GetType(args[i + 1]).GetDeclaredConstructor().NewInstance();
                trf = (ITreeReaderFactory)o;
            }
            catch (Exception)
            {
                log.Info("Couldn't instantiate as TreeReaderFactory: " + args[i + 1]);
                return;
            }
            i += 2;
        }
        else if (option.Equals("-suffix") && hasValue)
        {
            suffix = args[i + 1];
            i += 2;
        }
        else if (option.Equals("-decimate") && hasValue)
        {
            decimate = true;
            decimatePrefix = args[i + 1];
            i += 2;
        }
        else if (option.Equals("-encoding") && hasValue)
        {
            encoding = args[i + 1];
            i += 2;
        }
        else if (option.Equals("-correct"))
        {
            correct = true;
            i += 1;
        }
        else if (option.Equals("-summary"))
        {
            summary = true;
            i += 1;
        }
        else if (option.Equals("-yield"))
        {
            yield = true;
            i += 1;
        }
        else if (option.Equals("-punct"))
        {
            punct = true;
            i += 1;
        }
        else if (option.Equals("-pennPrint"))
        {
            pennPrintTrees = true;
            i++;
        }
        else if (option.Equals("-oneLine"))
        {
            oneLinePrint = true;
            i++;
        }
        else if (option.Equals("-taggedWords"))
        {
            printTaggedWords = true;
            i++;
        }
        else if (option.Equals("-words"))
        {
            printWords = true;
            i++;
        }
        else if (option.Equals("-annotate") && hasValue)
        {
            annotationOptions = args[i + 1];
            i += 2;
        }
        else if (option.Equals("-timing"))
        {
            timing = true;
            i++;
        }
        else if (option.Equals("-countTaggings"))
        {
            countTaggings = true;
            i++;
        }
        else if (option.Equals("-sentenceLengths"))
        {
            sentenceLengths = true;
            i++;
        }
        else if (option.Equals("-removeCodeTrees"))
        {
            removeCodeTrees = true;
            i++;
        }
        else if (option.Equals("-filter") && hasValue)
        {
            IPredicate<Tree> filter = ReflectionLoading.LoadByReflection(args[i + 1]);
            filters.Add(filter);
            i += 2;
        }
        else
        {
            log.Info("Unknown option: " + option);
            i++;
        }
    }

    // NOTE(review): maxLength/minLength are assigned but not read anywhere
    // in this method; presumably they were meant for the per-tree visitors
    // that are missing below - confirm.
    maxLength = maxL;
    minLength = minL;

    // ---- treebank construction and loading ------------------------------
    // NOTE(review): no default reader factory is installed when neither
    // -tlp nor -trf was given, so trf may be null here - confirm that
    // DiskTreebank accepts a null factory.
    Treebank treebank;
    if (normalized)
    {
        treebank = new DiskTreebank();
    }
    else
    {
        treebank = new DiskTreebank(trf, encoding);
    }
    // Each -filter wraps the treebank built so far.
    foreach (IPredicate<Tree> treeFilter in filters)
    {
        treebank = new FilteringTreebank(treebank, treeFilter);
    }

    PrintWriter pw = new PrintWriter(new OutputStreamWriter(System.Console.Out, encoding), true);

    if (i + 1 < args.Length)
    {
        // Path plus number ranges.
        treebank.LoadPath(args[i], new NumberRangesFileFilter(args[i + 1], true));
    }
    else if (i < args.Length)
    {
        // Path only: select files by suffix.
        treebank.LoadPath(args[i], suffix, true);
    }
    else
    {
        PrintUsage();
        return;
    }

    // ---- reports --------------------------------------------------------
    if (annotationOptions != null)
    {
        // todo Not yet implemented
        log.Info("annotationOptions not yet implemented");
    }
    if (summary)
    {
        System.Console.Out.WriteLine(treebank.TextualSummary());
    }
    if (sentenceLengths)
    {
        SentenceLengths(treebank, args[i], ((i + 1) < args.Length ? args[i + 1] : null), pw);
    }
    if (punct)
    {
        PrintPunct(treebank, tlp, pw);
    }
    if (correct)
    {
        treebank = new EnglishPTBTreebankCorrector().TransformTrees(treebank);
    }

    // NOTE(review): each Apply call below is handed a null visitor, so the
    // printing selected by -pennPrint, -oneLine, -words, -taggedWords and
    // -yield is not visible in this method; the visitor bodies appear to be
    // missing - confirm against the intended implementation.
    if (pennPrintTrees)
    {
        treebank.Apply(null);
    }
    if (oneLinePrint)
    {
        treebank.Apply(null);
    }
    if (printWords)
    {
        TreeNormalizer tn = new BobChrisTreeNormalizer();
        treebank.Apply(null);
    }
    if (printTaggedWords)
    {
        TreeNormalizer tn = new BobChrisTreeNormalizer();
        treebank.Apply(null);
    }
    if (countTaggings)
    {
        CountTaggings(treebank, pw);
    }
    if (yield)
    {
        treebank.Apply(null);
    }

    if (decimate)
    {
        // Three output files share the -decimate prefix. They are closed in
        // finally so the file handles are released even if writing fails.
        TextWriter w1 = null;
        TextWriter w2 = null;
        TextWriter w3 = null;
        try
        {
            w1 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-train.txt"), encoding));
            w2 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-dev.txt"), encoding));
            w3 = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(decimatePrefix + "-test.txt"), encoding));
            treebank.Decimate(w1, w2, w3);
        }
        finally
        {
            if (w1 != null)
            {
                w1.Close();
            }
            if (w2 != null)
            {
                w2.Close();
            }
            if (w3 != null)
            {
                w3.Close();
            }
        }
    }
    if (timing)
    {
        RunTiming(treebank);
    }
    if (removeCodeTrees)
    {
        // this is a bit of a hack. It only works on an individual file
        if (new File(args[i]).IsDirectory())
        {
            throw new Exception("-removeCodeTrees only works on a single file");
        }
        // Read the whole file, strip the "( (CODE <...>))" trees, and write
        // the result back over the same file.
        string treebankStr = IOUtils.SlurpFile(args[i]);
        treebankStr = treebankStr.ReplaceAll("\\( \\(CODE <[^>]+>\\)\\)", string.Empty);
        TextWriter w = new OutputStreamWriter(new FileOutputStream(args[i]), encoding);
        try
        {
            w.Write(treebankStr);
        }
        finally
        {
            w.Close();
        }
    }
}