/// <summary>
/// Command-line entry point. Loads a <c>LexicalizedParser</c> model, reads and transforms the
/// trees of one or more treebanks, runs every tree through a
/// <c>CacheParseHypotheses.CacheProcessor</c> (optionally on several threads), and writes the
/// resulting list of (tree, byte[]) pairs to the output file.
/// </summary>
/// <remarks>
/// Recognized options (option names are matched case-insensitively):
/// <list type="bullet">
/// <item><description><c>-model</c> / <c>-parser</c> &#8212; path of the parser model to load (required).</description></item>
/// <item><description><c>-output</c> &#8212; file the cache is written to (required).</description></item>
/// <item><description><c>-treebank</c> &#8212; treebank description; may be repeated, at least one is required.</description></item>
/// <item><description><c>-dvKBest</c> &#8212; number of hypothesis trees per tree (default 200).</description></item>
/// <item><description><c>-numThreads</c> &#8212; thread count handed to the <c>MulticoreWrapper</c> (default 1).</description></item>
/// </list>
/// Example command lines:
/// <br />
/// java -mx1g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model /scr/horatio/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached9.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-202
/// <br />
/// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached.train.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 -numThreads 6
/// <br />
/// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/chinese/xinhuaPCFG.ser.gz -output cached.xinhua.train.ser.gz -treebank /afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed 026-270,301-499,600-999
/// </remarks>
/// <param name="args">The command-line arguments described above.</param>
/// <exception cref="System.ArgumentException">
/// An argument is unknown, an option is missing its value, or one of the required options
/// (<c>-model</c>, <c>-output</c>, <c>-treebank</c>) was not supplied.
/// </exception>
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string parserModel = null;
    string output = null;
    IList<Pair<string, IFileFilter>> treebanks = Generics.NewArrayList();
    int dvKBest = 200;
    int numThreads = 1;

    // Each branch advances argIndex itself, so the for header has no increment clause.
    for (int argIndex = 0; argIndex < args.Length;)
    {
        string option = args[argIndex];
        if (Sharpen.Runtime.EqualsIgnoreCase(option, "-dvKBest"))
        {
            dvKBest = System.Convert.ToInt32(RequireOptionValue(args, argIndex));
            argIndex += 2;
            continue;
        }
        // "-model" is now matched case-insensitively, like its alias "-parser" and every other option.
        if (Sharpen.Runtime.EqualsIgnoreCase(option, "-parser") || Sharpen.Runtime.EqualsIgnoreCase(option, "-model"))
        {
            parserModel = RequireOptionValue(args, argIndex);
            argIndex += 2;
            continue;
        }
        if (Sharpen.Runtime.EqualsIgnoreCase(option, "-output"))
        {
            output = RequireOptionValue(args, argIndex);
            argIndex += 2;
            continue;
        }
        if (Sharpen.Runtime.EqualsIgnoreCase(option, "-treebank"))
        {
            // NOTE(review): ArgUtils is not visible here; presumably NumSubArgs counts the
            // values that follow the flag, so "+ 1" also skips the flag itself - confirm.
            Pair<string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-treebank");
            argIndex = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
            treebanks.Add(treebankDescription);
            continue;
        }
        if (Sharpen.Runtime.EqualsIgnoreCase(option, "-numThreads"))
        {
            numThreads = System.Convert.ToInt32(RequireOptionValue(args, argIndex));
            argIndex += 2;
            continue;
        }
        throw new ArgumentException("Unknown argument " + option);
    }

    if (parserModel == null)
    {
        throw new ArgumentException("Need to supply a parser model with -model");
    }
    if (output == null)
    {
        throw new ArgumentException("Need to supply an output filename with -output");
    }
    if (treebanks.IsEmpty())
    {
        throw new ArgumentException("Need to supply a treebank with -treebank");
    }

    log.Info("Writing output to " + output);
    log.Info("Loading parser model " + parserModel);
    log.Info("Writing " + dvKBest + " hypothesis trees for each tree");

    // The k-best size is forwarded to the parser as a textual option, so it is formatted
    // culture-invariantly. (The previous "int.ToString(dvKBest)" is not valid C#:
    // Int32.ToString is an instance method.)
    string dvKBestOption = dvKBest.ToString(System.Globalization.CultureInfo.InvariantCulture);
    LexicalizedParser parser = ((LexicalizedParser)LexicalizedParser.LoadModel(parserModel, "-dvKBest", dvKBestOption));
    CacheParseHypotheses cacher = new CacheParseHypotheses(parser);
    ITreeTransformer transformer = DVParser.BuildTrainTransformer(parser.GetOp());

    // Load every requested treebank, apply the training transformer, and pool the trees.
    IList<Tree> sentences = new List<Tree>();
    foreach (Pair<string, IFileFilter> description in treebanks)
    {
        log.Info("Reading trees from " + description.first);
        Treebank treebank = parser.GetOp().tlpParams.MemoryTreebank();
        treebank.LoadPath(description.first, description.second);
        treebank = treebank.Transform(transformer);
        Sharpen.Collections.AddAll(sentences, treebank);
    }
    log.Info("Processing " + sentences.Count + " trees");

    IList<Pair<Tree, byte[]>> cache = Generics.NewArrayList();
    // From here on the transformer is handed to the (possibly multi-threaded) processor,
    // so it is wrapped in a SynchronizedTreeTransformer first.
    transformer = new SynchronizedTreeTransformer(transformer);
    MulticoreWrapper<Tree, Pair<Tree, byte[]>> wrapper = new MulticoreWrapper<Tree, Pair<Tree, byte[]>>(numThreads, new CacheParseHypotheses.CacheProcessor(cacher, parser, dvKBest, transformer));
    foreach (Tree tree in sentences)
    {
        wrapper.Put(tree);
        // Collect whatever is already available while continuing to submit work.
        CollectFinishedResults(wrapper, cache);
    }
    // NOTE(review): Join presumably blocks until all submitted trees are processed - confirm.
    wrapper.Join();
    CollectFinishedResults(wrapper, cache);

    System.Console.Out.WriteLine("Finished processing " + cache.Count + " trees");
    IOUtils.WriteObjectToFile(cache, output);
}

/// <summary>
/// Returns the value following the option at <paramref name="argIndex"/>, or throws an
/// <see cref="System.ArgumentException"/> naming the option when it is the last argument.
/// </summary>
private static string RequireOptionValue(string[] args, int argIndex)
{
    if (argIndex + 1 >= args.Length)
    {
        throw new ArgumentException("Missing value for argument " + args[argIndex]);
    }
    return args[argIndex + 1];
}

/// <summary>
/// Moves every result the wrapper currently reports as available into <paramref name="cache"/>,
/// printing a progress line each time the cache size reaches a multiple of ten.
/// </summary>
private static void CollectFinishedResults(MulticoreWrapper<Tree, Pair<Tree, byte[]>> wrapper, IList<Pair<Tree, byte[]>> cache)
{
    while (wrapper.Peek())
    {
        cache.Add(wrapper.Poll());
        if (cache.Count % 10 == 0)
        {
            System.Console.Out.WriteLine("Processed " + cache.Count + " trees");
        }
    }
}