/// <summary>Lets you test out the TreeBinarizer on the command line.</summary>
/// <remarks>
/// Loads the treebank found at the given path, binarizes every tree in it and
/// prints each original tree followed by its binarized form to standard output.
/// This main method doesn't yet handle as many flags as one would like.
/// But it does have:
/// <list type="bullet">
/// <item><description><c>-tlp className</c>: the ITreebankLanguagePack class to instantiate</description></item>
/// <item><description><c>-tlpp className</c>: the ITreebankLangParserParams class to instantiate;
/// when given, its treebank, language pack, file extension and head finder are used,
/// overriding any <c>-tlp</c> value</description></item>
/// <item><description><c>-insideFactor</c></description></item>
/// <item><description><c>-markovOrder int</c></description></item>
/// <item><description><c>-simpleLabels</c></description></item>
/// <item><description><c>-noRebinarization</c></description></item>
/// </list>
/// Flag names are matched case-insensitively. Unrecognized flags (including a
/// value-taking flag given as the last argument without its value) are logged
/// and skipped.
/// </remarks>
/// <param name="args">
/// Command line arguments: flags as above, followed by
/// treebankPath
/// </param>
/// <exception cref="System.ArgumentException">
/// The class named by <c>-tlp</c> or <c>-tlpp</c> could not be instantiated.
/// </exception>
public static void Main(string[] args)
{
    ITreebankLangParserParams tlpp = null;
    // Never assigned: the DiskTreebank below is constructed with a null reader factory.
    // NOTE(review): confirm DiskTreebank accepts null here; the original comment said
    // "Looks like it must build CategoryWordTagFactory!!".
    ITreeReaderFactory trf = null;
    string fileExt = "mrg";
    IHeadFinder hf = new ModCollinsHeadFinder();
    ITreebankLanguagePack tlp = new PennTreebankLanguagePack();

    // TreeBinarizer constructor arguments; only some of them are reachable through flags.
    bool insideFactor = false;
    bool mf = false;
    int mo = 1;
    bool uwl = false;
    bool uat = false;
    double sst = 20.0;
    bool mfs = false;
    bool simpleLabels = false;
    bool noRebinarization = false;

    int i = 0;
    // Ordinal comparison: "-" is a syntactic marker, not linguistic text.
    while (i < args.Length && args[i].StartsWith("-", System.StringComparison.Ordinal))
    {
        string flag = args[i];
        bool hasValue = i + 1 < args.Length;

        if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-tlp") && hasValue)
        {
            i++; // consume the class name
            tlp = InstantiateByName<ITreebankLanguagePack>(args[i]);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-tlpp") && hasValue)
        {
            i++; // consume the class name
            tlpp = InstantiateByName<ITreebankLangParserParams>(args[i]);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-insideFactor"))
        {
            insideFactor = true;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-markovOrder") && hasValue)
        {
            i++; // consume the integer value
            // Invariant culture so the parse does not depend on the machine's locale.
            mo = System.Convert.ToInt32(args[i], System.Globalization.CultureInfo.InvariantCulture);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-simpleLabels"))
        {
            simpleLabels = true;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(flag, "-noRebinarization"))
        {
            noRebinarization = true;
        }
        else
        {
            log.Info("Unknown option:" + flag);
        }
        i++;
    }

    // Everything was a flag (or there were no arguments): no treebank path left.
    if (i >= args.Length)
    {
        log.Info("usage: java TreeBinarizer [-tlpp class|-markovOrder int|...] treebankPath");
        // Exit code 0 is kept as in the original behavior.
        System.Environment.Exit(0);
    }

    Treebank treebank;
    if (tlpp != null)
    {
        // Parser params supply the treebank, language pack, file extension and head finder.
        treebank = tlpp.MemoryTreebank();
        tlp = tlpp.TreebankLanguagePack();
        fileExt = tlp.TreebankFileExtension();
        hf = tlpp.HeadFinder();
    }
    else
    {
        treebank = new DiskTreebank(trf);
    }
    treebank.LoadPath(args[i], fileExt, true);

    ITreeTransformer tt = new Edu.Stanford.Nlp.Parser.Lexparser.TreeBinarizer(hf, tlp, insideFactor, mf, mo, uwl, uat, sst, mfs, simpleLabels, noRebinarization);
    foreach (Tree t in treebank)
    {
        Tree newT = tt.TransformTree(t);
        System.Console.Out.WriteLine("Original tree:");
        t.PennPrint();
        System.Console.Out.WriteLine("Binarized tree:");
        newT.PennPrint();
        System.Console.Out.WriteLine();
    }
}

/// <summary>
/// Resolves <paramref name="className"/> through <c>Sharpen.Runtime.GetType</c>, creates an
/// instance with <c>System.Activator.CreateInstance</c> and casts it to <typeparamref name="T"/>.
/// </summary>
/// <param name="className">Class name taken from the command line.</param>
/// <returns>The newly created instance.</returns>
/// <exception cref="System.ArgumentException">
/// Resolution, construction or the cast failed; the failure is logged first and the
/// original exception is preserved as the inner exception.
/// </exception>
private static T InstantiateByName<T>(string className)
{
    try
    {
        return (T)System.Activator.CreateInstance(Sharpen.Runtime.GetType(className));
    }
    catch (Exception e)
    {
        log.Info("Couldn't instantiate: " + className);
        throw new System.ArgumentException("Couldn't instantiate: " + className, e);
    }
}