/// <summary>
/// Command-line entry point: loads the given treebank files and prints every tree,
/// either removing the start-symbol bracket at the top or adding it when missing.
/// </summary>
/// <remarks>
/// Options read here:
/// <c>l</c> selects the language (default English);
/// <c>e</c> selects the input/output encoding (default UTF-8);
/// <c>b</c>, when true, strips the top node of any tree labelled with the start symbol,
/// otherwise trees not labelled with the start symbol are wrapped in a new start-symbol node.
/// The value stored under the empty option key is treated as a whitespace-separated
/// list of treebank paths (presumably the non-option arguments; confirm against
/// <c>StringUtils.ArgsToProperties</c>).
/// </remarks>
/// <param name="args">Raw command-line arguments.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(Usage());
        System.Environment.Exit(-1);
    }

    Properties options = StringUtils.ArgsToProperties(args, ArgDefs());
    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;

    // The encoding is its own option ("e"); "l" is the language selector read above.
    string encoding = options.GetProperty("e", "UTF-8");
    bool removeBracket = PropertiesUtils.GetBool(options, "b", false);
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);
    DiskTreebank tb = tlpp.DiskTreebank();

    // string.Split(string) would split on the literal text "\s+". A null char[] separator
    // splits on whitespace instead, and RemoveEmptyEntries makes an empty file list
    // really come back with Length == 0 so the usage branch below is reachable.
    string[] files = options.GetProperty(string.Empty, string.Empty)
        .Split((char[])null, System.StringSplitOptions.RemoveEmptyEntries);
    if (files.Length == 0)
    {
        log.Info(Usage());
        System.Environment.Exit(-1);
    }

    foreach (string filename in files)
    {
        tb.LoadPath(filename);
    }

    PrintWriter pwo = tlpp.Pw();
    string startSymbol = tlpp.TreebankLanguagePack().StartSymbol();
    ITreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    try
    {
        foreach (Tree t in tb)
        {
            // foreach iteration variables are read-only in C#, so the tree to print
            // is tracked in a separate local.
            Tree outputTree = t;
            bool hasStartSymbol = t.Value().Equals(startSymbol);
            if (removeBracket)
            {
                if (hasStartSymbol)
                {
                    outputTree = t.FirstChild();
                }
            }
            else if (!hasStartSymbol)
            {
                // Add a bracket if it isn't already there
                outputTree = tf.NewTreeNode(startSymbol, Java.Util.Collections.SingletonList(t));
            }

            pwo.Println(outputTree.ToString());
            nTrees++;
        }
    }
    finally
    {
        // Close the writer even if reading or printing a tree throws.
        pwo.Close();
    }

    System.Console.Error.Printf("Processed %d trees.%n", nTrees);
}
/// <summary>
/// Command-line entry point: reads a treebank and prints rule statistics for it
/// (tree count, number and distinct types of multi-child rules, mean and standard
/// deviation of their branching factor, rule entropy, and the number of unary rules).
/// </summary>
/// <remarks>
/// Options read here:
/// <c>l</c> selects the language (default English);
/// <c>e</c> selects the input/output encoding (default UTF-8).
/// The value stored under the empty option key is the treebank path to load.
/// A phrasal node with more than one child is counted as "binary"; any other
/// phrasal node is counted as unary.
/// </remarks>
/// <param name="args">Raw command-line arguments.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    // Process command-line options
    Properties options = StringUtils.ArgsToProperties(args, optionArgDefinitions);
    string fileName = options.GetProperty(string.Empty);
    if (fileName == null || fileName.Equals(string.Empty))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;
    string encoding = options.GetProperty("e", "UTF-8");
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);
    DiskTreebank tb = tlpp.DiskTreebank();
    tb.LoadPath(fileName);

    // Statistics
    ICounter<string> binaryRuleTypes = new ClassicCounter<string>(20000);
    IList<int> branchingFactors = new List<int>(20000);
    int nTrees = 0;
    int nUnaryRules = 0;
    int nBinaryRules = 0;
    int binaryBranchingFactors = 0;

    // Read the treebank
    foreach (Tree tree in tb)
    {
        // foreach iteration variables are read-only in C#, so the (possibly unwrapped)
        // tree is held in a separate local. A "ROOT" wrapper node is skipped.
        Tree root = tree.Value().Equals("ROOT") ? tree.FirstChild() : tree;
        ++nTrees;

        foreach (Tree subTree in root)
        {
            if (!subTree.IsPhrasal())
            {
                continue;
            }

            int numChildren = subTree.NumChildren();
            if (numChildren > 1)
            {
                ++nBinaryRules;
                branchingFactors.Add(numChildren);
                binaryBranchingFactors += numChildren;
                binaryRuleTypes.IncrementCount(TreeToRuleString(subTree));
            }
            else
            {
                ++nUnaryRules;
            }
        }
    }

    // Floating-point division: when no multi-child rule was seen this is 0.0 / 0.0 = NaN.
    double mean = (double)binaryBranchingFactors / (double)nBinaryRules;

    System.Console.Out.Printf("#trees:\t%d%n", nTrees);
    System.Console.Out.Printf("#binary:\t%d%n", nBinaryRules);
    System.Console.Out.Printf("#binary types:\t%d%n", binaryRuleTypes.KeySet().Count);
    System.Console.Out.Printf("mean branching:\t%.4f%n", mean);
    System.Console.Out.Printf("stddev branching:\t%.4f%n", StandardDeviation(branchingFactors, mean));
    System.Console.Out.Printf("rule entropy:\t%.5f%n", Counters.Entropy(binaryRuleTypes));
    System.Console.Out.Printf("#unaries:\t%d%n", nUnaryRules);
}
/// <summary>
/// Command-line entry point: reads a treebank, optionally prints each tree in one of
/// several formats, and reports on standard error how many trees were read.
/// </summary>
/// <remarks>
/// Options read here:
/// <c>y</c> is the maximum yield length; longer trees are skipped and not counted (default: no limit);
/// <c>p</c> prints each tree via <c>ToString()</c>;
/// <c>f</c> prints the yield of each tree as one line;
/// <c>a</c> prints the pre-terminal yield of each tree as one line;
/// <c>t</c> prints one <c>word&lt;TAB&gt;tag</c> line per token followed by a blank line;
/// <c>l</c> selects the language (default English);
/// <c>e</c> selects the input/output encoding (default UTF-8).
/// When several print options are set, the first in the order p, f, a, t wins.
/// The value stored under the empty option key is the treebank path to load.
/// </remarks>
/// <param name="args">Raw command-line arguments.</param>
public static void Main(string[] args)
{
    if (args.Length < minArgs)
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    // Process command-line options
    Properties options = StringUtils.ArgsToProperties(args, optionArgDefinitions);
    string fileName = options.GetProperty(string.Empty);
    if (fileName == null || fileName.Equals(string.Empty))
    {
        System.Console.Out.WriteLine(usage);
        System.Environment.Exit(-1);
    }

    int maxLen = PropertiesUtils.GetInt(options, "y", int.MaxValue);
    bool printTrees = PropertiesUtils.GetBool(options, "p", false);
    bool flattenTrees = PropertiesUtils.GetBool(options, "f", false);
    bool printPOS = PropertiesUtils.GetBool(options, "a", false);
    bool printTnT = PropertiesUtils.GetBool(options, "t", false);

    Language language = PropertiesUtils.Get(options, "l", Language.English, typeof(Language));
    ITreebankLangParserParams tlpp = language.@params;
    string encoding = options.GetProperty("e", "UTF-8");
    tlpp.SetInputEncoding(encoding);
    tlpp.SetOutputEncoding(encoding);
    DiskTreebank tb = tlpp.DiskTreebank();
    tb.LoadPath(fileName);

    // Read the treebank
    PrintWriter pw = tlpp.Pw();
    int numTrees = 0;
    try
    {
        foreach (Tree tree in tb)
        {
            if (tree.Yield().Count > maxLen)
            {
                continue;
            }

            ++numTrees;

            if (printTrees)
            {
                pw.Println(tree.ToString());
            }
            else if (flattenTrees)
            {
                pw.Println(SentenceUtils.ListToString(tree.Yield()));
            }
            else if (printPOS)
            {
                pw.Println(SentenceUtils.ListToString(tree.PreTerminalYield()));
            }
            else if (printTnT)
            {
                IList<CoreLabel> taggedTokens = tree.TaggedLabeledYield();
                foreach (CoreLabel label in taggedTokens)
                {
                    pw.Printf("%s\t%s%n", label.Word(), label.Tag());
                }

                // Blank line separates sentences.
                pw.Println();
            }
        }
    }
    finally
    {
        // Close the writer so buffered output is not lost when the process exits.
        pw.Close();
    }

    System.Console.Error.Printf("Read %d trees.%n", numTrees);
}