/// <summary>Collects the categories that parent annotation statistics suggest splitting on.</summary>
/// <remarks>
/// Gathers parent annotation statistics over the given treebank, suitable for
/// selective parent splitting in the PCFGParser inside FactoredParser, and
/// returns the resulting splitter categories.
/// <para>
/// If tlp is non-null, tlp.basicCategory() will be called on parent and
/// grandparent nodes.
/// </para>
/// <para>
/// <i>Implementation note:</i> This method is not designed for concurrent
/// invocation: it uses static state variables.
/// </para>
/// </remarks>
/// <param name="t">Treebank the statistics visitor is applied to.</param>
/// <param name="doTags">Forwarded to the ParentAnnotationStats constructor.</param>
/// <param name="algorithm">Not read by this method body.</param>
/// <param name="phrasalCutOff">Cut-off passed to GetSplitters for the phrasal rule counts.</param>
/// <param name="tagCutOff">Cut-off passed to GetSplitters for the tag rule counts.</param>
/// <param name="tlp">Language pack forwarded to the ParentAnnotationStats constructor; may be null.</param>
/// <returns>The set filled by the phrasal pass followed by the tag pass.</returns>
public static ICollection<string> GetSplitCategories(Treebank t, bool doTags, int algorithm, double phrasalCutOff, double tagCutOff, ITreebankLanguagePack tlp)
{
    // Walk the whole treebank once so the visitor accumulates its rule counts.
    var stats = new Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats(tlp, doTags);
    t.Apply(stats);

    // Both passes write into the same result set: phrasal rules first, then tag rules.
    ICollection<string> categories = Generics.NewHashSet();
    Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats.GetSplitters(
        phrasalCutOff, stats.nodeRules, stats.pRules, stats.gPRules, categories);
    Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats.GetSplitters(
        tagCutOff, stats.tagNodeRules, stats.tagPRules, stats.tagGPRules, categories);

    return categories;
}
/// <summary>
/// Calculate parent annotation statistics suitable for doing
/// selective parent splitting in the PCFGParser inside
/// FactoredParser.
/// </summary>
/// <remarks>
/// Usage: java edu.stanford.nlp.parser.lexparser.ParentAnnotationStats
/// [-tags] [-cutOff value] treebankPath
/// <para>
/// Leading arguments that start with "-" are treated as options:
/// "-tags" sets the doTags flag, and "-cutOff value" switches the output
/// from the full statistics report to the set returned by
/// GetSplitCategories, using the value as both the phrasal and the tag
/// cut-off. Unrecognised options are logged and skipped. The first
/// argument that is not an option is loaded as the treebank path; if no
/// such argument exists the usage message is printed instead.
/// </para>
/// </remarks>
/// <param name="args">Optional flags followed by the path to the Treebank</param>
/// <exception cref="System.FormatException">The value following "-cutOff" is not a valid number.</exception>
public static void Main(string[] args)
{
    const string usage = "Usage: java edu.stanford.nlp.parser.lexparser.ParentAnnotationStats [-tags] treebankPath";

    if (args.Length < 1)
    {
        System.Console.Out.WriteLine(usage);
        return;
    }

    bool doTags = false;
    bool useCutOff = false;
    double cutOff = 0.0;
    int i = 0;

    // Bound the scan by args.Length: a command line made up only of options
    // (or ending in a "-cutOff" with no value) must not index past the array.
    while (i < args.Length && args[i].StartsWith("-", System.StringComparison.Ordinal))
    {
        if (args[i].Equals("-tags"))
        {
            doTags = true;
            i++;
        }
        else if (args[i].Equals("-cutOff") && i + 1 < args.Length)
        {
            useCutOff = true;
            // Invariant culture keeps "0.5" parsing the same way regardless of
            // the machine's regional settings.
            cutOff = double.Parse(args[i + 1], System.Globalization.CultureInfo.InvariantCulture);
            i += 2;
        }
        else
        {
            log.Info("Unknown option: " + args[i]);
            i++;
        }
    }

    // Every argument was consumed as an option, so there is no treebank path.
    if (i >= args.Length)
    {
        System.Console.Out.WriteLine(usage);
        return;
    }

    Treebank treebank = new DiskTreebank(null);
    treebank.LoadPath(args[i]);

    if (useCutOff)
    {
        // The same cut-off is applied to phrasal categories and to tags.
        ICollection<string> splitters = GetSplitCategories(treebank, doTags, 0, cutOff, cutOff, null);
        System.Console.Out.WriteLine(splitters);
    }
    else
    {
        var pas = new Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats(null, doTags);
        treebank.Apply(pas);
        pas.PrintStats();
    }
}