Exemplo n.º 1
0
        /// <summary>Returns the collection of category names to split on.</summary>
        /// <remarks>
        /// <para>
        /// Computes parent annotation statistics suitable for doing selective parent
        /// splitting in the PCFGParser inside FactoredParser: a
        /// <c>ParentAnnotationStats</c> instance is created from <paramref name="tlp"/>
        /// and <paramref name="doTags"/>, handed to <c>t.Apply</c>, and its rule tables
        /// are then passed to <c>GetSplitters</c> twice, once with
        /// <paramref name="phrasalCutOff"/> and once with <paramref name="tagCutOff"/>.
        /// Both calls receive the same result collection.
        /// </para>
        /// <para>
        /// If tlp is non-null tlp.basicCategory() will be called on parent and
        /// grandparent nodes.
        /// </para>
        /// <para>
        /// <i>Implementation note:</i> This method is not designed for concurrent
        /// invocation: it uses static state variables.
        /// </para>
        /// </remarks>
        /// <param name="t">Treebank the statistics collector is applied to.</param>
        /// <param name="doTags">Forwarded to the <c>ParentAnnotationStats</c> constructor.</param>
        /// <param name="algorithm">Not read by this method.</param>
        /// <param name="phrasalCutOff">Cut-off passed to <c>GetSplitters</c> with the node/parent/grandparent rule tables.</param>
        /// <param name="tagCutOff">Cut-off passed to <c>GetSplitters</c> with the tag rule tables.</param>
        /// <param name="tlp">Language pack forwarded to the <c>ParentAnnotationStats</c> constructor; may be null.</param>
        /// <returns>The collection handed to both <c>GetSplitters</c> calls.</returns>
        public static ICollection <string> GetSplitCategories(Treebank t, bool doTags, int algorithm, double phrasalCutOff, double tagCutOff, ITreebankLanguagePack tlp)
        {
            // Result collection shared by the phrasal pass and the tag pass below.
            ICollection<string> categories = Generics.NewHashSet();

            // Let the treebank drive the statistics collector.
            var stats = new Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats(tlp, doTags);
            t.Apply(stats);

            // Phrasal categories first, then tag categories, each with its own cut-off.
            Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats.GetSplitters(
                phrasalCutOff, stats.nodeRules, stats.pRules, stats.gPRules, categories);
            Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats.GetSplitters(
                tagCutOff, stats.tagNodeRules, stats.tagPRules, stats.tagGPRules, categories);

            return categories;
        }
Exemplo n.º 2
0
        /// <summary>
        /// Command-line entry point: calculates parent annotation statistics suitable
        /// for doing selective parent splitting in the PCFGParser inside FactoredParser.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Arguments: <c>[-tags] [-cutOff value] treebankPath</c>.
        /// </para>
        /// <para>
        /// When <c>-cutOff</c> is given, the value is used as both the phrasal and the
        /// tag cut-off for <c>GetSplitCategories</c> and the resulting categories are
        /// printed. Otherwise a <c>ParentAnnotationStats</c> instance is applied to the
        /// treebank and <c>PrintStats</c> is called on it. Unrecognised options are
        /// logged and skipped. If no arguments are given, or no treebank path remains
        /// after the options, the usage message is printed and the method returns.
        /// </para>
        /// </remarks>
        /// <param name="args">Optional flags followed by the path to the Treebank.</param>
        /// <exception cref="System.FormatException">
        /// The value following <c>-cutOff</c> is not a valid number.
        /// </exception>
        public static void Main(string[] args)
        {
            const string usage = "Usage: java edu.stanford.nlp.parser.lexparser.ParentAnnotationStats [-tags] treebankPath";

            if (args.Length < 1)
            {
                System.Console.Out.WriteLine(usage);
                return;
            }

            bool   doTags    = false;
            bool   useCutOff = false;
            double cutOff    = 0.0;
            int    i         = 0;

            // Bounds-check before indexing: a command line made up only of options
            // (e.g. just "-tags") would otherwise walk past the end of args.
            while (i < args.Length && args[i].StartsWith("-", System.StringComparison.Ordinal))
            {
                if (args[i].Equals("-tags"))
                {
                    doTags = true;
                    i++;
                }
                else if (args[i].Equals("-cutOff") && i + 1 < args.Length)
                {
                    useCutOff = true;
                    // Invariant culture keeps "0.5" parsing the same way on every machine.
                    cutOff    = double.Parse(args[i + 1], System.Globalization.CultureInfo.InvariantCulture);
                    i        += 2;
                }
                else
                {
                    log.Info("Unknown option: " + args[i]);
                    i++;
                }
            }

            // Every argument was consumed as an option: there is no treebank path to load.
            if (i >= args.Length)
            {
                System.Console.Out.WriteLine(usage);
                return;
            }

            Treebank treebank = new DiskTreebank(null);
            treebank.LoadPath(args[i]);

            if (useCutOff)
            {
                ICollection <string> splitters = GetSplitCategories(treebank, doTags, 0, cutOff, cutOff, null);
                // Print the elements themselves; WriteLine on a collection object would
                // fall back to ToString(), which for standard collections is the type name.
                System.Console.Out.WriteLine("[" + string.Join(", ", splitters) + "]");
            }
            else
            {
                Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats pas = new Edu.Stanford.Nlp.Parser.Lexparser.ParentAnnotationStats(null, doTags);
                treebank.Apply(pas);
                pas.PrintStats();
            }
        }