// just static main
        /// <summary>
        /// Demo entry point: builds a dependency graph from a hard-coded
        /// bracketed parse tree, logs the graph, and then logs every match of
        /// a Semgrex pattern against it.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            // A real application would obtain the tree from a parser or a
            // treebank; the literal below exists purely for demonstration.
            string bracketedParse = "(ROOT  (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
            Tree parseTree = Tree.ValueOf(bracketedParse);

            // Produces English uncollapsed dependencies as a SemanticGraph.
            // When many SemanticGraphs are needed, prefer building a
            // GrammaticalStructureFactory once and generating the intermediate
            // GrammaticalStructure from it, as shown below.
            SemanticGraph dependencies = SemanticGraphFactory.GenerateUncollapsedDependencies(parseTree);

            // The Chinese params (or those of any other supported language)
            // could be used here instead.  As of 2014, only English and
            // Chinese are supported.
            ITreebankLangParserParams langParams = new EnglishTreebankParserParams();

            // Pieces of the factory call, evaluated in the same order as a
            // single chained expression would evaluate them.
            var languagePack      = langParams.TreebankLanguagePack();
            var punctuationFilter = langParams.TreebankLanguagePack().PunctuationWordRejectFilter();
            var headFinder        = langParams.TypedDependencyHeadFinder();

            IGrammaticalStructureFactory structureFactory = languagePack.GrammaticalStructureFactory(punctuationFilter, headFinder);

            // Built to illustrate the factory route; not read again below.
            GrammaticalStructure structure = structureFactory.NewGrammaticalStructure(parseTree);

            log.Info(dependencies);

            SemgrexPattern pattern = SemgrexPattern.Compile("{}=A <<nsubj {}=B");
            SemgrexMatcher hits    = pattern.Matcher(dependencies);

            // Per the original author's note, two matches are expected for
            // this tree: "likes" is an ancestor of both "dog" and "My" via the
            // nsubj relation.
            while (hits.Find())
            {
                var ancestor   = hits.GetNode("A");
                var descendant = hits.GetNode("B");
                log.Info(ancestor + " <<nsubj " + descendant);
            }
        }
Exemple #2
0
        /// <summary>
        /// Computes the split categories for English using a fixed file range
        /// and a fixed cutoff of 300.
        /// </summary>
        /// <remarks>
        /// Everything except the treebank location is hardwired: the English
        /// parser params, the file-number range 200-2199 (described by the
        /// original author as Penn Treebank sections 2-21), and the cutoff of
        /// 300 (as used in ACL03PCFG).  According to the original note, the
        /// method was added to help upgrade code that had been relying on a
        /// pre-stored list in cases where no Treebank was available.
        /// </remarks>
        /// <param name="treebankRoot">Path handed to <c>Treebank.LoadPath</c> as the location to load trees from.</param>
        /// <returns>Whatever <c>GetSplitCategories</c> returns for the loaded trees.</returns>
        public static ICollection <string> GetEnglishSplitCategories(string treebankRoot)
        {
            const int    firstFileNumber = 200;
            const int    lastFileNumber  = 2199;
            const double cutoff          = 300.0;

            ITreebankLangParserParams englishParams = new EnglishTreebankParserParams();
            Treebank treebank = englishParams.MemoryTreebank();

            // Restrict loading to files whose number lies in the hardwired range.
            var sectionFilter = new NumberRangeFileFilter(firstFileNumber, lastFileNumber, true);
            treebank.LoadPath(treebankRoot, sectionFilter);

            var languagePack = englishParams.TreebankLanguagePack();
            return GetSplitCategories(treebank, cutoff, languagePack);
        }