Exemplos de código com IGrammaticalStructureFactory em C# (CSharp)

Exemplo n.º 1

0

Exibir arquivo

        /// <summary>
        /// Reads sentences from a file, parses each one, and prints the parse tree
        /// followed (when available) by its CC-processed typed dependencies.
        /// </summary>
        /// <remarks>
        /// Sentences are obtained by enumerating a DocumentPreprocessor constructed
        /// from <paramref name="filename"/>.  Every tree is written with PennPrint.
        /// Dependencies are printed only if the parser's language pack reports that
        /// it supports grammatical structures.
        /// </remarks>
        /// <param name="lp">Parser applied to every sentence.</param>
        /// <param name="filename">Argument handed to the DocumentPreprocessor constructor.</param>
        public static void DemoDP(LexicalizedParser lp, string filename)
        {
            ITreebankLanguagePack languagePack = lp.TreebankLanguagePack();

            // The factory stays null when the language pack has no grammatical
            // structure support; the loop below then prints trees only.
            IGrammaticalStructureFactory dependencyFactory = languagePack.SupportsGrammaticalStructures()
                ? languagePack.GrammaticalStructureFactory()
                : null;

            // A tokenizer could also be created here and handed to the
            // DocumentPreprocessor instead of relying on its default.
            foreach (IList <IHasWord> tokens in new DocumentPreprocessor(filename))
            {
                Tree tree = lp.Apply(tokens);
                tree.PennPrint();
                System.Console.Out.WriteLine();

                if (dependencyFactory == null)
                {
                    continue;
                }

                GrammaticalStructure structure    = dependencyFactory.NewGrammaticalStructure(tree);
                ICollection          dependencies = structure.TypedDependenciesCCprocessed();
                System.Console.Out.WriteLine(dependencies);
                System.Console.Out.WriteLine();
            }
        }

Exemplo n.º 2

0

Exibir arquivo

        /// <summary>
        /// Shows three ways of driving the parser: with pre-tokenized words, with
        /// text run through an explicit tokenizer, and with a TreePrint for output.
        /// </summary>
        /// <remarks>
        /// The first parse is printed with PennPrint.  The second parse is turned into
        /// CC-processed typed dependencies, which are written to standard output, and
        /// is then printed again through a TreePrint configured with
        /// "penn,typedDependenciesCollapsed".  The sample sentences are English.
        /// </remarks>
        /// <param name="lp">Parser used for both sample sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Case 1: the words are already correctly tokenized.
            string[]          words      = new string[] { "This", "is", "an", "easy", "sentence", "." };
            IList <CoreLabel> wordLabels = SentenceUtils.ToCoreLabelList(words);
            Tree parse = lp.Apply(wordLabels);
            parse.PennPrint();
            System.Console.Out.WriteLine();

            // Case 2: raw text is split by an explicitly constructed tokenizer.
            string rawText = "This is another sentence.";
            ITokenizerFactory <CoreLabel> ptbFactory = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
            ITokenizer <CoreLabel>        tokenizer  = ptbFactory.GetTokenizer(new StringReader(rawText));
            IList <CoreLabel>             tokens     = tokenizer.Tokenize();
            parse = lp.Apply(tokens);

            // Derive typed dependencies for the second parse via the language pack.
            ITreebankLanguagePack        languagePack      = lp.TreebankLanguagePack();
            IGrammaticalStructureFactory dependencyFactory = languagePack.GrammaticalStructureFactory();
            GrammaticalStructure         structure         = dependencyFactory.NewGrammaticalStructure(parse);
            IList <TypedDependency>      dependencies      = structure.TypedDependenciesCCprocessed();
            System.Console.Out.WriteLine(dependencies);
            System.Console.Out.WriteLine();

            // Case 3: a TreePrint emits the tree and its dependencies; the option
            // string selects which outputs are produced.
            TreePrint treePrinter = new TreePrint("penn,typedDependenciesCollapsed");
            treePrinter.PrintTree(parse);
        }

Exemplo n.º 3

0

Exibir arquivo

Arquivo: SemgrexDemo.cs Projeto: awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Entry point: builds a dependency graph from a hard-coded tree and logs
        /// every match of a Semgrex pattern against it.
        /// </summary>
        /// <param name="args">Command-line arguments; not read by this method.</param>
        public static void Main(string[] args)
        {
            // Sample input only; normally a tree comes from a parser or a treebank.
            string bracketedTree = "(ROOT  (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))";
            Tree   tree          = Tree.ValueOf(bracketedTree);

            // Uncollapsed dependencies as a SemanticGraph.  When many graphs are
            // needed, the original note recommends going through a
            // GrammaticalStructureFactory and its intermediate GrammaticalStructure.
            SemanticGraph graph = SemanticGraphFactory.GenerateUncollapsedDependencies(tree);

            // Other language parameters could be substituted here; the original
            // note (dated 2014) mentions only English and Chinese.
            ITreebankLangParserParams    parserParams      = new EnglishTreebankParserParams();
            IGrammaticalStructureFactory dependencyFactory = parserParams.TreebankLanguagePack().GrammaticalStructureFactory(
                parserParams.TreebankLanguagePack().PunctuationWordRejectFilter(),
                parserParams.TypedDependencyHeadFinder());
            // Built to illustrate the factory route; the result is not used below.
            GrammaticalStructure structure = dependencyFactory.NewGrammaticalStructure(tree);

            log.Info(graph);

            SemgrexPattern pattern = SemgrexPattern.Compile("{}=A <<nsubj {}=B");
            SemgrexMatcher matches = pattern.Matcher(graph);

            // Per the original note, the sample tree yields two matches: "likes"
            // is an ancestor of both "dog" and "my" via the nsubj relation.
            while (matches.Find())
            {
                string line = matches.GetNode("A") + " <<nsubj " + matches.GetNode("B");
                log.Info(line);
            }
        }

Exemplo n.º 4

0

Exibir arquivo

        /// <summary>
        /// Configures a ParserAnnotator from properties whose keys are prefixed with
        /// <paramref name="annotatorName"/>.
        /// </summary>
        /// <remarks>
        /// Keys read, each as annotatorName + suffix: .model, .debug, .flags, .maxlen,
        /// .treemap, .maxtime, .kbest, .keepPunct, .buildgraphs, .originalDependencies,
        /// .nthreads (falling back to the unprefixed "nthreads"), .binaryTrees,
        /// .nosquash and .extradependencies.
        /// </remarks>
        /// <param name="annotatorName">Prefix used to build every property key.</param>
        /// <param name="props">Property set the configuration is read from.</param>
        /// <exception cref="System.ArgumentException">The resolved model value is null.</exception>
        public ParserAnnotator(string annotatorName, Properties props)
        {
            string modelPath = props.GetProperty(annotatorName + ".model", LexicalizedParser.DefaultParserLoc);
            if (modelPath == null)
            {
                throw new ArgumentException("No model specified for Parser annotator " + annotatorName);
            }

            this.Verbose = PropertiesUtils.GetBool(props, annotatorName + ".debug", false);
            string[] parserFlags = ConvertFlagsToArray(props.GetProperty(annotatorName + ".flags"));
            this.parser            = LoadModel(modelPath, Verbose, parserFlags);
            this.maxSentenceLength = PropertiesUtils.GetInt(props, annotatorName + ".maxlen", -1);

            // Optional tree transformation, instantiated by class name when configured.
            string treeMapClassName = props.GetProperty(annotatorName + ".treemap");
            if (treeMapClassName != null)
            {
                this.treeMap = ReflectionLoading.LoadByReflection(treeMapClassName, props);
            }
            else
            {
                this.treeMap = null;
            }

            this.maxParseTime = PropertiesUtils.GetLong(props, annotatorName + ".maxtime", -1);
            this.kBest        = PropertiesUtils.GetInt(props, annotatorName + ".kbest", 1);
            this.keepPunct    = PropertiesUtils.GetBool(props, annotatorName + ".keepPunct", true);

            // Graph building is honoured only when the parser's language parameters
            // support basic dependencies; otherwise it is forced off, with a log
            // message if the property asked for it.
            string buildGraphsKey = annotatorName + ".buildgraphs";
            if (this.parser.GetTLPParams().SupportsBasicDependencies())
            {
                this.BuildGraphs = PropertiesUtils.GetBool(props, buildGraphsKey, true);
            }
            else
            {
                if (PropertiesUtils.GetBool(props, buildGraphsKey))
                {
                    log.Info("WARNING: " + buildGraphsKey + " set to true, but " + this.parser.GetTLPParams().GetType() + " does not support dependencies");
                }
                this.BuildGraphs = false;
            }

            if (!this.BuildGraphs)
            {
                this.gsf = null;
            }
            else
            {
                // The original-dependencies switch is applied before the factory is requested.
                bool useOriginalDependencies = PropertiesUtils.GetBool(props, annotatorName + ".originalDependencies", false);
                parser.GetTLPParams().SetGenerateOriginalDependencies(useOriginalDependencies);
                ITreebankLanguagePack languagePack      = parser.GetTLPParams().TreebankLanguagePack();
                IPredicate <string>   punctuationFilter = this.keepPunct ? Filters.AcceptFilter() : languagePack.PunctuationWordRejectFilter();
                this.gsf = languagePack.GrammaticalStructureFactory(punctuationFilter, parser.GetTLPParams().TypedDependencyHeadFinder());
            }

            // The annotator-specific thread count defaults to the global "nthreads" value.
            int globalThreads = PropertiesUtils.GetInt(props, "nthreads", 1);
            this.nThreads = PropertiesUtils.GetInt(props, annotatorName + ".nthreads", globalThreads);

            bool binaryTreesRequested = StanfordCoreNLP.UsesBinaryTrees(props);
            this.saveBinaryTrees   = PropertiesUtils.GetBool(props, annotatorName + ".binaryTrees", binaryTreesRequested);
            this.noSquash          = PropertiesUtils.GetBool(props, annotatorName + ".nosquash", false);
            this.extraDependencies = MetaClass.Cast(props.GetProperty(annotatorName + ".extradependencies", "NONE"), typeof(GrammaticalStructure.Extras));
        }

Exemplo n.º 5

0

Exibir arquivo

        // static methods
        /// <summary>
        /// Stores the first tree on the sentence, fills in missing tags, optionally
        /// attaches dependency graphs, and records all trees when there are several.
        /// </summary>
        /// <remarks>
        /// Every tree is converted to CoreLabels and span-indexed.  Only the first
        /// tree is stored as the sentence's TreeAnnotation and used for tag filling
        /// and graph generation.  When more than one tree is supplied, the whole list
        /// is stored as KBestTreesAnnotation.
        /// Thread safety: this method takes no locks around
        /// <paramref name="gsf"/>.  The original note states that the English and
        /// Chinese factory implementations are thread safe; that cannot be
        /// confirmed from this method.
        /// </remarks>
        /// <param name="verbose">When true, logs the tree and the collapsed dependencies.</param>
        /// <param name="buildGraphs">When true, dependency graphs are generated and stored.</param>
        /// <param name="gsf">Factory used to build grammatical structures; dereferenced only when <paramref name="buildGraphs"/> is true.</param>
        /// <param name="sentence">Sentence map that receives the annotations.</param>
        /// <param name="trees">Candidate trees; the first one is treated as the primary parse.</param>
        /// <param name="extras">Extras setting forwarded to the collapsed, uncollapsed and CC-processed graph generators.</param>
        public static void FillInParseAnnotations(bool verbose, bool buildGraphs, IGrammaticalStructureFactory gsf, ICoreMap sentence, IList <Tree> trees, GrammaticalStructure.Extras extras)
        {
            bool primaryHandled = false;

            foreach (Tree candidate in trees)
            {
                // Ensure all nodes are CoreLabels (the original carries a TODO
                // questioning why this is not always already the case).
                Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(candidate);
                // Add start and end token positions to every node; the original
                // note says downstream annotators rely on them.
                candidate.IndexSpans(0);

                if (primaryHandled)
                {
                    continue;
                }
                primaryHandled = true;

                sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), candidate);
                if (verbose)
                {
                    log.Info("Tree is:");
                    candidate.PennPrint(System.Console.Error);
                }
                SetMissingTags(sentence, candidate);

                if (!buildGraphs)
                {
                    continue;
                }

                // A fresh GrammaticalStructure is built for each of the five graphs
                // because, per the original note, dependency conversion changes the
                // structure it is given.
                SemanticGraph collapsed        = SemanticGraphFactory.GenerateCollapsedDependencies(gsf.NewGrammaticalStructure(candidate), extras);
                SemanticGraph basic            = SemanticGraphFactory.GenerateUncollapsedDependencies(gsf.NewGrammaticalStructure(candidate), extras);
                SemanticGraph ccProcessed      = SemanticGraphFactory.GenerateCCProcessedDependencies(gsf.NewGrammaticalStructure(candidate), extras);
                SemanticGraph enhanced         = SemanticGraphFactory.GenerateEnhancedDependencies(gsf.NewGrammaticalStructure(candidate));
                SemanticGraph enhancedPlusPlus = SemanticGraphFactory.GenerateEnhancedPlusPlusDependencies(gsf.NewGrammaticalStructure(candidate));

                if (verbose)
                {
                    log.Info("SDs:");
                    log.Info(collapsed.ToString(SemanticGraph.OutputFormat.List));
                }

                sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation), collapsed);
                sentence.Set(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation), basic);
                sentence.Set(typeof(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation), ccProcessed);
                sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation), enhanced);
                sentence.Set(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation), enhancedPlusPlus);
            }

            // The full candidate list is kept only when there is more than one tree.
            if (trees.Count > 1)
            {
                sentence.Set(typeof(TreeCoreAnnotations.KBestTreesAnnotation), trees);
            }
        }

Exemplo n.º 6

0

Exibir arquivo

 /// <summary>
 /// Builds a ParserAnnotator around an already-loaded parser, using fixed values
 /// for every option that is not passed in.
 /// </summary>
 /// <param name="parser">Parser to wrap; also decides whether dependency graphs are built.</param>
 /// <param name="verbose">Stored as the Verbose setting.</param>
 /// <param name="maxSent">Stored as the maximum sentence length.</param>
 /// <param name="treeMap">Stored as the tree transformation.</param>
 public ParserAnnotator(ParserGrammar parser, bool verbose, int maxSent, Func <Tree, Tree> treeMap)
 {
     // Values supplied by the caller.
     this.Verbose           = verbose;
     this.BuildGraphs       = parser.GetTLPParams().SupportsBasicDependencies();
     this.parser            = parser;
     this.maxSentenceLength = maxSent;
     this.treeMap           = treeMap;

     // Fixed settings for this constructor.
     this.maxParseTime = 0;
     this.kBest        = 1;
     this.keepPunct    = true;

     if (!this.BuildGraphs)
     {
         this.gsf = null;
     }
     else
     {
         // NOTE(review): keepPunct is set to true above, yet the factory is built
         // with the punctuation reject filter - confirm this is intended.
         ITreebankLanguagePack languagePack = parser.GetTLPParams().TreebankLanguagePack();
         this.gsf = languagePack.GrammaticalStructureFactory(languagePack.PunctuationWordRejectFilter(), parser.GetTLPParams().TypedDependencyHeadFinder());
     }

     this.nThreads          = 1;
     this.saveBinaryTrees   = false;
     this.noSquash          = false;
     this.extraDependencies = GrammaticalStructure.Extras.None;
 }

Exemplo n.º 7

0

Exibir arquivo

Arquivo: ParserDemo2.cs Projeto: zerouid/Stanford.CoreNLP.NET

        /// <summary>This example shows a few more ways of providing input to a parser.</summary>
        /// <remarks>
        /// Usage: ParserDemo2 [grammar [textFile]]
        /// With a text file, every sentence produced by a DocumentPreprocessor over
        /// that file is parsed.  Without one, three built-in sentences are parsed: a
        /// pre-tokenized word list, a string tokenized with the language pack's
        /// tokenizer factory, and a pre-tagged word list.  For each sentence the tree,
        /// its CC-processed typed dependencies, its words and its tagged yield are
        /// printed.  Finally a raw string is parsed directly.
        /// </remarks>
        /// <param name="args">Optional grammar path (args[0]) and optional text file (args[1]).</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string grammar = args.Length > 0 ? args[0] : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";

            string[]                        options = new string[] { "-maxLength", "80", "-retainTmpSubcategories" };
            LexicalizedParser               lp      = ((LexicalizedParser)LexicalizedParser.LoadModel(grammar, options));
            ITreebankLanguagePack           tlp     = lp.GetOp().Langpack();
            IGrammaticalStructureFactory    gsf     = tlp.GrammaticalStructureFactory();
            IEnumerable <IList <IHasWord> > sentences;

            if (args.Length > 1)
            {
                // Collect every sentence of the given file up front.
                DocumentPreprocessor      dp  = new DocumentPreprocessor(args[1]);
                IList <IList <IHasWord> > tmp = new List <IList <IHasWord> >();
                foreach (IList <IHasWord> sentence in dp)
                {
                    tmp.Add(sentence);
                }
                sentences = tmp;
            }
            else
            {
                // Showing tokenization and parsing in code a couple of different ways.
                string[]         sent     = new string[] { "This", "is", "an", "easy", "sentence", "." };
                IList <IHasWord> sentence = new List <IHasWord>();
                foreach (string word in sent)
                {
                    sentence.Add(new Word(word));
                }
                string sent2 = "This is a slightly longer and more complex " + "sentence requiring tokenization.";
                // Use the default tokenizer for this TreebankLanguagePack
                ITokenizer <IHasWord> toke      = tlp.GetTokenizerFactory().GetTokenizer(new StringReader(sent2));
                IList <IHasWord>      sentence2 = toke.Tokenize();
                string[] sent3 = new string[] { "It", "can", "can", "it", "." };
                string[] tag3  = new string[] { "PRP", "MD", "VB", "PRP", "." };
                // Parser gets second "can" wrong without help.
                // The list is declared as IList<IHasWord> rather than IList<TaggedWord>:
                // IList<T> is invariant, so an IList<TaggedWord> could not be added to
                // the IList<IList<IHasWord>> below.  The elements are still TaggedWords.
                IList <IHasWord> sentence3 = new List <IHasWord>();
                for (int i = 0; i < sent3.Length; i++)
                {
                    sentence3.Add(new TaggedWord(sent3[i], tag3[i]));
                }
                Tree parse = lp.Parse(sentence3);
                parse.PennPrint();
                IList <IList <IHasWord> > tmp = new List <IList <IHasWord> >();
                tmp.Add(sentence);
                tmp.Add(sentence2);
                tmp.Add(sentence3);
                sentences = tmp;
            }
            foreach (IList <IHasWord> sentence_1 in sentences)
            {
                Tree parse = lp.Parse(sentence_1);
                parse.PennPrint();
                System.Console.Out.WriteLine();
                GrammaticalStructure    gs  = gsf.NewGrammaticalStructure(parse);
                IList <TypedDependency> tdl = gs.TypedDependenciesCCprocessed();
                System.Console.Out.WriteLine(tdl);
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine("The words of the sentence:");
                foreach (ILabel lab in parse.Yield())
                {
                    // CoreLabels get the value-map rendering; other labels use their default string form.
                    if (lab is CoreLabel)
                    {
                        System.Console.Out.WriteLine(((CoreLabel)lab).ToString(CoreLabel.OutputFormat.ValueMap));
                    }
                    else
                    {
                        System.Console.Out.WriteLine(lab);
                    }
                }
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine(parse.TaggedYield());
                System.Console.Out.WriteLine();
            }
            // This method turns the String into a single sentence using the
            // default tokenizer for the TreebankLanguagePack.
            string sent3_1 = "This is one last test!";

            lp.Parse(sent3_1).PennPrint();
        }

Exemplos de IGrammaticalStructureFactory em C# (CSharp)