Ejemplos de LexicalizedParser.TreebankLanguagePack en C# (CSharp)

Lenguaje de programación: C# (CSharp)

Clase / Tipo: LexicalizedParser

Método / Función: TreebankLanguagePack

Ejemplos en hotexamples.com: 5

C# (CSharp) LexicalizedParser.TreebankLanguagePack - 5 ejemplos encontrados. Estos son los ejemplos en C# (CSharp) del mundo real mejor valorados de LexicalizedParser.TreebankLanguagePack extraídos de proyectos de código abierto. Puedes valorar ejemplos para ayudarnos a mejorar la calidad de los ejemplos.

Métodos usados con frecuencia

Mostrar Ocultar

LoadModel(20)

GetOp(14)

loadModel(11)

apply(8)

TreebankLanguagePack(5)

SaveParserToSerialized(5)

ParserQuery(4)

BuildTrainTransformer(3)

Apply(3)

ParseTree(2)

TrainFromTreebank(2)

CopyLexicalizedParser(2)

LoadModelFromZip(1)

LexicalizedParserQuery(1)

GetTLPParams(1)

GetLexicon(1)

getLexicon(1)

Parse(1)

Ejemplo n.º 1

Mostrar archivo

Archivo: PrintTagList.cs Proyecto: awesomedotnetcore/Stanford.CoreNLP.NET

        /// <summary>
        /// Loads a serialized parser model and prints summary statistics about it.
        /// </summary>
        /// <remarks>
        /// The model path is given with <c>-model &lt;file&gt;</c>.  The output lists the
        /// distinct basic categories (as returned by <c>BasicCategory</c>) of every
        /// entry in the parser's tag index and state index, the raw sizes of both
        /// indexes, and the number of unary and binary grammar rules.
        /// </remarks>
        /// <param name="args">Command line arguments; only <c>-model</c> is recognized.</param>
        /// <exception cref="ArgumentException">
        /// An unknown argument was supplied, or <c>-model</c> has no value after it.
        /// </exception>
        public static void Main(string[] args)
        {
            string parserFile = null;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                {
                    // Guard against "-model" being the last argument; otherwise
                    // the read below would fail with an index-out-of-range error.
                    if (argIndex + 1 >= args.Length)
                    {
                        string missing = "Missing value for argument " + args[argIndex];
                        log.Info(missing);
                        throw new ArgumentException(missing);
                    }
                    parserFile = args[argIndex + 1];
                    argIndex  += 2;
                }
                else
                {
                    string error = "Unknown argument " + args[argIndex];
                    log.Info(error);
                    throw new ArgumentException(error);
                }
            }
            if (parserFile == null)
            {
                log.Info("Must specify a model file with -model");
                System.Environment.Exit(2);
            }
            LexicalizedParser    parser = ((LexicalizedParser)LexicalizedParser.LoadModel(parserFile));
            ICollection <string> tags   = Generics.NewTreeSet();

            // Collapse every tag to its basic category; the set removes duplicates.
            foreach (string tag in parser.tagIndex)
            {
                tags.Add(parser.TreebankLanguagePack().BasicCategory(tag));
            }
            System.Console.Out.WriteLine("Basic tags: " + tags.Count);
            foreach (string tag_1 in tags)
            {
                System.Console.Out.Write("  " + tag_1);
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("All tags size: " + parser.tagIndex.Size());
            ICollection <string> states = Generics.NewTreeSet();

            // Same reduction for the grammar states.
            foreach (string state in parser.stateIndex)
            {
                states.Add(parser.TreebankLanguagePack().BasicCategory(state));
            }
            System.Console.Out.WriteLine("Basic states: " + states.Count);
            foreach (string tag_2 in states)
            {
                System.Console.Out.Write("  " + tag_2);
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("All states size: " + parser.stateIndex.Size());
            System.Console.Out.WriteLine("Unary grammar size: " + parser.ug.NumRules());
            System.Console.Out.WriteLine("Binary grammar size: " + parser.bg.NumRules());
        }

Ejemplo n.º 2

Mostrar archivo

        /// <summary>
        /// Splits a file into sentences, parses each one and prints the result.
        /// </summary>
        /// <remarks>
        /// Each sentence produced by a DocumentPreprocessor over
        /// <paramref name="filename"/> is parsed and printed with PennPrint.
        /// When the parser's treebank language pack reports support for
        /// grammatical structures, the CC-processed typed dependencies of the
        /// parse are printed as well.  A tokenizer could also be created
        /// separately and handed to DocumentPreprocessor.
        /// </remarks>
        /// <param name="lp">Parser applied to every sentence.</param>
        /// <param name="filename">File to segment, tokenize and parse.</param>
        public static void DemoDP(LexicalizedParser lp, string filename)
        {
            ITreebankLanguagePack languagePack = lp.TreebankLanguagePack();

            // Stays null when the language pack cannot build grammatical structures.
            IGrammaticalStructureFactory structureFactory = languagePack.SupportsGrammaticalStructures()
                ? languagePack.GrammaticalStructureFactory()
                : null;

            DocumentPreprocessor preprocessor = new DocumentPreprocessor(filename);

            foreach (IList <IHasWord> words in preprocessor)
            {
                Tree tree = lp.Apply(words);

                tree.PennPrint();
                System.Console.Out.WriteLine();

                if (structureFactory == null)
                {
                    continue;
                }

                GrammaticalStructure structure    = structureFactory.NewGrammaticalStructure(tree);
                ICollection          dependencies = structure.TypedDependenciesCCprocessed();

                System.Console.Out.WriteLine(dependencies);
                System.Console.Out.WriteLine();
            }
        }

Ejemplo n.º 3

Mostrar archivo

        /// <summary>
        /// Shows two further ways to call the parser: on a list of words that
        /// is already tokenized, and on raw text run through an explicit
        /// tokenizer as a single sentence.
        /// </summary>
        /// <remarks>
        /// The first parse is printed with PennPrint.  For the second parse the
        /// CC-processed typed dependencies are printed, and the tree is then
        /// printed again through a TreePrint configured with
        /// "penn,typedDependenciesCollapsed"; the options given to TreePrint
        /// decide what it emits.  This code is for English.
        /// </remarks>
        /// <param name="lp">Parser used for both sentences.</param>
        public static void DemoAPI(LexicalizedParser lp)
        {
            // Variant 1: the caller supplies correctly tokenized words.
            string[]          words      = { "This", "is", "an", "easy", "sentence", "." };
            IList <CoreLabel> wordLabels = SentenceUtils.ToCoreLabelList(words);
            Tree firstParse = lp.Apply(wordLabels);

            firstParse.PennPrint();
            System.Console.Out.WriteLine();

            // Variant 2: raw text goes through an explicitly built tokenizer.
            string rawText = "This is another sentence.";
            ITokenizerFactory <CoreLabel> factory   = PTBTokenizer.Factory(new CoreLabelTokenFactory(), string.Empty);
            ITokenizer <CoreLabel>        tokenizer = factory.GetTokenizer(new StringReader(rawText));
            IList <CoreLabel>             tokens    = tokenizer.Tokenize();
            Tree secondParse = lp.Apply(tokens);

            ITreebankLanguagePack        languagePack     = lp.TreebankLanguagePack();
            IGrammaticalStructureFactory structureFactory = languagePack.GrammaticalStructureFactory();
            GrammaticalStructure         structure        = structureFactory.NewGrammaticalStructure(secondParse);
            IList <TypedDependency>      dependencies     = structure.TypedDependenciesCCprocessed();

            System.Console.Out.WriteLine(dependencies);
            System.Console.Out.WriteLine();

            // A TreePrint can print the tree and its dependencies in one go.
            TreePrint printer = new TreePrint("penn,typedDependenciesCollapsed");

            printer.PrintTree(secondParse);
        }

Ejemplo n.º 4

Mostrar archivo

 /// <summary>
 /// Builds the basic-category tree transformer from the parser's treebank
 /// language pack and the confusing-rule filter from the parser itself.
 /// </summary>
 /// <param name="parser">Parser that supplies the language pack and is passed to the filter.</param>
 public CacheParseHypotheses(LexicalizedParser parser)
 {
     var languagePack = parser.TreebankLanguagePack();

     this.treeBasicCategories = new BasicCategoryTreeTransformer(languagePack);
     this.treeFilter          = new FilterConfusingRules(parser);
 }

Ejemplo n.º 5

Mostrar archivo

Archivo: BuildBinarizedDataset.cs Proyecto: zerouid/Stanford.CoreNLP.NET

        /// <summary>
        /// Turns a text file into trees for use in a RNTN classifier such as
        /// the treebank used in the Sentiment project.
        /// </summary>
        /// <remarks>
        /// The expected input file is one sentence per line, with sentences
        /// separated by blank lines. The first line has the main label of the
        /// sentence together with the full sentence.  Lines after the first
        /// sentence line but before the blank line will be treated as labeled
        /// sub-phrases.  The labels should start with the label and then
        /// contain a list of tokens the label applies to. All phrases that do
        /// not have their own label will take on the main sentence label!
        /// For example:
        /// <br />
        /// <code>
        /// 1 Today is not a good day.<br />
        /// 3 good<br />
        /// 3 good day <br />
        /// 3 a good day <br />
        /// <br />
        /// (next block starts here) <br />
        /// </code>
        /// By default the englishPCFG parser is used.  This can be changed
        /// with the <c>-parserModel</c> flag.  Specify an input file with
        /// <c>-input</c>.
        /// <br />
        /// If a sentiment model is provided with <c>-sentimentModel</c>, that
        /// model will be used to prelabel the sentences.  Any spans with given
        /// labels will then be used to adjust those labels.
        /// </remarks>
        /// <param name="args">
        /// Command line arguments: <c>-input</c>, <c>-parserModel</c> and
        /// <c>-sentimentModel</c>, each followed by a value.
        /// </param>
        /// <exception cref="ArgumentException">
        /// No <c>-input</c> was given, or an option is missing its value.
        /// </exception>
        /// <exception cref="InvalidOperationException">
        /// The first line of a chunk produced no tokens.
        /// </exception>
        public static void Main(string[] args)
        {
            CollapseUnaryTransformer transformer = new CollapseUnaryTransformer();
            string         parserModel           = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
            string         inputPath             = null;
            string         sentimentModelPath    = null;
            SentimentModel sentimentModel        = null;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
                {
                    inputPath = RequireOptionValue(args, argIndex);
                    argIndex += 2;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-parserModel"))
                {
                    parserModel = RequireOptionValue(args, argIndex);
                    argIndex   += 2;
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-sentimentModel"))
                {
                    sentimentModelPath = RequireOptionValue(args, argIndex);
                    argIndex          += 2;
                }
                else
                {
                    log.Info("Unknown argument " + args[argIndex]);
                    System.Environment.Exit(2);
                }
            }
            if (inputPath == null)
            {
                throw new ArgumentException("Must specify input file with -input");
            }
            LexicalizedParser parser    = ((LexicalizedParser)LexicalizedParser.LoadModel(parserModel));
            TreeBinarizer     binarizer = TreeBinarizer.SimpleTreeBinarizer(parser.GetTLPParams().HeadFinder(), parser.TreebankLanguagePack());

            if (sentimentModelPath != null)
            {
                sentimentModel = SentimentModel.LoadSerialized(sentimentModelPath);
            }
            string text = IOUtils.SlurpFileNoExceptions(inputPath);

            // The separator is a regular expression (one or more blank lines).
            // string.Split would treat it as literal text, so Regex.Split is required.
            string[] chunks = System.Text.RegularExpressions.Regex.Split(text, "\\n\\s*\\n+");
            // need blank line to make a new chunk
            foreach (string chunk in chunks)
            {
                if (string.IsNullOrWhiteSpace(chunk))
                {
                    continue;
                }
                // The expected format is that line 0 will be the text of the
                // sentence, and each subsequence line, if any, will be a value
                // followed by the sequence of tokens that get that value.
                // Here we take the first line and tokenize it as one sentence.
                string[]             lines    = chunk.Trim().Split('\n');
                string               sentence = lines[0];
                StringReader         sin      = new StringReader(sentence);
                DocumentPreprocessor document = new DocumentPreprocessor(sin);
                document.SetSentenceFinalPuncWords(new string[] { "\n" });

                // Take the first sentence the preprocessor yields.  Reading
                // Current from a fresh enumerator without advancing it does
                // not return the first element.
                IList <IHasWord> tokens = null;
                foreach (IList <IHasWord> firstSentence in document)
                {
                    tokens = firstSentence;
                    break;
                }
                if (tokens == null || tokens.Count == 0)
                {
                    throw new InvalidOperationException("Could not tokenize sentence line: " + sentence);
                }
                // The first token is the numeric main label; parse it culture-independently.
                int mainLabel = System.Convert.ToInt32(tokens[0].Word(), System.Globalization.CultureInfo.InvariantCulture);
                tokens = tokens.SubList(1, tokens.Count);
                IDictionary <Pair <int, int>, string> spanToLabels = Generics.NewHashMap();
                for (int i = 1; i < lines.Length; ++i)
                {
                    ExtractLabels(spanToLabels, tokens, lines[i]);
                }
                // TODO: add an option which treats the spans as constraints when parsing
                Tree tree           = parser.Apply(tokens);
                Tree binarized      = binarizer.TransformTree(tree);
                Tree collapsedUnary = transformer.TransformTree(binarized);
                // if there is a sentiment model for use in prelabeling, we
                // label here and then use the user given labels to adjust
                if (sentimentModel != null)
                {
                    Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(collapsedUnary);
                    SentimentCostAndGradient scorer = new SentimentCostAndGradient(sentimentModel, null);
                    scorer.ForwardPropagateTree(collapsedUnary);
                    SetPredictedLabels(collapsedUnary);
                }
                else
                {
                    SetUnknownLabels(collapsedUnary, mainLabel);
                }
                Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(collapsedUnary);
                collapsedUnary.IndexSpans();
                // User-supplied span labels are applied last.
                foreach (KeyValuePair <Pair <int, int>, string> pairStringEntry in spanToLabels)
                {
                    SetSpanLabel(collapsedUnary, pairStringEntry.Key, pairStringEntry.Value);
                }
                System.Console.Out.WriteLine(collapsedUnary);
            }
        }

        /// <summary>
        /// Returns the value that follows the option at <paramref name="optionIndex"/>.
        /// </summary>
        /// <param name="args">Full command line argument array.</param>
        /// <param name="optionIndex">Index of the option name within <paramref name="args"/>.</param>
        /// <returns>The argument immediately after the option name.</returns>
        /// <exception cref="ArgumentException">The option is the last argument and has no value.</exception>
        private static string RequireOptionValue(string[] args, int optionIndex)
        {
            if (optionIndex + 1 >= args.Length)
            {
                throw new ArgumentException("Missing value for argument " + args[optionIndex]);
            }
            return args[optionIndex + 1];
        }