Example #1
        public static void Main(string[] args)
        {
            string         input     = null;
            string         output    = null;
            IList <string> extraArgs = Generics.NewArrayList();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
                {
                    input     = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                    {
                        output    = args[argIndex + 1];
                        argIndex += 2;
                    }
                    else
                    {
                        extraArgs.Add(args[argIndex++]);
                    }
                }
            }
            LexicalizedParser parser = LexicalizedParser.LoadModel(input, extraArgs);

            parser.SaveParserToSerialized(output);
        }
        public static void Main(string[] args)
        {
            string parserFile = null;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                {
                    parserFile = args[argIndex + 1];
                    argIndex  += 2;
                }
                else
                {
                    string error = "Unknown argument " + args[argIndex];
                    log.Info(error);
                    throw new Exception(error);
                }
            }
            if (parserFile == null)
            {
                log.Info("Must specify a model file with -model");
                System.Environment.Exit(2);
            }
            LexicalizedParser    parser = ((LexicalizedParser)LexicalizedParser.LoadModel(parserFile));
            ICollection <string> tags   = Generics.NewTreeSet();

            foreach (string tag in parser.tagIndex)
            {
                tags.Add(parser.TreebankLanguagePack().BasicCategory(tag));
            }
            System.Console.Out.WriteLine("Basic tags: " + tags.Count);
            foreach (string tag_1 in tags)
            {
                System.Console.Out.Write("  " + tag_1);
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("All tags size: " + parser.tagIndex.Size());
            ICollection <string> states = Generics.NewTreeSet();

            foreach (string state in parser.stateIndex)
            {
                states.Add(parser.TreebankLanguagePack().BasicCategory(state));
            }
            System.Console.Out.WriteLine("Basic states: " + states.Count);
            foreach (string tag_2 in states)
            {
                System.Console.Out.Write("  " + tag_2);
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine("All states size: " + parser.stateIndex.Size());
            System.Console.Out.WriteLine("Unary grammar size: " + parser.ug.NumRules());
            System.Console.Out.WriteLine("Binary grammar size: " + parser.bg.NumRules());
        }
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public static void Main(string[] args)
        {
            string taggerFile = null;
            string inputFile  = null;
            string outputFile = null;
            double weight     = 1.0;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-tagger"))
                {
                    taggerFile = args[argIndex + 1];
                    argIndex  += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
                    {
                        inputFile = args[argIndex + 1];
                        argIndex += 2;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                        {
                            outputFile = args[argIndex + 1];
                            argIndex  += 2;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-weight"))
                            {
                                weight    = double.Parse(args[argIndex + 1]);
                                argIndex += 2;
                            }
                            else
                            {
                                throw new ArgumentException("Unknown argument: " + args[argIndex]);
                            }
                        }
                    }
                }
            }
            LexicalizedParser parser = ((LexicalizedParser)LexicalizedParser.LoadModel(inputFile));
            MaxentTagger      tagger = new MaxentTagger(taggerFile);

            parser.reranker = new TaggerReranker(tagger, parser.GetOp());
            parser.SaveParserToSerialized(outputFile);
        }
Example #4
        public virtual void RunTest(string[] args)
        {
            // get a parser from file
            LexicalizedParser pd = ((LexicalizedParser)LexicalizedParser.LoadModel(args[0]));

            op = pd.GetOp();
            // in case a serialized Options object was read in
            Treebank testTreebank = op.tlpParams.MemoryTreebank();
            int      testlow      = System.Convert.ToInt32(args[2]);
            int      testhigh     = System.Convert.ToInt32(args[3]);

            testTreebank.LoadPath(args[1], new NumberRangeFileFilter(testlow, testhigh, true));
            op.SetOptionsOrWarn(args, 4, args.Length);
            TestOnTreebank(pd, new EnglishTreebankParserParams(), testTreebank, args[1], pd.stateIndex);
        }
Example #5
        /// <summary>The main method demonstrates the easiest way to load a parser.</summary>
        /// <remarks>
        /// The main method demonstrates the easiest way to load a parser.
        /// Simply call loadModel and specify the path of a serialized grammar
        /// model, which can be a file, a resource on the classpath, or even a URL.
        /// For example, this demonstrates loading a grammar from the models jar
        /// file, which you therefore need to include on the classpath for ParserDemo
        /// to work.
        /// Usage:
        /// <c>java ParserDemo [[model] textFile]</c>
        /// e.g.: java ParserDemo edu/stanford/nlp/models/lexparser/chineseFactored.ser.gz data/chinese-onesent-utf8.txt
        /// </remarks>
        public static void Main(string[] args)
        {
            string parserModel = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";

            if (args.Length > 0)
            {
                parserModel = args[0];
            }
            LexicalizedParser lp = ((LexicalizedParser)LexicalizedParser.LoadModel(parserModel));

            if (args.Length == 0)
            {
                DemoAPI(lp);
            }
            else
            {
                string textFile = (args.Length > 1) ? args[1] : args[0];
                DemoDP(lp, textFile);
            }
        }
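DemoAPI and DemoDP themselves are not included in this listing. Purely as a sketch, the hypothetical helper below strings together calls that do appear in Example #7 further down (the language pack's tokenizer factory, Parse, and PennPrint); it is not the actual DemoAPI implementation.

        // Hypothetical sketch only -- not the real DemoAPI from ParserDemo.
        private static void DemoAPISketch(LexicalizedParser lp)
        {
            ITreebankLanguagePack tlp = lp.GetOp().Langpack();
            // Tokenize a raw string with the language pack's default tokenizer ...
            ITokenizer <IHasWord> tokenizer = tlp.GetTokenizerFactory().GetTokenizer(new StringReader("This is an easy sentence."));
            IList <IHasWord>      tokens    = tokenizer.Tokenize();
            // ... then parse the token list and print the tree in Penn Treebank format.
            Tree parse = lp.Parse(tokens);
            parse.PennPrint();
        }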
Example #6
 public override void Run()
 {
     try
     {
         if (this.zipFilename != null)
         {
             this._enclosing.parser = LexicalizedParser.LoadModelFromZip(this.zipFilename, this.filename);
         }
         else
         {
             this._enclosing.parser = ((LexicalizedParser)LexicalizedParser.LoadModel(this.filename));
         }
     }
     // OutOfMemoryException must be caught before the general Exception clause;
     // otherwise the second handler is unreachable and the file does not compile.
     catch (OutOfMemoryException)
     {
         JOptionPane.ShowMessageDialog(this._enclosing, "Could not load parser. Out of memory.", null, JOptionPane.ErrorMessage);
         this._enclosing.SetStatus("Error loading parser");
         this._enclosing.parser = null;
     }
     catch (Exception)
     {
         JOptionPane.ShowMessageDialog(this._enclosing, "Error loading parser: " + this.filename, null, JOptionPane.ErrorMessage);
         this._enclosing.SetStatus("Error loading parser");
         this._enclosing.parser = null;
     }
     this._enclosing.StopProgressMonitor();
     if (this._enclosing.parser != null)
     {
         this._enclosing.SetStatus("Loaded parser.");
         this._enclosing.parserFileLabel.SetText("Parser: " + this.filename);
         this._enclosing.parseButton.SetEnabled(true);
         this._enclosing.parseNextButton.SetEnabled(true);
         this._enclosing.saveOutputButton.SetEnabled(true);
         ParserPanel.tlp          = this._enclosing.parser.GetOp().Langpack();
         this._enclosing.encoding = ParserPanel.tlp.GetEncoding();
     }
 }
Example #7
        /// <summary>This example shows a few more ways of providing input to a parser.</summary>
        /// <remarks>
        /// This example shows a few more ways of providing input to a parser.
        /// Usage: ParserDemo2 [grammar [textFile]]
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string grammar = args.Length > 0 ? args[0] : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";

            string[]                        options = new string[] { "-maxLength", "80", "-retainTmpSubcategories" };
            LexicalizedParser               lp      = ((LexicalizedParser)LexicalizedParser.LoadModel(grammar, options));
            ITreebankLanguagePack           tlp     = lp.GetOp().Langpack();
            IGrammaticalStructureFactory    gsf     = tlp.GrammaticalStructureFactory();
            IEnumerable <IList <IHasWord> > sentences;

            if (args.Length > 1)
            {
                DocumentPreprocessor      dp  = new DocumentPreprocessor(args[1]);
                IList <IList <IHasWord> > tmp = new List <IList <IHasWord> >();
                foreach (IList <IHasWord> sentence in dp)
                {
                    tmp.Add(sentence);
                }
                sentences = tmp;
            }
            else
            {
                // Showing tokenization and parsing in code a couple of different ways.
                string[]         sent     = new string[] { "This", "is", "an", "easy", "sentence", "." };
                IList <IHasWord> sentence = new List <IHasWord>();
                foreach (string word in sent)
                {
                    sentence.Add(new Word(word));
                }
                string sent2 = ("This is a slightly longer and more complex " + "sentence requiring tokenization.");
                // Use the default tokenizer for this TreebankLanguagePack
                ITokenizer <IHasWord> toke      = tlp.GetTokenizerFactory().GetTokenizer(new StringReader(sent2));
                IList <IHasWord>      sentence2 = toke.Tokenize();
                string[] sent3 = new string[] { "It", "can", "can", "it", "." };
                string[] tag3  = new string[] { "PRP", "MD", "VB", "PRP", "." };
                // Parser gets second "can" wrong without help
                IList <TaggedWord> sentence3 = new List <TaggedWord>();
                for (int i = 0; i < sent3.Length; i++)
                {
                    sentence3.Add(new TaggedWord(sent3[i], tag3[i]));
                }
                Tree parse = lp.Parse(sentence3);
                parse.PennPrint();
                IList <IList <IHasWord> > tmp = new List <IList <IHasWord> >();
                tmp.Add(sentence);
                tmp.Add(sentence2);
                tmp.Add(sentence3);
                sentences = tmp;
            }
            foreach (IList <IHasWord> sentence_1 in sentences)
            {
                Tree parse = lp.Parse(sentence_1);
                parse.PennPrint();
                System.Console.Out.WriteLine();
                GrammaticalStructure    gs  = gsf.NewGrammaticalStructure(parse);
                IList <TypedDependency> tdl = gs.TypedDependenciesCCprocessed();
                System.Console.Out.WriteLine(tdl);
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine("The words of the sentence:");
                foreach (ILabel lab in parse.Yield())
                {
                    if (lab is CoreLabel)
                    {
                        System.Console.Out.WriteLine(((CoreLabel)lab).ToString(CoreLabel.OutputFormat.ValueMap));
                    }
                    else
                    {
                        System.Console.Out.WriteLine(lab);
                    }
                }
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine(parse.TaggedYield());
                System.Console.Out.WriteLine();
            }
            // This method turns the String into a single sentence using the
            // default tokenizer for the TreebankLanguagePack.
            string sent3_1 = "This is one last test!";

            lp.Parse(sent3_1).PennPrint();
        }
Example #8
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public static void Main(string[] args)
        {
            string         dvmodelFile        = null;
            string         lexparserFile      = null;
            string         testTreebankPath   = null;
            IFileFilter    testTreebankFilter = null;
            IList <string> unusedArgs         = new List <string>();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-lexparser"))
                {
                    lexparserFile = args[argIndex + 1];
                    argIndex     += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank"))
                    {
                        Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                        argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                        testTreebankPath   = treebankDescription.First();
                        testTreebankFilter = treebankDescription.Second();
                    }
                    else
                    {
                        unusedArgs.Add(args[argIndex++]);
                    }
                }
            }
            log.Info("Loading lexparser from: " + lexparserFile);
            string[]          newArgs   = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            LexicalizedParser lexparser = ((LexicalizedParser)LexicalizedParser.LoadModel(lexparserFile, newArgs));

            log.Info("... done");
            Treebank testTreebank = null;

            if (testTreebankPath != null)
            {
                log.Info("Reading in trees from " + testTreebankPath);
                if (testTreebankFilter != null)
                {
                    log.Info("Filtering on " + testTreebankFilter);
                }
                testTreebank = lexparser.GetOp().tlpParams.MemoryTreebank();
                testTreebank.LoadPath(testTreebankPath, testTreebankFilter);
                log.Info("Read in " + testTreebank.Count + " trees for testing");
            }
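            // Note: 'weights' is not declared in this method; it is presumably a
            // static array of candidate baseParserWeight values defined on the
            // enclosing class and simply not shown in this excerpt.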
            double[] labelResults = new double[weights.Length];
            double[] tagResults   = new double[weights.Length];
            for (int i = 0; i < weights.Length; ++i)
            {
                lexparser.GetOp().baseParserWeight = weights[i];
                EvaluateTreebank evaluator         = new EvaluateTreebank(lexparser);
                evaluator.TestOnTreebank(testTreebank);
                labelResults[i] = evaluator.GetLBScore();
                tagResults[i]   = evaluator.GetTagScore();
            }
            for (int i_1 = 0; i_1 < weights.Length; ++i_1)
            {
                log.Info("LexicalizedParser weight " + weights[i_1] + ": labeled " + labelResults[i_1] + " tag " + tagResults[i_1]);
            }
        }
Example #9
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            bool   mergeType   = Usesum;
            bool   prettyPrint = true;
            bool   debug       = false;
            string parseGram   = null;
            string filename    = args[0];

            for (int i = 1; i < args.Length; i++)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-debug"))
                {
                    debug = true;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-useMax"))
                    {
                        mergeType = Usemax;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-useSum"))
                        {
                            mergeType = Usesum;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-noPrettyPrint"))
                            {
                                prettyPrint = false;
                            }
                            else
                            {
                                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-parser"))
                                {
                                    parseGram = args[++i];
                                }
                                else
                                {
                                    log.Info("unrecognized flag: " + args[i]);
                                    log.Info("usage: java LatticeReader <file> [ -debug ] [ -useMax ] [ -useSum ] [ -noPrettyPrint ] [ -parser parserFile ]");
                                    System.Environment.Exit(0);
                                }
                            }
                        }
                    }
                }
            }
            Edu.Stanford.Nlp.Parser.Lexparser.HTKLatticeReader lr = new Edu.Stanford.Nlp.Parser.Lexparser.HTKLatticeReader(filename, mergeType, debug, prettyPrint);
            if (parseGram != null)
            {
                Options op = new Options();
                // TODO: these options all get clobbered by the Options object
                // stored in the LexicalizedParser (unless it's a text file?)
                op.doDep = false;
                op.testOptions.maxLength      = 80;
                op.testOptions.maxSpanForTags = 80;
                LexicalizedParser lp = LexicalizedParser.LoadModel(parseGram, op);
                // TODO: somehow merge this into ParserQuery instead of being
                // LexicalizedParserQuery specific
                LexicalizedParserQuery pq = lp.LexicalizedParserQuery();
                pq.Parse(lr);
                Tree t = pq.GetBestParse();
                t.PennPrint();
            }
        }
        /// <summary>
        /// Turns a text file into trees for use in a RNTN classifier such as
        /// the treebank used in the Sentiment project.
        /// </summary>
        /// <remarks>
        /// Turns a text file into trees for use in a RNTN classifier such as
        /// the treebank used in the Sentiment project.
        /// <br />
        /// The expected input file is one sentence per line, with sentences
        /// separated by blank lines. The first line has the main label of the sentence together with the full sentence.
        /// Lines after the first sentence line but before
        /// the blank line will be treated as labeled sub-phrases.  Each such
        /// line starts with a label followed by the list of tokens that the
        /// label applies to. Any phrase that does not have its own label takes on the main sentence label!
        /// For example:
        /// <br />
        /// <code>
        /// 1 Today is not a good day.<br />
        /// 3 good<br />
        /// 3 good day <br />
        /// 3 a good day <br />
        /// <br />
        /// (next block starts here) <br />
        /// </code>
        /// By default the englishPCFG parser is used.  This can be changed
        /// with the
        /// <c>-parserModel</c>
        /// flag.  Specify an input file
        /// with
        /// <c>-input</c>
        /// .
        /// <br />
        /// If a sentiment model is provided with -sentimentModel, that model
        /// will be used to prelabel the sentences.  Any spans with given
        /// labels will then be used to adjust those labels.
        /// </remarks>
        public static void Main(string[] args)
        {
            CollapseUnaryTransformer transformer = new CollapseUnaryTransformer();
            string         parserModel           = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
            string         inputPath             = null;
            string         sentimentModelPath    = null;
            SentimentModel sentimentModel        = null;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
                {
                    inputPath = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-parserModel"))
                    {
                        parserModel = args[argIndex + 1];
                        argIndex   += 2;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-sentimentModel"))
                        {
                            sentimentModelPath = args[argIndex + 1];
                            argIndex          += 2;
                        }
                        else
                        {
                            log.Info("Unknown argument " + args[argIndex]);
                            System.Environment.Exit(2);
                        }
                    }
                }
            }
            if (inputPath == null)
            {
                throw new ArgumentException("Must specify input file with -input");
            }
            LexicalizedParser parser    = ((LexicalizedParser)LexicalizedParser.LoadModel(parserModel));
            TreeBinarizer     binarizer = TreeBinarizer.SimpleTreeBinarizer(parser.GetTLPParams().HeadFinder(), parser.TreebankLanguagePack());

            if (sentimentModelPath != null)
            {
                sentimentModel = SentimentModel.LoadSerialized(sentimentModelPath);
            }
            string text = IOUtils.SlurpFileNoExceptions(inputPath);

            string[] chunks = text.Split("\\n\\s*\\n+");
            // need blank line to make a new chunk
            foreach (string chunk in chunks)
            {
                if (chunk.Trim().IsEmpty())
                {
                    continue;
                }
                // The expected format is that line 0 will be the text of the
                // sentence, and each subsequence line, if any, will be a value
                // followed by the sequence of tokens that get that value.
                // Here we take the first line and tokenize it as one sentence.
                string[]             lines    = chunk.Trim().Split("\\n");
                string               sentence = lines[0];
                StringReader         sin      = new StringReader(sentence);
                DocumentPreprocessor document = new DocumentPreprocessor(sin);
                document.SetSentenceFinalPuncWords(new string[] { "\n" });
                // Advance the enumerator before reading Current; the direct
                // iterator().next() translation would otherwise read an unset value.
                IEnumerator <IList <IHasWord> > sentenceIterator = document.GetEnumerator();
                sentenceIterator.MoveNext();
                IList <IHasWord> tokens    = sentenceIterator.Current;
                int              mainLabel = System.Convert.ToInt32(tokens[0].Word());
                //System.out.print("Main Sentence Label: " + mainLabel.toString() + "; ");
                tokens = tokens.SubList(1, tokens.Count);
                //log.info(tokens);
                IDictionary <Pair <int, int>, string> spanToLabels = Generics.NewHashMap();
                for (int i = 1; i < lines.Length; ++i)
                {
                    ExtractLabels(spanToLabels, tokens, lines[i]);
                }
                // TODO: add an option which treats the spans as constraints when parsing
                Tree tree           = parser.Apply(tokens);
                Tree binarized      = binarizer.TransformTree(tree);
                Tree collapsedUnary = transformer.TransformTree(binarized);
                // if there is a sentiment model for use in prelabeling, we
                // label here and then use the user given labels to adjust
                if (sentimentModel != null)
                {
                    Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(collapsedUnary);
                    SentimentCostAndGradient scorer = new SentimentCostAndGradient(sentimentModel, null);
                    scorer.ForwardPropagateTree(collapsedUnary);
                    SetPredictedLabels(collapsedUnary);
                }
                else
                {
                    SetUnknownLabels(collapsedUnary, mainLabel);
                }
                Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(collapsedUnary);
                collapsedUnary.IndexSpans();
                foreach (KeyValuePair <Pair <int, int>, string> pairStringEntry in spanToLabels)
                {
                    SetSpanLabel(collapsedUnary, pairStringEntry.Key, pairStringEntry.Value);
                }
                System.Console.Out.WriteLine(collapsedUnary);
            }
        }
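The remarks above already describe the expected -input format; spelled out literally, a minimal input file could look like the string below (the constant name is purely illustrative).

        // Purely illustrative rendering of the format described in the remarks:
        // main label + full sentence on the first line of a chunk, labeled
        // sub-phrases on the following lines, and a blank line between chunks.
        private const string SampleRntnInput =
            "1 Today is not a good day.\n" +
            "3 good\n" +
            "3 good day\n" +
            "3 a good day\n" +
            "\n";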
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string         modelPath          = null;
            string         outputPath         = null;
            string         inputPath          = null;
            string         testTreebankPath   = null;
            IFileFilter    testTreebankFilter = null;
            IList <string> unusedArgs         = Generics.NewArrayList();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                {
                    modelPath = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                    {
                        outputPath = args[argIndex + 1];
                        argIndex  += 2;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
                        {
                            inputPath = args[argIndex + 1];
                            argIndex += 2;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank"))
                            {
                                Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                                argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                                testTreebankPath   = treebankDescription.First();
                                testTreebankFilter = treebankDescription.Second();
                            }
                            else
                            {
                                unusedArgs.Add(args[argIndex++]);
                            }
                        }
                    }
                }
            }
            string[]          newArgs = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            LexicalizedParser parser  = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs));
            DVModel           model   = DVParser.GetModelFromLexicalizedParser(parser);
            File outputFile           = new File(outputPath);

            FileSystem.CheckNotExistsOrFail(outputFile);
            FileSystem.MkdirOrFail(outputFile);
            int count = 0;

            if (inputPath != null)
            {
                Reader input = new BufferedReader(new FileReader(inputPath));
                DocumentPreprocessor processor = new DocumentPreprocessor(input);
                foreach (IList <IHasWord> sentence in processor)
                {
                    count++;
                    // index from 1
                    IParserQuery pq = parser.ParserQuery();
                    if (!(pq is RerankingParserQuery))
                    {
                        throw new ArgumentException("Expected a RerankingParserQuery");
                    }
                    RerankingParserQuery rpq = (RerankingParserQuery)pq;
                    if (!rpq.Parse(sentence))
                    {
                        throw new Exception("Unparsable sentence: " + sentence);
                    }
                    IRerankerQuery reranker = rpq.RerankerQuery();
                    if (!(reranker is DVModelReranker.Query))
                    {
                        throw new ArgumentException("Expected a DVModelReranker");
                    }
                    DeepTree deepTree = ((DVModelReranker.Query)reranker).GetDeepTrees()[0];
                    IdentityHashMap <Tree, SimpleMatrix> vectors = deepTree.GetVectors();
                    foreach (KeyValuePair <Tree, SimpleMatrix> entry in vectors)
                    {
                        log.Info(entry.Key + "   " + entry.Value);
                    }
                    FileWriter     fout = new FileWriter(outputPath + File.separator + "sentence" + count + ".txt");
                    BufferedWriter bout = new BufferedWriter(fout);
                    bout.Write(SentenceUtils.ListToString(sentence));
                    bout.NewLine();
                    bout.Write(deepTree.GetTree().ToString());
                    bout.NewLine();
                    foreach (IHasWord word in sentence)
                    {
                        OutputMatrix(bout, model.GetWordVector(word.Word()));
                    }
                    Tree rootTree = FindRootTree(vectors);
                    OutputTreeMatrices(bout, rootTree, vectors);
                    bout.Flush();
                    fout.Close();
                }
            }
        }
Example #12
        /// <summary>
        /// Command line arguments for this program:
        /// <br />
        /// -output: the model file to output <br />
        /// -input: a list of model files to input
        /// </summary>
        public static void Main(string[] args)
        {
            string         outputModelFilename = null;
            IList <string> inputModelFilenames = Generics.NewArrayList();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                {
                    outputModelFilename = args[argIndex + 1];
                    argIndex           += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
                    {
                        for (++argIndex; argIndex < args.Length && !args[argIndex].StartsWith("-"); ++argIndex)
                        {
                            Sharpen.Collections.AddAll(inputModelFilenames, Arrays.AsList(args[argIndex].Split(",")));
                        }
                    }
                    else
                    {
                        throw new Exception("Unknown argument " + args[argIndex]);
                    }
                }
            }
            if (outputModelFilename == null)
            {
                log.Info("Need to specify output model name with -output");
                System.Environment.Exit(2);
            }
            if (inputModelFilenames.Count == 0)
            {
                log.Info("Need to specify input model names with -input");
                System.Environment.Exit(2);
            }
            log.Info("Averaging " + inputModelFilenames);
            log.Info("Outputting result to " + outputModelFilename);
            LexicalizedParser lexparser = null;
            IList <DVModel>   models    = Generics.NewArrayList();

            foreach (string filename in inputModelFilenames)
            {
                LexicalizedParser parser = ((LexicalizedParser)LexicalizedParser.LoadModel(filename));
                if (lexparser == null)
                {
                    lexparser = parser;
                }
                models.Add(DVParser.GetModelFromLexicalizedParser(parser));
            }
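            // NOTE: the null arguments below look like placeholders left over from
            // the Java-to-C# conversion; the original Java passes per-field extractor
            // functions (e.g. a lambda returning model.binaryTransform) at these call sites.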
            IList <TwoDimensionalMap <string, string, SimpleMatrix> > binaryTransformMaps = CollectionUtils.TransformAsList(models, null);
            IList <TwoDimensionalMap <string, string, SimpleMatrix> > binaryScoreMaps     = CollectionUtils.TransformAsList(models, null);
            IList <IDictionary <string, SimpleMatrix> >      unaryTransformMaps           = CollectionUtils.TransformAsList(models, null);
            IList <IDictionary <string, SimpleMatrix> >      unaryScoreMaps          = CollectionUtils.TransformAsList(models, null);
            IList <IDictionary <string, SimpleMatrix> >      wordMaps                = CollectionUtils.TransformAsList(models, null);
            TwoDimensionalMap <string, string, SimpleMatrix> binaryTransformAverages = AverageBinaryMatrices(binaryTransformMaps);
            TwoDimensionalMap <string, string, SimpleMatrix> binaryScoreAverages     = AverageBinaryMatrices(binaryScoreMaps);
            IDictionary <string, SimpleMatrix> unaryTransformAverages                = AverageUnaryMatrices(unaryTransformMaps);
            IDictionary <string, SimpleMatrix> unaryScoreAverages = AverageUnaryMatrices(unaryScoreMaps);
            IDictionary <string, SimpleMatrix> wordAverages       = AverageUnaryMatrices(wordMaps);
            DVModel  newModel  = new DVModel(binaryTransformAverages, unaryTransformAverages, binaryScoreAverages, unaryScoreAverages, wordAverages, lexparser.GetOp());
            DVParser newParser = new DVParser(newModel, lexparser);

            newParser.SaveModel(outputModelFilename);
        }
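AverageBinaryMatrices and AverageUnaryMatrices are not shown on this page. The sketch below illustrates the per-key, element-wise averaging they presumably perform; it deliberately uses plain double[,] arrays rather than SimpleMatrix so as not to assume the matrix library's C# API.

        // Minimal sketch of element-wise matrix averaging, assuming all inputs
        // share the same dimensions; illustrative only.
        private static double[,] AverageMatrices(IList <double[,]> matrices)
        {
            int rows = matrices[0].GetLength(0);
            int cols = matrices[0].GetLength(1);
            double[,] sum = new double[rows, cols];
            foreach (double[,] m in matrices)
            {
                for (int r = 0; r < rows; ++r)
                {
                    for (int c = 0; c < cols; ++c)
                    {
                        sum[r, c] += m[r, c];
                    }
                }
            }
            for (int r = 0; r < rows; ++r)
            {
                for (int c = 0; c < cols; ++c)
                {
                    sum[r, c] /= matrices.Count;
                }
            }
            return sum;
        }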
        // main method only
        public static void Main(string[] args)
        {
            Treebank   tb           = new MemoryTreebank();
            Properties props        = StringUtils.ArgsToProperties(args);
            string     treeFileName = props.GetProperty("treeFile");
            string     sentFileName = props.GetProperty("sentFile");
            string     testGraph    = props.GetProperty("testGraph");

            if (testGraph == null)
            {
                testGraph = "false";
            }
            string load = props.GetProperty("load");
            string save = props.GetProperty("save");

            if (load != null)
            {
                log.Info("Load not implemented!");
                return;
            }
            if (sentFileName == null && treeFileName == null)
            {
                log.Info("Usage: java SemanticGraph [-sentFile file|-treeFile file] [-testGraph]");
                Tree t = Tree.ValueOf("(ROOT (S (NP (NP (DT An) (NN attempt)) (PP (IN on) (NP (NP (NNP Andres) (NNP Pastrana) (POS 's)) (NN life)))) (VP (VBD was) (VP (VBN carried) (PP (IN out) (S (VP (VBG using) (NP (DT a) (JJ powerful) (NN bomb))))))) (. .)))"
                                      );
                tb.Add(t);
            }
            else
            {
                if (treeFileName != null)
                {
                    tb.LoadPath(treeFileName);
                }
                else
                {
                    string[]          options = new string[] { "-retainNPTmpSubcategories" };
                    LexicalizedParser lp      = ((LexicalizedParser)LexicalizedParser.LoadModel("/u/nlp/data/lexparser/englishPCFG.ser.gz", options));
                    BufferedReader    reader  = null;
                    try
                    {
                        reader = IOUtils.ReaderFromString(sentFileName);
                    }
                    catch (IOException e)
                    {
                        throw new RuntimeIOException("Cannot find or open " + sentFileName, e);
                    }
                    try
                    {
                        System.Console.Out.WriteLine("Processing sentence file " + sentFileName);
                        for (string line; (line = reader.ReadLine()) != null;)
                        {
                            System.Console.Out.WriteLine("Processing sentence: " + line);
                            PTBTokenizer <Word> ptb   = PTBTokenizer.NewPTBTokenizer(new StringReader(line));
                            IList <Word>        words = ptb.Tokenize();
                            Tree parseTree            = lp.ParseTree(words);
                            tb.Add(parseTree);
                        }
                        reader.Close();
                    }
                    catch (Exception e)
                    {
                        throw new Exception("Exception reading key file " + sentFileName, e);
                    }
                }
            }
            foreach (Tree t_1 in tb)
            {
                SemanticGraph sg = SemanticGraphFactory.GenerateUncollapsedDependencies(t_1);
                System.Console.Out.WriteLine(sg.ToString());
                System.Console.Out.WriteLine(sg.ToCompactString());
                if (testGraph.Equals("true"))
                {
                    SemanticGraph g1 = SemanticGraphFactory.GenerateCollapsedDependencies(t_1);
                    System.Console.Out.WriteLine("TEST SEMANTIC GRAPH - graph ----------------------------");
                    System.Console.Out.WriteLine(g1.ToString());
                    System.Console.Out.WriteLine("readable ----------------------------");
                    System.Console.Out.WriteLine(g1.ToString(SemanticGraph.OutputFormat.Readable));
                    System.Console.Out.WriteLine("List of dependencies ----------------------------");
                    System.Console.Out.WriteLine(g1.ToList());
                    System.Console.Out.WriteLine("xml ----------------------------");
                    System.Console.Out.WriteLine(g1.ToString(SemanticGraph.OutputFormat.Xml));
                    System.Console.Out.WriteLine("dot ----------------------------");
                    System.Console.Out.WriteLine(g1.ToDotFormat());
                    System.Console.Out.WriteLine("dot (simple) ----------------------------");
                    System.Console.Out.WriteLine(g1.ToDotFormat("Simple", CoreLabel.OutputFormat.Value));
                }
            }
            // System.out.println(" graph ----------------------------");
            // System.out.println(t.allTypedDependenciesCCProcessed(false));
            if (save != null)
            {
                log.Info("Save not implemented!");
            }
        }
        /// <summary>for testing -- CURRENTLY BROKEN!!!</summary>
        /// <param name="args">input dir and output filename</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length != 3)
            {
                throw new Exception("args: treebankPath trainNums testNums");
            }
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            ctpp.charTags = true;
            // TODO: these options are getting clobbered by reading in the
            // parser object (unless it's a text file parser?)
            Options op = new Options(ctpp);

            op.doDep = false;
            op.testOptions.maxLength = 90;
            LexicalizedParser lp;

            try
            {
                IFileFilter trainFilt = new NumberRangesFileFilter(args[1], false);
                lp = LexicalizedParser.TrainFromTreebank(args[0], trainFilt, op);
                try
                {
                    string filename = "chineseCharTagPCFG.ser.gz";
                    log.Info("Writing parser in serialized format to file " + filename + " ");
                    System.Console.Error.Flush();
                    ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                    @out.WriteObject(lp);
                    @out.Close();
                    log.Info("done.");
                }
                catch (IOException ioe)
                {
                    Sharpen.Runtime.PrintStackTrace(ioe);
                }
            }
            catch (ArgumentException)
            {
                lp = LexicalizedParser.LoadModel(args[1], op);
            }
            IFileFilter    testFilt     = new NumberRangesFileFilter(args[2], false);
            MemoryTreebank testTreebank = ctpp.MemoryTreebank();

            testTreebank.LoadPath(new File(args[0]), testFilt);
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true);
            WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
            WordCatEqualityChecker    eqcheck = new WordCatEqualityChecker();
            EquivalenceClassEval      eval    = new EquivalenceClassEval(eqclass, eqcheck);

            //    System.out.println("Preterminals:" + preterminals);
            System.Console.Out.WriteLine("Testing...");
            foreach (Tree gold in testTreebank)
            {
                Tree tree;
                try
                {
                    tree = lp.ParseTree(gold.YieldHasWord());
                    if (tree == null)
                    {
                        System.Console.Out.WriteLine("Failed to parse " + gold.YieldHasWord());
                        continue;
                    }
                }
                catch (Exception e)
                {
                    Sharpen.Runtime.PrintStackTrace(e);
                    continue;
                }
                gold = gold.FirstChild();
                pw.Println(SentenceUtils.ListToString(gold.PreTerminalYield()));
                pw.Println(SentenceUtils.ListToString(gold.Yield()));
                gold.PennPrint(pw);
                pw.Println(tree.PreTerminalYield());
                pw.Println(tree.Yield());
                tree.PennPrint(pw);
                //      Collection allBrackets = WordCatConstituent.allBrackets(tree);
                //      Collection goldBrackets = WordCatConstituent.allBrackets(gold);
                //      eval.eval(allBrackets, goldBrackets);
                eval.DisplayLast();
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine();
            eval.Display();
        }
Example #15
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string modelPath = null;
            string outputDir = null;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                {
                    modelPath = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                    {
                        outputDir = args[argIndex + 1];
                        argIndex += 2;
                    }
                    else
                    {
                        log.Info("Unknown argument " + args[argIndex]);
                        Help();
                    }
                }
            }
            if (outputDir == null || modelPath == null)
            {
                Help();
            }
            File outputFile = new File(outputDir);

            FileSystem.CheckNotExistsOrFail(outputFile);
            FileSystem.MkdirOrFail(outputFile);
            LexicalizedParser parser     = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath));
            DVModel           model      = DVParser.GetModelFromLexicalizedParser(parser);
            string            binaryWDir = outputDir + File.separator + "binaryW";

            FileSystem.MkdirOrFail(binaryWDir);
            foreach (TwoDimensionalMap.Entry <string, string, SimpleMatrix> entry in model.binaryTransform)
            {
                string filename = binaryWDir + File.separator + entry.GetFirstKey() + "_" + entry.GetSecondKey() + ".txt";
                DumpMatrix(filename, entry.GetValue());
            }
            string binaryScoreDir = outputDir + File.separator + "binaryScore";

            FileSystem.MkdirOrFail(binaryScoreDir);
            foreach (TwoDimensionalMap.Entry <string, string, SimpleMatrix> entry_1 in model.binaryScore)
            {
                string filename = binaryScoreDir + File.separator + entry_1.GetFirstKey() + "_" + entry_1.GetSecondKey() + ".txt";
                DumpMatrix(filename, entry_1.GetValue());
            }
            string unaryWDir = outputDir + File.separator + "unaryW";

            FileSystem.MkdirOrFail(unaryWDir);
            foreach (KeyValuePair <string, SimpleMatrix> entry_2 in model.unaryTransform)
            {
                string filename = unaryWDir + File.separator + entry_2.Key + ".txt";
                DumpMatrix(filename, entry_2.Value);
            }
            string unaryScoreDir = outputDir + File.separator + "unaryScore";

            FileSystem.MkdirOrFail(unaryScoreDir);
            foreach (KeyValuePair <string, SimpleMatrix> entry_3 in model.unaryScore)
            {
                string filename = unaryScoreDir + File.separator + entry_3.Key + ".txt";
                DumpMatrix(filename, entry_3.Value);
            }
            string         embeddingFile = outputDir + File.separator + "embeddings.txt";
            FileWriter     fout          = new FileWriter(embeddingFile);
            BufferedWriter bout          = new BufferedWriter(fout);

            foreach (KeyValuePair <string, SimpleMatrix> entry_4 in model.wordVectors)
            {
                bout.Write(entry_4.Key);
                SimpleMatrix vector = entry_4.Value;
                for (int i = 0; i < vector.NumRows(); ++i)
                {
                    bout.Write("  " + vector.Get(i, 0));
                }
                bout.Write("\n");
            }
            bout.Close();
            fout.Close();
        }
Example #16
        /// <summary>
        /// An example of a command line is
        /// <br />
        /// java -mx1g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model /scr/horatio/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached9.simple.ser.gz  -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-202
        /// <br />
        /// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached.train.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 -numThreads 6
        /// <br />
        /// java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/chinese/xinhuaPCFG.ser.gz -output cached.xinhua.train.ser.gz -treebank /afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed  026-270,301-499,600-999
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            string parserModel = null;
            string output      = null;
            IList <Pair <string, IFileFilter> > treebanks = Generics.NewArrayList();
            int dvKBest    = 200;
            int numThreads = 1;

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-dvKBest"))
                {
                    dvKBest   = System.Convert.ToInt32(args[argIndex + 1]);
                    argIndex += 2;
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-parser") || args[argIndex].Equals("-model"))
                {
                    parserModel = args[argIndex + 1];
                    argIndex   += 2;
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                {
                    output    = args[argIndex + 1];
                    argIndex += 2;
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank"))
                {
                    Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-treebank");
                    argIndex = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                    treebanks.Add(treebankDescription);
                    continue;
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-numThreads"))
                {
                    numThreads = System.Convert.ToInt32(args[argIndex + 1]);
                    argIndex  += 2;
                    continue;
                }
                throw new ArgumentException("Unknown argument " + args[argIndex]);
            }
            if (parserModel == null)
            {
                throw new ArgumentException("Need to supply a parser model with -model");
            }
            if (output == null)
            {
                throw new ArgumentException("Need to supply an output filename with -output");
            }
            if (treebanks.IsEmpty())
            {
                throw new ArgumentException("Need to supply a treebank with -treebank");
            }
            log.Info("Writing output to " + output);
            log.Info("Loading parser model " + parserModel);
            log.Info("Writing " + dvKBest + " hypothesis trees for each tree");
            LexicalizedParser    parser      = ((LexicalizedParser)LexicalizedParser.LoadModel(parserModel, "-dvKBest", int.ToString(dvKBest)));
            CacheParseHypotheses cacher      = new CacheParseHypotheses(parser);
            ITreeTransformer     transformer = DVParser.BuildTrainTransformer(parser.GetOp());
            IList <Tree>         sentences   = new List <Tree>();

            foreach (Pair <string, IFileFilter> description in treebanks)
            {
                log.Info("Reading trees from " + description.first);
                Treebank treebank = parser.GetOp().tlpParams.MemoryTreebank();
                treebank.LoadPath(description.first, description.second);
                treebank = treebank.Transform(transformer);
                Sharpen.Collections.AddAll(sentences, treebank);
            }
            log.Info("Processing " + sentences.Count + " trees");
            IList <Pair <Tree, byte[]> > cache = Generics.NewArrayList();

            transformer = new SynchronizedTreeTransformer(transformer);
            MulticoreWrapper <Tree, Pair <Tree, byte[]> > wrapper = new MulticoreWrapper <Tree, Pair <Tree, byte[]> >(numThreads, new CacheParseHypotheses.CacheProcessor(cacher, parser, dvKBest, transformer));

            foreach (Tree tree in sentences)
            {
                wrapper.Put(tree);
                while (wrapper.Peek())
                {
                    cache.Add(wrapper.Poll());
                    if (cache.Count % 10 == 0)
                    {
                        System.Console.Out.WriteLine("Processed " + cache.Count + " trees");
                    }
                }
            }
            wrapper.Join();
            while (wrapper.Peek())
            {
                cache.Add(wrapper.Poll());
                if (cache.Count % 10 == 0)
                {
                    System.Console.Out.WriteLine("Processed " + cache.Count + " trees");
                }
            }
            System.Console.Out.WriteLine("Finished processing " + cache.Count + " trees");
            IOUtils.WriteObjectToFile(cache, output);
        }
        /// <summary>
        /// An example command line for training a new parser:
        /// <br />
        /// nohup java -mx6g edu.stanford.nlp.parser.dvparser.DVParser -cachedTrees /scr/nlp/data/dvparser/wsj/cached.wsj.train.simple.ser.gz -train -testTreebank  /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj/22 2200-2219 -debugOutputFrequency 400 -nofilter -trainingThreads 5 -parser /u/nlp/data/lexparser/wsjPCFG.nocompact.simple.ser.gz -trainingIterations 40 -batchSize 25 -model /scr/nlp/data/dvparser/wsj/wsj.combine.v2.ser.gz -unkWord "*UNK*" -dvCombineCategories &gt; /scr/nlp/data/dvparser/wsj/wsj.combine.v2.out 2&gt;&amp;1 &amp;
        /// </summary>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public static void Main(string[] args)
        {
            if (args.Length == 0)
            {
                Help();
                System.Environment.Exit(2);
            }
            log.Info("Running DVParser with arguments:");
            foreach (string arg in args)
            {
                log.Info("  " + arg);
            }
            log.Info();
            string         parserPath           = null;
            string         trainTreebankPath    = null;
            IFileFilter    trainTreebankFilter  = null;
            string         cachedTrainTreesPath = null;
            bool           runGradientCheck     = false;
            bool           runTraining          = false;
            string         testTreebankPath     = null;
            IFileFilter    testTreebankFilter   = null;
            string         initialModelPath     = null;
            string         modelPath            = null;
            bool           filter            = true;
            string         resultsRecordPath = null;
            IList <string> unusedArgs        = new List <string>();
            // These parameters can be null or 0 if the model was not
            // serialized with the new parameters.  Setting the options at the
            // command line will override these defaults.
            // TODO: if/when we integrate back into the main branch and
            // rebuild models, we can get rid of this
            IList <string> argsWithDefaults = new List <string>(Arrays.AsList(new string[] {
                "-wordVectorFile", Options.LexOptions.DefaultWordVectorFile, "-dvKBest", int.ToString(TrainOptions.DefaultKBest),
                "-batchSize", int.ToString(TrainOptions.DefaultBatchSize), "-trainingIterations", int.ToString(TrainOptions.DefaultTrainingIterations),
                "-qnIterationsPerBatch", int.ToString(TrainOptions.DefaultQnIterationsPerBatch), "-regCost", double.ToString(TrainOptions.DefaultRegcost),
                "-learningRate", double.ToString(TrainOptions.DefaultLearningRate), "-deltaMargin", double.ToString(TrainOptions.DefaultDeltaMargin),
                "-unknownNumberVector", "-unknownDashedWordVectors", "-unknownCapsVector", "-unknownchinesepercentvector",
                "-unknownchinesenumbervector", "-unknownchineseyearvector", "-unkWord", "*UNK*", "-transformMatrixType", "DIAGONAL",
                "-scalingForInit", double.ToString(TrainOptions.DefaultScalingForInit), "-trainWordVectors" }));

            Sharpen.Collections.AddAll(argsWithDefaults, Arrays.AsList(args));
            args = Sharpen.Collections.ToArray(argsWithDefaults, new string[argsWithDefaults.Count]);
            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-parser"))
                {
                    parserPath = args[argIndex + 1];
                    argIndex  += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank"))
                    {
                        Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                        argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                        testTreebankPath   = treebankDescription.First();
                        testTreebankFilter = treebankDescription.Second();
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-treebank"))
                        {
                            Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-treebank");
                            argIndex            = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                            trainTreebankPath   = treebankDescription.First();
                            trainTreebankFilter = treebankDescription.Second();
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-cachedTrees"))
                            {
                                cachedTrainTreesPath = args[argIndex + 1];
                                argIndex            += 2;
                            }
                            else
                            {
                                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-runGradientCheck"))
                                {
                                    runGradientCheck = true;
                                    argIndex++;
                                }
                                else
                                {
                                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-train"))
                                    {
                                        runTraining = true;
                                        argIndex++;
                                    }
                                    else
                                    {
                                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                                        {
                                            modelPath = args[argIndex + 1];
                                            argIndex += 2;
                                        }
                                        else
                                        {
                                            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-nofilter"))
                                            {
                                                filter = false;
                                                argIndex++;
                                            }
                                            else
                                            {
                                                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-continueTraining"))
                                                {
                                                    runTraining      = true;
                                                    filter           = false;
                                                    initialModelPath = args[argIndex + 1];
                                                    argIndex        += 2;
                                                }
                                                else
                                                {
                                                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-resultsRecord"))
                                                    {
                                                        resultsRecordPath = args[argIndex + 1];
                                                        argIndex         += 2;
                                                    }
                                                    else
                                                    {
                                                        unusedArgs.Add(args[argIndex++]);
                                                    }
                                                }
                                            }
                                        }
                                    }
                                }
                            }
                        }
                    }
                }
            }
            if (parserPath == null && modelPath == null)
            {
                throw new ArgumentException("Must supply either a base parser model with -parser or a serialized DVParser with -model");
            }
            if (!runTraining && modelPath == null && !runGradientCheck)
            {
                throw new ArgumentException("Need to either train a new model, run the gradient check or specify a model to load with -model");
            }
            string[] newArgs = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            Edu.Stanford.Nlp.Parser.Dvparser.DVParser dvparser = null;
            LexicalizedParser lexparser = null;

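            // Three ways to obtain the underlying LexicalizedParser and DVModel:
            //   -continueTraining  loads a serialized DVParser and keeps training its model,
            //   -parser            starts from a plain PCFG parser with a freshly initialized DVModel,
            //   -model             loads an already trained DVParser (e.g. just for testing).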
            if (initialModelPath != null)
            {
                lexparser = ((LexicalizedParser)LexicalizedParser.LoadModel(initialModelPath, newArgs));
                DVModel model = GetModelFromLexicalizedParser(lexparser);
                dvparser = new Edu.Stanford.Nlp.Parser.Dvparser.DVParser(model, lexparser);
            }
            else
            {
                if (runTraining || runGradientCheck)
                {
                    lexparser = ((LexicalizedParser)LexicalizedParser.LoadModel(parserPath, newArgs));
                    dvparser  = new Edu.Stanford.Nlp.Parser.Dvparser.DVParser(lexparser);
                }
                else
                {
                    if (modelPath != null)
                    {
                        lexparser = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs));
                        DVModel model = GetModelFromLexicalizedParser(lexparser);
                        dvparser = new Edu.Stanford.Nlp.Parser.Dvparser.DVParser(model, lexparser);
                    }
                }
            }
            IList <Tree> trainSentences = new List <Tree>();
            IdentityHashMap <Tree, byte[]> trainCompressedParses = Generics.NewIdentityHashMap();

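            // Training trees come from either or both of two sources: serialized caches of
            // k-best parses (-cachedTrees) and/or a treebank that is parsed on the fly (-treebank).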
            if (cachedTrainTreesPath != null)
            {
                foreach (string path in cachedTrainTreesPath.Split(","))
                {
                    IList <Pair <Tree, byte[]> > cache = IOUtils.ReadObjectFromFile(path);
                    foreach (Pair <Tree, byte[]> pair in cache)
                    {
                        trainSentences.Add(pair.First());
                        trainCompressedParses[pair.First()] = pair.Second();
                    }
                    log.Info("Read in " + cache.Count + " trees from " + path);
                }
            }
            if (trainTreebankPath != null)
            {
                // TODO: make the transformer a member of the model?
                ITreeTransformer transformer = BuildTrainTransformer(dvparser.GetOp());
                Treebank         treebank    = dvparser.GetOp().tlpParams.MemoryTreebank();
                treebank.LoadPath(trainTreebankPath, trainTreebankFilter);
                treebank = treebank.Transform(transformer);
                log.Info("Read in " + treebank.Count + " trees from " + trainTreebankPath);
                CacheParseHypotheses cacher = new CacheParseHypotheses(dvparser.parser);
                CacheParseHypotheses.CacheProcessor processor = new CacheParseHypotheses.CacheProcessor(cacher, lexparser, dvparser.op.trainOptions.dvKBest, transformer);
                foreach (Tree tree in treebank)
                {
                    trainSentences.Add(tree);
                    trainCompressedParses[tree] = processor.Process(tree).second;
                }
                //System.out.println(tree);
                log.Info("Finished parsing " + treebank.Count + " trees, getting " + dvparser.op.trainOptions.dvKBest + " hypotheses each");
            }
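            // Unless -nofilter was given, restrict the model to the rules and words that actually
            // occur in the collected training parses before training or running the gradient check.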
            if ((runTraining || runGradientCheck) && filter)
            {
                log.Info("Filtering rules for the given training set");
                dvparser.dvModel.SetRulesForTrainingSet(trainSentences, trainCompressedParses);
                log.Info("Done filtering rules; " + dvparser.dvModel.numBinaryMatrices + " binary matrices, " + dvparser.dvModel.numUnaryMatrices + " unary matrices, " + dvparser.dvModel.wordVectors.Count + " word vectors");
            }
            //dvparser.dvModel.printAllMatrices();
            Treebank testTreebank = null;

            if (testTreebankPath != null)
            {
                log.Info("Reading in trees from " + testTreebankPath);
                if (testTreebankFilter != null)
                {
                    log.Info("Filtering on " + testTreebankFilter);
                }
                testTreebank = dvparser.GetOp().tlpParams.MemoryTreebank();
                testTreebank.LoadPath(testTreebankPath, testTreebankFilter);
                log.Info("Read in " + testTreebank.Count + " trees for testing");
            }
            //    runGradientCheck= true;
            if (runGradientCheck)
            {
                log.Info("Running gradient check on " + trainSentences.Count + " trees");
                dvparser.RunGradientCheck(trainSentences, trainCompressedParses);
            }
            if (runTraining)
            {
                log.Info("Training the RNN parser");
                log.Info("Current train options: " + dvparser.GetOp().trainOptions);
                dvparser.Train(trainSentences, trainCompressedParses, testTreebank, modelPath, resultsRecordPath);
                if (modelPath != null)
                {
                    dvparser.SaveModel(modelPath);
                }
            }
            if (testTreebankPath != null)
            {
                EvaluateTreebank evaluator = new EvaluateTreebank(dvparser.AttachModelToLexicalizedParser());
                evaluator.TestOnTreebank(testTreebank);
            }
            log.Info("Successfully ran DVParser");
        }
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.TypeLoadException"/>
        public static void Main(string[] args)
        {
            string         modelPath          = null;
            IList <string> baseModelPaths     = null;
            string         testTreebankPath   = null;
            IFileFilter    testTreebankFilter = null;
            IList <string> unusedArgs         = new List <string>();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                {
                    modelPath = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank"))
                    {
                        Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                        argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                        testTreebankPath   = treebankDescription.First();
                        testTreebankFilter = treebankDescription.Second();
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-baseModels"))
                        {
                            argIndex++;
                            baseModelPaths = new List <string>();
                            while (argIndex < args.Length && args[argIndex][0] != '-')
                            {
                                baseModelPaths.Add(args[argIndex++]);
                            }
                            if (baseModelPaths.Count == 0)
                            {
                                throw new ArgumentException("Found an argument -baseModels with no actual models named");
                            }
                        }
                        else
                        {
                            unusedArgs.Add(args[argIndex++]);
                        }
                    }
                }
            }
            string[]          newArgs          = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            LexicalizedParser underlyingParser = null;
            Options           options          = null;
            LexicalizedParser combinedParser   = null;

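            // Pull the DVModel reranker out of each serialized base parser, wrap all of the models
            // in a CombinedDVModelReranker attached to a copy of the first parser, and serialize
            // the combined parser to the path given with -model.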
            if (baseModelPaths != null)
            {
                IList <DVModel> dvparsers = new List <DVModel>();
                foreach (string baseModelPath in baseModelPaths)
                {
                    log.Info("Loading serialized DVParser from " + baseModelPath);
                    LexicalizedParser dvparser = ((LexicalizedParser)LexicalizedParser.LoadModel(baseModelPath));
                    IReranker         reranker = dvparser.reranker;
                    if (!(reranker is DVModelReranker))
                    {
                        throw new ArgumentException("Expected parsers with DVModel embedded");
                    }
                    dvparsers.Add(((DVModelReranker)reranker).GetModel());
                    if (underlyingParser == null)
                    {
                        underlyingParser = dvparser;
                        options          = underlyingParser.GetOp();
                        // TODO: other parser's options?
                        options.SetOptions(newArgs);
                    }
                    log.Info("... done");
                }
                combinedParser = LexicalizedParser.CopyLexicalizedParser(underlyingParser);
                CombinedDVModelReranker reranker_1 = new CombinedDVModelReranker(options, dvparsers);
                combinedParser.reranker = reranker_1;
                combinedParser.SaveParserToSerialized(modelPath);
            }
            else
            {
                throw new ArgumentException("Need to specify -model to load an already prepared CombinedParser");
            }
            Treebank testTreebank = null;

            if (testTreebankPath != null)
            {
                log.Info("Reading in trees from " + testTreebankPath);
                if (testTreebankFilter != null)
                {
                    log.Info("Filtering on " + testTreebankFilter);
                }
                testTreebank = combinedParser.GetOp().tlpParams.MemoryTreebank();
                testTreebank.LoadPath(testTreebankPath, testTreebankFilter);
                log.Info("Read in " + testTreebank.Count + " trees for testing");
                EvaluateTreebank evaluator = new EvaluateTreebank(combinedParser.GetOp(), null, combinedParser);
                evaluator.TestOnTreebank(testTreebank);
            }
        }
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            IDictionary <string, int> flagsToNumArgs = Generics.NewHashMap();

            flagsToNumArgs["-parser"]        = 3;
            flagsToNumArgs["-lex"]           = 3;
            flagsToNumArgs["-test"]          = 2;
            flagsToNumArgs["-out"]           = 1;
            flagsToNumArgs["-lengthPenalty"] = 1;
            flagsToNumArgs["-penaltyType"]   = 1;
            flagsToNumArgs["-maxLength"]     = 1;
            flagsToNumArgs["-stats"]         = 2;
            IDictionary <string, string[]> argMap = StringUtils.ArgsToMap(args, flagsToNumArgs);
            bool        eval = argMap.Contains("-eval");
            PrintWriter pw   = null;

            if (argMap.Contains("-out"))
            {
                pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream((argMap["-out"])[0]), "GB18030"), true);
            }
            log.Info("ChineseCharacterBasedLexicon called with args:");
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            for (int i = 0; i < args.Length; i++)
            {
                ctpp.SetOptionFlag(args, i);
                log.Info(" " + args[i]);
            }
            log.Info();
            Options op = new Options(ctpp);

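            // -stats only prints treebank statistics and exits.  Otherwise the remaining flags
            // optionally train or load a parser (-parser), train or load a lexicon (-lex), and
            // run segmentation and/or parsing evaluation over a test file range (-test, -eval).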
            if (argMap.Contains("-stats"))
            {
                string[]       statArgs         = (argMap["-stats"]);
                MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    trainFilt        = new NumberRangesFileFilter(statArgs[1], false);
                rawTrainTreebank.LoadPath(new File(statArgs[0]), trainFilt);
                log.Info("Done reading trees.");
                MemoryTreebank trainTreebank;
                if (argMap.Contains("-annotate"))
                {
                    trainTreebank = new MemoryTreebank();
                    TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                    foreach (Tree tree in rawTrainTreebank)
                    {
                        trainTreebank.Add(annotator.TransformTree(tree));
                    }
                    log.Info("Done annotating trees.");
                }
                else
                {
                    trainTreebank = rawTrainTreebank;
                }
                PrintStats(trainTreebank, pw);
                System.Environment.Exit(0);
            }
            int maxLength = 1000000;

            //    Test.verbose = true;
            if (argMap.Contains("-norm"))
            {
                op.testOptions.lengthNormalization = true;
            }
            if (argMap.Contains("-maxLength"))
            {
                maxLength = System.Convert.ToInt32((argMap["-maxLength"])[0]);
            }
            op.testOptions.maxLength = 120;
            bool combo = argMap.Contains("-combo");

            if (combo)
            {
                ctpp.useCharacterBasedLexicon = true;
                op.testOptions.maxSpanForTags = 10;
                op.doDep  = false;
                op.dcTags = false;
            }
            LexicalizedParser lp  = null;
            ILexicon          lex = null;

            if (argMap.Contains("-parser"))
            {
                string[] parserArgs = (argMap["-parser"]);
                if (parserArgs.Length > 1)
                {
                    IFileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
                    lp = LexicalizedParser.TrainFromTreebank(parserArgs[0], trainFilt, op);
                    if (parserArgs.Length == 3)
                    {
                        string filename = parserArgs[2];
                        log.Info("Writing parser in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        @out.WriteObject(lp);
                        @out.Close();
                        log.Info("done.");
                    }
                }
                else
                {
                    string parserFile = parserArgs[0];
                    lp = LexicalizedParser.LoadModel(parserFile, op);
                }
                lex  = lp.GetLexicon();
                op   = lp.GetOp();
                ctpp = (ChineseTreebankParserParams)op.tlpParams;
            }
            if (argMap.Contains("-rad"))
            {
                ctpp.useUnknownCharacterModel = true;
            }
            if (argMap.Contains("-lengthPenalty"))
            {
                ctpp.lengthPenalty = double.Parse((argMap["-lengthPenalty"])[0]);
            }
            if (argMap.Contains("-penaltyType"))
            {
                ctpp.penaltyType = System.Convert.ToInt32((argMap["-penaltyType"])[0]);
            }
            if (argMap.Contains("-lex"))
            {
                string[] lexArgs = (argMap["-lex"]);
                if (lexArgs.Length > 1)
                {
                    IIndex <string> wordIndex = new HashIndex <string>();
                    IIndex <string> tagIndex  = new HashIndex <string>();
                    lex = ctpp.Lex(op, wordIndex, tagIndex);
                    MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                    IFileFilter    trainFilt        = new NumberRangesFileFilter(lexArgs[1], false);
                    rawTrainTreebank.LoadPath(new File(lexArgs[0]), trainFilt);
                    log.Info("Done reading trees.");
                    MemoryTreebank trainTreebank;
                    if (argMap.Contains("-annotate"))
                    {
                        trainTreebank = new MemoryTreebank();
                        TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                        foreach (Tree tree in rawTrainTreebank)
                        {
                            tree = annotator.TransformTree(tree);
                            trainTreebank.Add(tree);
                        }
                        log.Info("Done annotating trees.");
                    }
                    else
                    {
                        trainTreebank = rawTrainTreebank;
                    }
                    lex.InitializeTraining(trainTreebank.Count);
                    lex.Train(trainTreebank);
                    lex.FinishTraining();
                    log.Info("Done training lexicon.");
                    if (lexArgs.Length == 3)
                    {
                        string filename = lexArgs.Length == 3 ? lexArgs[2] : "parsers/chineseCharLex.ser.gz";
                        log.Info("Writing lexicon in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        @out.WriteObject(lex);
                        @out.Close();
                        log.Info("done.");
                    }
                }
                else
                {
                    string lexFile = lexArgs.Length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
                    log.Info("Reading Lexicon from file " + lexFile);
                    ObjectInputStream @in = IOUtils.ReadStreamFromString(lexFile);
                    try
                    {
                        lex = (ILexicon)@in.ReadObject();
                    }
                    catch (TypeLoadException)
                    {
                        throw new Exception("Bad serialized file: " + lexFile);
                    }
                    @in.Close();
                }
            }
            if (argMap.Contains("-test"))
            {
                bool segmentWords = ctpp.segment;
                bool parse        = lp != null;
                System.Diagnostics.Debug.Assert((parse || segmentWords));
                //      WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
                //      WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
                IWordSegmenter seg = null;
                if (segmentWords)
                {
                    seg = (IWordSegmenter)lex;
                }
                string[]       testArgs     = (argMap["-test"]);
                MemoryTreebank testTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    testFilt     = new NumberRangesFileFilter(testArgs[1], false);
                testTreebank.LoadPath(new File(testArgs[0]), testFilt);
                ITreeTransformer          subcategoryStripper = op.tlpParams.SubcategoryStripper();
                ITreeTransformer          collinizer          = ctpp.Collinizer();
                WordCatEquivalenceClasser eqclass             = new WordCatEquivalenceClasser();
                WordCatEqualityChecker    eqcheck             = new WordCatEqualityChecker();
                EquivalenceClassEval      basicEval           = new EquivalenceClassEval(eqclass, eqcheck, "basic");
                EquivalenceClassEval      collinsEval         = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
                IList <string>            evalTypes           = new List <string>(3);
                bool goodPOS = false;
                if (segmentWords)
                {
                    evalTypes.Add(WordCatConstituent.wordType);
                    if (ctpp.segmentMarkov && !parse)
                    {
                        evalTypes.Add(WordCatConstituent.tagType);
                        goodPOS = true;
                    }
                }
                if (parse)
                {
                    evalTypes.Add(WordCatConstituent.tagType);
                    evalTypes.Add(WordCatConstituent.catType);
                    if (combo)
                    {
                        evalTypes.Add(WordCatConstituent.wordType);
                        goodPOS = true;
                    }
                }
                TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
                log.Info("Testing...");
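                // For each gold tree: optionally re-segment its characters, parse the resulting
                // word sequence (or flatten the gold words when only segmenting), and, with -eval,
                // score the predicted brackets against gold both raw and after collinization.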
                foreach (Tree goldTop in testTreebank)
                {
                    Tree             gold         = goldTop.FirstChild();
                    IList <IHasWord> goldSentence = gold.YieldHasWord();
                    if (goldSentence.Count > maxLength)
                    {
                        log.Info("Skipping sentence; too long: " + goldSentence.Count);
                        continue;
                    }
                    else
                    {
                        log.Info("Processing sentence; length: " + goldSentence.Count);
                    }
                    IList <IHasWord> s;
                    if (segmentWords)
                    {
                        StringBuilder goldCharBuf = new StringBuilder();
                        foreach (IHasWord aGoldSentence in goldSentence)
                        {
                            StringLabel word = (StringLabel)aGoldSentence;
                            goldCharBuf.Append(word.Value());
                        }
                        string goldChars = goldCharBuf.ToString();
                        s = seg.Segment(goldChars);
                    }
                    else
                    {
                        s = goldSentence;
                    }
                    Tree tree;
                    if (parse)
                    {
                        tree = lp.ParseTree(s);
                        if (tree == null)
                        {
                            throw new Exception("PARSER RETURNED NULL!!!");
                        }
                    }
                    else
                    {
                        tree = Edu.Stanford.Nlp.Trees.Trees.ToFlatTree(s);
                        tree = subcategoryStripper.TransformTree(tree);
                    }
                    if (pw != null)
                    {
                        if (parse)
                        {
                            tree.PennPrint(pw);
                        }
                        else
                        {
                            // C# enumerators must be advanced with MoveNext() before Current is
                            // read (unlike Java's Iterator.next()), so advance once up front.
                            IEnumerator sentIter = s.GetEnumerator();
                            if (sentIter.MoveNext())
                            {
                                for (; ;)
                                {
                                    Word word = (Word)sentIter.Current;
                                    pw.Print(word.Word());
                                    if (sentIter.MoveNext())
                                    {
                                        pw.Print(" ");
                                    }
                                    else
                                    {
                                        break;
                                    }
                                }
                            }
                        }
                        pw.Println();
                    }
                    if (eval)
                    {
                        ICollection ourBrackets;
                        ICollection goldBrackets;
                        ourBrackets  = proc.AllBrackets(tree);
                        goldBrackets = proc.AllBrackets(gold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(tree, gold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(gold, tree));
                        }
                        basicEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nScores:");
                        basicEval.DisplayLast();
                        Tree collinsTree = collinizer.TransformTree(tree);
                        Tree collinsGold = collinizer.TransformTree(gold);
                        ourBrackets  = proc.AllBrackets(collinsTree);
                        goldBrackets = proc.AllBrackets(collinsGold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsTree, collinsGold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsGold, collinsTree));
                        }
                        collinsEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nCollinized scores:");
                        collinsEval.DisplayLast();
                        System.Console.Out.WriteLine();
                    }
                }
                if (eval)
                {
                    basicEval.Display();
                    System.Console.Out.WriteLine();
                    collinsEval.Display();
                }
            }
        }
        /// <exception cref="System.Exception"/>
        public static void Main(string[] args)
        {
            string         modelPath          = null;
            string         outputPath         = null;
            string         testTreebankPath   = null;
            IFileFilter    testTreebankFilter = null;
            IList <string> unusedArgs         = new List <string>();

            for (int argIndex = 0; argIndex < args.Length;)
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-model"))
                {
                    modelPath = args[argIndex + 1];
                    argIndex += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-testTreebank"))
                    {
                        Pair <string, IFileFilter> treebankDescription = ArgUtils.GetTreebankDescription(args, argIndex, "-testTreebank");
                        argIndex           = argIndex + ArgUtils.NumSubArgs(args, argIndex) + 1;
                        testTreebankPath   = treebankDescription.First();
                        testTreebankFilter = treebankDescription.Second();
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
                        {
                            outputPath = args[argIndex + 1];
                            argIndex  += 2;
                        }
                        else
                        {
                            unusedArgs.Add(args[argIndex++]);
                        }
                    }
                }
            }
            if (modelPath == null)
            {
                throw new ArgumentException("Need to specify -model");
            }
            if (testTreebankPath == null)
            {
                throw new ArgumentException("Need to specify -testTreebank");
            }
            if (outputPath == null)
            {
                throw new ArgumentException("Need to specify -output");
            }
            string[]          newArgs      = Sharpen.Collections.ToArray(unusedArgs, new string[unusedArgs.Count]);
            LexicalizedParser lexparser    = ((LexicalizedParser)LexicalizedParser.LoadModel(modelPath, newArgs));
            Treebank          testTreebank = null;

            if (testTreebankPath != null)
            {
                log.Info("Reading in trees from " + testTreebankPath);
                if (testTreebankFilter != null)
                {
                    log.Info("Filtering on " + testTreebankFilter);
                }
                testTreebank = lexparser.GetOp().tlpParams.MemoryTreebank();
                testTreebank.LoadPath(testTreebankPath, testTreebankFilter);
                log.Info("Read in " + testTreebank.Count + " trees for testing");
            }
            FileWriter     @out = new FileWriter(outputPath);
            BufferedWriter bout = new BufferedWriter(@out);

            log.Info("Parsing " + testTreebank.Count + " trees");
            int count = 0;
            IList <FindNearestNeighbors.ParseRecord> records = Generics.NewArrayList();

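            // Parse each gold sentence with the DV reranker, pull the ROOT node's vector out of
            // the best DeepTree, and write the tokens, the tree, and the vector components to the
            // output file, keeping a ParseRecord per sentence for the nearest-neighbor pass below.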
            foreach (Tree goldTree in testTreebank)
            {
                IList <Word> tokens      = goldTree.YieldWords();
                IParserQuery parserQuery = lexparser.ParserQuery();
                if (!parserQuery.Parse(tokens))
                {
                    throw new AssertionError("Could not parse: " + tokens);
                }
                if (!(parserQuery is RerankingParserQuery))
                {
                    throw new ArgumentException("Expected a LexicalizedParser with a Reranker attached");
                }
                RerankingParserQuery rpq = (RerankingParserQuery)parserQuery;
                if (!(rpq.RerankerQuery() is DVModelReranker.Query))
                {
                    throw new ArgumentException("Expected a LexicalizedParser with a DVModel attached");
                }
                DeepTree     tree       = ((DVModelReranker.Query)rpq.RerankerQuery()).GetDeepTrees()[0];
                SimpleMatrix rootVector = null;
                foreach (KeyValuePair <Tree, SimpleMatrix> entry in tree.GetVectors())
                {
                    if (entry.Key.Label().Value().Equals("ROOT"))
                    {
                        rootVector = entry.Value;
                        break;
                    }
                }
                if (rootVector == null)
                {
                    throw new AssertionError("Could not find root nodevector");
                }
                @out.Write(tokens + "\n");
                @out.Write(tree.GetTree() + "\n");
                for (int i = 0; i < rootVector.GetNumElements(); ++i)
                {
                    @out.Write("  " + rootVector.Get(i));
                }
                @out.Write("\n\n\n");
                count++;
                if (count % 10 == 0)
                {
                    log.Info("  " + count);
                }
                records.Add(new FindNearestNeighbors.ParseRecord(tokens, goldTree, tree.GetTree(), rootVector, tree.GetVectors()));
            }
            log.Info("  done parsing");
            IList <Pair <Tree, SimpleMatrix> > subtrees = Generics.NewArrayList();

            foreach (FindNearestNeighbors.ParseRecord record in records)
            {
                foreach (KeyValuePair <Tree, SimpleMatrix> entry in record.nodeVectors)
                {
                    if (entry.Key.GetLeaves().Count <= maxLength)
                    {
                        subtrees.Add(Pair.MakePair(entry.Key, entry.Value));
                    }
                }
            }
            log.Info("There are " + subtrees.Count + " subtrees in the set of trees");
            PriorityQueue <ScoredObject <Pair <Tree, Tree> > > bestmatches = new PriorityQueue <ScoredObject <Pair <Tree, Tree> > >(101, ScoredComparator.DescendingComparator);

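            // For every subtree vector, compute the Frobenius-norm distance to every other subtree
            // vector, keep the 100 closest matches in a bounded priority queue, and log them.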
            for (int i_1 = 0; i_1 < subtrees.Count; ++i_1)
            {
                log.Info(subtrees[i_1].First().YieldWords());
                log.Info(subtrees[i_1].First());
                for (int j = 0; j < subtrees.Count; ++j)
                {
                    if (i_1 == j)
                    {
                        continue;
                    }
                    // TODO: look at basic category?
                    double normF = subtrees[i_1].Second().Minus(subtrees[j].Second()).NormF();
                    bestmatches.Add(new ScoredObject <Pair <Tree, Tree> >(Pair.MakePair(subtrees[i_1].First(), subtrees[j].First()), normF));
                    if (bestmatches.Count > 100)
                    {
                        bestmatches.Poll();
                    }
                }
                IList <ScoredObject <Pair <Tree, Tree> > > ordered = Generics.NewArrayList();
                while (bestmatches.Count > 0)
                {
                    ordered.Add(bestmatches.Poll());
                }
                Java.Util.Collections.Reverse(ordered);
                foreach (ScoredObject <Pair <Tree, Tree> > pair in ordered)
                {
                    log.Info(" MATCHED " + pair.Object().second.YieldWords() + " ... " + pair.Object().Second() + " with a score of " + pair.Score());
                }
                log.Info();
                log.Info();
                bestmatches.Clear();
            }

            /*
             * for (int i = 0; i < records.size(); ++i) {
             * if (i % 10 == 0) {
             * log.info("  " + i);
             * }
             * List<ScoredObject<ParseRecord>> scored = Generics.newArrayList();
             * for (int j = 0; j < records.size(); ++j) {
             * if (i == j) continue;
             *
             * double score = 0.0;
             * int matches = 0;
             * for (Map.Entry<Tree, SimpleMatrix> first : records.get(i).nodeVectors.entrySet()) {
             * for (Map.Entry<Tree, SimpleMatrix> second : records.get(j).nodeVectors.entrySet()) {
             * String firstBasic = dvparser.dvModel.basicCategory(first.getKey().label().value());
             * String secondBasic = dvparser.dvModel.basicCategory(second.getKey().label().value());
             * if (firstBasic.equals(secondBasic)) {
             * ++matches;
             * double normF = first.getValue().minus(second.getValue()).normF();
             * score += normF * normF;
             * }
             * }
             * }
             * if (matches == 0) {
             * score = Double.POSITIVE_INFINITY;
             * } else {
             * score = score / matches;
             * }
             * //double score = records.get(i).vector.minus(records.get(j).vector).normF();
             * scored.add(new ScoredObject<ParseRecord>(records.get(j), score));
             * }
             * Collections.sort(scored, ScoredComparator.ASCENDING_COMPARATOR);
             *
             * out.write(records.get(i).sentence.toString() + "\n");
             * for (int j = 0; j < numNeighbors; ++j) {
             * out.write("   " + scored.get(j).score() + ": " + scored.get(j).object().sentence + "\n");
             * }
             * out.write("\n\n");
             * }
             * log.info();
             */
            bout.Flush();
            @out.Flush();
            @out.Close();
        }