Example #1
0
        /// <summary>
        /// Trains a <c>ChineseMaxentLexicon</c> on the treebank files selected by one filter,
        /// then runs <c>TestOnTreebank</c> on the tagged words of the files selected by a
        /// second filter and prints the correct/total counts and their ratio.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c>: treebank path (loaded for both training and testing);
        /// <c>args[1]</c>: specification handed to the training <c>NumberRangesFileFilter</c>;
        /// <c>args[2]</c>: specification handed to the test <c>NumberRangesFileFilter</c>;
        /// <c>args[3]</c> (optional): integer feature level, used instead of
        /// <c>DefaultFeatureLevel</c> when present.
        /// </param>
        /// <exception cref="System.ArgumentException">Fewer than three arguments were supplied.</exception>
        public static void Main(string[] args)
        {
            // Fail fast with a usage message instead of an IndexOutOfRangeException further down.
            if (args == null || args.Length < 3)
            {
                throw new System.ArgumentException("args: treebankPath trainNums testNums [featureLevel]");
            }

            ITreebankLangParserParams tlpParams = new ChineseTreebankParserParams();
            // NOTE(review): ctlp is not used below; the call is kept in case it has side effects -- confirm and remove.
            ITreebankLanguagePack     ctlp      = tlpParams.TreebankLanguagePack();
            Options       op = new Options(tlpParams);
            TreeAnnotator ta = new TreeAnnotator(tlpParams.HeadFinder(), tlpParams, op);

            log.Info("Reading Trees...");
            IFileFilter trainFilter   = new NumberRangesFileFilter(args[1], true);
            Treebank    trainTreebank = tlpParams.MemoryTreebank();

            trainTreebank.LoadPath(args[0], trainFilter);
            log.Info("Annotating trees...");
            ICollection <Tree> trainTrees = new List <Tree>();

            foreach (Tree tree in trainTreebank)
            {
                trainTrees.Add(ta.TransformTree(tree));
            }
            // Drop the reference to the raw treebank; only the annotated trees are needed from here on.
            trainTreebank = null;
            // saves memory
            log.Info("Training lexicon...");
            IIndex <string> wordIndex    = new HashIndex <string>();
            IIndex <string> tagIndex     = new HashIndex <string>();
            int             featureLevel = DefaultFeatureLevel;

            // The fourth argument is optional and overrides the default feature level.
            if (args.Length > 3)
            {
                featureLevel = System.Convert.ToInt32(args[3]);
            }
            Edu.Stanford.Nlp.Parser.Lexparser.ChineseMaxentLexicon lex = new Edu.Stanford.Nlp.Parser.Lexparser.ChineseMaxentLexicon(op, wordIndex, tagIndex, featureLevel);
            lex.InitializeTraining(trainTrees.Count);
            lex.Train(trainTrees);
            lex.FinishTraining();
            log.Info("Testing");
            IFileFilter testFilter   = new NumberRangesFileFilter(args[2], true);
            Treebank    testTreebank = tlpParams.MemoryTreebank();

            testTreebank.LoadPath(args[0], testFilter);
            IList <TaggedWord> testWords = new List <TaggedWord>();

            // Flatten the tagged yield of every test tree into a single word list.
            foreach (Tree t in testTreebank)
            {
                foreach (TaggedWord tw in t.TaggedYield())
                {
                    testWords.Add(tw);
                }
            }
            // Index 0 is printed as the total, index 1 as the number correct.
            int[] totalAndCorrect = lex.TestOnTreebank(testWords);
            log.Info("done.");
            System.Console.Out.WriteLine(totalAndCorrect[1] + " correct out of " + totalAndCorrect[0] + " -- ACC: " + ((double)totalAndCorrect[1]) / totalAndCorrect[0]);
        }
        /// <summary>for testing -- CURRENTLY BROKEN!!!</summary>
        /// <remarks>
        /// Trains a <c>LexicalizedParser</c> from the treebank (or, if training throws
        /// <c>ArgumentException</c>, loads a model from <c>args[1]</c>), writes the trained parser
        /// to <c>chineseCharTagPCFG.ser.gz</c>, then parses every test tree and writes the gold
        /// and parsed yields/trees to <c>out.chi</c> in GB18030.
        /// </remarks>
        /// <param name="args">
        /// Exactly three values: treebank path, training file numbers, test file numbers.
        /// </param>
        /// <exception cref="System.ArgumentException">The argument count is not three.</exception>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args == null || args.Length != 3)
            {
                throw new ArgumentException("args: treebankPath trainNums testNums");
            }
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            ctpp.charTags = true;
            // TODO: these options are getting clobbered by reading in the
            // parser object (unless it's a text file parser?)
            Options op = new Options(ctpp);

            op.doDep = false;
            op.testOptions.maxLength = 90;
            LexicalizedParser lp;

            try
            {
                IFileFilter trainFilt = new NumberRangesFileFilter(args[1], false);
                lp = LexicalizedParser.TrainFromTreebank(args[0], trainFilt, op);
                try
                {
                    string filename = "chineseCharTagPCFG.ser.gz";
                    log.Info("Writing parser in serialized format to file " + filename + " ");
                    System.Console.Error.Flush();
                    ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                    try
                    {
                        @out.WriteObject(lp);
                    }
                    finally
                    {
                        // Always release the stream, even when serialization fails.
                        @out.Close();
                    }
                    log.Info("done.");
                }
                catch (IOException ioe)
                {
                    // Failing to save the model is not fatal; testing continues with the in-memory parser.
                    Sharpen.Runtime.PrintStackTrace(ioe);
                }
            }
            catch (ArgumentException)
            {
                // Fallback: treat args[1] as a model location and load the parser from it.
                lp = LexicalizedParser.LoadModel(args[1], op);
            }
            IFileFilter    testFilt     = new NumberRangesFileFilter(args[2], false);
            MemoryTreebank testTreebank = ctpp.MemoryTreebank();

            testTreebank.LoadPath(new File(args[0]), testFilt);
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true);
            try
            {
                WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
                WordCatEqualityChecker    eqcheck = new WordCatEqualityChecker();
                EquivalenceClassEval      eval    = new EquivalenceClassEval(eqclass, eqcheck);

                //    System.out.println("Preterminals:" + preterminals);
                System.Console.Out.WriteLine("Testing...");
                foreach (Tree gold in testTreebank)
                {
                    Tree tree;
                    try
                    {
                        tree = lp.ParseTree(gold.YieldHasWord());
                        if (tree == null)
                        {
                            System.Console.Out.WriteLine("Failed to parse " + gold.YieldHasWord());
                            continue;
                        }
                    }
                    catch (Exception e)
                    {
                        // A failure on one sentence is reported and skipped so the rest still run.
                        Sharpen.Runtime.PrintStackTrace(e);
                        continue;
                    }
                    // A foreach iteration variable cannot be reassigned in C#, so the first
                    // child of the gold tree gets its own local.
                    Tree goldTree = gold.FirstChild();
                    pw.Println(SentenceUtils.ListToString(goldTree.PreTerminalYield()));
                    pw.Println(SentenceUtils.ListToString(goldTree.Yield()));
                    goldTree.PennPrint(pw);
                    pw.Println(tree.PreTerminalYield());
                    pw.Println(tree.Yield());
                    tree.PennPrint(pw);
                    // NOTE(review): the bracket evaluation below is commented out, so eval is never
                    // fed any data here -- presumably part of why this method is marked broken.
                    //      Collection allBrackets = WordCatConstituent.allBrackets(tree);
                    //      Collection goldBrackets = WordCatConstituent.allBrackets(gold);
                    //      eval.eval(allBrackets, goldBrackets);
                    eval.DisplayLast();
                }
                System.Console.Out.WriteLine();
                System.Console.Out.WriteLine();
                eval.Display();
            }
            finally
            {
                // Close the output file on every exit path.
                pw.Close();
            }
        }