Exemplo n.º 1
0
        public override bool SetOptions(Properties opts)
        {
            bool ret = base.SetOptions(opts);

            if (opts.Contains(ConfigParser.paramSplit))
            {
                string splitFileName = opts.GetProperty(ConfigParser.paramSplit);
                splitSet = MakeSplitSet(splitFileName);
            }
            CcTagset = PropertiesUtils.GetBool(opts, ConfigParser.paramCCTagset, false);
            treebank = new MemoryTreebank(new FrenchXMLTreeReaderFactory(CcTagset), FrenchTreebankLanguagePack.FtbEncoding);
            if (lexMapper == null)
            {
                lexMapper = new DefaultMapper();
                lexMapper.Setup(null, lexMapOptions.Split(","));
            }
            if (pathsToMappings.Count != 0)
            {
                if (posMapper == null)
                {
                    posMapper = new DefaultMapper();
                }
                foreach (File path in pathsToMappings)
                {
                    posMapper.Setup(path);
                }
            }
            return(ret);
        }
Exemplo n.º 2
0
 public FTBDataset()
     : base()
 {
     //Need to use a MemoryTreebank so that we can compute gross corpus
     //stats for MWE pre-processing
     // The treebank may be reset if setOptions changes CC_TAGSET
     treebank          = new MemoryTreebank(new FrenchXMLTreeReaderFactory(CcTagset), FrenchTreebankLanguagePack.FtbEncoding);
     treeFileExtension = "xml";
 }
Exemplo n.º 3
0
        private static IEnumerator <Tree> TreebankIterator(string path)
        {
            /* Remove empty nodes and strip indices from internal nodes but keep
             * functional tags. */
            Treebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));

            tb.LoadPath(path);
            return(tb.GetEnumerator());
        }
Exemplo n.º 4
0
        /// <summary>Reads an annotation from the given filename using the requested input.</summary>
        public static IList <Annotation> GetAnnotations(StanfordCoreNLP tokenizer, SentimentPipeline.Input inputFormat, string filename, bool filterUnknown)
        {
            switch (inputFormat)
            {
            case SentimentPipeline.Input.Text:
            {
                string     text       = IOUtils.SlurpFileNoExceptions(filename);
                Annotation annotation = new Annotation(text);
                tokenizer.Annotate(annotation);
                IList <Annotation> annotations = Generics.NewArrayList();
                foreach (ICoreMap sentence in annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
                {
                    Annotation nextAnnotation = new Annotation(sentence.Get(typeof(CoreAnnotations.TextAnnotation)));
                    nextAnnotation.Set(typeof(CoreAnnotations.SentencesAnnotation), Java.Util.Collections.SingletonList(sentence));
                    annotations.Add(nextAnnotation);
                }
                return(annotations);
            }

            case SentimentPipeline.Input.Trees:
            {
                IList <Tree> trees;
                if (filterUnknown)
                {
                    trees = SentimentUtils.ReadTreesWithGoldLabels(filename);
                    trees = SentimentUtils.FilterUnknownRoots(trees);
                }
                else
                {
                    MemoryTreebank treebank = new MemoryTreebank("utf-8");
                    treebank.LoadPath(filename, null);
                    trees = new List <Tree>(treebank);
                }
                IList <Annotation> annotations = Generics.NewArrayList();
                foreach (Tree tree in trees)
                {
                    ICoreMap sentence = new Annotation(SentenceUtils.ListToString(tree.Yield()));
                    sentence.Set(typeof(TreeCoreAnnotations.TreeAnnotation), tree);
                    IList <ICoreMap> sentences  = Java.Util.Collections.SingletonList(sentence);
                    Annotation       annotation = new Annotation(string.Empty);
                    annotation.Set(typeof(CoreAnnotations.SentencesAnnotation), sentences);
                    annotations.Add(annotation);
                }
                return(annotations);
            }

            default:
            {
                throw new ArgumentException("Unknown format " + inputFormat);
            }
            }
        }
Exemplo n.º 5
0
        /// <summary>Given a file name, reads in those trees and returns them as a List</summary>
        public static IList <Tree> ReadTreesWithLabels(string path, Type annotationClass)
        {
            IList <Tree>   trees    = Generics.NewArrayList();
            MemoryTreebank treebank = new MemoryTreebank("utf-8");

            treebank.LoadPath(path, null);
            foreach (Tree tree in treebank)
            {
                AttachLabels(tree, annotationClass);
                trees.Add(tree);
            }
            return(trees);
        }
Exemplo n.º 6
0
        // A small variety of trees to test on, especially with different depths of unary transitions
        public virtual IList <Tree> BuildTestTreebank()
        {
            MemoryTreebank treebank = new MemoryTreebank();

            foreach (string text in TestTrees)
            {
                Tree tree = Tree.ValueOf(text);
                treebank.Add(tree);
            }
            IList <Tree> binarizedTrees = ShiftReduceParser.BinarizeTreebank(treebank, new Options());

            return(binarizedTrees);
        }
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            IDictionary <string, int> flagsToNumArgs = Generics.NewHashMap();

            flagsToNumArgs["-parser"]        = int.Parse(3);
            flagsToNumArgs["-lex"]           = int.Parse(3);
            flagsToNumArgs["-test"]          = int.Parse(2);
            flagsToNumArgs["-out"]           = int.Parse(1);
            flagsToNumArgs["-lengthPenalty"] = int.Parse(1);
            flagsToNumArgs["-penaltyType"]   = int.Parse(1);
            flagsToNumArgs["-maxLength"]     = int.Parse(1);
            flagsToNumArgs["-stats"]         = int.Parse(2);
            IDictionary <string, string[]> argMap = StringUtils.ArgsToMap(args, flagsToNumArgs);
            bool        eval = argMap.Contains("-eval");
            PrintWriter pw   = null;

            if (argMap.Contains("-out"))
            {
                pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream((argMap["-out"])[0]), "GB18030"), true);
            }
            log.Info("ChineseCharacterBasedLexicon called with args:");
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            for (int i = 0; i < args.Length; i++)
            {
                ctpp.SetOptionFlag(args, i);
                log.Info(" " + args[i]);
            }
            log.Info();
            Options op = new Options(ctpp);

            if (argMap.Contains("-stats"))
            {
                string[]       statArgs         = (argMap["-stats"]);
                MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    trainFilt        = new NumberRangesFileFilter(statArgs[1], false);
                rawTrainTreebank.LoadPath(new File(statArgs[0]), trainFilt);
                log.Info("Done reading trees.");
                MemoryTreebank trainTreebank;
                if (argMap.Contains("-annotate"))
                {
                    trainTreebank = new MemoryTreebank();
                    TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                    foreach (Tree tree in rawTrainTreebank)
                    {
                        trainTreebank.Add(annotator.TransformTree(tree));
                    }
                    log.Info("Done annotating trees.");
                }
                else
                {
                    trainTreebank = rawTrainTreebank;
                }
                PrintStats(trainTreebank, pw);
                System.Environment.Exit(0);
            }
            int maxLength = 1000000;

            //    Test.verbose = true;
            if (argMap.Contains("-norm"))
            {
                op.testOptions.lengthNormalization = true;
            }
            if (argMap.Contains("-maxLength"))
            {
                maxLength = System.Convert.ToInt32((argMap["-maxLength"])[0]);
            }
            op.testOptions.maxLength = 120;
            bool combo = argMap.Contains("-combo");

            if (combo)
            {
                ctpp.useCharacterBasedLexicon = true;
                op.testOptions.maxSpanForTags = 10;
                op.doDep  = false;
                op.dcTags = false;
            }
            LexicalizedParser lp  = null;
            ILexicon          lex = null;

            if (argMap.Contains("-parser"))
            {
                string[] parserArgs = (argMap["-parser"]);
                if (parserArgs.Length > 1)
                {
                    IFileFilter trainFilt = new NumberRangesFileFilter(parserArgs[1], false);
                    lp = LexicalizedParser.TrainFromTreebank(parserArgs[0], trainFilt, op);
                    if (parserArgs.Length == 3)
                    {
                        string filename = parserArgs[2];
                        log.Info("Writing parser in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        @out.WriteObject(lp);
                        @out.Close();
                        log.Info("done.");
                    }
                }
                else
                {
                    string parserFile = parserArgs[0];
                    lp = LexicalizedParser.LoadModel(parserFile, op);
                }
                lex  = lp.GetLexicon();
                op   = lp.GetOp();
                ctpp = (ChineseTreebankParserParams)op.tlpParams;
            }
            if (argMap.Contains("-rad"))
            {
                ctpp.useUnknownCharacterModel = true;
            }
            if (argMap.Contains("-lengthPenalty"))
            {
                ctpp.lengthPenalty = double.Parse((argMap["-lengthPenalty"])[0]);
            }
            if (argMap.Contains("-penaltyType"))
            {
                ctpp.penaltyType = System.Convert.ToInt32((argMap["-penaltyType"])[0]);
            }
            if (argMap.Contains("-lex"))
            {
                string[] lexArgs = (argMap["-lex"]);
                if (lexArgs.Length > 1)
                {
                    IIndex <string> wordIndex = new HashIndex <string>();
                    IIndex <string> tagIndex  = new HashIndex <string>();
                    lex = ctpp.Lex(op, wordIndex, tagIndex);
                    MemoryTreebank rawTrainTreebank = op.tlpParams.MemoryTreebank();
                    IFileFilter    trainFilt        = new NumberRangesFileFilter(lexArgs[1], false);
                    rawTrainTreebank.LoadPath(new File(lexArgs[0]), trainFilt);
                    log.Info("Done reading trees.");
                    MemoryTreebank trainTreebank;
                    if (argMap.Contains("-annotate"))
                    {
                        trainTreebank = new MemoryTreebank();
                        TreeAnnotator annotator = new TreeAnnotator(ctpp.HeadFinder(), ctpp, op);
                        foreach (Tree tree in rawTrainTreebank)
                        {
                            tree = annotator.TransformTree(tree);
                            trainTreebank.Add(tree);
                        }
                        log.Info("Done annotating trees.");
                    }
                    else
                    {
                        trainTreebank = rawTrainTreebank;
                    }
                    lex.InitializeTraining(trainTreebank.Count);
                    lex.Train(trainTreebank);
                    lex.FinishTraining();
                    log.Info("Done training lexicon.");
                    if (lexArgs.Length == 3)
                    {
                        string filename = lexArgs.Length == 3 ? lexArgs[2] : "parsers/chineseCharLex.ser.gz";
                        log.Info("Writing lexicon in serialized format to file " + filename + " ");
                        System.Console.Error.Flush();
                        ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                        @out.WriteObject(lex);
                        @out.Close();
                        log.Info("done.");
                    }
                }
                else
                {
                    string lexFile = lexArgs.Length == 1 ? lexArgs[0] : "parsers/chineseCharLex.ser.gz";
                    log.Info("Reading Lexicon from file " + lexFile);
                    ObjectInputStream @in = IOUtils.ReadStreamFromString(lexFile);
                    try
                    {
                        lex = (ILexicon)@in.ReadObject();
                    }
                    catch (TypeLoadException)
                    {
                        throw new Exception("Bad serialized file: " + lexFile);
                    }
                    @in.Close();
                }
            }
            if (argMap.Contains("-test"))
            {
                bool segmentWords = ctpp.segment;
                bool parse        = lp != null;
                System.Diagnostics.Debug.Assert((parse || segmentWords));
                //      WordCatConstituent.collinizeWords = argMap.containsKey("-collinizeWords");
                //      WordCatConstituent.collinizeTags = argMap.containsKey("-collinizeTags");
                IWordSegmenter seg = null;
                if (segmentWords)
                {
                    seg = (IWordSegmenter)lex;
                }
                string[]       testArgs     = (argMap["-test"]);
                MemoryTreebank testTreebank = op.tlpParams.MemoryTreebank();
                IFileFilter    testFilt     = new NumberRangesFileFilter(testArgs[1], false);
                testTreebank.LoadPath(new File(testArgs[0]), testFilt);
                ITreeTransformer          subcategoryStripper = op.tlpParams.SubcategoryStripper();
                ITreeTransformer          collinizer          = ctpp.Collinizer();
                WordCatEquivalenceClasser eqclass             = new WordCatEquivalenceClasser();
                WordCatEqualityChecker    eqcheck             = new WordCatEqualityChecker();
                EquivalenceClassEval      basicEval           = new EquivalenceClassEval(eqclass, eqcheck, "basic");
                EquivalenceClassEval      collinsEval         = new EquivalenceClassEval(eqclass, eqcheck, "collinized");
                IList <string>            evalTypes           = new List <string>(3);
                bool goodPOS = false;
                if (segmentWords)
                {
                    evalTypes.Add(WordCatConstituent.wordType);
                    if (ctpp.segmentMarkov && !parse)
                    {
                        evalTypes.Add(WordCatConstituent.tagType);
                        goodPOS = true;
                    }
                }
                if (parse)
                {
                    evalTypes.Add(WordCatConstituent.tagType);
                    evalTypes.Add(WordCatConstituent.catType);
                    if (combo)
                    {
                        evalTypes.Add(WordCatConstituent.wordType);
                        goodPOS = true;
                    }
                }
                TreeToBracketProcessor proc = new TreeToBracketProcessor(evalTypes);
                log.Info("Testing...");
                foreach (Tree goldTop in testTreebank)
                {
                    Tree             gold         = goldTop.FirstChild();
                    IList <IHasWord> goldSentence = gold.YieldHasWord();
                    if (goldSentence.Count > maxLength)
                    {
                        log.Info("Skipping sentence; too long: " + goldSentence.Count);
                        continue;
                    }
                    else
                    {
                        log.Info("Processing sentence; length: " + goldSentence.Count);
                    }
                    IList <IHasWord> s;
                    if (segmentWords)
                    {
                        StringBuilder goldCharBuf = new StringBuilder();
                        foreach (IHasWord aGoldSentence in goldSentence)
                        {
                            StringLabel word = (StringLabel)aGoldSentence;
                            goldCharBuf.Append(word.Value());
                        }
                        string goldChars = goldCharBuf.ToString();
                        s = seg.Segment(goldChars);
                    }
                    else
                    {
                        s = goldSentence;
                    }
                    Tree tree;
                    if (parse)
                    {
                        tree = lp.ParseTree(s);
                        if (tree == null)
                        {
                            throw new Exception("PARSER RETURNED NULL!!!");
                        }
                    }
                    else
                    {
                        tree = Edu.Stanford.Nlp.Trees.Trees.ToFlatTree(s);
                        tree = subcategoryStripper.TransformTree(tree);
                    }
                    if (pw != null)
                    {
                        if (parse)
                        {
                            tree.PennPrint(pw);
                        }
                        else
                        {
                            IEnumerator sentIter = s.GetEnumerator();
                            for (; ;)
                            {
                                Word word = (Word)sentIter.Current;
                                pw.Print(word.Word());
                                if (sentIter.MoveNext())
                                {
                                    pw.Print(" ");
                                }
                                else
                                {
                                    break;
                                }
                            }
                        }
                        pw.Println();
                    }
                    if (eval)
                    {
                        ICollection ourBrackets;
                        ICollection goldBrackets;
                        ourBrackets  = proc.AllBrackets(tree);
                        goldBrackets = proc.AllBrackets(gold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(tree, gold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(gold, tree));
                        }
                        basicEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nScores:");
                        basicEval.DisplayLast();
                        Tree collinsTree = collinizer.TransformTree(tree);
                        Tree collinsGold = collinizer.TransformTree(gold);
                        ourBrackets  = proc.AllBrackets(collinsTree);
                        goldBrackets = proc.AllBrackets(collinsGold);
                        if (goodPOS)
                        {
                            Sharpen.Collections.AddAll(ourBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsTree, collinsGold));
                            Sharpen.Collections.AddAll(goldBrackets, TreeToBracketProcessor.CommonWordTagTypeBrackets(collinsGold, collinsTree));
                        }
                        collinsEval.Eval(ourBrackets, goldBrackets);
                        System.Console.Out.WriteLine("\nCollinized scores:");
                        collinsEval.DisplayLast();
                        System.Console.Out.WriteLine();
                    }
                }
                if (eval)
                {
                    basicEval.Display();
                    System.Console.Out.WriteLine();
                    collinsEval.Display();
                }
            }
        }
        /// <summary>
        /// Converts a constituency tree to the English basic, enhanced, or
        /// enhanced++ Universal dependencies representation, or an English basic
        /// Universal dependencies tree to the enhanced or enhanced++ representation.
        /// </summary>
        /// <remarks>
        /// Converts a constituency tree to the English basic, enhanced, or
        /// enhanced++ Universal dependencies representation, or an English basic
        /// Universal dependencies tree to the enhanced or enhanced++ representation.
        /// <p>
        /// Command-line options:<br />
        /// <c>-treeFile</c>
        /// : File with PTB-formatted constituency trees<br />
        /// <c>-conlluFile</c>
        /// : File with basic dependency trees in CoNLL-U format<br />
        /// <c>-outputRepresentation</c>
        /// : "basic" (default), "enhanced", or "enhanced++"
        /// </remarks>
        public static void Main(string[] args)
        {
            Properties props                = StringUtils.ArgsToProperties(args);
            string     treeFileName         = props.GetProperty("treeFile");
            string     conlluFileName       = props.GetProperty("conlluFile");
            string     outputRepresentation = props.GetProperty("outputRepresentation", "basic");
            IEnumerator <SemanticGraph> sgIterator;

            // = null;
            if (treeFileName != null)
            {
                MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
                tb.LoadPath(treeFileName);
                IEnumerator <Tree> treeIterator = tb.GetEnumerator();
                sgIterator = new UniversalDependenciesConverter.TreeToSemanticGraphIterator(treeIterator);
            }
            else
            {
                if (conlluFileName != null)
                {
                    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
                    try
                    {
                        sgIterator = reader.GetIterator(IOUtils.ReaderFromString(conlluFileName));
                    }
                    catch (Exception e)
                    {
                        throw new Exception(e);
                    }
                }
                else
                {
                    System.Console.Error.WriteLine("No input file specified!");
                    System.Console.Error.WriteLine(string.Empty);
                    System.Console.Error.Printf("Usage: java %s [-treeFile trees.tree | -conlluFile deptrees.conllu]" + " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]%n", typeof(UniversalDependenciesConverter).GetCanonicalName());
                    return;
                }
            }
            CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();

            while (sgIterator.MoveNext())
            {
                SemanticGraph sg = sgIterator.Current;
                if (treeFileName != null)
                {
                    //add UPOS tags
                    Tree           tree       = ((UniversalDependenciesConverter.TreeToSemanticGraphIterator)sgIterator).GetCurrentTree();
                    Tree           uposTree   = UniversalPOSMapper.MapTree(tree);
                    IList <ILabel> uposLabels = uposTree.PreTerminalYield();
                    foreach (IndexedWord token in sg.VertexListSorted())
                    {
                        int    idx     = token.Index() - 1;
                        string uposTag = uposLabels[idx].Value();
                        token.Set(typeof(CoreAnnotations.CoarseTagAnnotation), uposTag);
                    }
                }
                else
                {
                    AddLemmata(sg);
                    if (UseName)
                    {
                        AddNERTags(sg);
                    }
                }
                if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced"))
                {
                    sg = ConvertBasicToEnhanced(sg);
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced++"))
                    {
                        sg = ConvertBasicToEnhancedPlusPlus(sg);
                    }
                }
                System.Console.Out.Write(writer.PrintSemanticGraph(sg));
            }
        }
Exemplo n.º 9
0
        /// <summary>Prints out all matches of a semgrex pattern on a file of dependencies.</summary>
        /// <remarks>
        /// Prints out all matches of a semgrex pattern on a file of dependencies.
        /// <p>
        /// Usage:<br />
        /// java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
        /// <br />
        /// See the help() function for a list of possible arguments to provide.
        /// </remarks>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            IDictionary <string, int> flagMap = Generics.NewHashMap();

            flagMap[Pattern]            = 1;
            flagMap[TreeFile]           = 1;
            flagMap[Mode]               = 1;
            flagMap[Extras]             = 1;
            flagMap[ConlluFile]         = 1;
            flagMap[OutputFormatOption] = 1;
            IDictionary <string, string[]> argsMap = StringUtils.ArgsToMap(args, flagMap);

            // args = argsMap.get(null);
            // TODO: allow patterns to be extracted from a file
            if (!(argsMap.Contains(Pattern)) || argsMap[Pattern].Length == 0)
            {
                Help();
                System.Environment.Exit(2);
            }
            Edu.Stanford.Nlp.Semgraph.Semgrex.SemgrexPattern semgrex = Edu.Stanford.Nlp.Semgraph.Semgrex.SemgrexPattern.Compile(argsMap[Pattern][0]);
            string modeString = DefaultMode;

            if (argsMap.Contains(Mode) && argsMap[Mode].Length > 0)
            {
                modeString = argsMap[Mode][0].ToUpper();
            }
            SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.ValueOf(modeString);
            string outputFormatString      = DefaultOutputFormat;

            if (argsMap.Contains(OutputFormatOption) && argsMap[OutputFormatOption].Length > 0)
            {
                outputFormatString = argsMap[OutputFormatOption][0].ToUpper();
            }
            SemgrexPattern.OutputFormat outputFormat = SemgrexPattern.OutputFormat.ValueOf(outputFormatString);
            bool useExtras = true;

            if (argsMap.Contains(Extras) && argsMap[Extras].Length > 0)
            {
                useExtras = bool.ValueOf(argsMap[Extras][0]);
            }
            IList <SemanticGraph> graphs = Generics.NewArrayList();

            // TODO: allow other sources of graphs, such as dependency files
            if (argsMap.Contains(TreeFile) && argsMap[TreeFile].Length > 0)
            {
                foreach (string treeFile in argsMap[TreeFile])
                {
                    log.Info("Loading file " + treeFile);
                    MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
                    treebank.LoadPath(treeFile);
                    foreach (Tree tree in treebank)
                    {
                        // TODO: allow other languages... this defaults to English
                        SemanticGraph graph = SemanticGraphFactory.MakeFromTree(tree, mode, useExtras ? GrammaticalStructure.Extras.Maximal : GrammaticalStructure.Extras.None);
                        graphs.Add(graph);
                    }
                }
            }
            if (argsMap.Contains(ConlluFile) && argsMap[ConlluFile].Length > 0)
            {
                CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
                foreach (string conlluFile in argsMap[ConlluFile])
                {
                    log.Info("Loading file " + conlluFile);
                    IEnumerator <SemanticGraph> it = reader.GetIterator(IOUtils.ReaderFromString(conlluFile));
                    while (it.MoveNext())
                    {
                        SemanticGraph graph = it.Current;
                        graphs.Add(graph);
                    }
                }
            }
            foreach (SemanticGraph graph_1 in graphs)
            {
                SemgrexMatcher matcher = semgrex.Matcher(graph_1);
                if (!matcher.Find())
                {
                    continue;
                }
                if (outputFormat == SemgrexPattern.OutputFormat.List)
                {
                    log.Info("Matched graph:" + Runtime.LineSeparator() + graph_1.ToString(SemanticGraph.OutputFormat.List));
                    int  i     = 1;
                    bool found = true;
                    while (found)
                    {
                        log.Info("Match " + i + " at: " + matcher.GetMatch().ToString(CoreLabel.OutputFormat.ValueIndex));
                        IList <string> nodeNames = Generics.NewArrayList();
                        Sharpen.Collections.AddAll(nodeNames, matcher.GetNodeNames());
                        nodeNames.Sort();
                        foreach (string name in nodeNames)
                        {
                            log.Info("  " + name + ": " + matcher.GetNode(name).ToString(CoreLabel.OutputFormat.ValueIndex));
                        }
                        log.Info(" ");
                        found = matcher.Find();
                    }
                }
                else
                {
                    if (outputFormat == SemgrexPattern.OutputFormat.Offset)
                    {
                        if (graph_1.VertexListSorted().IsEmpty())
                        {
                            continue;
                        }
                        System.Console.Out.Printf("+%d %s%n", graph_1.VertexListSorted()[0].Get(typeof(CoreAnnotations.LineNumberAnnotation)), argsMap[ConlluFile][0]);
                    }
                }
            }
        }
Exemplo n.º 10
0
        /* some documentation for Roger's convenience
         * {pcfg,dep,combo}{PE,DE,TE} are precision/dep/tagging evals for the models
         *
         * parser is the PCFG parser
         * dparser is the dependency parser
         * bparser is the combining parser
         *
         * during testing:
         * tree is the test tree (gold tree)
         * binaryTree is the gold tree binarized
         * tree2b is the best PCFG paser, binarized
         * tree2 is the best PCFG parse (debinarized)
         * tree3 is the dependency parse, binarized
         * tree3db is the dependency parser, debinarized
         * tree4 is the best combo parse, binarized and then debinarized
         * tree4b is the best combo parse, binarized
         */
        public static void Main(string[] args)
        {
            Options op = new Options(new EnglishTreebankParserParams());

            // op.tlpParams may be changed to something else later, so don't use it till
            // after options are parsed.
            StringUtils.LogInvocationString(log, args);
            string path          = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj";
            int    trainLow      = 200;
            int    trainHigh     = 2199;
            int    testLow       = 2200;
            int    testHigh      = 2219;
            string serializeFile = null;
            int    i             = 0;

            while (i < args.Length && args[i].StartsWith("-"))
            {
                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-path") && (i + 1 < args.Length))
                {
                    path = args[i + 1];
                    i   += 2;
                }
                else
                {
                    if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-train") && (i + 2 < args.Length))
                    {
                        trainLow  = System.Convert.ToInt32(args[i + 1]);
                        trainHigh = System.Convert.ToInt32(args[i + 2]);
                        i        += 3;
                    }
                    else
                    {
                        if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-test") && (i + 2 < args.Length))
                        {
                            testLow  = System.Convert.ToInt32(args[i + 1]);
                            testHigh = System.Convert.ToInt32(args[i + 2]);
                            i       += 3;
                        }
                        else
                        {
                            if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-serialize") && (i + 1 < args.Length))
                            {
                                serializeFile = args[i + 1];
                                i            += 2;
                            }
                            else
                            {
                                if (Sharpen.Runtime.EqualsIgnoreCase(args[i], "-tLPP") && (i + 1 < args.Length))
                                {
                                    try
                                    {
                                        op.tlpParams = (ITreebankLangParserParams)System.Activator.CreateInstance(Sharpen.Runtime.GetType(args[i + 1]));
                                    }
                                    catch (TypeLoadException e)
                                    {
                                        log.Info("Class not found: " + args[i + 1]);
                                        throw new Exception(e);
                                    }
                                    catch (InstantiationException e)
                                    {
                                        log.Info("Couldn't instantiate: " + args[i + 1] + ": " + e.ToString());
                                        throw new Exception(e);
                                    }
                                    catch (MemberAccessException e)
                                    {
                                        log.Info("illegal access" + e);
                                        throw new Exception(e);
                                    }
                                    i += 2;
                                }
                                else
                                {
                                    if (args[i].Equals("-encoding"))
                                    {
                                        // sets encoding for TreebankLangParserParams
                                        op.tlpParams.SetInputEncoding(args[i + 1]);
                                        op.tlpParams.SetOutputEncoding(args[i + 1]);
                                        i += 2;
                                    }
                                    else
                                    {
                                        i = op.SetOptionOrWarn(args, i);
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // System.out.println(tlpParams.getClass());
            ITreebankLanguagePack tlp = op.tlpParams.TreebankLanguagePack();

            op.trainOptions.sisterSplitters = Generics.NewHashSet(Arrays.AsList(op.tlpParams.SisterSplitters()));
            //    BinarizerFactory.TreeAnnotator.setTreebankLang(tlpParams);
            PrintWriter pw = op.tlpParams.Pw();

            op.testOptions.Display();
            op.trainOptions.Display();
            op.Display();
            op.tlpParams.Display();
            // setup tree transforms
            Treebank       trainTreebank = op.tlpParams.MemoryTreebank();
            MemoryTreebank testTreebank  = op.tlpParams.TestMemoryTreebank();

            // Treebank blippTreebank = ((EnglishTreebankParserParams) tlpParams).diskTreebank();
            // String blippPath = "/afs/ir.stanford.edu/data/linguistic-data/BLLIP-WSJ/";
            // blippTreebank.loadPath(blippPath, "", true);
            Timing.StartTime();
            log.Info("Reading trees...");
            testTreebank.LoadPath(path, new NumberRangeFileFilter(testLow, testHigh, true));
            if (op.testOptions.increasingLength)
            {
                testTreebank.Sort(new TreeLengthComparator());
            }
            trainTreebank.LoadPath(path, new NumberRangeFileFilter(trainLow, trainHigh, true));
            Timing.Tick("done.");
            log.Info("Binarizing trees...");
            TreeAnnotatorAndBinarizer binarizer;

            if (!op.trainOptions.leftToRight)
            {
                binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op);
            }
            else
            {
                binarizer = new TreeAnnotatorAndBinarizer(op.tlpParams.HeadFinder(), new LeftHeadFinder(), op.tlpParams, op.forceCNF, !op.trainOptions.OutsideFactor(), true, op);
            }
            CollinsPuncTransformer collinsPuncTransformer = null;

            if (op.trainOptions.collinsPunc)
            {
                collinsPuncTransformer = new CollinsPuncTransformer(tlp);
            }
            ITreeTransformer debinarizer      = new Debinarizer(op.forceCNF);
            IList <Tree>     binaryTrainTrees = new List <Tree>();

            if (op.trainOptions.selectiveSplit)
            {
                op.trainOptions.splitters = ParentAnnotationStats.GetSplitCategories(trainTreebank, op.trainOptions.tagSelectiveSplit, 0, op.trainOptions.selectiveSplitCutOff, op.trainOptions.tagSelectiveSplitCutOff, op.tlpParams.TreebankLanguagePack());
                if (op.trainOptions.deleteSplitters != null)
                {
                    IList <string> deleted = new List <string>();
                    foreach (string del in op.trainOptions.deleteSplitters)
                    {
                        string baseDel    = tlp.BasicCategory(del);
                        bool   checkBasic = del.Equals(baseDel);
                        for (IEnumerator <string> it = op.trainOptions.splitters.GetEnumerator(); it.MoveNext();)
                        {
                            string elem     = it.Current;
                            string baseElem = tlp.BasicCategory(elem);
                            bool   delStr   = checkBasic && baseElem.Equals(baseDel) || elem.Equals(del);
                            if (delStr)
                            {
                                it.Remove();
                                deleted.Add(elem);
                            }
                        }
                    }
                    log.Info("Removed from vertical splitters: " + deleted);
                }
            }
            if (op.trainOptions.selectivePostSplit)
            {
                ITreeTransformer myTransformer = new TreeAnnotator(op.tlpParams.HeadFinder(), op.tlpParams, op);
                Treebank         annotatedTB   = trainTreebank.Transform(myTransformer);
                op.trainOptions.postSplitters = ParentAnnotationStats.GetSplitCategories(annotatedTB, true, 0, op.trainOptions.selectivePostSplitCutOff, op.trainOptions.tagSelectivePostSplitCutOff, op.tlpParams.TreebankLanguagePack());
            }
            if (op.trainOptions.hSelSplit)
            {
                binarizer.SetDoSelectiveSplit(false);
                foreach (Tree tree in trainTreebank)
                {
                    if (op.trainOptions.collinsPunc)
                    {
                        tree = collinsPuncTransformer.TransformTree(tree);
                    }
                    //tree.pennPrint(tlpParams.pw());
                    tree = binarizer.TransformTree(tree);
                }
                //binaryTrainTrees.add(tree);
                binarizer.SetDoSelectiveSplit(true);
            }
            foreach (Tree tree_1 in trainTreebank)
            {
                if (op.trainOptions.collinsPunc)
                {
                    tree_1 = collinsPuncTransformer.TransformTree(tree_1);
                }
                tree_1 = binarizer.TransformTree(tree_1);
                binaryTrainTrees.Add(tree_1);
            }
            if (op.testOptions.verbose)
            {
                binarizer.DumpStats();
            }
            IList <Tree> binaryTestTrees = new List <Tree>();

            foreach (Tree tree_2 in testTreebank)
            {
                if (op.trainOptions.collinsPunc)
                {
                    tree_2 = collinsPuncTransformer.TransformTree(tree_2);
                }
                tree_2 = binarizer.TransformTree(tree_2);
                binaryTestTrees.Add(tree_2);
            }
            Timing.Tick("done.");
            // binarization
            BinaryGrammar      bg = null;
            UnaryGrammar       ug = null;
            IDependencyGrammar dg = null;
            // DependencyGrammar dgBLIPP = null;
            ILexicon        lex        = null;
            IIndex <string> stateIndex = new HashIndex <string>();
            // extract grammars
            IExtractor <Pair <UnaryGrammar, BinaryGrammar> > bgExtractor = new BinaryGrammarExtractor(op, stateIndex);

            //Extractor bgExtractor = new SmoothedBinaryGrammarExtractor();//new BinaryGrammarExtractor();
            // Extractor lexExtractor = new LexiconExtractor();
            //Extractor dgExtractor = new DependencyMemGrammarExtractor();
            if (op.doPCFG)
            {
                log.Info("Extracting PCFG...");
                Pair <UnaryGrammar, BinaryGrammar> bgug = null;
                if (op.trainOptions.cheatPCFG)
                {
                    IList <Tree> allTrees = new List <Tree>(binaryTrainTrees);
                    Sharpen.Collections.AddAll(allTrees, binaryTestTrees);
                    bgug = bgExtractor.Extract(allTrees);
                }
                else
                {
                    bgug = bgExtractor.Extract(binaryTrainTrees);
                }
                bg = bgug.second;
                bg.SplitRules();
                ug = bgug.first;
                ug.PurgeRules();
                Timing.Tick("done.");
            }
            log.Info("Extracting Lexicon...");
            IIndex <string> wordIndex = new HashIndex <string>();
            IIndex <string> tagIndex  = new HashIndex <string>();

            lex = op.tlpParams.Lex(op, wordIndex, tagIndex);
            lex.InitializeTraining(binaryTrainTrees.Count);
            lex.Train(binaryTrainTrees);
            lex.FinishTraining();
            Timing.Tick("done.");
            if (op.doDep)
            {
                log.Info("Extracting Dependencies...");
                binaryTrainTrees.Clear();
                IExtractor <IDependencyGrammar> dgExtractor = new MLEDependencyGrammarExtractor(op, wordIndex, tagIndex);
                // dgBLIPP = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams,true));
                // DependencyGrammar dg1 = dgExtractor.extract(trainTreebank.iterator(), new TransformTreeDependency(op.tlpParams, true));
                //dgBLIPP=(DependencyGrammar)dgExtractor.extract(blippTreebank.iterator(),new TransformTreeDependency(tlpParams));
                //dg = (DependencyGrammar) dgExtractor.extract(new ConcatenationIterator(trainTreebank.iterator(),blippTreebank.iterator()),new TransformTreeDependency(tlpParams));
                // dg=new DependencyGrammarCombination(dg1,dgBLIPP,2);
                dg = dgExtractor.Extract(binaryTrainTrees);
                //uses information whether the words are known or not, discards unknown words
                Timing.Tick("done.");
                //System.out.print("Extracting Unknown Word Model...");
                //UnknownWordModel uwm = (UnknownWordModel)uwmExtractor.extract(binaryTrainTrees);
                //Timing.tick("done.");
                System.Console.Out.Write("Tuning Dependency Model...");
                dg.Tune(binaryTestTrees);
                //System.out.println("TUNE DEPS: "+tuneDeps);
                Timing.Tick("done.");
            }
            BinaryGrammar      boundBG = bg;
            UnaryGrammar       boundUG = ug;
            IGrammarProjection gp      = new NullGrammarProjection(bg, ug);

            // serialization
            if (serializeFile != null)
            {
                log.Info("Serializing parser...");
                LexicalizedParser parser = new LexicalizedParser(lex, bg, ug, dg, stateIndex, wordIndex, tagIndex, op);
                parser.SaveParserToSerialized(serializeFile);
                Timing.Tick("done.");
            }
            // test: pcfg-parse and output
            ExhaustivePCFGParser parser_1 = null;

            if (op.doPCFG)
            {
                parser_1 = new ExhaustivePCFGParser(boundBG, boundUG, lex, op, stateIndex, wordIndex, tagIndex);
            }
            ExhaustiveDependencyParser dparser = ((op.doDep && !op.testOptions.useFastFactored) ? new ExhaustiveDependencyParser(dg, lex, op, wordIndex, tagIndex) : null);
            IScorer scorer = (op.doPCFG ? new TwinScorer(new ProjectionScorer(parser_1, gp, op), dparser) : null);
            //Scorer scorer = parser;
            BiLexPCFGParser bparser = null;

            if (op.doPCFG && op.doDep)
            {
                bparser = (op.testOptions.useN5) ? new BiLexPCFGParser.N5BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex, wordIndex, tagIndex) : new BiLexPCFGParser(scorer, parser_1, dparser, bg, ug, dg, lex, op, gp, stateIndex
                                                                                                                                                                                                    , wordIndex, tagIndex);
            }
            Evalb        pcfgPE         = new Evalb("pcfg  PE", true);
            Evalb        comboPE        = new Evalb("combo PE", true);
            AbstractEval pcfgCB         = new Evalb.CBEval("pcfg  CB", true);
            AbstractEval pcfgTE         = new TaggingEval("pcfg  TE");
            AbstractEval comboTE        = new TaggingEval("combo TE");
            AbstractEval pcfgTEnoPunct  = new TaggingEval("pcfg nopunct TE");
            AbstractEval comboTEnoPunct = new TaggingEval("combo nopunct TE");
            AbstractEval depTE          = new TaggingEval("depnd TE");
            AbstractEval depDE          = new UnlabeledAttachmentEval("depnd DE", true, null, tlp.PunctuationWordRejectFilter());
            AbstractEval comboDE        = new UnlabeledAttachmentEval("combo DE", true, null, tlp.PunctuationWordRejectFilter());

            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.InitEVALBfiles(op.tlpParams);
            }
            // int[] countByLength = new int[op.testOptions.maxLength+1];
            // Use a reflection ruse, so one can run this without needing the
            // tagger.  Using a function rather than a MaxentTagger means we
            // can distribute a version of the parser that doesn't include the
            // entire tagger.
            IFunction <IList <IHasWord>, List <TaggedWord> > tagger = null;

            if (op.testOptions.preTag)
            {
                try
                {
                    Type[]   argsClass = new Type[] { typeof(string) };
                    object[] arguments = new object[] { op.testOptions.taggerSerializedFile };
                    tagger = (IFunction <IList <IHasWord>, List <TaggedWord> >)Sharpen.Runtime.GetType("edu.stanford.nlp.tagger.maxent.MaxentTagger").GetConstructor(argsClass).NewInstance(arguments);
                }
                catch (Exception e)
                {
                    log.Info(e);
                    log.Info("Warning: No pretagging of sentences will be done.");
                }
            }
            for (int tNum = 0; tNum < ttSize; tNum++)
            {
                Tree tree        = testTreebank[tNum];
                int  testTreeLen = tree_2.Yield().Count;
                if (testTreeLen > op.testOptions.maxLength)
                {
                    continue;
                }
                Tree binaryTree = binaryTestTrees[tNum];
                // countByLength[testTreeLen]++;
                System.Console.Out.WriteLine("-------------------------------------");
                System.Console.Out.WriteLine("Number: " + (tNum + 1));
                System.Console.Out.WriteLine("Length: " + testTreeLen);
                //tree.pennPrint(pw);
                // System.out.println("XXXX The binary tree is");
                // binaryTree.pennPrint(pw);
                //System.out.println("Here are the tags in the lexicon:");
                //System.out.println(lex.showTags());
                //System.out.println("Here's the tagnumberer:");
                //System.out.println(Numberer.getGlobalNumberer("tags").toString());
                long timeMil1 = Runtime.CurrentTimeMillis();
                Timing.Tick("Starting parse.");
                if (op.doPCFG)
                {
                    //log.info(op.testOptions.forceTags);
                    if (op.testOptions.forceTags)
                    {
                        if (tagger != null)
                        {
                            //System.out.println("Using a tagger to set tags");
                            //System.out.println("Tagged sentence as: " + tagger.processSentence(cutLast(wordify(binaryTree.yield()))).toString(false));
                            parser_1.Parse(AddLast(tagger.Apply(CutLast(Wordify(binaryTree.Yield())))));
                        }
                        else
                        {
                            //System.out.println("Forcing tags to match input.");
                            parser_1.Parse(CleanTags(binaryTree.TaggedYield(), tlp));
                        }
                    }
                    else
                    {
                        // System.out.println("XXXX Parsing " + binaryTree.yield());
                        parser_1.Parse(binaryTree.YieldHasWord());
                    }
                }
                //Timing.tick("Done with pcfg phase.");
                if (op.doDep)
                {
                    dparser.Parse(binaryTree.YieldHasWord());
                }
                //Timing.tick("Done with dependency phase.");
                bool bothPassed = false;
                if (op.doPCFG && op.doDep)
                {
                    bothPassed = bparser.Parse(binaryTree.YieldHasWord());
                }
                //Timing.tick("Done with combination phase.");
                long timeMil2 = Runtime.CurrentTimeMillis();
                long elapsed  = timeMil2 - timeMil1;
                log.Info("Time: " + ((int)(elapsed / 100)) / 10.00 + " sec.");
                //System.out.println("PCFG Best Parse:");
                Tree tree2b = null;
                Tree tree2  = null;
                //System.out.println("Got full best parse...");
                if (op.doPCFG)
                {
                    tree2b = parser_1.GetBestParse();
                    tree2  = debinarizer.TransformTree(tree2b);
                }
                //System.out.println("Debinarized parse...");
                //tree2.pennPrint();
                //System.out.println("DepG Best Parse:");
                Tree tree3   = null;
                Tree tree3db = null;
                if (op.doDep)
                {
                    tree3 = dparser.GetBestParse();
                    // was: but wrong Tree tree3db = debinarizer.transformTree(tree2);
                    tree3db = debinarizer.TransformTree(tree3);
                    tree3.PennPrint(pw);
                }
                //tree.pennPrint();
                //((Tree)binaryTrainTrees.get(tNum)).pennPrint();
                //System.out.println("Combo Best Parse:");
                Tree tree4 = null;
                if (op.doPCFG && op.doDep)
                {
                    try
                    {
                        tree4 = bparser.GetBestParse();
                        if (tree4 == null)
                        {
                            tree4 = tree2b;
                        }
                    }
                    catch (ArgumentNullException)
                    {
                        log.Info("Blocked, using PCFG parse!");
                        tree4 = tree2b;
                    }
                }
                if (op.doPCFG && !bothPassed)
                {
                    tree4 = tree2b;
                }
                //tree4.pennPrint();
                if (op.doDep)
                {
                    depDE.Evaluate(tree3, binaryTree, pw);
                    depTE.Evaluate(tree3db, tree_2, pw);
                }
                ITreeTransformer tc      = op.tlpParams.Collinizer();
                ITreeTransformer tcEvalb = op.tlpParams.CollinizerEvalb();
                if (op.doPCFG)
                {
                    // System.out.println("XXXX Best PCFG was: ");
                    // tree2.pennPrint();
                    // System.out.println("XXXX Transformed best PCFG is: ");
                    // tc.transformTree(tree2).pennPrint();
                    //System.out.println("True Best Parse:");
                    //tree.pennPrint();
                    //tc.transformTree(tree).pennPrint();
                    pcfgPE.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    pcfgCB.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    Tree tree4b = null;
                    if (op.doDep)
                    {
                        comboDE.Evaluate((bothPassed ? tree4 : tree3), binaryTree, pw);
                        tree4b = tree4;
                        tree4  = debinarizer.TransformTree(tree4);
                        if (op.nodePrune)
                        {
                            NodePruner np = new NodePruner(parser_1, debinarizer);
                            tree4 = np.Prune(tree4);
                        }
                        //tree4.pennPrint();
                        comboPE.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw);
                    }
                    //pcfgTE.evaluate(tree2, tree);
                    pcfgTE.Evaluate(tcEvalb.TransformTree(tree2), tcEvalb.TransformTree(tree_2), pw);
                    pcfgTEnoPunct.Evaluate(tc.TransformTree(tree2), tc.TransformTree(tree_2), pw);
                    if (op.doDep)
                    {
                        comboTE.Evaluate(tcEvalb.TransformTree(tree4), tcEvalb.TransformTree(tree_2), pw);
                        comboTEnoPunct.Evaluate(tc.TransformTree(tree4), tc.TransformTree(tree_2), pw);
                    }
                    System.Console.Out.WriteLine("PCFG only: " + parser_1.ScoreBinarizedTree(tree2b, 0));
                    //tc.transformTree(tree2).pennPrint();
                    tree2.PennPrint(pw);
                    if (op.doDep)
                    {
                        System.Console.Out.WriteLine("Combo: " + parser_1.ScoreBinarizedTree(tree4b, 0));
                        // tc.transformTree(tree4).pennPrint(pw);
                        tree4.PennPrint(pw);
                    }
                    System.Console.Out.WriteLine("Correct:" + parser_1.ScoreBinarizedTree(binaryTree, 0));

                    /*
                     * if (parser.scoreBinarizedTree(tree2b,true) < parser.scoreBinarizedTree(binaryTree,true)) {
                     * System.out.println("SCORE INVERSION");
                     * parser.validateBinarizedTree(binaryTree,0);
                     * }
                     */
                    tree_2.PennPrint(pw);
                }
                // end if doPCFG
                if (op.testOptions.evalb)
                {
                    if (op.doPCFG && op.doDep)
                    {
                        EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree4));
                    }
                    else
                    {
                        if (op.doPCFG)
                        {
                            EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree2));
                        }
                        else
                        {
                            if (op.doDep)
                            {
                                EvalbFormatWriter.WriteEVALBline(tcEvalb.TransformTree(tree_2), tcEvalb.TransformTree(tree3db));
                            }
                        }
                    }
                }
            }
            // end for each tree in test treebank
            if (op.testOptions.evalb)
            {
                EvalbFormatWriter.CloseEVALBfiles();
            }
            // op.testOptions.display();
            if (op.doPCFG)
            {
                pcfgPE.Display(false, pw);
                System.Console.Out.WriteLine("Grammar size: " + stateIndex.Size());
                pcfgCB.Display(false, pw);
                if (op.doDep)
                {
                    comboPE.Display(false, pw);
                }
                pcfgTE.Display(false, pw);
                pcfgTEnoPunct.Display(false, pw);
                if (op.doDep)
                {
                    comboTE.Display(false, pw);
                    comboTEnoPunct.Display(false, pw);
                }
            }
            if (op.doDep)
            {
                depTE.Display(false, pw);
                depDE.Display(false, pw);
            }
            if (op.doPCFG && op.doDep)
            {
                comboDE.Display(false, pw);
            }
        }
        // main method only
        public static void Main(string[] args)
        {
            Treebank   tb           = new MemoryTreebank();
            Properties props        = StringUtils.ArgsToProperties(args);
            string     treeFileName = props.GetProperty("treeFile");
            string     sentFileName = props.GetProperty("sentFile");
            string     testGraph    = props.GetProperty("testGraph");

            if (testGraph == null)
            {
                testGraph = "false";
            }
            string load = props.GetProperty("load");
            string save = props.GetProperty("save");

            if (load != null)
            {
                log.Info("Load not implemented!");
                return;
            }
            if (sentFileName == null && treeFileName == null)
            {
                log.Info("Usage: java SemanticGraph [-sentFile file|-treeFile file] [-testGraph]");
                Tree t = Tree.ValueOf("(ROOT (S (NP (NP (DT An) (NN attempt)) (PP (IN on) (NP (NP (NNP Andres) (NNP Pastrana) (POS 's)) (NN life)))) (VP (VBD was) (VP (VBN carried) (PP (IN out) (S (VP (VBG using) (NP (DT a) (JJ powerful) (NN bomb))))))) (. .)))"
                                      );
                tb.Add(t);
            }
            else
            {
                if (treeFileName != null)
                {
                    tb.LoadPath(treeFileName);
                }
                else
                {
                    string[]          options = new string[] { "-retainNPTmpSubcategories" };
                    LexicalizedParser lp      = ((LexicalizedParser)LexicalizedParser.LoadModel("/u/nlp/data/lexparser/englishPCFG.ser.gz", options));
                    BufferedReader    reader  = null;
                    try
                    {
                        reader = IOUtils.ReaderFromString(sentFileName);
                    }
                    catch (IOException e)
                    {
                        throw new RuntimeIOException("Cannot find or open " + sentFileName, e);
                    }
                    try
                    {
                        System.Console.Out.WriteLine("Processing sentence file " + sentFileName);
                        for (string line; (line = reader.ReadLine()) != null;)
                        {
                            System.Console.Out.WriteLine("Processing sentence: " + line);
                            PTBTokenizer <Word> ptb   = PTBTokenizer.NewPTBTokenizer(new StringReader(line));
                            IList <Word>        words = ptb.Tokenize();
                            Tree parseTree            = lp.ParseTree(words);
                            tb.Add(parseTree);
                        }
                        reader.Close();
                    }
                    catch (Exception e)
                    {
                        throw new Exception("Exception reading key file " + sentFileName, e);
                    }
                }
            }
            foreach (Tree t_1 in tb)
            {
                SemanticGraph sg = SemanticGraphFactory.GenerateUncollapsedDependencies(t_1);
                System.Console.Out.WriteLine(sg.ToString());
                System.Console.Out.WriteLine(sg.ToCompactString());
                if (testGraph.Equals("true"))
                {
                    SemanticGraph g1 = SemanticGraphFactory.GenerateCollapsedDependencies(t_1);
                    System.Console.Out.WriteLine("TEST SEMANTIC GRAPH - graph ----------------------------");
                    System.Console.Out.WriteLine(g1.ToString());
                    System.Console.Out.WriteLine("readable ----------------------------");
                    System.Console.Out.WriteLine(g1.ToString(SemanticGraph.OutputFormat.Readable));
                    System.Console.Out.WriteLine("List of dependencies ----------------------------");
                    System.Console.Out.WriteLine(g1.ToList());
                    System.Console.Out.WriteLine("xml ----------------------------");
                    System.Console.Out.WriteLine(g1.ToString(SemanticGraph.OutputFormat.Xml));
                    System.Console.Out.WriteLine("dot ----------------------------");
                    System.Console.Out.WriteLine(g1.ToDotFormat());
                    System.Console.Out.WriteLine("dot (simple) ----------------------------");
                    System.Console.Out.WriteLine(g1.ToDotFormat("Simple", CoreLabel.OutputFormat.Value));
                }
            }
            // System.out.println(" graph ----------------------------");
            // System.out.println(t.allTypedDependenciesCCProcessed(false));
            if (save != null)
            {
                log.Info("Save not implemented!");
            }
        }
        /// <summary>for testing -- CURRENTLY BROKEN!!!</summary>
        /// <param name="args">input dir and output filename</param>
        /// <exception cref="System.IO.IOException"/>
        public static void Main(string[] args)
        {
            if (args.Length != 3)
            {
                throw new Exception("args: treebankPath trainNums testNums");
            }
            ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();

            ctpp.charTags = true;
            // TODO: these options are getting clobbered by reading in the
            // parser object (unless it's a text file parser?)
            Options op = new Options(ctpp);

            op.doDep = false;
            op.testOptions.maxLength = 90;
            LexicalizedParser lp;

            try
            {
                IFileFilter trainFilt = new NumberRangesFileFilter(args[1], false);
                lp = LexicalizedParser.TrainFromTreebank(args[0], trainFilt, op);
                try
                {
                    string filename = "chineseCharTagPCFG.ser.gz";
                    log.Info("Writing parser in serialized format to file " + filename + " ");
                    System.Console.Error.Flush();
                    ObjectOutputStream @out = IOUtils.WriteStreamFromString(filename);
                    @out.WriteObject(lp);
                    @out.Close();
                    log.Info("done.");
                }
                catch (IOException ioe)
                {
                    Sharpen.Runtime.PrintStackTrace(ioe);
                }
            }
            catch (ArgumentException)
            {
                lp = LexicalizedParser.LoadModel(args[1], op);
            }
            IFileFilter    testFilt     = new NumberRangesFileFilter(args[2], false);
            MemoryTreebank testTreebank = ctpp.MemoryTreebank();

            testTreebank.LoadPath(new File(args[0]), testFilt);
            PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true);
            WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
            WordCatEqualityChecker    eqcheck = new WordCatEqualityChecker();
            EquivalenceClassEval      eval    = new EquivalenceClassEval(eqclass, eqcheck);

            //    System.out.println("Preterminals:" + preterminals);
            System.Console.Out.WriteLine("Testing...");
            foreach (Tree gold in testTreebank)
            {
                Tree tree;
                try
                {
                    tree = lp.ParseTree(gold.YieldHasWord());
                    if (tree == null)
                    {
                        System.Console.Out.WriteLine("Failed to parse " + gold.YieldHasWord());
                        continue;
                    }
                }
                catch (Exception e)
                {
                    Sharpen.Runtime.PrintStackTrace(e);
                    continue;
                }
                gold = gold.FirstChild();
                pw.Println(SentenceUtils.ListToString(gold.PreTerminalYield()));
                pw.Println(SentenceUtils.ListToString(gold.Yield()));
                gold.PennPrint(pw);
                pw.Println(tree.PreTerminalYield());
                pw.Println(tree.Yield());
                tree.PennPrint(pw);
                //      Collection allBrackets = WordCatConstituent.allBrackets(tree);
                //      Collection goldBrackets = WordCatConstituent.allBrackets(gold);
                //      eval.eval(allBrackets, goldBrackets);
                eval.DisplayLast();
            }
            System.Console.Out.WriteLine();
            System.Console.Out.WriteLine();
            eval.Display();
        }