Exemplo n.º 1
0
        /// <summary>
        /// Command-line entry point. Reads dependency graphs from a CoNLL-U file and
        /// constituency trees from a tree file in parallel, calls
        /// <c>AddFeatures</c> on each graph/tree pair and writes every resulting
        /// graph to standard output.
        /// </summary>
        /// <param name="args">
        /// <c>args[0]</c> is the CoNLL-U file and <c>args[1]</c> is the tree file.
        /// The optional flags <c>-addUPOS</c> and <c>-escapeParenthesis</c> may follow
        /// in any order; any other trailing argument is ignored. With fewer than two
        /// arguments a usage message is logged and the method returns.
        /// </param>
        /// <exception cref="System.IO.IOException"/>
        /// <exception cref="System.Exception">
        /// Thrown when the tree file has fewer trees than the CoNLL-U file has graphs,
        /// or when the size of a tree's yield differs from the size of its graph.
        /// </exception>
        public static void Main(string[] args)
        {
            if (args.Length < 2)
            {
                log.Info("Usage: ");
                log.Info("java ");
                log.Info(typeof(Edu.Stanford.Nlp.Trees.UD.UniversalDependenciesFeatureAnnotator).GetCanonicalName());
                log.Info(" CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]");
                return;
            }

            string coNLLUFile = args[0];
            string treeFile = args[1];
            bool addUPOS = false;
            bool escapeParens = false;

            // Everything after the two positional arguments is an optional flag.
            for (int i = 2; i < args.Length; i++)
            {
                if (args[i].Equals("-addUPOS"))
                {
                    addUPOS = true;
                }
                else if (args[i].Equals("-escapeParenthesis"))
                {
                    escapeParens = true;
                }
            }

            Edu.Stanford.Nlp.Trees.UD.UniversalDependenciesFeatureAnnotator featureAnnotator = new Edu.Stanford.Nlp.Trees.UD.UniversalDependenciesFeatureAnnotator();
            Reader r = IOUtils.ReaderFromString(coNLLUFile);
            CoNLLUDocumentReader depReader = new CoNLLUDocumentReader();
            CoNLLUDocumentWriter depWriter = new CoNLLUDocumentWriter();
            IEnumerator<SemanticGraph> it = depReader.GetIterator(r);
            IEnumerator<Tree> treeIt = TreebankIterator(treeFile);

            while (it.MoveNext())
            {
                SemanticGraph sg = it.Current;

                // The tree iterator must be advanced in lock-step with the graph
                // iterator; reading Current without MoveNext() never moves to the
                // next tree. When the trees run out, fall through with null so the
                // misalignment error below is raised.
                Tree t = treeIt.MoveNext() ? treeIt.Current : null;

                if (t == null || t.Yield().Count != sg.Size())
                {
                    // Rebuild the sentence text so the error shows which input is misaligned.
                    StringBuilder sentenceSb = new StringBuilder();
                    foreach (IndexedWord word in sg.VertexListSorted())
                    {
                        sentenceSb.Append(word.Get(typeof(CoreAnnotations.TextAnnotation)));
                        sentenceSb.Append(' ');
                    }
                    throw new Exception("CoNLL-U file and tree file are not aligned. \n" + "Sentence: " + sentenceSb + '\n' + "Tree: " + ((t == null) ? "null" : t.PennString()));
                }

                featureAnnotator.AddFeatures(sg, t, true, addUPOS);
                // The negated -escapeParenthesis flag is forwarded as the writer's second argument.
                System.Console.Out.Write(depWriter.PrintSemanticGraph(sg, !escapeParens));
            }
        }
        /// <summary>
        /// Tests whether reading a Semantic Graph and printing it
        /// is equal to the original input.
        /// </summary>
        /// <param name="input">
        /// CoNLL-U text to round-trip. Before comparison it is passed through
        /// <c>ReplaceAll("[\\t ]+", "\t")</c>, which is assumed to be a regex replace
        /// (Java semantics) turning each run of tabs/spaces into a single tab.
        /// </param>
        private void TestSingleReadAndWrite(string input)
        {
            string clean = input.ReplaceAll("[\\t ]+", "\t");
            CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
            CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
            Reader stringReader = new StringReader(clean);
            IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

            // An enumerator starts positioned before the first element, so it has to
            // be advanced before Current may be read.
            NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input must contain at least one dependency tree.");
            SemanticGraph sg = it.Current;
            string output = writer.PrintSemanticGraph(sg);

            NUnit.Framework.Assert.AreEqual(clean, output);
        }
        /// <summary>
        /// Reads <c>ExtraDepsTestInput</c> and checks that it yields exactly one graph
        /// and that the graph contains the edges 4 -> 1, 2 -> 7 and 4 -> 7
        /// (by node index).
        /// </summary>
        public virtual void TestExtraDependencies()
        {
            CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
            Reader stringReader = new StringReader(ExtraDepsTestInput);
            IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

            // An enumerator starts positioned before the first element, so it has to
            // be advanced before Current may be read.
            NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input must contain one dependency tree.");
            SemanticGraph sg = it.Current;

            NUnit.Framework.Assert.IsNotNull(sg);
            // NUnit takes the condition first and the message second.
            NUnit.Framework.Assert.IsFalse(it.MoveNext(), "The input only contains one dependency tree.");
            NUnit.Framework.Assert.IsTrue(sg.ContainsEdge(sg.GetNodeByIndex(4), sg.GetNodeByIndex(1)));
            NUnit.Framework.Assert.IsTrue(sg.ContainsEdge(sg.GetNodeByIndex(2), sg.GetNodeByIndex(7)));
            NUnit.Framework.Assert.IsTrue(sg.ContainsEdge(sg.GetNodeByIndex(4), sg.GetNodeByIndex(7)));
        }
        /// <summary>
        /// Reads <c>CommentTestInput</c> and checks that it yields exactly one graph
        /// with the expected compact string form, that node 1 carries line number 3,
        /// and that two comments were collected, the first being
        /// <c>#comment line 1</c>.
        /// </summary>
        public virtual void TestComment()
        {
            CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
            Reader stringReader = new StringReader(CommentTestInput);
            IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

            // An enumerator starts positioned before the first element, so it has to
            // be advanced before Current may be read.
            NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input must contain one dependency tree.");
            SemanticGraph sg = it.Current;

            NUnit.Framework.Assert.IsNotNull(sg);
            // NUnit takes the condition first and the message second.
            NUnit.Framework.Assert.IsFalse(it.MoveNext(), "The input only contains one dependency tree.");
            NUnit.Framework.Assert.AreEqual("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.ToCompactString(true));
            // int.Parse only accepts strings; the expected line number is simply the literal 3.
            NUnit.Framework.Assert.AreEqual(3, sg.GetNodeByIndex(1).Get(typeof(CoreAnnotations.LineNumberAnnotation)));
            NUnit.Framework.Assert.AreEqual(2, sg.GetComments().Count);
            NUnit.Framework.Assert.AreEqual("#comment line 1", sg.GetComments()[0]);
        }
        /// <summary>
        /// Reads <c>MultiwordTestInput</c> and checks that it yields exactly one graph
        /// with the expected compact string form, that the nodes at indices 2 and 3
        /// report <c>haven't</c> as their original text while every other node reports
        /// the empty string, and that node 2 carries line number 3.
        /// </summary>
        public virtual void TestMultiWords()
        {
            CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
            Reader stringReader = new StringReader(MultiwordTestInput);
            IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

            // An enumerator starts positioned before the first element, so it has to
            // be advanced before Current may be read.
            NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input must contain one dependency tree.");
            SemanticGraph sg = it.Current;

            NUnit.Framework.Assert.IsNotNull(sg);
            // NUnit takes the condition first and the message second.
            NUnit.Framework.Assert.IsFalse(it.MoveNext(), "The input only contains one dependency tree.");
            NUnit.Framework.Assert.AreEqual("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.ToCompactString(true));
            foreach (IndexedWord iw in sg.VertexListSorted())
            {
                if (iw.Index() == 2 || iw.Index() == 3)
                {
                    NUnit.Framework.Assert.AreEqual("haven't", iw.OriginalText());
                }
                else
                {
                    NUnit.Framework.Assert.AreEqual(string.Empty, iw.OriginalText());
                }
            }
            // int.Parse only accepts strings; the expected line number is simply the literal 3.
            NUnit.Framework.Assert.AreEqual(3, sg.GetNodeByIndex(2).Get(typeof(CoreAnnotations.LineNumberAnnotation)));
        }
        /// <summary>
        /// Converts a constituency tree to the English basic, enhanced, or
        /// enhanced++ Universal dependencies representation, or an English basic
        /// Universal dependencies tree to the enhanced or enhanced++ representation.
        /// </summary>
        /// <remarks>
        /// <para>
        /// Command-line options:<br/>
        /// <c>-treeFile</c>: File with PTB-formatted constituency trees<br/>
        /// <c>-conlluFile</c>: File with basic dependency trees in CoNLL-U format<br/>
        /// <c>-outputRepresentation</c>: "basic" (default), "enhanced", or "enhanced++"
        /// </para>
        /// <para>
        /// When both input options are given, <c>-treeFile</c> takes precedence. When
        /// neither is given, a usage message is written to standard error and the
        /// method returns. Every resulting graph is written to standard output.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments, parsed into properties.</param>
        /// <exception cref="System.Exception">
        /// Wraps any exception raised while opening the CoNLL-U file.
        /// </exception>
        public static void Main(string[] args)
        {
            Properties props = StringUtils.ArgsToProperties(args);
            string treeFileName = props.GetProperty("treeFile");
            string conlluFileName = props.GetProperty("conlluFile");
            string outputRepresentation = props.GetProperty("outputRepresentation", "basic");
            IEnumerator<SemanticGraph> sgIterator;

            if (treeFileName != null)
            {
                MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
                tb.LoadPath(treeFileName);
                IEnumerator<Tree> treeIterator = tb.GetEnumerator();
                sgIterator = new UniversalDependenciesConverter.TreeToSemanticGraphIterator(treeIterator);
            }
            else if (conlluFileName != null)
            {
                CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
                try
                {
                    sgIterator = reader.GetIterator(IOUtils.ReaderFromString(conlluFileName));
                }
                catch (Exception e)
                {
                    // System.Exception has no constructor that takes only an inner
                    // exception; pass the message along and keep the cause attached.
                    throw new Exception(e.Message, e);
                }
            }
            else
            {
                System.Console.Error.WriteLine("No input file specified!");
                System.Console.Error.WriteLine(string.Empty);
                // NOTE(review): Printf with Java-style %s/%n and GetCanonicalName() rely on
                // Java-compatibility extensions that are not visible here -- confirm they exist.
                System.Console.Error.Printf("Usage: java %s [-treeFile trees.tree | -conlluFile deptrees.conllu]" + " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]%n", typeof(UniversalDependenciesConverter).GetCanonicalName());
                return;
            }

            CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();

            while (sgIterator.MoveNext())
            {
                SemanticGraph sg = sgIterator.Current;
                if (treeFileName != null)
                {
                    // Add UPOS tags: map the source tree and copy the tag of each
                    // preterminal onto the graph token at the same (1-based) index.
                    Tree tree = ((UniversalDependenciesConverter.TreeToSemanticGraphIterator)sgIterator).GetCurrentTree();
                    Tree uposTree = UniversalPOSMapper.MapTree(tree);
                    IList<ILabel> uposLabels = uposTree.PreTerminalYield();
                    foreach (IndexedWord token in sg.VertexListSorted())
                    {
                        int idx = token.Index() - 1;
                        string uposTag = uposLabels[idx].Value();
                        token.Set(typeof(CoreAnnotations.CoarseTagAnnotation), uposTag);
                    }
                }
                else
                {
                    // Graphs read from CoNLL-U get lemmata and, when UseName is set, NER tags.
                    AddLemmata(sg);
                    if (UseName)
                    {
                        AddNERTags(sg);
                    }
                }

                // Any value other than "enhanced" / "enhanced++" leaves the graph as is.
                if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced"))
                {
                    sg = ConvertBasicToEnhanced(sg);
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced++"))
                {
                    sg = ConvertBasicToEnhancedPlusPlus(sg);
                }

                System.Console.Out.Write(writer.PrintSemanticGraph(sg));
            }
        }