/// <summary>
/// Command-line entry point: reads dependency graphs from a CoNLL-U file and
/// constituency trees from a tree file in lockstep, adds features to every
/// graph via <c>AddFeatures</c>, and writes each annotated graph to standard output.
/// </summary>
/// <param name="args">
/// <c>args[0]</c> is the CoNLL-U file and <c>args[1]</c> is the tree file.
/// Optional flags from index 2 onward: <c>-addUPOS</c> and
/// <c>-escapeParenthesis</c>. Unrecognized flags are ignored.
/// </param>
/// <exception cref="System.IO.IOException"/>
/// <exception cref="System.Exception">
/// Thrown when a graph has no matching tree, or when the tree yield and the
/// graph differ in size.
/// </exception>
public static void Main(string[] args)
{
    if (args.Length < 2)
    {
        log.Info("Usage: ");
        log.Info("java ");
        log.Info(typeof(Edu.Stanford.Nlp.Trees.UD.UniversalDependenciesFeatureAnnotator).GetCanonicalName());
        log.Info(" CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]");
        return;
    }

    string coNLLUFile = args[0];
    string treeFile = args[1];
    bool addUPOS = false;
    bool escapeParens = false;
    for (int i = 2; i < args.Length; i++)
    {
        if (args[i].Equals("-addUPOS"))
        {
            addUPOS = true;
        }
        else if (args[i].Equals("-escapeParenthesis"))
        {
            escapeParens = true;
        }
    }

    Edu.Stanford.Nlp.Trees.UD.UniversalDependenciesFeatureAnnotator featureAnnotator = new Edu.Stanford.Nlp.Trees.UD.UniversalDependenciesFeatureAnnotator();
    // NOTE(review): the reader is never closed in this method; confirm whether
    // the Reader type needs explicit disposal.
    Reader r = IOUtils.ReaderFromString(coNLLUFile);
    CoNLLUDocumentReader depReader = new CoNLLUDocumentReader();
    CoNLLUDocumentWriter depWriter = new CoNLLUDocumentWriter();
    IEnumerator<SemanticGraph> it = depReader.GetIterator(r);
    IEnumerator<Tree> treeIt = TreebankIterator(treeFile);

    while (it.MoveNext())
    {
        SemanticGraph sg = it.Current;
        // Advance the tree enumerator together with the graph enumerator.
        // Reading Current without MoveNext would never move past the initial
        // position. An exhausted tree file is reported as a null tree, which
        // the alignment check below turns into an error.
        Tree t = treeIt.MoveNext() ? treeIt.Current : null;

        if (t == null || t.Yield().Count != sg.Size())
        {
            StringBuilder sentenceSb = new StringBuilder();
            foreach (IndexedWord word in sg.VertexListSorted())
            {
                sentenceSb.Append(word.Get(typeof(CoreAnnotations.TextAnnotation)));
                sentenceSb.Append(' ');
            }
            throw new Exception("CoNLL-U file and tree file are not aligned. \n" + "Sentence: " + sentenceSb + '\n' + "Tree: " + ((t == null) ? "null" : t.PennString()));
        }

        featureAnnotator.AddFeatures(sg, t, true, addUPOS);
        System.Console.Out.Write(depWriter.PrintSemanticGraph(sg, !escapeParens));
    }
}
/// <summary>
/// Tests whether reading a Semantic Graph and printing it
/// is equal to the original input.
/// </summary>
/// <param name="input">
/// CoNLL-U text; runs of tabs and spaces are collapsed to a single tab
/// before the round trip so the comparison is whitespace-normalized.
/// </param>
private void TestSingleReadAndWrite(string input)
{
    string clean = input.ReplaceAll("[\\t ]+", "\t");
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
    Reader stringReader = new StringReader(clean);
    IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

    // An enumerator starts before the first element, so it must be advanced
    // before Current is read.
    NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input should contain a dependency tree.");
    SemanticGraph sg = it.Current;

    string output = writer.PrintSemanticGraph(sg);
    NUnit.Framework.Assert.AreEqual(clean, output);
}
/// <summary>
/// Reads <c>ExtraDepsTestInput</c> and checks that the resulting graph is the
/// only one in the input and contains the edges 4→1, 2→7 and 4→7.
/// </summary>
public virtual void TestExtraDependencies()
{
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    Reader stringReader = new StringReader(ExtraDepsTestInput);
    IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

    // Advance to the first graph before reading Current.
    NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input should contain a dependency tree.");
    SemanticGraph sg = it.Current;
    NUnit.Framework.Assert.IsNotNull(sg);

    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsFalse(it.MoveNext(), "The input only contains one dependency tree.");

    NUnit.Framework.Assert.IsTrue(sg.ContainsEdge(sg.GetNodeByIndex(4), sg.GetNodeByIndex(1)));
    NUnit.Framework.Assert.IsTrue(sg.ContainsEdge(sg.GetNodeByIndex(2), sg.GetNodeByIndex(7)));
    NUnit.Framework.Assert.IsTrue(sg.ContainsEdge(sg.GetNodeByIndex(4), sg.GetNodeByIndex(7)));
}
/// <summary>
/// Reads <c>CommentTestInput</c> and checks the compact string of the single
/// resulting graph, the line-number annotation of node 1, and the comments
/// attached to the graph.
/// </summary>
public virtual void TestComment()
{
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    Reader stringReader = new StringReader(CommentTestInput);
    IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

    // Advance to the first graph before reading Current.
    NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input should contain a dependency tree.");
    SemanticGraph sg = it.Current;
    NUnit.Framework.Assert.IsNotNull(sg);

    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsFalse(it.MoveNext(), "The input only contains one dependency tree.");

    NUnit.Framework.Assert.AreEqual("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.ToCompactString(true));
    // Expected value is the plain integer 3; int.Parse only accepts strings.
    NUnit.Framework.Assert.AreEqual(3, sg.GetNodeByIndex(1).Get(typeof(CoreAnnotations.LineNumberAnnotation)));
    NUnit.Framework.Assert.AreEqual(2, sg.GetComments().Count);
    NUnit.Framework.Assert.AreEqual("#comment line 1", sg.GetComments()[0]);
}
/// <summary>
/// Reads <c>MultiwordTestInput</c> and checks the compact string of the single
/// resulting graph, the original text of each token (tokens 2 and 3 carry
/// "haven't", every other token carries the empty string), and the
/// line-number annotation of node 2.
/// </summary>
public virtual void TestMultiWords()
{
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    Reader stringReader = new StringReader(MultiwordTestInput);
    IEnumerator<SemanticGraph> it = reader.GetIterator(stringReader);

    // Advance to the first graph before reading Current.
    NUnit.Framework.Assert.IsTrue(it.MoveNext(), "The input should contain a dependency tree.");
    SemanticGraph sg = it.Current;
    NUnit.Framework.Assert.IsNotNull(sg);

    // NUnit takes the condition first and the failure message second.
    NUnit.Framework.Assert.IsFalse(it.MoveNext(), "The input only contains one dependency tree.");

    NUnit.Framework.Assert.AreEqual("[have/VBP nsubj>I/PRP neg>not/RB dobj>[clue/NN det>a/DT] punct>./.]", sg.ToCompactString(true));

    foreach (IndexedWord iw in sg.VertexListSorted())
    {
        bool partOfMultiword = iw.Index() == 2 || iw.Index() == 3;
        string expectedOriginal = partOfMultiword ? "haven't" : string.Empty;
        NUnit.Framework.Assert.AreEqual(expectedOriginal, iw.OriginalText());
    }

    // Expected value is the plain integer 3; int.Parse only accepts strings.
    NUnit.Framework.Assert.AreEqual(3, sg.GetNodeByIndex(2).Get(typeof(CoreAnnotations.LineNumberAnnotation)));
}
/// <summary>
/// Converts a constituency tree to the English basic, enhanced, or
/// enhanced++ Universal dependencies representation, or an English basic
/// Universal dependencies tree to the enhanced or enhanced++ representation.
/// </summary>
/// <remarks>
/// Each resulting graph is written to standard output in the format produced
/// by <c>CoNLLUDocumentWriter.PrintSemanticGraph</c>. When both input options
/// are given, <c>-treeFile</c> takes precedence.
/// <p>
/// Command-line options:<br />
/// <c>-treeFile</c>
/// : File with PTB-formatted constituency trees<br />
/// <c>-conlluFile</c>
/// : File with basic dependency trees in CoNLL-U format<br />
/// <c>-outputRepresentation</c>
/// : "basic" (default), "enhanced", or "enhanced++"
/// </remarks>
/// <param name="args">Command-line options as listed in the remarks.</param>
public static void Main(string[] args)
{
    Properties props = StringUtils.ArgsToProperties(args);
    string treeFileName = props.GetProperty("treeFile");
    string conlluFileName = props.GetProperty("conlluFile");
    string outputRepresentation = props.GetProperty("outputRepresentation", "basic");

    IEnumerator<SemanticGraph> sgIterator;
    // Typed reference to the tree-backed iterator (only set for -treeFile
    // input) so the loop below can fetch the current tree without a downcast.
    UniversalDependenciesConverter.TreeToSemanticGraphIterator treeGraphIterator = null;

    if (treeFileName != null)
    {
        MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
        tb.LoadPath(treeFileName);
        IEnumerator<Tree> treeIterator = tb.GetEnumerator();
        treeGraphIterator = new UniversalDependenciesConverter.TreeToSemanticGraphIterator(treeIterator);
        sgIterator = treeGraphIterator;
    }
    else if (conlluFileName != null)
    {
        CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
        try
        {
            sgIterator = reader.GetIterator(IOUtils.ReaderFromString(conlluFileName));
        }
        catch (Exception e)
        {
            // Exception has no constructor taking only an inner exception;
            // wrap with the message and keep the original as InnerException.
            throw new Exception(e.Message, e);
        }
    }
    else
    {
        System.Console.Error.WriteLine("No input file specified!");
        System.Console.Error.WriteLine(string.Empty);
        // Composite formatting replaces the Java-style %s / %n specifiers.
        System.Console.Error.WriteLine("Usage: java {0} [-treeFile trees.tree | -conlluFile deptrees.conllu]" + " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]", typeof(UniversalDependenciesConverter).GetCanonicalName());
        return;
    }

    CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
    while (sgIterator.MoveNext())
    {
        SemanticGraph sg = sgIterator.Current;
        if (treeGraphIterator != null)
        {
            // Add UPOS tags: map the source tree's tags and copy them onto
            // the graph tokens by 1-based token index.
            Tree tree = treeGraphIterator.GetCurrentTree();
            Tree uposTree = UniversalPOSMapper.MapTree(tree);
            IList<ILabel> uposLabels = uposTree.PreTerminalYield();
            foreach (IndexedWord token in sg.VertexListSorted())
            {
                int idx = token.Index() - 1;
                string uposTag = uposLabels[idx].Value();
                token.Set(typeof(CoreAnnotations.CoarseTagAnnotation), uposTag);
            }
        }
        else
        {
            AddLemmata(sg);
            if (UseName)
            {
                AddNERTags(sg);
            }
        }

        if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced"))
        {
            sg = ConvertBasicToEnhanced(sg);
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced++"))
        {
            sg = ConvertBasicToEnhancedPlusPlus(sg);
        }

        System.Console.Out.Write(writer.PrintSemanticGraph(sg));
    }
}