/// <summary>
/// Annotates every vertex of <paramref name="sg"/> with CoNLL-U features and,
/// on request, with a lemma and a universal POS tag.
/// </summary>
/// <param name="sg">Graph whose vertices are annotated in place.</param>
/// <param name="t">
/// Constituency tree for the sentence; may be null, in which case no word is
/// treated as an imperative and no universal POS tags are added.
/// </param>
/// <param name="addLemma">
/// When true, words whose lemma is null or "_" receive a lemma computed
/// from their text and POS tag.
/// </param>
/// <param name="addUPOS">
/// When true (and <paramref name="t"/> is not null), each word's coarse tag is
/// set from the tree after mapping it with <c>UniversalPOSMapper.MapTree</c>.
/// </param>
public virtual void AddFeatures(SemanticGraph sg, Tree t, bool addLemma, bool addUPOS)
{
    ICollection<int> imperativeIndices;
    if (t == null)
    {
        imperativeIndices = new HashSet<int>();
    }
    else
    {
        imperativeIndices = GetImperatives(t);
    }

    foreach (IndexedWord vertex in sg.VertexListSorted())
    {
        string tag = vertex.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
        string text = vertex.Get(typeof(CoreAnnotations.TextAnnotation));
        int position = vertex.Get(typeof(CoreAnnotations.IndexAnnotation));

        // Reuse the feature map already attached to the word; attach a new one otherwise.
        Dictionary<string, string> features = vertex.Get(typeof(CoreAnnotations.CoNLLUFeats));
        if (features == null)
        {
            features = new Dictionary<string, string>();
            vertex.Set(typeof(CoreAnnotations.CoNLLUFeats), features);
        }

        // Features derived from the word form and its PTB POS tag only.
        features.PutAll(GetPOSFeatures(text, tag));

        // Features derived from the word's place in the semantic graph.
        features.PutAll(GetGraphFeatures(sg, vertex));

        // VB handling: imperative if the tree analysis flagged this index,
        // infinitive for any other VB. A present-subjunctive reading is
        // deliberately not assigned: subjunctive detection is too unreliable.
        if (imperativeIndices.Contains(position))
        {
            features["VerbForm"] = "Fin";
            features["Mood"] = "Imp";
        }
        else if (tag.Equals("VB"))
        {
            features["VerbForm"] = "Inf";
        }

        string currentLemma = vertex.Get(typeof(CoreAnnotations.LemmaAnnotation));
        bool lemmaMissing = currentLemma == null || currentLemma.Equals("_");
        if (addLemma && lemmaMissing)
        {
            vertex.Set(typeof(CoreAnnotations.LemmaAnnotation), morphology.Lemma(text, tag));
        }
    }

    if (!addUPOS || t == null)
    {
        return;
    }

    Tree mappedTree = UniversalPOSMapper.MapTree(t);
    IList<ILabel> universalTags = mappedTree.PreTerminalYield();
    IList<IndexedWord> vertices = sg.VertexListSorted();
    foreach (IndexedWord vertex in vertices)
    {
        // NOTE(review): assumes Index() is 1-based and lines up with the
        // tree's preterminal yield; confirm against IndexedWord.
        int tagOffset = vertex.Index() - 1;
        vertex.Set(typeof(CoreAnnotations.CoarseTagAnnotation), universalTags[tagOffset].Value());
    }
}
/// <summary>
/// Converts a constituency tree to the English basic, enhanced, or
/// enhanced++ Universal dependencies representation, or an English basic
/// Universal dependencies tree to the enhanced or enhanced++ representation.
/// </summary>
/// <remarks>
/// <para>Command-line options:</para>
/// <list type="bullet">
/// <item><description><c>-treeFile</c>: File with PTB-formatted constituency trees</description></item>
/// <item><description><c>-conlluFile</c>: File with basic dependency trees in CoNLL-U format</description></item>
/// <item><description><c>-outputRepresentation</c>: "basic" (default), "enhanced", or "enhanced++"</description></item>
/// </list>
/// <para>
/// If both input options are given, <c>-treeFile</c> takes precedence. If
/// neither is given, a usage message is written to standard error and the
/// method returns without producing output.
/// </para>
/// </remarks>
/// <param name="args">Command-line arguments, parsed into properties.</param>
/// <exception cref="Exception">
/// Thrown when the CoNLL-U input cannot be opened or an iterator over it
/// cannot be created; the original failure is available as the inner exception.
/// </exception>
public static void Main(string[] args)
{
    Properties props = StringUtils.ArgsToProperties(args);
    string treeFileName = props.GetProperty("treeFile");
    string conlluFileName = props.GetProperty("conlluFile");
    string outputRepresentation = props.GetProperty("outputRepresentation", "basic");

    if (treeFileName == null && conlluFileName == null)
    {
        System.Console.Error.WriteLine("No input file specified!");
        System.Console.Error.WriteLine(string.Empty);
        System.Console.Error.Printf("Usage: java %s [-treeFile trees.tree | -conlluFile deptrees.conllu]" + " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]%n", typeof(UniversalDependenciesConverter).GetCanonicalName());
        return;
    }

    IEnumerator<SemanticGraph> sgIterator;
    // Non-null only when reading constituency trees; kept as a typed reference
    // so the loop below does not have to downcast sgIterator on every sentence.
    UniversalDependenciesConverter.TreeToSemanticGraphIterator treeSgIterator = null;
    if (treeFileName != null)
    {
        MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
        tb.LoadPath(treeFileName);
        IEnumerator<Tree> treeIterator = tb.GetEnumerator();
        treeSgIterator = new UniversalDependenciesConverter.TreeToSemanticGraphIterator(treeIterator);
        sgIterator = treeSgIterator;
    }
    else
    {
        CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
        try
        {
            sgIterator = reader.GetIterator(IOUtils.ReaderFromString(conlluFileName));
        }
        catch (Exception e)
        {
            // System.Exception has no (Exception) constructor; wrap with a
            // message and keep the original failure as the inner exception.
            throw new Exception("Could not read CoNLL-U file '" + conlluFileName + "': " + e.Message, e);
        }
    }

    CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
    try
    {
        while (sgIterator.MoveNext())
        {
            SemanticGraph sg = sgIterator.Current;
            if (treeSgIterator != null)
            {
                // Tree input: add UPOS tags taken from the tree that produced this graph.
                Tree tree = treeSgIterator.GetCurrentTree();
                Tree uposTree = UniversalPOSMapper.MapTree(tree);
                IList<ILabel> uposLabels = uposTree.PreTerminalYield();
                foreach (IndexedWord token in sg.VertexListSorted())
                {
                    int idx = token.Index() - 1;
                    string uposTag = uposLabels[idx].Value();
                    token.Set(typeof(CoreAnnotations.CoarseTagAnnotation), uposTag);
                }
            }
            else
            {
                // CoNLL-U input: add lemmata, plus NER tags when UseName is set.
                AddLemmata(sg);
                if (UseName)
                {
                    AddNERTags(sg);
                }
            }

            // Any value other than "enhanced" / "enhanced++" leaves the basic graph untouched.
            if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced"))
            {
                sg = ConvertBasicToEnhanced(sg);
            }
            else if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced++"))
            {
                sg = ConvertBasicToEnhancedPlusPlus(sg);
            }

            System.Console.Out.Write(writer.PrintSemanticGraph(sg));
        }
    }
    finally
    {
        // IEnumerator<T> is IDisposable; dispose it even if a conversion throws.
        sgIterator.Dispose();
    }
}