コード例 #1
0
        /// <summary>
        /// Adds CoNLL-U morphological features to every vertex of a semantic graph and,
        /// on request, fills in missing lemmata and coarse (universal) POS tags.
        /// </summary>
        /// <param name="sg">Graph whose vertices are annotated in place.</param>
        /// <param name="t">
        /// Constituency tree used to find imperatives and to derive coarse tags. May be null,
        /// in which case no word is treated as an imperative and no coarse tags are set.
        /// </param>
        /// <param name="addLemma">
        /// If true, a lemma is computed for every word whose current lemma is null or "_".
        /// </param>
        /// <param name="addUPOS">
        /// If true and <paramref name="t"/> is not null, every word receives a coarse tag
        /// taken from the POS-mapped tree.
        /// </param>
        public virtual void AddFeatures(SemanticGraph sg, Tree t, bool addLemma, bool addUPOS)
        {
            // Without a tree there is nothing to detect imperatives from.
            ICollection <int> imperativeIndices = t == null ? new HashSet <int>() : GetImperatives(t);

            foreach (IndexedWord vertex in sg.VertexListSorted())
            {
                string tag      = vertex.Get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
                string surface  = vertex.Get(typeof(CoreAnnotations.TextAnnotation));
                int    position = vertex.Get(typeof(CoreAnnotations.IndexAnnotation));

                // Reuse the word's feature map when present; otherwise attach a fresh one.
                Dictionary <string, string> feats = vertex.Get(typeof(CoreAnnotations.CoNLLUFeats));
                if (feats == null)
                {
                    feats = new Dictionary <string, string>();
                    vertex.Set(typeof(CoreAnnotations.CoNLLUFeats), feats);
                }

                // Features derived from the token text and its PTB POS tag.
                feats.PutAll(GetPOSFeatures(surface, tag));
                // Features derived from the word's position in the graph.
                feats.PutAll(GetGraphFeatures(sg, vertex));

                // VB handling: imperatives are finite, any other VB is an infinitive.
                // Subjunctive detection is too unreliable, so no subjunctive features are emitted.
                if (imperativeIndices.Contains(position))
                {
                    feats["VerbForm"] = "Fin";
                    feats["Mood"]     = "Imp";
                }
                else if (tag.Equals("VB"))
                {
                    feats["VerbForm"] = "Inf";
                }

                string existingLemma = vertex.Get(typeof(CoreAnnotations.LemmaAnnotation));
                bool   lemmaMissing  = existingLemma == null || existingLemma.Equals("_");
                if (addLemma && lemmaMissing)
                {
                    vertex.Set(typeof(CoreAnnotations.LemmaAnnotation), morphology.Lemma(surface, tag));
                }
            }

            if (!addUPOS || t == null)
            {
                return;
            }

            // Map the tree's POS tags, then copy each preterminal label onto the matching word.
            Tree           mappedTree    = UniversalPOSMapper.MapTree(t);
            IList <ILabel> universalTags = mappedTree.PreTerminalYield();
            foreach (IndexedWord node in sg.VertexListSorted())
            {
                ILabel universalTag = universalTags[node.Index() - 1];
                node.Set(typeof(CoreAnnotations.CoarseTagAnnotation), universalTag.Value());
            }
        }
        /// <summary>
        /// Converts a constituency tree to the English basic, enhanced, or
        /// enhanced++ Universal dependencies representation, or an English basic
        /// Universal dependencies tree to the enhanced or enhanced++ representation.
        /// </summary>
        /// <remarks>
        /// Every resulting graph is written to standard output using
        /// <c>CoNLLUDocumentWriter.PrintSemanticGraph</c>.
        /// <para>
        /// Command-line options:<br />
        /// <c>-treeFile</c>
        /// : File with PTB-formatted constituency trees<br />
        /// <c>-conlluFile</c>
        /// : File with basic dependency trees in CoNLL-U format<br />
        /// <c>-outputRepresentation</c>
        /// : "basic" (default), "enhanced", or "enhanced++"
        /// </para>
        /// <para>
        /// If both input options are given, <c>-treeFile</c> is used. If neither is given,
        /// a usage message is printed to standard error and the method returns.
        /// Any other value of <c>-outputRepresentation</c> leaves the graph unconverted.
        /// </para>
        /// </remarks>
        /// <param name="args">Command-line arguments, parsed with <c>StringUtils.ArgsToProperties</c>.</param>
        /// <exception cref="System.Exception">
        /// Thrown when the CoNLL-U input cannot be opened; the original failure is
        /// available as the inner exception.
        /// </exception>
        public static void Main(string[] args)
        {
            Properties props                = StringUtils.ArgsToProperties(args);
            string     treeFileName         = props.GetProperty("treeFile");
            string     conlluFileName       = props.GetProperty("conlluFile");
            string     outputRepresentation = props.GetProperty("outputRepresentation", "basic");
            IEnumerator <SemanticGraph> sgIterator;

            if (treeFileName != null)
            {
                MemoryTreebank tb = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
                tb.LoadPath(treeFileName);
                IEnumerator <Tree> treeIterator = tb.GetEnumerator();
                sgIterator = new UniversalDependenciesConverter.TreeToSemanticGraphIterator(treeIterator);
            }
            else if (conlluFileName != null)
            {
                CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
                try
                {
                    sgIterator = reader.GetIterator(IOUtils.ReaderFromString(conlluFileName));
                }
                catch (Exception e)
                {
                    // System.Exception has no constructor that takes only an exception;
                    // pass the message and keep the original failure as InnerException.
                    throw new Exception(e.Message, e);
                }
            }
            else
            {
                System.Console.Error.WriteLine("No input file specified!");
                System.Console.Error.WriteLine(string.Empty);
                System.Console.Error.Printf("Usage: java %s [-treeFile trees.tree | -conlluFile deptrees.conllu]" + " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]%n", typeof(UniversalDependenciesConverter).GetCanonicalName());
                return;
            }

            CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();

            while (sgIterator.MoveNext())
            {
                SemanticGraph sg = sgIterator.Current;
                if (treeFileName != null)
                {
                    // Tree input: copy the mapped POS label of each preterminal onto the matching token.
                    // The cast is safe because this branch is only taken when the iterator was built from trees.
                    Tree           tree       = ((UniversalDependenciesConverter.TreeToSemanticGraphIterator)sgIterator).GetCurrentTree();
                    Tree           uposTree   = UniversalPOSMapper.MapTree(tree);
                    IList <ILabel> uposLabels = uposTree.PreTerminalYield();
                    foreach (IndexedWord token in sg.VertexListSorted())
                    {
                        int    idx     = token.Index() - 1;
                        string uposTag = uposLabels[idx].Value();
                        token.Set(typeof(CoreAnnotations.CoarseTagAnnotation), uposTag);
                    }
                }
                else
                {
                    // CoNLL-U input: add lemmata and, when UseName is set, NER tags.
                    AddLemmata(sg);
                    if (UseName)
                    {
                        AddNERTags(sg);
                    }
                }

                // "basic" (or any unrecognized value) leaves the graph as it is.
                if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced"))
                {
                    sg = ConvertBasicToEnhanced(sg);
                }
                else if (Sharpen.Runtime.EqualsIgnoreCase(outputRepresentation, "enhanced++"))
                {
                    sg = ConvertBasicToEnhancedPlusPlus(sg);
                }

                System.Console.Out.Write(writer.PrintSemanticGraph(sg));
            }
        }