Ejemplo n.º 1
0
        /// <summary>
        /// Computes the <c>PronType</c> feature for relative and interrogative pronouns.
        /// </summary>
        /// <param name="sg">Dependency graph that contains <paramref name="word"/>.</param>
        /// <param name="word">Token to inspect; only tokens whose tag starts with "W" produce a feature.</param>
        /// <returns>
        /// An empty dictionary for non-"W" tags; otherwise a dictionary with a single
        /// <c>PronType</c> entry set to "Rel", "Dem" or "Int".
        /// </returns>
        private static Dictionary <string, string> GetRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word)
        {
            Dictionary <string, string> result = new Dictionary <string, string>();

            if (!word.Tag().StartsWith("W"))
            {
                return result;
            }

            // The word is treated as relative when its parent hangs off the
            // grandparent through the relative-clause-modifier relation.
            IndexedWord head      = sg.GetParent(word);
            IndexedWord grandHead = head != null ? sg.GetParent(head) : null;
            bool        relative  = false;

            if (grandHead != null)
            {
                relative = sg.GetEdge(grandHead, head).GetRelation().Equals(UniversalEnglishGrammaticalRelations.RelativeClauseModifier);
            }

            string pronType;
            if (relative)
            {
                pronType = "Rel";
            }
            else if (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "that"))
            {
                // A non-relative "that" is labelled demonstrative.
                pronType = "Dem";
            }
            else
            {
                pronType = "Int";
            }

            result["PronType"] = pronType;
            return result;
        }
Ejemplo n.º 2
0
        /// <summary>Decides the case of the pronoun "you" or "it" from its incoming dependency.</summary>
        /// <param name="sg">Dependency graph that contains the pronoun.</param>
        /// <param name="word">The pronoun; it is looked up again in <paramref name="sg"/> by its index.</param>
        /// <returns>
        /// "Acc" when the pronoun is attached through an object relation, or through a nominal-modifier
        /// or root relation while carrying a case-marker child; "Nom" in every other situation.
        /// </returns>
        private static string PronounCase(SemanticGraph sg, IndexedWord word)
        {
            word = sg.GetNodeByIndex(word.Index());

            IndexedWord head = sg.GetParent(word);
            if (head == null)
            {
                return "Nom";
            }

            SemanticGraphEdge link = sg.GetEdge(head, word);
            if (link == null)
            {
                return "Nom";
            }

            GrammaticalRelation reln = link.GetRelation();

            if (UniversalEnglishGrammaticalRelations.Object.IsAncestor(reln))
            {
                /* The pronoun is an object. */
                return "Acc";
            }

            bool nominalOrRoot = UniversalEnglishGrammaticalRelations.NominalModifier.IsAncestor(reln) || reln == GrammaticalRelation.Root;
            if (nominalOrRoot && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.CaseMarker))
            {
                /* The pronoun is the head of a prepositional phrase. */
                return "Acc";
            }

            return "Nom";
        }
Ejemplo n.º 3
0
        /// <summary>
        /// Collects the features of <paramref name="word"/> that can only be decided by looking at the
        /// dependency graph: Case, Person, PronType, VerbForm, Tense, Reflex and Voice.
        /// </summary>
        /// <param name="sg">Dependency graph that contains <paramref name="word"/>.</param>
        /// <param name="word">Token whose tag and value drive the individual checks.</param>
        /// <returns>Feature name to feature value map; empty when no check applies.</returns>
        private static Dictionary <string, string> GetGraphFeatures(SemanticGraph sg, IndexedWord word)
        {
            Dictionary <string, string> result = new Dictionary <string, string>();
            string tag = word.Tag();

            /* Case of "you" / "it". */
            bool youOrIt = tag.Equals("PRP") && (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "you") || Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "it"));
            if (youOrIt)
            {
                result["Case"] = PronounCase(sg, word);
            }

            /* Person of "was"; left out when it cannot be determined. */
            if (tag.Equals("VBD") && Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "was"))
            {
                string wasPerson = WasPerson(sg, word);
                if (wasPerson != null)
                {
                    result["Person"] = wasPerson;
                }
            }

            /* Relative and interrogative pronouns. */
            result.PutAll(GetRelAndIntPronFeatures(sg, word));

            /* Gerund vs. present participle: a "be" auxiliary marks the participle. */
            if (tag.Equals("VBG"))
            {
                bool withBeAux = HasBeAux(sg, word);
                result["VerbForm"] = withBeAux ? "Part" : "Ger";
                if (withBeAux)
                {
                    result["Tense"] = "Pres";
                }
            }

            /* "-self" pronouns: any attachment other than an NP adverbial modifier counts as reflexive. */
            if (word.Value().Matches(SelfRegex) && tag.Equals("PRP"))
            {
                IndexedWord head = sg.GetParent(word);
                if (head != null && sg.GetEdge(head, word).GetRelation() != UniversalEnglishGrammaticalRelations.NpAdverbialModifier)
                {
                    result["Case"]   = "Acc";
                    result["Reflex"] = "Yes";
                }
            }

            /* Voice: a past participle with a passive auxiliary child. */
            if (tag.Equals("VBN") && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxPassiveModifier))
            {
                result["Voice"] = "Pass";
            }

            return result;
        }
        /// <summary>
        /// Builds the embedding of mention <paramref name="m"/> by concatenating span averages, the
        /// document embedding and the embeddings of individual words in and around the mention.
        /// </summary>
        /// <param name="m">Mention whose sentence words, span indices and head word are read.</param>
        /// <param name="docEmbedding">Document embedding, inserted unchanged into the concatenation.</param>
        /// <returns>The result of <c>NeuralUtils.Concatenate</c> over the parts, in the order listed below.</returns>
        public virtual SimpleMatrix GetMentionEmbeddings(Mention m, SimpleMatrix docEmbedding)
        {
            // First incoming edge of the head word in the enhanced graph, or null when there is none.
            IEnumerator <SemanticGraphEdge> incoming = m.enhancedDependency.IncomingEdgeIterator(m.headIndexedWord);
            SemanticGraphEdge headEdge = null;

            if (incoming.MoveNext())
            {
                headEdge = incoming.Current;
            }

            // Averages: the mention span, the 5 positions before it, the 5 positions after it,
            // and the sentence sub-list SubList(0, Count - 1).
            var spanAverage      = GetAverageEmbedding(m.sentenceWords, m.startIndex, m.endIndex);
            var precedingAverage = GetAverageEmbedding(m.sentenceWords, m.startIndex - 5, m.startIndex);
            var followingAverage = GetAverageEmbedding(m.sentenceWords, m.endIndex, m.endIndex + 5);
            var sentenceAverage  = GetAverageEmbedding(m.sentenceWords.SubList(0, m.sentenceWords.Count - 1));

            // Single words: head, first and last word of the span, then the neighbours on either side.
            var headWord         = GetWordEmbedding(m.sentenceWords, m.headIndex);
            var firstWord        = GetWordEmbedding(m.sentenceWords, m.startIndex);
            var lastWord         = GetWordEmbedding(m.sentenceWords, m.endIndex - 1);
            var oneBefore        = GetWordEmbedding(m.sentenceWords, m.startIndex - 1);
            var oneAfter         = GetWordEmbedding(m.sentenceWords, m.endIndex);
            var twoBefore        = GetWordEmbedding(m.sentenceWords, m.startIndex - 2);
            var twoAfter         = GetWordEmbedding(m.sentenceWords, m.endIndex + 1);

            // Word at the source end of the head's incoming edge; null is passed when there is no edge.
            var governorWord     = GetWordEmbedding(headEdge == null ? null : headEdge.GetSource().Word());

            return NeuralUtils.Concatenate(
                spanAverage,
                precedingAverage,
                followingAverage,
                sentenceAverage,
                docEmbedding,
                headWord,
                firstWord,
                lastWord,
                oneBefore,
                oneAfter,
                twoBefore,
                twoAfter,
                governorWord);
        }
Ejemplo n.º 5
0
        /// <summary>
        /// Removes the edge between the named governor and dependent nodes whose relation is the one
        /// the matcher bound to <c>edgeName</c>. Does nothing when no such edge is found.
        /// </summary>
        /// <param name="sg">Graph to modify.</param>
        /// <param name="sm">Matcher supplying the named nodes and the relation string.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            string      relnName  = sm.GetRelnString(edgeName);
            IndexedWord governor  = GetNamedNode(govName, sm);
            IndexedWord dependent = GetNamedNode(depName, sm);

            SemanticGraphEdge target = sg.GetEdge(governor, dependent, GrammaticalRelation.ValueOf(relnName));
            if (target == null)
            {
                return;
            }

            sg.RemoveEdge(target);
        }
        /// <summary>
        /// Estimates the probability that the object edge <paramref name="edge"/> can be deleted,
        /// using the other edges on the same governor as context.
        /// </summary>
        /// <param name="edge">The object edge under consideration.</param>
        /// <param name="neighbors">Edges sharing the governor; <paramref name="edge"/> itself is skipped.</param>
        /// <returns>
        /// 1.0 when another neighbor relation contains "obj"; otherwise one minus the capped affinity
        /// score when one is known, falling back to the relation-only deletion probability.
        /// </returns>
        public virtual double ObjDeletionProbability(SemanticGraphEdge edge, IEnumerable <SemanticGraphEdge> neighbors)
        {
            // Get information about the neighbors
            Optional <string> subj = Optional.Empty();
            Optional <string> pp   = Optional.Empty();

            foreach (SemanticGraphEdge neighbor in neighbors)
            {
                if (neighbor != edge)
                {
                    string neighborRel = neighbor.GetRelation().ToString();
                    if (neighborRel.Contains("subj"))
                    {
                        subj = Optional.Of(neighbor.GetDependent().OriginalText().ToLower());
                    }
                    if (neighborRel.Contains("prep"))
                    {
                        pp = Optional.Of(neighborRel);
                    }
                    if (neighborRel.Contains("obj"))
                    {
                        // allow deleting second object
                        return(1.0);
                    }
                }
            }

            string obj  = edge.GetDependent().OriginalText().ToLower();
            string verb = edge.GetGovernor().OriginalText().ToLower();

            // Compute the most informative drop probability we can.
            // The score must be nullable: "no score known" is a distinct state that
            // drives the fallbacks below, and a plain double cannot represent it.
            double? rawScore = null;

            if (subj.IsPresent() && pp.IsPresent())
            {
                // Case: subj + pp + obj. TryGetValue is used because a missing key means
                // "unknown", not an error; the indexer would throw instead.
                if (verbSubjPPObjAffinity.TryGetValue(Quadruple.MakeQuadruple(verb, subj.Get(), pp.Get(), obj), out var quadScore))
                {
                    rawScore = quadScore;
                }
            }
            if (rawScore == null)
            {
                // Back off to the verb-only affinity.
                if (verbObjAffinity.TryGetValue(verb, out var verbScore))
                {
                    rawScore = verbScore;
                }
            }
            if (rawScore == null)
            {
                // Nothing known about this verb: use the relation-level estimate.
                return(DeletionProbability(edge.GetRelation().ToString()));
            }

            return(1.0 - Math.Min(1.0, rawScore.Value / upperProbabilityCap));
        }
Ejemplo n.º 7
0
        /// <summary>
        /// Adds a mention for the pronoun <paramref name="headword"/> and, when the pronoun has
        /// conjunct children, a second mention covering the surrounding noun phrase.
        /// </summary>
        /// <param name="headword">Pronoun node; <c>Index() - 1</c> is used as its position in the token list.</param>
        /// <param name="dep">Dependency graph used for edge and conjunct lookups.</param>
        /// <param name="s">Sentence providing the tokens and the basic / enhanced dependency graphs.</param>
        /// <param name="mentions">Receives the newly created mentions.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention; updated on insertion.</param>
        /// <param name="namedEntitySpanSet">Named-entity spans; a pronoun span inside one is not added.</param>
        private void ExtractPronounForHeadword(IndexedWord headword, SemanticGraph dep, ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            IList <CoreLabel> tokens       = s.Get(typeof(CoreAnnotations.TokensAnnotation));
            SemanticGraph     basicDeps    = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            SemanticGraph     enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));

            // Fall back to the basic dependencies when no enhanced graph is attached.
            if (enhancedDeps == null)
            {
                enhancedDeps = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            }

            int headPos   = headword.Index();
            int spanStart = headPos - 1;
            int spanEnd   = headPos;

            // handle "you all", "they both" etc: extend the span over the following
            // "all"/"both" when the pronoun has an edge to that token.
            bool quantifierFollows = tokens.Count > headPos && tokens[headPos].Word().Matches("all|both");
            if (quantifierFollows)
            {
                IndexedWord follower = dep.GetNodeByIndex(headPos + 1);
                if (dep.GetEdge(headword, follower) != null)
                {
                    spanEnd++;
                }
            }

            IntPair span    = new IntPair(spanStart, spanEnd);
            bool    blocked = mentionSpanSet.Contains(span) || InsideNE(span, namedEntitySpanSet);

            if (!blocked)
            {
                // -1 is a placeholder mention id.
                Mention mention = new Mention(-1, spanStart, spanEnd, tokens, basicDeps, enhancedDeps, new List <CoreLabel>(tokens.SubList(spanStart, spanEnd)));
                mention.headIndex  = headPos - 1;
                mention.headWord   = tokens[mention.headIndex];
                mention.headString = mention.headWord.Word().ToLower(Locale.English);
                mentions.Add(mention);
                mentionSpanSet.Add(span);
            }

            // when pronoun is a part of conjunction (e.g., you and I)
            ICollection <IndexedWord> conjuncts = dep.GetChildrenWithReln(headword, UniversalEnglishGrammaticalRelations.Conjunct);
            if (conjuncts.Count <= 0)
            {
                return;
            }

            IntPair npSpan = GetNPSpan(headword, dep, tokens);
            spanStart = npSpan.Get(0);
            spanEnd   = npSpan.Get(1) + 1;

            // try not to have span that ends with ,
            if (",".Equals(tokens[spanEnd - 1].Word()))
            {
                spanEnd--;
            }

            AddMention(spanStart, spanEnd, headword, mentions, mentionSpanSet, namedEntitySpanSet, tokens, basicDeps, enhancedDeps);
        }
Ejemplo n.º 8
0
        /// <summary>
        /// Initializes a new <see cref="ParseResult"/> from the first sentence of
        /// <paramref name="annotation"/>: its constituency tree, its tokens and its dependency edges.
        /// </summary>
        /// <param name="annotation">Annotated document; only the sentence at position 0 is read.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="annotation"/> is null.</exception>
        internal ParseResult(Annotation annotation)
        {
            if (annotation == null)
            {
                throw new System.ArgumentNullException(nameof(annotation));
            }

            java.util.AbstractList sentences = annotation.get(SentencesAnnotationClass) as java.util.AbstractList;
            CoreMap sentence = sentences.get(0) as CoreMap;
            LabeledScoredTreeNode constituencyParse = sentence.get(TreeAnnotationClass) as LabeledScoredTreeNode;

            // Skip the ROOT
            Constituents = constituencyParse.firstChild();
            Constituents.indexLeaves();

            // Build the collection of tokens
            var parsedTokens = sentence.get(TokensAnnotationClass) as java.util.AbstractList;

            for (int tokenIndex = 0; tokenIndex < parsedTokens.size(); tokenIndex++)
            {
                CoreLabel source = parsedTokens.get(tokenIndex) as CoreLabel;
                Tokens.Add(new ParseToken
                {
                    Index            = source.index(),
                    Word             = source.word(),
                    Lemma            = source.lemma(),
                    PartOfSpeech     = source.get(PartOfSpeechAnnotationClass) as string,
                    NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
                });
            }

            // Create the list of dependencies between tokens
            SemanticGraph dependencyGraph = sentence.get(DependencyAnnotationClass) as SemanticGraph;

            java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
            while (dependencyGraphEdges.hasNext())
            {
                SemanticGraphEdge edge = dependencyGraphEdges.next() as SemanticGraphEdge;

                string      relationName      = edge.getRelation().getShortName();
                string      relationSpecifier = edge.getRelation().getSpecific();
                IndexedWord governor          = edge.getGovernor();
                IndexedWord dependent         = edge.getDependent();

                // Each dependency is stored as (relation, specifier, governor index, dependent index).
                Dependencies.Add((relationName, relationSpecifier, governor.index(), dependent.index()));
            }
        }
 /// <summary>
 /// Estimates the probability that the subject edge <paramref name="edge"/> can be deleted.
 /// </summary>
 /// <param name="edge">The subject edge under consideration.</param>
 /// <param name="neighbors">Edges to scan; <paramref name="edge"/> itself is ignored.</param>
 /// <returns>1.0 when some other neighbor's relation contains "subj", otherwise 0.0.</returns>
 public virtual double SubjDeletionProbability(SemanticGraphEdge edge, IEnumerable <SemanticGraphEdge> neighbors)
 {
     foreach (SemanticGraphEdge other in neighbors)
     {
         // Another subject on the same set of neighbors makes this one deletable.
         if (other != edge && other.GetRelation().ToString().Contains("subj"))
         {
             return 1.0;
         }
     }

     return 0.0;
 }
Ejemplo n.º 10
0
        /// <summary>
        /// Returns true if <paramref name="word"/> has an auxiliary verb attached to it, either
        /// directly or through a governor it is linked to by a conjunct or copula relation.
        /// </summary>
        /// <param name="sg">Dependency graph that contains <paramref name="word"/>.</param>
        /// <param name="word">Node to inspect.</param>
        private static bool HasAux(SemanticGraph sg, IndexedWord word)
        {
            if (sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
            {
                return true;
            }

            IndexedWord head = sg.GetParent(word);
            if (head == null)
            {
                return false;
            }

            // Only conjunct and copula attachments pass the question on to the governor.
            GrammaticalRelation reln = sg.GetEdge(head, word).GetRelation();
            bool inheritsFromHead = UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(reln) || UniversalEnglishGrammaticalRelations.Copula.Equals(reln);

            return inheritsFromHead && HasAux(sg, head);
        }
Ejemplo n.º 11
0
        /// <summary>
        /// Removes edges carrying <c>relation</c> from <paramref name="sg"/>. With both nodes named,
        /// the single governor-to-dependent edge is removed; with a wildcard on one side, every
        /// matching incoming (wildcard governor) or outgoing (wildcard dependent) edge of the named
        /// node is removed.
        /// </summary>
        /// <param name="sg">Graph to modify.</param>
        /// <param name="sm">Matcher used to resolve the named governor and dependent nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            bool        govWild = govName.Equals(WildcardNode);
            bool        depWild = depName.Equals(WildcardNode);
            IndexedWord govNode = GetNamedNode(govName, sm);
            IndexedWord depNode = GetNamedNode(depName, sm);

            if (govNode != null && depNode != null)
            {
                SemanticGraphEdge exact = sg.GetEdge(govNode, depNode, relation);
                if (exact != null)
                {
                    sg.RemoveEdge(exact);
                }
                return;
            }

            // Collect the matching edges first and remove them afterwards: removing while the
            // graph's own edge iterable is being enumerated would mutate the collection mid-iteration.
            System.Collections.Generic.List <SemanticGraphEdge> doomed = new System.Collections.Generic.List <SemanticGraphEdge>();

            if (depNode != null && govWild)
            {
                // dep known, wildcard gov
                foreach (SemanticGraphEdge incoming in sg.IncomingEdgeIterable(depNode))
                {
                    if (incoming.GetRelation().Equals(relation))
                    {
                        doomed.Add(incoming);
                    }
                }
            }
            else if (govNode != null && depWild)
            {
                // gov known, wildcard dep
                foreach (SemanticGraphEdge outgoing in sg.OutgoingEdgeIterable(govNode))
                {
                    if (outgoing.GetRelation().Equals(relation))
                    {
                        doomed.Add(outgoing);
                    }
                }
            }

            foreach (SemanticGraphEdge stale in doomed)
            {
                // An earlier removal may already have taken this edge out of the graph.
                if (sg.ContainsEdge(stale))
                {
                    sg.RemoveEdge(stale);
                }
            }
        }
        /// <summary>
        /// Dispatches to the deletion-probability estimate that fits the relation of
        /// <paramref name="edge"/>.
        /// </summary>
        /// <param name="edge">Edge whose deletion is being considered.</param>
        /// <param name="neighbors">Neighboring edges, forwarded to the relation-specific estimators.</param>
        /// <returns>
        /// The prep / obj / subj specific estimate when the relation string contains that marker
        /// (checked in that order); for "amod", 0.0 for privative adjectives and 1.0 otherwise;
        /// else the estimate based on the relation string alone.
        /// </returns>
        public virtual double DeletionProbability(SemanticGraphEdge edge, IEnumerable <SemanticGraphEdge> neighbors)
        {
            string reln = edge.GetRelation().ToString();

            if (reln.Contains("prep"))
            {
                return PpDeletionProbability(edge, neighbors);
            }
            if (reln.Contains("obj"))
            {
                return ObjDeletionProbability(edge, neighbors);
            }
            if (reln.Contains("subj"))
            {
                return SubjDeletionProbability(edge, neighbors);
            }
            if (!reln.Equals("amod"))
            {
                return DeletionProbability(reln);
            }

            // Adjectival modifier: look the adjective up by lemma, or by surface word when no lemma is set.
            string lemma     = edge.GetDependent().Lemma();
            string adjective = (lemma != null ? lemma : edge.GetDependent().Word()).ToLower();

            return Edu.Stanford.Nlp.Naturalli.Util.PrivativeAdjectives.Contains(adjective) ? 0.0 : 1.0;
        }
Ejemplo n.º 13
0
        /// <summary>
        /// Extracts a mention for every noun, pronoun or determiner node of the sentence's basic
        /// dependency graph, except nodes attached as "det" or "compound".
        /// </summary>
        /// <param name="s">Sentence providing the basic dependency graph.</param>
        /// <param name="mentions">Receives the extracted mentions.</param>
        /// <param name="mentionSpanSet">Spans already covered by a mention.</param>
        /// <param name="namedEntitySpanSet">Spans of named entities.</param>
        private void ExtractNPorPRPFromDependency(ICoreMap s, IList <Mention> mentions, ICollection <IntPair> mentionSpanSet, ICollection <IntPair> namedEntitySpanSet)
        {
            SemanticGraph basic = s.Get(typeof(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation));
            // DT is for "this, these, etc"
            IList <IndexedWord> nounsOrPrp = basic.GetAllNodesByPartOfSpeechPattern("N.*|PRP.*|DT");

            foreach (IndexedWord w in nounsOrPrp)
            {
                // A node without a parent has no incoming edge to look up; never pass a
                // null governor to GetEdge.
                IndexedWord       parent = basic.GetParent(w);
                SemanticGraphEdge edge   = parent == null ? null : basic.GetEdge(parent, w);

                // if edge is null, it's root
                string shortname = edge == null ? "root" : edge.GetRelation().GetShortName();

                // TODO: what to remove? remove more?
                if (shortname.Matches("det|compound"))
                {
                    continue;
                }

                ExtractMentionForHeadword(w, basic, s, mentions, mentionSpanSet, namedEntitySpanSet);
            }
        }
Ejemplo n.º 14
0
        /// <summary>
        /// Adds an edge with <c>relation</c> and <c>weight</c> between the named governor and
        /// dependent nodes, unless such an edge already exists. Nodes not yet in the graph are
        /// added as vertices first.
        /// </summary>
        /// <param name="sg">Graph to modify.</param>
        /// <param name="sm">Matcher used to resolve the named governor and dependent nodes.</param>
        public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
        {
            IndexedWord governor  = GetNamedNode(govName, sm);
            IndexedWord dependent = GetNamedNode(depName, sm);

            // Nothing to do when the edge is already present.
            if (sg.GetEdge(governor, dependent, relation) != null)
            {
                return;
            }

            // Make sure both endpoints are vertices of the graph before linking them.
            if (!sg.ContainsVertex(governor))
            {
                sg.AddVertex(governor);
            }
            if (!sg.ContainsVertex(dependent))
            {
                sg.AddVertex(dependent);
            }

            sg.AddEdge(governor, dependent, relation, weight, false);
        }
Ejemplo n.º 15
0
        /// <summary>Determines the grammatical person of the verb form "was".</summary>
        /// <param name="sg">Dependency graph that contains <paramref name="word"/>.</param>
        /// <param name="word">The verb node (or, on recursion, one of its governors).</param>
        /// <returns>
        /// "1" when the subject is "I"; "3" when the person is taken to be third; null when the
        /// node has neither a subject nor an incoming edge to follow.
        /// </returns>
        private static string WasPerson(SemanticGraph sg, IndexedWord word)
        {
            // Active subject first, passive subject as the alternative.
            IndexedWord subject = sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalSubject)
                                  ?? sg.GetChildWithReln(word, UniversalEnglishGrammaticalRelations.NominalPassiveSubject);

            if (subject != null && Sharpen.Runtime.EqualsIgnoreCase(subject.Word(), "i"))
            {
                /* "I" is the subject of "was". */
                return "1";
            }

            // Answer used when there is no governor edge to climb: a subject that is not "I" means third person.
            string withoutHead = subject != null ? "3" : null;

            IndexedWord head = sg.GetParent(word);
            if (head == null)
            {
                return withoutHead;
            }

            SemanticGraphEdge link = sg.GetEdge(head, word);
            if (link == null)
            {
                return withoutHead;
            }

            // Auxiliaries and conjuncts take their person from the governor's subject.
            GrammaticalRelation reln = link.GetRelation();
            bool askHead = UniversalEnglishGrammaticalRelations.AuxModifier.Equals(reln)
                           || UniversalEnglishGrammaticalRelations.AuxPassiveModifier.Equals(reln)
                           || UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(reln);

            return askHead ? WasPerson(sg, head) : "3";
        }
Ejemplo n.º 16
0
        /// <summary>
        /// Returns true if <paramref name="word"/> has an inflection of "be" as an auxiliary,
        /// either directly or on a governor it is attached to by a conjunct relation.
        /// </summary>
        /// <param name="sg">Dependency graph that contains <paramref name="word"/>.</param>
        /// <param name="word">Node to inspect.</param>
        private static bool HasBeAux(SemanticGraph sg, IndexedWord word)
        {
            foreach (IndexedWord auxiliary in sg.GetChildrenWithReln(word, UniversalEnglishGrammaticalRelations.AuxModifier))
            {
                if (auxiliary.Value().Matches(BeRegex))
                {
                    return true;
                }
            }

            IndexedWord head = sg.GetParent(word);
            if (head == null)
            {
                return false;
            }

            /* The word may be part of a conjunction: then the auxiliary sits on the conjunction's head. */
            bool isConjunct = UniversalEnglishGrammaticalRelations.Conjunct.IsAncestor(sg.GetEdge(head, word).GetRelation());

            return isConjunct && HasBeAux(sg, head);
        }
Ejemplo n.º 17
0
        /// <summary>
        /// Returns the leftmost and rightmost token positions of the noun phrase headed by
        /// <paramref name="headword"/>, leaving out the copula relation (nsubj &amp; cop) and
        /// children attached as dep, discourse or punct.
        /// e.g., you are the person -&gt; return "the person"
        /// </summary>
        /// <param name="headword">Head of the noun phrase; <c>Index() - 1</c> is used as its position.</param>
        /// <param name="dep">Dependency graph that contains <paramref name="headword"/>.</param>
        /// <param name="sent">Tokens of the sentence (not read by this method).</param>
        /// <returns>Pair of begin and end positions; both equal the head position when no child qualifies.</returns>
        private IntPair GetNPSpan(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
        {
            int headPos = headword.Index() - 1;
            IList <IndexedWord> kids = dep.GetChildList(headword);

            // With a copula child, only the children listed after it are considered.
            IndexedWord copula = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
            int firstKid = 0;
            if (copula != null)
            {
                firstKid = kids.IndexOf(copula) + 1;
            }

            // children which will be inside of NP
            IList <IndexedWord> npKids = Generics.NewArrayList();
            for (int i = firstKid; i < kids.Count; i++)
            {
                IndexedWord kid  = kids[i];
                string      reln = dep.GetEdge(headword, kid).GetRelation().GetShortName();

                // dep / discourse / punct children are skipped.
                if (!reln.Matches("dep|discourse|punct"))
                {
                    npKids.Add(kid);
                }
            }

            if (npKids.Count == 0)
            {
                // the headword is the only word
                return new IntPair(headPos, headPos);
            }

            Pair <IndexedWord, IndexedWord> firstExtent = SemanticGraphUtils.LeftRightMostChildVertices(npKids[0], dep);
            Pair <IndexedWord, IndexedWord> lastExtent  = SemanticGraphUtils.LeftRightMostChildVertices(npKids[npKids.Count - 1], dep);

            // headword can be first or last word
            int spanBegin = Math.Min(headPos, firstExtent.first.Index() - 1);
            int spanEnd   = Math.Max(headPos, lastExtent.second.Index() - 1);

            return new IntPair(spanBegin, spanEnd);
        }
Ejemplo n.º 18
0
        /// <summary>
        /// Adds features describing the source word of dependency edge <paramref name="e"/>:
        /// its POS tag, the relation, and optionally a word indicator.
        /// </summary>
        /// <param name="features">Counter that receives the features.</param>
        /// <param name="prefix">Prefix put in front of every feature name.</param>
        /// <param name="e">Dependency edge; when null a single "no-" + prefix feature is added.</param>
        /// <param name="addWord">Whether to also add the "-word=" feature.</param>
        private void AddDependencyFeatures(ICounter <string> features, string prefix, SemanticGraphEdge e, bool addWord)
        {
            if (e == null)
            {
                features.IncrementCount("no-" + prefix);
                return;
            }

            // Read everything from the edge up front, before any feature is counted.
            IndexedWord governor    = e.GetSource();
            string      governorPos = governor.Tag();
            string      governorStr = governor.Word();
            string      relnName    = e.GetRelation().ToString();

            if (addWord)
            {
                string indicator = WordIndicator(governorStr, governorPos);
                features.IncrementCount(prefix + "-word=" + indicator);
            }

            features.IncrementCount(prefix + "-POS=" + governorPos);
            features.IncrementCount(prefix + "-relation=" + relnName);
        }
Ejemplo n.º 19
0
        private ICounter <string> GetFeatures(Document doc, Mention m1, Mention m2)
        {
            System.Diagnostics.Debug.Assert((m1.AppearEarlierThan(m2)));
            ICounter <string> features = new ClassicCounter <string>();

            // global features
            features.IncrementCount("bias");
            if (useDocSource)
            {
                features.IncrementCount("doc-type=" + doc.docType);
                if (doc.docInfo != null && doc.docInfo.Contains("DOC_ID"))
                {
                    features.IncrementCount("doc-source=" + doc.docInfo["DOC_ID"].Split("/")[1]);
                }
            }
            // singleton feature conjunctions
            IList <string> singletonFeatures1 = m1.GetSingletonFeatures(dictionaries);
            IList <string> singletonFeatures2 = m2.GetSingletonFeatures(dictionaries);

            foreach (KeyValuePair <int, string> e in SingletonFeatures)
            {
                if (e.Key < singletonFeatures1.Count && e.Key < singletonFeatures2.Count)
                {
                    features.IncrementCount(e.Value + "=" + singletonFeatures1[e.Key] + "_" + singletonFeatures2[e.Key]);
                }
            }
            SemanticGraphEdge p1 = GetDependencyParent(m1);
            SemanticGraphEdge p2 = GetDependencyParent(m2);

            features.IncrementCount("dep-relations=" + (p1 == null ? "null" : p1.GetRelation()) + "_" + (p2 == null ? "null" : p2.GetRelation()));
            features.IncrementCount("roles=" + GetRole(m1) + "_" + GetRole(m2));
            CoreLabel headCL1  = HeadWord(m1);
            CoreLabel headCL2  = HeadWord(m2);
            string    headPOS1 = GetPOS(headCL1);
            string    headPOS2 = GetPOS(headCL2);

            features.IncrementCount("head-pos-s=" + headPOS1 + "_" + headPOS2);
            features.IncrementCount("head-words=" + WordIndicator("h_" + headCL1.Word().ToLower() + "_" + headCL2.Word().ToLower(), headPOS1 + "_" + headPOS2));
            // agreement features
            AddFeature(features, "animacies-agree", m2.AnimaciesAgree(m1));
            AddFeature(features, "attributes-agree", m2.AttributesAgree(m1, dictionaries));
            AddFeature(features, "entity-types-agree", m2.EntityTypesAgree(m1, dictionaries));
            AddFeature(features, "numbers-agree", m2.NumbersAgree(m1));
            AddFeature(features, "genders-agree", m2.GendersAgree(m1));
            AddFeature(features, "ner-strings-equal", m1.nerString.Equals(m2.nerString));
            // string matching features
            AddFeature(features, "antecedent-head-in-anaphor", HeadContainedIn(m1, m2));
            AddFeature(features, "anaphor-head-in-antecedent", HeadContainedIn(m2, m1));
            if (m1.mentionType != Dictionaries.MentionType.Pronominal && m2.mentionType != Dictionaries.MentionType.Pronominal)
            {
                AddFeature(features, "antecedent-in-anaphor", m2.SpanToString().ToLower().Contains(m1.SpanToString().ToLower()));
                AddFeature(features, "anaphor-in-antecedent", m1.SpanToString().ToLower().Contains(m2.SpanToString().ToLower()));
                AddFeature(features, "heads-equal", Sharpen.Runtime.EqualsIgnoreCase(m1.headString, m2.headString));
                AddFeature(features, "heads-agree", m2.HeadsAgree(m1));
                AddFeature(features, "exact-match", m1.ToString().Trim().ToLower().Equals(m2.ToString().Trim().ToLower()));
                AddFeature(features, "partial-match", RelaxedStringMatch(m1, m2));
                double editDistance = StringUtils.EditDistance(m1.SpanToString(), m2.SpanToString()) / (double)(m1.SpanToString().Length + m2.SpanToString().Length);
                features.IncrementCount("edit-distance", editDistance);
                features.IncrementCount("edit-distance=" + ((int)(editDistance * 10) / 10.0));
                double headEditDistance = StringUtils.EditDistance(m1.headString, m2.headString) / (double)(m1.headString.Length + m2.headString.Length);
                features.IncrementCount("head-edit-distance", headEditDistance);
                features.IncrementCount("head-edit-distance=" + ((int)(headEditDistance * 10) / 10.0));
            }
            // distance features
            AddNumeric(features, "mention-distance", m2.mentionNum - m1.mentionNum);
            AddNumeric(features, "sentence-distance", m2.sentNum - m1.sentNum);
            if (m2.sentNum == m1.sentNum)
            {
                AddNumeric(features, "word-distance", m2.startIndex - m1.endIndex);
                if (m1.endIndex > m2.startIndex)
                {
                    features.IncrementCount("spans-intersect");
                }
            }
            // setup for dcoref features
            ICollection <Mention> ms1 = new HashSet <Mention>();

            ms1.Add(m1);
            ICollection <Mention> ms2 = new HashSet <Mention>();

            ms2.Add(m2);
            Random       r  = new Random();
            CorefCluster c1 = new CorefCluster(20000 + r.NextInt(10000), ms1);
            CorefCluster c2 = new CorefCluster(10000 + r.NextInt(10000), ms2);
            string       s2 = m2.LowercaseNormalizedSpanString();
            string       s1 = m1.LowercaseNormalizedSpanString();

            // discourse dcoref features
            AddFeature(features, "mention-speaker-PER0", Sharpen.Runtime.EqualsIgnoreCase(m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)), "PER0"));
            AddFeature(features, "antecedent-is-anaphor-speaker", CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
            AddFeature(features, "same-speaker", CorefRules.EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "person-disagree-same-speaker", CorefRules.EntityPersonDisagree(doc, m2, m1, dictionaries) && CorefRules.EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "antecedent-matches-anaphor-speaker", CorefRules.AntecedentMatchesMentionSpeakerAnnotation(m2, m1, doc));
            AddFeature(features, "discourse-you-PER0", m2.person == Dictionaries.Person.You && doc.docType == Document.DocType.Article && m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"));
            AddFeature(features, "speaker-match-i-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && CorefRules.
                       EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "speaker-match-speaker-i", m2.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s2) && CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1, dictionaries));
            AddFeature(features, "speaker-match-i-speaker", m1.number == Dictionaries.Number.Singular && dictionaries.firstPersonPronouns.Contains(s1) && CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries));
            AddFeature(features, "speaker-match-you-you", dictionaries.secondPersonPronouns.Contains(s1) && dictionaries.secondPersonPronouns.Contains(s2) && CorefRules.EntitySameSpeaker(doc, m2, m1));
            AddFeature(features, "discourse-between-two-person", ((m2.person == Dictionaries.Person.I && m1.person == Dictionaries.Person.You || (m2.person == Dictionaries.Person.You && m1.person == Dictionaries.Person.I)) && (m2.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation
                                                                                                                                                                                                                                                          )) - m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) == 1) && doc.docType == Document.DocType.Conversation));
            AddFeature(features, "incompatible-not-match", m1.person != Dictionaries.Person.I && m2.person != Dictionaries.Person.I && (CorefRules.AntecedentIsMentionSpeaker(doc, m1, m2, dictionaries) || CorefRules.AntecedentIsMentionSpeaker(doc, m2, m1
                                                                                                                                                                                                                                                  , dictionaries)));
            int utteranceDist = Math.Abs(m1.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)) - m2.headWord.Get(typeof(CoreAnnotations.UtteranceAnnotation)));

            if (doc.docType != Document.DocType.Article && utteranceDist == 1 && !CorefRules.EntitySameSpeaker(doc, m2, m1))
            {
                AddFeature(features, "speaker-mismatch-i-i", m1.person == Dictionaries.Person.I && m2.person == Dictionaries.Person.I);
                AddFeature(features, "speaker-mismatch-you-you", m1.person == Dictionaries.Person.You && m2.person == Dictionaries.Person.You);
                AddFeature(features, "speaker-mismatch-we-we", m1.person == Dictionaries.Person.We && m2.person == Dictionaries.Person.We);
            }
            // other dcoref features
            string firstWord1 = FirstWord(m1).Word().ToLower();

            AddFeature(features, "indefinite-article-np", (m1.appositions == null && m1.predicateNominatives == null && (firstWord1.Equals("a") || firstWord1.Equals("an"))));
            AddFeature(features, "far-this", m2.LowercaseNormalizedSpanString().Equals("this") && Math.Abs(m2.sentNum - m1.sentNum) > 3);
            AddFeature(features, "per0-you-in-article", m2.person == Dictionaries.Person.You && doc.docType == Document.DocType.Article && m2.headWord.Get(typeof(CoreAnnotations.SpeakerAnnotation)).Equals("PER0"));
            AddFeature(features, "inside-in", m2.InsideIn(m1) || m1.InsideIn(m2));
            AddFeature(features, "indefinite-determiners", dictionaries.indefinitePronouns.Contains(m1.originalSpan[0].Lemma()) || dictionaries.indefinitePronouns.Contains(m2.originalSpan[0].Lemma()));
            AddFeature(features, "entity-attributes-agree", CorefRules.EntityAttributesAgree(c2, c1));
            AddFeature(features, "entity-token-distance", CorefRules.EntityTokenDistance(m2, m1));
            AddFeature(features, "i-within-i", CorefRules.EntityIWithinI(m2, m1, dictionaries));
            AddFeature(features, "exact-string-match", CorefRules.EntityExactStringMatch(c2, c1, dictionaries, doc.roleSet));
            AddFeature(features, "entity-relaxed-heads-agree", CorefRules.EntityRelaxedHeadsAgreeBetweenMentions(c2, c1, m2, m1));
            AddFeature(features, "is-acronym", CorefRules.EntityIsAcronym(doc, c2, c1));
            AddFeature(features, "demonym", m2.IsDemonym(m1, dictionaries));
            AddFeature(features, "incompatible-modifier", CorefRules.EntityHaveIncompatibleModifier(m2, m1));
            AddFeature(features, "head-lemma-match", m1.headWord.Lemma().Equals(m2.headWord.Lemma()));
            AddFeature(features, "words-included", CorefRules.EntityWordsIncluded(c2, c1, m2, m1));
            AddFeature(features, "extra-proper-noun", CorefRules.EntityHaveExtraProperNoun(m2, m1, new HashSet <string>()));
            AddFeature(features, "number-in-later-mentions", CorefRules.EntityNumberInLaterMention(m2, m1));
            AddFeature(features, "sentence-context-incompatible", CorefRules.SentenceContextIncompatible(m2, m1, dictionaries));
            // syntax features
            if (useConstituencyParse)
            {
                if (m1.sentNum == m2.sentNum)
                {
                    int  clauseCount = 0;
                    Tree tree        = m2.contextParseTree;
                    Tree current     = m2.mentionSubTree;
                    while (true)
                    {
                        current = current.Ancestor(1, tree);
                        if (current.Label().Value().StartsWith("S"))
                        {
                            clauseCount++;
                        }
                        if (current.Dominates(m1.mentionSubTree))
                        {
                            break;
                        }
                        if (current.Label().Value().Equals("ROOT") || current.Ancestor(1, tree) == null)
                        {
                            break;
                        }
                    }
                    features.IncrementCount("clause-count", clauseCount);
                    features.IncrementCount("clause-count=" + Bin(clauseCount));
                }
                if (RuleBasedCorefMentionFinder.IsPleonastic(m2, m2.contextParseTree) || RuleBasedCorefMentionFinder.IsPleonastic(m1, m1.contextParseTree))
                {
                    features.IncrementCount("pleonastic-it");
                }
                if (MaximalNp(m1.mentionSubTree) == MaximalNp(m2.mentionSubTree))
                {
                    features.IncrementCount("same-maximal-np");
                }
                bool m1Embedded = HeadEmbeddingLevel(m1.mentionSubTree, m1.headIndex - m1.startIndex) > 1;
                bool m2Embedded = HeadEmbeddingLevel(m2.mentionSubTree, m2.headIndex - m2.startIndex) > 1;
                features.IncrementCount("embedding=" + m1Embedded + "_" + m2Embedded);
            }
            return(features);
        }
Ejemplo n.º 20
0
        /// <summary>Builds the feature counter describing a single mention.</summary>
        /// <remarks>
        /// The features cover the mention's type, its length and position, the words and POS tags
        /// around it, the dependency relations above its head, constituency-parse features (only
        /// when <c>useConstituencyParse</c> is set), containment relative to other mentions with
        /// the same head index, and several indicators taken from dcoref rules.
        /// </remarks>
        /// <param name="doc">Document holding the mention; read for its type, predicted-mention count and sentence count.</param>
        /// <param name="m">The mention to extract features for.</param>
        /// <param name="mentionsByHeadIndex">Mentions grouped by head index; only the group for <paramref name="m"/>'s head index is consulted.</param>
        /// <returns>A counter keyed by feature name.</returns>
        private ICounter <string> GetFeatures(Document doc, Mention m, IDictionary <int, IList <Mention> > mentionsByHeadIndex)
        {
            ICounter <string> features = new ClassicCounter <string>();

            // type features
            features.IncrementCount("mention-type=" + m.mentionType);
            features.IncrementCount("gender=" + m.gender);
            features.IncrementCount("person-fine=" + m.person);
            features.IncrementCount("head-ne-type=" + m.nerString);
            IList <string> singletonFeatures = m.GetSingletonFeatures(dictionaries);

            // SingletonFeatures maps a position in the singleton-feature list to a feature name;
            // positions beyond the end of the list are ignored.
            foreach (KeyValuePair <int, string> e in SingletonFeatures)
            {
                if (e.Key < singletonFeatures.Count)
                {
                    features.IncrementCount(e.Value + "=" + singletonFeatures[e.Key]);
                }
            }
            // length and location features
            AddNumeric(features, "mention-length", m.SpanToString().Length);
            AddNumeric(features, "mention-words", m.originalSpan.Count);
            AddNumeric(features, "sentence-words", m.sentenceWords.Count);
            features.IncrementCount("sentence-words=" + Bin(m.sentenceWords.Count));
            // Positions are expressed as a fraction of the document (cast forces real division).
            features.IncrementCount("mention-position", m.mentionNum / (double)doc.predictedMentions.Count);
            features.IncrementCount("sentence-position", m.sentNum / (double)doc.numSentences);
            // lexical features
            CoreLabel firstWord    = FirstWord(m);
            CoreLabel lastWord     = LastWord(m);
            CoreLabel headWord     = HeadWord(m);
            CoreLabel prevWord     = PrevWord(m);
            CoreLabel nextWord     = NextWord(m);
            CoreLabel prevprevWord = PrevprevWord(m);
            CoreLabel nextnextWord = NextnextWord(m);
            string    headPOS      = GetPOS(headWord);
            string    firstPOS     = GetPOS(firstWord);
            string    lastPOS      = GetPOS(lastWord);
            string    prevPOS      = GetPOS(prevWord);
            string    nextPOS      = GetPOS(nextWord);
            string    prevprevPOS  = GetPOS(prevprevWord);
            string    nextnextPOS  = GetPOS(nextnextWord);

            features.IncrementCount("first-word=" + WordIndicator(firstWord, firstPOS));
            features.IncrementCount("last-word=" + WordIndicator(lastWord, lastPOS));
            features.IncrementCount("head-word=" + WordIndicator(headWord, headPOS));
            features.IncrementCount("next-word=" + WordIndicator(nextWord, nextPOS));
            features.IncrementCount("prev-word=" + WordIndicator(prevWord, prevPOS));
            features.IncrementCount("next-bigram=" + WordIndicator(nextWord, nextnextWord, nextPOS + "_" + nextnextPOS));
            features.IncrementCount("prev-bigram=" + WordIndicator(prevprevWord, prevWord, prevprevPOS + "_" + prevPOS));
            features.IncrementCount("next-pos=" + nextPOS);
            features.IncrementCount("prev-pos=" + prevPOS);
            features.IncrementCount("first-pos=" + firstPOS);
            features.IncrementCount("last-pos=" + lastPOS);
            features.IncrementCount("next-pos-bigram=" + nextPOS + "_" + nextnextPOS);
            features.IncrementCount("prev-pos-bigram=" + prevprevPOS + "_" + prevPOS);
            AddDependencyFeatures(features, "parent", GetDependencyParent(m), true);
            AddFeature(features, "ends-with-head", m.headIndex == m.endIndex - 1);
            AddFeature(features, "is-generic", m.originalSpan.Count == 1 && firstPOS.Equals("NNS"));
            // syntax features
            // Walk up from the head word along at most three dependency edges, emitting one
            // cumulative "dep-path=" feature per step.
            IndexedWord w       = m.headIndexedWord;
            string      depPath = string.Empty;
            int         depth   = 0;

            while (w != null)
            {
                SemanticGraphEdge e_1 = GetDependencyParent(m, w);
                depth++;
                if (depth <= 3 && e_1 != null)
                {
                    depPath += (depPath.IsEmpty() ? string.Empty : "_") + e_1.GetRelation().ToString();
                    features.IncrementCount("dep-path=" + depPath);
                    w = e_1.GetSource();
                }
                else
                {
                    w = null;
                }
            }
            if (useConstituencyParse)
            {
                int fullEmbeddingLevel    = HeadEmbeddingLevel(m.contextParseTree, m.headIndex);
                int mentionEmbeddingLevel = HeadEmbeddingLevel(m.mentionSubTree, m.headIndex - m.startIndex);
                // -1 is treated as "level could not be determined".
                if (fullEmbeddingLevel != -1 && mentionEmbeddingLevel != -1)
                {
                    features.IncrementCount("mention-embedding-level=" + Bin(fullEmbeddingLevel - mentionEmbeddingLevel));
                    features.IncrementCount("head-embedding-level=" + Bin(mentionEmbeddingLevel));
                }
                else
                {
                    features.IncrementCount("undetermined-embedding-level");
                }
                features.IncrementCount("num-embedded-nps=" + Bin(NumEmbeddedNps(m.mentionSubTree)));
                // Cumulative path of node labels from the head's parent node towards the tree
                // root, cut off after four nodes or at the first node labelled "S".
                string syntaxPath = string.Empty;
                Tree   tree       = m.contextParseTree;
                Tree   head       = tree.GetLeaves()[m.headIndex].Ancestor(1, tree);
                depth = 0;
                foreach (Tree node in tree.PathNodeToNode(head, tree))
                {
                    syntaxPath += node.Value() + "-";
                    features.IncrementCount("syntax-path=" + syntaxPath);
                    depth++;
                    if (depth >= 4 || node.Value().Equals("S"))
                    {
                        break;
                    }
                }
            }
            // mention containment features
            // NOTE(review): the converted code passed a null predicate to AnyMatch for both
            // features, which made them identical and fails at runtime. The predicates below are
            // reconstructed from the feature names: another mention with the same head index
            // either contains this mention or is contained in it. Confirm against the upstream
            // implementation.
            bool            containedInOther = false;
            bool            containsOther    = false;
            IList <Mention> sameHeadMentions;

            if (mentionsByHeadIndex.TryGetValue(m.headIndex, out sameHeadMentions) && sameHeadMentions != null)
            {
                foreach (Mention other in sameHeadMentions)
                {
                    // A mention is never compared with itself.
                    if (object.ReferenceEquals(other, m))
                    {
                        continue;
                    }
                    containedInOther = containedInOther || m.InsideIn(other);
                    containsOther    = containsOther || other.InsideIn(m);
                }
            }
            AddFeature(features, "contained-in-other-mention", containedInOther);
            AddFeature(features, "contains-other-mention", containsOther);
            // features from dcoref rules
            AddFeature(features, "bare-plural", m.originalSpan.Count == 1 && headPOS.Equals("NNS"));
            AddFeature(features, "quantifier-start", dictionaries.quantifiers.Contains(firstWord.Word().ToLower()));
            AddFeature(features, "negative-start", firstWord.Word().ToLower().Matches("none|no|nothing|not"));
            AddFeature(features, "partitive", RuleBasedCorefMentionFinder.PartitiveRule(m, m.sentenceWords, dictionaries));
            AddFeature(features, "adjectival-demonym", dictionaries.IsAdjectivalDemonym(m.SpanToString()));
            // "you know" outside of articles is counted as a generic use of "you".
            if (doc.docType != Document.DocType.Article && m.person == Dictionaries.Person.You && nextWord != null && Sharpen.Runtime.EqualsIgnoreCase(nextWord.Word(), "know"))
            {
                features.IncrementCount("generic-you");
            }
            return(features);
        }
Ejemplo n.º 21
0
        /// <summary>Renders a semantic graph as tab-separated text, one token per line, using the CoNLL-U annotations on each token.</summary>
        /// <remarks>
        /// Output consists of the graph's comment lines, then one ten-column row per vertex in
        /// sorted order (preceded by a range row for tokens that start a multiword span), and a
        /// final blank line.
        /// </remarks>
        /// <param name="sg">The graph to print.</param>
        /// <param name="unescapeParenthesis">When true, text matching <c>LrbPattern</c> / <c>RrbPattern</c> in the word and lemma is replaced by "(" and ")".</param>
        /// <returns>The textual representation of <paramref name="sg"/>.</returns>
        public virtual string PrintSemanticGraph(SemanticGraph sg, bool unescapeParenthesis)
        {
            bool          isTree = SemanticGraphUtils.IsTree(sg);
            StringBuilder sb     = new StringBuilder();
            // The original rows were built with Java printf specifiers ("%s", "%d", "%n"), which
            // .NET's string.Format does not interpret; the rows are now assembled explicitly.
            // "%n" is Java's platform line separator, hence Environment.NewLine.
            string rowEnd = System.Environment.NewLine;

            /* Print comments. */
            foreach (string comment in sg.GetComments())
            {
                sb.Append(comment).Append("\n");
            }
            foreach (IndexedWord token in sg.VertexListSorted())
            {
                /* Check for multiword tokens. */
                if (token.ContainsKey(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation)))
                {
                    IntPair tokenSpan = token.Get(typeof(CoreAnnotations.CoNLLUTokenSpanAnnotation));
                    // Only the first token of the span emits the range row.
                    if (tokenSpan.GetSource() == token.Index())
                    {
                        string range = string.Format(System.Globalization.CultureInfo.InvariantCulture, "{0}-{1}", tokenSpan.GetSource(), tokenSpan.GetTarget());
                        sb.Append(range).Append("\t").Append(token.OriginalText()).Append("\t_\t_\t_\t_\t_\t_\t_\t_").Append(rowEnd);
                    }
                }
                /* Try to find main governor and additional dependencies. */
                string govIdx = null;
                GrammaticalRelation         reln = null;
                Dictionary <string, string> enhancedDependencies = new Dictionary <string, string>();
                foreach (IndexedWord parent in sg.GetParents(token))
                {
                    SemanticGraphEdge edge = sg.GetEdge(parent, token);
                    // The first non-extra edge supplies the main governor; every edge is recorded
                    // as an enhanced dependency.
                    if (govIdx == null && !edge.IsExtra())
                    {
                        govIdx = parent.ToCopyIndex();
                        reln   = edge.GetRelation();
                    }
                    enhancedDependencies[parent.ToCopyIndex()] = edge.GetRelation().ToString();
                }
                string additionalDepsString = isTree ? "_" : CoNLLUUtils.ToExtraDepsString(enhancedDependencies);
                string word           = token.Word();
                string featuresString = CoNLLUUtils.ToFeatureString(token.Get(typeof(CoreAnnotations.CoNLLUFeats)));
                string pos            = token.GetString <CoreAnnotations.PartOfSpeechAnnotation>("_");
                string upos           = token.GetString <CoreAnnotations.CoarseTagAnnotation>("_");
                string misc           = token.GetString <CoreAnnotations.CoNLLUMisc>("_");
                string lemma          = token.GetString <CoreAnnotations.LemmaAnnotation>("_");
                string relnName       = reln == null ? "_" : reln.ToString();
                /* Root. */
                if (govIdx == null && sg.GetRoots().Contains(token))
                {
                    govIdx               = "0";
                    relnName             = GrammaticalRelation.Root.ToString();
                    additionalDepsString = isTree ? "_" : "0:" + relnName;
                }
                else
                {
                    // No governor and not a root: leave both columns unspecified.
                    if (govIdx == null)
                    {
                        govIdx   = "_";
                        relnName = "_";
                    }
                }
                if (unescapeParenthesis)
                {
                    word  = word.ReplaceAll(LrbPattern, "(");
                    word  = word.ReplaceAll(RrbPattern, ")");
                    lemma = lemma.ReplaceAll(LrbPattern, "(");
                    lemma = lemma.ReplaceAll(RrbPattern, ")");
                }
                // Ten tab-separated columns per token row.
                sb.Append(string.Join("\t", new string[] { token.ToCopyIndex(), word, lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc })).Append(rowEnd);
            }
            sb.Append("\n");
            return(sb.ToString());
        }
Ejemplo n.º 22
0
        /// <summary>The search algorithm, starting with a full sentence and iteratively shortening it to its entailed sentences.</summary>
        /// <remarks>
        /// Works on a copy of <c>this.parseTree</c>. After pre-processing (cutting "conj:and" edges,
        /// cleaning the tree, marking subject vertices) it runs a stack-based search over the
        /// vertices, at each vertex considering both keeping it and deleting it. The search stops
        /// early once <c>maxTicks</c> iterations or <c>maxResults</c> results are reached.
        /// NOTE(review): several calls below take <c>null</c> where the converted code presumably
        /// lost a lambda (<c>Filter(null).ForEach(null)</c>, <c>Lazy.Of(null)</c>); these need to be
        /// restored before the method can work as described.
        /// </remarks>
        /// <returns>A list of search results, corresponding to shortenings of the sentence.</returns>
        private IList <ForwardEntailerSearchProblem.SearchResult> SearchImplementation()
        {
            // Pre-process the tree
            // (work on a copy so the instance's parse tree is not mutated)
            SemanticGraph parseTree = new SemanticGraph(this.parseTree);

            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
            // (remove common determiners)
            IList <string> determinerRemovals = new List <string>();

            // NOTE(review): both the filter predicate and the action are null here, so nothing
            // visible in this method populates determinerRemovals -- the lambdas appear to have
            // been dropped during conversion.
            parseTree.GetLeafVertices().Stream().Filter(null).ForEach(null);
            // (cut conj_and nodes)
            ICollection <SemanticGraphEdge> andsToAdd = new HashSet <SemanticGraphEdge>();

            foreach (IndexedWord vertex in parseTree.VertexSet())
            {
                // Only vertices with more than one incoming edge are candidates.
                if (parseTree.InDegree(vertex) > 1)
                {
                    // If several incoming edges are "conj:and", the last one iterated wins.
                    SemanticGraphEdge conjAnd = null;
                    foreach (SemanticGraphEdge edge in parseTree.IncomingEdgeIterable(vertex))
                    {
                        if ("conj:and".Equals(edge.GetRelation().ToString()))
                        {
                            conjAnd = edge;
                        }
                    }
                    if (conjAnd != null)
                    {
                        // Remove the edge now and remember it so it can be re-added to results.
                        parseTree.RemoveEdge(conjAnd);
                        System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
                        andsToAdd.Add(conjAnd);
                    }
                }
            }
            // Clean the tree
            Edu.Stanford.Nlp.Naturalli.Util.CleanTree(parseTree);
            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
            // Find the subject / object split
            // This takes max O(n^2) time, expected O(n*log(n)) time.
            // Optimal is O(n), but I'm too lazy to implement it.
            // Bit (index - 1) is set for every vertex that has a "*subj" edge on its chain of
            // first incoming edges.
            BitSet isSubject = new BitSet(256);

            foreach (IndexedWord vertex_1 in parseTree.VertexSet())
            {
                // Search up the tree for a subj node; if found, mark that vertex as a subject.
                IEnumerator <SemanticGraphEdge> incomingEdges = parseTree.IncomingEdgeIterator(vertex_1);
                SemanticGraphEdge edge = null;
                if (incomingEdges.MoveNext())
                {
                    edge = incomingEdges.Current;
                }
                int numIters = 0;
                while (edge != null)
                {
                    if (edge.GetRelation().ToString().EndsWith("subj"))
                    {
                        System.Diagnostics.Debug.Assert(vertex_1.Index() > 0);
                        isSubject.Set(vertex_1.Index() - 1);
                        break;
                    }
                    // Move one step up: follow the first incoming edge of the governor.
                    incomingEdges = parseTree.IncomingEdgeIterator(edge.GetGovernor());
                    if (incomingEdges.MoveNext())
                    {
                        edge = incomingEdges.Current;
                    }
                    else
                    {
                        edge = null;
                    }
                    numIters += 1;
                    // Guard against cycles / degenerate graphs: give up after 100 steps.
                    if (numIters > 100)
                    {
                        //          log.error("tree has apparent depth > 100");
                        // NOTE(review): EmptyList is referenced without being invoked -- looks like
                        // a conversion artifact; confirm this yields an empty list.
                        return(Java.Util.Collections.EmptyList);
                    }
                }
            }
            // Outputs
            IList <ForwardEntailerSearchProblem.SearchResult> results = new List <ForwardEntailerSearchProblem.SearchResult>();

            // If any determiners were removed, the pre-processed tree itself is the first result,
            // scored as the "det" deletion probability raised to the number of removals.
            if (!determinerRemovals.IsEmpty())
            {
                if (andsToAdd.IsEmpty())
                {
                    double score = Math.Pow(weights.DeletionProbability("det"), (double)determinerRemovals.Count);
                    System.Diagnostics.Debug.Assert(!double.IsNaN(score));
                    System.Diagnostics.Debug.Assert(!double.IsInfinite(score));
                    results.Add(new ForwardEntailerSearchProblem.SearchResult(parseTree, determinerRemovals, score));
                }
                else
                {
                    // Re-attach the conj:and edges that were cut above on a copy of the tree.
                    SemanticGraph treeWithAnds = new SemanticGraph(parseTree);
                    System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(treeWithAnds));
                    foreach (SemanticGraphEdge and in andsToAdd)
                    {
                        treeWithAnds.AddEdge(and.GetGovernor(), and.GetDependent(), and.GetRelation(), double.NegativeInfinity, false);
                    }
                    // NOTE(review): this asserts tree-ness after re-adding edges that were removed
                    // from vertices with in-degree > 1 -- confirm the assertion is intended.
                    System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(treeWithAnds));
                    results.Add(new ForwardEntailerSearchProblem.SearchResult(treeWithAnds, determinerRemovals, Math.Pow(weights.DeletionProbability("det"), (double)determinerRemovals.Count)));
                }
            }
            // Initialize the search
            System.Diagnostics.Debug.Assert(Edu.Stanford.Nlp.Naturalli.Util.IsTree(parseTree));
            IList <IndexedWord> topologicalVertices;

            try
            {
                topologicalVertices = parseTree.TopologicalSort();
            }
            catch (InvalidOperationException)
            {
                //      log.info("Could not topologically sort the vertices! Using left-to-right traversal.");
                topologicalVertices = parseTree.VertexListSorted();
            }
            if (topologicalVertices.IsEmpty())
            {
                return(results);
            }
            // The fringe is a stack, so states are explored last-in, first-out.
            Stack <ForwardEntailerSearchProblem.SearchState> fringe = new Stack <ForwardEntailerSearchProblem.SearchState>();

            // Initial state: empty deletion mask, first vertex, unmodified tree, score 1.0.
            fringe.Push(new ForwardEntailerSearchProblem.SearchState(new BitSet(256), 0, parseTree, null, null, 1.0));
            // Start the search
            int numTicks = 0;

            while (!fringe.IsEmpty())
            {
                // Overhead with popping a node.
                // (budget checks: stop at maxTicks iterations or maxResults results)
                if (numTicks >= maxTicks)
                {
                    return(results);
                }
                numTicks += 1;
                if (results.Count >= maxResults)
                {
                    return(results);
                }
                ForwardEntailerSearchProblem.SearchState state = fringe.Pop();
                System.Diagnostics.Debug.Assert(state.score > 0.0);
                IndexedWord currentWord = topologicalVertices[state.currentIndex];
                // Push the case where we don't delete
                // (advance to the next vertex whose bit is not set in the deletion mask)
                int nextIndex = state.currentIndex + 1;
                int numIters  = 0;
                while (nextIndex < topologicalVertices.Count)
                {
                    IndexedWord nextWord = topologicalVertices[nextIndex];
                    System.Diagnostics.Debug.Assert(nextWord.Index() > 0);
                    if (!state.deletionMask.Get(nextWord.Index() - 1))
                    {
                        fringe.Push(new ForwardEntailerSearchProblem.SearchState(state.deletionMask, nextIndex, state.tree, null, state, state.score));
                        break;
                    }
                    else
                    {
                        nextIndex += 1;
                    }
                    numIters += 1;
                    if (numIters > 10000)
                    {
                        //          log.error("logic error (apparent infinite loop); returning");
                        return(results);
                    }
                }
                // Check if we can delete this subtree
                // (the first root can never be deleted)
                bool canDelete = !state.tree.GetFirstRoot().Equals(currentWord);
                foreach (SemanticGraphEdge edge in state.tree.IncomingEdgeIterable(currentWord))
                {
                    // A dependent whose governor is tagged "CD" is never deleted.
                    if ("CD".Equals(edge.GetGovernor().Tag()))
                    {
                        canDelete = false;
                    }
                    else
                    {
                        // Get token information
                        CoreLabel            token = edge.GetDependent().BackingLabel();
                        OperatorSpec         @operator;
                        NaturalLogicRelation lexicalRelation;
                        Polarity             tokenPolarity = token.Get(typeof(NaturalLogicAnnotations.PolarityAnnotation));
                        if (tokenPolarity == null)
                        {
                            tokenPolarity = Polarity.Default;
                        }
                        // Get the relation for this deletion
                        // (an operator annotation on the token takes precedence over the
                        // dependency-based relation)
                        if ((@operator = token.Get(typeof(NaturalLogicAnnotations.OperatorAnnotation))) != null)
                        {
                            lexicalRelation = @operator.instance.deleteRelation;
                        }
                        else
                        {
                            System.Diagnostics.Debug.Assert(edge.GetDependent().Index() > 0);
                            lexicalRelation = NaturalLogicRelation.ForDependencyDeletion(edge.GetRelation().ToString(), isSubject.Get(edge.GetDependent().Index() - 1));
                        }
                        // Project the lexical relation through the token's polarity.
                        NaturalLogicRelation projectedRelation = tokenPolarity.ProjectLexicalRelation(lexicalRelation);
                        // Make sure this is a valid entailment
                        if (!projectedRelation.ApplyToTruthValue(truthOfPremise).IsTrue())
                        {
                            canDelete = false;
                        }
                    }
                }
                if (canDelete)
                {
                    // Register the deletion
                    // NOTE(review): the supplier passed to Lazy.Of is null. From how the value is
                    // used below it presumably should produce the tree with currentWord's subtree
                    // removed together with the updated deletion mask -- confirm and restore.
                    Lazy <Pair <SemanticGraph, BitSet> > treeWithDeletionsAndNewMask = Lazy.Of(null);
                    // Compute the score of the sentence
                    // (the state's score times the deletion probability of each incoming edge)
                    double newScore = state.score;
                    foreach (SemanticGraphEdge edge_1 in state.tree.IncomingEdgeIterable(currentWord))
                    {
                        double multiplier = weights.DeletionProbability(edge_1, state.tree.OutgoingEdgeIterable(edge_1.GetGovernor()));
                        System.Diagnostics.Debug.Assert(!double.IsNaN(multiplier));
                        System.Diagnostics.Debug.Assert(!double.IsInfinite(multiplier));
                        newScore *= multiplier;
                    }
                    // Register the result
                    // (zero-probability deletions are dropped and not explored further)
                    if (newScore > 0.0)
                    {
                        SemanticGraph resultTree = new SemanticGraph(treeWithDeletionsAndNewMask.Get().first);
                        // NOTE(review): null predicate and action again; presumably this re-added
                        // the cut conj:and edges to resultTree -- confirm and restore.
                        andsToAdd.Stream().Filter(null).ForEach(null);
                        results.Add(new ForwardEntailerSearchProblem.SearchResult(resultTree, AggregateDeletedEdges(state, state.tree.IncomingEdgeIterable(currentWord), determinerRemovals), newScore));
                        // Push the state with this subtree deleted
                        // (same scan as the keep branch, but against the new deletion mask)
                        nextIndex = state.currentIndex + 1;
                        numIters  = 0;
                        while (nextIndex < topologicalVertices.Count)
                        {
                            IndexedWord   nextWord          = topologicalVertices[nextIndex];
                            BitSet        newMask           = treeWithDeletionsAndNewMask.Get().second;
                            SemanticGraph treeWithDeletions = treeWithDeletionsAndNewMask.Get().first;
                            if (!newMask.Get(nextWord.Index() - 1))
                            {
                                System.Diagnostics.Debug.Assert(treeWithDeletions.ContainsVertex(topologicalVertices[nextIndex]));
                                fringe.Push(new ForwardEntailerSearchProblem.SearchState(newMask, nextIndex, treeWithDeletions, null, state, newScore));
                                break;
                            }
                            else
                            {
                                nextIndex += 1;
                            }
                            numIters += 1;
                            if (numIters > 10000)
                            {
                                //              log.error("logic error (apparent infinite loop); returning");
                                return(results);
                            }
                        }
                    }
                }
            }
            // Return
            return(results);
        }
Ejemplo n.º 23
0
        /// <summary>Attributes speakers to quotes using the quote-removed dependency parses.</summary>
        /// <remarks>
        /// For every quote in <paramref name="doc"/> that has no mention annotation yet, candidate
        /// (subject, verb) pairs are collected from the quote's enhanced++ dependency graph:
        /// first via <c>subjVerbPattern</c>, then by pairing the <c>nsubj</c> child of each verb with
        /// that verb's verbal <c>dep</c> children. A pair is used only when both the verb and the
        /// subject lie inside the range returned by <c>QuoteAttributionUtils.GetRemainderInSentence</c>
        /// and the verb's lemma is in <c>commonSpeechWords</c>.
        /// </remarks>
        /// <param name="doc">The annotated document whose quotes are processed.</param>
        public virtual void DependencyParses(Annotation doc)
        {
            IList<ICoreMap> quotes = doc.Get(typeof(CoreAnnotations.QuotationsAnnotation));

            foreach (ICoreMap quote in quotes)
            {
                // Quotes that already carry a mention need no further attribution.
                if (quote.Get(typeof(QuoteAttributionAnnotator.MentionAnnotation)) != null)
                {
                    continue;
                }
                Pair<int, int> range = QuoteAttributionUtils.GetRemainderInSentence(doc, quote);
                if (range == null)
                {
                    continue;
                }
                //search for mentions in the first run
                Pair<List<string>, List<Pair<int, int>>> namesAndNameIndices = ScanForNames(range);
                List<string>          names       = namesAndNameIndices.first;
                List<Pair<int, int>>  nameIndices = namesAndNameIndices.second;
                SemanticGraph         graph       = quote.Get(typeof(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation));
                SemgrexMatcher        matcher     = subjVerbPattern.Matcher(graph);
                IList<Pair<IndexedWord, IndexedWord>> subjVerbPairs = new List<Pair<IndexedWord, IndexedWord>>();
                //TODO: check and see if this is necessary
                while (matcher.Find())
                {
                    IndexedWord matchedSubj = matcher.GetNode("SUBJ");
                    IndexedWord matchedVerb = matcher.GetNode("VERB");
                    subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(matchedSubj, matchedVerb));
                }
                IList<IndexedWord> vbs = graph.GetAllNodesByPartOfSpeechPattern("VB.*");
                foreach (IndexedWord iw in vbs)
                {
                    // does it have an nsubj child?
                    ICollection<IndexedWord> children = graph.GetChildren(iw);
                    IList<IndexedWord>       deps     = Generics.NewArrayList();
                    IndexedWord nsubj = null;
                    foreach (IndexedWord child in children)
                    {
                        SemanticGraphEdge sge = graph.GetEdge(iw, child);
                        if (sge.GetRelation().GetShortName().Equals("dep") && child.Tag().StartsWith("VB"))
                        {
                            deps.Add(child);
                        }
                        else if (sge.GetRelation().GetShortName().Equals("nsubj"))
                        {
                            nsubj = child;
                        }
                    }
                    if (nsubj != null)
                    {
                        // The verb's subject is shared with each of its verbal "dep" children.
                        foreach (IndexedWord dep in deps)
                        {
                            subjVerbPairs.Add(new Pair<IndexedWord, IndexedWord>(nsubj, dep));
                        }
                    }
                }
                //look for a speech verb
                foreach (Pair<IndexedWord, IndexedWord> SVPair in subjVerbPairs)
                {
                    IndexedWord verb = SVPair.second;
                    IndexedWord subj = SVPair.first;
                    //check if subj and verb outside of quote
                    int verbTokPos = TokenToLocation(verb.BackingLabel());
                    // Fixed: the subject position was previously computed from the verb's label,
                    // so the subject itself was never range-checked.
                    int subjTokPos = TokenToLocation(subj.BackingLabel());
                    if (!InRange(range, verbTokPos) || !InRange(range, subjTokPos) || !commonSpeechWords.Contains(verb.Lemma()))
                    {
                        continue;
                    }
                    if (subj.Tag().Equals("NNP"))
                    {
                        int startChar = subj.BeginPosition();
                        for (int i = 0; i < names.Count; i++)
                        {
                            Pair<int, int> nameIndex = nameIndices[i];
                            //avoid names that don't actually exist in
                            if (RangeContainsCharIndex(nameIndex, startChar))
                            {
                                FillInMention(quote, TokenRangeToString(nameIndex), nameIndex.first, nameIndex.second, sieveName, Name);
                                // Note: this only leaves the name scan; the remaining pairs are still examined.
                                break;
                            }
                        }
                    }
                    else if (subj.Tag().Equals("PRP"))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, Pronoun);
                        break;
                    }
                    else if (subj.Tag().Equals("NN") && animacySet.Contains(subj.Word()))
                    {
                        int loc = TokenToLocation(subj.BackingLabel());
                        FillInMention(quote, subj.Word(), loc, loc, sieveName, AnimateNoun);
                        break;
                    }
                }
            }
        }
Ejemplo n.º 24
0
        /// <summary>Fix some bizarre peculiarities with certain trees.</summary>
        /// <remarks>
        /// The cleanup runs the following steps, in order, mutating <paramref name="tree"/>:
        /// <ul>
        /// <li>Remove punctuation-tagged vertices that have no outgoing edges.</li>
        /// <li>Remove self-edges (a node from a word to itself; this seems wrong), re-attaching the
        ///     children of copy nodes, and remove <c>punct</c> edges to leaf dependents.</li>
        /// <li>Collect and remove redundant incoming edges of vertices that have an extra edge.</li>
        /// <li>Remove incoming edges of roots, dangling vertices and surplus incoming edges until
        ///     no more changes are needed.</li>
        /// <li>Rewrite a <c>dobj</c> edge to the word "that" as <c>mark</c> when the governor has
        ///     more than one <c>dobj</c>.</li>
        /// </ul>
        /// </remarks>
        /// <param name="tree">The tree to clean (in place!).</param>
        /// <returns>A list of extra edges, which are valid but were removed.</returns>
        public static IList<SemanticGraphEdge> CleanTree(SemanticGraph tree)
        {
            //    assert !isCyclic(tree);
            // Clean nodes
            IList<IndexedWord> toDelete = new List<IndexedWord>();

            foreach (IndexedWord vertex in tree.VertexSet())
            {
                // Clean punctuation
                if (vertex.Tag() == null)
                {
                    continue;
                }
                char tag = vertex.BackingLabel().Tag()[0];
                if (tag == '.' || tag == ',' || tag == '(' || tag == ')' || tag == ':')
                {
                    if (!tree.OutgoingEdgeIterator(vertex).MoveNext())
                    {
                        // This should really never happen, but it does.
                        toDelete.Add(vertex);
                    }
                }
            }
            // Fixed: the port called ForEach(null) here, so the collected vertices were never removed.
            foreach (IndexedWord punctuationVertex in toDelete)
            {
                tree.RemoveVertex(punctuationVertex);
            }
            // Clean edges
            // Fixed: IEnumerator<T> has no Remove(), and a graph must not be mutated while it is being
            // enumerated. Iterate over a snapshot and remove edges from the tree directly; each removal
            // still takes effect immediately, so later leaf checks see the updated tree.
            IList<SemanticGraphEdge> edgeSnapshot = new List<SemanticGraphEdge>();
            foreach (SemanticGraphEdge snapshotEdge in tree.EdgeIterable())
            {
                edgeSnapshot.Add(snapshotEdge);
            }
            IList<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>> toAdd = new List<Triple<IndexedWord, IndexedWord, SemanticGraphEdge>>();

            toDelete.Clear();
            foreach (SemanticGraphEdge edge in edgeSnapshot)
            {
                if (edge.GetDependent().Index() == edge.GetGovernor().Index())
                {
                    // Clean up copy-edges
                    if (edge.GetDependent().IsCopy(edge.GetGovernor()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetDependent()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetGovernor(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetDependent());
                    }
                    if (edge.GetGovernor().IsCopy(edge.GetDependent()))
                    {
                        foreach (SemanticGraphEdge toCopy in tree.OutgoingEdgeIterable(edge.GetGovernor()))
                        {
                            toAdd.Add(Triple.MakeTriple(edge.GetDependent(), toCopy.GetDependent(), toCopy));
                        }
                        toDelete.Add(edge.GetGovernor());
                    }
                    // Clean self-edges
                    tree.RemoveEdge(edge);
                }
                else if (edge.GetRelation().ToString().Equals("punct"))
                {
                    // Clean punctuation (again)
                    if (!tree.OutgoingEdgeIterator(edge.GetDependent()).MoveNext())
                    {
                        // This should really never happen, but it does.
                        tree.RemoveEdge(edge);
                    }
                }
            }
            // (add edges we wanted to add)
            // Fixed: ForEach(null) replaced by the vertex removal this step exists for.
            foreach (IndexedWord copyVertex in toDelete)
            {
                tree.RemoveVertex(copyVertex);
            }
            foreach (Triple<IndexedWord, IndexedWord, SemanticGraphEdge> pending in toAdd)
            {
                tree.AddEdge(pending.first, pending.second, pending.third.GetRelation(), pending.third.GetWeight(), pending.third.IsExtra());
            }
            // Handle extra edges.
            // Two cases:
            // (1) the extra edge is a subj/obj edge and the main edge is a conj:.*
            //     in this case, keep the extra
            // (2) otherwise, delete the extra
            IList<SemanticGraphEdge> extraEdges = new List<SemanticGraphEdge>();

            foreach (SemanticGraphEdge treeEdge in tree.EdgeIterable())
            {
                if (!treeEdge.IsExtra())
                {
                    continue;
                }
                IList<SemanticGraphEdge> incomingEdges = tree.IncomingEdgeList(treeEdge.GetDependent());
                SemanticGraphEdge        toKeep        = null;
                foreach (SemanticGraphEdge candidate in incomingEdges)
                {
                    if (toKeep == null)
                    {
                        toKeep = candidate;
                    }
                    else if (toKeep.GetRelation().ToString().StartsWith("conj") && candidate.GetRelation().ToString().Matches(".subj.*|.obj.*"))
                    {
                        toKeep = candidate;
                    }
                    else if (!candidate.IsExtra() && !(candidate.GetRelation().ToString().StartsWith("conj") && toKeep.GetRelation().ToString().Matches(".subj.*|.obj.*")))
                    {
                        toKeep = candidate;
                    }
                }
                foreach (SemanticGraphEdge candidate in incomingEdges)
                {
                    if (candidate != toKeep)
                    {
                        extraEdges.Add(candidate);
                    }
                }
            }
            // Fixed: ForEach(null) replaced by the edge removal described in case (2) above.
            foreach (SemanticGraphEdge redundantEdge in extraEdges)
            {
                tree.RemoveEdge(redundantEdge);
            }
            // Add apposition edges (simple coref)
            foreach (SemanticGraphEdge extraEdge in new List<SemanticGraphEdge>(extraEdges))
            {
                // note[gabor] prevent concurrent modification exception
                foreach (SemanticGraphEdge candidateAppos in tree.IncomingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (candidateAppos.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos.GetGovernor(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
                foreach (SemanticGraphEdge candidateAppos in tree.OutgoingEdgeIterable(extraEdge.GetDependent()))
                {
                    if (candidateAppos.GetRelation().ToString().Equals("appos"))
                    {
                        extraEdges.Add(new SemanticGraphEdge(extraEdge.GetGovernor(), candidateAppos.GetDependent(), extraEdge.GetRelation(), extraEdge.GetWeight(), extraEdge.IsExtra()));
                    }
                }
            }
            // Brute force ensure tree
            // Remove incoming edges from roots
            IList<SemanticGraphEdge> rootIncomingEdges = new List<SemanticGraphEdge>();

            foreach (IndexedWord root in tree.GetRoots())
            {
                foreach (SemanticGraphEdge incomingEdge in tree.IncomingEdgeIterable(root))
                {
                    rootIncomingEdges.Add(incomingEdge);
                }
            }
            // Fixed: ForEach(null) replaced by the removal announced in the comment above.
            foreach (SemanticGraphEdge rootEdge in rootIncomingEdges)
            {
                tree.RemoveEdge(rootEdge);
            }
            // Loop until it becomes a tree.
            bool changed = true;

            while (changed)
            {
                // I just want trees to be trees; is that so much to ask!?
                changed = false;
                IList<IndexedWord>       danglingNodes = new List<IndexedWord>();
                IList<SemanticGraphEdge> invalidEdges  = new List<SemanticGraphEdge>();
                foreach (IndexedWord candidateVertex in tree.VertexSet())
                {
                    // Collect statistics
                    // Fixed: the port left an invalid "incomingIter.Current;" statement, advanced the
                    // enumerator past the second edge before collecting, and then added an out-of-scope
                    // variable. Keep the first incoming edge and register every later one as invalid.
                    bool hasIncoming = false;
                    foreach (SemanticGraphEdge incoming in tree.IncomingEdgeIterable(candidateVertex))
                    {
                        if (hasIncoming)
                        {
                            invalidEdges.Add(incoming);
                        }
                        hasIncoming = true;
                    }
                    // Register actions
                    if (!hasIncoming && !tree.GetRoots().Contains(candidateVertex))
                    {
                        danglingNodes.Add(candidateVertex);
                    }
                }
                // Perform actions
                foreach (IndexedWord dangling in danglingNodes)
                {
                    tree.RemoveVertex(dangling);
                    changed = true;
                }
                foreach (SemanticGraphEdge invalid in invalidEdges)
                {
                    tree.RemoveEdge(invalid);
                    changed = true;
                }
            }
            // Edge case: remove duplicate dobj to "that."
            //            This is a common parse error.
            foreach (IndexedWord governor in tree.VertexSet())
            {
                SemanticGraphEdge thatEdge = null;
                int dobjCount = 0;
                foreach (SemanticGraphEdge outgoing in tree.OutgoingEdgeIterable(governor))
                {
                    // Fixed: the loop body referenced an out-of-scope variable instead of the loop variable.
                    if (Sharpen.Runtime.EqualsIgnoreCase("that", outgoing.GetDependent().Word()))
                    {
                        thatEdge = outgoing;
                    }
                    if ("dobj".Equals(outgoing.GetRelation().ToString()))
                    {
                        dobjCount += 1;
                    }
                }
                if (dobjCount > 1 && thatEdge != null)
                {
                    // Case: there are two dobj edges, one of which goes to the word "that"
                    // Action: rewrite the dobj edge to "that" to be a "mark" edge.
                    tree.RemoveEdge(thatEdge);
                    tree.AddEdge(thatEdge.GetGovernor(), thatEdge.GetDependent(), GrammaticalRelation.ValueOf(thatEdge.GetRelation().GetLanguage(), "mark"), thatEdge.GetWeight(), thatEdge.IsExtra());
                }
            }
            // Return
            System.Diagnostics.Debug.Assert(IsTree(tree));
            return(extraEdges);
        }
        /// <summary>Exports one document's coreference data as JSON.</summary>
        /// <remarks>
        /// Writes the gold clusters (as arrays of mention ids, keyed by <paramref name="id"/>) to
        /// <c>goldClusterWriter</c>, then writes a single JSON object containing the sentences,
        /// mentions, pair labels, pair feature names, pair features and document features to
        /// <c>dataWriter</c>.
        /// </remarks>
        /// <param name="id">Identifier of the document; used as the key and as <c>doc_id</c>.</param>
        /// <param name="document">The document whose mentions and mention pairs are exported.</param>
        public virtual void Process(int id, Document document)
        {
            // Gold clusters: one array of mention ids per cluster.
            IJsonArrayBuilder clusters = Javax.Json.Json.CreateArrayBuilder();

            foreach (CorefCluster gold in document.goldCorefClusters.Values)
            {
                IJsonArrayBuilder c = Javax.Json.Json.CreateArrayBuilder();
                foreach (Mention m in gold.corefMentions)
                {
                    c.Add(m.mentionID);
                }
                clusters.Add(c.Build());
            }
            goldClusterWriter.Println(Javax.Json.Json.CreateObjectBuilder().Add(id.ToString(), clusters.Build()).Build());

            IDictionary<Pair<int, int>, bool> mentionPairs = CorefUtils.GetLabeledMentionPairs(document);
            IList<Mention> mentionsList = CorefUtils.GetSortedMentions(document);
            IDictionary<int, IList<Mention>> mentionsByHeadIndex = new Dictionary<int, IList<Mention>>();

            // Group the mentions by head index.
            foreach (Mention m_1 in mentionsList)
            {
                // Fixed: the port passed a null factory to ComputeIfAbsent, so no list could ever be
                // created for a head index seen for the first time.
                IList<Mention> withIndex;
                if (!mentionsByHeadIndex.TryGetValue(m_1.headIndex, out withIndex))
                {
                    withIndex = new List<Mention>();
                    mentionsByHeadIndex[m_1.headIndex] = withIndex;
                }
                withIndex.Add(m_1);
            }

            // Document-level features.
            IJsonObjectBuilder docFeatures = Javax.Json.Json.CreateObjectBuilder();

            docFeatures.Add("doc_id", id);
            docFeatures.Add("type", document.docType == Document.DocType.Article ? 1 : 0);
            docFeatures.Add("source", document.docInfo["DOC_ID"].Split("/")[0]);

            IJsonArrayBuilder sentences = Javax.Json.Json.CreateArrayBuilder();

            foreach (ICoreMap sentence in document.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
            {
                sentences.Add(GetSentenceArray(sentence.Get(typeof(CoreAnnotations.TokensAnnotation))));
            }

            // Per-mention records, keyed by mention number.
            IJsonObjectBuilder mentions = Javax.Json.Json.CreateObjectBuilder();

            foreach (Mention m_2 in document.predictedMentionsByID.Values)
            {
                // Only the first incoming edge of the head word (if any) is reported.
                IEnumerator<SemanticGraphEdge> iterator = m_2.enhancedDependency.IncomingEdgeIterator(m_2.headIndexedWord);
                SemanticGraphEdge relation    = iterator.MoveNext() ? iterator.Current : null;
                string            depRelation = relation == null ? "no-parent" : relation.GetRelation().ToString();
                string            depParent   = relation == null ? "<missing>" : relation.GetSource().Word();
                // NOTE(review): the predicate passed to AnyMatch below was lost in the Java-to-C# port
                // (it is null). It cannot be reconstructed from this file alone -- restore it from the
                // upstream implementation before relying on "contained-in-other-mention".
                mentions.Add(
                    m_2.mentionNum.ToString(),
                    Javax.Json.Json.CreateObjectBuilder()
                        .Add("doc_id", id)
                        .Add("mention_id", m_2.mentionID)
                        .Add("mention_num", m_2.mentionNum)
                        .Add("sent_num", m_2.sentNum)
                        .Add("start_index", m_2.startIndex)
                        .Add("end_index", m_2.endIndex)
                        .Add("head_index", m_2.headIndex)
                        .Add("mention_type", m_2.mentionType.ToString())
                        .Add("dep_relation", depRelation)
                        .Add("dep_parent", depParent)
                        .Add("sentence", GetSentenceArray(m_2.sentenceWords))
                        .Add("contained-in-other-mention", mentionsByHeadIndex[m_2.headIndex].Stream().AnyMatch(null) ? 1 : 0)
                        .Build());
            }

            IJsonArrayBuilder featureNames = Javax.Json.Json.CreateArrayBuilder()
                .Add("same-speaker")
                .Add("antecedent-is-mention-speaker")
                .Add("mention-is-antecedent-speaker")
                .Add("relaxed-head-match")
                .Add("exact-string-match")
                .Add("relaxed-string-match");
            IJsonObjectBuilder features = Javax.Json.Json.CreateObjectBuilder();
            IJsonObjectBuilder labels   = Javax.Json.Json.CreateObjectBuilder();

            // Pairwise features and labels, keyed by "<mentionNum1> <mentionNum2>".
            foreach (KeyValuePair<Pair<int, int>, bool> e in mentionPairs)
            {
                Mention           m1      = document.predictedMentionsByID[e.Key.first];
                Mention           m2      = document.predictedMentionsByID[e.Key.second];
                string            key     = m1.mentionNum + " " + m2.mentionNum;
                IJsonArrayBuilder builder = Javax.Json.Json.CreateArrayBuilder();
                foreach (int val in CategoricalFeatureExtractor.PairwiseFeatures(document, m1, m2, dictionaries, conll))
                {
                    builder.Add(val);
                }
                features.Add(key, builder.Build());
                labels.Add(key, e.Value ? 1 : 0);
            }

            IJsonObject docData = Javax.Json.Json.CreateObjectBuilder()
                .Add("sentences", sentences.Build())
                .Add("mentions", mentions.Build())
                .Add("labels", labels.Build())
                .Add("pair_feature_names", featureNames.Build())
                .Add("pair_features", features.Build())
                .Add("document_features", docFeatures.Build())
                .Build();

            dataWriter.Println(docData);
        }