/// <summary>
/// Adds features describing the source (governor) end of a dependency edge to
/// <paramref name="features"/>.
/// </summary>
/// <param name="features">Counter that receives the new feature counts.</param>
/// <param name="prefix">Label used to build the feature names (e.g. "parent").</param>
/// <param name="e">The dependency edge to describe; when null a single "no-&lt;prefix&gt;" feature is added instead.</param>
/// <param name="addWord">When true, a lexicalized "&lt;prefix&gt;-word=" feature is added in addition to the POS and relation features.</param>
private void AddDependencyFeatures(ICounter<string> features, string prefix, SemanticGraphEdge e, bool addWord)
{
    if (e != null)
    {
        // Read everything off the edge first, then emit the features.
        IndexedWord governor = e.GetSource();
        string governorTag = governor.Tag();
        string governorWord = governor.Word();
        string relationName = e.GetRelation().ToString();

        if (addWord)
        {
            features.IncrementCount(prefix + "-word=" + WordIndicator(governorWord, governorTag));
        }
        features.IncrementCount(prefix + "-POS=" + governorTag);
        features.IncrementCount(prefix + "-relation=" + relationName);
    }
    else
    {
        // No edge available: record its absence as a feature of its own.
        features.IncrementCount("no-" + prefix);
    }
}
/// <summary>
/// Builds the embedding vector for a mention by concatenating averaged context
/// embeddings, the supplied document embedding, and the embeddings of individual
/// words at fixed offsets around the mention.
/// </summary>
/// <param name="m">The mention to embed.</param>
/// <param name="docEmbedding">Document-level embedding, included as one segment of the result.</param>
/// <returns>The result of <c>NeuralUtils.Concatenate</c> over all segments, in the order listed below.</returns>
public virtual SimpleMatrix GetMentionEmbeddings(Mention m, SimpleMatrix docEmbedding)
{
    // Only the first incoming edge of the head word (if any) is considered.
    IEnumerator<SemanticGraphEdge> incomingEdges = m.enhancedDependency.IncomingEdgeIterator(m.headIndexedWord);
    SemanticGraphEdge headEdge = null;
    if (incomingEdges.MoveNext())
    {
        headEdge = incomingEdges.Current;
    }

    return NeuralUtils.Concatenate(
        // averaged embeddings: mention span, window before, window after, sentence
        GetAverageEmbedding(m.sentenceWords, m.startIndex, m.endIndex),
        GetAverageEmbedding(m.sentenceWords, m.startIndex - 5, m.startIndex),
        GetAverageEmbedding(m.sentenceWords, m.endIndex, m.endIndex + 5),
        // NOTE(review): the sub-list stops at Count - 1, i.e. it leaves out the last
        // sentence token; kept exactly as in the original.
        GetAverageEmbedding(m.sentenceWords.SubList(0, m.sentenceWords.Count - 1)),
        docEmbedding,
        // single-word embeddings: head, first and last mention word, then neighbours
        GetWordEmbedding(m.sentenceWords, m.headIndex),
        GetWordEmbedding(m.sentenceWords, m.startIndex),
        GetWordEmbedding(m.sentenceWords, m.endIndex - 1),
        GetWordEmbedding(m.sentenceWords, m.startIndex - 1),
        GetWordEmbedding(m.sentenceWords, m.endIndex),
        GetWordEmbedding(m.sentenceWords, m.startIndex - 2),
        GetWordEmbedding(m.sentenceWords, m.endIndex + 1),
        // word at the source of the head's incoming edge, or null when there is none
        GetWordEmbedding(headEdge == null ? null : headEdge.GetSource().Word()));
}
/// <summary>
/// Extracts the feature counter for a single mention: type, length/location,
/// lexical, dependency, optional constituency-parse, containment and
/// rule-derived features.
/// </summary>
/// <param name="doc">Document the mention belongs to; supplies mention/sentence totals and the document type.</param>
/// <param name="m">The mention to featurize.</param>
/// <param name="mentionsByHeadIndex">Mentions grouped by head index; used to look for other mentions sharing this mention's head index.</param>
/// <returns>A new counter holding the extracted features.</returns>
private ICounter<string> GetFeatures(Document doc, Mention m, IDictionary<int, IList<Mention>> mentionsByHeadIndex)
{
    ICounter<string> features = new ClassicCounter<string>();

    // type features
    features.IncrementCount("mention-type=" + m.mentionType);
    features.IncrementCount("gender=" + m.gender);
    features.IncrementCount("person-fine=" + m.person);
    features.IncrementCount("head-ne-type=" + m.nerString);
    IList<string> singletonFeatures = m.GetSingletonFeatures(dictionaries);
    foreach (KeyValuePair<int, string> e in SingletonFeatures)
    {
        // The key is used as a position in the mention's singleton-feature list.
        if (e.Key < singletonFeatures.Count)
        {
            features.IncrementCount(e.Value + "=" + singletonFeatures[e.Key]);
        }
    }

    // length and location features
    AddNumeric(features, "mention-length", m.SpanToString().Length);
    AddNumeric(features, "mention-words", m.originalSpan.Count);
    AddNumeric(features, "sentence-words", m.sentenceWords.Count);
    features.IncrementCount("sentence-words=" + Bin(m.sentenceWords.Count));
    features.IncrementCount("mention-position", m.mentionNum / (double)doc.predictedMentions.Count);
    features.IncrementCount("sentence-position", m.sentNum / (double)doc.numSentences);

    // lexical features
    CoreLabel firstWord = FirstWord(m);
    CoreLabel lastWord = LastWord(m);
    CoreLabel headWord = HeadWord(m);
    CoreLabel prevWord = PrevWord(m);
    CoreLabel nextWord = NextWord(m);
    CoreLabel prevprevWord = PrevprevWord(m);
    CoreLabel nextnextWord = NextnextWord(m);
    string headPOS = GetPOS(headWord);
    string firstPOS = GetPOS(firstWord);
    string lastPOS = GetPOS(lastWord);
    string prevPOS = GetPOS(prevWord);
    string nextPOS = GetPOS(nextWord);
    string prevprevPOS = GetPOS(prevprevWord);
    string nextnextPOS = GetPOS(nextnextWord);
    features.IncrementCount("first-word=" + WordIndicator(firstWord, firstPOS));
    features.IncrementCount("last-word=" + WordIndicator(lastWord, lastPOS));
    features.IncrementCount("head-word=" + WordIndicator(headWord, headPOS));
    features.IncrementCount("next-word=" + WordIndicator(nextWord, nextPOS));
    features.IncrementCount("prev-word=" + WordIndicator(prevWord, prevPOS));
    features.IncrementCount("next-bigram=" + WordIndicator(nextWord, nextnextWord, nextPOS + "_" + nextnextPOS));
    features.IncrementCount("prev-bigram=" + WordIndicator(prevprevWord, prevWord, prevprevPOS + "_" + prevPOS));
    features.IncrementCount("next-pos=" + nextPOS);
    features.IncrementCount("prev-pos=" + prevPOS);
    features.IncrementCount("first-pos=" + firstPOS);
    features.IncrementCount("last-pos=" + lastPOS);
    features.IncrementCount("next-pos-bigram=" + nextPOS + "_" + nextnextPOS);
    features.IncrementCount("prev-pos-bigram=" + prevprevPOS + "_" + prevPOS);
    AddDependencyFeatures(features, "parent", GetDependencyParent(m), true);
    AddFeature(features, "ends-with-head", m.headIndex == m.endIndex - 1);
    AddFeature(features, "is-generic", m.originalSpan.Count == 1 && firstPOS.Equals("NNS"));

    // syntax features: relation path from the head word upwards, at most 3 edges,
    // emitting one feature per path prefix
    IndexedWord w = m.headIndexedWord;
    string depPath = string.Empty;
    int depth = 0;
    while (w != null)
    {
        SemanticGraphEdge edge = GetDependencyParent(m, w);
        depth++;
        if (depth > 3 || edge == null)
        {
            break;
        }
        depPath += (depPath.Length == 0 ? string.Empty : "_") + edge.GetRelation().ToString();
        features.IncrementCount("dep-path=" + depPath);
        w = edge.GetSource();
    }

    if (useConstituencyParse)
    {
        int fullEmbeddingLevel = HeadEmbeddingLevel(m.contextParseTree, m.headIndex);
        int mentionEmbeddingLevel = HeadEmbeddingLevel(m.mentionSubTree, m.headIndex - m.startIndex);
        // -1 from HeadEmbeddingLevel is treated as "could not be determined".
        if (fullEmbeddingLevel != -1 && mentionEmbeddingLevel != -1)
        {
            features.IncrementCount("mention-embedding-level=" + Bin(fullEmbeddingLevel - mentionEmbeddingLevel));
            features.IncrementCount("head-embedding-level=" + Bin(mentionEmbeddingLevel));
        }
        else
        {
            features.IncrementCount("undetermined-embedding-level");
        }
        features.IncrementCount("num-embedded-nps=" + Bin(NumEmbeddedNps(m.mentionSubTree)));

        // Path of node labels from the head's parent node towards the root; stops
        // after 4 nodes or at the first "S" node, emitting one feature per prefix.
        string syntaxPath = string.Empty;
        Tree tree = m.contextParseTree;
        Tree head = tree.GetLeaves()[m.headIndex].Ancestor(1, tree);
        depth = 0;
        foreach (Tree node in tree.PathNodeToNode(head, tree))
        {
            syntaxPath += node.Value() + "-";
            features.IncrementCount("syntax-path=" + syntaxPath);
            depth++;
            if (depth >= 4 || node.Value().Equals("S"))
            {
                break;
            }
        }
    }

    // mention containment features
    // The original code passed a null predicate to AnyMatch for both features (the
    // lambdas were lost in translation). The predicates below follow the upstream
    // CoreNLP implementation: compare against every *other* mention sharing this
    // head index. NOTE(review): relies on Mention.InsideIn(Mention) being present
    // in this port -- confirm.
    bool containedInOther = false;
    bool containsOther = false;
    IList<Mention> sameHeadMentions;
    if (mentionsByHeadIndex.TryGetValue(m.headIndex, out sameHeadMentions) && sameHeadMentions != null)
    {
        foreach (Mention other in sameHeadMentions)
        {
            if (object.ReferenceEquals(other, m))
            {
                continue;
            }
            if (!containedInOther && m.InsideIn(other))
            {
                containedInOther = true;
            }
            if (!containsOther && other.InsideIn(m))
            {
                containsOther = true;
            }
            if (containedInOther && containsOther)
            {
                break;
            }
        }
    }
    AddFeature(features, "contained-in-other-mention", containedInOther);
    AddFeature(features, "contains-other-mention", containsOther);

    // features from dcoref rules
    AddFeature(features, "bare-plural", m.originalSpan.Count == 1 && headPOS.Equals("NNS"));
    string firstWordLower = firstWord.Word().ToLower();
    AddFeature(features, "quantifier-start", dictionaries.quantifiers.Contains(firstWordLower));
    AddFeature(features, "negative-start", firstWordLower.Matches("none|no|nothing|not"));
    AddFeature(features, "partitive", RuleBasedCorefMentionFinder.PartitiveRule(m, m.sentenceWords, dictionaries));
    AddFeature(features, "adjectival-demonym", dictionaries.IsAdjectivalDemonym(m.SpanToString()));
    if (doc.docType != Document.DocType.Article
        && m.person == Dictionaries.Person.You
        && nextWord != null
        && Sharpen.Runtime.EqualsIgnoreCase(nextWord.Word(), "know"))
    {
        // "you" directly followed by "know" outside of articles
        features.IncrementCount("generic-you");
    }
    return features;
}
/// <summary>
/// Serializes one document to JSON: the gold clusters go to
/// <c>goldClusterWriter</c>; sentences, mentions, pairwise features, pair labels
/// and document-level features go to <c>dataWriter</c>.
/// </summary>
/// <param name="id">Numeric id of the document; used as the key of the gold-cluster record and as "doc_id".</param>
/// <param name="document">The document to export.</param>
public virtual void Process(int id, Document document)
{
    // Gold clusters: an array of arrays of mention ids, keyed by the document id.
    IJsonArrayBuilder clusters = Javax.Json.Json.CreateArrayBuilder();
    foreach (CorefCluster gold in document.goldCorefClusters.Values)
    {
        IJsonArrayBuilder c = Javax.Json.Json.CreateArrayBuilder();
        foreach (Mention m in gold.corefMentions)
        {
            c.Add(m.mentionID);
        }
        clusters.Add(c.Build());
    }
    goldClusterWriter.Println(Javax.Json.Json.CreateObjectBuilder().Add(id.ToString(), clusters.Build()).Build());

    IDictionary<Pair<int, int>, bool> mentionPairs = CorefUtils.GetLabeledMentionPairs(document);
    IList<Mention> mentionsList = CorefUtils.GetSortedMentions(document);

    // Group mentions by head index. The original passed a null factory to
    // ComputeIfAbsent (lambda lost in translation), so no list was ever created;
    // create the list explicitly on first sight of a head index.
    IDictionary<int, IList<Mention>> mentionsByHeadIndex = new Dictionary<int, IList<Mention>>();
    foreach (Mention m_1 in mentionsList)
    {
        IList<Mention> withIndex;
        if (!mentionsByHeadIndex.TryGetValue(m_1.headIndex, out withIndex))
        {
            withIndex = new List<Mention>();
            mentionsByHeadIndex[m_1.headIndex] = withIndex;
        }
        withIndex.Add(m_1);
    }

    // Document-level features.
    IJsonObjectBuilder docFeatures = Javax.Json.Json.CreateObjectBuilder();
    docFeatures.Add("doc_id", id);
    docFeatures.Add("type", document.docType == Document.DocType.Article ? 1 : 0);
    // "source" is the part of DOC_ID before the first '/'.
    docFeatures.Add("source", document.docInfo["DOC_ID"].Split('/')[0]);

    IJsonArrayBuilder sentences = Javax.Json.Json.CreateArrayBuilder();
    foreach (ICoreMap sentence in document.annotation.Get(typeof(CoreAnnotations.SentencesAnnotation)))
    {
        sentences.Add(GetSentenceArray(sentence.Get(typeof(CoreAnnotations.TokensAnnotation))));
    }

    // Per-mention records, keyed by mention number.
    IJsonObjectBuilder mentions = Javax.Json.Json.CreateObjectBuilder();
    foreach (Mention m_2 in document.predictedMentionsByID.Values)
    {
        // Only the first incoming edge of the head word (if any) is exported.
        IEnumerator<SemanticGraphEdge> iterator = m_2.enhancedDependency.IncomingEdgeIterator(m_2.headIndexedWord);
        SemanticGraphEdge relation = iterator.MoveNext() ? iterator.Current : null;
        string depRelation = relation == null ? "no-parent" : relation.GetRelation().ToString();
        string depParent = relation == null ? "<missing>" : relation.GetSource().Word();

        // The original passed a null predicate to AnyMatch. Restored per upstream
        // CoreNLP: true when some *other* mention with the same head index contains
        // this one. NOTE(review): relies on Mention.InsideIn(Mention) being present
        // in this port -- confirm.
        bool containedInOther = false;
        IList<Mention> sameHeadMentions;
        if (mentionsByHeadIndex.TryGetValue(m_2.headIndex, out sameHeadMentions))
        {
            foreach (Mention other in sameHeadMentions)
            {
                if (!object.ReferenceEquals(other, m_2) && m_2.InsideIn(other))
                {
                    containedInOther = true;
                    break;
                }
            }
        }

        mentions.Add(
            m_2.mentionNum.ToString(),
            Javax.Json.Json.CreateObjectBuilder()
                .Add("doc_id", id)
                .Add("mention_id", m_2.mentionID)
                .Add("mention_num", m_2.mentionNum)
                .Add("sent_num", m_2.sentNum)
                .Add("start_index", m_2.startIndex)
                .Add("end_index", m_2.endIndex)
                .Add("head_index", m_2.headIndex)
                .Add("mention_type", m_2.mentionType.ToString())
                .Add("dep_relation", depRelation)
                .Add("dep_parent", depParent)
                .Add("sentence", GetSentenceArray(m_2.sentenceWords))
                .Add("contained-in-other-mention", containedInOther ? 1 : 0)
                .Build());
    }

    IJsonArrayBuilder featureNames = Javax.Json.Json.CreateArrayBuilder()
        .Add("same-speaker")
        .Add("antecedent-is-mention-speaker")
        .Add("mention-is-antecedent-speaker")
        .Add("relaxed-head-match")
        .Add("exact-string-match")
        .Add("relaxed-string-match");

    // Pairwise features and labels, keyed by "<mentionNum1> <mentionNum2>".
    IJsonObjectBuilder features = Javax.Json.Json.CreateObjectBuilder();
    IJsonObjectBuilder labels = Javax.Json.Json.CreateObjectBuilder();
    foreach (KeyValuePair<Pair<int, int>, bool> e in mentionPairs)
    {
        Mention m1 = document.predictedMentionsByID[e.Key.first];
        Mention m2 = document.predictedMentionsByID[e.Key.second];
        string key = m1.mentionNum + " " + m2.mentionNum;
        IJsonArrayBuilder builder = Javax.Json.Json.CreateArrayBuilder();
        foreach (int val in CategoricalFeatureExtractor.PairwiseFeatures(document, m1, m2, dictionaries, conll))
        {
            builder.Add(val);
        }
        features.Add(key, builder.Build());
        labels.Add(key, e.Value ? 1 : 0);
    }

    IJsonObject docData = Javax.Json.Json.CreateObjectBuilder()
        .Add("sentences", sentences.Build())
        .Add("mentions", mentions.Build())
        .Add("labels", labels.Build())
        .Add("pair_feature_names", featureNames.Build())
        .Add("pair_features", features.Build())
        .Add("document_features", docFeatures.Build())
        .Build();
    dataWriter.Println(docData);
}