/// <summary>
/// Works out the grammatical case of the pronoun "you" or "it" from the edge
/// that attaches it to its parent in the dependency graph.
/// </summary>
/// <param name="sg">Graph that contains the pronoun.</param>
/// <param name="word">The pronoun; it is re-resolved in <paramref name="sg"/> by its index.</param>
/// <returns>
/// "Acc" when the pronoun is attached by an object relation, or when it is attached
/// by a nominal-modifier or root relation and has a case-marker child; "Nom" otherwise.
/// </returns>
private static string PronounCase(SemanticGraph sg, IndexedWord word)
{
    // Use the graph's own node for this index in all lookups below.
    word = sg.GetNodeByIndex(word.Index());

    IndexedWord governor = sg.GetParent(word);
    if (governor == null)
    {
        return "Nom";
    }

    SemanticGraphEdge incoming = sg.GetEdge(governor, word);
    if (incoming == null)
    {
        return "Nom";
    }

    if (UniversalEnglishGrammaticalRelations.Object.IsAncestor(incoming.GetRelation()))
    {
        // "you" is an object.
        return "Acc";
    }

    bool nominalOrRoot =
        UniversalEnglishGrammaticalRelations.NominalModifier.IsAncestor(incoming.GetRelation())
        || incoming.GetRelation() == GrammaticalRelation.Root;
    if (nominalOrRoot && sg.HasChildWithReln(word, UniversalEnglishGrammaticalRelations.CaseMarker))
    {
        // "you" is the head of a prepositional phrase.
        return "Acc";
    }

    return "Nom";
}
/// <summary>Checks whether one mention is the speaker of the other mention.</summary>
/// <param name="m">Candidate speaker mention; its sentence words and dependency graph are inspected.</param>
/// <param name="ant">The other mention; must be a singular first-person pronoun in the same sentence.</param>
/// <param name="dict">Supplies the first-person pronoun list and the reporting-verb list.</param>
/// <returns>
/// True when exactly one quotation-mark token lies strictly between the two heads and
/// the head of <paramref name="m"/> is the "nsubj" of a reporting verb; false otherwise.
/// </returns>
public static bool IsSpeaker(Mention m, Mention ant, Dictionaries dict)
{
    bool antIsFirstPerson = dict.firstPersonPronouns.Contains(ant.SpanToString().ToLower());
    if (!antIsFirstPerson || ant.number == Dictionaries.Number.Plural || ant.sentNum != m.sentNum)
    {
        return false;
    }

    // Count quotation-mark tokens strictly between the two head positions.
    int from = System.Math.Min(m.headIndex, ant.headIndex) + 1;
    int to = System.Math.Max(m.headIndex, ant.headIndex);
    int quoteCount = 0;
    for (int pos = from; pos < to; pos++)
    {
        string token = m.sentenceWords[pos].Get(typeof(CoreAnnotations.TextAnnotation));
        if (token.Equals("``") || token.Equals("''"))
        {
            quoteCount++;
        }
    }
    if (quoteCount != 1)
    {
        return false;
    }

    string headText = m.sentenceWords[m.headIndex].Get(typeof(CoreAnnotations.TextAnnotation));
    IndexedWord headNode = m.dependency.GetNodeByWordPattern(headText);
    if (headNode == null)
    {
        return false;
    }

    foreach (Pair <GrammaticalRelation, IndexedWord> governor in m.dependency.ParentPairs(headNode))
    {
        bool isSubject = governor.First().GetShortName().Equals("nsubj");
        if (isSubject && dict.reportVerb.Contains(governor.Second().Get(typeof(CoreAnnotations.LemmaAnnotation))))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Creates a fresh set, passes it to the three-argument <c>Crawl</c> overload together
/// with <paramref name="vertex"/> and <paramref name="sg"/>, and returns that set.
/// </summary>
/// <param name="vertex">Vertex handed to the overload as the starting point.</param>
/// <param name="sg">Graph handed to the overload.</param>
/// <returns>The set after the overload has run (presumably the vertices it visited — confirm against the overload).</returns>
protected internal static ICollection <IndexedWord> Crawl(IndexedWord vertex, SemanticGraph sg)
{
    ICollection <IndexedWord> visited = Generics.NewHashSet();
    Crawl(vertex, sg, visited);
    return visited;
}
/// <summary>
/// Builds a node from this edit's <c>nodeString</c>, adds it to the graph as a vertex
/// and registers it under <c>nodeName</c>.
/// </summary>
/// <param name="sg">Graph that receives the new vertex.</param>
/// <param name="sm">Matcher for the current match; not used by this edit.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord created = AddDep.FromCheapString(nodeString);
    sg.AddVertex(created);
    AddNamedNode(created, nodeName);
}
/// <summary>
/// Recursively walks the children of <paramref name="w"/> and appends to
/// <paramref name="descendantSet"/> every child reached over an edge whose relation,
/// rendered with <c>ToString()</c>, equals <paramref name="relation"/>.
/// </summary>
/// <param name="g">Graph to walk.</param>
/// <param name="w">Node whose outgoing edges are examined.</param>
/// <param name="relation">Relation name to match.</param>
/// <param name="seenNodes">Nodes already expanded; updated in place to stop re-expansion.</param>
/// <param name="descendantSet">Output list; a child is appended once per matching edge.</param>
private void DescendantsWithReln(SemanticGraph g, IndexedWord w, string relation, IList <IndexedWord> seenNodes, IList <IndexedWord> descendantSet)
{
    if (seenNodes.Contains(w))
    {
        return;
    }
    seenNodes.Add(w);

    // Do not expand nodes that were already collected or that carry an ignored tag.
    bool skip = descendantSet.Contains(w)
                || (ignoreCommonTags && ignoreTags.Contains(w.Tag().Trim()));
    if (skip)
    {
        return;
    }

    foreach (IndexedWord kid in g.GetChildren(w))
    {
        foreach (SemanticGraphEdge link in g.GetAllEdges(w, kid))
        {
            string linkName = link.GetRelation().ToString();
            if (linkName.Equals(relation))
            {
                descendantSet.Add(kid);
            }
        }
        DescendantsWithReln(g, kid, relation, seenNodes, descendantSet);
    }
}
//public ExtractPhraseFromPattern(Namespace curNS) {
//  this.curNS = curNS;
//}

/// <summary>
/// Searches below <paramref name="parent"/> for <paramref name="child"/>, bounded by the
/// limit held in <paramref name="depths"/>.
/// </summary>
/// <param name="g">Graph to search.</param>
/// <param name="parent">Node to start from.</param>
/// <param name="child">Node being looked for.</param>
/// <param name="depths">
/// Slot 0 is the limit (<c>int.MaxValue</c> disables the check); slot 1 is a counter that
/// this method increments every time a node's direct children do not contain
/// <paramref name="child"/>. The same instance is shared by all recursive calls.
/// </param>
/// <returns>True if the child was found before the counter reached the limit.</returns>
private bool CheckIfSatisfiedMaxDepth(SemanticGraph g, IndexedWord parent, IndexedWord child, IntPair depths)
{
    if (depths.Get(0) == int.MaxValue || parent.Equals(child))
    {
        return true;
    }

    // Direct children first.
    foreach (IndexedWord direct in g.GetChildren(parent))
    {
        if (direct.Equals(child))
        {
            return true;
        }
    }

    // Not found at this level: bump the shared counter and stop once it hits the limit.
    depths.Set(1, depths.Get(1) + 1);
    if (depths.Get(1) >= depths.Get(0))
    {
        return false;
    }

    foreach (IndexedWord next in g.GetChildren(parent))
    {
        if (CheckIfSatisfiedMaxDepth(g, next, child, depths))
        {
            return true;
        }
    }
    return false;
}
/// <summary>
/// Wraps an <c>IndexedWord</c>, caching its text, character positions, named entity,
/// index, part of speech and lemma.
/// </summary>
/// <param name="indexedWord">The word to wrap.</param>
public NotenizerWord(IndexedWord indexedWord)
{
    _indexedWord = indexedWord;
    _wordString = indexedWord.word();
    _startingPosition = indexedWord.beginPosition();
    _endPosition = indexedWord.endPosition();
    _namedEntity = new NamedEntity(indexedWord.ner());

    // Missing end-index annotation falls back to -1.
    Object endIndex = indexedWord.get(typeof(CoreAnnotations.EndIndexAnnotation));
    if (endIndex == null)
    {
        _index = -1;
    }
    else
    {
        _index = endIndex.ToInt();
    }

    // Missing part-of-speech annotation falls back to an empty tag.
    Object posTag = indexedWord.get(typeof(CoreAnnotations.PartOfSpeechAnnotation));
    if (posTag == null)
    {
        _pos = new PartOfSpeech(String.Empty);
    }
    else
    {
        _pos = new PartOfSpeech(posTag.ToString());
    }

    // Missing lemma falls back to the empty string.
    Object lemma = indexedWord.lemma();
    if (lemma == null)
    {
        _lemma = String.Empty;
    }
    else
    {
        _lemma = lemma.ToString();
    }
}
/// <summary>
/// Two indexed words are equal when they have the same <c>Index</c> and their
/// <c>Word</c> values are either both null or equal.
/// </summary>
/// <param name="o">Object to compare with.</param>
/// <returns>True if <paramref name="o"/> is an <c>IndexedWord</c> equal to this one.</returns>
public override bool Equals(object o)
{
    if (object.ReferenceEquals(o, this))
    {
        return true;
    }
    if (!(o is IndexedWord))
    {
        return false;
    }

    IndexedWord that = (IndexedWord)o;
    if (this.Index != that.Index)
    {
        return false;
    }

    object mine = this.Word;
    object theirs = that.Word;
    if (mine == null)
    {
        return theirs == null;
    }
    return mine.Equals(theirs);
}
/// <summary>
/// Writes every location of <paramref name="fieldMatch"/>. The first location is always
/// written in full; each later one is written in full or abbreviated, depending on the
/// structure derived from it and the preceding location.
/// </summary>
/// <param name="fieldMatch">Word whose <c>Locations</c> are written in order.</param>
private void WriteWordLocations(IndexedWord fieldMatch)
{
    WordLocation? previous = null;
    foreach (var current in fieldMatch.Locations)
    {
        if (!previous.HasValue)
        {
            // Nothing to abbreviate against yet.
            this.WriteLocationInFull(current);
        }
        else
        {
            var entry = DeriveEntryStructureInformation(previous.Value, current);
            if (entry.structure == LocationEntryStructure.Full)
            {
                this.WriteLocationInFull(current);
            }
            else
            {
                this.WriteAbbreviatedLocationDetails(current.Length, entry);
            }
        }

        previous = current;
    }
}
/// <summary>
/// Computes a zero-based (begin, end) token span around <paramref name="headword"/>
/// from its left-most and right-most child vertices. When the head has a copula child,
/// the begin index is recomputed from the child that follows the copula in the head's
/// child list, or set to the head itself if the copula is the last child.
/// </summary>
/// <param name="headword">Head of the span.</param>
/// <param name="dep">Dependency graph containing the head.</param>
/// <param name="sent">Sentence tokens; not read by this method.</param>
/// <returns>The span as an <c>IntPair</c> of begin and end indices.</returns>
private IntPair GetNPSpanOld(IndexedWord headword, SemanticGraph dep, IList <CoreLabel> sent)
{
    IndexedWord cop = dep.GetChildWithReln(headword, UniversalEnglishGrammaticalRelations.Copula);
    Pair <IndexedWord, IndexedWord> extremes = SemanticGraphUtils.LeftRightMostChildVertices(headword, dep);

    // The headword itself can be the first or last word of the span.
    int headIdx = headword.Index() - 1;
    int beginIdx = Math.Min(headIdx, extremes.first.Index() - 1);
    int endIdx = Math.Max(headIdx, extremes.second.Index() - 1);

    if (cop != null)
    {
        // With a copula, the span begins at the child after it.
        IList <IndexedWord> children = dep.GetChildList(headword);
        int afterCop = children.IndexOf(cop) + 1;
        if (afterCop < children.Count)
        {
            int leftMost = SemanticGraphUtils.LeftMostChildVertice(children[afterCop], dep).Index() - 1;
            beginIdx = Math.Min(headIdx, leftMost);
        }
        else
        {
            beginIdx = headIdx;
        }
    }

    return new IntPair(beginIdx, endIdx);
}
/// <summary>
/// Inserts the item at its ordered position in the list. If an existing element compares
/// equal (CompareTo returns 0), the item is inserted directly after the first such element;
/// the existing element is kept, not replaced.
/// </summary>
/// <param name="item">Item to insert.</param>
public void Add(IndexedWord <T> item)
{
    LinkedListNode <IndexedWord <T> > cursor = _list.First;
    while (cursor != null)
    {
        int order = item.CompareTo(cursor.Value);
        if (order == 0)
        {
            // Equal element found: the new item goes right behind it.
            _list.AddAfter(cursor, item);
            return;
        }
        if (order < 0)
        {
            // The item sorts before this element.
            _list.AddBefore(cursor, item);
            return;
        }
        cursor = cursor.Next;
    }

    // Empty list, or the item sorts after every element.
    _list.AddLast(item);
}
/// <summary>Determine if a tree is cyclic.</summary>
/// <remarks>
/// For every non-root vertex, follows the governor of the first incoming edge upwards
/// and reports a cycle as soon as a vertex is visited twice on that walk. Only the first
/// incoming edge of each vertex is followed.
/// </remarks>
/// <param name="tree">The tree to check.</param>
/// <returns>True if the tree has at least one cycle in it.</returns>
public static bool IsCyclic(SemanticGraph tree)
{
    // The root set does not change during the check, so fetch it once.
    var roots = tree.GetRoots();
    foreach (IndexedWord vertex in tree.VertexSet())
    {
        if (roots.Contains(vertex))
        {
            continue;
        }

        // An enumerator starts positioned BEFORE its first element, so it must be
        // advanced with MoveNext() before Current may be read.
        var incoming = tree.IncomingEdgeIterator(vertex);
        if (!incoming.MoveNext())
        {
            // Non-root vertex without a governor: there is no chain to follow.
            continue;
        }
        IndexedWord node = incoming.Current.GetGovernor();

        ICollection <IndexedWord> seen = new HashSet <IndexedWord>();
        seen.Add(vertex);
        while (node != null)
        {
            if (seen.Contains(node))
            {
                return true;
            }
            seen.Add(node);

            // Advance and read the SAME enumerator instance; a freshly created one
            // would again be positioned before its first element.
            var upward = tree.IncomingEdgeIterator(node);
            node = upward.MoveNext() ? upward.Current.GetGovernor() : null;
        }
    }
    return false;
}
/// <summary>Extracts features from relative and interrogative pronouns.</summary>
/// <param name="sg">Graph containing the word.</param>
/// <param name="word">Word to inspect; only words whose tag starts with "W" produce a feature.</param>
/// <returns>
/// An empty dictionary for other tags. Otherwise a dictionary with "PronType" set to
/// "Rel" when the word's parent is attached to its own parent by a relative-clause
/// modifier relation, to "Dem" when the word is "that" (ignoring case), and to "Int" otherwise.
/// </returns>
private static Dictionary <string, string> GetRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word)
{
    Dictionary <string, string> features = new Dictionary <string, string>();
    if (!word.Tag().StartsWith("W"))
    {
        return features;
    }

    // The pronoun is relative when its governor heads a relative clause.
    bool isRelative = false;
    IndexedWord governor = sg.GetParent(word);
    IndexedWord grandGovernor = governor != null ? sg.GetParent(governor) : null;
    if (grandGovernor != null)
    {
        SemanticGraphEdge clauseEdge = sg.GetEdge(grandGovernor, governor);
        isRelative = clauseEdge.GetRelation().Equals(UniversalEnglishGrammaticalRelations.RelativeClauseModifier);
    }

    string pronType;
    if (isRelative)
    {
        pronType = "Rel";
    }
    else if (Sharpen.Runtime.EqualsIgnoreCase(word.Value(), "that"))
    {
        pronType = "Dem";
    }
    else
    {
        pronType = "Int";
    }
    features["PronType"] = pronType;
    return features;
}
/// <summary>
/// Checks the shortest undirected path queries between nodes 10 and 14 of the test
/// graph, and between node 10 and itself.
/// </summary>
public virtual void TestShortestPath()
{
    IndexedWord source = graph.GetNodeByIndex(10);
    IndexedWord target = graph.GetNodeByIndex(14);

    // Two distinct nodes: both queries answer, and the node path runs source -> target.
    IList <SemanticGraphEdge> pathEdges = graph.GetShortestUndirectedPathEdges(source, target);
    NUnit.Framework.Assert.IsNotNull(pathEdges);

    IList <IndexedWord> pathNodes = graph.GetShortestUndirectedPathNodes(source, target);
    NUnit.Framework.Assert.IsNotNull(pathNodes);
    NUnit.Framework.Assert.AreEqual(source, pathNodes[0]);
    NUnit.Framework.Assert.AreEqual(target, pathNodes[pathNodes.Count - 1]);

    // A node to itself: no edges, and a node path holding just that node.
    IList <SemanticGraphEdge> selfEdges = graph.GetShortestUndirectedPathEdges(source, source);
    NUnit.Framework.Assert.IsNotNull(selfEdges);
    NUnit.Framework.Assert.AreEqual(0, selfEdges.Count);

    IList <IndexedWord> selfNodes = graph.GetShortestUndirectedPathNodes(source, source);
    NUnit.Framework.Assert.IsNotNull(selfNodes);
    NUnit.Framework.Assert.AreEqual(1, selfNodes.Count);
    NUnit.Framework.Assert.AreEqual(source, selfNodes[0]);
}
// TODO: implement referencing regexes

/// <summary>
/// Builds a fixed ten-node graph ("A" to "J", indices 1 to 10, rooted at "A") with
/// thirteen edges. The relations are arbitrary; the graph is not meant to be linguistically valid.
/// </summary>
/// <returns>The constructed graph.</returns>
public static SemanticGraph MakeComplicatedGraph()
{
    // Every edge is added with the same trailing arguments
    // (presumably edge weight and an "extra" flag — confirm against SemanticGraph.AddEdge).
    const double weight = 1.0;
    const bool extra = false;

    SemanticGraph graph = new SemanticGraph();
    string[] labels = new string[] { "A", "B", "C", "D", "E", "F", "G", "H", "I", "J" };
    IndexedWord[] nodes = new IndexedWord[labels.Length];
    for (int n = 0; n < labels.Length; n++)
    {
        IndexedWord vertex = new IndexedWord("test", 1, n + 1);
        vertex.SetWord(labels[n]);
        vertex.SetValue(labels[n]);
        nodes[n] = vertex;
        graph.AddVertex(vertex);
    }
    graph.SetRoot(nodes[0]);

    // this graph isn't supposed to make sense
    graph.AddEdge(nodes[0], nodes[1], EnglishGrammaticalRelations.Modifier, weight, extra);
    graph.AddEdge(nodes[0], nodes[2], EnglishGrammaticalRelations.DirectObject, weight, extra);
    graph.AddEdge(nodes[0], nodes[3], EnglishGrammaticalRelations.IndirectObject, weight, extra);
    graph.AddEdge(nodes[1], nodes[4], EnglishGrammaticalRelations.Marker, weight, extra);
    graph.AddEdge(nodes[2], nodes[4], EnglishGrammaticalRelations.Expletive, weight, extra);
    graph.AddEdge(nodes[3], nodes[4], EnglishGrammaticalRelations.AdjectivalComplement, weight, extra);
    graph.AddEdge(nodes[4], nodes[5], EnglishGrammaticalRelations.AdjectivalModifier, weight, extra);
    graph.AddEdge(nodes[4], nodes[6], EnglishGrammaticalRelations.AdverbialModifier, weight, extra);
    graph.AddEdge(nodes[4], nodes[8], EnglishGrammaticalRelations.Modifier, weight, extra);
    graph.AddEdge(nodes[5], nodes[7], EnglishGrammaticalRelations.PossessionModifier, weight, extra);
    graph.AddEdge(nodes[6], nodes[7], EnglishGrammaticalRelations.PossessiveModifier, weight, extra);
    graph.AddEdge(nodes[7], nodes[8], EnglishGrammaticalRelations.Agent, weight, extra);
    graph.AddEdge(nodes[8], nodes[9], EnglishGrammaticalRelations.Determiner, weight, extra);
    return graph;
}
/// <summary>
/// Collects the dependency chains that start at <paramref name="gov"/> and descend through
/// <paramref name="govToDepMap"/> for at most <paramref name="depth"/> steps.
/// </summary>
/// <remarks>
/// Each chain begins with one dependency of <paramref name="gov"/> and continues with a chain
/// found (at <c>depth - 1</c>) below that dependency's dependent. A dependency whose dependent
/// yields no further chains contributes a one-element chain.
/// </remarks>
/// <param name="govToDepMap">Maps a governor to its dependencies; governors without dependencies may be absent.</param>
/// <param name="gov">Governor to start from.</param>
/// <param name="depth">Maximum chain length; values of zero or less give an empty result.</param>
/// <returns>The collected chains; empty when there is nothing below <paramref name="gov"/>.</returns>
private static ICollection <IList <TypedDependency> > GetGovMaxChains(IDictionary <IndexedWord, IList <TypedDependency> > govToDepMap, IndexedWord gov, int depth)
{
    ICollection <IList <TypedDependency> > depLists = Generics.NewHashSet();

    // The dictionary indexer throws KeyNotFoundException for a missing key, which would
    // abort the recursion at every leaf. TryGetValue treats "absent" like "no children".
    IList <TypedDependency> children;
    if (depth <= 0 || !govToDepMap.TryGetValue(gov, out children) || children == null)
    {
        return depLists;
    }

    foreach (TypedDependency child in children)
    {
        IndexedWord childNode = child.Dep();
        if (childNode == null)
        {
            continue;
        }

        ICollection <IList <TypedDependency> > childDepLists = GetGovMaxChains(govToDepMap, childNode, depth - 1);
        if (childDepLists.Count == 0)
        {
            // Nothing deeper: the chain is this dependency alone.
            depLists.Add(Arrays.AsList(child));
            continue;
        }

        // Prefix every deeper chain with this dependency.
        foreach (IList <TypedDependency> childDepList in childDepLists)
        {
            IList <TypedDependency> depList = new List <TypedDependency>(childDepList.Count + 1);
            depList.Add(child);
            Sharpen.Collections.AddAll(depList, childDepList);
            depLists.Add(depList);
        }
    }
    return depLists;
}
/// <summary>
/// Appends a one-line, parenthesised rendering of <paramref name="node"/> and its
/// outgoing edges to <paramref name="sb"/>. A dependent that was already rendered is
/// written as a bare label instead of being expanded again.
/// </summary>
/// <param name="sg">Graph being rendered.</param>
/// <param name="node">Node to render.</param>
/// <param name="sb">Output buffer.</param>
/// <param name="usedOneline">Nodes rendered so far by this one-line pass; updated in place.</param>
private void FormatSGNodeOnelineHelper(SemanticGraph sg, IndexedWord node, StringBuilder sb, ICollection <IndexedWord> usedOneline)
{
    usedOneline.Add(node);

    // Only nodes with outgoing edges are wrapped in parentheses.
    bool hasDependents = sg.OutDegree(node) > 0;
    if (hasDependents)
    {
        sb.Append(Lparen);
    }
    sb.Append(FormatLabel(node));

    foreach (SemanticGraphEdge outEdge in sg.GetOutEdgesSorted(node))
    {
        IndexedWord target = outEdge.GetDependent();
        sb.Append(Space);
        if (showRelns)
        {
            sb.Append(outEdge.GetRelation());
            sb.Append(Colon);
        }

        bool alreadyRendered = usedOneline.Contains(target) || used.Contains(target);
        if (alreadyRendered)
        {
            // Avoid an infinite loop: write the label only.
            sb.Append(FormatLabel(target));
        }
        else
        {
            FormatSGNodeOnelineHelper(sg, target, sb, usedOneline);
        }
    }

    if (hasDependents)
    {
        sb.Append(Rparen);
    }
}
/// <summary>
/// Creates an edit from a governor node name, a relation and a prototype for the new
/// node. The edit's weight is initialised to 0.
/// </summary>
/// <param name="govNodeName">Name of the governor node.</param>
/// <param name="relation">Relation stored on the edit.</param>
/// <param name="newNodePrototype">Prototype stored for the node to be added.</param>
public AddDep(string govNodeName, GrammaticalRelation relation, IndexedWord newNodePrototype)
{
    this.govNodeName = govNodeName;
    this.relation = relation;
    this.newNodePrototype = newNodePrototype;
    this.weight = 0;
}
// Simple mapping of all the stuff we care about (until IndexedFeatureLabel --> CoreLabel map pain is fixed)

/// <summary>This converts the node into a simple string based representation.</summary>
/// <remarks>
/// The result is "{" + five key/value atoms + "}": word, lemma, tag, value and original
/// text, in that order. Key and value are joined by <c>TupleDelimiter</c>, atoms by
/// <c>AtomDelimiter</c>, and every value is passed through <c>NullShield</c>.
/// NOTE: this is extremely brittle, and presumes values do not contain delimiters
/// </remarks>
/// <param name="node">Node to serialise.</param>
/// <returns>The string representation.</returns>
public static string CheapWordToString(IndexedWord node)
{
    var keys = new[] { WordKey, LemmaKey, PosKey, ValueKey, CurrentKey };
    var values = new[]
    {
        NullShield(node.Word()),
        NullShield(node.Lemma()),
        NullShield(node.Tag()),
        NullShield(node.Value()),
        NullShield(node.OriginalText())
    };

    StringWriter buf = new StringWriter();
    buf.Write("{");
    for (int k = 0; k < keys.Length; k++)
    {
        if (k > 0)
        {
            buf.Write(AtomDelimiter);
        }
        buf.Write(keys[k]);
        buf.Write(TupleDelimiter);
        buf.Write(values[k]);
    }
    buf.Write("}");
    return buf.ToString();
}
/// <summary>Given the node arg string, converts it into an IndexedWord.</summary>
/// <remarks>
/// The first and last characters of <paramref name="rawArg"/> are stripped, the rest is split
/// into atoms on <c>AtomDelimiter</c>, and each atom into key and value on <c>TupleDelimiter</c>.
/// An atom that does not split into exactly two parts yields an empty value. A key that does
/// not occur in the input leaves the corresponding property set to null.
/// </remarks>
/// <param name="rawArg">String representation of the node.</param>
/// <returns>A new word populated with word, lemma, tag, value and original text.</returns>
public static IndexedWord FromCheapString(string rawArg)
{
    string arg = Sharpen.Runtime.Substring(rawArg, 1, rawArg.Length - 1);
    string[] tuples = arg.Split(AtomDelimiter);
    IDictionary <string, string> args = Generics.NewHashMap();
    foreach (string tuple in tuples)
    {
        string[] vals = tuple.Split(TupleDelimiter);
        string key = vals[0];
        string value = string.Empty;
        if (vals.Length == 2)
        {
            value = vals[1];
        }
        args[key] = value;
    }

    // The dictionary indexer throws KeyNotFoundException for a key that is not present.
    // TryGetValue leaves the out variable null instead, so partial input still parses.
    string wordText;
    string lemmaText;
    string posTag;
    string valueText;
    string originalText;
    args.TryGetValue(WordKey, out wordText);
    args.TryGetValue(LemmaKey, out lemmaText);
    args.TryGetValue(PosKey, out posTag);
    args.TryGetValue(ValueKey, out valueText);
    args.TryGetValue(CurrentKey, out originalText);

    IndexedWord newWord = new IndexedWord();
    newWord.SetWord(wordText);
    newWord.SetLemma(lemmaText);
    newWord.SetTag(posTag);
    newWord.SetValue(valueText);
    newWord.SetOriginalText(originalText);
    return newWord;
}
/// <summary>
/// Copy constructor: takes over the governor, dependent, relation and extra flag of
/// <paramref name="other"/>. The referenced objects are shared, not cloned.
/// </summary>
/// <param name="other">Dependency to copy from.</param>
public TypedDependency(Edu.Stanford.Nlp.Trees.TypedDependency other)
{
    this.gov = other.gov;
    this.dep = other.dep;
    this.reln = other.reln;
    this.extra = other.extra;
}
/// <summary>Parse a JSON formatted tree into a SemanticGraph.</summary>
/// <remarks>
/// Only the "dependent", "governor" and "dep" members of each entry are read; indices are
/// 1-based token positions and governor 0 marks a root. Entries whose relation is "ref" add
/// their vertices but no edge. If any index falls outside the token list, an empty graph is returned.
/// </remarks>
/// <param name="jsonString">
/// The JSON string tree to parse: an array with one object per dependency, e.g.
/// <c>[{"dependent": 7, "dep": "root", "governorgloss": "root", "governor": 0, "dependentgloss": "sport"},
/// {"dependent": 1, "dep": "nsubj", "governorgloss": "sport", "governor": 7, "dependentgloss": "chess"}]</c>
/// </param>
/// <param name="tokens">The tokens of the sentence, to form the backing labels of the tree.</param>
/// <returns>A semantic graph of the sentence, according to the given tree.</returns>
public static SemanticGraph ParseJsonTree(string jsonString, IList <CoreLabel> tokens)
{
    IJsonReader json = Javax.Json.Json.CreateReader(new StringReader(jsonString));
    SemanticGraph tree = new SemanticGraph();
    IJsonArray array = json.ReadArray();
    if (array == null || array.IsEmpty())
    {
        return tree;
    }

    // Slot 0 stays empty (root pseudo-index); slots 1..tokens.Count hold the vertices.
    IndexedWord[] vertices = new IndexedWord[tokens.Count + 2];

    // Add edges
    for (int i = 0; i < array.Count; i++)
    {
        IJsonObject entry = array.GetJsonObject(i);

        // Parse row
        int dependentIndex = entry.GetInt("dependent");
        // Validate BEFORE touching the array: an index beyond tokens.Count + 1 (or below 1)
        // would otherwise throw instead of taking the "mismatch" exit.
        if (dependentIndex < 1 || dependentIndex > tokens.Count)
        {
            // Bizarre mismatch in sizes; the malt parser seems to do this often
            return new SemanticGraph();
        }
        if (vertices[dependentIndex] == null)
        {
            vertices[dependentIndex] = new IndexedWord(tokens[dependentIndex - 1]);
        }
        IndexedWord dependent = vertices[dependentIndex];

        int governorIndex = entry.GetInt("governor");
        if (governorIndex < 0 || governorIndex > tokens.Count)
        {
            // Bizarre mismatch in sizes; the malt parser seems to do this often
            return new SemanticGraph();
        }
        if (vertices[governorIndex] == null && governorIndex > 0)
        {
            vertices[governorIndex] = new IndexedWord(tokens[governorIndex - 1]);
        }
        IndexedWord governor = vertices[governorIndex];
        string relation = entry.GetString("dep");

        // Process row
        if (governorIndex == 0)
        {
            tree.AddRoot(dependent);
        }
        else
        {
            tree.AddVertex(dependent);
            if (!tree.ContainsVertex(governor))
            {
                tree.AddVertex(governor);
            }
            if (!"ref".Equals(relation))
            {
                tree.AddEdge(governor, dependent, GrammaticalRelation.ValueOf(Language.English, relation), double.NegativeInfinity, false);
            }
        }
    }
    return tree;
}
/// <summary>Generate the training features from the CoNLL input file.</summary>
/// <remarks>
/// For every document, mentions in the gold coreference clusters are added with label "1"
/// and predicted mentions whose head word is not a gold mention head with label "0".
/// Mentions whose head tag starts with "V", or whose head cannot be found in the
/// enhanced dependency graph, are skipped.
/// </remarks>
/// <param name="props">Properties used to build the dictionaries and the document maker.</param>
/// <returns>Dataset of feature vectors</returns>
/// <exception cref="System.Exception"/>
private static GeneralDataset <string, string> GenerateFeatureVectors(Properties props)
{
    GeneralDataset <string, string> dataset = new Dataset <string, string>();
    Dictionaries dict = new Dictionaries(props);
    DocumentMaker docMaker = new DocumentMaker(props, dict);

    for (Document doc = docMaker.NextDoc(); doc != null; doc = docMaker.NextDoc())
    {
        SetTokenIndices(doc);

        // Generate features for coreferent mentions with class label 1
        IDictionary <int, CorefCluster> goldClusters = doc.goldCorefClusters;
        foreach (CorefCluster cluster in goldClusters.Values)
        {
            foreach (Mention mention in cluster.GetCorefMentions())
            {
                // Ignore verbal mentions
                if (mention.headWord.Tag().StartsWith("V"))
                {
                    continue;
                }
                IndexedWord goldHead = mention.enhancedDependency.GetNodeByIndexSafe(mention.headWord.Index());
                if (goldHead == null)
                {
                    continue;
                }
                dataset.Add(new BasicDatum <string, string>(mention.GetSingletonFeatures(dict), "1"));
            }
        }

        // Generate features for singletons with class label 0
        List <CoreLabel> goldHeads = new List <CoreLabel>();
        foreach (Mention goldMention in doc.goldMentionsByID.Values)
        {
            goldHeads.Add(goldMention.headWord);
        }
        foreach (Mention predicted in doc.predictedMentionsByID.Values)
        {
            SemanticGraph graph = predicted.enhancedDependency;
            IndexedWord predictedHead = graph.GetNodeByIndexSafe(predicted.headWord.Index());

            // Skip, in this order: heads missing from the graph, verbal mentions, and
            // mentions in the gold set (those are not singletons).
            bool skip = predictedHead == null
                        || !graph.VertexSet().Contains(predictedHead)
                        || predicted.headWord.Tag().StartsWith("V")
                        || goldHeads.Contains(predicted.headWord);
            if (skip)
            {
                continue;
            }
            dataset.Add(new BasicDatum <string, string>(predicted.GetSingletonFeatures(dict), "0"));
        }
    }

    dataset.SummaryStatistics();
    return dataset;
}
/// <summary>
/// Constructs and returns a new Alignment from the given hypothesis
/// <c>SemanticGraph</c>
/// to the given text (passage) SemanticGraph, using
/// the given array of indexes. The i'th element of the array should contain the
/// index of the node in the text (passage) SemanticGraph to which the i'th
/// node in the hypothesis SemanticGraph is aligned, or -1 if it is aligned to
/// NO_WORD.
/// </summary>
/// <param name="txtGraph">Text (passage) graph; must be non-null and non-empty.</param>
/// <param name="hypGraph">Hypothesis graph; must be non-null and non-empty.</param>
/// <param name="indexes">One text-node index per hypothesis node; negative means no word.</param>
/// <param name="score">Score passed through to the alignment.</param>
/// <param name="justification">Justification passed through to the alignment.</param>
/// <returns>The alignment built from the index array.</returns>
/// <exception cref="ArgumentException">
/// A graph is null or empty, the array is null, or its length differs from the hypothesis graph size.
/// </exception>
public static Edu.Stanford.Nlp.Semgraph.Semgrex.Alignment MakeFromIndexArray(SemanticGraph txtGraph, SemanticGraph hypGraph, int[] indexes, double score, string justification)
{
    if (txtGraph == null || txtGraph.IsEmpty())
    {
        throw new ArgumentException("Invalid txtGraph " + txtGraph);
    }
    if (hypGraph == null || hypGraph.IsEmpty())
    {
        throw new ArgumentException("Invalid hypGraph " + hypGraph);
    }
    if (indexes == null)
    {
        throw new ArgumentException("Null index array");
    }
    if (indexes.Length != hypGraph.Size())
    {
        throw new ArgumentException("Index array length " + indexes.Length + " does not match hypGraph size " + hypGraph.Size());
    }

    IDictionary <IndexedWord, IndexedWord> alignment = Generics.NewHashMap();
    for (int h = 0; h < indexes.Length; h++)
    {
        IndexedWord hypNode = hypGraph.GetNodeByIndex(h);
        int txtIndex = indexes[h];
        // A negative entry means the hypothesis node is aligned to no word.
        IndexedWord txtNode = txtIndex >= 0 ? txtGraph.GetNodeByIndex(txtIndex) : IndexedWord.NoWord;
        alignment[hypNode] = txtNode;
    }
    return new Edu.Stanford.Nlp.Semgraph.Semgrex.Alignment(alignment, score, justification);
}
/// <summary>
/// Looks in the last sentence of <paramref name="paragraph"/> for a token whose lemma is
/// "report" or "say" and, among that verb's "nsubj" children, for one whose head position
/// is a known mention with an NER string starting with "PER".
/// </summary>
/// <param name="paragraph">Sentences of the paragraph; only the last one is inspected.</param>
/// <param name="paragraphOffset">Added to the last sentence's position in the paragraph to form the sentence part of the head position.</param>
/// <param name="dict">Not read by this method.</param>
/// <returns>
/// The mention ID of the last matching subject, as a string, or the empty string when
/// none is found.
/// </returns>
private string FindNextParagraphSpeaker(IList <ICoreMap> paragraph, int paragraphOffset, Dictionaries dict)
{
    ICoreMap lastSent = paragraph[paragraph.Count - 1];
    string speaker = string.Empty;
    foreach (CoreLabel w in lastSent.Get(typeof(CoreAnnotations.TokensAnnotation)))
    {
        // Compare from the literal so a token without a lemma is skipped, not a crash.
        string lemma = w.Get(typeof(CoreAnnotations.LemmaAnnotation));
        if (!"report".Equals(lemma) && !"say".Equals(lemma))
        {
            continue;
        }

        string word = w.Get(typeof(CoreAnnotations.TextAnnotation));
        SemanticGraph dependency = lastSent.Get(typeof(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation));
        IndexedWord t = dependency.GetNodeByWordPattern(word);
        if (t == null)
        {
            // The verb has no node in the graph, so there are no children to inspect.
            continue;
        }

        foreach (Pair <GrammaticalRelation, IndexedWord> child in dependency.ChildPairs(t))
        {
            if (!child.First().GetShortName().Equals("nsubj"))
            {
                continue;
            }

            int subjectIndex = child.Second().Index(); // start from 1
            IntTuple headPosition = new IntTuple(2);
            headPosition.Set(0, paragraph.Count - 1 + paragraphOffset);
            headPosition.Set(1, subjectIndex - 1);
            if (mentionheadPositions.Contains(headPosition) && mentionheadPositions[headPosition].nerString.StartsWith("PER"))
            {
                // There is no static int.ToString(value) in C#; format the ID through
                // its own instance method.
                speaker = mentionheadPositions[headPosition].mentionID.ToString();
            }
        }
    }
    return speaker;
}
/// <summary>
/// Orders dependencies by dependent index, then by governor index, and finally by
/// relation.
/// </summary>
/// <param name="tdArg">Dependency to compare with.</param>
/// <returns>
/// 1 or -1 when the dependent or governor indices differ; otherwise the result of
/// comparing the relations.
/// </returns>
public int CompareTo(TypedDependency tdArg)
{
    int otherDep = tdArg.Dep.Index();
    int thisDep = this.Dep.Index();
    if (thisDep != otherDep)
    {
        return thisDep > otherDep ? 1 : -1;
    }

    // dependent indices are equal, check governor
    int otherGov = tdArg.Gov.Index();
    int thisGov = this.Gov.Index();
    if (thisGov != otherGov)
    {
        return thisGov > otherGov ? 1 : -1;
    }

    // dependent and governor indices equal, the relation decides
    return this.Reln.CompareTo(tdArg.Reln);
}
/// <summary>Creates an EnglishGrammaticalRelation AddDep edit.</summary>
/// <param name="govNodeName">Name of the governor node, passed through to the edit.</param>
/// <param name="engRelation">Relation name, resolved with <c>EnglishGrammaticalRelations.ValueOf</c>.</param>
/// <param name="newNode">String representation of the new dependent node, parsed with <c>FromCheapString</c>.</param>
/// <returns>The new edit.</returns>
public static Edu.Stanford.Nlp.Semgraph.Semgrex.Ssurgeon.AddDep CreateEngAddDep(string govNodeName, string engRelation, string newNode)
{
    GrammaticalRelation resolved = EnglishGrammaticalRelations.ValueOf(engRelation);
    IndexedWord prototype = FromCheapString(newNode);
    return new Edu.Stanford.Nlp.Semgraph.Semgrex.Ssurgeon.AddDep(govNodeName, resolved, prototype);
}
/// <summary>
/// Renders a word in the <c>CoreLabel.OutputFormat.Value</c> format, or as "_" when it
/// is null or equal to <c>IndexedWord.NoWord</c>.
/// </summary>
/// <param name="iw">Word to render; may be null.</param>
/// <returns>The rendered word or "_".</returns>
private static string IwToString(IndexedWord iw)
{
    bool isMissing = iw == null || iw.Equals(IndexedWord.NoWord);
    return isMissing ? "_" : iw.ToString(CoreLabel.OutputFormat.Value);
}
/// <summary>
/// Renders <paramref name="node"/> on one line by running
/// <c>FormatSGNodeOnelineHelper</c> with a fresh buffer and a fresh set of used nodes.
/// </summary>
/// <param name="sg">Graph containing the node.</param>
/// <param name="node">Node to render.</param>
/// <returns>The text the helper appended to the buffer.</returns>
private string FormatSGNodeOneline(SemanticGraph sg, IndexedWord node)
{
    StringBuilder buffer = new StringBuilder();
    ICollection <IndexedWord> rendered = Generics.NewHashSet();
    FormatSGNodeOnelineHelper(sg, node, buffer, rendered);
    return buffer.ToString();
}
/// <summary>Parse a CoNLL formatted tree into a SemanticGraph.</summary>
/// <remarks>
/// Each line is split on tabs into dependent index, governor index and relation. Indices are
/// 1-based token positions and governor 0 marks a root. Rows whose relation is "ref" add
/// their vertices but no edge. If any index falls outside the token list, an empty graph is returned.
/// </remarks>
/// <param name="conll">The CoNLL tree to parse.</param>
/// <param name="tokens">The tokens of the sentence, to form the backing labels of the tree.</param>
/// <returns>A semantic graph of the sentence, according to the given tree.</returns>
public static SemanticGraph ParseTree(string conll, IList <CoreLabel> tokens)
{
    SemanticGraph tree = new SemanticGraph();
    if (conll == null || conll.IsEmpty())
    {
        return tree;
    }
    string[] treeLines = newline.Split(conll);

    // Slot 0 stays empty (root pseudo-index); slots 1..tokens.Count hold the vertices.
    IndexedWord[] vertices = new IndexedWord[tokens.Count + 2];

    // Add edges
    foreach (string line in treeLines)
    {
        // Parse row
        string[] fields = tab.Split(line);
        int dependentIndex = System.Convert.ToInt32(fields[0]);
        // Validate BEFORE touching the array: an index beyond tokens.Count + 1 (or below 1)
        // would otherwise throw instead of taking the "mismatch" exit.
        if (dependentIndex < 1 || dependentIndex > tokens.Count)
        {
            // Bizarre mismatch in sizes; the malt parser seems to do this often
            return new SemanticGraph();
        }
        if (vertices[dependentIndex] == null)
        {
            vertices[dependentIndex] = new IndexedWord(tokens[dependentIndex - 1]);
        }
        IndexedWord dependent = vertices[dependentIndex];

        int governorIndex = System.Convert.ToInt32(fields[1]);
        if (governorIndex < 0 || governorIndex > tokens.Count)
        {
            // Bizarre mismatch in sizes; the malt parser seems to do this often
            return new SemanticGraph();
        }
        if (vertices[governorIndex] == null && governorIndex > 0)
        {
            vertices[governorIndex] = new IndexedWord(tokens[governorIndex - 1]);
        }
        IndexedWord governor = vertices[governorIndex];
        string relation = fields[2];

        // Process row
        if (governorIndex == 0)
        {
            tree.AddRoot(dependent);
        }
        else
        {
            tree.AddVertex(dependent);
            if (!tree.ContainsVertex(governor))
            {
                tree.AddVertex(governor);
            }
            if (!"ref".Equals(relation))
            {
                tree.AddEdge(governor, dependent, GrammaticalRelation.ValueOf(Language.English, relation), double.NegativeInfinity, false);
            }
        }
    }
    return tree;
}
/// <summary>
/// Removes every edge that comes into the node registered under <c>nodeName</c>.
/// </summary>
/// <param name="sg">Graph to modify.</param>
/// <param name="sm">Matcher used to resolve the named node.</param>
public override void Evaluate(SemanticGraph sg, SemgrexMatcher sm)
{
    IndexedWord tgtNode = GetNamedNode(nodeName, sm);

    // Snapshot the incoming edges first: removing edges from the graph while its
    // incoming-edge sequence is still being enumerated can invalidate the enumeration.
    System.Collections.Generic.List <SemanticGraphEdge> incoming = new System.Collections.Generic.List <SemanticGraphEdge>();
    foreach (SemanticGraphEdge edge in sg.IncomingEdgeIterable(tgtNode))
    {
        incoming.Add(edge);
    }

    foreach (SemanticGraphEdge edge in incoming)
    {
        sg.RemoveEdge(edge);
    }
}
/// <summary>
/// Given a list of typedDependencies, returns true if the node "node" is the
/// governor of a conj relation with a dependent which is not a preposition
/// </summary>
/// <param name="node">A node in this GrammaticalStructure</param>
/// <param name="list">A list of typedDependencies</param>
/// <returns>
/// true if node is the governor of a conj relation in the list whose dependent is tagged
/// neither as preposition/subordinating conjunction nor as "to"
/// </returns>
private static bool IsConjWithNoPrep(IndexedWord node, List<TypedDependency> list)
{
    foreach (TypedDependency candidate in list)
    {
        // Only conjunct relations governed by the node are of interest.
        bool isConjOfNode = candidate.Gov.Equals(node) && candidate.Reln == EnglishGrammaticalRelations.Conjunct;
        if (!isConjOfNode)
        {
            continue;
        }

        // check the POS of the dependent
        string depTag = candidate.Dep.Tag();
        bool isPrepLike = depTag == PartsOfSpeech.PrepositionOrSubordinateConjunction || depTag == PartsOfSpeech.To;
        if (!isPrepLike)
        {
            return true;
        }
    }
    return false;
}