/// <summary>
/// Builds the starting parser state for a sentence whose words already carry part-of-speech tags.
/// </summary>
/// <remarks>
/// Every word becomes a preterminal: a tag node whose only child is the word node. A word that is
/// already a <c>CoreLabel</c> is reused as the word label (and is therefore mutated); any other word
/// is copied into a fresh <c>CoreLabel</c> and must implement <c>IHasTag</c>.
/// </remarks>
/// <param name="words">The tagged words of the sentence, in order.</param>
/// <returns>A new <c>State</c> constructed from the list of preterminals.</returns>
/// <exception cref="ArgumentException">
/// A word is neither a <c>CoreLabel</c> nor an <c>IHasTag</c>, or its tag is null.
/// </exception>
public static State InitialStateFromTaggedSentence<_T0>(IList<_T0> words) where _T0 : IHasWord
{
    var headWordKey = typeof(TreeCoreAnnotations.HeadWordLabelAnnotation);
    var headTagKey = typeof(TreeCoreAnnotations.HeadTagLabelAnnotation);

    IList<Tree> preterminals = Generics.NewArrayList();
    for (int position = 0; position < words.Count; ++position)
    {
        IHasWord current = words[position];

        CoreLabel wordLabel;
        string tag;
        if (current is CoreLabel existing)
        {
            wordLabel = existing;
            tag = existing.Tag();
        }
        else
        {
            var copy = new CoreLabel();
            copy.SetValue(current.Word());
            copy.SetWord(current.Word());

            var tagged = current as IHasTag;
            if (tagged == null)
            {
                throw new ArgumentException("Expected tagged words");
            }

            tag = tagged.Tag();
            copy.SetTag(tag);
            wordLabel = copy;
        }

        if (tag == null)
        {
            throw new ArgumentException("Input word not tagged");
        }

        var tagLabel = new CoreLabel();
        tagLabel.SetValue(tag);

        // Index from 1. Tools downstream from the parser expect that.
        // Internally this parser uses the index, so we have to
        // overwrite incorrect indices if the label is already indexed.
        int oneBasedIndex = position + 1;
        wordLabel.SetIndex(oneBasedIndex);
        tagLabel.SetIndex(oneBasedIndex);

        var wordNode = new LabeledScoredTreeNode(wordLabel);
        var tagNode = new LabeledScoredTreeNode(tagLabel);
        tagNode.AddChild(wordNode);

        // TODO: can we get away with not setting these on the wordLabel?
        wordLabel.Set(headWordKey, wordLabel);
        wordLabel.Set(headTagKey, tagLabel);
        tagLabel.Set(headWordKey, wordLabel);
        tagLabel.Set(headTagKey, tagLabel);

        preterminals.Add(tagNode);
    }

    return new State(preterminals);
}
/// <summary>
/// Creates a new tree node labelled <paramref name="label"/> whose head-word and head-tag
/// annotations are copied from the label of <paramref name="top"/>.
/// </summary>
/// <param name="top">Tree supplying the head annotations; its label is cast to <c>CoreLabel</c>.</param>
/// <param name="label">Value assigned to the new node's label.</param>
/// <param name="children">Trees attached to the new node, in the order given.</param>
/// <returns>The newly created node.</returns>
internal static Tree CreateNode(Tree top, string label, params Tree[] children)
{
    var headWordKey = typeof(TreeCoreAnnotations.HeadWordLabelAnnotation);
    var headTagKey = typeof(TreeCoreAnnotations.HeadTagLabelAnnotation);

    var source = (CoreLabel)top.Label();

    var nodeLabel = new CoreLabel();
    nodeLabel.SetValue(label);
    nodeLabel.Set(headWordKey, source.Get(headWordKey));
    nodeLabel.Set(headTagKey, source.Get(headTagKey));

    Tree node = new LabeledScoredTreeNode(nodeLabel);
    for (int i = 0; i < children.Length; ++i)
    {
        node.AddChild(children[i]);
    }

    return node;
}
/// <summary>Add a binary node to the existing node on top of the stack</summary>
/// <remarks>
/// Pops the two topmost trees (the first popped is the right child, the second the left child),
/// joins them under a new node labelled with this transition's label, and pushes that node back.
/// The new node's head annotations are copied from whichever child this transition's side selects.
/// </remarks>
/// <param name="state">State whose stack supplies the two children; a new state is returned.</param>
/// <param name="scoreDelta">Amount added to the state's score.</param>
/// <returns>A new state with the combined node on the stack and this transition recorded.</returns>
/// <exception cref="ArgumentException">
/// The side is not recognised, or the head child's label is not a <c>CoreLabel</c>.
/// </exception>
public virtual State Apply(State state, double scoreDelta)
{
    TreeShapedStack<Tree> remaining = state.stack;
    Tree right = remaining.Peek();
    remaining = remaining.Pop();
    Tree left = remaining.Peek();
    remaining = remaining.Pop();

    Tree head;
    if (side == BinaryTransition.Side.Left)
    {
        head = left;
    }
    else if (side == BinaryTransition.Side.Right)
    {
        head = right;
    }
    else
    {
        throw new ArgumentException("Unknown side " + side);
    }

    var headLabel = head.Label() as CoreLabel;
    if (headLabel == null)
    {
        throw new ArgumentException("Stack should have CoreLabel nodes");
    }

    var headWordKey = typeof(TreeCoreAnnotations.HeadWordLabelAnnotation);
    var headTagKey = typeof(TreeCoreAnnotations.HeadTagLabelAnnotation);

    var combinedLabel = new CoreLabel();
    combinedLabel.SetValue(label);
    combinedLabel.Set(headWordKey, headLabel.Get(headWordKey));
    combinedLabel.Set(headTagKey, headLabel.Get(headTagKey));

    Tree combined = new LabeledScoredTreeNode(combinedLabel);
    combined.AddChild(left);
    combined.AddChild(right);

    return new State(
        remaining.Push(combined),
        state.transitions.Push(this),
        state.separators,
        state.sentence,
        state.tokenPosition,
        state.score + scoreDelta,
        false);
}
/// <summary>
/// Initializes a new <see cref="ParseResult"/> from the first sentence of <paramref name="annotation"/>.
/// </summary>
/// <remarks>
/// Reads the constituency parse (skipping its root node), the tokens, and the dependency graph of the
/// sentence at index 0, and stores them in <c>Constituents</c>, <c>Tokens</c> and <c>Dependencies</c>.
/// </remarks>
/// <param name="annotation">The annotated text; only its first sentence is read.</param>
/// <exception cref="System.ArgumentNullException"><paramref name="annotation"/> is null.</exception>
/// <exception cref="System.ArgumentException">
/// The annotation has no sentences, or the first sentence lacks a constituency parse with a child
/// under the root, a token list, or a dependency graph.
/// </exception>
internal ParseResult(Annotation annotation)
{
    if (annotation == null)
    {
        throw new System.ArgumentNullException(nameof(annotation));
    }

    // Match on the java.util.List interface rather than AbstractList so any list implementation is accepted.
    if (!(annotation.get(SentencesAnnotationClass) is java.util.List sentences) || sentences.isEmpty())
    {
        throw new System.ArgumentException("Annotation contains no sentences", nameof(annotation));
    }

    if (!(sentences.get(0) is CoreMap sentence))
    {
        throw new System.ArgumentException("First sentence of the annotation is not a CoreMap", nameof(annotation));
    }

    if (!(sentence.get(TreeAnnotationClass) is Tree constituencyParse))
    {
        throw new System.ArgumentException("Sentence has no constituency parse", nameof(annotation));
    }

    // Skip the ROOT
    Tree childOfRoot = constituencyParse.firstChild();
    if (childOfRoot == null)
    {
        throw new System.ArgumentException("Constituency parse has no child under its root", nameof(annotation));
    }

    Constituents = childOfRoot;
    Constituents.indexLeaves();

    // Build the collection of tokens
    if (!(sentence.get(TokensAnnotationClass) is java.util.List parsedTokens))
    {
        throw new System.ArgumentException("Sentence has no token list", nameof(annotation));
    }

    for (int tokenIndex = 0; tokenIndex < parsedTokens.size(); tokenIndex++)
    {
        // Direct cast: a wrong element type fails here with a descriptive InvalidCastException.
        CoreLabel source = (CoreLabel)parsedTokens.get(tokenIndex);
        Tokens.Add(new ParseToken
        {
            Index = source.index(),
            Word = source.word(),
            Lemma = source.lemma(),
            // These two annotations may legitimately be absent, so "as" (yielding null) is intended.
            PartOfSpeech = source.get(PartOfSpeechAnnotationClass) as string,
            NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
        });
    }

    // Create the list of dependencies between tokens
    if (!(sentence.get(DependencyAnnotationClass) is SemanticGraph dependencyGraph))
    {
        throw new System.ArgumentException("Sentence has no dependency graph", nameof(annotation));
    }

    java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
    while (dependencyGraphEdges.hasNext())
    {
        SemanticGraphEdge edge = (SemanticGraphEdge)dependencyGraphEdges.next();
        string relationName = edge.getRelation().getShortName();
        string relationSpecifier = edge.getRelation().getSpecific();
        IndexedWord governor = edge.getGovernor();
        IndexedWord dependent = edge.getDependent();
        Dependencies.Add((relationName, relationSpecifier, governor.index(), dependent.index()));
    }
}
/// <summary>
/// Builds a sentiment-labelled tree from a parent-pointer encoding of a sentence.
/// </summary>
/// <remarks>
/// One preterminal-over-leaf subtree is created per word and one bare node per remaining index up to
/// the largest parent pointer. Nodes are connected following <paramref name="parentPointers"/>, each
/// node is labelled with a sentiment class derived from its phrase's score, leaf words are escaped,
/// and finally the class-level tregex/tsurgeon pattern pairs are applied to the root.
/// </remarks>
/// <param name="parentPointers">For each node index, the index of its parent; -1 marks the root.</param>
/// <param name="sentence">The words of the sentence; word i is the leaf under node i.</param>
/// <param name="phraseIds">Maps a phrase (list of words) to its phrase id.</param>
/// <param name="sentimentScores">Maps a phrase id to its sentiment score.</param>
/// <param name="escaper">Used to escape each leaf word after labelling.</param>
/// <param name="numClasses">
/// Number of sentiment classes; the label is floor(score * numClasses), capped at numClasses - 1.
/// NOTE(review): this presumably expects scores in [0, 1] - confirm against the data files.
/// </param>
/// <returns>The root after tsurgeon processing (null input root if no -1 pointer was present).</returns>
/// <exception cref="ArgumentException">More than one node has parent pointer -1.</exception>
/// <exception cref="KeyNotFoundException">
/// A phrase has no id (in either escaped or unescaped form), or a phrase id has no score.
/// </exception>
public static Tree ConvertTree(IList<int> parentPointers, IList<string> sentence, IDictionary<IList<string>, int> phraseIds, IDictionary<int, double> sentimentScores, PTBEscapingProcessor escaper, int numClasses)
{
    int maxNode = 0;
    foreach (int parent in parentPointers)
    {
        maxNode = Math.Max(maxNode, parent);
    }

    Tree[] subtrees = new Tree[maxNode + 1];
    for (int i = 0; i < sentence.Count; ++i)
    {
        CoreLabel word = new CoreLabel();
        word.SetValue(sentence[i]);
        Tree leaf = new LabeledScoredTreeNode(word);
        subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
        subtrees[i].AddChild(leaf);
    }

    for (int i = sentence.Count; i <= maxNode; ++i)
    {
        subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
    }

    bool[] connected = new bool[maxNode + 1];
    Tree root = null;
    for (int index = 0; index < parentPointers.Count; ++index)
    {
        if (parentPointers[index] == -1)
        {
            if (root != null)
            {
                // Join the words: concatenating the IList itself would only print its type name.
                throw new ArgumentException("Found two roots for sentence " + string.Join(" ", sentence));
            }

            root = subtrees[index];
        }
        else
        {
            // Walk up the tree structure to make sure that leftmost
            // phrases are added first. Otherwise, if the numbers are
            // inverted, we might get the right phrase added to a parent
            // first, resulting in "case zero in this", for example,
            // instead of "in this case zero".
            // Note that because we keep track of which ones are already
            // connected, we process this at most once per parent, so the
            // overall construction time is still efficient.
            Connect(parentPointers, subtrees, connected, index);
        }
    }

    for (int i = 0; i <= maxNode; ++i)
    {
        IList<Tree> leaves = subtrees[i].GetLeaves();
        IList<string> words = CollectionUtils.TransformAsList(leaves, TransformTreeToWord);

        // First we look for a copy of the phrase with -LRB- -RRB-
        // instead of (). The sentiment trees sometimes have both, and
        // the escaped versions seem to have more reasonable scores.
        // If a particular phrase doesn't have -LRB- -RRB- we fall back
        // to the unescaped versions.
        // TryGetValue is required here: the indexer throws on a missing key and an
        // int can never be null, so a null comparison would never reach the fallback.
        if (!phraseIds.TryGetValue(CollectionUtils.TransformAsList(words, TransformParens), out int phraseId)
            && !phraseIds.TryGetValue(words, out phraseId))
        {
            throw new KeyNotFoundException("Could not find phrase id for phrase " + string.Join(" ", words));
        }

        // TODO: should we make this an option? Perhaps we want cases
        // where the trees have the phrase id and not their class
        if (!sentimentScores.TryGetValue(phraseId, out double score))
        {
            throw new KeyNotFoundException("Could not find sentiment score for phrase id " + phraseId);
        }

        // TODO: make this a numClasses option
        int classLabel = (int)Math.Floor(score * numClasses);
        if (classLabel > numClasses - 1)
        {
            // A score at the top of the range would otherwise land in a class one past the last.
            classLabel = numClasses - 1;
        }

        subtrees[i].Label().SetValue(classLabel.ToString(System.Globalization.CultureInfo.InvariantCulture));
    }

    for (int i = 0; i < sentence.Count; ++i)
    {
        Tree leaf = subtrees[i].Children()[0];
        leaf.Label().SetValue(escaper.EscapeString(leaf.Label().Value()));
    }

    for (int i = 0; i < tregexPatterns.Length; ++i)
    {
        root = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPattern(tregexPatterns[i], tsurgeonPatterns[i], root);
    }

    return root;
}