Пример #1
0
        /// <summary>
        /// Creates the starting <see cref="State"/> for a sentence whose words already carry tags.
        /// </summary>
        /// <remarks>
        /// Every word becomes a preterminal: a tag node with a single word node below it. Both
        /// labels are indexed from 1 and annotated with the head word and head tag labels.
        /// A <c>CoreLabel</c> input is reused as the word label, so its index is overwritten.
        /// </remarks>
        /// <param name="words">The tagged words of the sentence, in order.</param>
        /// <returns>A state built from the list of preterminal trees.</returns>
        /// <exception cref="ArgumentException">
        /// A word is neither a <c>CoreLabel</c> nor an <c>IHasTag</c>, or its tag is null.
        /// </exception>
        public static State InitialStateFromTaggedSentence <_T0>(IList <_T0> words)
            where _T0 : IHasWord
        {
            IList <Tree> preterminals = Generics.NewArrayList();

            // "position" is the 1-based token index; tools downstream from the parser expect it.
            for (int position = 1; position <= words.Count; position++)
            {
                IHasWord  token     = words[position - 1];
                CoreLabel wordLabel = token as CoreLabel;
                string    tag;

                if (wordLabel != null)
                {
                    // Already a CoreLabel: reuse it and read its tag directly.
                    tag = wordLabel.Tag();
                }
                else
                {
                    // Any other word type is copied into a fresh CoreLabel.
                    wordLabel = new CoreLabel();
                    wordLabel.SetValue(token.Word());
                    wordLabel.SetWord(token.Word());

                    IHasTag tagged = token as IHasTag;
                    if (tagged == null)
                    {
                        throw new ArgumentException("Expected tagged words");
                    }
                    tag = tagged.Tag();
                    wordLabel.SetTag(tag);
                }

                if (tag == null)
                {
                    throw new ArgumentException("Input word not tagged");
                }

                CoreLabel tagLabel = new CoreLabel();
                tagLabel.SetValue(tag);

                // The parser relies on these indices internally, so any index a
                // pre-indexed label arrived with is deliberately replaced.
                wordLabel.SetIndex(position);
                tagLabel.SetIndex(position);

                LabeledScoredTreeNode wordNode = new LabeledScoredTreeNode(wordLabel);
                LabeledScoredTreeNode tagNode  = new LabeledScoredTreeNode(tagLabel);
                tagNode.AddChild(wordNode);

                // Both labels point at the same head word / head tag pair.
                // TODO: can we get away with not setting these on the wordLabel?
                foreach (CoreLabel target in new[] { wordLabel, tagLabel })
                {
                    target.Set(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation), wordLabel);
                    target.Set(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation), tagLabel);
                }

                preterminals.Add(tagNode);
            }

            return new State(preterminals);
        }
        /// <summary>
        /// Builds a new tree node labelled <paramref name="label"/> that takes its head word and
        /// head tag annotations from the label of <paramref name="top"/>.
        /// </summary>
        /// <param name="top">Tree whose label (cast to <c>CoreLabel</c>) supplies the head annotations.</param>
        /// <param name="label">Value assigned to the new node's label.</param>
        /// <param name="children">Trees attached, in order, as children of the new node.</param>
        /// <returns>The newly created node.</returns>
        internal static Tree CreateNode(Tree top, string label, params Tree[] children)
        {
            CoreLabel source = (CoreLabel)top.Label();

            // Read the head information first, then copy it onto the fresh label.
            var headWord = source.Get(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation));
            var headTag  = source.Get(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation));

            CoreLabel nodeLabel = new CoreLabel();
            nodeLabel.SetValue(label);
            nodeLabel.Set(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation), headWord);
            nodeLabel.Set(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation), headTag);

            Tree node = new LabeledScoredTreeNode(nodeLabel);
            for (int i = 0; i < children.Length; i++)
            {
                node.AddChild(children[i]);
            }
            return node;
        }
        /// <summary>
        /// Replaces the two topmost trees on the stack with a single binary node that has them
        /// as its left and right children.
        /// </summary>
        /// <param name="state">State whose stack holds at least the two trees to combine.</param>
        /// <param name="scoreDelta">Amount added to the state's score in the returned state.</param>
        /// <returns>A new state with the combined node on top and this transition recorded.</returns>
        /// <exception cref="ArgumentException">
        /// The configured side is not Left or Right, or the head tree's label is not a <c>CoreLabel</c>.
        /// </exception>
        public virtual State Apply(State state, double scoreDelta)
        {
            // The top of the stack is the right child; the element beneath it is the left child.
            Tree rightChild = state.stack.Peek();
            TreeShapedStack <Tree> belowRight = state.stack.Pop();
            Tree leftChild = belowRight.Peek();
            TreeShapedStack <Tree> remainder = belowRight.Pop();

            // The transition's side decides which child donates the head annotations.
            Tree head;
            if (side == BinaryTransition.Side.Left)
            {
                head = leftChild;
            }
            else if (side == BinaryTransition.Side.Right)
            {
                head = rightChild;
            }
            else
            {
                throw new ArgumentException("Unknown side " + side);
            }

            CoreLabel headLabel = head.Label() as CoreLabel;
            if (headLabel == null)
            {
                throw new ArgumentException("Stack should have CoreLabel nodes");
            }

            CoreLabel parentLabel = new CoreLabel();
            parentLabel.SetValue(label);
            parentLabel.Set(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation), headLabel.Get(typeof(TreeCoreAnnotations.HeadWordLabelAnnotation)));
            parentLabel.Set(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation), headLabel.Get(typeof(TreeCoreAnnotations.HeadTagLabelAnnotation)));

            Tree parent = new LabeledScoredTreeNode(parentLabel);
            parent.AddChild(leftChild);
            parent.AddChild(rightChild);

            TreeShapedStack <Tree> updatedStack = remainder.Push(parent);
            return new State(
                updatedStack,
                state.transitions.Push(this),
                state.separators,
                state.sentence,
                state.tokenPosition,
                state.score + scoreDelta,
                false);
        }
Пример #4
0
        /// <summary>
        /// Initializes a new <see cref="ParseResult"/> from the first sentence of
        /// <paramref name="annotation"/>.
        /// </summary>
        /// <remarks>
        /// Stores the first child of the sentence's constituency tree (skipping ROOT) in
        /// <c>Constituents</c>, one <c>ParseToken</c> per token in <c>Tokens</c>, and one tuple per
        /// dependency edge in <c>Dependencies</c>. Only the first sentence is read.
        /// </remarks>
        /// <param name="annotation">Annotation holding the sentence to convert.</param>
        /// <exception cref="System.ArgumentNullException"><paramref name="annotation"/> is null.</exception>
        /// <exception cref="System.ArgumentException">
        /// The annotation has no sentences, or the first sentence lacks a constituency tree,
        /// tokens or a dependency graph of the expected type.
        /// </exception>
        internal ParseResult(Annotation annotation)
        {
            if (annotation == null)
            {
                throw new System.ArgumentNullException(nameof(annotation));
            }

            // Each "as" result is checked so a missing annotation produces a clear error
            // instead of a NullReferenceException further down.
            var sentences = annotation.get(SentencesAnnotationClass) as java.util.AbstractList;
            if (sentences == null || sentences.size() == 0)
            {
                throw new System.ArgumentException("Annotation contains no sentences.", nameof(annotation));
            }

            var sentence = sentences.get(0) as CoreMap;
            if (sentence == null)
            {
                throw new System.ArgumentException("First sentence of the annotation is not a CoreMap.", nameof(annotation));
            }

            var constituencyParse = sentence.get(TreeAnnotationClass) as LabeledScoredTreeNode;
            if (constituencyParse == null)
            {
                throw new System.ArgumentException("Sentence has no constituency parse tree.", nameof(annotation));
            }

            // Skip the ROOT
            Tree childOfRoot = constituencyParse.firstChild();
            if (childOfRoot == null)
            {
                throw new System.ArgumentException("Constituency parse tree has no child below ROOT.", nameof(annotation));
            }

            Constituents = childOfRoot;
            Constituents.indexLeaves();

            // Build the collection of tokens
            var parsedTokens = sentence.get(TokensAnnotationClass) as java.util.AbstractList;
            if (parsedTokens == null)
            {
                throw new System.ArgumentException("Sentence has no token list.", nameof(annotation));
            }

            for (int tokenIndex = 0; tokenIndex < parsedTokens.size(); tokenIndex++)
            {
                // Direct cast: an element that is not a CoreLabel fails here, at its cause.
                CoreLabel source = (CoreLabel)parsedTokens.get(tokenIndex);
                Tokens.Add(new ParseToken
                {
                    Index            = source.index(),
                    Word             = source.word(),
                    Lemma            = source.lemma(),
                    // These two stay null when the token has no such annotation.
                    PartOfSpeech     = source.get(PartOfSpeechAnnotationClass) as string,
                    NamedEntityClass = source.get(NamedEntityTagAnnotationClass) as string,
                });
            }

            // Create the list of dependencies between tokens
            var dependencyGraph = sentence.get(DependencyAnnotationClass) as SemanticGraph;
            if (dependencyGraph == null)
            {
                throw new System.ArgumentException("Sentence has no dependency graph.", nameof(annotation));
            }

            java.util.Iterator dependencyGraphEdges = dependencyGraph.edgeIterable().iterator();
            while (dependencyGraphEdges.hasNext())
            {
                SemanticGraphEdge edge = (SemanticGraphEdge)dependencyGraphEdges.next();

                string      relationName      = edge.getRelation().getShortName();
                string      relationSpecifier = edge.getRelation().getSpecific();
                IndexedWord governor          = edge.getGovernor();
                IndexedWord dependent         = edge.getDependent();

                Dependencies.Add((relationName, relationSpecifier, governor.index(), dependent.index()));
            }
        }
        // static class
        /// <summary>
        /// Builds a sentiment-labelled tree for a sentence from its parent-pointer encoding.
        /// </summary>
        /// <remarks>
        /// The first <c>sentence.Count</c> nodes are preterminals, each wrapping one word leaf;
        /// the remaining nodes up to the largest parent index start out empty. Nodes are linked
        /// via <c>Connect</c>, every node is labelled with the sentiment class of the phrase it
        /// spans, leaf words are escaped, and finally the tregex/tsurgeon patterns are applied.
        /// </remarks>
        /// <param name="parentPointers">
        /// One entry per node; -1 marks the root, any other value is handed to <c>Connect</c>
        /// (presumably the index of the node's parent).
        /// </param>
        /// <param name="sentence">The words of the sentence, in order.</param>
        /// <param name="phraseIds">
        /// Maps a phrase (list of words) to its id. NOTE(review): lookups only succeed if this
        /// dictionary compares lists by content rather than by reference; confirm at the call site.
        /// </param>
        /// <param name="sentimentScores">Maps a phrase id to its sentiment score.</param>
        /// <param name="escaper">Escapes each leaf word after the class labels are assigned.</param>
        /// <param name="numClasses">Number of sentiment classes; labels are clamped to at most numClasses - 1.</param>
        /// <returns>The root after pattern processing; no root is assigned when no entry is -1.</returns>
        /// <exception cref="ArgumentException">More than one entry of parentPointers is -1.</exception>
        /// <exception cref="KeyNotFoundException">
        /// A phrase has no id (escaped or unescaped), or a phrase id has no sentiment score.
        /// </exception>
        public static Tree ConvertTree(IList <int> parentPointers, IList <string> sentence, IDictionary <IList <string>, int> phraseIds, IDictionary <int, double> sentimentScores, PTBEscapingProcessor escaper, int numClasses)
        {
            int maxNode = 0;

            foreach (int parent in parentPointers)
            {
                maxNode = Math.Max(maxNode, parent);
            }

            Tree[] subtrees = new Tree[maxNode + 1];

            // Word positions become preterminals with a single leaf holding the raw word.
            for (int i = 0; i < sentence.Count; ++i)
            {
                CoreLabel word = new CoreLabel();
                word.SetValue(sentence[i]);
                Tree leaf = new LabeledScoredTreeNode(word);
                subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
                subtrees[i].AddChild(leaf);
            }
            // Everything above the words is an (initially childless) internal node.
            for (int i = sentence.Count; i <= maxNode; ++i)
            {
                subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
            }

            bool[] connected = new bool[maxNode + 1];
            Tree   root      = null;

            for (int index = 0; index < parentPointers.Count; ++index)
            {
                if (parentPointers[index] == -1)
                {
                    if (root != null)
                    {
                        throw new ArgumentException("Found two roots for sentence " + string.Join(" ", sentence));
                    }
                    root = subtrees[index];
                }
                else
                {
                    // Walk up the tree structure to make sure that leftmost
                    // phrases are added first.  Otherwise, if the numbers are
                    // inverted, we might get the right phrase added to a parent
                    // first, resulting in "case zero in this", for example,
                    // instead of "in this case zero"
                    // Note that because we keep track of which ones are already
                    // connected, we process this at most once per parent, so the
                    // overall construction time is still efficient.
                    Connect(parentPointers, subtrees, connected, index);
                }
            }

            for (int i = 0; i <= maxNode; ++i)
            {
                IList <Tree>   leaves = subtrees[i].GetLeaves();
                IList <string> words  = CollectionUtils.TransformAsList(leaves, TransformTreeToWord);
                // First we look for a copy of the phrase with -LRB- -RRB-
                // instead of ().  The sentiment trees sometimes have both, and
                // the escaped versions seem to have more reasonable scores.
                // If a particular phrase doesn't have -LRB- -RRB- we fall back
                // to the unescaped versions.
                IList <string> escapedWords = CollectionUtils.TransformAsList(words, TransformParens);

                // TryGetValue is required here: the indexer throws on a missing key and an
                // int can never be null, so an indexer + null check could never fall back.
                int phraseId;
                if (!phraseIds.TryGetValue(escapedWords, out phraseId) &&
                    !phraseIds.TryGetValue(words, out phraseId))
                {
                    throw new KeyNotFoundException("Could not find phrase id for phrase " + string.Join(" ", sentence));
                }

                // TODO: should we make this an option?  Perhaps we want cases
                // where the trees have the phrase id and not their class
                double score;
                if (!sentimentScores.TryGetValue(phraseId, out score))
                {
                    throw new KeyNotFoundException("Could not find sentiment score for phrase id " + phraseId);
                }

                // Bucket the score into a class; the clamp below keeps the top bucket
                // from overflowing (e.g. when score * numClasses reaches numClasses).
                int classLabel = (int)Math.Floor(score * numClasses);
                if (classLabel > numClasses - 1)
                {
                    classLabel = numClasses - 1;
                }
                subtrees[i].Label().SetValue(classLabel.ToString(System.Globalization.CultureInfo.InvariantCulture));
            }

            // Escape the words only now, so the phrase lookups above saw the raw text.
            for (int i = 0; i < sentence.Count; ++i)
            {
                Tree leaf = subtrees[i].Children()[0];
                leaf.Label().SetValue(escaper.EscapeString(leaf.Label().Value()));
            }

            for (int i = 0; i < tregexPatterns.Length; ++i)
            {
                root = Edu.Stanford.Nlp.Trees.Tregex.Tsurgeon.Tsurgeon.ProcessPattern(tregexPatterns[i], tsurgeonPatterns[i], root);
            }
            return root;
        }