/// <summary>
        /// Decides whether a shift may be applied to <paramref name="state"/>.
        /// </summary>
        /// <remarks>
        /// A shift requires an unfinished state with tokens left on the queue.
        /// It is additionally refused when the top of the stack is a
        /// right-headed binarized node, and when it would step past the right
        /// end of a constraint that the top of the stack has not satisfied.
        /// TODO: go through the papers and make sure they don't mention any
        /// other conditions where one shouldn't shift
        /// </remarks>
        /// <param name="state">Parser state to test.</param>
        /// <param name="constraints">Span constraints to honor; may be null.</param>
        /// <returns>true when shifting is allowed.</returns>
        public virtual bool IsLegal(State state, IList <ParserConstraint> constraints)
        {
            if (state.finished || state.tokenPosition >= state.sentence.Count)
            {
                return false;
            }

            bool stackIsEmpty = state.stack.Size() == 0;
            Tree stackTop     = stackIsEmpty ? null : state.stack.Peek();

            // Shifting right after a right-headed transition to a partial
            // (binarized) node is disallowed.
            // TODO: no explanation for this, it was just stated in
            // Zhang & Clark 2009
            if (!stackIsEmpty)
            {
                // An "@" prefix marks a temporary node, eg part of a binarized sequence
                bool isTemporary = stackTop.Label().Value().StartsWith("@");
                if (isTemporary && stackTop.Children().Length == 2 && ShiftReduceUtils.GetBinarySide(stackTop) == BinaryTransition.Side.Right)
                {
                    return false;
                }
            }

            if (stackIsEmpty || constraints == null)
            {
                return true;
            }

            // With constraints present, a shift is only allowed if it does not
            // move past the right end of a constraint that is still unsolved.
            foreach (ParserConstraint constraint in constraints)
            {
                bool endsWithConstraint = ShiftReduceUtils.RightIndex(stackTop) == constraint.end - 1;
                if (!endsWithConstraint)
                {
                    // either went past or haven't gotten to this constraint yet
                    continue;
                }
                int topStart = ShiftReduceUtils.LeftIndex(stackTop);
                if (topStart < constraint.start)
                {
                    continue;
                }
                if (topStart > constraint.start || !ShiftReduceUtils.ConstraintMatchesTreeTop(stackTop, constraint))
                {
                    return false;
                }
            }
            return true;
        }
Exemple #2
0
        /// <summary>
        /// Reports where separators fall relative to the head of the tree
        /// found <paramref name="nodeNum"/> entries below the top of the stack.
        /// </summary>
        /// <param name="nodeNum">Zero-based depth into the stack; 0 is the top.</param>
        /// <returns>
        /// null when the stack holds <paramref name="nodeNum"/> or fewer trees;
        /// Head when a separator is recorded at the head index itself;
        /// otherwise Both, Left, Right or None depending on whether the
        /// nearest separator key on each side of the head lies inside the
        /// span of the tree.
        /// </returns>
        internal virtual State.HeadPosition GetSeparator(int nodeNum)
        {
            // Must be qualified with "this": an unqualified "stack" here would
            // bind to a local declared later in the method, which C# rejects
            // as use before declaration.
            if (nodeNum >= this.stack.Size())
            {
                // NOTE(review): assumes State.HeadPosition can hold null -- confirm
                return null;
            }
            TreeShapedStack <Tree> remaining = this.stack;

            for (int i = 0; i < nodeNum; ++i)
            {
                remaining = remaining.Pop();
            }
            Tree node = remaining.Peek();
            int  head = ShiftReduceUtils.HeadIndex(node);

            if (separators[head] != null)
            {
                return State.HeadPosition.Head;
            }

            // The key lookups are held as int? so that "no such key" can be
            // told apart from a real index.
            // NOTE(review): presumably FloorKey / CeilingKey yield null when
            // there is no matching key, as java.util.TreeMap does -- confirm.
            int  left     = ShiftReduceUtils.LeftIndex(node);
            int? nextLeft = separators.FloorKey(head);
            bool hasLeft  = nextLeft != null && nextLeft >= left;

            int  right     = ShiftReduceUtils.RightIndex(node);
            int? nextRight = separators.CeilingKey(head);
            bool hasRight  = nextRight != null && nextRight <= right;

            if (hasLeft && hasRight)
            {
                return State.HeadPosition.Both;
            }
            if (hasLeft)
            {
                return State.HeadPosition.Left;
            }
            if (hasRight)
            {
                return State.HeadPosition.Right;
            }
            return State.HeadPosition.None;
        }
Exemple #3
0
 /// <summary>
 /// Walks <paramref name="tree"/> top-down and adds the value of every
 /// node that is neither a leaf nor a preterminal, and for which
 /// ShiftReduceUtils.IsTemporary is false, to <paramref name="knownStates"/>.
 /// </summary>
 /// <param name="tree">Root of the subtree to scan.</param>
 /// <param name="knownStates">Collection that receives the node values.</param>
 public static void FindKnownStates(Tree tree, ICollection <string> knownStates)
 {
     bool isPhrasalNode = !tree.IsLeaf() && !tree.IsPreTerminal();
     if (!isPhrasalNode)
     {
         // nothing to record at or below a leaf / preterminal
         return;
     }

     bool isTemporary = ShiftReduceUtils.IsTemporary(tree);
     if (!isTemporary)
     {
         knownStates.Add(tree.Value());
     }

     Tree[] kids = tree.Children();
     for (int i = 0; i < kids.Length; ++i)
     {
         FindKnownStates(kids[i], knownStates);
     }
 }
Exemple #4
0
        /// <summary>
        /// Looks up the first separator entry at or after the head of
        /// <paramref name="left"/> and returns the first character of its
        /// value, provided its key does not lie beyond the head of
        /// <paramref name="right"/>.
        /// </summary>
        /// <param name="right">Right-hand tree; may be null.</param>
        /// <param name="left">Left-hand tree; may be null.</param>
        /// <returns>
        /// A one-character string, or null when either tree is null or no
        /// separator entry is found in range.
        /// </returns>
        internal virtual string GetSeparatorBetween(Tree right, Tree left)
        {
            bool haveBothTrees = right != null && left != null;
            if (!haveBothTrees)
            {
                return null;
            }

            int startHead = ShiftReduceUtils.HeadIndex(left);
            int endHead   = ShiftReduceUtils.HeadIndex(right);

            KeyValuePair <int, string> candidate = separators.CeilingEntry(startHead);
            if (candidate == null)
            {
                return null;
            }
            if (candidate.Key > endHead)
            {
                // the closest separator lies past the right-hand head
                return null;
            }

            string separatorText = candidate.Value;
            return Sharpen.Runtime.Substring(separatorText, 0, 1);
        }
        /// <summary>
        /// Finds the tree that encloses <paramref name="subtree"/>: starting
        /// from the leaf at the subtree's left index, climbs
        /// <paramref name="parents"/> until the candidate's right index
        /// reaches the subtree's right index.
        /// </summary>
        /// <param name="subtree">Tree whose span is to be covered.</param>
        /// <param name="parents">Child-to-parent lookup.</param>
        /// <param name="leaves">Leaves, indexed by position.</param>
        /// <returns>The covering tree; a leaf is replaced by its parent.</returns>
        internal static Tree GetEnclosingTree(Tree subtree, IDictionary <Tree, Tree> parents, IList <Tree> leaves)
        {
            // TODO: make this more efficient
            int spanStart = ShiftReduceUtils.LeftIndex(subtree);
            int spanEnd   = ShiftReduceUtils.RightIndex(subtree);

            Tree candidate;
            for (candidate = leaves[spanStart]; ShiftReduceUtils.RightIndex(candidate) < spanEnd; candidate = parents[candidate])
            {
                // climbing is done entirely in the loop header
            }

            return candidate.IsLeaf() ? parents[candidate] : candidate;
        }
        /// <summary>
        /// Shifts two tokens of a short tagged sentence, applies an "NP"
        /// BinaryTransition for each side, and checks that
        /// ShiftReduceUtils.GetBinarySide reports the side that was used.
        /// </summary>
        public virtual void TestBinarySide()
        {
            string[] tokens  = { "This", "is", "a", "short", "test", "." };
            string[] posTags = { "DT", "VBZ", "DT", "JJ", "NN", "." };
            NUnit.Framework.Assert.AreEqual(tokens.Length, posTags.Length);

            IList <TaggedWord> tagged         = SentenceUtils.ToTaggedList(Arrays.AsList(tokens), Arrays.AsList(posTags));
            State              afterTwoShifts = ShiftReduceParser.InitialStateFromTaggedSentence(tagged);
            ShiftTransition    shifter        = new ShiftTransition();

            afterTwoShifts = shifter.Apply(afterTwoShifts);
            afterTwoShifts = shifter.Apply(afterTwoShifts);

            // Both reductions start from the same two-shift state.
            BinaryTransition.Side[] sides = { BinaryTransition.Side.Right, BinaryTransition.Side.Left };
            foreach (BinaryTransition.Side expectedSide in sides)
            {
                BinaryTransition reduce  = new BinaryTransition("NP", expectedSide);
                State            reduced = reduce.Apply(afterTwoShifts);
                NUnit.Framework.Assert.AreEqual(expectedSide, ShiftReduceUtils.GetBinarySide(reduced.stack.Peek()));
            }
        }
Exemple #7
0
        /// <summary>
        /// Counts the separator keys that lie strictly after the head of
        /// <paramref name="left"/> and strictly before the head of
        /// <paramref name="right"/>.
        /// </summary>
        /// <param name="right">Right-hand tree; may be null.</param>
        /// <param name="left">Left-hand tree; may be null.</param>
        /// <returns>The number of such keys; 0 when either tree is null.</returns>
        internal virtual int GetSeparatorCount(Tree right, Tree left)
        {
            if (right == null || left == null)
            {
                return 0;
            }
            int leftHead  = ShiftReduceUtils.HeadIndex(left);
            int rightHead = ShiftReduceUtils.HeadIndex(right);
            int count     = 0;

            // Held as int? so the "no higher key" result can end the loop; a
            // plain int can never equal null.
            // NOTE(review): presumably HigherKey yields null when there is no
            // larger key, as java.util.TreeMap.higherKey does -- confirm.
            int? nextSeparator = separators.HigherKey(leftHead);

            while (nextSeparator != null && nextSeparator < rightHead)
            {
                ++count;
                nextSeparator = separators.HigherKey(nextSeparator.Value);
            }
            return count;
        }
Exemple #8
0
        /// <summary>
        /// Decides whether <paramref name="state"/> may be finalized: it must
        /// be unfinished, have no tokens left to read, and hold exactly one
        /// tree whose value is contained in rootStates.
        /// </summary>
        /// <param name="state">Parser state to test.</param>
        /// <param name="constraints">Span constraints to honor; may be null.</param>
        /// <returns>true when finalizing is allowed.</returns>
        public virtual bool IsLegal(State state, IList <ParserConstraint> constraints)
        {
            if (state.finished)
            {
                return false;
            }
            if (state.tokenPosition < state.sentence.Count)
            {
                return false;
            }
            if (state.stack.Size() != 1)
            {
                return false;
            }
            Tree root = state.stack.Peek();
            if (!rootStates.Contains(root.Value()))
            {
                return false;
            }
            if (constraints == null)
            {
                return true;
            }

            // Only constraints spanning the whole sentence are checked here.
            foreach (ParserConstraint constraint in constraints)
            {
                bool coversSentence = constraint.start == 0 && constraint.end == state.sentence.Count;
                if (coversSentence && !ShiftReduceUtils.ConstraintMatchesTreeTop(root, constraint))
                {
                    return false;
                }
            }
            return true;
        }
        /// <summary>
        /// Adds features for the queue labels at the two ends of
        /// <paramref name="node"/> and at the positions just outside them,
        /// each suffixed with the node's value.  This option also does not
        /// seem to help.
        /// </summary>
        /// <param name="features">List that receives the new features.</param>
        /// <param name="state">State whose queue labels are read.</param>
        /// <param name="nodeName">Prefix for every feature name.</param>
        /// <param name="node">Tree to featurize; nothing is added when null.</param>
        public virtual void AddEdgeFeatures2(IList <string> features, State state, string nodeName, Tree node)
        {
            if (node == null)
            {
                return;
            }

            int    first       = ShiftReduceUtils.LeftIndex(node);
            int    last        = ShiftReduceUtils.RightIndex(node);
            string valueSuffix = GetFeatureFromCoreLabel(GetCoreLabel(node), FeatureFactory.FeatureComponent.Value) + "-";

            // the two edges of the span
            CoreLabel firstLabel = GetQueueLabel(state, first);
            CoreLabel lastLabel  = GetQueueLabel(state, last);
            AddUnaryQueueFeatures(features, firstLabel, nodeName + "EL-" + valueSuffix);
            AddUnaryQueueFeatures(features, lastLabel, nodeName + "ER-" + valueSuffix);

            // the positions immediately before and after the span
            AddUnaryQueueFeatures(features, GetQueueLabel(state, first - 1), nodeName + "EP-" + valueSuffix);
            AddUnaryQueueFeatures(features, GetQueueLabel(state, last + 1), nodeName + "EN-" + valueSuffix);
        }
        /// <summary>
        /// Adds features for the words at the left and right ends of
        /// <paramref name="node"/>, plus a size marker (SZ1, SZ2, SZ3 or SZB)
        /// for the span the node covers.
        /// </summary>
        /// <remarks>
        /// It seems somewhat slow and doesn't help accuracy.
        /// </remarks>
        /// <param name="features">List that receives the new features.</param>
        /// <param name="state">State whose sentence is read.</param>
        /// <param name="nodeName">Prefix for features about <paramref name="node"/>.</param>
        /// <param name="neighborName">Name used for <paramref name="neighbor"/> in binary features.</param>
        /// <param name="node">Tree to featurize; nothing is added when null.</param>
        /// <param name="neighbor">Optional second tree; may be null.</param>
        public virtual void AddEdgeFeatures(IList <string> features, State state, string nodeName, string neighborName, Tree node, Tree neighbor)
        {
            if (node == null)
            {
                return;
            }

            int first  = ShiftReduceUtils.LeftIndex(node);
            int last   = ShiftReduceUtils.RightIndex(node);
            int extent = last - first;

            if (extent == 0)
            {
                // Trees of size one are already featurized
                features.Add(nodeName + "SZ1");
                return;
            }

            AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[first]), nodeName + "EL-");
            AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[last]), nodeName + "ER-");
            if (neighbor != null)
            {
                AddBinaryFeatures(
                    features,
                    nodeName, GetCoreLabel(state.sentence[last]), FeatureFactory.FeatureComponent.Headword, FeatureFactory.FeatureComponent.Headtag,
                    neighborName, GetCoreLabel(neighbor), FeatureFactory.FeatureComponent.Headword, FeatureFactory.FeatureComponent.Headtag);
            }

            switch (extent)
            {
                case 1:
                    features.Add(nodeName + "SZ2");
                    break;

                case 2:
                    features.Add(nodeName + "SZ3");
                    AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[first + 1]), nodeName + "EM-");
                    break;

                default:
                    // larger spans: also look one word in from each edge
                    features.Add(nodeName + "SZB");
                    AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[first + 1]), nodeName + "El-");
                    AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[last - 1]), nodeName + "Er-");
                    break;
            }
        }
        /// <summary>
        /// Decides whether this binary reduce may be applied to
        /// <paramref name="state"/>.
        /// </summary>
        /// <remarks>
        /// At least two items are needed on the state's stack.  Further rules
        /// restrict how temporary nodes may be combined, and the constraints
        /// (if any) forbid reductions that cross the left boundary of a
        /// constraint or that build a temporary node exactly spanning one.
        /// </remarks>
        /// <param name="state">Parser state to test.</param>
        /// <param name="constraints">Span constraints to honor; may be null.</param>
        /// <returns>true when this transition is allowed.</returns>
        public virtual bool IsLegal(State state, IList <ParserConstraint> constraints)
        {
            // some of the rules below come directly from Zhang Clark 09
            if (state.finished || state.stack.Size() <= 1)
            {
                return false;
            }

            int  depth = state.stack.Size();
            Tree top   = state.stack.Peek();
            Tree next  = state.stack.Pop().Peek();

            bool topIsTemporary  = ShiftReduceUtils.IsTemporary(top);
            bool nextIsTemporary = ShiftReduceUtils.IsTemporary(next);

            // at least one of the two nodes on top of stack must be non-temporary
            if (topIsTemporary && nextIsTemporary)
            {
                return false;
            }
            if (topIsTemporary)
            {
                if (side == BinaryTransition.Side.Left || !ShiftReduceUtils.IsEquivalentCategory(label, top.Value()))
                {
                    return false;
                }
            }
            if (nextIsTemporary)
            {
                if (side == BinaryTransition.Side.Right || !ShiftReduceUtils.IsEquivalentCategory(label, next.Value()))
                {
                    return false;
                }
            }

            if (IsBinarized())
            {
                bool queueEmpty  = state.EndOfQueue();
                bool rightHeaded = side == BinaryTransition.Side.Right;

                if (depth == 2)
                {
                    // With only two nodes on the stack, a binarized result is
                    // refused when it would leave a stack of size 1 with an
                    // empty queue, and it must be left-headed.
                    if (queueEmpty || rightHeaded)
                    {
                        return false;
                    }
                }
                else if (ShiftReduceUtils.IsTemporary(state.stack.Pop().Pop().Peek()))
                {
                    // More than two nodes and the third from the top is
                    // temporary: a binarized result is refused once the queue
                    // is empty, and it must be left-headed.
                    if (queueEmpty || rightHeaded)
                    {
                        return false;
                    }
                }
            }

            if (constraints == null)
            {
                return true;
            }

            int leftTop  = ShiftReduceUtils.LeftIndex(top);
            int rightTop = ShiftReduceUtils.RightIndex(top);
            int leftNext = ShiftReduceUtils.LeftIndex(next);

            // Constraints affect binary transitions in two ways: a transition
            // may not cross the left boundary of a constraint, and a
            // transition of exactly the constraint's size may not produce a
            // temporary node.
            foreach (ParserConstraint constraint in constraints)
            {
                if (leftTop == constraint.start)
                {
                    if (rightTop == constraint.end - 1)
                    {
                        // can't binary reduce away from a tree which doesn't match a constraint
                        if (!ShiftReduceUtils.ConstraintMatchesTreeTop(top, constraint))
                        {
                            return false;
                        }
                        continue;
                    }
                    if (rightTop >= constraint.end)
                    {
                        continue;
                    }
                    // can't binary reduce if it would make the tree cross the left boundary
                    return false;
                }

                // Left of the constraint: no harm can be done.  At or past its
                // end: it must already be satisfied.
                if (leftTop < constraint.start || leftTop >= constraint.end)
                {
                    continue;
                }

                // leftTop now lies strictly inside the constraint
                if (leftNext < constraint.start)
                {
                    // combining would cross the left boundary
                    return false;
                }
                if (leftNext > constraint.start)
                {
                    continue;
                }
                // can't transition to a binarized node when there's a constraint that matches.
                if (rightTop == constraint.end - 1 && IsBinarized())
                {
                    return false;
                }
            }
            return true;
        }
 /// <summary>
 /// Tells whether two trees have the same left index and the same right index.
 /// </summary>
 /// <param name="subtree">First tree.</param>
 /// <param name="goldSubtree">Second tree.</param>
 /// <returns>true when both indices agree.</returns>
 internal static bool SpansEqual(Tree subtree, Tree goldSubtree)
 {
     if (ShiftReduceUtils.LeftIndex(subtree) != ShiftReduceUtils.LeftIndex(goldSubtree))
     {
         return false;
     }
     return ShiftReduceUtils.RightIndex(subtree) == ShiftReduceUtils.RightIndex(goldSubtree);
 }
        /// <summary>
        /// Returns an attempt at a "gold" transition given the current state
        /// while parsing a known gold tree.
        /// </summary>
        /// <remarks>
        /// Tree is passed in by index so the oracle can precompute various
        /// statistics about the tree.
        /// If we already finalized, then the correct transition is to idle.
        /// If the stack is empty, shift is the only possible answer.
        /// If the first item on the stack is a correct span, correctly
        /// labeled, and it has unaries transitions above it, then if we are
        /// not doing compound unaries, the next unary up is the correct
        /// answer.  If we are doing compound unaries, and the state does not
        /// already have a transition, then the correct answer is a compound
        /// unary transition to the top of the unary chain.
        /// If the first item is the entire tree, with no remaining unary
        /// transitions, then we need to finalize.
        /// If the first item is a correct span, with or without a correct
        /// label, and there are no unary transitions to be added, then we
        /// must look at the next parent.  If it has the same left side, then
        /// we return a shift transition.  If it has the same right side,
        /// then we look at the next subtree on the stack (which must exist).
        /// If it is also correct, then the transition is to combine the two
        /// subtrees with the correct label and side.
        /// TODO: suppose the correct label is not either child label and the
        /// children are binarized states?  We should see what the
        /// debinarizer does in that case.  Perhaps a post-processing step
        /// If the previous stack item is too small, then any binary reduce
        /// action is legal, with no gold transition.  TODO: can this be improved?
        /// If the previous stack item is too large, perhaps because of
        /// incorrectly attached PP/SBAR, for example, we still need to
        /// binary reduce.  TODO: is that correct?  TODO: we could look back
        /// further in the stack to find hints at a label that would work
        /// better, for example
        /// If the current item is an incorrect span, then look at the
        /// containing item.  If it has the same left side, shift.  If it has
        /// the same right side, binary reduce (producing an exact span if
        /// possible).  If neither edge is correct, then any of shift or
        /// binary reduce are acceptable, with no gold transition.  TODO: can
        /// this be improved?
        /// </remarks>
        /// <param name="index">
        /// Position of the gold tree; used to index parentMaps,
        /// binarizedTrees and leafLists.
        /// </param>
        /// <param name="state">Current parser state whose stack and queue position are inspected.</param>
        /// <returns>
        /// An OracleTransition.  Its transition argument is null in the
        /// branches where no single gold transition is chosen.
        /// NOTE(review): the three boolean arguments are presumably
        /// allowsShift / allowsBinary / allowsEitherSide -- confirm against
        /// OracleTransition.
        /// </returns>
        internal virtual OracleTransition GoldTransition(int index, State state)
        {
            // A finished state can only idle.
            if (state.finished)
            {
                return(new OracleTransition(new IdleTransition(), false, false, false));
            }
            // Nothing on the stack yet: shift is the only option.
            if (state.stack.Size() == 0)
            {
                return(new OracleTransition(new ShiftTransition(), false, false, false));
            }
            // Per-tree lookups selected by index.
            IDictionary <Tree, Tree> parents = parentMaps[index];
            // NOTE(review): gold is assigned but never read in this method.
            Tree             gold            = binarizedTrees[index];
            IList <Tree>     leaves          = leafLists[index];
            Tree             S0          = state.stack.Peek();
            Tree             enclosingS0 = GetEnclosingTree(S0, parents, leaves);
            // A unary transition reported by GetUnaryTransition takes
            // priority over every case below.
            OracleTransition result      = GetUnaryTransition(S0, enclosingS0, parents, compoundUnaries);

            if (result != null)
            {
                return(result);
            }
            // TODO: we could interject that all trees must end with ROOT, for example
            // Queue exhausted and a single tree left: finalize.
            if (state.tokenPosition >= state.sentence.Count && state.stack.Size() == 1)
            {
                return(new OracleTransition(new FinalizeTransition(rootStates), false, false, false));
            }
            // A single tree with tokens remaining: shift.
            if (state.stack.Size() == 1)
            {
                return(new OracleTransition(new ShiftTransition(), false, false, false));
            }
            // Case 1: S0 covers exactly the span of its enclosing gold tree.
            if (SpansEqual(S0, enclosingS0))
            {
                Tree parent = parents[enclosingS0];
                // cannot be root
                while (SpansEqual(parent, enclosingS0))
                {
                    // in case we had missed unary transitions
                    enclosingS0 = parent;
                    parent      = parents[parent];
                }
                if (parent.Children()[0] == enclosingS0)
                {
                    // S0 is the left child of the correct tree
                    return(new OracleTransition(new ShiftTransition(), false, false, false));
                }
                // was the second (right) child.  there must be something else on the stack...
                Tree S1          = state.stack.Pop().Peek();
                Tree enclosingS1 = GetEnclosingTree(S1, parents, leaves);
                if (SpansEqual(S1, enclosingS1))
                {
                    // the two subtrees should be combined
                    return(new OracleTransition(new BinaryTransition(parent.Value(), ShiftReduceUtils.GetBinarySide(parent)), false, false, false));
                }
                // S1 does not match its enclosing gold span: no specific
                // transition is returned.
                return(new OracleTransition(null, false, true, false));
            }
            // Case 2: S0 shares only the left edge with its enclosing tree.
            if (ShiftReduceUtils.LeftIndex(S0) == ShiftReduceUtils.LeftIndex(enclosingS0))
            {
                return(new OracleTransition(new ShiftTransition(), false, false, false));
            }
            // Case 3: S0 shares only the right edge with its enclosing tree.
            if (ShiftReduceUtils.RightIndex(S0) == ShiftReduceUtils.RightIndex(enclosingS0))
            {
                Tree S1          = state.stack.Pop().Peek();
                Tree enclosingS1 = GetEnclosingTree(S1, parents, leaves);
                if (enclosingS0 == enclosingS1)
                {
                    // BinaryTransition with enclosingS0's label, either side, but preferring LEFT
                    return(new OracleTransition(new BinaryTransition(enclosingS0.Value(), BinaryTransition.Side.Left), false, false, true));
                }
                // S1 is smaller than the next tree S0 is supposed to be part of,
                // so we must have a BinaryTransition
                if (ShiftReduceUtils.LeftIndex(S1) > ShiftReduceUtils.LeftIndex(enclosingS0))
                {
                    return(new OracleTransition(null, false, true, true));
                }
                // S1 is larger than the next tree.  This is the worst case
                return(new OracleTransition(null, true, true, true));
            }
            // S0 doesn't match either endpoint of the enclosing tree
            return(new OracleTransition(null, true, true, true));
        }
Exemple #14
0
        /// <summary>
        /// Decides whether this unary transition may be applied to
        /// <paramref name="state"/>.  The stack must hold at least one item
        /// and that item must not already have been unary transformed.
        /// </summary>
        /// <param name="state">Parser state to test.</param>
        /// <param name="constraints">Span constraints to honor; may be null.</param>
        /// <returns>true when this transition is allowed.</returns>
        public virtual bool IsLegal(State state, IList <ParserConstraint> constraints)
        {
            if (state.finished || state.stack.Size() == 0)
            {
                return false;
            }

            Tree top = state.stack.Peek();

            // Disallow unary transitions after we've already had a unary transition
            bool alreadyUnary = top.Children().Length == 1 && !top.IsPreTerminal();
            if (alreadyUnary)
            {
                return false;
            }

            string topLabel = top.Label().Value();
            if (topLabel.Equals(labels[0]))
            {
                // Disallow unary transitions where the final label doesn't change
                return false;
            }

            // TODO: need to think more about when a unary transition is
            // allowed if the top of the stack is temporary
            if (topLabel.StartsWith("@"))
            {
                // For a binarized top, the bottom of the unary chain has to be
                // the same type as the top with its "@" stripped.
                string chainBottom = labels[labels.Length - 1];
                if (!chainBottom.Equals(Sharpen.Runtime.Substring(topLabel, 1)))
                {
                    return false;
                }
            }

            if (isRoot)
            {
                // A root transition needs a single tree and an empty queue.
                bool onlyTreeRemains = state.stack.Size() <= 1 && state.EndOfQueue();
                if (!onlyTreeRemains)
                {
                    return false;
                }
            }

            if (constraints == null)
            {
                return true;
            }

            // A constraint only matters when the top tree spans it exactly
            // and does not already match it; then one of this transition's
            // labels has to match instead.
            foreach (ParserConstraint constraint in constraints)
            {
                bool sameSpan = ShiftReduceUtils.LeftIndex(top) == constraint.start && ShiftReduceUtils.RightIndex(top) == constraint.end - 1;
                if (!sameSpan)
                {
                    continue;
                }
                if (constraint.state.Matcher(top.Value()).Matches())
                {
                    continue;
                }

                bool satisfied = false;
                for (int i = 0; i < labels.Length && !satisfied; ++i)
                {
                    satisfied = constraint.state.Matcher(labels[i]).Matches();
                }
                if (!satisfied)
                {
                    return false;
                }
            }
            return true;
        }
Exemple #15
0
        private Pair <int, int> TrainTree(int index, IList <Tree> binarizedTrees, IList <IList <ITransition> > transitionLists, IList <PerceptronModel.Update> updates, Oracle oracle)
        {
            int              numCorrect = 0;
            int              numWrong   = 0;
            Tree             tree       = binarizedTrees[index];
            ReorderingOracle reorderer  = null;

            if (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle || op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
            {
                reorderer = new ReorderingOracle(op);
            }
            // TODO.  This training method seems to be working in that it
            // trains models just like the gold and early termination methods do.
            // However, it causes the feature space to go crazy.  Presumably
            // leaving out features with low weights or low frequencies would
            // significantly help with that.  Otherwise, not sure how to keep
            // it under control.
            if (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Oracle)
            {
                State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
                while (!state.IsFinished())
                {
                    IList <string>     features   = featureFactory.Featurize(state);
                    ScoredObject <int> prediction = FindHighestScoringTransition(state, features, true);
                    if (prediction == null)
                    {
                        throw new AssertionError("Did not find a legal transition");
                    }
                    int              predictedNum = prediction.Object();
                    ITransition      predicted    = transitionIndex.Get(predictedNum);
                    OracleTransition gold         = oracle.GoldTransition(index, state);
                    if (gold.IsCorrect(predicted))
                    {
                        numCorrect++;
                        if (gold.transition != null && !gold.transition.Equals(predicted))
                        {
                            int transitionNum = transitionIndex.IndexOf(gold.transition);
                            if (transitionNum < 0)
                            {
                                // TODO: do we want to add unary transitions which are
                                // only possible when the parser has gone off the rails?
                                continue;
                            }
                            updates.Add(new PerceptronModel.Update(features, transitionNum, -1, learningRate));
                        }
                    }
                    else
                    {
                        numWrong++;
                        int transitionNum = -1;
                        if (gold.transition != null)
                        {
                            transitionNum = transitionIndex.IndexOf(gold.transition);
                        }
                        // TODO: this can theoretically result in a -1 gold
                        // transition if the transition exists, but is a
                        // CompoundUnaryTransition which only exists because the
                        // parser is wrong.  Do we want to add those transitions?
                        updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
                    }
                    state = predicted.Apply(state);
                }
            }
            else
            {
                if (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Beam || op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
                {
                    if (op.TrainOptions().beamSize <= 0)
                    {
                        throw new ArgumentException("Illegal beam size " + op.TrainOptions().beamSize);
                    }
                    IList <ITransition>   transitions = Generics.NewLinkedList(transitionLists[index]);
                    PriorityQueue <State> agenda      = new PriorityQueue <State>(op.TrainOptions().beamSize + 1, ScoredComparator.AscendingComparator);
                    State goldState = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
                    agenda.Add(goldState);
                    // int transitionCount = 0;
                    while (transitions.Count > 0)
                    {
                        ITransition           goldTransition = transitions[0];
                        ITransition           highestScoringTransitionFromGoldState = null;
                        double                highestScoreFromGoldState             = 0.0;
                        PriorityQueue <State> newAgenda = new PriorityQueue <State>(op.TrainOptions().beamSize + 1, ScoredComparator.AscendingComparator);
                        State highestScoringState       = null;
                        State highestCurrentState       = null;
                        foreach (State currentState in agenda)
                        {
                            bool           isGoldState = (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam && goldState.AreTransitionsEqual(currentState));
                            IList <string> features    = featureFactory.Featurize(currentState);
                            ICollection <ScoredObject <int> > stateTransitions = FindHighestScoringTransitions(currentState, features, true, op.TrainOptions().beamSize, null);
                            foreach (ScoredObject <int> transition in stateTransitions)
                            {
                                State newState = transitionIndex.Get(transition.Object()).Apply(currentState, transition.Score());
                                newAgenda.Add(newState);
                                if (newAgenda.Count > op.TrainOptions().beamSize)
                                {
                                    newAgenda.Poll();
                                }
                                if (highestScoringState == null || highestScoringState.Score() < newState.Score())
                                {
                                    highestScoringState = newState;
                                    highestCurrentState = currentState;
                                }
                                if (isGoldState && (highestScoringTransitionFromGoldState == null || transition.Score() > highestScoreFromGoldState))
                                {
                                    highestScoringTransitionFromGoldState = transitionIndex.Get(transition.Object());
                                    highestScoreFromGoldState             = transition.Score();
                                }
                            }
                        }
                        // This can happen if the REORDER_BEAM method backs itself
                        // into a corner, such as transitioning to something that
                        // can't have a FinalizeTransition applied.  This doesn't
                        // happen for the BEAM method because in that case the correct
                        // state (eg one with ROOT) isn't on the agenda so it stops.
                        if (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam && highestScoringTransitionFromGoldState == null)
                        {
                            break;
                        }
                        State newGoldState = goldTransition.Apply(goldState, 0.0);
                        // if highest scoring state used the correct transition, no training
                        // otherwise, down the last transition, up the correct
                        if (!newGoldState.AreTransitionsEqual(highestScoringState))
                        {
                            ++numWrong;
                            IList <string> goldFeatures   = featureFactory.Featurize(goldState);
                            int            lastTransition = transitionIndex.IndexOf(highestScoringState.transitions.Peek());
                            updates.Add(new PerceptronModel.Update(featureFactory.Featurize(highestCurrentState), -1, lastTransition, learningRate));
                            updates.Add(new PerceptronModel.Update(goldFeatures, transitionIndex.IndexOf(goldTransition), -1, learningRate));
                            if (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Beam)
                            {
                                // If the correct state has fallen off the agenda, break
                                if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                                {
                                    break;
                                }
                                else
                                {
                                    transitions.Remove(0);
                                }
                            }
                            else
                            {
                                if (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
                                {
                                    if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                                    {
                                        if (!reorderer.Reorder(goldState, highestScoringTransitionFromGoldState, transitions))
                                        {
                                            break;
                                        }
                                        newGoldState = highestScoringTransitionFromGoldState.Apply(goldState);
                                        if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                                        {
                                            break;
                                        }
                                    }
                                    else
                                    {
                                        transitions.Remove(0);
                                    }
                                }
                            }
                        }
                        else
                        {
                            ++numCorrect;
                            transitions.Remove(0);
                        }
                        goldState = newGoldState;
                        agenda    = newAgenda;
                    }
                }
                else
                {
                    if (op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle || op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod.EarlyTermination || op.TrainOptions().trainingMethod == ShiftReduceTrainOptions.TrainingMethod
                        .Gold)
                    {
                        State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
                        IList <ITransition> transitions = transitionLists[index];
                        transitions = Generics.NewLinkedList(transitions);
                        bool keepGoing = true;
                        while (transitions.Count > 0 && keepGoing)
                        {
                            ITransition    transition    = transitions[0];
                            int            transitionNum = transitionIndex.IndexOf(transition);
                            IList <string> features      = featureFactory.Featurize(state);
                            int            predictedNum  = FindHighestScoringTransition(state, features, false).Object();
                            ITransition    predicted     = transitionIndex.Get(predictedNum);
                            if (transitionNum == predictedNum)
                            {
                                transitions.Remove(0);
                                state = transition.Apply(state);
                                numCorrect++;
                            }
                            else
                            {
                                numWrong++;
                                // TODO: allow weighted features, weighted training, etc
                                updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
                                switch (op.TrainOptions().trainingMethod)
                                {
                                case ShiftReduceTrainOptions.TrainingMethod.EarlyTermination:
                                {
                                    keepGoing = false;
                                    break;
                                }

                                case ShiftReduceTrainOptions.TrainingMethod.Gold:
                                {
                                    transitions.Remove(0);
                                    state = transition.Apply(state);
                                    break;
                                }

                                case ShiftReduceTrainOptions.TrainingMethod.ReorderOracle:
                                {
                                    keepGoing = reorderer.Reorder(state, predicted, transitions);
                                    if (keepGoing)
                                    {
                                        state = predicted.Apply(state);
                                    }
                                    break;
                                }

                                default:
                                {
                                    throw new ArgumentException("Unexpected method " + op.TrainOptions().trainingMethod);
                                }
                                }
                            }
                        }
                    }
                }
            }
            return(Pair.MakePair(numCorrect, numWrong));
        }
// Example #16
// 0
 /// <summary>
 /// Returns a transition which might not even be part of the model,
 /// but will hopefully allow progress in an otherwise stuck parse
 /// TODO: perhaps we want to create an EmergencyTransition class
 /// which indicates that something has gone wrong
 /// </summary>
 /// <remarks>
 /// The checks are tried in order and the first one that applies wins:
 /// a unary transition that would satisfy an unmatched constraint spanning
 /// the top node; a unary transition that strips the leading marker from a
 /// temporary top node; a unary transition to a root state when a single
 /// non-root node remains and all tokens are consumed; and finally a binary
 /// transition that closes off a temporary node at or just below the top.
 /// NOTE(review): this method calls <c>state.stack.Pop()</c> and afterwards
 /// still reads <c>state.stack.Peek()</c> as the same top node, so it
 /// assumes Pop() returns the remaining stack without mutating the
 /// original - confirm against the stack implementation.
 /// </remarks>
 /// <param name="state">The parser state that has no legal model transition.</param>
 /// <param name="constraints">Constraints on the parse; may be null, in which case the constraint check is skipped.</param>
 /// <returns>A transition to try, or null if none of the checks applies.</returns>
 public virtual ITransition FindEmergencyTransition(State state, IList <ParserConstraint> constraints)
 {
     if (state.stack.Size() == 0)
     {
         return(null);
     }
     // See if there is a constraint whose boundaries match the end
     // points of the top node on the stack.  If so, we can apply a
     // UnaryTransition / CompoundUnaryTransition if that would solve
     // the constraint
     if (constraints != null)
     {
         Tree top = state.stack.Peek();
         foreach (ParserConstraint constraint in constraints)
         {
             // constraint.end is compared against RightIndex + 1, i.e. it is treated as exclusive
             if (ShiftReduceUtils.LeftIndex(top) != constraint.start || ShiftReduceUtils.RightIndex(top) != constraint.end - 1)
             {
                 continue;
             }
             if (ShiftReduceUtils.ConstraintMatchesTreeTop(top, constraint))
             {
                 continue;
             }
             // found an unmatched constraint that can be fixed with a unary transition
             // now we need to find a matching state for the transition
             foreach (string label in knownStates)
             {
                 if (constraint.state.Matcher(label).Matches())
                 {
                     return((op.compoundUnaries) ? new CompoundUnaryTransition(Java.Util.Collections.SingletonList(label), false) : new UnaryTransition(label, false));
                 }
             }
         }
     }
     // A temporary node that is alone on the stack, or sits on top of another
     // temporary node, is relabeled with its own label minus the first character.
     if (ShiftReduceUtils.IsTemporary(state.stack.Peek()) && (state.stack.Size() == 1 || ShiftReduceUtils.IsTemporary(state.stack.Pop().Peek())))
     {
         return((op.compoundUnaries) ? new CompoundUnaryTransition(Java.Util.Collections.SingletonList(Sharpen.Runtime.Substring(state.stack.Peek().Value(), 1)), false) : new UnaryTransition(Sharpen.Runtime.Substring(state.stack.Peek().Value(), 1),
                                                                                                                                                                                                false));
     }
     if (state.stack.Size() == 1 && state.tokenPosition >= state.sentence.Count)
     {
         // either need to finalize or transition to a root state
         if (!rootStates.Contains(state.stack.Peek().Value()))
         {
             // Use the first root state in iteration order.  An enumerator's
             // Current is not valid until MoveNext() has been called, so
             // reading GetEnumerator().Current directly does not give the
             // first element; foreach advances (and disposes) the enumerator.
             foreach (string root in rootStates)
             {
                 return((op.compoundUnaries) ? new CompoundUnaryTransition(Java.Util.Collections.SingletonList(root), false) : new UnaryTransition(root, false));
             }
             // No root states are known: fall through, which returns null
             // below because the stack holds a single element.
         }
     }
     if (state.stack.Size() == 1)
     {
         return(null);
     }
     // At least two nodes on the stack from here on.
     if (ShiftReduceUtils.IsTemporary(state.stack.Peek()))
     {
         return(new BinaryTransition(Sharpen.Runtime.Substring(state.stack.Peek().Value(), 1), BinaryTransition.Side.Right));
     }
     if (ShiftReduceUtils.IsTemporary(state.stack.Pop().Peek()))
     {
         return(new BinaryTransition(Sharpen.Runtime.Substring(state.stack.Pop().Peek().Value(), 1), BinaryTransition.Side.Left));
     }
     return(null);
 }