Esempio n. 1
0
        /// <summary>
        /// Applies a right-sided "NP" <see cref="BinaryTransition"/> to the state
        /// returned by <c>BuildState(2)</c> and verifies the resulting token
        /// position, stack size, and the label and child count of the node on top
        /// of the stack.  The top node and its second child are then handed to
        /// <c>CheckHeads</c>.
        /// </summary>
        public virtual void TestRightTransition()
        {
            State            initial = BuildState(2);
            BinaryTransition rightNp = new BinaryTransition("NP", BinaryTransition.Side.Right);
            State            result  = rightNp.Apply(initial);

            NUnit.Framework.Assert.AreEqual(2, result.tokenPosition);
            NUnit.Framework.Assert.AreEqual(1, result.stack.Size());

            // The single remaining stack entry is the node produced by the transition.
            var top = result.stack.Peek();
            NUnit.Framework.Assert.AreEqual(2, top.Children().Length);
            NUnit.Framework.Assert.AreEqual("NP", top.Value());
            CheckHeads(top, top.Children()[1]);
        }
        /// <summary>
        /// Shifts the first two words of a six-word tagged sentence, then applies
        /// an "NP" <see cref="BinaryTransition"/> once with each side to that same
        /// state and verifies that <c>ShiftReduceUtils.GetBinarySide</c> reports
        /// the side that was used for the node on top of the resulting stack.
        /// </summary>
        public virtual void TestBinarySide()
        {
            string[] words = { "This", "is", "a", "short", "test", "." };
            string[] tags  = { "DT", "VBZ", "DT", "JJ", "NN", "." };
            NUnit.Framework.Assert.AreEqual(words.Length, tags.Length);

            IList <TaggedWord> tagged  = SentenceUtils.ToTaggedList(Arrays.AsList(words), Arrays.AsList(tags));
            State              initial = ShiftReduceParser.InitialStateFromTaggedSentence(tagged);

            // Two shifts, so that a binary transition has two stack entries to combine.
            ShiftTransition shift          = new ShiftTransition();
            State           afterOneShift  = shift.Apply(initial);
            State           afterTwoShifts = shift.Apply(afterOneShift);

            BinaryTransition rightNp     = new BinaryTransition("NP", BinaryTransition.Side.Right);
            State            rightResult = rightNp.Apply(afterTwoShifts);
            NUnit.Framework.Assert.AreEqual(BinaryTransition.Side.Right, ShiftReduceUtils.GetBinarySide(rightResult.stack.Peek()));

            // Both transitions are applied to the same two-shift state.
            BinaryTransition leftNp     = new BinaryTransition("NP", BinaryTransition.Side.Left);
            State            leftResult = leftNp.Apply(afterTwoShifts);
            NUnit.Framework.Assert.AreEqual(BinaryTransition.Side.Left, ShiftReduceUtils.GetBinarySide(leftResult.stack.Peek()));
        }
        /// <summary>
        /// Given a predicted transition and a state, this method rearranges
        /// the list of transitions and returns whether or not training can
        /// continue.
        /// </summary>
        /// <param name="state">
        /// State the prediction was made in.  Only its stack size, the transition
        /// on top of <c>state.transitions</c>, and <c>EndOfQueue()</c> are read.
        /// </param>
        /// <param name="chosenTransition">The transition that was predicted.</param>
        /// <param name="transitions">
        /// Remaining gold transitions, next one first.  Modified in place; entries
        /// may already have been removed even when <c>false</c> is returned.
        /// </param>
        /// <returns>
        /// <c>true</c> if training can continue with the (possibly rewritten)
        /// gold transition list, <c>false</c> otherwise.
        /// </returns>
        /// <exception cref="AssertionError">
        /// <paramref name="transitions"/> is empty, either on entry or after
        /// leading unary gold transitions have been skipped.
        /// </exception>
        internal virtual bool Reorder(State state, ITransition chosenTransition, IList <ITransition> transitions)
        {
            if (transitions.Count == 0)
            {
                throw new AssertionError();
            }
            ITransition goldTransition = transitions[0];

            // If the transition is gold, we are already satisfied.
            if (chosenTransition.Equals(goldTransition))
            {
                // RemoveAt removes by position; IList<T>.Remove expects an item.
                transitions.RemoveAt(0);
                return true;
            }
            // If the transition should have been a Unary/CompoundUnary
            // transition and it was something else or a different Unary
            // transition, see if the transition sequence can be continued
            // after skipping past the unary
            if ((goldTransition is UnaryTransition) || (goldTransition is CompoundUnaryTransition))
            {
                transitions.RemoveAt(0);
                return Reorder(state, chosenTransition, transitions);
            }
            // If the chosen transition was an incorrect Unary/CompoundUnary
            // transition, skip past it and hope to continue the gold
            // transition sequence.  However, if we have Unary/CompoundUnary
            // in a row, we have to return false to prevent loops.
            // Also, if the state stack size is 0, can't keep going
            if ((chosenTransition is UnaryTransition) || (chosenTransition is CompoundUnaryTransition))
            {
                if (state.transitions.Size() > 0)
                {
                    ITransition previous = state.transitions.Peek();
                    if ((previous is UnaryTransition) || (previous is CompoundUnaryTransition))
                    {
                        return false;
                    }
                }
                if (state.stack.Size() == 0)
                {
                    return false;
                }
                // The gold list is left untouched in this case.
                return true;
            }
            if (chosenTransition is BinaryTransition)
            {
                if (state.stack.Size() < 2)
                {
                    return false;
                }
                if (goldTransition is ShiftTransition)
                {
                    // Helps, but adds quite a bit of size to the model and only helps a tiny bit
                    return op.TrainOptions().oracleBinaryToShift && ReorderIncorrectBinaryTransition(transitions);
                }
                if (!(goldTransition is BinaryTransition))
                {
                    return false;
                }
                BinaryTransition chosenBinary = (BinaryTransition)chosenTransition;
                BinaryTransition goldBinary   = (BinaryTransition)goldTransition;
                if (chosenBinary.IsBinarized())
                {
                    // Binarized labels only work (for now, at least) if the side
                    // is wrong but the label itself is correct
                    if (!goldBinary.IsBinarized() || !chosenBinary.label.Equals(goldBinary.label))
                    {
                        return false;
                    }
                }
                // Either the binarized label matched, or the chosen label was not
                // binarized: essentially what has happened is we added a bracket
                // error, but future brackets can still wind up being correct
                transitions.RemoveAt(0);
                return true;
            }
            if ((chosenTransition is ShiftTransition) && (goldTransition is BinaryTransition))
            {
                // can't shift at the end of the queue
                if (state.EndOfQueue())
                {
                    return false;
                }
                // doesn't help, sadly
                BinaryTransition goldBinary = (BinaryTransition)goldTransition;
                if (!goldBinary.IsBinarized())
                {
                    return op.TrainOptions().oracleShiftToBinary && ReorderIncorrectShiftTransition(transitions);
                }
            }
            return false;
        }
        /// <summary>
        /// Rewrites the gold transition list after a shift was chosen where the
        /// gold transition was a binary transition.
        /// </summary>
        /// <remarks>
        /// In this case, we are starting to build a new subtree when instead
        /// we should have been combining existing trees.  What we can do is
        /// find the transitions that build up the next subtree in the gold
        /// transition list, figure out how it gets applied to a
        /// BinaryTransition, and make that the next BinaryTransition we
        /// perform after finishing the subtree.  If there are multiple
        /// BinaryTransitions in a row, we ignore any associated
        /// UnaryTransitions (unfixable) and try to transition to the final
        /// state.  The assumption is that we can't do anything about the
        /// incorrect subtrees any more, so we skip them all.
        /// <br />
        /// Sadly, this does not seem to help - the parser gets worse when it
        /// learns these states
        /// </remarks>
        /// <param name="transitions">
        /// Remaining gold transitions, next one first.  Modified in place;
        /// entries removed before a failure is detected are not restored.
        /// </param>
        /// <returns>
        /// <c>true</c> if the list was rewritten, <c>false</c> if the expected
        /// structure (a following shift, a closing binary transition, and at
        /// least one transition after it) was not found.
        /// </returns>
        internal static bool ReorderIncorrectShiftTransition(IList <ITransition> transitions)
        {
            // Consume gold transitions up to and including the next shift,
            // remembering every binary transition that is skipped on the way.
            IList <BinaryTransition> leftoverBinary = new List <BinaryTransition>();

            while (transitions.Count > 0)
            {
                ITransition head = transitions[0];
                transitions.RemoveAt(0);
                if (head is ShiftTransition)
                {
                    break;
                }
                if (head is BinaryTransition)
                {
                    leftoverBinary.Add((BinaryTransition)head);
                }
            }
            if (transitions.Count == 0 || leftoverBinary.Count == 0)
            {
                // honestly this is an error we should probably just throw
                return false;
            }

            // Walk forward, balancing shifts against binary transitions.  The
            // first binary transition that takes the balance below zero is
            // removed, and position is left on the element that followed it,
            // which is where the replacement transitions are inserted.
            int              shiftCount = 0;
            int              position   = 0;
            BinaryTransition lastBinary = null;

            while (position < transitions.Count)
            {
                ITransition next = transitions[position];
                if (next is ShiftTransition)
                {
                    ++shiftCount;
                }
                else if (next is BinaryTransition)
                {
                    --shiftCount;
                    if (shiftCount < 0)
                    {
                        lastBinary = (BinaryTransition)next;
                        transitions.RemoveAt(position);
                        break;
                    }
                }
                ++position;
            }
            if (position >= transitions.Count || lastBinary == null)
            {
                // once again, an error.  even if the sequence of tree altering
                // gold transitions ends with a BinaryTransition, there should
                // be a FinalizeTransition after that
                return false;
            }

            string label = lastBinary.label;

            if (lastBinary.IsBinarized())
            {
                // Drop the first character of a binarized label so that the
                // "@" prefix added below is not doubled.
                label = label.Substring(1);
            }
            string temporaryLabel = "@" + label;

            if (lastBinary.side == BinaryTransition.Side.Right)
            {
                // When we finally transition all the binary transitions, we
                // will want to have the new node be the right head.  Therefore,
                // we add a bunch of temporary binary transitions with a right
                // head, ending up with a binary transition with a right head
                for (int i = 0; i < leftoverBinary.Count; ++i)
                {
                    transitions.Insert(position++, new BinaryTransition(temporaryLabel, BinaryTransition.Side.Right));
                }
                // use lastBinary.label in case the last transition is temporary
                transitions.Insert(position, new BinaryTransition(lastBinary.label, BinaryTransition.Side.Right));
            }
            else
            {
                transitions.Insert(position++, new BinaryTransition(temporaryLabel, BinaryTransition.Side.Left));
                for (int i = 0; i < leftoverBinary.Count - 1; ++i)
                {
                    transitions.Insert(position++, new BinaryTransition(temporaryLabel, leftoverBinary[i].side));
                }
                transitions.Insert(position, new BinaryTransition(lastBinary.label, leftoverBinary[leftoverBinary.Count - 1].side));
            }
            return true;
        }