예제 #1
0
        /// <summary>
        /// Returns the tree carried by the result of <c>GetBestScoredParse</c> for the given sentence.
        /// </summary>
        /// <param name="sentence">The words to parse; forwarded unchanged to <c>GetBestScoredParse</c>.</param>
        /// <returns>
        /// The object held by the scored parse, or <c>null</c> when <c>GetBestScoredParse</c> returns <c>null</c>.
        /// </returns>
        public virtual Tree GetBestParse <_T0>(IList <_T0> sentence)
            where _T0 : IHasWord
        {
            ScoredObject <Tree> best = GetBestScoredParse(sentence);

            // Unwrap the tree only when a scored parse was actually produced.
            if (best != null)
            {
                return best.Object();
            }

            return null;
        }
예제 #2
0
        /// <summary>
        /// Runs one training pass over the tree at <paramref name="index"/>, appending the perceptron updates it
        /// produces to <paramref name="updates"/> and counting how many predictions were right or wrong.
        /// </summary>
        /// <remarks>
        /// The strategy is chosen by <c>op.TrainOptions().trainingMethod</c>:
        /// <list type="bullet">
        /// <item><description><c>Oracle</c>: follow the model's own predictions and ask <paramref name="oracle"/>
        /// whether each one is acceptable.</description></item>
        /// <item><description><c>Beam</c> / <c>ReorderBeam</c>: keep a bounded agenda of states and compare the
        /// highest scoring successor against the gold transition sequence.</description></item>
        /// <item><description><c>Gold</c> / <c>EarlyTermination</c> / <c>ReorderOracle</c>: walk the gold transition
        /// list greedily, differing only in what happens after a wrong prediction.</description></item>
        /// </list>
        /// Any other method value produces no updates and yields a (0, 0) result.
        /// </remarks>
        /// <param name="index">Position of the tree in <paramref name="binarizedTrees"/>; also used to index
        /// <paramref name="transitionLists"/> and passed to <paramref name="oracle"/>.</param>
        /// <param name="binarizedTrees">Training trees; only the element at <paramref name="index"/> is read.</param>
        /// <param name="transitionLists">Gold transition sequences, parallel to <paramref name="binarizedTrees"/>.
        /// The list at <paramref name="index"/> is copied before being consumed.</param>
        /// <param name="updates">Output list that receives the generated updates.</param>
        /// <param name="oracle">Source of gold transitions; only consulted for the <c>Oracle</c> method.</param>
        /// <returns>A pair of (number of correct predictions, number of wrong predictions).</returns>
        /// <exception cref="ArgumentException">
        /// A beam method is selected with a non-positive beam size, or the greedy walk meets a training method it
        /// does not handle.
        /// </exception>
        private Pair <int, int> TrainTree(int index, IList <Tree> binarizedTrees, IList <IList <ITransition> > transitionLists, IList <PerceptronModel.Update> updates, Oracle oracle)
        {
            int  numCorrect = 0;
            int  numWrong   = 0;
            Tree tree       = binarizedTrees[index];

            // Read the configured method once; every branch below keys off it.
            ShiftReduceTrainOptions.TrainingMethod trainingMethod = op.TrainOptions().trainingMethod;
            bool isReorderBeam = (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam);

            // Only the two reordering methods need a reordering oracle.
            ReorderingOracle reorderer = null;
            if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle || isReorderBeam)
            {
                reorderer = new ReorderingOracle(op);
            }

            // TODO.  This training method seems to be working in that it
            // trains models just like the gold and early termination methods do.
            // However, it causes the feature space to go crazy.  Presumably
            // leaving out features with low weights or low frequencies would
            // significantly help with that.  Otherwise, not sure how to keep
            // it under control.
            if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Oracle)
            {
                State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
                while (!state.IsFinished())
                {
                    IList <string>     features   = featureFactory.Featurize(state);
                    ScoredObject <int> prediction = FindHighestScoringTransition(state, features, true);
                    if (prediction == null)
                    {
                        throw new AssertionError("Did not find a legal transition");
                    }
                    int              predictedNum = prediction.Object();
                    ITransition      predicted    = transitionIndex.Get(predictedNum);
                    OracleTransition gold         = oracle.GoldTransition(index, state);
                    if (gold.IsCorrect(predicted))
                    {
                        numCorrect++;
                        if (gold.transition != null && !gold.transition.Equals(predicted))
                        {
                            int transitionNum = transitionIndex.IndexOf(gold.transition);
                            // TODO: do we want to add unary transitions which are
                            // only possible when the parser has gone off the rails?
                            //
                            // When the gold transition is not in the index, skip only the
                            // update.  The state must still advance below: bailing out of
                            // the iteration here would re-evaluate the very same state
                            // (assuming featurization and scoring are deterministic for an
                            // unchanged state) and the loop would never finish.
                            if (transitionNum >= 0)
                            {
                                updates.Add(new PerceptronModel.Update(features, transitionNum, -1, learningRate));
                            }
                        }
                    }
                    else
                    {
                        numWrong++;
                        int transitionNum = -1;
                        if (gold.transition != null)
                        {
                            transitionNum = transitionIndex.IndexOf(gold.transition);
                        }
                        // TODO: this can theoretically result in a -1 gold
                        // transition if the transition exists, but is a
                        // CompoundUnaryTransition which only exists because the
                        // parser is wrong.  Do we want to add those transitions?
                        updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
                    }
                    // Always follow the model's own prediction, right or wrong.
                    state = predicted.Apply(state);
                }
            }
            else if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Beam || isReorderBeam)
            {
                int beamSize = op.TrainOptions().beamSize;
                if (beamSize <= 0)
                {
                    throw new ArgumentException("Illegal beam size " + beamSize);
                }
                // Work on a copy so the caller's gold transition list is not consumed.
                IList <ITransition>   transitions = Generics.NewLinkedList(transitionLists[index]);
                // One slot of headroom: a state is added first and the agenda trimmed afterwards.
                PriorityQueue <State> agenda      = new PriorityQueue <State>(beamSize + 1, ScoredComparator.AscendingComparator);
                State goldState = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
                agenda.Add(goldState);
                while (transitions.Count > 0)
                {
                    ITransition           goldTransition = transitions[0];
                    ITransition           highestScoringTransitionFromGoldState = null;
                    double                highestScoreFromGoldState             = 0.0;
                    PriorityQueue <State> newAgenda = new PriorityQueue <State>(beamSize + 1, ScoredComparator.AscendingComparator);
                    State highestScoringState       = null;
                    State highestCurrentState       = null;
                    foreach (State currentState in agenda)
                    {
                        // The best transition out of the gold state is only tracked for ReorderBeam.
                        bool           isGoldState = (isReorderBeam && goldState.AreTransitionsEqual(currentState));
                        IList <string> features    = featureFactory.Featurize(currentState);
                        ICollection <ScoredObject <int> > stateTransitions = FindHighestScoringTransitions(currentState, features, true, beamSize, null);
                        foreach (ScoredObject <int> transition in stateTransitions)
                        {
                            State newState = transitionIndex.Get(transition.Object()).Apply(currentState, transition.Score());
                            newAgenda.Add(newState);
                            // Trim back to the beam size after each insertion.
                            if (newAgenda.Count > beamSize)
                            {
                                newAgenda.Poll();
                            }
                            // Remember the best successor and the state it was expanded from.
                            if (highestScoringState == null || highestScoringState.Score() < newState.Score())
                            {
                                highestScoringState = newState;
                                highestCurrentState = currentState;
                            }
                            if (isGoldState && (highestScoringTransitionFromGoldState == null || transition.Score() > highestScoreFromGoldState))
                            {
                                highestScoringTransitionFromGoldState = transitionIndex.Get(transition.Object());
                                highestScoreFromGoldState             = transition.Score();
                            }
                        }
                    }
                    // This can happen if the REORDER_BEAM method backs itself
                    // into a corner, such as transitioning to something that
                    // can't have a FinalizeTransition applied.  This doesn't
                    // happen for the BEAM method because in that case the correct
                    // state (eg one with ROOT) isn't on the agenda so it stops.
                    if (isReorderBeam && highestScoringTransitionFromGoldState == null)
                    {
                        break;
                    }
                    State newGoldState = goldTransition.Apply(goldState, 0.0);
                    // if highest scoring state used the correct transition, no training
                    // otherwise, down the last transition, up the correct
                    if (!newGoldState.AreTransitionsEqual(highestScoringState))
                    {
                        ++numWrong;
                        IList <string> goldFeatures   = featureFactory.Featurize(goldState);
                        int            lastTransition = transitionIndex.IndexOf(highestScoringState.transitions.Peek());
                        updates.Add(new PerceptronModel.Update(featureFactory.Featurize(highestCurrentState), -1, lastTransition, learningRate));
                        updates.Add(new PerceptronModel.Update(goldFeatures, transitionIndex.IndexOf(goldTransition), -1, learningRate));
                        if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Beam)
                        {
                            // If the correct state has fallen off the agenda, break
                            if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                            {
                                break;
                            }
                            // Consume the gold transition at the head of the list (by index).
                            transitions.RemoveAt(0);
                        }
                        else if (isReorderBeam)
                        {
                            if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                            {
                                // The gold state fell off the agenda: try to re-plan the remaining
                                // transitions around the best move out of the gold state.
                                if (!reorderer.Reorder(goldState, highestScoringTransitionFromGoldState, transitions))
                                {
                                    break;
                                }
                                newGoldState = highestScoringTransitionFromGoldState.Apply(goldState);
                                if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                                {
                                    break;
                                }
                            }
                            else
                            {
                                transitions.RemoveAt(0);
                            }
                        }
                    }
                    else
                    {
                        ++numCorrect;
                        transitions.RemoveAt(0);
                    }
                    goldState = newGoldState;
                    agenda    = newAgenda;
                }
            }
            else if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle ||
                     trainingMethod == ShiftReduceTrainOptions.TrainingMethod.EarlyTermination ||
                     trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Gold)
            {
                State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
                // Work on a copy so the caller's gold transition list is not consumed.
                IList <ITransition> transitions = Generics.NewLinkedList(transitionLists[index]);
                bool keepGoing = true;
                while (transitions.Count > 0 && keepGoing)
                {
                    ITransition    transition    = transitions[0];
                    int            transitionNum = transitionIndex.IndexOf(transition);
                    IList <string> features      = featureFactory.Featurize(state);
                    int            predictedNum  = FindHighestScoringTransition(state, features, false).Object();
                    ITransition    predicted     = transitionIndex.Get(predictedNum);
                    if (transitionNum == predictedNum)
                    {
                        transitions.RemoveAt(0);
                        state = transition.Apply(state);
                        numCorrect++;
                    }
                    else
                    {
                        numWrong++;
                        // TODO: allow weighted features, weighted training, etc
                        updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
                        switch (trainingMethod)
                        {
                        case ShiftReduceTrainOptions.TrainingMethod.EarlyTermination:
                        {
                            // Stop at the first mistake.
                            keepGoing = false;
                            break;
                        }

                        case ShiftReduceTrainOptions.TrainingMethod.Gold:
                        {
                            // Ignore the mistake and keep following the gold sequence.
                            transitions.RemoveAt(0);
                            state = transition.Apply(state);
                            break;
                        }

                        case ShiftReduceTrainOptions.TrainingMethod.ReorderOracle:
                        {
                            // Follow the prediction only if the remaining gold transitions
                            // could be reordered around it.
                            keepGoing = reorderer.Reorder(state, predicted, transitions);
                            if (keepGoing)
                            {
                                state = predicted.Apply(state);
                            }
                            break;
                        }

                        default:
                        {
                            throw new ArgumentException("Unexpected method " + trainingMethod);
                        }
                        }
                    }
                }
            }
            return(Pair.MakePair(numCorrect, numWrong));
        }