/// <summary>
/// For every test tree: replays the gold transitions up to (but not including) the
/// first <c>BinaryTransition</c>, applies <c>shift</c> in its place, asks
/// <c>ReorderingOracle.ReorderIncorrectShiftTransition</c> to repair the remaining
/// gold transitions, applies the repaired sequence, and checks that the debinarized
/// tree on top of the stack prints the same as the matching entry of
/// <c>incorrectShiftTrees</c>.
/// </summary>
public virtual void TestReorderIncorrectShiftResultingTree()
{
    for (int caseIndex = 0; caseIndex < correctTrees.Length; caseIndex++)
    {
        State current = ShiftReduceParser.InitialStateFromGoldTagTree(correctTrees[caseIndex]);
        IList<ITransition> goldSequence = CreateTransitionSequence.CreateTransitionSequence(binarizedTrees[caseIndex]);

        // Follow the gold sequence until the first binary transition is reached.
        int firstBinary = 0;
        while (firstBinary < goldSequence.Count && !(goldSequence[firstBinary] is BinaryTransition))
        {
            current = goldSequence[firstBinary].Apply(current);
            firstBinary++;
        }

        // Apply a shift where the gold sequence would have continued.
        current = shift.Apply(current);

        // The oracle rewrites the remaining gold transitions in place and must report success.
        IList<ITransition> remaining = Generics.NewLinkedList(goldSequence.SubList(firstBinary, goldSequence.Count));
        NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(remaining));

        foreach (ITransition step in remaining)
        {
            current = step.Apply(current);
        }

        Tree result = debinarizer.TransformTree(current.stack.Peek());
        NUnit.Framework.Assert.AreEqual(incorrectShiftTrees[caseIndex].ToString(), result.ToString());
    }
}
/// <summary>
/// Checks <c>ReorderingOracle.ReorderIncorrectShiftTransition</c> on hand-built
/// transition lists. Each case builds an input list, requires the oracle to
/// return true, and then compares the list (which the oracle modifies in place)
/// against the expected rewritten list.
/// </summary>
public virtual void TestReorderIncorrectShift()
{
    IList<ITransition> actual;
    IList<ITransition> expected;

    // Case 1: rightNP, then a single shift before rightVP.
    actual = BuildTransitionList(rightNP, shift, rightVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(tempRightVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 2: rightNP, then two shifts reduced by leftNP before rightVP.
    actual = BuildTransitionList(rightNP, shift, shift, leftNP, rightVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, leftNP, tempRightVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 3: a unary transition sits between the two shifts.
    actual = BuildTransitionList(rightNP, shift, unaryADVP, shift, leftNP, rightVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(unaryADVP, shift, leftNP, tempRightVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 4: the unary transition follows the second shift.
    actual = BuildTransitionList(rightNP, shift, shift, unaryADVP, leftNP, rightVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, unaryADVP, leftNP, tempRightVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 5: same as case 4 but the leading binary transition is leftNP.
    actual = BuildTransitionList(leftNP, shift, shift, unaryADVP, leftNP, rightVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, unaryADVP, leftNP, tempRightVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 6: leading leftNP, closing leftVP.
    actual = BuildTransitionList(leftNP, shift, shift, unaryADVP, leftNP, leftVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, unaryADVP, leftNP, tempLeftVP, leftVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 7: leading rightNP, closing leftVP.
    actual = BuildTransitionList(rightNP, shift, shift, unaryADVP, leftNP, leftVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, unaryADVP, leftNP, tempLeftVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 8: two leading leftNP transitions, closing rightVP.
    actual = BuildTransitionList(leftNP, leftNP, shift, shift, unaryADVP, leftNP, rightVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, unaryADVP, leftNP, tempRightVP, tempRightVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 9: leading leftNP then rightNP, closing leftVP.
    actual = BuildTransitionList(leftNP, rightNP, shift, shift, unaryADVP, leftNP, leftVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, unaryADVP, leftNP, tempLeftVP, tempLeftVP, rightVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);

    // Case 10: two leading leftNP transitions, closing leftVP.
    actual = BuildTransitionList(leftNP, leftNP, shift, shift, unaryADVP, leftNP, leftVP, finalize);
    NUnit.Framework.Assert.IsTrue(ReorderingOracle.ReorderIncorrectShiftTransition(actual));
    expected = BuildTransitionList(shift, unaryADVP, leftNP, tempLeftVP, tempLeftVP, leftVP, finalize);
    NUnit.Framework.Assert.AreEqual(expected, actual);
}
/// <summary>
/// Runs the parser over one training tree using the configured training method and
/// appends the resulting perceptron updates to <paramref name="updates"/>. Weights are
/// not modified here; only <c>PerceptronModel.Update</c> objects are collected.
/// </summary>
/// <remarks>
/// The behaviour depends on <c>op.TrainOptions().trainingMethod</c>:
/// <list type="bullet">
/// <item><description><c>Oracle</c>: follows the model's own predictions and asks
/// <paramref name="oracle"/> for the gold transition at each state.</description></item>
/// <item><description><c>Beam</c> / <c>ReorderBeam</c>: keeps an agenda of at most
/// <c>beamSize</c> states next to the gold state; <c>ReorderBeam</c> additionally tries
/// to repair the gold transition list with a <c>ReorderingOracle</c> when the gold state
/// leaves the agenda.</description></item>
/// <item><description><c>Gold</c> / <c>EarlyTermination</c> / <c>ReorderOracle</c>:
/// greedy comparison of the prediction with the head of the gold transition list; on a
/// mismatch the method respectively continues along gold, stops, or reorders the
/// remaining gold transitions and follows the prediction.</description></item>
/// </list>
/// Any other training method falls through all branches and yields (0, 0).
/// </remarks>
/// <param name="index">Index of the tree in <paramref name="binarizedTrees"/> and
/// <paramref name="transitionLists"/>; also passed to <paramref name="oracle"/>.</param>
/// <param name="binarizedTrees">Training trees; only the entry at <paramref name="index"/> is read.</param>
/// <param name="transitionLists">Gold transition sequences; the entry at
/// <paramref name="index"/> is copied before being consumed, so the caller's list is not modified.</param>
/// <param name="updates">Output list that receives the generated updates.</param>
/// <param name="oracle">Used only by the <c>Oracle</c> training method.</param>
/// <returns>A pair of (number of correct decisions, number of wrong decisions).</returns>
/// <exception cref="ArgumentException">The beam size is not positive for a beam method,
/// or the greedy branch encounters an unexpected training method.</exception>
private Pair<int, int> TrainTree(int index, IList<Tree> binarizedTrees, IList<IList<ITransition>> transitionLists, IList<PerceptronModel.Update> updates, Oracle oracle)
{
    int numCorrect = 0;
    int numWrong = 0;
    Tree tree = binarizedTrees[index];
    ShiftReduceTrainOptions.TrainingMethod trainingMethod = op.TrainOptions().trainingMethod;

    // Only the two reordering methods need a ReorderingOracle.
    ReorderingOracle reorderer = null;
    if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle
        || trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
    {
        reorderer = new ReorderingOracle(op);
    }

    if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Oracle)
    {
        // TODO. This training method seems to be working in that it
        // trains models just like the gold and early termination methods do.
        // However, it causes the feature space to go crazy. Presumably
        // leaving out features with low weights or low frequencies would
        // significantly help with that. Otherwise, not sure how to keep
        // it under control.
        State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
        while (!state.IsFinished())
        {
            IList<string> features = featureFactory.Featurize(state);
            ScoredObject<int> prediction = FindHighestScoringTransition(state, features, true);
            if (prediction == null)
            {
                throw new AssertionError("Did not find a legal transition");
            }
            int predictedNum = prediction.Object();
            ITransition predicted = transitionIndex.Get(predictedNum);
            OracleTransition gold = oracle.GoldTransition(index, state);
            if (gold.IsCorrect(predicted))
            {
                numCorrect++;
                if (gold.transition != null && !gold.transition.Equals(predicted))
                {
                    int transitionNum = transitionIndex.IndexOf(gold.transition);
                    // TODO: do we want to add unary transitions which are
                    // only possible when the parser has gone off the rails?
                    // For now an unknown gold transition just produces no update.
                    // The state must still advance below: skipping it (the old
                    // `continue`) left `state` unchanged, so the loop re-evaluated
                    // the same state and would presumably never terminate.
                    if (transitionNum >= 0)
                    {
                        updates.Add(new PerceptronModel.Update(features, transitionNum, -1, learningRate));
                    }
                }
            }
            else
            {
                numWrong++;
                int transitionNum = -1;
                if (gold.transition != null)
                {
                    transitionNum = transitionIndex.IndexOf(gold.transition);
                }
                // TODO: this can theoretically result in a -1 gold
                // transition if the transition exists, but is a
                // CompoundUnaryTransition which only exists because the
                // parser is wrong. Do we want to add those transitions?
                updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
            }
            // Always follow the model's own prediction in this mode.
            state = predicted.Apply(state);
        }
    }
    else if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Beam
             || trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
    {
        if (op.TrainOptions().beamSize <= 0)
        {
            throw new ArgumentException("Illegal beam size " + op.TrainOptions().beamSize);
        }
        // Work on a copy: gold transitions are removed (and possibly reordered) as we go.
        IList<ITransition> transitions = Generics.NewLinkedList(transitionLists[index]);
        PriorityQueue<State> agenda = new PriorityQueue<State>(op.TrainOptions().beamSize + 1, ScoredComparator.AscendingComparator);
        State goldState = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
        agenda.Add(goldState);
        while (transitions.Count > 0)
        {
            ITransition goldTransition = transitions[0];
            ITransition highestScoringTransitionFromGoldState = null;
            double highestScoreFromGoldState = 0.0;
            PriorityQueue<State> newAgenda = new PriorityQueue<State>(op.TrainOptions().beamSize + 1, ScoredComparator.AscendingComparator);
            State highestScoringState = null;
            State highestCurrentState = null;
            foreach (State currentState in agenda)
            {
                // The gold state is only tracked on the agenda for ReorderBeam.
                bool isGoldState = (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam
                                    && goldState.AreTransitionsEqual(currentState));
                IList<string> features = featureFactory.Featurize(currentState);
                ICollection<ScoredObject<int>> stateTransitions = FindHighestScoringTransitions(currentState, features, true, op.TrainOptions().beamSize, null);
                foreach (ScoredObject<int> transition in stateTransitions)
                {
                    State newState = transitionIndex.Get(transition.Object()).Apply(currentState, transition.Score());
                    newAgenda.Add(newState);
                    // Keep the agenda bounded by the beam size.
                    if (newAgenda.Count > op.TrainOptions().beamSize)
                    {
                        newAgenda.Poll();
                    }
                    if (highestScoringState == null || highestScoringState.Score() < newState.Score())
                    {
                        highestScoringState = newState;
                        highestCurrentState = currentState;
                    }
                    if (isGoldState && (highestScoringTransitionFromGoldState == null || transition.Score() > highestScoreFromGoldState))
                    {
                        highestScoringTransitionFromGoldState = transitionIndex.Get(transition.Object());
                        highestScoreFromGoldState = transition.Score();
                    }
                }
            }

            // This can happen if the REORDER_BEAM method backs itself
            // into a corner, such as transitioning to something that
            // can't have a FinalizeTransition applied. This doesn't
            // happen for the BEAM method because in that case the correct
            // state (eg one with ROOT) isn't on the agenda so it stops.
            if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam
                && highestScoringTransitionFromGoldState == null)
            {
                break;
            }

            State newGoldState = goldTransition.Apply(goldState, 0.0);

            // if highest scoring state used the correct transition, no training
            // otherwise, down the last transition, up the correct
            if (!newGoldState.AreTransitionsEqual(highestScoringState))
            {
                ++numWrong;
                IList<string> goldFeatures = featureFactory.Featurize(goldState);
                int lastTransition = transitionIndex.IndexOf(highestScoringState.transitions.Peek());
                updates.Add(new PerceptronModel.Update(featureFactory.Featurize(highestCurrentState), -1, lastTransition, learningRate));
                updates.Add(new PerceptronModel.Update(goldFeatures, transitionIndex.IndexOf(goldTransition), -1, learningRate));

                if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Beam)
                {
                    // If the correct state has fallen off the agenda, break
                    if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                    {
                        break;
                    }
                    else
                    {
                        transitions.Remove(0);
                    }
                }
                else if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
                {
                    if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                    {
                        // Gold state fell off the agenda: try to rewrite the remaining gold
                        // transitions so that they continue from the best transition
                        // available at the gold state.
                        if (!reorderer.Reorder(goldState, highestScoringTransitionFromGoldState, transitions))
                        {
                            break;
                        }
                        newGoldState = highestScoringTransitionFromGoldState.Apply(goldState);
                        if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                        {
                            break;
                        }
                    }
                    else
                    {
                        transitions.Remove(0);
                    }
                }
            }
            else
            {
                ++numCorrect;
                transitions.Remove(0);
            }

            goldState = newGoldState;
            agenda = newAgenda;
        }
    }
    else if (trainingMethod == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle
             || trainingMethod == ShiftReduceTrainOptions.TrainingMethod.EarlyTermination
             || trainingMethod == ShiftReduceTrainOptions.TrainingMethod.Gold)
    {
        State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
        // Work on a copy: gold transitions are consumed (and possibly reordered) below.
        IList<ITransition> transitions = transitionLists[index];
        transitions = Generics.NewLinkedList(transitions);
        bool keepGoing = true;
        while (transitions.Count > 0 && keepGoing)
        {
            ITransition transition = transitions[0];
            int transitionNum = transitionIndex.IndexOf(transition);
            IList<string> features = featureFactory.Featurize(state);
            int predictedNum = FindHighestScoringTransition(state, features, false).Object();
            ITransition predicted = transitionIndex.Get(predictedNum);
            if (transitionNum == predictedNum)
            {
                transitions.Remove(0);
                state = transition.Apply(state);
                numCorrect++;
            }
            else
            {
                numWrong++;
                // TODO: allow weighted features, weighted training, etc
                updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
                switch (trainingMethod)
                {
                    case ShiftReduceTrainOptions.TrainingMethod.EarlyTermination:
                    {
                        // Stop at the first mistake.
                        keepGoing = false;
                        break;
                    }

                    case ShiftReduceTrainOptions.TrainingMethod.Gold:
                    {
                        // Ignore the mistake and continue along the gold sequence.
                        transitions.Remove(0);
                        state = transition.Apply(state);
                        break;
                    }

                    case ShiftReduceTrainOptions.TrainingMethod.ReorderOracle:
                    {
                        // Follow the prediction only if the remaining gold
                        // transitions could be reordered to match it.
                        keepGoing = reorderer.Reorder(state, predicted, transitions);
                        if (keepGoing)
                        {
                            state = predicted.Apply(state);
                        }
                        break;
                    }

                    default:
                    {
                        throw new ArgumentException("Unexpected method " + op.TrainOptions().trainingMethod);
                    }
                }
            }
        }
    }

    return Pair.MakePair(numCorrect, numWrong);
}