/// <summary>
/// Decides whether a shift may be applied to <paramref name="state"/>.
/// </summary>
/// <remarks>
/// A shift is rejected when the state is finished, when the queue has no
/// tokens left, or when the top of the stack is a right-headed temporary
/// (binarized, "@"-prefixed) node with two children; the last rule is taken
/// from Zhang &amp; Clark 2009 without further justification.
/// With constraints present, a shift is also rejected if it would move past
/// the right edge of a constraint that the top of the stack has not solved.
/// TODO: go through the papers and make sure they don't mention any
/// other conditions where one shouldn't shift
/// </remarks>
/// <param name="state">The parser state to test.</param>
/// <param name="constraints">Span constraints to respect; may be null.</param>
/// <returns>true if shifting is allowed in this state.</returns>
public virtual bool IsLegal(State state, IList<ParserConstraint> constraints)
{
    if (state.finished || state.tokenPosition >= state.sentence.Count)
    {
        return false;
    }

    if (state.stack.Size() == 0)
    {
        // With nothing on the stack neither the binarization rule nor a constraint can object.
        return true;
    }

    Tree stackTop = state.stack.Peek();

    // Temporary node, eg part of a binarized sequence, produced by a right head transition.
    bool rightHeadedTemporary = stackTop.Label().Value().StartsWith("@")
        && stackTop.Children().Length == 2
        && ShiftReduceUtils.GetBinarySide(stackTop) == BinaryTransition.Side.Right;
    if (rightHeadedTemporary)
    {
        return false;
    }

    if (constraints == null)
    {
        return true;
    }

    foreach (ParserConstraint constraint in constraints)
    {
        // Only constraints ending exactly where the top node ends matter here;
        // the others were either passed already or are not reached yet.
        if (ShiftReduceUtils.RightIndex(stackTop) == constraint.end - 1)
        {
            int topLeft = ShiftReduceUtils.LeftIndex(stackTop);
            if (topLeft > constraint.start)
            {
                // The top node covers only a suffix of the constraint: shifting would strand it.
                return false;
            }
            if (topLeft == constraint.start && !ShiftReduceUtils.ConstraintMatchesTreeTop(stackTop, constraint))
            {
                return false;
            }
        }
    }
    return true;
}
/// <summary>
/// Reports where separators sit relative to the head of a node on the stack.
/// </summary>
/// <remarks>
/// The node examined is the one reached after popping <paramref name="nodeNum"/>
/// entries from the top of the stack. If <c>separators</c> has an entry at the
/// node's head index the answer is <c>Head</c>. Otherwise the nearest separator
/// keys reported by <c>FloorKey</c> and <c>CeilingKey</c> are tested against the
/// node's left and right indices, giving <c>Both</c>, <c>Left</c>, <c>Right</c>
/// or <c>None</c>.
/// NOTE(review): FloorKey/CeilingKey are presumed to follow java.util.TreeMap
/// semantics (null when no such key exists) - confirm against the map type.
/// </remarks>
/// <param name="nodeNum">Zero-based depth from the top of the stack.</param>
/// <returns>
/// The separator position, or null when <paramref name="nodeNum"/> is not
/// smaller than the stack size.
/// </returns>
internal virtual State.HeadPosition GetSeparator(int nodeNum)
{
    // Read the field explicitly: a local with the same name declared later in this
    // scope would make an unqualified use here a use-before-declaration error.
    if (nodeNum >= this.stack.Size())
    {
        return null;
    }

    TreeShapedStack<Tree> remaining = this.stack;
    for (int i = 0; i < nodeNum; ++i)
    {
        remaining = remaining.Pop();
    }
    Tree node = remaining.Peek();

    int head = ShiftReduceUtils.HeadIndex(node);
    if (separators[head] != null)
    {
        return State.HeadPosition.Head;
    }

    // 'var' keeps whatever (possibly nullable) type the map reports, so the null
    // tests below stay meaningful instead of comparing a plain int with null.
    int left = ShiftReduceUtils.LeftIndex(node);
    var nextLeft = separators.FloorKey(head);
    bool hasLeft = (nextLeft != null && nextLeft >= left);

    int right = ShiftReduceUtils.RightIndex(node);
    var nextRight = separators.CeilingKey(head);
    bool hasRight = (nextRight != null && nextRight <= right);

    if (hasLeft && hasRight)
    {
        return State.HeadPosition.Both;
    }
    if (hasLeft)
    {
        return State.HeadPosition.Left;
    }
    if (hasRight)
    {
        return State.HeadPosition.Right;
    }
    return State.HeadPosition.None;
}
/// <summary>
/// Recursively collects the labels of all non-temporary internal nodes of
/// <paramref name="tree"/> into <paramref name="knownStates"/>.
/// </summary>
/// <remarks>Leaves and preterminals are skipped, and recursion stops at them.</remarks>
/// <param name="tree">Root of the (sub)tree to scan.</param>
/// <param name="knownStates">Collection that receives the node values.</param>
public static void FindKnownStates(Tree tree, ICollection<string> knownStates)
{
    bool isInternalNode = !(tree.IsLeaf() || tree.IsPreTerminal());
    if (isInternalNode)
    {
        if (!ShiftReduceUtils.IsTemporary(tree))
        {
            knownStates.Add(tree.Value());
        }
        foreach (Tree subtree in tree.Children())
        {
            FindKnownStates(subtree, knownStates);
        }
    }
}
/// <summary>
/// Returns the first character of the first separator found between the heads
/// of two trees.
/// </summary>
/// <remarks>
/// The lookup asks <c>separators.CeilingEntry</c> for the entry at or after the
/// head index of <paramref name="left"/> and accepts it only if its key does not
/// exceed the head index of <paramref name="right"/>.
/// NOTE(review): the null test on the returned entry presumes CeilingEntry can
/// report "no entry" as null - confirm against the map type.
/// </remarks>
/// <param name="right">The tree on the right; may be null.</param>
/// <param name="left">The tree on the left; may be null.</param>
/// <returns>
/// A one-character string, or null if either tree is null or no separator
/// lies in range.
/// </returns>
internal virtual string GetSeparatorBetween(Tree right, Tree left)
{
    if (right != null && left != null)
    {
        int headOfLeft = ShiftReduceUtils.HeadIndex(left);
        int headOfRight = ShiftReduceUtils.HeadIndex(right);
        KeyValuePair<int, string> candidate = separators.CeilingEntry(headOfLeft);
        if (candidate != null && candidate.Key <= headOfRight)
        {
            return Sharpen.Runtime.Substring(candidate.Value, 0, 1);
        }
    }
    return null;
}
/// <summary>
/// Finds the gold-tree node that encloses the span of <paramref name="subtree"/>.
/// </summary>
/// <remarks>
/// Starts at the gold leaf found at the subtree's left index and climbs through
/// <paramref name="parents"/> until the node's right index reaches the subtree's
/// right index. If the climb never left the leaf, the leaf's parent is returned.
/// TODO: make this more efficient
/// </remarks>
/// <param name="subtree">The tree whose span is being located.</param>
/// <param name="parents">Maps each gold node to its parent.</param>
/// <param name="leaves">Gold leaves, indexed by position.</param>
/// <returns>The enclosing gold node; never a leaf.</returns>
internal static Tree GetEnclosingTree(Tree subtree, IDictionary<Tree, Tree> parents, IList<Tree> leaves)
{
    int spanStart = ShiftReduceUtils.LeftIndex(subtree);
    int spanEnd = ShiftReduceUtils.RightIndex(subtree);

    Tree candidate;
    for (candidate = leaves[spanStart]; ShiftReduceUtils.RightIndex(candidate) < spanEnd; candidate = parents[candidate])
    {
        // climbing is done entirely by the loop header
    }

    return candidate.IsLeaf() ? parents[candidate] : candidate;
}
/// <summary>
/// Checks that <c>ShiftReduceUtils.GetBinarySide</c> recovers the side a
/// <c>BinaryTransition</c> was built with, for both Right and Left.
/// </summary>
public virtual void TestBinarySide()
{
    string[] words = new string[] { "This", "is", "a", "short", "test", "." };
    string[] tags = new string[] { "DT", "VBZ", "DT", "JJ", "NN", "." };
    NUnit.Framework.Assert.AreEqual(words.Length, tags.Length);

    IList<TaggedWord> tagged = SentenceUtils.ToTaggedList(Arrays.AsList(words), Arrays.AsList(tags));
    State twoShifted = ShiftReduceParser.InitialStateFromTaggedSentence(tagged);

    // Put two items on the stack so a binary transition has something to combine.
    ShiftTransition shift = new ShiftTransition();
    twoShifted = shift.Apply(twoShifted);
    twoShifted = shift.Apply(twoShifted);

    BinaryTransition rightHeaded = new BinaryTransition("NP", BinaryTransition.Side.Right);
    State afterRight = rightHeaded.Apply(twoShifted);
    NUnit.Framework.Assert.AreEqual(BinaryTransition.Side.Right, ShiftReduceUtils.GetBinarySide(afterRight.stack.Peek()));

    BinaryTransition leftHeaded = new BinaryTransition("NP", BinaryTransition.Side.Left);
    State afterLeft = leftHeaded.Apply(twoShifted);
    NUnit.Framework.Assert.AreEqual(BinaryTransition.Side.Left, ShiftReduceUtils.GetBinarySide(afterLeft.stack.Peek()));
}
/// <summary>
/// Counts the separator keys lying strictly between the heads of two trees.
/// </summary>
/// <remarks>
/// Walks <c>separators.HigherKey</c> starting from the head index of
/// <paramref name="left"/> and counts every key that is still below the head
/// index of <paramref name="right"/>.
/// NOTE(review): the null test in the loop presumes HigherKey can report
/// "no further key" as null - confirm against the map type.
/// </remarks>
/// <param name="right">The tree on the right; may be null.</param>
/// <param name="left">The tree on the left; may be null.</param>
/// <returns>The number of separators found, or 0 if either tree is null.</returns>
internal virtual int GetSeparatorCount(Tree right, Tree left)
{
    if (right == null || left == null)
    {
        return 0;
    }

    int headOfLeft = ShiftReduceUtils.HeadIndex(left);
    int headOfRight = ShiftReduceUtils.HeadIndex(right);

    int found = 0;
    for (int key = separators.HigherKey(headOfLeft); key != null && key < headOfRight; key = separators.HigherKey(key))
    {
        found++;
    }
    return found;
}
/// <summary>
/// Decides whether the parse in <paramref name="state"/> may be finalized.
/// </summary>
/// <remarks>
/// Finalizing requires an unfinished state, an exhausted queue, exactly one
/// tree on the stack, and that tree's value being one of <c>rootStates</c>.
/// Any constraint spanning the whole sentence must additionally match that tree.
/// </remarks>
/// <param name="state">The parser state to test.</param>
/// <param name="constraints">Span constraints to respect; may be null.</param>
/// <returns>true if the state can be finalized.</returns>
public virtual bool IsLegal(State state, IList<ParserConstraint> constraints)
{
    if (state.finished)
    {
        return false;
    }
    if (state.tokenPosition < state.sentence.Count)
    {
        return false;
    }
    if (state.stack.Size() != 1)
    {
        return false;
    }

    Tree onlyTree = state.stack.Peek();
    if (!rootStates.Contains(onlyTree.Value()))
    {
        return false;
    }
    if (constraints == null)
    {
        return true;
    }

    foreach (ParserConstraint constraint in constraints)
    {
        // Only constraints over the entire sentence concern the final tree.
        bool coversSentence = constraint.start == 0 && constraint.end == state.sentence.Count;
        if (coversSentence && !ShiftReduceUtils.ConstraintMatchesTreeTop(onlyTree, constraint))
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Adds queue-label features for the words at and just outside both edges of
/// <paramref name="node"/>. This option also does not seem to help.
/// </summary>
/// <remarks>
/// Four feature groups are emitted, in this order: left edge ("EL-"), right edge
/// ("ER-"), the position before the left edge ("EP-") and the position after the
/// right edge ("EN-"). Each prefix is followed by the node's Value feature and "-".
/// </remarks>
/// <param name="features">List the new features are appended to.</param>
/// <param name="state">State whose queue labels are looked up.</param>
/// <param name="nodeName">Prefix identifying the node in feature names.</param>
/// <param name="node">The tree to featurize; nothing happens when null.</param>
public virtual void AddEdgeFeatures2(IList<string> features, State state, string nodeName, Tree node)
{
    if (node == null)
    {
        return;
    }

    int firstIndex = ShiftReduceUtils.LeftIndex(node);
    int lastIndex = ShiftReduceUtils.RightIndex(node);
    string valueSuffix = GetFeatureFromCoreLabel(GetCoreLabel(node), FeatureFactory.FeatureComponent.Value) + "-";

    AddUnaryQueueFeatures(features, GetQueueLabel(state, firstIndex), nodeName + "EL-" + valueSuffix);
    AddUnaryQueueFeatures(features, GetQueueLabel(state, lastIndex), nodeName + "ER-" + valueSuffix);
    AddUnaryQueueFeatures(features, GetQueueLabel(state, firstIndex - 1), nodeName + "EP-" + valueSuffix);
    AddUnaryQueueFeatures(features, GetQueueLabel(state, lastIndex + 1), nodeName + "EN-" + valueSuffix);
}
/// <summary>
/// Adds features describing the words at the edges of <paramref name="node"/>
/// together with a coarse size marker for its span.
/// </summary>
/// <remarks>
/// A single-word span only gets the "SZ1" marker, since such trees are already
/// featurized elsewhere. Larger spans get left ("EL-") and right ("ER-") edge
/// features, an optional pairing of the right edge with
/// <paramref name="neighbor"/>, and then one of "SZ2", "SZ3" (plus the middle
/// word, "EM-") or "SZB" (plus the inner-left "El-" and inner-right "Er-" words).
/// It seems somewhat slow and doesn't help accuracy.
/// </remarks>
/// <param name="features">List the new features are appended to.</param>
/// <param name="state">State supplying the sentence.</param>
/// <param name="nodeName">Prefix identifying the node in feature names.</param>
/// <param name="neighborName">Prefix identifying the neighbor in feature names.</param>
/// <param name="node">The tree to featurize; nothing happens when null.</param>
/// <param name="neighbor">Optional neighboring tree; may be null.</param>
public virtual void AddEdgeFeatures(IList<string> features, State state, string nodeName, string neighborName, Tree node, Tree neighbor)
{
    if (node == null)
    {
        return;
    }

    int left = ShiftReduceUtils.LeftIndex(node);
    int right = ShiftReduceUtils.RightIndex(node);
    int width = right - left;

    // Trees of size one are already featurized
    if (width == 0)
    {
        features.Add(nodeName + "SZ1");
        return;
    }

    AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[left]), nodeName + "EL-");
    AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[right]), nodeName + "ER-");

    if (neighbor != null)
    {
        AddBinaryFeatures(
            features,
            nodeName, GetCoreLabel(state.sentence[right]),
            FeatureFactory.FeatureComponent.Headword, FeatureFactory.FeatureComponent.Headtag,
            neighborName, GetCoreLabel(neighbor),
            FeatureFactory.FeatureComponent.Headword, FeatureFactory.FeatureComponent.Headtag);
    }

    switch (width)
    {
        case 1:
        {
            features.Add(nodeName + "SZ2");
            break;
        }
        case 2:
        {
            features.Add(nodeName + "SZ3");
            AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[left + 1]), nodeName + "EM-");
            break;
        }
        default:
        {
            features.Add(nodeName + "SZB");
            AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[left + 1]), nodeName + "El-");
            AddUnaryQueueFeatures(features, GetCoreLabel(state.sentence[right - 1]), nodeName + "Er-");
            break;
        }
    }
}
/// <summary>
/// Decides whether this binary reduce may be applied to <paramref name="state"/>.
/// Needs at least two items on the state's stack.
/// </summary>
/// <remarks>
/// Several of the structural rules come directly from Zhang and Clark 2009.
/// They govern how temporary (binarized) nodes may be combined, depending on
/// this transition's side and label, the stack depth and whether the queue is
/// empty. When constraints are given, a reduce is also rejected if it would
/// cross the left boundary of a constraint, or if it would build a temporary
/// node of exactly the constrained size.
/// </remarks>
/// <param name="state">The parser state to test.</param>
/// <param name="constraints">Span constraints to respect; may be null.</param>
/// <returns>true if this binary transition is allowed in this state.</returns>
public virtual bool IsLegal(State state, IList<ParserConstraint> constraints)
{
    if (state.finished || state.stack.Size() <= 1)
    {
        return false;
    }

    Tree top = state.stack.Peek();
    Tree next = state.stack.Pop().Peek();
    bool topTemporary = ShiftReduceUtils.IsTemporary(top);
    bool nextTemporary = ShiftReduceUtils.IsTemporary(next);

    // at least one of the two nodes on top of stack must be non-temporary
    if (topTemporary && nextTemporary)
    {
        return false;
    }

    // a temporary node on top must become the head (right side) and keep its category
    if (topTemporary)
    {
        if (side == BinaryTransition.Side.Left || !ShiftReduceUtils.IsEquivalentCategory(label, top.Value()))
        {
            return false;
        }
    }

    // likewise, a temporary node below the top must become the head (left side)
    if (nextTemporary)
    {
        if (side == BinaryTransition.Side.Right || !ShiftReduceUtils.IsEquivalentCategory(label, next.Value()))
        {
            return false;
        }
    }

    int depth = state.stack.Size();

    // With exactly two nodes on the stack, a binarized result is refused if the
    // queue is empty (it would leave a lone temporary node with nothing to shift)
    // or if it would be right-headed.
    if (depth == 2 && IsBinarized() && (state.EndOfQueue() || side == BinaryTransition.Side.Right))
    {
        return false;
    }

    // With more than two nodes and a temporary third node, a binarized result is
    // refused if the queue is empty or if it would be right-headed.
    if (depth > 2
        && ShiftReduceUtils.IsTemporary(state.stack.Pop().Pop().Peek())
        && IsBinarized()
        && (state.EndOfQueue() || side == BinaryTransition.Side.Right))
    {
        return false;
    }

    if (constraints == null)
    {
        return true;
    }

    int leftTop = ShiftReduceUtils.LeftIndex(top);
    int rightTop = ShiftReduceUtils.RightIndex(top);
    int leftNext = ShiftReduceUtils.LeftIndex(next);

    foreach (ParserConstraint constraint in constraints)
    {
        if (leftTop == constraint.start)
        {
            if (rightTop == constraint.end - 1)
            {
                // can't binary reduce away from a tree which doesn't match a constraint
                if (!ShiftReduceUtils.ConstraintMatchesTreeTop(top, constraint))
                {
                    return false;
                }
            }
            else if (rightTop < constraint.end)
            {
                // can't binary reduce if it would make the tree cross the left boundary
                return false;
            }
            continue;
        }

        // A top element left of the constraint cannot harm it, and one at or past
        // its end means the constraint must already be satisfied.
        bool topInsideConstraint = leftTop > constraint.start && leftTop < constraint.end;
        if (!topInsideConstraint)
        {
            continue;
        }

        // reducing with a node that starts before the constraint crosses its boundary
        if (leftNext < constraint.start)
        {
            return false;
        }

        // can't transition to a binarized node when there's a constraint that matches.
        if (leftNext == constraint.start && rightTop == constraint.end - 1 && IsBinarized())
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Tells whether two trees have the same left index and the same right index.
/// </summary>
/// <param name="subtree">The first tree.</param>
/// <param name="goldSubtree">The tree to compare it with.</param>
/// <returns>true when both indices are equal.</returns>
internal static bool SpansEqual(Tree subtree, Tree goldSubtree)
{
    if (ShiftReduceUtils.LeftIndex(subtree) != ShiftReduceUtils.LeftIndex(goldSubtree))
    {
        return false;
    }
    return ShiftReduceUtils.RightIndex(subtree) == ShiftReduceUtils.RightIndex(goldSubtree);
}
/// <summary>
/// Returns an attempt at a "gold" transition for the current state while
/// parsing a known gold tree.
/// </summary>
/// <remarks>
/// The tree is identified by index so the oracle can use statistics it
/// precomputed for that tree.
/// <para>A finished state yields an idle transition, and an empty stack yields a shift.</para>
/// <para>
/// If <c>GetUnaryTransition</c> proposes a transition for the top of the
/// stack, that proposal is returned.
/// </para>
/// <para>
/// With a single tree on the stack, the answer is finalize when the queue is
/// exhausted and shift otherwise.
/// TODO: we could interject that all trees must end with ROOT, for example
/// </para>
/// <para>
/// If the top of the stack has a correct span, the next gold parent with a
/// larger span is examined. A top that is its left child means shift.
/// Otherwise the next stack item is checked: if its span is also correct the
/// two are combined with the parent's label and side, and if not there is no
/// single gold transition.
/// TODO: suppose the correct label is not either child label and the children
/// are binarized states? We should see what the debinarizer does in that case.
/// Perhaps a post-processing step.
/// </para>
/// <para>
/// If the top has an incorrect span, the enclosing gold item decides: the same
/// left edge means shift, and the same right edge means binary reduce (an exact
/// one where possible). When neither edge matches there is no gold transition.
/// TODO: can this be improved? We could look further back in the stack to find
/// hints at a label that would work better.
/// </para>
/// </remarks>
/// <param name="index">Index of the gold tree in the oracle's precomputed lists.</param>
/// <param name="state">The current parser state.</param>
/// <returns>An <c>OracleTransition</c>; its transition is null when no single gold transition exists.</returns>
internal virtual OracleTransition GoldTransition(int index, State state)
{
    if (state.finished)
    {
        return new OracleTransition(new IdleTransition(), false, false, false);
    }

    int stackDepth = state.stack.Size();
    if (stackDepth == 0)
    {
        return new OracleTransition(new ShiftTransition(), false, false, false);
    }

    IDictionary<Tree, Tree> parents = parentMaps[index];
    // Not consulted below; the lookup is kept as it was.
    Tree gold = binarizedTrees[index];
    IList<Tree> leaves = leafLists[index];

    Tree stackTop = state.stack.Peek();
    Tree goldAroundTop = GetEnclosingTree(stackTop, parents, leaves);

    OracleTransition unary = GetUnaryTransition(stackTop, goldAroundTop, parents, compoundUnaries);
    if (unary != null)
    {
        return unary;
    }

    if (stackDepth == 1)
    {
        if (state.tokenPosition >= state.sentence.Count)
        {
            return new OracleTransition(new FinalizeTransition(rootStates), false, false, false);
        }
        return new OracleTransition(new ShiftTransition(), false, false, false);
    }

    if (SpansEqual(stackTop, goldAroundTop))
    {
        // cannot be root
        Tree goldParent = parents[goldAroundTop];
        while (SpansEqual(goldParent, goldAroundTop))
        {
            // in case we had missed unary transitions
            goldAroundTop = goldParent;
            goldParent = parents[goldParent];
        }

        if (goldParent.Children()[0] == goldAroundTop)
        {
            // the top of the stack is the left child of the correct tree
            return new OracleTransition(new ShiftTransition(), false, false, false);
        }

        // was the second (right) child. there must be something else on the stack...
        Tree below = state.stack.Pop().Peek();
        Tree goldAroundBelow = GetEnclosingTree(below, parents, leaves);
        if (!SpansEqual(below, goldAroundBelow))
        {
            return new OracleTransition(null, false, true, false);
        }

        // the two subtrees should be combined
        BinaryTransition combine = new BinaryTransition(goldParent.Value(), ShiftReduceUtils.GetBinarySide(goldParent));
        return new OracleTransition(combine, false, false, false);
    }

    if (ShiftReduceUtils.LeftIndex(stackTop) == ShiftReduceUtils.LeftIndex(goldAroundTop))
    {
        return new OracleTransition(new ShiftTransition(), false, false, false);
    }

    if (ShiftReduceUtils.RightIndex(stackTop) != ShiftReduceUtils.RightIndex(goldAroundTop))
    {
        // the top of the stack doesn't match either endpoint of the enclosing tree
        return new OracleTransition(null, true, true, true);
    }

    Tree previous = state.stack.Pop().Peek();
    Tree goldAroundPrevious = GetEnclosingTree(previous, parents, leaves);
    if (goldAroundTop == goldAroundPrevious)
    {
        // BinaryTransition with the enclosing tree's label, either side, but preferring LEFT
        BinaryTransition preferred = new BinaryTransition(goldAroundTop.Value(), BinaryTransition.Side.Left);
        return new OracleTransition(preferred, false, false, true);
    }

    // The previous item is smaller than the next tree the top is supposed to be
    // part of, so we must have a BinaryTransition
    if (ShiftReduceUtils.LeftIndex(previous) > ShiftReduceUtils.LeftIndex(goldAroundTop))
    {
        return new OracleTransition(null, false, true, true);
    }

    // The previous item is larger than the next tree. This is the worst case
    return new OracleTransition(null, true, true, true);
}
/// <summary>
/// Decides whether this compound unary transition may be applied: there must be
/// at least one item on the state's stack and that item must not already have
/// been unary transformed.
/// </summary>
/// <remarks>
/// The transition is also refused when its first label equals the label already
/// on the top node, when the top is a temporary ("@"-prefixed) node whose base
/// category differs from the last label of this chain, or when this is a root
/// transition and the stack holds more than one item or the queue is not empty.
/// A constraint applies only when the top node spans exactly the constrained
/// range and does not yet match it; then one of this transition's labels has
/// to match.
/// </remarks>
/// <param name="state">The parser state to test.</param>
/// <param name="constraints">Span constraints to respect; may be null.</param>
/// <returns>true if this transition is allowed in this state.</returns>
public virtual bool IsLegal(State state, IList<ParserConstraint> constraints)
{
    if (state.finished || state.stack.Size() == 0)
    {
        return false;
    }

    Tree top = state.stack.Peek();

    // Disallow unary transitions after we've already had a unary transition
    bool alreadyUnary = top.Children().Length == 1 && !top.IsPreTerminal();
    if (alreadyUnary)
    {
        return false;
    }

    // Disallow unary transitions where the final label doesn't change
    if (top.Label().Value().Equals(labels[0]))
    {
        return false;
    }

    // TODO: need to think more about when a unary transition is
    // allowed if the top of the stack is temporary.
    // For now, a binarized top needs the bottom of the chain to be of its own type.
    if (top.Label().Value().StartsWith("@"))
    {
        string bottomLabel = labels[labels.Length - 1];
        if (!bottomLabel.Equals(Sharpen.Runtime.Substring(top.Label().Value(), 1)))
        {
            return false;
        }
    }

    if (isRoot && (state.stack.Size() > 1 || !state.EndOfQueue()))
    {
        return false;
    }

    if (constraints == null)
    {
        return true;
    }

    foreach (ParserConstraint constraint in constraints)
    {
        bool sameSpan = ShiftReduceUtils.LeftIndex(top) == constraint.start
            && ShiftReduceUtils.RightIndex(top) == constraint.end - 1;
        if (!sameSpan || constraint.state.Matcher(top.Value()).Matches())
        {
            // wrong size for this constraint, or the tree already satisfies it
            continue;
        }

        bool satisfiable = false;
        for (int i = 0; i < labels.Length && !satisfiable; ++i)
        {
            satisfiable = constraint.state.Matcher(labels[i]).Matches();
        }
        if (!satisfiable)
        {
            return false;
        }
    }
    return true;
}
/// <summary>
/// Runs one training pass over a single tree and appends the resulting
/// perceptron updates to <paramref name="updates"/>.
/// </summary>
/// <remarks>
/// The strategy is chosen by <c>op.TrainOptions().trainingMethod</c>:
/// <list type="bullet">
/// <item>Oracle: follow the model's own predictions and compare each with the
/// oracle's gold transition for the current state.</item>
/// <item>Beam / ReorderBeam: beam search alongside the gold transition
/// sequence, updating when the best state on the beam is not the gold
/// state.</item>
/// <item>Gold / EarlyTermination / ReorderOracle: walk the gold transition
/// list greedily and update on each mismatch; on a mismatch they respectively
/// continue along gold, stop, or try to reorder the remaining transitions.</item>
/// </list>
/// Weights are not changed here; updates are only collected.
/// </remarks>
/// <param name="index">Index of the tree within the training lists.</param>
/// <param name="binarizedTrees">The binarized training trees.</param>
/// <param name="transitionLists">Gold transition sequence for each tree.</param>
/// <param name="updates">Receives the updates produced for this tree.</param>
/// <param name="oracle">Oracle consulted by the Oracle training method.</param>
/// <returns>A pair of (number of correct decisions, number of wrong decisions).</returns>
/// <exception cref="ArgumentException">
/// The beam size is not positive for a beam method, or an unexpected training
/// method reaches the greedy mismatch handling.
/// </exception>
private Pair<int, int> TrainTree(int index, IList<Tree> binarizedTrees, IList<IList<ITransition>> transitionLists, IList<PerceptronModel.Update> updates, Oracle oracle)
{
    int numCorrect = 0;
    int numWrong = 0;
    Tree tree = binarizedTrees[index];
    var method = op.TrainOptions().trainingMethod;

    ReorderingOracle reorderer = null;
    if (method == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle || method == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
    {
        reorderer = new ReorderingOracle(op);
    }

    // TODO. This training method seems to be working in that it
    // trains models just like the gold and early termination methods do.
    // However, it causes the feature space to go crazy. Presumably
    // leaving out features with low weights or low frequencies would
    // significantly help with that. Otherwise, not sure how to keep
    // it under control.
    if (method == ShiftReduceTrainOptions.TrainingMethod.Oracle)
    {
        State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
        while (!state.IsFinished())
        {
            IList<string> features = featureFactory.Featurize(state);
            ScoredObject<int> prediction = FindHighestScoringTransition(state, features, true);
            if (prediction == null)
            {
                throw new AssertionError("Did not find a legal transition");
            }
            int predictedNum = prediction.Object();
            ITransition predicted = transitionIndex.Get(predictedNum);
            OracleTransition gold = oracle.GoldTransition(index, state);
            if (gold.IsCorrect(predicted))
            {
                numCorrect++;
                if (gold.transition != null && !gold.transition.Equals(predicted))
                {
                    int transitionNum = transitionIndex.IndexOf(gold.transition);
                    // TODO: do we want to add unary transitions which are
                    // only possible when the parser has gone off the rails?
                    // When the gold transition is unknown to the model, skip the update
                    // but still fall through to advance the state. Skipping the advance
                    // would repeat this iteration on an unchanged state forever.
                    if (transitionNum >= 0)
                    {
                        updates.Add(new PerceptronModel.Update(features, transitionNum, -1, learningRate));
                    }
                }
            }
            else
            {
                numWrong++;
                int transitionNum = -1;
                if (gold.transition != null)
                {
                    transitionNum = transitionIndex.IndexOf(gold.transition);
                }
                // TODO: this can theoretically result in a -1 gold
                // transition if the transition exists, but is a
                // CompoundUnaryTransition which only exists because the
                // parser is wrong. Do we want to add those transitions?
                updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
            }
            state = predicted.Apply(state);
        }
    }
    else if (method == ShiftReduceTrainOptions.TrainingMethod.Beam || method == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
    {
        if (op.TrainOptions().beamSize <= 0)
        {
            throw new ArgumentException("Illegal beam size " + op.TrainOptions().beamSize);
        }
        // Work on a private copy: transitions are consumed (and possibly reordered) below.
        IList<ITransition> transitions = Generics.NewLinkedList(transitionLists[index]);
        PriorityQueue<State> agenda = new PriorityQueue<State>(op.TrainOptions().beamSize + 1, ScoredComparator.AscendingComparator);
        State goldState = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
        agenda.Add(goldState);
        while (transitions.Count > 0)
        {
            ITransition goldTransition = transitions[0];
            ITransition highestScoringTransitionFromGoldState = null;
            double highestScoreFromGoldState = 0.0;
            PriorityQueue<State> newAgenda = new PriorityQueue<State>(op.TrainOptions().beamSize + 1, ScoredComparator.AscendingComparator);
            State highestScoringState = null;
            State highestCurrentState = null;
            foreach (State currentState in agenda)
            {
                bool isGoldState = (method == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam && goldState.AreTransitionsEqual(currentState));
                IList<string> features = featureFactory.Featurize(currentState);
                ICollection<ScoredObject<int>> stateTransitions = FindHighestScoringTransitions(currentState, features, true, op.TrainOptions().beamSize, null);
                foreach (ScoredObject<int> transition in stateTransitions)
                {
                    State newState = transitionIndex.Get(transition.Object()).Apply(currentState, transition.Score());
                    newAgenda.Add(newState);
                    // Keep the agenda at beam size by polling one state whenever it overflows.
                    if (newAgenda.Count > op.TrainOptions().beamSize)
                    {
                        newAgenda.Poll();
                    }
                    if (highestScoringState == null || highestScoringState.Score() < newState.Score())
                    {
                        highestScoringState = newState;
                        highestCurrentState = currentState;
                    }
                    if (isGoldState && (highestScoringTransitionFromGoldState == null || transition.Score() > highestScoreFromGoldState))
                    {
                        highestScoringTransitionFromGoldState = transitionIndex.Get(transition.Object());
                        highestScoreFromGoldState = transition.Score();
                    }
                }
            }

            // This can happen if the REORDER_BEAM method backs itself
            // into a corner, such as transitioning to something that
            // can't have a FinalizeTransition applied. This doesn't
            // happen for the BEAM method because in that case the correct
            // state (eg one with ROOT) isn't on the agenda so it stops.
            if (method == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam && highestScoringTransitionFromGoldState == null)
            {
                break;
            }

            State newGoldState = goldTransition.Apply(goldState, 0.0);

            // if highest scoring state used the correct transition, no training
            // otherwise, down the last transition, up the correct
            if (!newGoldState.AreTransitionsEqual(highestScoringState))
            {
                ++numWrong;
                IList<string> goldFeatures = featureFactory.Featurize(goldState);
                int lastTransition = transitionIndex.IndexOf(highestScoringState.transitions.Peek());
                updates.Add(new PerceptronModel.Update(featureFactory.Featurize(highestCurrentState), -1, lastTransition, learningRate));
                updates.Add(new PerceptronModel.Update(goldFeatures, transitionIndex.IndexOf(goldTransition), -1, learningRate));

                if (method == ShiftReduceTrainOptions.TrainingMethod.Beam)
                {
                    // If the correct state has fallen off the agenda, break
                    if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                    {
                        break;
                    }
                    // RemoveAt drops the head by index; Remove(0) would be a by-value removal.
                    transitions.RemoveAt(0);
                }
                else if (method == ShiftReduceTrainOptions.TrainingMethod.ReorderBeam)
                {
                    if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                    {
                        // The gold state fell off the beam: try to continue from the best
                        // transition out of the gold state, with the rest reordered to fit.
                        if (!reorderer.Reorder(goldState, highestScoringTransitionFromGoldState, transitions))
                        {
                            break;
                        }
                        newGoldState = highestScoringTransitionFromGoldState.Apply(goldState);
                        if (!ShiftReduceUtils.FindStateOnAgenda(newAgenda, newGoldState))
                        {
                            break;
                        }
                    }
                    else
                    {
                        transitions.RemoveAt(0);
                    }
                }
            }
            else
            {
                ++numCorrect;
                transitions.RemoveAt(0);
            }

            goldState = newGoldState;
            agenda = newAgenda;
        }
    }
    else if (method == ShiftReduceTrainOptions.TrainingMethod.ReorderOracle
        || method == ShiftReduceTrainOptions.TrainingMethod.EarlyTermination
        || method == ShiftReduceTrainOptions.TrainingMethod.Gold)
    {
        State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
        // Work on a private copy: transitions are consumed (and possibly reordered) below.
        IList<ITransition> transitions = Generics.NewLinkedList(transitionLists[index]);
        bool keepGoing = true;
        while (transitions.Count > 0 && keepGoing)
        {
            ITransition transition = transitions[0];
            int transitionNum = transitionIndex.IndexOf(transition);
            IList<string> features = featureFactory.Featurize(state);
            int predictedNum = FindHighestScoringTransition(state, features, false).Object();
            ITransition predicted = transitionIndex.Get(predictedNum);
            if (transitionNum == predictedNum)
            {
                transitions.RemoveAt(0);
                state = transition.Apply(state);
                numCorrect++;
            }
            else
            {
                numWrong++;
                // TODO: allow weighted features, weighted training, etc
                updates.Add(new PerceptronModel.Update(features, transitionNum, predictedNum, learningRate));
                switch (method)
                {
                    case ShiftReduceTrainOptions.TrainingMethod.EarlyTermination:
                    {
                        keepGoing = false;
                        break;
                    }

                    case ShiftReduceTrainOptions.TrainingMethod.Gold:
                    {
                        transitions.RemoveAt(0);
                        state = transition.Apply(state);
                        break;
                    }

                    case ShiftReduceTrainOptions.TrainingMethod.ReorderOracle:
                    {
                        keepGoing = reorderer.Reorder(state, predicted, transitions);
                        if (keepGoing)
                        {
                            state = predicted.Apply(state);
                        }
                        break;
                    }

                    default:
                    {
                        throw new ArgumentException("Unexpected method " + method);
                    }
                }
            }
        }
    }

    return Pair.MakePair(numCorrect, numWrong);
}
/// <summary>
/// Returns a transition which might not even be part of the model, but will
/// hopefully allow progress in an otherwise stuck parse.
/// </summary>
/// <remarks>
/// Candidates are tried in this order:
/// <list type="number">
/// <item>A unary transition to a known state that satisfies a constraint whose
/// boundaries equal the top node's span and which the top node does not yet
/// match.</item>
/// <item>A unary transition that turns a temporary top node into its base
/// category, when it is the only node or the node below is temporary too.</item>
/// <item>A unary transition to the first root state, when a single non-root
/// tree remains and the queue is exhausted.</item>
/// <item>A binary transition headed by whichever of the top two nodes is
/// temporary.</item>
/// </list>
/// TODO: perhaps we want to create an EmergencyTransition class
/// which indicates that something has gone wrong
/// </remarks>
/// <param name="state">The stuck parser state.</param>
/// <param name="constraints">Span constraints to respect; may be null.</param>
/// <returns>A transition to try, or null if none of the rules applies.</returns>
public virtual ITransition FindEmergencyTransition(State state, IList<ParserConstraint> constraints)
{
    if (state.stack.Size() == 0)
    {
        return null;
    }

    Tree top = state.stack.Peek();

    // See if there is a constraint whose boundaries match the end
    // points of the top node on the stack. If so, we can apply a
    // UnaryTransition / CompoundUnaryTransition if that would solve
    // the constraint
    if (constraints != null)
    {
        foreach (ParserConstraint constraint in constraints)
        {
            if (ShiftReduceUtils.LeftIndex(top) != constraint.start || ShiftReduceUtils.RightIndex(top) != constraint.end - 1)
            {
                continue;
            }
            if (ShiftReduceUtils.ConstraintMatchesTreeTop(top, constraint))
            {
                continue;
            }
            // found an unmatched constraint that can be fixed with a unary transition
            // now we need to find a matching state for the transition
            foreach (string label in knownStates)
            {
                if (constraint.state.Matcher(label).Matches())
                {
                    return CreateEmergencyUnaryTransition(label);
                }
            }
        }
    }

    if (ShiftReduceUtils.IsTemporary(top) && (state.stack.Size() == 1 || ShiftReduceUtils.IsTemporary(state.stack.Pop().Peek())))
    {
        // Drop the leading "@" to get the category the temporary node stands for.
        return CreateEmergencyUnaryTransition(Sharpen.Runtime.Substring(top.Value(), 1));
    }

    if (state.stack.Size() == 1 && state.tokenPosition >= state.sentence.Count)
    {
        // either need to finalize or transition to a root state
        if (!rootStates.Contains(top.Value()))
        {
            // Take the first root state by enumerating. Reading Current from a fresh
            // enumerator before MoveNext() would not give the first element.
            foreach (string root in rootStates)
            {
                return CreateEmergencyUnaryTransition(root);
            }
        }
    }

    if (state.stack.Size() == 1)
    {
        return null;
    }

    if (ShiftReduceUtils.IsTemporary(top))
    {
        return new BinaryTransition(Sharpen.Runtime.Substring(top.Value(), 1), BinaryTransition.Side.Right);
    }

    Tree next = state.stack.Pop().Peek();
    if (ShiftReduceUtils.IsTemporary(next))
    {
        return new BinaryTransition(Sharpen.Runtime.Substring(next.Value(), 1), BinaryTransition.Side.Left);
    }

    return null;
}

/// <summary>
/// Builds a non-root unary transition to <paramref name="label"/>, as a
/// compound unary when <c>op.compoundUnaries</c> is set.
/// </summary>
private ITransition CreateEmergencyUnaryTransition(string label)
{
    if (op.compoundUnaries)
    {
        return new CompoundUnaryTransition(Java.Util.Collections.SingletonList(label), false);
    }
    return new UnaryTransition(label, false);
}