/// <summary>
/// For each test case: applies the gold transitions up to (but not including) the first
/// <c>BinaryTransition</c>, applies <c>shift</c> instead, hands the remaining gold
/// transitions to <c>ReorderingOracle.ReorderIncorrectShiftTransition</c> (asserting that
/// it returns true), applies the resulting list, and finally checks that the debinarized
/// tree on top of the stack prints the same as the matching <c>incorrectShiftTrees</c> entry.
/// </summary>
public virtual void TestReorderIncorrectShiftResultingTree()
{
    for (int caseIndex = 0; caseIndex < correctTrees.Length; ++caseIndex)
    {
        State current = ShiftReduceParser.InitialStateFromGoldTagTree(correctTrees[caseIndex]);
        IList<ITransition> goldSequence = CreateTransitionSequence.CreateTransitionSequence(binarizedTrees[caseIndex]);

        // Follow the gold sequence until the first binary transition is reached.
        int firstBinary = 0;
        while (firstBinary < goldSequence.Count && !(goldSequence[firstBinary] is BinaryTransition))
        {
            current = goldSequence[firstBinary].Apply(current);
            ++firstBinary;
        }

        // Take the (incorrect) shift where the gold sequence had a binary transition.
        current = shift.Apply(current);

        // Copy the tail of the gold sequence so the oracle works on its own list
        // (presumably it rewrites the list in place - confirm against ReorderingOracle).
        IList<ITransition> remaining = Generics.NewLinkedList(goldSequence.SubList(firstBinary, goldSequence.Count));
        bool reorderSucceeded = ReorderingOracle.ReorderIncorrectShiftTransition(remaining);
        NUnit.Framework.Assert.IsTrue(reorderSucceeded);

        foreach (ITransition step in remaining)
        {
            current = step.Apply(current);
        }

        Tree actualTree = debinarizer.TransformTree(current.stack.Peek());
        string expectedText = incorrectShiftTrees[caseIndex].ToString();
        NUnit.Framework.Assert.AreEqual(expectedText, actualTree.ToString());
    }
}
/// <summary>
/// Builds the transition sequence for <c>commaTreeString</c> (compound unaries enabled,
/// "ROOT" as the only root / root-only state) and checks two things: the string forms of
/// the transitions match the expected list, and the initial state built from the same tree
/// records exactly one separator, "," at key 2.
/// </summary>
public virtual void TestSeparators()
{
    Tree tree = ConvertTree(commaTreeString);
    IList<ITransition> transitions = CreateTransitionSequence.CreateTransitionSequence(tree, true, Java.Util.Collections.Singleton("ROOT"), Java.Util.Collections.Singleton("ROOT"));
    IList<string> expectedTransitions = Arrays.AsList(new string[] { "Shift", "Shift", "Shift", "Shift", "RightBinary(@ADJP)", "RightBinary(ADJP)", "Shift", "RightBinary(@NP)", "RightBinary(NP)", "CompoundUnary*([ROOT, FRAG])", "Finalize", "Idle" });

    // Stringify each transition explicitly. Passing a null mapping function to
    // CollectionUtils.TransformAsList cannot produce the transition names that the
    // expected list is compared against.
    System.Collections.Generic.List<string> actualTransitions = new System.Collections.Generic.List<string>();
    foreach (ITransition transition in transitions)
    {
        actualTransitions.Add(transition.ToString());
    }
    NUnit.Framework.Assert.AreEqual(expectedTransitions, actualTransitions);

    State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);
    NUnit.Framework.Assert.AreEqual(1, state.separators.Count);
    NUnit.Framework.Assert.AreEqual(2, state.separators.FirstKey());
    NUnit.Framework.Assert.AreEqual(",", state.separators[2]);
}
/// <summary>
/// For every entry of <c>treeStrings</c>: converts it to a tree, generates its transition
/// sequence with compound unaries enabled ("ROOT" as the only root / root-only state),
/// replays that sequence from the initial gold-tag state, and asserts that the tree on top
/// of the stack equals the tree the sequence was generated from.
/// </summary>
public virtual void TestCompoundUnaryTransitions()
{
    foreach (string text in treeStrings)
    {
        Tree expected = ConvertTree(text);
        IList<ITransition> sequence = CreateTransitionSequence.CreateTransitionSequence(
            expected,
            true,
            Java.Util.Collections.Singleton("ROOT"),
            Java.Util.Collections.Singleton("ROOT"));

        // Replay the whole sequence, threading the parser state through each step.
        State current = ShiftReduceParser.InitialStateFromGoldTagTree(expected);
        for (int step = 0; step < sequence.Count; ++step)
        {
            current = sequence[step].Apply(current);
        }

        NUnit.Framework.Assert.AreEqual(expected, current.stack.Peek());
    }
}
/// <summary>
/// Trains a new <c>PerceptronModel</c> and installs it as <c>this.model</c>.
/// </summary>
/// <remarks>
/// Steps, in order: read and concatenate the binarized trees of every training treebank;
/// optionally retag them when <c>op.testOptions.preTag</c> is set; collect the known, root
/// and root-only states; convert the trees into transition lists and index every
/// transition; optionally read the dev treebank; then train the model.
/// </remarks>
/// <param name="trainTreebankPath">Training treebanks as (path, file filter) pairs.</param>
/// <param name="devTreebankPath">Optional dev treebank as a (path, file filter) pair; may be null, in which case a null dev treebank is passed to training.</param>
/// <param name="serializedPath">Forwarded unchanged to <c>PerceptronModel.TrainModel</c>.</param>
private void Train(IList<Pair<string, IFileFilter>> trainTreebankPath, Pair<string, IFileFilter> devTreebankPath, string serializedPath)
{
    log.Info("Training method: " + op.TrainOptions().trainingMethod);

    IList<Tree> binarizedTrees = Generics.NewArrayList();
    foreach (Pair<string, IFileFilter> treebank in trainTreebankPath)
    {
        Sharpen.Collections.AddAll(binarizedTrees, ReadBinarizedTreebank(treebank.First(), treebank.Second()));
    }

    // A non-positive thread count means "use every available processor".
    int nThreads = op.trainOptions.trainingThreads;
    nThreads = nThreads <= 0 ? Runtime.GetRuntime().AvailableProcessors() : nThreads;

    // The tagger stays null unless pre-tagging is requested; it is handed to TrainModel either way.
    Edu.Stanford.Nlp.Tagger.Common.Tagger tagger = null;
    if (op.testOptions.preTag)
    {
        Timing retagTimer = new Timing();
        tagger = Edu.Stanford.Nlp.Tagger.Common.Tagger.LoadModel(op.testOptions.taggerSerializedFile);
        RedoTags(binarizedTrees, tagger, nThreads);
        retagTimer.Done("Retagging");
    }

    ICollection<string> knownStates = FindKnownStates(binarizedTrees);
    ICollection<string> rootStates = FindRootStates(binarizedTrees);
    ICollection<string> rootOnlyStates = FindRootOnlyStates(binarizedTrees, rootStates);
    log.Info("Known states: " + knownStates);
    log.Info("States which occur at the root: " + rootStates);
    // Report the root-only set here; logging rootStates again would just repeat the line above.
    log.Info("States which only occur at the root: " + rootOnlyStates);

    Timing transitionTimer = new Timing();
    IList<IList<ITransition>> transitionLists = CreateTransitionSequence.CreateTransitionSequences(binarizedTrees, op.compoundUnaries, rootStates, rootOnlyStates);
    IIndex<ITransition> transitionIndex = new HashIndex<ITransition>();
    foreach (IList<ITransition> transitions in transitionLists)
    {
        transitionIndex.AddAll(transitions);
    }
    transitionTimer.Done("Converting trees into transition lists");
    log.Info("Number of transitions: " + transitionIndex.Size());

    Random random = new Random(op.trainOptions.randomSeed);

    Treebank devTreebank = null;
    if (devTreebankPath != null)
    {
        devTreebank = ReadTreebank(devTreebankPath.First(), devTreebankPath.Second());
    }

    // Publish the model only after training has returned.
    PerceptronModel newModel = new PerceptronModel(this.op, transitionIndex, knownStates, rootStates, rootOnlyStates);
    newModel.TrainModel(serializedPath, tagger, random, binarizedTrees, transitionLists, devTreebank, nThreads);
    this.model = newModel;
}