/// <summary>
/// For every test case, replays the gold transitions up to the first
/// <c>BinaryTransition</c>, applies <c>shift</c> at that point instead, lets
/// <c>ReorderingOracle.ReorderIncorrectShiftTransition</c> rewrite the remaining gold
/// transitions, applies them, and checks that the debinarized tree on top of the stack
/// prints the same as the matching entry of <c>incorrectShiftTrees</c>.
/// </summary>
public virtual void TestReorderIncorrectShiftResultingTree()
 {
     for (int caseIndex = 0; caseIndex < correctTrees.Length; ++caseIndex)
     {
         State current = ShiftReduceParser.InitialStateFromGoldTagTree(correctTrees[caseIndex]);
         IList <ITransition> goldSequence = CreateTransitionSequence.CreateTransitionSequence(binarizedTrees[caseIndex]);

         // Follow the gold sequence until the first binary transition is reached
         // (or the sequence runs out).
         int firstBinary = 0;
         while (firstBinary < goldSequence.Count && !(goldSequence[firstBinary] is BinaryTransition))
         {
             current = goldSequence[firstBinary].Apply(current);
             ++firstBinary;
         }

         // Apply a shift where the gold sequence had the binary transition.
         current = shift.Apply(current);

         // Copy the rest of the gold sequence so the oracle can rewrite it in place.
         IList <ITransition> remaining = Generics.NewLinkedList(goldSequence.SubList(firstBinary, goldSequence.Count));
         bool reorderSucceeded = ReorderingOracle.ReorderIncorrectShiftTransition(remaining);
         NUnit.Framework.Assert.IsTrue(reorderSucceeded);

         foreach (ITransition step in remaining)
         {
             current = step.Apply(current);
         }

         Tree result = debinarizer.TransformTree(current.stack.Peek());
         string expectedText = incorrectShiftTrees[caseIndex].ToString();
         NUnit.Framework.Assert.AreEqual(expectedText, result.ToString());
     }
 }
Esempio n. 2
0
        /// <summary>
        /// Checks the transition sequence produced for <c>commaTreeString</c> against a fixed
        /// list of transition names, then checks that the initial state built from the same
        /// tree records exactly one separator: "," under key 2.
        /// </summary>
        public virtual void TestSeparators()
        {
            Tree tree = ConvertTree(commaTreeString);
            IList <ITransition> transitions         = CreateTransitionSequence.CreateTransitionSequence(tree, true, Java.Util.Collections.Singleton("ROOT"), Java.Util.Collections.Singleton("ROOT"));
            IList <string>      expectedTransitions = Arrays.AsList(new string[] { "Shift", "Shift", "Shift", "Shift", "RightBinary(@ADJP)", "RightBinary(ADJP)", "Shift", "RightBinary(@NP)", "RightBinary(NP)", "CompoundUnary*([ROOT, FRAG])", "Finalize", "Idle" });

            // Render every transition with ToString() explicitly. Handing a null mapping
            // function to CollectionUtils.TransformAsList cannot produce the strings that
            // the expected list contains.
            IList <string> actualTransitions = new System.Collections.Generic.List <string>();
            foreach (ITransition transition in transitions)
            {
                actualTransitions.Add(transition.ToString());
            }
            NUnit.Framework.Assert.AreEqual(expectedTransitions, actualTransitions);

            State state = ShiftReduceParser.InitialStateFromGoldTagTree(tree);

            NUnit.Framework.Assert.AreEqual(1, state.separators.Count);
            NUnit.Framework.Assert.AreEqual(2, state.separators.FirstKey());
            NUnit.Framework.Assert.AreEqual(",", state.separators[2]);
        }
Esempio n. 3
0
 /// <summary>
 /// Round-trip check: for each entry of <c>treeStrings</c>, builds the transition
 /// sequence for the converted tree, applies it to the initial state created from that
 /// tree, and asserts that the top of the resulting stack equals the original tree.
 /// </summary>
 public virtual void TestCompoundUnaryTransitions()
 {
     foreach (string text in treeStrings)
     {
         Tree expected = ConvertTree(text);
         IList <ITransition> steps = CreateTransitionSequence.CreateTransitionSequence(
             expected,
             true,
             Java.Util.Collections.Singleton("ROOT"),
             Java.Util.Collections.Singleton("ROOT"));

         // Replay the whole sequence starting from the initial state.
         State parsed = ShiftReduceParser.InitialStateFromGoldTagTree(expected);
         for (int i = 0; i < steps.Count; ++i)
         {
             parsed = steps[i].Apply(parsed);
         }

         NUnit.Framework.Assert.AreEqual(expected, parsed.stack.Peek());
     }
 }
Esempio n. 4
0
        /// <summary>
        /// Trains a new <c>PerceptronModel</c> and stores it in <c>this.model</c>.
        /// Steps: read the binarized training trees, optionally retag them when
        /// <c>op.testOptions.preTag</c> is set, collect the known / root / root-only states,
        /// convert the trees into transition lists, index all transitions, optionally read a
        /// dev treebank, and finally call <c>TrainModel</c> on the new model.
        /// </summary>
        /// <param name="trainTreebankPath">Pairs whose first and second elements are handed to <c>ReadBinarizedTreebank</c>; all resulting trees are used for training.</param>
        /// <param name="devTreebankPath">Pair handed to <c>ReadTreebank</c> to load a dev treebank; may be null, in which case a null dev treebank is passed to training.</param>
        /// <param name="serializedPath">Passed through unchanged to <c>PerceptronModel.TrainModel</c>.</param>
        private void Train(IList <Pair <string, IFileFilter> > trainTreebankPath, Pair <string, IFileFilter> devTreebankPath, string serializedPath)
        {
            log.Info("Training method: " + op.TrainOptions().trainingMethod);
            IList <Tree> binarizedTrees = Generics.NewArrayList();

            foreach (Pair <string, IFileFilter> treebank in trainTreebankPath)
            {
                Sharpen.Collections.AddAll(binarizedTrees, ReadBinarizedTreebank(treebank.First(), treebank.Second()));
            }
            int nThreads = op.trainOptions.trainingThreads;

            // A non-positive thread count means "use every available processor".
            nThreads = nThreads <= 0 ? Runtime.GetRuntime().AvailableProcessors() : nThreads;
            // The tagger stays null unless pre-tagging is enabled; it is passed to TrainModel either way.
            Edu.Stanford.Nlp.Tagger.Common.Tagger tagger = null;
            if (op.testOptions.preTag)
            {
                Timing retagTimer = new Timing();
                tagger = Edu.Stanford.Nlp.Tagger.Common.Tagger.LoadModel(op.testOptions.taggerSerializedFile);
                RedoTags(binarizedTrees, tagger, nThreads);
                retagTimer.Done("Retagging");
            }
            ICollection <string> knownStates    = FindKnownStates(binarizedTrees);
            ICollection <string> rootStates     = FindRootStates(binarizedTrees);
            ICollection <string> rootOnlyStates = FindRootOnlyStates(binarizedTrees, rootStates);

            log.Info("Known states: " + knownStates);
            log.Info("States which occur at the root: " + rootStates);
            // Report the root-only set under the root-only message (not rootStates a second time).
            log.Info("States which only occur at the root: " + rootOnlyStates);
            Timing transitionTimer = new Timing();
            IList <IList <ITransition> > transitionLists = CreateTransitionSequence.CreateTransitionSequences(binarizedTrees, op.compoundUnaries, rootStates, rootOnlyStates);
            IIndex <ITransition>         transitionIndex = new HashIndex <ITransition>();

            // Collect every transition seen in any training sequence into one index.
            foreach (IList <ITransition> transitions in transitionLists)
            {
                transitionIndex.AddAll(transitions);
            }
            transitionTimer.Done("Converting trees into transition lists");
            log.Info("Number of transitions: " + transitionIndex.Size());
            Random   random      = new Random(op.trainOptions.randomSeed);
            Treebank devTreebank = null;

            if (devTreebankPath != null)
            {
                devTreebank = ReadTreebank(devTreebankPath.First(), devTreebankPath.Second());
            }
            PerceptronModel newModel = new PerceptronModel(this.op, transitionIndex, knownStates, rootStates, rootOnlyStates);

            newModel.TrainModel(serializedPath, tagger, random, binarizedTrees, transitionLists, devTreebank, nThreads);
            // Only replace the current model once training has returned.
            this.model = newModel;
        }