示例#1
0
 /// <summary>
 /// Creates a processor that keeps references to the enclosing model and to the
 /// training data structures it is handed. Nothing is copied or validated here;
 /// each argument is stored as-is in the field of the same name.
 /// </summary>
 /// <param name="_enclosing">The owning <c>PerceptronModel</c> instance.</param>
 /// <param name="binarizedTrees">Trees stored in <c>binarizedTrees</c>.</param>
 /// <param name="transitionLists">Transition lists stored in <c>transitionLists</c>.</param>
 /// <param name="updates">Update list stored in <c>updates</c>.</param>
 /// <param name="oracle">Oracle stored in <c>oracle</c>.</param>
 public TrainTreeProcessor(PerceptronModel _enclosing, IList <Tree> binarizedTrees, IList <IList <ITransition> > transitionLists, IList <PerceptronModel.Update> updates, Oracle oracle)
 {
     // Training inputs handed to this processor.
     this.oracle          = oracle;
     this.transitionLists = transitionLists;
     this.binarizedTrees  = binarizedTrees;

     // NOTE(review): the original comment here said "this needs to be a
     // synchronized list" without naming the list; presumably it refers to
     // `updates` -- confirm against the field declaration and the callers.
     this.updates = updates;

     // Back-reference to the outer model instance.
     this._enclosing = _enclosing;
 }
示例#2
0
        /// <summary>
        /// Trains a new <c>PerceptronModel</c> from the given training treebanks and
        /// stores it in <c>this.model</c>.
        /// </summary>
        /// <remarks>
        /// Steps, in order: read and accumulate the binarized training trees,
        /// optionally retag them (when <c>op.testOptions.preTag</c> is set), compute
        /// the known / root / root-only state sets, convert the trees into transition
        /// sequences and index every transition, optionally read the dev treebank,
        /// then build the model and call <c>TrainModel</c> on it.
        /// </remarks>
        /// <param name="trainTreebankPath">
        /// Pairs of (path, file filter); each pair is passed to
        /// <c>ReadBinarizedTreebank</c> and the results are concatenated.
        /// </param>
        /// <param name="devTreebankPath">
        /// Optional (path, file filter) pair for a dev treebank. When <c>null</c>,
        /// <c>TrainModel</c> receives a <c>null</c> dev treebank.
        /// </param>
        /// <param name="serializedPath">
        /// Forwarded unchanged to <c>TrainModel</c>; presumably the location where
        /// the model is serialized -- confirm against <c>PerceptronModel.TrainModel</c>.
        /// </param>
        private void Train(IList <Pair <string, IFileFilter> > trainTreebankPath, Pair <string, IFileFilter> devTreebankPath, string serializedPath)
        {
            log.Info("Training method: " + op.TrainOptions().trainingMethod);
            IList <Tree> binarizedTrees = Generics.NewArrayList();

            // Accumulate the trees from every training treebank into one list.
            foreach (Pair <string, IFileFilter> treebank in trainTreebankPath)
            {
                Sharpen.Collections.AddAll(binarizedTrees, ReadBinarizedTreebank(treebank.First(), treebank.Second()));
            }
            int nThreads = op.trainOptions.trainingThreads;

            // A non-positive thread count falls back to the number of available processors.
            nThreads = nThreads <= 0 ? Runtime.GetRuntime().AvailableProcessors() : nThreads;
            Edu.Stanford.Nlp.Tagger.Common.Tagger tagger = null;
            if (op.testOptions.preTag)
            {
                // Load the configured tagger and retag the training trees with it;
                // the same tagger instance is later handed to TrainModel.
                Timing retagTimer = new Timing();
                tagger = Edu.Stanford.Nlp.Tagger.Common.Tagger.LoadModel(op.testOptions.taggerSerializedFile);
                RedoTags(binarizedTrees, tagger, nThreads);
                retagTimer.Done("Retagging");
            }
            ICollection <string> knownStates    = FindKnownStates(binarizedTrees);
            ICollection <string> rootStates     = FindRootStates(binarizedTrees);
            ICollection <string> rootOnlyStates = FindRootOnlyStates(binarizedTrees, rootStates);

            log.Info("Known states: " + knownStates);
            log.Info("States which occur at the root: " + rootStates);
            // Fix: this message previously logged rootStates a second time, so the
            // root-only set was never actually reported.
            log.Info("States which only occur at the root: " + rootOnlyStates);
            Timing transitionTimer = new Timing();
            IList <IList <ITransition> > transitionLists = CreateTransitionSequence.CreateTransitionSequences(binarizedTrees, op.compoundUnaries, rootStates, rootOnlyStates);
            IIndex <ITransition>         transitionIndex = new HashIndex <ITransition>();

            // Index every transition that appears in any training sequence.
            foreach (IList <ITransition> transitions in transitionLists)
            {
                transitionIndex.AddAll(transitions);
            }
            transitionTimer.Done("Converting trees into transition lists");
            log.Info("Number of transitions: " + transitionIndex.Size());
            Random   random      = new Random(op.trainOptions.randomSeed);
            Treebank devTreebank = null;

            // The dev treebank is optional; it stays null when no path was supplied.
            if (devTreebankPath != null)
            {
                devTreebank = ReadTreebank(devTreebankPath.First(), devTreebankPath.Second());
            }
            PerceptronModel newModel = new PerceptronModel(this.op, transitionIndex, knownStates, rootStates, rootOnlyStates);

            newModel.TrainModel(serializedPath, tagger, random, binarizedTrees, transitionLists, devTreebank, nThreads);
            // Publish the model only after TrainModel has returned.
            this.model = newModel;
        }