/// <summary>
/// Creates a processor that holds references to the enclosing model, the
/// training trees, their transition lists, the update list and the oracle.
/// The arguments are stored as given; no copies are made.
/// </summary>
/// <param name="_enclosing">The <c>PerceptronModel</c> instance this processor belongs to.</param>
/// <param name="binarizedTrees">Trees stored in the <c>binarizedTrees</c> field.</param>
/// <param name="transitionLists">Transition lists stored in the <c>transitionLists</c> field.</param>
/// <param name="updates">Update list stored in the <c>updates</c> field.</param>
/// <param name="oracle">Oracle stored in the <c>oracle</c> field.</param>
public TrainTreeProcessor(PerceptronModel _enclosing, IList<Tree> binarizedTrees, IList<IList<ITransition>> transitionLists, IList<PerceptronModel.Update> updates, Oracle oracle)
{
    this._enclosing = _enclosing;

    // Original note: "this needs to be a synchronized list".
    // NOTE(review): it is not clear from this constructor which list the note
    // refers to; confirm against the field declarations and make sure callers
    // pass a thread-safe list for it.
    this.binarizedTrees = binarizedTrees;
    this.transitionLists = transitionLists;
    this.updates = updates;
    this.oracle = oracle;
}
/// <summary>
/// Trains a new <c>PerceptronModel</c> from the given treebanks and stores it
/// in <c>this.model</c>.
/// </summary>
/// <remarks>
/// Steps, in order: read and binarize every training treebank; optionally
/// retag the trees when <c>op.testOptions.preTag</c> is set; collect the known,
/// root and root-only states; convert the trees into transition lists and index
/// every transition; optionally read the dev treebank; then build the model and
/// call <c>TrainModel</c> on it.
/// </remarks>
/// <param name="trainTreebankPath">
/// Training treebanks. Each pair's first and second elements are passed to
/// <c>ReadBinarizedTreebank</c> (presumably a path and a file filter).
/// </param>
/// <param name="devTreebankPath">
/// Optional dev treebank; when <c>null</c>, no dev treebank is read and
/// <c>null</c> is passed on to <c>TrainModel</c>.
/// </param>
/// <param name="serializedPath">Passed through unchanged to <c>TrainModel</c>.</param>
private void Train(IList<Pair<string, IFileFilter>> trainTreebankPath, Pair<string, IFileFilter> devTreebankPath, string serializedPath)
{
    log.Info("Training method: " + op.TrainOptions().trainingMethod);

    // Gather the binarized trees from every training treebank into one list.
    IList<Tree> binarizedTrees = Generics.NewArrayList();
    foreach (Pair<string, IFileFilter> treebank in trainTreebankPath)
    {
        Sharpen.Collections.AddAll(binarizedTrees, ReadBinarizedTreebank(treebank.First(), treebank.Second()));
    }

    // A non-positive configured thread count means "use every available processor".
    int nThreads = op.trainOptions.trainingThreads;
    nThreads = nThreads <= 0 ? Runtime.GetRuntime().AvailableProcessors() : nThreads;

    // The tagger stays null unless pre-tagging is enabled; it is handed to TrainModel either way.
    Edu.Stanford.Nlp.Tagger.Common.Tagger tagger = null;
    if (op.testOptions.preTag)
    {
        Timing retagTimer = new Timing();
        tagger = Edu.Stanford.Nlp.Tagger.Common.Tagger.LoadModel(op.testOptions.taggerSerializedFile);
        RedoTags(binarizedTrees, tagger, nThreads);
        retagTimer.Done("Retagging");
    }

    ICollection<string> knownStates = FindKnownStates(binarizedTrees);
    ICollection<string> rootStates = FindRootStates(binarizedTrees);
    ICollection<string> rootOnlyStates = FindRootOnlyStates(binarizedTrees, rootStates);
    log.Info("Known states: " + knownStates);
    log.Info("States which occur at the root: " + rootStates);
    // Fix: this line previously logged rootStates again instead of rootOnlyStates.
    log.Info("States which only occur at the root: " + rootOnlyStates);

    // Convert each tree into its transition sequence and index every transition seen.
    Timing transitionTimer = new Timing();
    IList<IList<ITransition>> transitionLists = CreateTransitionSequence.CreateTransitionSequences(binarizedTrees, op.compoundUnaries, rootStates, rootOnlyStates);
    IIndex<ITransition> transitionIndex = new HashIndex<ITransition>();
    foreach (IList<ITransition> transitions in transitionLists)
    {
        transitionIndex.AddAll(transitions);
    }
    transitionTimer.Done("Converting trees into transition lists");
    log.Info("Number of transitions: " + transitionIndex.Size());

    // Seeded from the training options so runs with the same seed use the same random sequence.
    Random random = new Random(op.trainOptions.randomSeed);

    Treebank devTreebank = null;
    if (devTreebankPath != null)
    {
        devTreebank = ReadTreebank(devTreebankPath.First(), devTreebankPath.Second());
    }

    // Assign this.model only after TrainModel returns, so an exception during
    // training leaves the previous model in place.
    PerceptronModel newModel = new PerceptronModel(this.op, transitionIndex, knownStates, rootStates, rootOnlyStates);
    newModel.TrainModel(serializedPath, tagger, random, binarizedTrees, transitionLists, devTreebank, nThreads);
    this.model = newModel;
}