Beispiel #1
0
 /// <summary>
 /// Runs <paramref name="classifier"/> over every datum in <paramref name="data"/>,
 /// records each (guessed label, gold label) pair via <c>AddGuess</c>, and returns the
 /// value of <c>GetFMeasure()</c> once the counts have been finalized.
 /// </summary>
 /// <remarks>
 /// Side effects: replaces <c>labelIndex</c> with a fresh index holding the classifier's
 /// labels followed by the dataset's labels, and resets the counts via <c>ClearCounts()</c>.
 /// </remarks>
 /// <typeparam name="F">Second type argument shared by the classifier, the dataset and its datums.</typeparam>
 /// <param name="classifier">Classifier whose guesses are being scored.</param>
 /// <param name="data">Dataset supplying the datums and their gold label indices.</param>
 /// <returns>The result of <c>GetFMeasure()</c> after all guesses have been counted.</returns>
 public override double Score <F>(IClassifier <L, F> classifier, GeneralDataset <L, F> data)
 {
     // Rebuild the label index so it covers labels known to either the classifier or the data.
     labelIndex = new HashIndex <L>();
     labelIndex.AddAll(classifier.Labels());
     labelIndex.AddAll(data.labelIndex.ObjectsList());
     ClearCounts();

     int[] labelsArr = data.GetLabelsArray();
     int   size      = data.Size();
     for (int i = 0; i < size; i++)
     {
         IDatum <L, F> d     = data.GetRVFDatum(i);
         L             guess = classifier.ClassOf(d);

         // labelsArr comes from the dataset, so its entries are resolved through the
         // dataset's own index (assumed to be positions in data.labelIndex -- confirm
         // against GeneralDataset). The merged labelIndex lists the classifier's labels
         // first, so the same position there is not guaranteed to be the same label.
         L gold = data.labelIndex.Get(labelsArr[i]);
         AddGuess(guess, gold);
     }
     FinalizeCounts();
     return GetFMeasure();
 }
Beispiel #2
0
        /// <summary>
        /// Trains a new <c>PerceptronModel</c> from the given treebanks and stores it in <c>this.model</c>.
        /// </summary>
        /// <remarks>
        /// Steps, in order: read and binarize every training treebank; optionally retag the trees
        /// when <c>op.testOptions.preTag</c> is set; collect the known, root and root-only states;
        /// convert the trees into transition sequences and index the transitions; optionally read
        /// the dev treebank; then build the model and call <c>TrainModel</c> on it.
        /// </remarks>
        /// <param name="trainTreebankPath">Pairs of (path, file filter), one per training treebank.</param>
        /// <param name="devTreebankPath">Pair of (path, file filter) for the dev treebank, or <c>null</c> to train without one.</param>
        /// <param name="serializedPath">Passed through unchanged to <c>PerceptronModel.TrainModel</c>.</param>
        private void Train(IList <Pair <string, IFileFilter> > trainTreebankPath, Pair <string, IFileFilter> devTreebankPath, string serializedPath)
        {
            log.Info("Training method: " + op.TrainOptions().trainingMethod);
            IList <Tree> binarizedTrees = Generics.NewArrayList();

            // Pool the binarized trees from every training treebank into one list.
            foreach (Pair <string, IFileFilter> treebank in trainTreebankPath)
            {
                Sharpen.Collections.AddAll(binarizedTrees, ReadBinarizedTreebank(treebank.First(), treebank.Second()));
            }

            // A non-positive thread setting means "use every available processor".
            int nThreads = op.trainOptions.trainingThreads;
            nThreads = nThreads <= 0 ? Runtime.GetRuntime().AvailableProcessors() : nThreads;

            // The tagger stays null unless pre-tagging is enabled; it is handed to TrainModel either way.
            Edu.Stanford.Nlp.Tagger.Common.Tagger tagger = null;
            if (op.testOptions.preTag)
            {
                Timing retagTimer = new Timing();
                tagger = Edu.Stanford.Nlp.Tagger.Common.Tagger.LoadModel(op.testOptions.taggerSerializedFile);
                RedoTags(binarizedTrees, tagger, nThreads);
                retagTimer.Done("Retagging");
            }

            ICollection <string> knownStates    = FindKnownStates(binarizedTrees);
            ICollection <string> rootStates     = FindRootStates(binarizedTrees);
            ICollection <string> rootOnlyStates = FindRootOnlyStates(binarizedTrees, rootStates);

            log.Info("Known states: " + knownStates);
            log.Info("States which occur at the root: " + rootStates);
            // Report the root-only set here; the previous code logged rootStates a second time.
            log.Info("States which only occur at the root: " + rootOnlyStates);

            Timing transitionTimer = new Timing();
            IList <IList <ITransition> > transitionLists = CreateTransitionSequence.CreateTransitionSequences(binarizedTrees, op.compoundUnaries, rootStates, rootOnlyStates);
            IIndex <ITransition>         transitionIndex = new HashIndex <ITransition>();

            // Index every transition that appears in any of the per-tree sequences.
            foreach (IList <ITransition> transitions in transitionLists)
            {
                transitionIndex.AddAll(transitions);
            }
            transitionTimer.Done("Converting trees into transition lists");
            log.Info("Number of transitions: " + transitionIndex.Size());

            Random   random      = new Random(op.trainOptions.randomSeed);
            Treebank devTreebank = null;

            if (devTreebankPath != null)
            {
                devTreebank = ReadTreebank(devTreebankPath.First(), devTreebankPath.Second());
            }

            PerceptronModel newModel = new PerceptronModel(this.op, transitionIndex, knownStates, rootStates, rootOnlyStates);

            newModel.TrainModel(serializedPath, tagger, random, binarizedTrees, transitionLists, devTreebank, nThreads);
            // Publish the model only after TrainModel has returned.
            this.model = newModel;
        }