/// <exception cref="System.IO.IOException"/>
        public virtual void Train(IList<Tree> sentences, IdentityHashMap<Tree, byte[]> compressedParses, Treebank testTreebank, string modelPath, string resultsRecordPath)
        {
            // Process:
            //   we come up with a cost and a derivative for the model;
            //   we always use the gold tree as the example to train towards;
            //   every time through, we look at the top N trees from
            //     the LexicalizedParser and pick the best one according to
            //     our model (at the start, this is essentially random).
            // We use QN (quasi-Newton minimization) to minimize the cost
            //   function for the model.  To do this minimization, we turn all
            //   of the matrices in the DVModel into one big theta, which is
            //   the set of variables to be optimized by the QN.
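            // For intuition, a minimal sketch of that flattening step (hypothetical
            // names; the real packing lives inside DVModel and the gradient code):
            //
            //   double[] theta = new double[dvModel.TotalParamSize()];
            //   int offset = 0;
            //   foreach (SimpleMatrix m in allModelMatrices)  // transforms, scores, word vectors, ...
            //   {
            //       for (int i = 0; i < m.NumElements(); ++i)
            //       {
            //           theta[offset++] = m.Get(i);
            //       }
            //   }
            //
            // The optimizer then sees a single flat vector of length TotalParamSize().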
            Timing timing             = new Timing();
            long   maxTrainTimeMillis = op.trainOptions.maxTrainTimeSeconds * 1000;
            int    batchCount         = 0;
            int    debugCycle         = 0;
            double bestLabelF1        = 0.0;

            if (op.trainOptions.useContextWords)
            {
                foreach (Tree tree in sentences)
                {
                    Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(tree);
                    tree.SetSpans();
                }
            }
            // for AdaGrad
            double[] sumGradSquare = new double[dvModel.TotalParamSize()];
            Arrays.Fill(sumGradSquare, 1.0);
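            // A minimal sketch of the per-parameter AdaGrad rule this array feeds
            // (hypothetical names; the actual update happens downstream in the
            // training step):
            //
            //   for (int i = 0; i < theta.Length; ++i)
            //   {
            //       sumGradSquare[i] += gradient[i] * gradient[i];
            //       theta[i] -= (learningRate / Math.Sqrt(sumGradSquare[i])) * gradient[i];
            //   }
            //
            // Starting the sums at 1.0 rather than 0.0 keeps early updates from
            // being inflated by a near-zero denominator.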
            int numBatches = sentences.Count / op.trainOptions.batchSize + 1;

            log.Info("Training on " + sentences.Count + " trees in " + numBatches + " batches");
            log.Info("Times through each training batch: " + op.trainOptions.trainingIterations);
            log.Info("QN iterations per batch: " + op.trainOptions.qnIterationsPerBatch);
            for (int iter = 0; iter < op.trainOptions.trainingIterations; ++iter)
            {
                IList<Tree> shuffledSentences = new List<Tree>(sentences);
                Java.Util.Collections.Shuffle(shuffledSentences, dvModel.rand);
                for (int batch = 0; batch < numBatches; ++batch)
                {
                    ++batchCount;
                    // This did not help performance
                    //log.info("Setting AdaGrad's sum of squares to 1...");
                    //Arrays.fill(sumGradSquare, 1.0);
                    log.Info("======================================");
                    log.Info("Iteration " + iter + " batch " + batch);
                    // Each batch will be of the specified batch size, except the
                    // last batch will include any leftover trees at the end of
                    // the list
                    int startTree = batch * op.trainOptions.batchSize;
                    int endTree   = (batch + 1) * op.trainOptions.batchSize;
                    if (endTree > shuffledSentences.Count)
                    {
                        endTree = shuffledSentences.Count;
                    }
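                    // Worked example: with 103 trees and batchSize 25, the five
                    // batches cover [0,25), [25,50), [50,75), [75,100), [100,103).
                    // When the count is an exact multiple of batchSize, numBatches
                    // above yields one extra batch covering the empty range
                    // [Count, Count), so the final SubList below is empty.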
                    ExecuteOneTrainingBatch(shuffledSentences.SubList(startTree, endTree), compressedParses, sumGradSquare);
                    long totalElapsed = timing.Report();
                    log.Info("Finished iteration " + iter + " batch " + batch + "; total training time " + totalElapsed + " ms");
                    if (maxTrainTimeMillis > 0 && totalElapsed > maxTrainTimeMillis)
                    {
                        // no need for debug output; we're done now
                        break;
                    }
                    if (op.trainOptions.debugOutputFrequency > 0 && batchCount % op.trainOptions.debugOutputFrequency == 0)
                    {
                        log.Info("Finished " + batchCount + " total batches, running evaluation cycle");
                        // Time for debugging output!
                        double tagF1   = 0.0;
                        double labelF1 = 0.0;
                        if (testTreebank != null)
                        {
                            EvaluateTreebank evaluator = new EvaluateTreebank(AttachModelToLexicalizedParser());
                            evaluator.TestOnTreebank(testTreebank);
                            labelF1 = evaluator.GetLBScore();
                            tagF1   = evaluator.GetTagScore();
                            if (labelF1 > bestLabelF1)
                            {
                                bestLabelF1 = labelF1;
                            }
                            log.Info("Best label f1 on dev set so far: " + Nf.Format(bestLabelF1));
                        }
                        string tempName = null;
                        if (modelPath != null)
                        {
                            tempName = modelPath;
                            if (modelPath.EndsWith(".ser.gz"))
                            {
                                tempName = Sharpen.Runtime.Substring(modelPath, 0, modelPath.Length - 7) + "-" + Filename.Format(debugCycle) + "-" + Nf.Format(labelF1) + ".ser.gz";
                            }
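                            // tempName is now something like "model-0012-0.8150.ser.gz";
                            // the exact digits depend on the Filename and Nf formats,
                            // and the 7 characters stripped above are ".ser.gz".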
                            SaveModel(tempName);
                        }
                        string statusLine = ("CHECKPOINT:" + " iteration " + iter + " batch " + batch + " labelF1 " + Nf.Format(labelF1) + " tagF1 " + Nf.Format(tagF1) + " bestLabelF1 " + Nf.Format(bestLabelF1) + " model " + tempName + op.trainOptions + " word vectors: "
                                             + op.lexOptions.wordVectorFile + " numHid: " + op.lexOptions.numHid);
                        log.Info(statusLine);
                        if (resultsRecordPath != null)
                        {
                            FileWriter fout = new FileWriter(resultsRecordPath, true); // true = append
                            fout.Write(statusLine);
                            fout.Write("\n");
                            fout.Close();
                        }
                        ++debugCycle;
                    }
                }
                long totalElapsed_1 = timing.Report();
                if (maxTrainTimeMillis > 0 && totalElapsed_1 > maxTrainTimeMillis)
                {
                    // no need for debug output; we're done now
                    log.Info("Max training time exceeded, exiting");
                    break;
                }
            }
        }
Example #2
public override int DomainDimension()
{
    // TODO: cache this for speed?
    return dvModel.TotalParamSize();
}
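
// A minimal caching sketch answering the TODO above, assuming the model's
// parameter count is fixed for the lifetime of this object (hypothetical
// cachedDimension field, not part of the original class):
//
//   private int cachedDimension = -1;
//
//   public override int DomainDimension()
//   {
//       if (cachedDimension < 0)
//       {
//           cachedDimension = dvModel.TotalParamSize();
//       }
//       return cachedDimension;
//   }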