/// <exception cref="System.IO.IOException"/>
public virtual void Train(IList<Tree> sentences, IdentityHashMap<Tree, byte[]> compressedParses, Treebank testTreebank, string modelPath, string resultsRecordPath)
{
    // process:
    // we come up with a cost and a derivative for the model
    // we always use the gold tree as the example to train towards
    // every time through, we will look at the top N trees from
    //   the LexicalizedParser and pick the best one according to
    //   our model (at the start, this is essentially random)
    // we use QN to minimize the cost function for the model
    // to do this minimization, we turn all of the matrices in the
    //   DVModel into one big Theta, which is the set of variables to
    //   be optimized by the QN minimizer
    Timing timing = new Timing();
    long maxTrainTimeMillis = op.trainOptions.maxTrainTimeSeconds * 1000;
    int batchCount = 0;
    int debugCycle = 0;
    double bestLabelF1 = 0.0;
    if (op.trainOptions.useContextWords)
    {
        foreach (Tree tree in sentences)
        {
            Edu.Stanford.Nlp.Trees.Trees.ConvertToCoreLabels(tree);
            tree.SetSpans();
        }
    }
    // for AdaGrad
    double[] sumGradSquare = new double[dvModel.TotalParamSize()];
    Arrays.Fill(sumGradSquare, 1.0);
    int numBatches = sentences.Count / op.trainOptions.batchSize + 1;
    log.Info("Training on " + sentences.Count + " trees in " + numBatches + " batches");
    log.Info("Times through each training batch: " + op.trainOptions.trainingIterations);
    log.Info("QN iterations per batch: " + op.trainOptions.qnIterationsPerBatch);
    for (int iter = 0; iter < op.trainOptions.trainingIterations; ++iter)
    {
        IList<Tree> shuffledSentences = new List<Tree>(sentences);
        Java.Util.Collections.Shuffle(shuffledSentences, dvModel.rand);
        for (int batch = 0; batch < numBatches; ++batch)
        {
            ++batchCount;
            // This did not help performance
            //log.info("Setting AdaGrad's sum of squares to 1...");
            //Arrays.fill(sumGradSquare, 1.0);
            log.Info("======================================");
            log.Info("Iteration " + iter + " batch " + batch);
            // Each batch will be of the specified batch size, except the
            // last batch will include any leftover trees at the end of
            // the list
            int startTree = batch * op.trainOptions.batchSize;
            int endTree = (batch + 1) * op.trainOptions.batchSize;
            if (endTree > shuffledSentences.Count)
            {
                endTree = shuffledSentences.Count;
            }
            ExecuteOneTrainingBatch(shuffledSentences.SubList(startTree, endTree), compressedParses, sumGradSquare);
            long totalElapsed = timing.Report();
            log.Info("Finished iteration " + iter + " batch " + batch + "; total training time " + totalElapsed + " ms");
            if (maxTrainTimeMillis > 0 && totalElapsed > maxTrainTimeMillis)
            {
                // no need for debug output, we're done now
                break;
            }
            if (op.trainOptions.debugOutputFrequency > 0 && batchCount % op.trainOptions.debugOutputFrequency == 0)
            {
                log.Info("Finished " + batchCount + " total batches, running evaluation cycle");
                // Time for debugging output!
                double tagF1 = 0.0;
                double labelF1 = 0.0;
                if (testTreebank != null)
                {
                    EvaluateTreebank evaluator = new EvaluateTreebank(AttachModelToLexicalizedParser());
                    evaluator.TestOnTreebank(testTreebank);
                    labelF1 = evaluator.GetLBScore();
                    tagF1 = evaluator.GetTagScore();
                    if (labelF1 > bestLabelF1)
                    {
                        bestLabelF1 = labelF1;
                    }
                    log.Info("Best label f1 on dev set so far: " + Nf.Format(bestLabelF1));
                }
                string tempName = null;
                if (modelPath != null)
                {
                    tempName = modelPath;
                    if (modelPath.EndsWith(".ser.gz"))
                    {
                        tempName = Sharpen.Runtime.Substring(modelPath, 0, modelPath.Length - 7) + "-" + Filename.Format(debugCycle) + "-" + Nf.Format(labelF1) + ".ser.gz";
                    }
                    SaveModel(tempName);
                }
                string statusLine = ("CHECKPOINT:" + " iteration " + iter + " batch " + batch + " labelF1 " + Nf.Format(labelF1) + " tagF1 " + Nf.Format(tagF1) + " bestLabelF1 " + Nf.Format(bestLabelF1) + " model " + tempName + op.trainOptions + " word vectors: " + op.lexOptions.wordVectorFile + " numHid: " + op.lexOptions.numHid);
                log.Info(statusLine);
                if (resultsRecordPath != null)
                {
                    FileWriter fout = new FileWriter(resultsRecordPath, true); // append
                    fout.Write(statusLine);
                    fout.Write("\n");
                    fout.Close();
                }
                ++debugCycle;
            }
        }
        long totalElapsed_1 = timing.Report();
        if (maxTrainTimeMillis > 0 && totalElapsed_1 > maxTrainTimeMillis)
        {
            // no need for debug output, we're done now
            log.Info("Max training time exceeded, exiting");
            break;
        }
    }
}
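
// The sumGradSquare array initialized above accumulates squared gradients for the AdaGrad
// updates applied inside ExecuteOneTrainingBatch. The helper below is only a hypothetical,
// self-contained sketch of that style of update; the names AdaGradStep, theta, grad, and
// learningRate are illustrative and not part of this class, and the real batch code also
// interleaves QN iterations as configured by op.trainOptions.qnIterationsPerBatch.
private static void AdaGradStep(double[] theta, double[] grad, double[] sumGradSquare, double learningRate)
{
    // Scale each coordinate by the root of its accumulated squared gradient,
    // so frequently updated parameters take progressively smaller steps.
    for (int i = 0; i < theta.Length; ++i)
    {
        sumGradSquare[i] += grad[i] * grad[i];
        theta[i] -= learningRate * grad[i] / System.Math.Sqrt(sumGradSquare[i]);
    }
}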
//log.info(Double.toString(score)+" ");
public override int DomainDimension()
{
    // TODO: cache this for speed?
    return dvModel.TotalParamSize();
}
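
// DomainDimension reports the length of the flattened Theta vector that the QN minimizer
// optimizes, i.e. every DVModel matrix laid out end to end. The sketch below illustrates
// that flattening in generic terms, assuming hypothetical names (FlattenMatrices, matrices,
// totalParamSize) that are not the actual DVModel API.
private static double[] FlattenMatrices(IList<double[][]> matrices, int totalParamSize)
{
    // Copy every matrix entry into one contiguous parameter vector.
    double[] theta = new double[totalParamSize];
    int index = 0;
    foreach (double[][] matrix in matrices)
    {
        foreach (double[] row in matrix)
        {
            foreach (double value in row)
            {
                theta[index++] = value;
            }
        }
    }
    return theta;
}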