// TODO: The current version keeps separate variables for the 2-model case and arrays for the
// n-model case. Unify them.
/// <summary>
/// <inheritdoc/>
/// Combines the score arrays returned by the underlying models into a single weighted sum.
/// </summary>
/// <param name="sequence">Sequence handed unchanged to each underlying model.</param>
/// <param name="pos">Position handed unchanged to each underlying model.</param>
/// <returns>The weighted sum of the per-model score arrays.</returns>
public virtual double[] ScoresOf(int[] sequence, int pos)
{
    if (models == null)
    {
        // Two-model configuration: blend the two score arrays element by element.
        double[] firstScores = model1.ScoresOf(sequence, pos);
        double[] secondScores = model2.ScoresOf(sequence, pos);
        double[] blended = new double[firstScores.Length];
        for (int k = 0; k < firstScores.Length; k++)
        {
            blended[k] = model1Wt * firstScores[k] + model2Wt * secondScores[k];
        }
        return blended;
    }

    // n-model configuration: start from the first model's weighted scores, then fold in
    // every remaining model scaled by its own weight.
    double[] combined = ArrayMath.Multiply(models[0].ScoresOf(sequence, pos), wts[0]);
    for (int m = 1; m < models.Length; m++)
    {
        double[] modelScores = models[m].ScoresOf(sequence, pos);
        ArrayMath.AddMultInPlace(combined, modelScores, wts[m]);
    }
    return combined;
}
/// <summary>
/// Runs one optimization pass over a single training batch and writes the updated
/// parameter vector back into <c>dvModel</c>.
/// </summary>
/// <remarks>
/// The optimizer is selected by <c>Minimizer</c>: 1 = QNMinimizer, 2 = plain gradient
/// descent (SGD), 3 = AdaGrad. Any other value raises an <see cref="ArgumentException"/>.
/// </remarks>
/// <param name="trainingBatch">Trees that make up this batch.</param>
/// <param name="compressedParses">
/// Compressed parse hypotheses keyed by tree; expanded with
/// <c>CacheParseHypotheses.ConvertToTrees</c> before the cost function is built.
/// </param>
/// <param name="sumGradSquare">
/// Running per-parameter sum of squared gradients. Read and updated in place only by the
/// AdaGrad branch (<c>Minimizer == 3</c>); the other branches do not touch it.
/// </param>
/// <exception cref="ArgumentException">
/// <c>Minimizer</c> is not 1, 2 or 3. The exception is raised after the parses have been
/// converted and before the model parameters are written back.
/// </exception>
public virtual void ExecuteOneTrainingBatch(IList <Tree> trainingBatch, IdentityHashMap <Tree, byte[]> compressedParses, double[] sumGradSquare)
{
    Timing convertTiming = new Timing();
    convertTiming.Doing("Converting trees");
    IdentityHashMap <Tree, IList <Tree> > topParses = CacheParseHypotheses.ConvertToTrees(trainingBatch, compressedParses, op.trainOptions.trainingThreads);
    convertTiming.Done();

    DVParserCostAndGradient gcFunc = new DVParserCostAndGradient(trainingBatch, topParses, dvModel, op);
    double[] theta = dvModel.ParamsToVector();

    // 1: QNMinimizer, 2: SGD, 3: AdaGrad
    switch (Minimizer)
    {
        case 1:
        {
            QNMinimizer qn = new QNMinimizer(op.trainOptions.qnEstimates, true);
            qn.UseMinPackSearch();
            qn.UseDiagonalScaling();
            qn.TerminateOnAverageImprovement(true);
            qn.TerminateOnNumericalZero(true);
            qn.TerminateOnRelativeNorm(true);
            theta = qn.Minimize(gcFunc, op.trainOptions.qnTolerance, theta, op.trainOptions.qnIterationsPerBatch);
            break;
        }

        case 2:
        {
            // Plain gradient descent with a fixed learning rate.
            for (int i = 0; i < op.trainOptions.qnIterationsPerBatch; i++)
            {
                // NOTE(review): the gradient is requested before the value, as in the
                // original code; presumably the cost function caches its last evaluation —
                // confirm before reordering these two calls.
                double[] grad = gcFunc.DerivativeAt(theta);
                double currCost = gcFunc.ValueAt(theta);
                log.Info("batch cost: " + currCost);
                ArrayMath.AddMultInPlace(theta, grad, -op.trainOptions.learningRate);
            }
            break;
        }

        case 3:
        {
            // AdaGrad: each parameter's step is scaled by the inverse square root of its
            // accumulated squared gradient; eps keeps the denominator away from zero.
            double eps = 1e-3;
            for (int i = 0; i < op.trainOptions.qnIterationsPerBatch; i++)
            {
                // NOTE(review): same gradient-then-value ordering as the SGD branch.
                double[] gradf = gcFunc.DerivativeAt(theta);
                double currCost = gcFunc.ValueAt(theta);
                log.Info("batch cost: " + currCost);
                for (int feature = 0; feature < gradf.Length; feature++)
                {
                    sumGradSquare[feature] += gradf[feature] * gradf[feature];
                    theta[feature] = theta[feature] - (op.trainOptions.learningRate * gradf[feature] / (System.Math.Sqrt(sumGradSquare[feature]) + eps));
                }
            }
            break;
        }

        default:
        {
            throw new ArgumentException("Unsupported minimizer " + Minimizer);
        }
    }

    dvModel.VectorToParams(theta);
}