/// <summary>
/// Performs a constrained line search, as described in section 3.2 of the paper
/// "Scalable Training of L1-Regularized Log-Linear Models", Andrew and Gao, 2007.
/// The search is restricted to the orthant defined by the sign vector: any
/// coordinate of the candidate point that crosses zero is projected back to zero.
/// </summary>
/// <param name="function">The objective function to minimize.</param>
/// <param name="direction">The search direction.</param>
/// <param name="lsr">The line search result, updated in place.</param>
/// <param name="l1Cost">The L1-regularization cost.</param>
/// <param name="initialStepSize">The initial step size.</param>
public static void DoConstrainedLineSearch(
    IFunction function,
    double[] direction,
    LineSearchResult lsr,
    double l1Cost,
    double initialStepSize) {

    var stepSize = initialStepSize;
    var currFctEvalCount = lsr.FctEvalCount;
    var x = lsr.NextPoint;
    var signX = lsr.SignVector; // existing sign vector
    var gradAtX = lsr.GradAtNext;
    var pseudoGradAtX = lsr.PseudoGradAtNext;
    var valueAtX = lsr.ValueAtNext;
    var dimension = x.Length;

    // Retrieve the current point and gradient arrays for reuse
    var nextPoint = lsr.CurrPoint;
    var gradAtNextPoint = lsr.GradAtCurr;
    double valueAtNextPoint;

    // New sign vector: nonzero coordinates keep the sign of x itself;
    // zero coordinates take the opposite sign of the pseudo-gradient.
    for (var i = 0; i < dimension; i++) {
        signX[i] = x[i].Equals(0d) ? -pseudoGradAtX[i] : x[i];
    }

    while (true) {
        // Get the next point
        for (var i = 0; i < dimension; i++) {
            nextPoint[i] = x[i] + direction[i] * stepSize;
        }

        // Projection: clip any coordinate that leaves the orthant back to zero
        for (var i = 0; i < dimension; i++) {
            if (nextPoint[i] * signX[i] <= 0) {
                nextPoint[i] = 0;
            }
        }

        // New value, including the L1 penalty
        valueAtNextPoint = function.ValueAt(nextPoint) + l1Cost * ArrayMath.L1Norm(nextPoint);
        currFctEvalCount++;

        // Directional derivative at x along the projected step
        double dirGradientAtX = 0;
        for (var i = 0; i < dimension; i++) {
            dirGradientAtX += (nextPoint[i] - x[i]) * pseudoGradAtX[i];
        }

        // Check the sufficient decrease (Armijo) condition
        if (valueAtNextPoint <= valueAtX + C * dirGradientAtX) {
            break;
        }

        // Shrink the step size
        stepSize *= RHO;
    }

    // Compute and save the gradient at the new point
    Array.Copy(function.GradientAt(nextPoint), 0, gradAtNextPoint, 0, gradAtNextPoint.Length);

    // Update the line search result
    lsr.SetAll(stepSize, valueAtX, valueAtNextPoint, gradAtX, gradAtNextPoint,
        pseudoGradAtX, x, nextPoint, signX, currFctEvalCount);
}
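// A minimal, self-contained sketch (not part of the original source) that isolates
// the orthant projection rule used in the loop above: take the unconstrained step,
// then clip every coordinate that crosses (or lands on) the orthant boundary fixed
// by the sign vector. The method name OrthantProjectionSketch and its signature are
// illustrative only.
private static double[] OrthantProjectionSketch(
    double[] x, double[] direction, double[] signX, double stepSize) {

    var projected = new double[x.Length];
    for (var i = 0; i < x.Length; i++) {
        // Unconstrained step along the search direction
        projected[i] = x[i] + direction[i] * stepSize;

        // Clip to zero if the coordinate left the orthant, exactly as the
        // projection loop above does
        if (projected[i] * signX[i] <= 0) {
            projected[i] = 0;
        }
    }
    return projected;
}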
/// <summary>
/// Finds the parameters that minimize the objective function.
/// </summary>
/// <param name="function">The objective function.</param>
/// <returns>The minimizing parameters.</returns>
/// <exception cref="OperationCanceledException">Occurs when the evaluation monitor cancels the operation.</exception>
public double[] Minimize(IFunction function) {
    var l2RegFunction = new L2RegFunction(function, l2Cost);
    dimension = l2RegFunction.Dimension;
    updateInfo = new UpdateInfo(updates, dimension);

    // The current point starts at the origin
    var currPoint = new double[dimension];
    var currValue = l2RegFunction.ValueAt(currPoint);

    // Gradient at the current point
    var currGrad = new double[dimension];
    Array.Copy(l2RegFunction.GradientAt(currPoint), 0, currGrad, 0, dimension);

    // Pseudo-gradient - only used when L1-regularization is enabled
    double[] pseudoGrad = null;
    if (l1Cost > 0) {
        currValue += l1Cost * ArrayMath.L1Norm(currPoint);
        pseudoGrad = new double[dimension];
        ComputePseudoGrad(currPoint, currGrad, pseudoGrad);
    }

    var lsr = l1Cost > 0
        ? LineSearchResult.GetInitialObjectForL1(currValue, currGrad, pseudoGrad, currPoint)
        : LineSearchResult.GetInitialObject(currValue, currGrad, currPoint);

    if (monitor != null) {
        Display("\nSolving convex optimization problem.");
        Display("\nObjective function has " + dimension + " variable(s).");
        Display("\n\nPerforming " + iterations + " iterations with " +
                "L1Cost=" + l1Cost + " and L2Cost=" + l2Cost + "\n");
    }

    var direction = new double[dimension];
    var startTime = DateTime.Now;
    var token = monitor != null ? monitor.Token : CancellationToken.None;

    // Initial step size for the first iteration
    var initialStepSize = l1Cost > 0
        ? ArrayMath.InvL2Norm(lsr.PseudoGradAtNext)
        : ArrayMath.InvL2Norm(lsr.GradAtNext);

    for (var iteration = 1; iteration <= iterations; iteration++) {
        // Cancel if requested
        token.ThrowIfCancellationRequested();

        // Find the search direction
        Array.Copy(l1Cost > 0 ? lsr.PseudoGradAtNext : lsr.GradAtNext, 0, direction, 0, direction.Length);
        ComputeDirection(direction);

        // Line search
        if (l1Cost > 0) {
            // Constrain the search direction: zero out every component that does
            // not agree in sign with the negative pseudo-gradient
            pseudoGrad = lsr.PseudoGradAtNext;
            for (var i = 0; i < dimension; i++) {
                if (direction[i] * pseudoGrad[i] >= 0) {
                    direction[i] = 0;
                }
            }

            LineSearch.DoConstrainedLineSearch(l2RegFunction, direction, lsr, l1Cost, initialStepSize);

            ComputePseudoGrad(lsr.NextPoint, lsr.GradAtNext, pseudoGrad);

            lsr.PseudoGradAtNext = pseudoGrad;
        } else {
            LineSearch.DoLineSearch(l2RegFunction, direction, lsr, initialStepSize);
        }

        // Save Hessian updates
        updateInfo.Update(lsr);

        if (monitor != null) {
            if (iteration < 10) {
                Display("  " + iteration + ":  ");
            } else if (iteration < 100) {
                Display(" " + iteration + ":  ");
            } else {
                Display(iteration + ":  ");
            }

            if (Evaluator != null) {
                Display("\t" + lsr.ValueAtNext +
                        "\t" + lsr.FuncChangeRate +
                        "\t" + Evaluator.Evaluate(lsr.NextPoint) + "\n");
            } else {
                Display("\t " + lsr.ValueAtNext + "\t" + lsr.FuncChangeRate + "\n");
            }
        }

        if (IsConverged(lsr)) {
            break;
        }

        initialStepSize = InitialStepSize;
    }

    // Undo the L2-shrinkage if Elastic Net is used (since in that case, the
    // shrinkage is applied twice).
    //
    // Knuppe: The original (Java) code below makes no sense, so the unshrinking
    // is applied to the NextPoint value instead!
    //
    // if (l1Cost > 0 && l2Cost > 0) {
    //     double[] x = lsr.getNextPoint();
    //     for (int i = 0; i < dimension; i++) {
    //         x[i] = Math.sqrt(1 + l2Cost) * x[i];
    //     }
    // }
    if (l1Cost > 0 && l2Cost > 0) {
        for (var i = 0; i < dimension; i++) {
            lsr.NextPoint[i] = Math.Sqrt(1 + l2Cost) * lsr.NextPoint[i];
        }
    }

    if (monitor != null) {
        var endTime = DateTime.Now;
        var duration = endTime - startTime;
        Display("Running time: " + duration.TotalSeconds + "s\n");
    }

    // Release memory
    updateInfo = null;

    // Avoid returning the reference to LineSearchResult's member so that the GC can
    // collect the memory occupied by lsr after this function completes (is it necessary?)
    // double[] parameters = new double[dimension];
    // System.arraycopy(lsr.getNextPoint(), 0, parameters, 0, dimension);
    return lsr.NextPoint;
}
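// A hedged usage sketch (not part of the original source): minimizing the smooth
// convex function f(x) = (x0 - 1)^2 + (x1 + 2)^2 with Minimize. IFunction is assumed
// here to expose exactly the members this file already uses (Dimension, ValueAt,
// GradientAt); the QNMinimizer class name and its parameterless constructor in the
// usage comment are likewise assumptions, not the repository's confirmed API.
internal sealed class QuadraticBowl : IFunction {
    public int Dimension => 2;

    // f(x) = (x0 - 1)^2 + (x1 + 2)^2, minimized at (1, -2)
    public double ValueAt(double[] x) {
        return (x[0] - 1) * (x[0] - 1) + (x[1] + 2) * (x[1] + 2);
    }

    // Gradient: (2(x0 - 1), 2(x1 + 2))
    public double[] GradientAt(double[] x) {
        return new[] { 2 * (x[0] - 1), 2 * (x[1] + 2) };
    }
}

// Hypothetical usage, assuming a parameterless QNMinimizer constructor:
//
//     var minimizer = new QNMinimizer();
//     var solution = minimizer.Minimize(new QuadraticBowl());
//
// With l1Cost = l2Cost = 0 the result should approach (1, -2); nonzero
// regularization costs shrink the solution toward the origin.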