Example 1
        private static readonly double RHO = 0.5; // step-size shrink factor (must be in the open interval (0, 1))
        private static readonly double C = 0.0001; // Armijo sufficient-decrease constant; not declared in this snippet, value assumed from OpenNLP's QNMinimizer

        /// <summary>
        /// Backtracking line search. (see Nocedal &amp; Wright 2006, Numerical Optimization, p. 37)
        /// </summary>
        /// <param name="function">The function.</param>
        /// <param name="direction">The direction.</param>
        /// <param name="lsr">The result.</param>
        /// <param name="initialStepSize">Initial step size.</param>
        public static void DoLineSearch(
            IFunction function,
            double[] direction,
            LineSearchResult lsr,
            double initialStepSize)
        {
            var stepSize         = initialStepSize;
            var currFctEvalCount = lsr.FctEvalCount;
            var x         = lsr.NextPoint;
            var gradAtX   = lsr.GradAtNext;
            var valueAtX  = lsr.ValueAtNext;
            var dimension = x.Length;

            // Retrieve the current point and gradient so their arrays can be reused
            var    nextPoint       = lsr.CurrPoint;
            var    gradAtNextPoint = lsr.GradAtCurr;
            double valueAtNextPoint;

            var dirGradientAtX = ArrayMath.InnerProduct(direction, gradAtX);

            // To avoid recomputing in the loop
            var cachedProd = C * dirGradientAtX;

            while (true)
            {
                // Get next point
                for (var i = 0; i < dimension; i++)
                {
                    nextPoint[i] = x[i] + direction[i] * stepSize;
                }

                // New value
                valueAtNextPoint = function.ValueAt(nextPoint);

                currFctEvalCount++;

                // Check Armijo condition
                if (valueAtNextPoint <= valueAtX + cachedProd * stepSize)
                {
                    break;
                }

                // Shrink step size
                stepSize *= RHO;
            }

            // Compute and save gradient at the new point
            Array.Copy(function.GradientAt(nextPoint), 0, gradAtNextPoint, 0, gradAtNextPoint.Length);

            // Update line search result
            lsr.SetAll(stepSize, valueAtX, valueAtNextPoint, gradAtX, gradAtNextPoint, x, nextPoint, currFctEvalCount);
        }
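
For orientation, here is a minimal usage sketch. The Quadratic class and LineSearchDemo method are hypothetical; IFunction's members are inferred from the calls above, and the LineSearchResult factory is the one shown in Example 4.

        // Hypothetical objective f(x) = x^2, used only to illustrate the call.
        public sealed class Quadratic : IFunction
        {
            public int Dimension => 1;
            public double ValueAt(double[] x) => x[0] * x[0];
            public double[] GradientAt(double[] x) => new[] { 2 * x[0] };
        }

        public static void LineSearchDemo()
        {
            var f    = new Quadratic();
            var x0   = new[] { 1.0 };
            var grad = f.GradientAt(x0);

            // Initial result object, built as in Example 4.
            var lsr = LineSearchResult.GetInitialObject(f.ValueAt(x0), grad, x0);

            // Search along the steepest-descent direction; the loop above halves
            // the step (RHO = 0.5) until the Armijo condition is satisfied.
            LineSearch.DoLineSearch(f, new[] { -grad[0] }, lsr, initialStepSize: 1.0);
        }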
Example 2
        private bool IsConverged(LineSearchResult lsr)
        {
            // Check function's change rate
            if (lsr.FuncChangeRate < ConvergeTolerance)
            {
                if (monitor != null)
                {
                    Display("Function change rate is smaller than the threshold " + ConvergeTolerance + ".\nTraining will stop.\n\n");
                }

                return true;
            }

            // Check the gradient's norm using the criterion: ||g(x)|| / max(1, ||x||) < threshold
            var xNorm    = Math.Max(1, ArrayMath.L2Norm(lsr.NextPoint));
            var gradNorm = l1Cost > 0 ? ArrayMath.L2Norm(lsr.PseudoGradAtNext) : ArrayMath.L2Norm(lsr.GradAtNext);

            if (gradNorm / xNorm < RelGradNormTol)
            {
                if (monitor != null)
                {
                    Display("Relative L2-norm of the gradient is smaller than the threshold "
                            + RelGradNormTol + ".\nTraining will stop.\n\n");
                }
                return true;
            }

            // Check step size
            if (lsr.StepSize < MinStepSize)
            {
                if (monitor != null)
                {
                    Display("Step size is smaller than the minimum step size "
                            + MinStepSize + ".\nTraining will stop.\n\n");
                }
                return true;
            }

            // Check number of function evaluations
            if (lsr.FctEvalCount > maxFctEval)
            {
                if (monitor != null)
                {
                    Display("Maximum number of function evaluations has exceeded the threshold "
                            + maxFctEval + ".\nTraining will stop.\n\n");
                }
                return true;
            }

            return false;
        }
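
The second test above is the standard relative gradient-norm stopping rule ||g(x)|| / max(1, ||x||) < threshold; dividing by max(1, ||x||) makes the test scale with the parameter vector while never amplifying the gradient norm near the origin. Factored out as a standalone sketch (the helper name is hypothetical; ArrayMath.L2Norm is the Euclidean norm used above):

        private static bool GradientConverged(double[] x, double[] grad, double threshold)
        {
            // ||g(x)|| / max(1, ||x||) < threshold
            var xNorm = Math.Max(1, ArrayMath.L2Norm(x));
            return ArrayMath.L2Norm(grad) / xNorm < threshold;
        }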
Example 3
        /// <summary>
        /// Constrained line search (see section 3.2 of the paper "Scalable Training of L1-Regularized Log-Linear Models", Andrew &amp; Gao 2007).
        /// </summary>
        /// <param name="function">The function.</param>
        /// <param name="direction">The direction.</param>
        /// <param name="lsr">The line search result.</param>
        /// <param name="l1Cost">The l1 cost.</param>
        /// <param name="initialStepSize">Initial size of the step.</param>
        public static void DoConstrainedLineSearch(
            IFunction function,
            double[] direction,
            LineSearchResult lsr,
            double l1Cost,
            double initialStepSize)
        {
            var stepSize         = initialStepSize;
            var currFctEvalCount = lsr.FctEvalCount;
            var x             = lsr.NextPoint;
            var signX         = lsr.SignVector; // existing sign vector
            var gradAtX       = lsr.GradAtNext;
            var pseudoGradAtX = lsr.PseudoGradAtNext;
            var valueAtX      = lsr.ValueAtNext;
            var dimension     = x.Length;

            // Retrieve the current point and gradient so their arrays can be reused
            var    nextPoint       = lsr.CurrPoint;
            var    gradAtNextPoint = lsr.GradAtCurr;
            double valueAtNextPoint;

            // Build the sign vector that fixes the orthant for this search
            for (var i = 0; i < dimension; i++)
            {
                signX[i] = x[i].Equals(0d) ? -pseudoGradAtX[i] : x[i];
            }

            while (true)
            {
                // Get next point
                for (var i = 0; i < dimension; i++)
                {
                    nextPoint[i] = x[i] + direction[i] * stepSize;
                }

                // Project back onto the orthant defined by signX
                for (var i = 0; i < dimension; i++)
                {
                    if (nextPoint[i] * signX[i] <= 0)
                    {
                        nextPoint[i] = 0;
                    }
                }

                // New value
                valueAtNextPoint = function.ValueAt(nextPoint) + l1Cost * ArrayMath.L1Norm(nextPoint);

                currFctEvalCount++;

                double dirGradientAtX = 0;
                for (var i = 0; i < dimension; i++)
                {
                    dirGradientAtX += (nextPoint[i] - x[i]) * pseudoGradAtX[i];
                }

                // Check the sufficient decrease condition
                if (valueAtNextPoint <= valueAtX + C * dirGradientAtX)
                {
                    break;
                }

                // Shrink step size
                stepSize *= RHO;
            }

            // Compute and save gradient at the new point
            Array.Copy(function.GradientAt(nextPoint), 0, gradAtNextPoint, 0, gradAtNextPoint.Length);

            // Update line search result
            lsr.SetAll(stepSize, valueAtX, valueAtNextPoint, gradAtX, gradAtNextPoint, pseudoGradAtX, x, nextPoint,
                       signX, currFctEvalCount);
        }
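
The projection loop above is the orthant constraint of OWL-QN: any trial coordinate that leaves the orthant fixed by signX is clamped to zero, so the L1 term remains differentiable along the accepted segment. As a standalone sketch (the helper name is hypothetical):

        private static void ProjectOntoOrthant(double[] point, double[] orthant)
        {
            for (var i = 0; i < point.Length; i++)
            {
                // A coordinate whose sign flipped relative to the chosen orthant is clamped to zero.
                if (point[i] * orthant[i] <= 0)
                {
                    point[i] = 0;
                }
            }
        }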
Example 4
        /// <summary>
        /// Find the parameters that minimize the objective function.
        /// </summary>
        /// <param name="function">The objective function.</param>
        /// <returns>The minimizing parameters.</returns>
        /// <exception cref="OperationCanceledException">Occurs when the evaluation monitor cancels the operation.</exception>
        public double[] Minimize(IFunction function)
        {
            var l2RegFunction = new L2RegFunction(function, l2Cost);

            dimension  = l2RegFunction.Dimension;
            updateInfo = new UpdateInfo(updates, dimension);

            // Current point is at the origin
            var currPoint = new double[dimension];
            var currValue = l2RegFunction.ValueAt(currPoint);

            // Gradient at the current point
            var currGrad = new double[dimension];

            Array.Copy(l2RegFunction.GradientAt(currPoint), 0, currGrad, 0, dimension);

            // Pseudo-gradient - only used when L1 regularization is enabled
            double[] pseudoGrad = null;
            if (l1Cost > 0)
            {
                currValue += l1Cost * ArrayMath.L1Norm(currPoint);
                pseudoGrad = new double[dimension];
                ComputePseudoGrad(currPoint, currGrad, pseudoGrad);
            }

            var lsr = l1Cost > 0
                ? LineSearchResult.GetInitialObjectForL1(currValue, currGrad, pseudoGrad, currPoint)
                : LineSearchResult.GetInitialObject(currValue, currGrad, currPoint);

            if (monitor != null)
            {
                Display("\nSolving convex optimization problem.");
                Display("\nObjective function has " + dimension + " variable(s).");
                Display("\n\nPerforming " + iterations + " iterations with " + "L1Cost=" + l1Cost + " and L2Cost=" + l2Cost + "\n");
            }

            var direction = new double[dimension];
            var startTime = DateTime.Now;
            var token     = monitor != null ? monitor.Token : CancellationToken.None;

            // Initial step size for the 1st iteration
            var initialStepSize = l1Cost > 0
                ? ArrayMath.InvL2Norm(lsr.PseudoGradAtNext)
                : ArrayMath.InvL2Norm(lsr.GradAtNext);

            for (var iteration = 1; iteration <= iterations; iteration++)
            {
                // Cancel if requested
                token.ThrowIfCancellationRequested();

                // Find direction
                Array.Copy(l1Cost > 0
                    ? lsr.PseudoGradAtNext
                    : lsr.GradAtNext, 0, direction, 0, direction.Length);

                ComputeDirection(direction);

                // Line search
                if (l1Cost > 0)
                {
                    // Constrain the search direction
                    pseudoGrad = lsr.PseudoGradAtNext;

                    for (var i = 0; i < dimension; i++)
                    {
                        if (direction[i] * pseudoGrad[i] >= 0)
                        {
                            direction[i] = 0;
                        }
                    }

                    LineSearch.DoConstrainedLineSearch(l2RegFunction, direction, lsr, l1Cost, initialStepSize);

                    ComputePseudoGrad(lsr.NextPoint, lsr.GradAtNext, pseudoGrad);

                    lsr.PseudoGradAtNext = pseudoGrad;
                }
                else
                {
                    LineSearch.DoLineSearch(l2RegFunction, direction, lsr, initialStepSize);
                }

                // Save Hessian updates
                updateInfo.Update(lsr);

                if (monitor != null)
                {
                    if (iteration < 10)
                    {
                        Display("  " + iteration + ":  ");
                    }
                    else if (iteration < 100)
                    {
                        Display(" " + iteration + ":  ");
                    }
                    else
                    {
                        Display(iteration + ":  ");
                    }

                    if (Evaluator != null)
                    {
                        Display("\t" + lsr.ValueAtNext
                                + "\t" + lsr.FuncChangeRate
                                + "\t" + Evaluator.Evaluate(lsr.NextPoint) + "\n");
                    }
                    else
                    {
                        Display("\t " + lsr.ValueAtNext +
                                "\t" + lsr.FuncChangeRate + "\n");
                    }
                }

                if (IsConverged(lsr))
                {
                    break;
                }

                initialStepSize = InitialStepSize;
            }

            // Undo L2-shrinkage if Elastic Net is used (since in that case, the shrinkage is done twice)
            //
            // Knuppe: The original code made no sense, so I changed the NextPoint value instead!
            //
            // if (l1Cost > 0 && l2Cost > 0) {
            //     double[] x = lsr.getNextPoint();
            //     for (int i = 0; i < dimension; i++) {
            //         x[i] = Math.sqrt(1 + l2Cost) * x[i];
            //     }
            // }

            if (l1Cost > 0 && l2Cost > 0)
            {
                for (var i = 0; i < dimension; i++)
                {
                    lsr.NextPoint[i] = Math.Sqrt(1 + l2Cost) * lsr.NextPoint[i];
                }
            }

            if (monitor != null)
            {
                var endTime  = DateTime.Now;
                var duration = endTime - startTime;

                Display("Running time: " + duration.TotalSeconds + "s\n");
            }

            // Release memory
            updateInfo = null;

            // The original Java code copied the parameters to avoid returning a reference to
            // LineSearchResult's member, so that the GC could collect lsr after this method
            // completes (is it necessary?):
            // double[] parameters = new double[dimension];
            // System.arraycopy(lsr.getNextPoint(), 0, parameters, 0, dimension);

            return lsr.NextPoint;
        }
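
ComputePseudoGrad is called above but not shown in these examples. For reference, the OWL-QN pseudo-gradient of Andrew &amp; Gao 2007 replaces the gradient of the non-differentiable L1 term with a one-sided derivative at zero; the sketch below implements that standard formula and is not necessarily the exact body of the missing method (the signature is assumed).

        private static void ComputePseudoGradSketch(double[] x, double[] grad, double l1Cost, double[] pseudoGrad)
        {
            for (var i = 0; i < x.Length; i++)
            {
                if (x[i] < 0)
                {
                    pseudoGrad[i] = grad[i] - l1Cost; // d/dx |x| = -1 for x < 0
                }
                else if (x[i] > 0)
                {
                    pseudoGrad[i] = grad[i] + l1Cost; // d/dx |x| = +1 for x > 0
                }
                else if (grad[i] < -l1Cost)
                {
                    pseudoGrad[i] = grad[i] + l1Cost; // right derivative is negative: increasing x[i] decreases the objective
                }
                else if (grad[i] > l1Cost)
                {
                    pseudoGrad[i] = grad[i] - l1Cost; // left derivative is positive: decreasing x[i] decreases the objective
                }
                else
                {
                    pseudoGrad[i] = 0; // zero is locally optimal in this coordinate
                }
            }
        }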