Example #1
        /// <summary>
        /// Performs a backtracking line search constrained to the current orthant,
        /// as described in section 3.2 of "Scalable Training of L1-Regularized
        /// Log-Linear Models" (Andrew et al. 2007). Updates <paramref name="lsr"/> in place.
        /// </summary>
        /// <param name="function">The objective function to evaluate.</param>
        /// <param name="direction">The search direction.</param>
        /// <param name="lsr">The line search result; its arrays are reused and its state is overwritten.</param>
        /// <param name="l1Cost">The L1-regularization cost.</param>
        /// <param name="initialStepSize">The step size tried first before any backtracking.</param>
        public static void DoConstrainedLineSearch(
            IFunction function,
            double[] direction,
            LineSearchResult lsr,
            double l1Cost,
            double initialStepSize)
        {
            var step      = initialStepSize;
            var evalCount = lsr.FctEvalCount;

            var point      = lsr.NextPoint;
            var orthant    = lsr.SignVector; // sign vector defining the current orthant
            var grad       = lsr.GradAtNext;
            var pseudoGrad = lsr.PseudoGradAtNext;
            var value      = lsr.ValueAtNext;
            var n          = point.Length;

            // Reuse the arrays already held by the result object to avoid reallocation.
            var    candidate       = lsr.CurrPoint;
            var    gradAtCandidate = lsr.GradAtCurr;
            double valueAtCandidate;

            // Refresh the orthant sign vector: at zero coordinates the orthant is
            // chosen by the negated pseudo-gradient, elsewhere by the coordinate itself.
            for (var i = 0; i < n; i++)
            {
                orthant[i] = point[i].Equals(0d) ? -pseudoGrad[i] : point[i];
            }

            while (true)
            {
                // Trial point, projected back onto the orthant: any coordinate
                // that crosses (or lands on) zero is clamped to zero.
                for (var i = 0; i < n; i++)
                {
                    var trial = point[i] + direction[i] * step;
                    candidate[i] = trial * orthant[i] <= 0 ? 0 : trial;
                }

                // L1-regularized objective value at the trial point.
                valueAtCandidate = function.ValueAt(candidate) + l1Cost * ArrayMath.L1Norm(candidate);

                evalCount++;

                // Directional derivative estimate along the projected step.
                double dirDeriv = 0;
                for (var i = 0; i < n; i++)
                {
                    dirDeriv += (candidate[i] - point[i]) * pseudoGrad[i];
                }

                // Armijo-style sufficient decrease condition.
                if (valueAtCandidate <= value + C * dirDeriv)
                {
                    break;
                }

                // Backtrack: shrink the step and try again.
                step *= RHO;
            }

            // Compute and store the gradient at the accepted point.
            Array.Copy(function.GradientAt(candidate), 0, gradAtCandidate, 0, gradAtCandidate.Length);

            // Publish the accepted step into the line search result.
            lsr.SetAll(step, value, valueAtCandidate, grad, gradAtCandidate, pseudoGrad, point, candidate,
                       orthant, evalCount);
        }
Example #2
        /// <summary>
        /// Find the parameters that minimize the objective function.
        /// </summary>
        /// <param name="function">The objective function.</param>
        /// <returns>The minimizing parameters.</returns>
        /// <exception cref="OperationCanceledException">Occurs when the evaluation monitor cancels the operation.</exception>
        public double[] Minimize(IFunction function)
        {
            var l2RegFunction = new L2RegFunction(function, l2Cost);

            dimension  = l2RegFunction.Dimension;
            updateInfo = new UpdateInfo(updates, dimension);

            // Current point is at the origin.
            var currPoint = new double[dimension];
            var currValue = l2RegFunction.ValueAt(currPoint);

            // Gradient at the current point.
            var currGrad = new double[dimension];
            Array.Copy(l2RegFunction.GradientAt(currPoint), 0, currGrad, 0, dimension);

            // Pseudo-gradient - only used when L1-regularization is enabled.
            double[] pseudoGrad = null;
            if (l1Cost > 0)
            {
                currValue += l1Cost * ArrayMath.L1Norm(currPoint);
                pseudoGrad = new double[dimension];
                ComputePseudoGrad(currPoint, currGrad, pseudoGrad);
            }

            var lsr = l1Cost > 0
                ? LineSearchResult.GetInitialObjectForL1(currValue, currGrad, pseudoGrad, currPoint)
                : LineSearchResult.GetInitialObject(currValue, currGrad, currPoint);

            if (monitor != null)
            {
                Display("\nSolving convex optimization problem.");
                Display($"\nObjective function has {dimension} variable(s).");
                Display($"\n\nPerforming {iterations} iterations with L1Cost={l1Cost} and L2Cost={l2Cost}\n");
            }

            var direction = new double[dimension];
            var token     = monitor != null ? monitor.Token : CancellationToken.None;

            // Stopwatch is monotonic; DateTime.Now is wall-clock time and can jump
            // (DST changes, NTP adjustments), which would corrupt the reported duration.
            var stopwatch = System.Diagnostics.Stopwatch.StartNew();

            // Initial step size for the 1st iteration.
            var initialStepSize = l1Cost > 0
                ? ArrayMath.InvL2Norm(lsr.PseudoGradAtNext)
                : ArrayMath.InvL2Norm(lsr.GradAtNext);

            for (var iteration = 1; iteration <= iterations; iteration++)
            {
                // Cancel if requested by the evaluation monitor.
                token.ThrowIfCancellationRequested();

                // Find direction: start from the (pseudo-)gradient at the current point.
                Array.Copy(l1Cost > 0
                    ? lsr.PseudoGradAtNext
                    : lsr.GradAtNext, 0, direction, 0, direction.Length);

                ComputeDirection(direction);

                // Line search.
                if (l1Cost > 0)
                {
                    // Constrain the search direction: zero out every component that
                    // does not descend relative to the pseudo-gradient (OWL-QN).
                    pseudoGrad = lsr.PseudoGradAtNext;

                    for (var i = 0; i < dimension; i++)
                    {
                        if (direction[i] * pseudoGrad[i] >= 0)
                        {
                            direction[i] = 0;
                        }
                    }

                    LineSearch.DoConstrainedLineSearch(l2RegFunction, direction, lsr, l1Cost, initialStepSize);

                    ComputePseudoGrad(lsr.NextPoint, lsr.GradAtNext, pseudoGrad);

                    lsr.PseudoGradAtNext = pseudoGrad;
                }
                else
                {
                    LineSearch.DoLineSearch(l2RegFunction, direction, lsr, initialStepSize);
                }

                // Save Hessian updates.
                updateInfo.Update(lsr);

                if (monitor != null)
                {
                    // Right-align the iteration number to 3 characters; this produces
                    // exactly the same output as branching on <10 / <100.
                    Display(iteration.ToString().PadLeft(3) + ":  ");

                    if (Evaluator != null)
                    {
                        Display($"\t{lsr.ValueAtNext}\t{lsr.FuncChangeRate}\t{Evaluator.Evaluate(lsr.NextPoint)}\n");
                    }
                    else
                    {
                        Display($"\t {lsr.ValueAtNext}\t{lsr.FuncChangeRate}\n");
                    }
                }

                if (IsConverged(lsr))
                {
                    break;
                }

                initialStepSize = InitialStepSize;
            }

            // Undo L2-shrinkage if Elastic Net is used (since in that case the shrinkage
            // is done twice). NOTE(review): the upstream Java code left this as dead code;
            // here the NextPoint values are rescaled in place, as the port intended.
            if (l1Cost > 0 && l2Cost > 0)
            {
                var rescale = Math.Sqrt(1 + l2Cost); // hoisted: loop-invariant
                for (var i = 0; i < dimension; i++)
                {
                    lsr.NextPoint[i] = rescale * lsr.NextPoint[i];
                }
            }

            if (monitor != null)
            {
                Display($"Running time: {stopwatch.Elapsed.TotalSeconds}s\n");
            }

            // Release the Hessian update history so it can be collected.
            updateInfo = null;

            // Returns a reference into lsr; callers must not rely on it being a private copy.
            return lsr.NextPoint;
        }