Example #1
        /// <summary>
        /// Run an unconstrained minimization using BFGS
        /// </summary>
        /// <param name="x0">Starting point</param>
        /// <param name="normOfFirstStep">Norm of first step</param>
        /// <param name="func">Multivariate function and derivative evaluator</param>
        /// <returns>The local minimum point</returns>
        public virtual Vector Run(Vector x0, double normOfFirstStep, FunctionEval func)
        {
            dimension = x0.Count;
            currentX  = Vector.Copy(x0);

            if (func == null)
            {
                throw new ArgumentException("Null function");
            }

            cancel = false;

            // Initialize all the fields needed for optimisation
            feval        = new FunctionEval(func);
            searchDir    = Vector.Zero(dimension);
            currentDeriv = Vector.Zero(dimension);

            // Following vectors are the components of the
            // inverse Hessian update
            Vector S                 = Vector.Zero(dimension);
            Vector Y                 = Vector.Zero(dimension);
            Vector minusRhoS         = Vector.Zero(dimension);
            Matrix EyeMinusSYt       = new Matrix(dimension, dimension);
            Vector prevDeriv         = Vector.Zero(dimension);
            PositiveDefiniteMatrix H = new PositiveDefiniteMatrix(dimension, dimension);

            H.SetToIdentity();
            Matrix HWork = new Matrix(dimension, dimension);

            // Get the derivatives
            currentObj = feval(currentX, ref currentDeriv);
            double prevObj = currentObj;
            double rho;
            double step             = 0.0;
            double step0            = initialStep;
            double convergenceCheck = double.MaxValue;
            double invDim           = 1.0 / dimension;

            iter = 0;
            while (convergenceCheck > this.epsilon && !cancel)
            {
                prevDeriv.SetTo(currentDeriv);

                // Compute search direction
                searchDir.SetToProduct(H, currentDeriv);

                // Negate
                for (int i = 0; i < dimension; i++)
                {
                    searchDir[i] = -searchDir[i];
                }

                if (iter == 0)
                {
                    double sdNorm = System.Math.Sqrt(searchDir.Inner(searchDir));
                    double sdMult = normOfFirstStep / sdNorm;
                    searchDir.SetToProduct(searchDir, sdMult);
                    for (int i = 0; i < dimension; i++)
                    {
                        H[i, i] = sdMult;
                    }
                }

                //------------------------------------------------------
                // Line search enforces strong Wolfe conditions
                // so curvature condition is guaranteed
                //------------------------------------------------------
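                // The curvature part of the strong Wolfe conditions ensures
                // y'*s > 0 at the accepted step, so rho computed below is
                // positive and the BFGS update preserves the
                // positive-definiteness of H.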
                step = TryLineSearch(step0, stepMax, currentDeriv.Inner(searchDir));

                // Get the delta between the old and new point
                S.SetToProduct(searchDir, step);

                // Update the current point
                currentX.SetToSum(currentX, S);

                // The objective and derivative at the new point are already held
                // in currentObj and currentDeriv: the line search evaluates the
                // function as it searches for an acceptable step, so no extra
                // call to feval is needed here.

                // If the step is 0, the line search made no progress, so stop.
                if (step == 0.0)
                {
                    break;
                }

                // Difference of derivs
                Y.SetToDifference(currentDeriv, prevDeriv);

                // BFGS update
                rho = 1.0 / (S.Inner(Y));
                if (iter == 0)
                {
                    // Rescale the initial inverse Hessian to (s'*s)/(s'*y) times the identity
                    double beta = S.Inner(S) * rho;
                    for (int i = 0; i < dimension; i++)
                    {
                        H[i, i] = beta;
                    }
                }


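                // Inverse Hessian update (BFGS):
                //   H <- (I - rho*s*y') * H * (I - rho*y*s') + rho*s*s'
                // where s is the step just taken, y is the change in the
                // derivative, and rho = 1/(y'*s).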
                EyeMinusSYt.SetToIdentity();
                EyeMinusSYt.SetToSumWithOuter(EyeMinusSYt, -rho, S, Y);
                HWork.SetToProduct(EyeMinusSYt, H);
                H.SetToProduct(HWork, EyeMinusSYt.Transpose());
                H.SetToSumWithOuter(H, rho, S, S);

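                // Convergence test: either the root-mean-square of the gradient
                // components or the relative change in the objective value,
                // depending on the configured criterion.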
                switch (convergenceCriteria)
                {
                case ConvergenceCriteria.Gradient:
                    convergenceCheck = System.Math.Sqrt(invDim * currentDeriv.Inner(currentDeriv));
                    break;

                case ConvergenceCriteria.Objective:
                    convergenceCheck = System.Math.Abs(prevObj - currentObj) / System.Math.Max(1, System.Math.Max(prevObj, currentObj));
                    break;
                }
                prevObj = currentObj;
                // Let anyone who's interested know that we have completed an iteration
                RaiseIterationEvent(iter, currentObj, convergenceCheck);

                if (++iter > maxIterations)
                {
                    break;
                }
            }
            return currentX;
        }
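A minimal usage sketch for Run, assuming FunctionEval is a delegate of the form double FunctionEval(Vector x, ref Vector dX) that returns the objective value and writes the gradient into dX, and that the surrounding optimizer type (called BFGS below) is constructed with default settings. Vector.FromArray and the BFGS type name are assumptions, not taken from the code above.

        // Minimize f(x) = (x[0] - 1)^2 + (x[1] + 2)^2 starting from the origin.
        double Quadratic(Vector x, ref Vector dX)
        {
            dX[0] = 2 * (x[0] - 1);
            dX[1] = 2 * (x[1] + 2);
            return (x[0] - 1) * (x[0] - 1) + (x[1] + 2) * (x[1] + 2);
        }

        var optimizer = new BFGS();
        Vector xMin = optimizer.Run(Vector.FromArray(0.0, 0.0), 1.0, Quadratic);
        // xMin should end up close to (1, -2).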
 /// <summary>
 /// Sets this positive-definite matrix to the sum of two positive-definite matrices.
 /// Assumes the matrices have compatible dimensions.
 /// </summary>
 /// <param name="a">First matrix, which must have the same size as <c>this</c>.</param>
 /// <param name="b">Second matrix, which must have the same size as <c>this</c>.</param>
 /// <returns><c>this</c></returns>
 /// <remarks><c>this</c> receives the sum, and must already be the correct size.
 /// <paramref name="a"/> and/or <paramref name="b"/> may be the same object as <c>this</c>.
 /// If <c>this</c> and <paramref name="a"/>/<paramref name="b"/> occupy distinct yet overlapping portions of the same source array, the results are undefined.
 /// </remarks>
 public PositiveDefiniteMatrix SetToSum(PositiveDefiniteMatrix a, PositiveDefiniteMatrix b)
 {
     base.SetToSum(a, b);
     return this;
 }
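A small sketch of the in-place sum; the Identity and IdentityScaledBy factory methods used below are assumptions. Per the remarks, the target may alias one of the operands.

 // Sum two 2x2 positive-definite matrices in place.
 PositiveDefiniteMatrix a = PositiveDefiniteMatrix.Identity(2);
 PositiveDefiniteMatrix b = PositiveDefiniteMatrix.IdentityScaledBy(2, 3.0);
 PositiveDefiniteMatrix sum = new PositiveDefiniteMatrix(2, 2);
 sum.SetToSum(a, b);   // sum is now 4 times the identity
 a.SetToSum(a, b);     // aliasing the target with an operand is allowed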
 /// <summary>
 /// Constructs an accumulator for matrix observations
 /// </summary>
 /// <param name="rows">Number of rows in each matrix observation</param>
 /// <param name="cols">Number of columns in each matrix observation</param>
 public MatrixMeanVarianceAccumulator(int rows, int cols)
 {
     Mean     = new Matrix(rows, cols);
     Variance = new PositiveDefiniteMatrix(rows, cols);
     diff     = new Matrix(rows, cols);
 }
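A minimal construction sketch. The Mean and Variance members are allocated by the constructor above; whether they are publicly readable is an assumption.

 // Accumulator for 3x3 matrix observations; Mean, Variance and diff
 // are allocated with the requested shape and start at zero.
 var acc = new MatrixMeanVarianceAccumulator(3, 3);
 Matrix mean = acc.Mean;                         // 3x3, initially zero
 PositiveDefiniteMatrix variance = acc.Variance; // 3x3, initially zero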