/// <summary>
        /// Tests the gradient reported by <paramref name="f"/>.
        /// </summary>
        /// <param name="f">Function to test</param>
        /// <param name="x">Point at which to test</param>
        /// <param name="dir">Direction to test derivative</param>
        /// <param name="quiet">Whether to disable output</param>
        /// <param name="newGrad">This is a reusable working buffer for intermediate calculations</param>
        /// <param name="newX">This is a reusable working buffer for intermediate calculations</param>
        /// <returns>Normalized difference between analytic and numeric directional derivative</returns>
        public static Float Test(DifferentiableFunction f, ref VBuffer <Float> x, ref VBuffer <Float> dir, bool quiet,
                                 ref VBuffer <Float> newGrad, ref VBuffer <Float> newX)
        {
            Float normDir = VectorUtils.Norm(dir);

            Float val      = f(ref x, ref newGrad, null);
            Float dirDeriv = VectorUtils.DotProduct(ref newGrad, ref dir);

            Float scaledEps = Eps / normDir;

            VectorUtils.AddMultInto(ref x, scaledEps, ref dir, ref newX);
            Float rVal = f(ref newX, ref newGrad, null);

            VectorUtils.AddMultInto(ref x, -scaledEps, ref dir, ref newX);
            Float lVal = f(ref newX, ref newGrad, null);

            Float numDeriv = (rVal - lVal) / (2 * scaledEps);

            Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
            Float diff     = numDeriv - dirDeriv;

            if (!quiet)
            {
                Console.WriteLine("{0,-18:0.0000e0}{1,-18:0.0000e0}{2,-15:0.0000e0}{3,0:0.0000e0}", numDeriv, dirDeriv, diff, normDiff);
            }

            return(normDiff);
        }
        /// <summary>
        /// Tests the gradient using finite differences on each axis in the list
        /// </summary>
        /// <param name="f">Function to test</param>
        /// <param name="x">Point at which to test</param>
        /// <param name="coords">List of coordinates to test</param>
        public static void TestCoords(DifferentiableFunction f, ref VBuffer <Float> x, IList <int> coords)
        {
            // REVIEW: Delete this method?
            VBuffer <Float> grad    = default(VBuffer <Float>);
            VBuffer <Float> newGrad = default(VBuffer <Float>);
            VBuffer <Float> newX    = default(VBuffer <Float>);
            Float           val     = f(ref x, ref grad, null);
            Float           normX   = VectorUtils.Norm(x);

            Console.WriteLine(Header);

            Random r = new Random(5);

            VBuffer <Float> dir = new VBuffer <Float>(x.Length, 1, new Float[] { 1 }, new int[] { 0 });

            foreach (int n in coords)
            {
                dir.Values[0] = n;
                VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
                Float rVal = f(ref newX, ref newGrad, null);

                VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
                Float lVal = f(ref newX, ref newGrad, null);

                Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
                Float numDeriv = (rVal - lVal) / (2 * Eps);

                Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                Float diff     = numDeriv - dirDeriv;
                Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
            }
        }
Exemplo n.º 3
0
        /// <summary>
        /// Returns true if the norm of the gradient, divided by the value, is less than the tolerance.
        /// </summary>
        /// <param name="state">current state of the optimzer</param>
        /// <param name="message">the current value of the criterion</param>
        /// <returns>true iff criterion is less than the tolerance</returns>
        public override bool Terminate(Optimizer.OptimizerState state, out string message)
        {
            var   grad = state.Grad;
            Float norm = VectorUtils.Norm(grad);
            Float val  = norm / Math.Abs(state.Value);

            message = string.Format("{0,0:0.0000e0}", val);
            return(val < _tol);
        }
Exemplo n.º 4
0
            /// <summary>
            /// Backtracking line search with Armijo-like condition, from Andrew &amp; Gao
            /// </summary>
            internal override bool LineSearch(IChannel ch, bool force)
            {
                Float dirDeriv = -VectorUtils.DotProduct(ref _dir, ref _steepestDescDir);

                if (dirDeriv == 0)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
                }

                // if a non-descent direction is chosen, the line search will break anyway, so throw here
                // The most likely reason for this is a bug in your function's gradient computation
                // It may also indicate that your function is not convex.
                ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");

                Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1);

                GetNextPoint(alpha);
                Float unnormCos = VectorUtils.DotProduct(ref _steepestDescDir, ref _newX) - VectorUtils.DotProduct(ref _steepestDescDir, ref _x);

                if (unnormCos < 0)
                {
                    VBufferUtils.ApplyWith(ref _steepestDescDir, ref _dir,
                                           (int ind, Float sdVal, ref Float dirVal) =>
                    {
                        if (sdVal * dirVal < 0 && ind >= _biasCount)
                        {
                            dirVal = 0;
                        }
                    });

                    GetNextPoint(alpha);
                    unnormCos = VectorUtils.DotProduct(ref _steepestDescDir, ref _newX) - VectorUtils.DotProduct(ref _steepestDescDir, ref _x);
                }

                int i = 0;

                while (true)
                {
                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;

                    if (Value <= LastValue - Gamma * unnormCos)
                    {
                        return(true);
                    }

                    ++i;
                    if (!force && i == MaxLineSearch)
                    {
                        return(false);
                    }

                    alpha *= (Float)0.25;
                    GetNextPoint(alpha);
                    unnormCos = VectorUtils.DotProduct(ref _steepestDescDir, ref _newX) - VectorUtils.DotProduct(ref _steepestDescDir, ref _x);
                }
            }
Exemplo n.º 5
0
        public static void Main(string[] argv)
        {
            RunTest(QuadTest);
            RunTest(LogTest);

            VBuffer <Float> grad  = VBufferUtils.CreateEmpty <Float>(2);
            int             n     = 0;
            bool            print = false;
            DTerminate      term  =
                (ref VBuffer <Float> x) =>
            {
                QuadTest2D(ref x, ref grad);
                Float norm = VectorUtils.Norm(grad);
                if (++n % 1000 == 0 || print)
                {
                    Console.WriteLine("{0}\t{1}", n, norm);
                }
                return(norm < 1e-5);
            };
            SgdOptimizer    sgdo = new SgdOptimizer(term, SgdOptimizer.RateScheduleType.Constant, false, 100, 1, (Float)0.99);
            VBuffer <Float> init;

            CreateWrapped(out init, 0, 0);
            VBuffer <Float> ans = default(VBuffer <Float>);

            sgdo.Minimize(StochasticQuadTest2D, ref init, ref ans);
            QuadTest2D(ref ans, ref grad);
            Console.WriteLine(VectorUtils.Norm(grad));
            Console.WriteLine();
            Console.WriteLine();
            n = 0;
            GDOptimizer gdo = new GDOptimizer(term, null, true);

            print = true;
            CreateWrapped(out init, 0, 0);
            gdo.Minimize(QuadTest2D, ref init, ref ans);
            QuadTest2D(ref ans, ref grad);
            Console.WriteLine(VectorUtils.Norm(grad));
        }
Exemplo n.º 6
0
            /// <summary>
            /// An implementation of the line search for the Wolfe conditions, from Nocedal &amp; Wright
            /// </summary>
            internal virtual bool LineSearch(IChannel ch, bool force)
            {
                Contracts.AssertValue(ch);
                Float dirDeriv = VectorUtils.DotProduct(ref _dir, ref _grad);

                if (dirDeriv == 0)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
                }

                // if a non-descent direction is chosen, the line search will break anyway, so throw here
                // The most likely reasons for this is a bug in your function's gradient computation,
                ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");

                Float c1 = (Float)1e-4 * dirDeriv;
                Float c2 = (Float)0.9 * dirDeriv;

                Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1);

                PointValueDeriv last = new PointValueDeriv(0, LastValue, dirDeriv);
                PointValueDeriv aLo  = new PointValueDeriv();
                PointValueDeriv aHi  = new PointValueDeriv();

                // initial bracketing phase
                while (true)
                {
                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    if (Float.IsPositiveInfinity(Value))
                    {
                        alpha /= 2;
                        continue;
                    }

                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }

                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    if ((curr.V > LastValue + c1 * alpha) || (last.A > 0 && curr.V >= last.V))
                    {
                        aLo = last;
                        aHi = curr;
                        break;
                    }
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        return(true);
                    }
                    else if (curr.D >= 0)
                    {
                        aLo = curr;
                        aHi = last;
                        break;
                    }

                    last = curr;
                    if (alpha == 0)
                    {
                        alpha = Float.Epsilon; // Robust to divisional underflow.
                    }
                    else
                    {
                        alpha *= 2;
                    }
                }

                Float minChange = (Float)0.01;
                int   maxSteps  = 10;

                // this loop is the "zoom" procedure described in Nocedal & Wright
                for (int step = 0; ; ++step)
                {
                    if (step == maxSteps && !force)
                    {
                        return(false);
                    }

                    PointValueDeriv left  = aLo.A < aHi.A ? aLo : aHi;
                    PointValueDeriv right = aLo.A < aHi.A ? aHi : aLo;
                    if (left.D > 0 && right.D < 0)
                    {
                        // interpolating cubic would have max in range, not min (can this happen?)
                        // set a to the one with smaller value
                        alpha = aLo.V < aHi.V ? aLo.A : aHi.A;
                    }
                    else
                    {
                        alpha = CubicInterp(aLo, aHi);
                        if (Float.IsNaN(alpha) || Float.IsInfinity(alpha))
                        {
                            alpha = (aLo.A + aHi.A) / 2;
                        }
                    }

                    // this is to ensure that the new point is within bounds
                    // and that the change is reasonably sized
                    Float ub = (minChange * left.A + (1 - minChange) * right.A);
                    if (alpha > ub)
                    {
                        alpha = ub;
                    }
                    Float lb = (minChange * right.A + (1 - minChange) * left.A);
                    if (alpha < lb)
                    {
                        alpha = lb;
                    }

                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }
                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);

                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    if ((curr.V > LastValue + c1 * alpha) || (curr.V >= aLo.V))
                    {
                        if (aHi.A == curr.A)
                        {
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aHi = curr;
                    }
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        return(true);
                    }
                    else
                    {
                        if (curr.D * (aHi.A - aLo.A) >= 0)
                        {
                            aHi = aLo;
                        }
                        if (aLo.A == curr.A)
                        {
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aLo = curr;
                    }
                }
            }
        /// <summary>
        /// Tests the gradient reported by f.
        /// </summary>
        /// <param name="f">function to test</param>
        /// <param name="x">point at which to test</param>
        /// <param name="quiet">If false, outputs detailed info.</param>
        /// <returns>maximum normalized difference between analytic and numeric directional derivative over multiple tests</returns>
        public static Float Test(DifferentiableFunction f, ref VBuffer <Float> x, bool quiet)
        {
            // REVIEW: Delete this method?
            VBuffer <Float> grad    = default(VBuffer <Float>);
            VBuffer <Float> newGrad = default(VBuffer <Float>);
            VBuffer <Float> newX    = default(VBuffer <Float>);
            Float           normX   = VectorUtils.Norm(x);

            f(ref x, ref grad, null);

            if (!quiet)
            {
                Console.WriteLine(Header);
            }

            Float maxNormDiff = Float.NegativeInfinity;

            int numIters    = Math.Min((int)x.Length, 10);
            int maxDirCount = Math.Min((int)x.Length / 2, 100);

            for (int n = 1; n <= numIters; n++)
            {
                int          dirCount = Math.Min(n * 10, maxDirCount);
                List <int>   indices  = new List <int>(dirCount);
                List <Float> values   = new List <Float>(dirCount);
                for (int i = 0; i < dirCount; i++)
                {
                    int index = _r.Next((int)x.Length);
                    while (indices.IndexOf(index) >= 0)
                    {
                        index = _r.Next((int)x.Length);
                    }
                    indices.Add(index);
                    values.Add(SampleFromGaussian(_r));
                }
                VBuffer <Float> dir = new VBuffer <Float>(x.Length, values.Count, values.ToArray(), indices.ToArray());

                Float norm = VectorUtils.Norm(dir);
                VectorUtils.ScaleBy(ref dir, 1 / norm);

                VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
                Float rVal = f(ref newX, ref newGrad, null);

                VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
                Float lVal = f(ref newX, ref newGrad, null);

                Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
                Float numDeriv = (rVal - lVal) / (2 * Eps);

                Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                Float diff     = numDeriv - dirDeriv;
                if (!quiet)
                {
                    Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
                }

                maxNormDiff = Math.Max(maxNormDiff, normDiff);
            }

            return(maxNormDiff);
        }