/// <summary>
/// Checks the gradient reported by <paramref name="f"/> along a single direction by comparing
/// the analytic directional derivative with a central finite-difference estimate.
/// </summary>
/// <param name="f">Function whose gradient is checked</param>
/// <param name="x">Point at which the check is performed</param>
/// <param name="dir">Direction along which the derivative is taken</param>
/// <param name="quiet">When true, nothing is written to the console</param>
/// <param name="newGrad">Reusable scratch buffer; overwritten by every evaluation of <paramref name="f"/></param>
/// <param name="newX">Reusable scratch buffer; overwritten with the perturbed points</param>
/// <returns>Normalized difference |1 - numeric / analytic| of the directional derivative</returns>
public static Float Test(DifferentiableFunction f, ref VBuffer<Float> x, ref VBuffer<Float> dir, bool quiet, ref VBuffer<Float> newGrad, ref VBuffer<Float> newX)
{
    // Perturbation size: Eps divided by the length of the direction vector.
    Float step = Eps / VectorUtils.Norm(dir);

    // Analytic slope: gradient at x projected onto dir (the function value itself is not needed).
    f(ref x, ref newGrad, null);
    Float analytic = VectorUtils.DotProduct(ref newGrad, ref dir);

    // Central difference: evaluate at x + step * dir and at x - step * dir.
    VectorUtils.AddMultInto(ref x, step, ref dir, ref newX);
    Float forward = f(ref newX, ref newGrad, null);
    VectorUtils.AddMultInto(ref x, -step, ref dir, ref newX);
    Float backward = f(ref newX, ref newGrad, null);

    Float numeric = (forward - backward) / (2 * step);
    Float relError = Math.Abs(1 - numeric / analytic);

    if (quiet)
    {
        return relError;
    }

    Console.WriteLine("{0,-18:0.0000e0}{1,-18:0.0000e0}{2,-15:0.0000e0}{3,0:0.0000e0}", numeric, analytic, numeric - analytic, relError);
    return relError;
}
/// <summary>
/// Tests the gradient using finite differences on each axis in the list.
/// For every coordinate a table row is printed with the coordinate, the numeric derivative,
/// the analytic derivative, their difference and the normalized difference.
/// </summary>
/// <param name="f">Function to test</param>
/// <param name="x">Point at which to test</param>
/// <param name="coords">List of coordinates (axis indices into <paramref name="x"/>) to test</param>
public static void TestCoords(DifferentiableFunction f, ref VBuffer<Float> x, IList<int> coords)
{
    // REVIEW: Delete this method?
    VBuffer<Float> grad = default(VBuffer<Float>);
    VBuffer<Float> newGrad = default(VBuffer<Float>);
    VBuffer<Float> newX = default(VBuffer<Float>);

    // Only the analytic gradient at x is needed here; the function value is discarded.
    f(ref x, ref grad, null);

    Console.WriteLine(Header);

    // Sparse direction with a single entry of value 1; its index is retargeted for each coordinate.
    VBuffer<Float> dir = new VBuffer<Float>(x.Length, 1, new Float[] { 1 }, new int[] { 0 });
    foreach (int n in coords)
    {
        // Select axis n. (Previously the *value* was overwritten with n, which kept testing
        // axis 0 with a direction scaled by n and produced a zero direction for n == 0.)
        dir.Indices[0] = n;

        // Central difference along the selected axis.
        VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
        Float rVal = f(ref newX, ref newGrad, null);

        VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
        Float lVal = f(ref newX, ref newGrad, null);

        Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
        Float numDeriv = (rVal - lVal) / (2 * Eps);

        Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
        Float diff = numDeriv - dirDeriv;
        Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
    }
}
/// <summary>
/// Reports convergence when the gradient norm, divided by the absolute function value,
/// falls below the tolerance.
/// </summary>
/// <param name="state">Current state of the optimizer</param>
/// <param name="message">Receives the current value of the criterion, formatted for display</param>
/// <returns>True iff the criterion is strictly less than the tolerance</returns>
public override bool Terminate(Optimizer.OptimizerState state, out string message)
{
    Float gradNorm = VectorUtils.Norm(state.Grad);
    Float criterion = gradNorm / Math.Abs(state.Value);

    message = string.Format("{0,0:0.0000e0}", criterion);
    return criterion < _tol;
}
/// <summary>
/// Backtracking line search with an Armijo-like acceptance condition, from Andrew &amp; Gao.
/// </summary>
/// <param name="ch">Channel used to raise checks and exceptions</param>
/// <param name="force">When true, the search keeps shrinking the step instead of giving up after MaxLineSearch tries</param>
/// <returns>True once an acceptable point is found; false if the try limit is reached and <paramref name="force"/> is false</returns>
internal override bool LineSearch(IChannel ch, bool force)
{
    Float slope = -VectorUtils.DotProduct(ref _dir, ref _steepestDescDir);

    if (slope == 0)
    {
        throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
    }

    // A non-descent direction would break the line search anyway, so fail here.
    // The most likely cause is a bug in the function's gradient computation;
    // it may also indicate that the function is not convex.
    ch.Check(slope < 0, "L-BFGS chose a non-descent direction.");

    // The very first iteration uses a step normalized by the direction length.
    Float stepSize;
    if (Iter == 1)
    {
        stepSize = 1 / VectorUtils.Norm(_dir);
    }
    else
    {
        stepSize = 1;
    }

    Float progress = MoveAndMeasure(stepSize);
    if (progress < 0)
    {
        // Zero every component of _dir at or beyond _biasCount whose sign opposes
        // the steepest descent direction, then retry with the same step size.
        VBufferUtils.ApplyWith(ref _steepestDescDir, ref _dir,
            (int slot, Float descent, ref Float component) =>
            {
                if (descent * component < 0)
                {
                    if (slot >= _biasCount)
                    {
                        component = 0;
                    }
                }
            });

        progress = MoveAndMeasure(stepSize);
    }

    for (int attempt = 1; ; ++attempt)
    {
        Value = Eval(ref _newX, ref _newGrad);
        GradientCalculations++;

        // Accept the point once the value has decreased sufficiently.
        if (Value <= LastValue - Gamma * progress)
        {
            return true;
        }

        if (!force && attempt == MaxLineSearch)
        {
            return false;
        }

        // Backtrack: quarter the step and try again.
        stepSize *= (Float)0.25;
        progress = MoveAndMeasure(stepSize);
    }
}

/// <summary>
/// Computes the next point for step size <paramref name="alpha"/> and returns the difference between the
/// steepest descent direction's dot product with the new point and with the current point.
/// </summary>
private Float MoveAndMeasure(Float alpha)
{
    GetNextPoint(alpha);
    return VectorUtils.DotProduct(ref _steepestDescDir, ref _newX) - VectorUtils.DotProduct(ref _steepestDescDir, ref _x);
}
/// <summary>
/// Ad-hoc driver: runs RunTest on QuadTest and LogTest, then minimizes the 2D quadratic test
/// first with <see cref="SgdOptimizer"/> and then with <see cref="GDOptimizer"/>, printing the
/// gradient norm at each result.
/// </summary>
/// <param name="argv">Command-line arguments (not used)</param>
public static void Main(string[] argv)
{
    RunTest(QuadTest);
    RunTest(LogTest);

    VBuffer<Float> grad = VBufferUtils.CreateEmpty<Float>(2);
    int calls = 0;
    bool alwaysPrint = false;

    // Termination callback shared by both optimizers. It captures the call counter and the
    // print flag, so changing them below affects later invocations.
    DTerminate term = (ref VBuffer<Float> point) =>
    {
        QuadTest2D(ref point, ref grad);
        Float gradNorm = VectorUtils.Norm(grad);

        calls++;
        if (alwaysPrint || calls % 1000 == 0)
        {
            Console.WriteLine("{0}\t{1}", calls, gradNorm);
        }

        return gradNorm < 1e-5;
    };

    // Stochastic gradient descent, reporting every 1000th callback invocation.
    SgdOptimizer sgdo = new SgdOptimizer(term, SgdOptimizer.RateScheduleType.Constant, false, 100, 1, (Float)0.99);
    VBuffer<Float> init;
    CreateWrapped(out init, 0, 0);
    VBuffer<Float> ans = default(VBuffer<Float>);
    sgdo.Minimize(StochasticQuadTest2D, ref init, ref ans);
    QuadTest2D(ref ans, ref grad);
    Console.WriteLine(VectorUtils.Norm(grad));

    Console.WriteLine();
    Console.WriteLine();

    // Gradient descent, reporting on every callback invocation with a fresh counter.
    calls = 0;
    GDOptimizer gdo = new GDOptimizer(term, null, true);
    alwaysPrint = true;
    CreateWrapped(out init, 0, 0);
    gdo.Minimize(QuadTest2D, ref init, ref ans);
    QuadTest2D(ref ans, ref grad);
    Console.WriteLine(VectorUtils.Norm(grad));
}
/// <summary>
/// An implementation of the line search for the Wolfe conditions, from Nocedal &amp; Wright.
/// Runs an initial bracketing phase that grows the step size, followed by a "zoom" phase
/// that shrinks the bracket using cubic interpolation.
/// </summary>
/// <param name="ch">Channel used to raise checks and exceptions</param>
/// <param name="force">When true, the zoom phase is not limited to maxSteps iterations, and a collapsed
/// step interval raises a PrematureConvergenceException instead of returning false</param>
/// <returns>True when a step satisfying both tested conditions was found; false when the zoom phase
/// gave up (only possible when <paramref name="force"/> is false)</returns>
internal virtual bool LineSearch(IChannel ch, bool force)
{
    Contracts.AssertValue(ch);
    Float dirDeriv = VectorUtils.DotProduct(ref _dir, ref _grad);

    if (dirDeriv == 0)
    {
        throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
    }

    // If a non-descent direction is chosen, the line search will break anyway, so throw here.
    // The most likely reason for this is a bug in your function's gradient computation.
    ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");

    // Both constants are pre-multiplied by the (negative) initial directional derivative:
    // c1 is used in the sufficient-decrease test and c2 in the curvature test below.
    Float c1 = (Float)1e-4 * dirDeriv;
    Float c2 = (Float)0.9 * dirDeriv;

    // The first iteration starts from a step normalized by the direction length; later ones start at 1.
    Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1);

    // "last" starts as the current point (step 0); aLo/aHi will hold the bracket end points.
    PointValueDeriv last = new PointValueDeriv(0, LastValue, dirDeriv);
    PointValueDeriv aLo = new PointValueDeriv();
    PointValueDeriv aHi = new PointValueDeriv();

    // initial bracketing phase
    while (true)
    {
        VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
        if (EnforceNonNegativity)
        {
            // Clamp negative coordinates of the trial point to zero.
            VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
            {
                if (newXval < 0.0)
                {
                    newXval = 0;
                }
            });
        }

        Value = Eval(ref _newX, ref _newGrad);
        GradientCalculations++;
        if (Float.IsPositiveInfinity(Value))
        {
            // Stepped too far: halve the step and try again without updating "last".
            alpha /= 2;
            continue;
        }

        if (!FloatUtils.IsFinite(Value))
        {
            throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
        }

        dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
        PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

        if ((curr.V > LastValue + c1 * alpha) || (last.A > 0 && curr.V >= last.V))
        {
            // Sufficient decrease failed, or the value stopped decreasing: bracket is [last, curr].
            aLo = last;
            aHi = curr;
            break;
        }
        else if (Math.Abs(curr.D) <= -c2)
        {
            // Sufficient decrease holds and the curvature test passes: accept this step.
            return(true);
        }
        else if (curr.D >= 0)
        {
            // Derivative changed sign: bracket is [curr, last].
            aLo = curr;
            aHi = last;
            break;
        }

        // No bracket yet: remember this point and grow the step.
        last = curr;
        if (alpha == 0)
        {
            alpha = Float.Epsilon; // Robust to divisional underflow.
        }
        else
        {
            alpha *= 2;
        }
    }

    // Fraction of the bracket width that a new trial step must stay away from either end point.
    Float minChange = (Float)0.01;
    // Zoom iteration limit; only applied when force is false.
    int maxSteps = 10;

    // this loop is the "zoom" procedure described in Nocedal & Wright
    for (int step = 0; ; ++step)
    {
        if (step == maxSteps && !force)
        {
            return(false);
        }

        // Order the bracket end points by step size.
        PointValueDeriv left = aLo.A < aHi.A ? aLo : aHi;
        PointValueDeriv right = aLo.A < aHi.A ? aHi : aLo;
        if (left.D > 0 && right.D < 0)
        {
            // interpolating cubic would have max in range, not min (can this happen?)
            // set a to the one with smaller value
            alpha = aLo.V < aHi.V ? aLo.A : aHi.A;
        }
        else
        {
            alpha = CubicInterp(aLo, aHi);
            if (Float.IsNaN(alpha) || Float.IsInfinity(alpha))
            {
                // Interpolation failed numerically: fall back to bisection.
                alpha = (aLo.A + aHi.A) / 2;
            }
        }

        // this is to ensure that the new point is within bounds
        // and that the change is reasonably sized
        Float ub = (minChange * left.A + (1 - minChange) * right.A);
        if (alpha > ub)
        {
            alpha = ub;
        }
        Float lb = (minChange * right.A + (1 - minChange) * left.A);
        if (alpha < lb)
        {
            alpha = lb;
        }

        VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
        if (EnforceNonNegativity)
        {
            // Clamp negative coordinates of the trial point to zero.
            VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
            {
                if (newXval < 0.0)
                {
                    newXval = 0;
                }
            });
        }

        Value = Eval(ref _newX, ref _newGrad);
        GradientCalculations++;
        if (!FloatUtils.IsFinite(Value))
        {
            throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
        }

        dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);

        PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

        if ((curr.V > LastValue + c1 * alpha) || (curr.V >= aLo.V))
        {
            // Trial point is not good enough: it becomes the new high end of the bracket.
            if (aHi.A == curr.A)
            {
                // The trial step equals the existing end point, so the bracket can no longer shrink.
                if (force)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                }
                else
                {
                    return(false);
                }
            }
            aHi = curr;
        }
        else if (Math.Abs(curr.D) <= -c2)
        {
            // Both tests pass: accept this step.
            return(true);
        }
        else
        {
            // Trial point becomes the new low end; flip the high end first when the
            // derivative sign shows the minimum lies on the other side.
            if (curr.D * (aHi.A - aLo.A) >= 0)
            {
                aHi = aLo;
            }
            if (aLo.A == curr.A)
            {
                // The trial step equals the existing end point, so the bracket can no longer shrink.
                if (force)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                }
                else
                {
                    return(false);
                }
            }
            aLo = curr;
        }
    }
}
/// <summary>
/// Tests the gradient reported by <paramref name="f"/> along several random sparse directions,
/// comparing the analytic directional derivative with a central finite-difference estimate.
/// </summary>
/// <param name="f">Function to test</param>
/// <param name="x">Point at which to test</param>
/// <param name="quiet">If false, outputs a header and one detail row per tested direction</param>
/// <returns>Maximum normalized difference between analytic and numeric directional derivative over
/// all tested directions; negative infinity when <paramref name="x"/> has length zero</returns>
public static Float Test(DifferentiableFunction f, ref VBuffer<Float> x, bool quiet)
{
    // REVIEW: Delete this method?
    VBuffer<Float> grad = default(VBuffer<Float>);
    VBuffer<Float> newGrad = default(VBuffer<Float>);
    VBuffer<Float> newX = default(VBuffer<Float>);

    // Only the analytic gradient at x is needed here; the function value is discarded.
    f(ref x, ref grad, null);

    if (!quiet)
    {
        Console.WriteLine(Header);
    }

    int dim = (int)x.Length;
    Float maxNormDiff = Float.NegativeInfinity;

    int numIters = Math.Min(dim, 10);
    // Use at least one nonzero coordinate: for a 1-dimensional x, dim / 2 is 0, which would
    // produce an empty (zero) direction and a NaN result.
    int maxDirCount = Math.Max(1, Math.Min(dim / 2, 100));

    for (int n = 1; n <= numIters; n++)
    {
        int dirCount = Math.Min(n * 10, maxDirCount);

        // Draw dirCount distinct coordinates, each paired with a Gaussian-distributed value.
        int[] indices = new int[dirCount];
        Float[] values = new Float[dirCount];
        HashSet<int> chosen = new HashSet<int>();
        for (int i = 0; i < dirCount; i++)
        {
            int index = _r.Next(dim);
            while (!chosen.Add(index))
            {
                index = _r.Next(dim);
            }
            indices[i] = index;
            values[i] = SampleFromGaussian(_r);
        }

        // NOTE(review): VBuffer's sparse representation is expected to keep indices in increasing
        // order (confirm against the VBuffer documentation); sort indices and carry values along.
        Array.Sort(indices, values);

        VBuffer<Float> dir = new VBuffer<Float>(x.Length, dirCount, values, indices);

        // Normalize the direction to unit length so Eps is the actual perturbation size.
        Float norm = VectorUtils.Norm(dir);
        VectorUtils.ScaleBy(ref dir, 1 / norm);

        // Central difference along the direction.
        VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
        Float rVal = f(ref newX, ref newGrad, null);

        VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
        Float lVal = f(ref newX, ref newGrad, null);

        Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
        Float numDeriv = (rVal - lVal) / (2 * Eps);

        Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
        Float diff = numDeriv - dirDeriv;
        if (!quiet)
        {
            Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
        }

        maxNormDiff = Math.Max(maxNormDiff, normDiff);
    }

    return maxNormDiff;
}