/// <summary>
/// An implementation of the line search for the Wolfe conditions, from Nocedal &amp; Wright.
/// Phase 1 brackets a step interval by doubling alpha; phase 2 ("zoom") shrinks the
/// interval by cubic interpolation until the strong Wolfe conditions hold.
/// </summary>
/// <param name="ch">Channel for exceptions and diagnostics; must be non-null.</param>
/// <param name="force">If true, keep zooming past <c>maxSteps</c> and throw (rather than
/// return false) when the bracketing interval collapses to numerically zero width.</param>
/// <returns>True if a step satisfying the strong Wolfe conditions was found and accepted
/// (state <c>_newX</c>/<c>_newGrad</c>/<c>Value</c> holds the new point); false if the
/// search gave up without <paramref name="force"/>.</returns>
internal virtual bool LineSearch(IChannel ch, bool force)
{
    Contracts.AssertValue(ch);
    // Directional derivative of the objective along the search direction at the current point.
    Float dirDeriv = VectorUtils.DotProduct(ref _dir, ref _grad);
    if (dirDeriv == 0)
    {
        // Zero slope along the chosen direction: no progress is possible from here.
        throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
    }
    // if a non-descent direction is chosen, the line search will break anyway, so throw here
    // The most likely reasons for this is a bug in your function's gradient computation,
    ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");
    // c1/c2 are the Armijo and curvature thresholds pre-multiplied by dirDeriv (both negative,
    // since dirDeriv < 0): sufficient decrease is V <= LastValue + c1*alpha, and the strong
    // curvature condition is |D| <= -c2.
    Float c1 = (Float)1e-4 * dirDeriv;
    Float c2 = (Float)0.9 * dirDeriv;
    // On the very first iteration _dir is just the gradient, so scale the trial step to unit
    // length; afterwards the L-BFGS direction is well-scaled and alpha = 1 is the natural start.
    Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1);
    // (step, value, derivative) at the current point (alpha = 0).
    PointValueDeriv last = new PointValueDeriv(0, LastValue, dirDeriv);
    PointValueDeriv aLo = new PointValueDeriv();
    PointValueDeriv aHi = new PointValueDeriv();

    // initial bracketing phase
    while (true)
    {
        VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
        if (EnforceNonNegativity)
        {
            // Projected step: clamp any negative coordinates of the trial point to zero.
            VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
            {
                if (newXval < 0.0)
                {
                    newXval = 0;
                }
            });
        }
        Value = Eval(ref _newX, ref _newGrad);
        GradientCalculations++;
        if (Float.IsPositiveInfinity(Value))
        {
            // Overstepped into a region where the loss blows up; back off and retry.
            alpha /= 2;
            continue;
        }
        if (!FloatUtils.IsFinite(Value))
        {
            throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
        }
        dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
        PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);
        if ((curr.V > LastValue + c1 * alpha) || (last.A > 0 && curr.V >= last.V))
        {
            // Sufficient-decrease failed (or value rose versus the previous trial):
            // a minimizer is bracketed between the previous and current steps.
            aLo = last;
            aHi = curr;
            break;
        }
        else if (Math.Abs(curr.D) <= -c2)
        {
            // Both strong Wolfe conditions hold; accept this step.
            return(true);
        }
        else if (curr.D >= 0)
        {
            // Slope turned non-negative: minimizer bracketed with curr as the low end.
            aLo = curr;
            aHi = last;
            break;
        }
        last = curr;
        if (alpha == 0)
        {
            alpha = Float.Epsilon; // Robust to divisional underflow.
        }
        else
        {
            alpha *= 2;
        }
    }

    // Never interpolate closer than this fraction of the interval to either endpoint,
    // so the zoom interval is guaranteed to shrink each step.
    Float minChange = (Float)0.01;
    int maxSteps = 10;

    // this loop is the "zoom" procedure described in Nocedal & Wright
    for (int step = 0; ; ++step)
    {
        if (step == maxSteps && !force)
        {
            return(false);
        }
        PointValueDeriv left = aLo.A < aHi.A ? aLo : aHi;
        PointValueDeriv right = aLo.A < aHi.A ? aHi : aLo;
        if (left.D > 0 && right.D < 0)
        {
            // interpolating cubic would have max in range, not min (can this happen?)
            // set a to the one with smaller value
            alpha = aLo.V < aHi.V ? aLo.A : aHi.A;
        }
        else
        {
            alpha = CubicInterp(aLo, aHi);
            if (Float.IsNaN(alpha) || Float.IsInfinity(alpha))
            {
                // Interpolation failed numerically; fall back to bisection.
                alpha = (aLo.A + aHi.A) / 2;
            }
        }
        // this is to ensure that the new point is within bounds
        // and that the change is reasonably sized
        Float ub = (minChange * left.A + (1 - minChange) * right.A);
        if (alpha > ub)
        {
            alpha = ub;
        }
        Float lb = (minChange * right.A + (1 - minChange) * left.A);
        if (alpha < lb)
        {
            alpha = lb;
        }

        VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
        if (EnforceNonNegativity)
        {
            // Same non-negativity projection as in the bracketing phase.
            VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
            {
                if (newXval < 0.0)
                {
                    newXval = 0;
                }
            });
        }
        Value = Eval(ref _newX, ref _newGrad);
        GradientCalculations++;
        if (!FloatUtils.IsFinite(Value))
        {
            throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
        }
        dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
        PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);
        if ((curr.V > LastValue + c1 * alpha) || (curr.V >= aLo.V))
        {
            // Sufficient decrease still fails (or no improvement over aLo): shrink from the high end.
            if (aHi.A == curr.A)
            {
                // Interval did not move — cannot make further numerical progress.
                if (force)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                }
                else
                {
                    return(false);
                }
            }
            aHi = curr;
        }
        else if (Math.Abs(curr.D) <= -c2)
        {
            // Strong Wolfe conditions satisfied inside the zoom interval; accept.
            return(true);
        }
        else
        {
            // Sufficient decrease holds but curvature does not: keep the endpoint on the
            // side where the slope still points toward a minimizer (N&W zoom update rule).
            if (curr.D * (aHi.A - aLo.A) >= 0)
            {
                aHi = aLo;
            }
            if (aLo.A == curr.A)
            {
                if (force)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                }
                else
                {
                    return(false);
                }
            }
            aLo = curr;
        }
    }
}
/// <summary>
/// Tests the gradient reported by f by comparing the analytic directional derivative
/// against a central-difference numeric estimate along random sparse unit directions.
/// </summary>
/// <param name="f">function to test</param>
/// <param name="x">point at which to test</param>
/// <param name="quiet">If false, outputs detailed info.</param>
/// <returns>maximum normalized difference between analytic and numeric directional derivative over multiple tests</returns>
public static Float Test(DifferentiableFunction f, ref VBuffer<Float> x, bool quiet)
{
    // REVIEW: Delete this method?
    VBuffer<Float> grad = default(VBuffer<Float>);
    VBuffer<Float> newGrad = default(VBuffer<Float>);
    VBuffer<Float> newX = default(VBuffer<Float>);

    // Analytic gradient at x; every random direction below is tested against it.
    f(ref x, ref grad, null);
    if (!quiet)
        Console.WriteLine(Header);
    Float maxNormDiff = Float.NegativeInfinity;

    int numIters = Math.Min((int)x.Length, 10);
    int maxDirCount = Math.Min((int)x.Length / 2, 100);
    for (int n = 1; n <= numIters; n++)
    {
        int dirCount = Math.Min(n * 10, maxDirCount);
        List<int> indices = new List<int>(dirCount);
        List<Float> values = new List<Float>(dirCount);
        // O(1) duplicate rejection; the previous indices.IndexOf scan was O(dirCount)
        // per draw, O(dirCount^2) overall. The List still records insertion order, so the
        // arrays handed to the VBuffer constructor are unchanged in content and order.
        HashSet<int> seen = new HashSet<int>();
        for (int i = 0; i < dirCount; i++)
        {
            int index = _r.Next((int)x.Length);
            while (!seen.Add(index))
                index = _r.Next((int)x.Length);
            indices.Add(index);
            values.Add(SampleFromGaussian(_r));
        }

        VBuffer<Float> dir = new VBuffer<Float>(x.Length, values.Count, values.ToArray(), indices.ToArray());
        // Normalize so Eps is a consistent absolute step length along the direction.
        Float norm = VectorUtils.Norm(dir);
        VectorUtils.ScaleBy(ref dir, 1 / norm);

        // Central difference: (f(x + Eps*dir) - f(x - Eps*dir)) / (2*Eps).
        VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
        Float rVal = f(ref newX, ref newGrad, null);
        VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
        Float lVal = f(ref newX, ref newGrad, null);

        Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
        Float numDeriv = (rVal - lVal) / (2 * Eps);
        // Relative disagreement; NaN/Inf if dirDeriv is zero, which Max then propagates visibly.
        Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
        Float diff = numDeriv - dirDeriv;
        if (!quiet)
            Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
        maxNormDiff = Math.Max(maxNormDiff, normDiff);
    }

    return maxNormDiff;
}