/// <summary>
        /// Checks the gradient reported by <paramref name="f"/> along a single direction by comparing
        /// the analytic directional derivative with a two-sided finite-difference estimate.
        /// </summary>
        /// <param name="f">Function whose gradient is checked</param>
        /// <param name="x">Point at which the check is performed</param>
        /// <param name="dir">Direction along which the derivative is checked</param>
        /// <param name="quiet">When true, nothing is written to the console</param>
        /// <param name="newGrad">Reusable working buffer; overwritten by every evaluation of <paramref name="f"/></param>
        /// <param name="newX">Reusable working buffer; overwritten with the perturbed points</param>
        /// <returns>Normalized difference |1 - numeric / analytic| between the two directional derivatives</returns>
        public static Float Test(DifferentiableFunction f, ref VBuffer <Float> x, ref VBuffer <Float> dir, bool quiet,
                                 ref VBuffer <Float> newGrad, ref VBuffer <Float> newX)
        {
            Float dirNorm = VectorUtils.Norm(dir);

            // Only the gradient is needed from the evaluation at x; the function value is discarded.
            // The dot product must be taken before newGrad is overwritten by the evaluations below.
            f(ref x, ref newGrad, null);
            Float analytic = VectorUtils.DotProduct(ref newGrad, ref dir);

            // Step multiplier, scaled by the norm of the direction.
            Float h = Eps / dirNorm;

            // Evaluate on both sides of x (AddMultInto presumably writes x + c * dir into newX).
            VectorUtils.AddMultInto(ref x, h, ref dir, ref newX);
            Float forward = f(ref newX, ref newGrad, null);

            VectorUtils.AddMultInto(ref x, -h, ref dir, ref newX);
            Float backward = f(ref newX, ref newGrad, null);

            Float numeric = (forward - backward) / (2 * h);

            Float relative = Math.Abs(1 - numeric / analytic);
            Float absolute = numeric - analytic;

            if (quiet)
            {
                return relative;
            }

            Console.WriteLine("{0,-18:0.0000e0}{1,-18:0.0000e0}{2,-15:0.0000e0}{3,0:0.0000e0}", numeric, analytic, absolute, relative);
            return relative;
        }
        /// <summary>
        /// Tests the gradient using finite differences on each axis in the list.
        /// Writes <c>Header</c> followed by one line per coordinate containing the coordinate, the
        /// numeric derivative, the analytic derivative, their difference and the normalized difference.
        /// </summary>
        /// <param name="f">Function to test</param>
        /// <param name="x">Point at which to test</param>
        /// <param name="coords">List of coordinates to test; each entry is used as an index into the direction vector, so it is expected to lie in [0, x.Length)</param>
        public static void TestCoords(DifferentiableFunction f, ref VBuffer <Float> x, IList <int> coords)
        {
            // REVIEW: Delete this method?
            VBuffer <Float> grad    = default(VBuffer <Float>);
            VBuffer <Float> newGrad = default(VBuffer <Float>);
            VBuffer <Float> newX    = default(VBuffer <Float>);

            // Evaluated only to fill grad with the analytic gradient at x.
            f(ref x, ref grad, null);

            Console.WriteLine(Header);

            // Sparse direction with exactly one nonzero entry of value 1. Its index is moved to the
            // coordinate under test on every iteration, making dir the unit vector of that axis.
            VBuffer <Float> dir = new VBuffer <Float>(x.Length, 1, new Float[] { 1 }, new int[] { 0 });

            foreach (int n in coords)
            {
                // Select axis n. The value of the nonzero stays 1; writing n into Values instead
                // would test n times axis 0 (and a zero direction for n == 0).
                dir.Indices[0] = n;

                VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
                Float rVal = f(ref newX, ref newGrad, null);

                VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
                Float lVal = f(ref newX, ref newGrad, null);

                Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
                Float numDeriv = (rVal - lVal) / (2 * Eps);

                Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                Float diff     = numDeriv - dirDeriv;
                Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
            }
        }
Пример #3
0
 /// <summary>
 /// Evaluates the function at the trial point obtained from the current point, the stored
 /// direction and <paramref name="step"/>, caching the value and gradient in the instance.
 /// </summary>
 /// <param name="step">Multiplier applied to the stored direction</param>
 /// <param name="deriv">Receives the dot product of the direction with the gradient computed at the trial point</param>
 /// <returns>The function value at the trial point (also stored in <c>_newValue</c>)</returns>
 public Float Eval(Float step, out Float deriv)
 {
     // Build the trial point in _newPoint (presumably _point + step * _dir).
     VectorUtils.AddMultInto(ref _point, step, ref _dir, ref _newPoint);

     // The function call fills _newGrad, which the derivative below depends on.
     Float value = _func(ref _newPoint, ref _newGrad, null);
     _newValue = value;

     deriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
     return value;
 }
Пример #4
0
            /// <summary>
            /// Backtracking line search with Armijo-like condition, from Andrew &amp; Gao
            /// </summary>
            /// <param name="ch">Channel used for checks and for reporting failures</param>
            /// <param name="force">When true, the limit on the number of trials is ignored and the step keeps shrinking until a point is accepted</param>
            /// <returns>True when a trial point passes the acceptance test; false when <c>MaxLineSearch</c> trials failed and <paramref name="force"/> is false</returns>
            internal override bool LineSearch(IChannel ch, bool force)
            {
                Float dirDeriv = -VectorUtils.DotProduct(ref _dir, ref _steepestDescDir);
                if (dirDeriv == 0)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
                }

                // A non-descent direction would break the search below anyway, so it is rejected up front.
                // The usual cause is a bug in the gradient computation of the function; it may also
                // mean the function is not convex.
                ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");

                // The first iteration starts from a step normalized by the length of the direction.
                Float alpha = 1;
                if (Iter == 1)
                {
                    alpha = 1 / VectorUtils.Norm(_dir);
                }

                GetNextPoint(alpha);
                Float unnormCos = VectorUtils.DotProduct(ref _steepestDescDir, ref _newX) - VectorUtils.DotProduct(ref _steepestDescDir, ref _x);

                if (unnormCos < 0)
                {
                    // Zero the components of the direction whose sign disagrees with the steepest
                    // descent direction (indices below _biasCount are left alone), then retry.
                    VBufferUtils.ApplyWith(ref _steepestDescDir, ref _dir,
                                           (int index, Float steepest, ref Float component) =>
                    {
                        if (steepest * component < 0 && index >= _biasCount)
                        {
                            component = 0;
                        }
                    });

                    GetNextPoint(alpha);
                    unnormCos = VectorUtils.DotProduct(ref _steepestDescDir, ref _newX) - VectorUtils.DotProduct(ref _steepestDescDir, ref _x);
                }

                // Each pass evaluates the current trial point; "trial" counts the evaluations made so far.
                for (int trial = 1; ; trial++)
                {
                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;

                    if (Value <= LastValue - Gamma * unnormCos)
                    {
                        return true;
                    }

                    if (!force && trial == MaxLineSearch)
                    {
                        return false;
                    }

                    // Rejected: shrink the step to a quarter and build the next trial point.
                    alpha *= (Float)0.25;
                    GetNextPoint(alpha);
                    unnormCos = VectorUtils.DotProduct(ref _steepestDescDir, ref _newX) - VectorUtils.DotProduct(ref _steepestDescDir, ref _x);
                }
            }
Пример #5
0
            /// <summary>
            /// Appends the latest point and gradient differences to the correction history
            /// (dropping the oldest entry when the history is full), then makes the new point,
            /// gradient and value the current ones and advances the iteration counter.
            /// </summary>
            internal void Shift()
            {
                // While the history is not full and a memory limit is configured, freeze the
                // history size at its current count once GC.GetTotalMemory exceeds that limit.
                if (_roList.Count < _m && _totalMemLimit > 0)
                {
                    if (GC.GetTotalMemory(true) > _totalMemLimit)
                    {
                        _m = _roList.Count;
                    }
                }

                VBuffer <Float> sDiff;
                VBuffer <Float> yDiff;

                if (_roList.Count != _m)
                {
                    // Room left: allocate fresh buffers for the new pair.
                    sDiff = CreateWorkingVector();
                    yDiff = CreateWorkingVector();
                }
                else
                {
                    // REVIEW: Goofy. Instead somehow consider the array
                    // "circular" in some sense.
                    // History full: recycle the oldest buffers and slide the rest down by one.
                    sDiff = _sList[0];
                    Array.Copy(_sList, 1, _sList, 0, _m - 1);
                    yDiff = _yList[0];
                    Array.Copy(_yList, 1, _yList, 0, _m - 1);
                    _roList.RemoveAt(0);
                }

                // Differences between new and current state (presumably _newX - _x and _newGrad - _grad).
                VectorUtils.AddMultInto(ref _newX, -1, ref _x, ref sDiff);
                VectorUtils.AddMultInto(ref _newGrad, -1, ref _grad, ref yDiff);

                Float ro = VectorUtils.DotProduct(ref sDiff, ref yDiff);
                if (ro == 0)
                {
                    throw Ch.Process(new PrematureConvergenceException(this, "ro equals zero. Is your function linear?"));
                }

                // The new pair goes into the first free slot, which is the current count.
                int slot = _roList.Count;
                _sList[slot] = sDiff;
                _yList[slot] = yDiff;
                _roList.Add(ro);

                // Exchange the current and previous values, points and gradients.
                var previous = LastValue;
                LastValue = Value;
                Value     = previous;
                Utils.Swap(ref _x, ref _newX);
                Utils.Swap(ref _grad, ref _newGrad);

                Iter++;
                GradientCalculations = 0;
            }
Пример #6
0
        /// <summary>
        /// Quadratic test objective built from the static vectors <c>_c1</c>, <c>_c2</c> and <c>_c3</c>:
        /// 0.5 * ((x·c1)^2 + (x·c2)^2) + x·c3 + 55.
        /// </summary>
        /// <param name="x">Point at which to evaluate</param>
        /// <param name="grad">Overwritten with a copy of <c>_c3</c> to which <c>_c1</c> and <c>_c2</c> are added, weighted by their dot products with <paramref name="x"/></param>
        /// <param name="progress">Not used by this function</param>
        /// <returns>The value of the objective at <paramref name="x"/></returns>
        private static Float QuadTest2D(ref VBuffer <Float> x, ref VBuffer <Float> grad, IProgressChannelProvider progress = null)
        {
            Float proj1  = VectorUtils.DotProduct(ref x, ref _c1);
            Float proj2  = VectorUtils.DotProduct(ref x, ref _c2);
            Float linear = VectorUtils.DotProduct(ref x, ref _c3);

            // Gradient: start from _c3, then accumulate the weighted _c1 and _c2
            // (AddMult presumably adds weight * source into grad).
            _c3.CopyTo(ref grad);
            VectorUtils.AddMult(ref _c1, proj1, ref grad);
            VectorUtils.AddMult(ref _c2, proj2, ref grad);

            return (Float)0.5 * (proj1 * proj1 + proj2 * proj2) + linear + 55;
        }
Пример #7
0
            /// <summary>
            /// Transforms <c>_dir</c> in place using the stored correction pairs (<c>_sList</c>,
            /// <c>_yList</c>, <c>_roList</c>). Pairs whose ro is not positive are skipped; if no
            /// pair has a positive ro, <c>_dir</c> is left unchanged.
            /// </summary>
            internal void MapDirByInverseHessian()
            {
                int count = _roList.Count;
                if (count == 0)
                {
                    return;
                }

                Float[] alphas = new Float[count];

                // Highest index whose ro is positive; -1 while none has been seen.
                int lastGoodRo = -1;

                // First pass: from the last stored pair down to the first.
                for (int i = count - 1; i >= 0; i--)
                {
                    Float ro = _roList[i];
                    if (!(ro > 0))
                    {
                        continue;
                    }

                    Float a = -VectorUtils.DotProduct(ref _sList[i], ref _dir) / ro;
                    alphas[i] = a;
                    VectorUtils.AddMult(ref _yList[i], a, ref _dir);

                    if (lastGoodRo < 0)
                    {
                        lastGoodRo = i;
                    }
                }

                // if we have no positive ros, dir doesn't change
                if (lastGoodRo < 0)
                {
                    return;
                }

                // Rescale by ro / (y . y) of the last usable pair.
                Float yDotY = VectorUtils.DotProduct(ref _yList[lastGoodRo], ref _yList[lastGoodRo]);
                VectorUtils.ScaleBy(ref _dir, _roList[lastGoodRo] / yDotY);

                // Second pass: from the first stored pair up to the last usable one.
                for (int i = 0; i <= lastGoodRo; i++)
                {
                    Float ro = _roList[i];
                    if (!(ro > 0))
                    {
                        continue;
                    }

                    Float beta = VectorUtils.DotProduct(ref _yList[i], ref _dir) / ro;
                    VectorUtils.AddMult(ref _sList[i], -alphas[i] - beta, ref _dir);
                }
            }
Пример #8
0
 /// <summary>
 /// Chooses the next search direction from the most recently computed gradient, then
 /// promotes the new point, gradient and value to be the current ones.
 /// </summary>
 public void ChangeDir()
 {
     if (!_useCG)
     {
         // Without conjugate gradient the direction is the new gradient scaled by -1.
         VectorUtils.ScaleInto(ref _newGrad, -1, ref _dir);
     }
     else
     {
         Float newSq    = VectorUtils.NormSquared(_newGrad);
         Float newOld   = VectorUtils.DotProduct(ref _newGrad, ref _grad);
         Float oldSq    = VectorUtils.NormSquared(_grad);

         // Polak-Ribiere style coefficient, clamped so it is never negative.
         Float betaPR   = (newSq - newOld) / oldSq;
         Float beta     = Math.Max(0, betaPR);

         // Scale the previous direction by beta, then add the new gradient with weight -1.
         VectorUtils.ScaleBy(ref _dir, beta);
         VectorUtils.AddMult(ref _newGrad, -1, ref _dir);
     }

     _newPoint.CopyTo(ref _point);
     _newGrad.CopyTo(ref _grad);
     _value = _newValue;
 }
Пример #9
0
            /// <summary>
            /// An implementation of the line search for the Wolfe conditions, from Nocedal &amp; Wright
            /// </summary>
            /// <remarks>
            /// Runs in two phases: a bracketing loop that grows the step until an interval of interest
            /// is found (or the step is accepted outright), followed by a "zoom" loop that shrinks that
            /// interval. On success, <c>_newX</c>, <c>_newGrad</c> and <c>Value</c> hold the accepted point.
            /// </remarks>
            /// <param name="ch">Channel used for checks and for raising failures</param>
            /// <param name="force">When true, the zoom phase is not limited to a fixed number of steps, and a collapsed
            /// step interval raises a <c>PrematureConvergenceException</c> instead of returning false</param>
            /// <returns>True when a step satisfying the curvature test is found; false when the zoom phase gives up
            /// (step limit reached or interval collapsed) and <paramref name="force"/> is false</returns>
            internal virtual bool LineSearch(IChannel ch, bool force)
            {
                Contracts.AssertValue(ch);
                Float dirDeriv = VectorUtils.DotProduct(ref _dir, ref _grad);

                if (dirDeriv == 0)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
                }

                // If a non-descent direction is chosen, the line search will break anyway, so fail here.
                // The most likely reason for this is a bug in your function's gradient computation.
                ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");

                // Both constants are pre-multiplied by the (negative) initial directional derivative:
                // c1 * alpha is the required decrease for step alpha, -c2 is the bound on |derivative|.
                Float c1 = (Float)1e-4 * dirDeriv;
                Float c2 = (Float)0.9 * dirDeriv;

                // The first iteration starts from a step normalized by the length of the direction.
                Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1);

                // PointValueDeriv is built from (step, value, directional derivative) and read back
                // below through its A, V and D members.
                PointValueDeriv last = new PointValueDeriv(0, LastValue, dirDeriv);
                PointValueDeriv aLo  = new PointValueDeriv();
                PointValueDeriv aHi  = new PointValueDeriv();

                // initial bracketing phase
                while (true)
                {
                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        // Clamp negative entries of the trial point to zero.
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    if (Float.IsPositiveInfinity(Value))
                    {
                        // Stepped too far: halve the step and try again without updating "last".
                        alpha /= 2;
                        continue;
                    }

                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }

                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    // Insufficient decrease, or no improvement over the previous trial:
                    // the interval [last, curr] is handed to the zoom phase.
                    if ((curr.V > LastValue + c1 * alpha) || (last.A > 0 && curr.V >= last.V))
                    {
                        aLo = last;
                        aHi = curr;
                        break;
                    }
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        // Derivative magnitude is small enough: accept this step.
                        return(true);
                    }
                    else if (curr.D >= 0)
                    {
                        // The derivative changed sign: zoom between curr and last.
                        aLo = curr;
                        aHi = last;
                        break;
                    }

                    // Otherwise keep growing the step.
                    last = curr;
                    if (alpha == 0)
                    {
                        alpha = Float.Epsilon; // Robust to divisional underflow.
                    }
                    else
                    {
                        alpha *= 2;
                    }
                }

                // Fraction of the interval width kept as a margin from either end point,
                // and the number of zoom steps allowed when not forced.
                Float minChange = (Float)0.01;
                int   maxSteps  = 10;

                // this loop is the "zoom" procedure described in Nocedal & Wright
                for (int step = 0; ; ++step)
                {
                    if (step == maxSteps && !force)
                    {
                        return(false);
                    }

                    // Order the two end points by step size.
                    PointValueDeriv left  = aLo.A < aHi.A ? aLo : aHi;
                    PointValueDeriv right = aLo.A < aHi.A ? aHi : aLo;
                    if (left.D > 0 && right.D < 0)
                    {
                        // interpolating cubic would have max in range, not min (can this happen?)
                        // set a to the one with smaller value
                        alpha = aLo.V < aHi.V ? aLo.A : aHi.A;
                    }
                    else
                    {
                        alpha = CubicInterp(aLo, aHi);
                        if (Float.IsNaN(alpha) || Float.IsInfinity(alpha))
                        {
                            // Interpolation failed: fall back to the midpoint.
                            alpha = (aLo.A + aHi.A) / 2;
                        }
                    }

                    // this is to ensure that the new point is within bounds
                    // and that the change is reasonably sized
                    Float ub = (minChange * left.A + (1 - minChange) * right.A);
                    if (alpha > ub)
                    {
                        alpha = ub;
                    }
                    Float lb = (minChange * right.A + (1 - minChange) * left.A);
                    if (alpha < lb)
                    {
                        alpha = lb;
                    }

                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        // Clamp negative entries of the trial point to zero.
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }
                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);

                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    // Insufficient decrease, or no better than aLo: curr becomes the new aHi.
                    if ((curr.V > LastValue + c1 * alpha) || (curr.V >= aLo.V))
                    {
                        if (aHi.A == curr.A)
                        {
                            // The trial step coincides with aHi: the interval can no longer shrink.
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aHi = curr;
                    }
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        // Derivative magnitude is small enough: accept this step.
                        return(true);
                    }
                    else
                    {
                        // curr becomes the new aLo; when its derivative points toward the aHi side,
                        // the old aLo first takes over the role of aHi.
                        if (curr.D * (aHi.A - aLo.A) >= 0)
                        {
                            aHi = aLo;
                        }
                        if (aLo.A == curr.A)
                        {
                            // The trial step coincides with aLo: the interval can no longer shrink.
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aLo = curr;
                    }
                }
            }
        /// <summary>
        /// Tests the gradient reported by f along several random sparse directions, comparing the
        /// analytic directional derivative with a two-sided finite-difference estimate.
        /// </summary>
        /// <param name="f">function to test</param>
        /// <param name="x">point at which to test</param>
        /// <param name="quiet">If false, outputs detailed info.</param>
        /// <returns>maximum normalized difference between analytic and numeric directional derivative over multiple tests;
        /// negative infinity when <paramref name="x"/> has length zero, because no test is run</returns>
        public static Float Test(DifferentiableFunction f, ref VBuffer <Float> x, bool quiet)
        {
            // REVIEW: Delete this method?
            VBuffer <Float> grad    = default(VBuffer <Float>);
            VBuffer <Float> newGrad = default(VBuffer <Float>);
            VBuffer <Float> newX    = default(VBuffer <Float>);

            // Evaluated only to fill grad with the analytic gradient at x.
            f(ref x, ref grad, null);

            if (!quiet)
            {
                Console.WriteLine(Header);
            }

            Float maxNormDiff = Float.NegativeInfinity;

            int numIters = Math.Min((int)x.Length, 10);

            // Every direction needs at least one nonzero component. Without the lower bound a
            // one-dimensional x would produce an empty direction, whose norm is zero, and the
            // normalization below would divide by zero. For lengths of two or more the bound
            // has no effect.
            int maxDirCount = Math.Max(1, Math.Min((int)x.Length / 2, 100));

            for (int n = 1; n <= numIters; n++)
            {
                // The number of nonzero components grows with n, up to maxDirCount.
                int          dirCount = Math.Min(n * 10, maxDirCount);
                List <int>   indices  = new List <int>(dirCount);
                List <Float> values   = new List <Float>(dirCount);
                for (int i = 0; i < dirCount; i++)
                {
                    // Redraw until the index has not been used yet in this direction.
                    int index = _r.Next((int)x.Length);
                    while (indices.IndexOf(index) >= 0)
                    {
                        index = _r.Next((int)x.Length);
                    }
                    indices.Add(index);
                    values.Add(SampleFromGaussian(_r));
                }
                VBuffer <Float> dir = new VBuffer <Float>(x.Length, values.Count, values.ToArray(), indices.ToArray());

                // Scale the direction by the reciprocal of its norm.
                Float norm = VectorUtils.Norm(dir);
                VectorUtils.ScaleBy(ref dir, 1 / norm);

                VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
                Float rVal = f(ref newX, ref newGrad, null);

                VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
                Float lVal = f(ref newX, ref newGrad, null);

                Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
                Float numDeriv = (rVal - lVal) / (2 * Eps);

                Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                Float diff     = numDeriv - dirDeriv;
                if (!quiet)
                {
                    Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
                }

                maxNormDiff = Math.Max(maxNormDiff, normDiff);
            }

            return(maxNormDiff);
        }