コード例 #1
0
        /// <summary>
        /// Tests the gradient reported by <paramref name="f"/> by comparing its analytic directional
        /// derivative along <paramref name="dir"/> with a central finite-difference estimate.
        /// </summary>
        /// <param name="f">Function to test</param>
        /// <param name="x">Point at which to test</param>
        /// <param name="dir">Direction to test derivative; the finite-difference step is divided by its norm</param>
        /// <param name="quiet">Whether to disable output</param>
        /// <param name="newGrad">This is a reusable working buffer for intermediate calculations;
        /// it is handed to <paramref name="f"/> on every evaluation made here</param>
        /// <param name="newX">This is a reusable working buffer for intermediate calculations;
        /// it receives the two perturbed points</param>
        /// <returns>Normalized difference between analytic and numeric directional derivative,
        /// computed as |1 - numeric / analytic|</returns>
        public static Float Test(DifferentiableFunction f, ref VBuffer<Float> x, ref VBuffer<Float> dir, bool quiet,
                                 ref VBuffer<Float> newGrad, ref VBuffer<Float> newX)
        {
            Float normDir = VectorUtils.Norm(dir);

            // Only the gradient is needed from the evaluation at x; the function value is not used.
            f(ref x, ref newGrad, null);

            // Must be computed before the evaluations below, which reuse newGrad as scratch space.
            Float dirDeriv = VectorUtils.DotProduct(ref newGrad, ref dir);

            // Scale the step so that the perturbation along dir has length Eps.
            Float scaledEps = Eps / normDir;

            VectorUtils.AddMultInto(ref x, scaledEps, ref dir, ref newX);
            Float rVal = f(ref newX, ref newGrad, null);

            VectorUtils.AddMultInto(ref x, -scaledEps, ref dir, ref newX);
            Float lVal = f(ref newX, ref newGrad, null);

            // Central difference estimate of the derivative along dir.
            Float numDeriv = (rVal - lVal) / (2 * scaledEps);

            Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
            Float diff = numDeriv - dirDeriv;

            if (!quiet)
            {
                Console.WriteLine("{0,-18:0.0000e0}{1,-18:0.0000e0}{2,-15:0.0000e0}{3,0:0.0000e0}", numDeriv, dirDeriv, diff, normDiff);
            }

            return normDiff;
        }
コード例 #2
0
        /// <summary>
        /// Tests the gradient using finite differences on each axis in the list.
        /// Writes a header line followed by one line per coordinate to the console.
        /// </summary>
        /// <param name="f">Function to test</param>
        /// <param name="x">Point at which to test</param>
        /// <param name="coords">List of coordinates to test; each must be a valid index into <paramref name="x"/></param>
        /// <exception cref="ArgumentOutOfRangeException">A coordinate is negative or not less than the length of <paramref name="x"/></exception>
        public static void TestCoords(DifferentiableFunction f, ref VBuffer<Float> x, IList<int> coords)
        {
            // REVIEW: Delete this method?
            VBuffer<Float> grad = default(VBuffer<Float>);
            VBuffer<Float> newGrad = default(VBuffer<Float>);
            VBuffer<Float> newX = default(VBuffer<Float>);

            // Evaluate once at x to obtain the analytic gradient; the function value is not needed.
            f(ref x, ref grad, null);

            Console.WriteLine(Header);

            foreach (int n in coords)
            {
                if (n < 0 || n >= x.Length)
                {
                    throw new ArgumentOutOfRangeException("coords", n, "Coordinate must be a valid index into x.");
                }

                // Sparse unit vector along axis n. The previous code kept the index fixed at 0 and
                // stored n as the *value*, which always probed axis 0 (and gave a zero direction for n == 0).
                VBuffer<Float> dir = new VBuffer<Float>(x.Length, 1, new Float[] { 1 }, new int[] { n });

                VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
                Float rVal = f(ref newX, ref newGrad, null);

                VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
                Float lVal = f(ref newX, ref newGrad, null);

                // Analytic derivative along the axis versus its central-difference estimate.
                Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
                Float numDeriv = (rVal - lVal) / (2 * Eps);

                Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                Float diff = numDeriv - dirDeriv;
                Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
            }
        }
コード例 #3
0
ファイル: L1Optimizer.cs プロジェクト: zyw400/machinelearning
            /// <summary>
            /// Fills <c>_newX</c> with the candidate point for step size <paramref name="alpha"/> along
            /// <c>_dir</c> starting from <c>_x</c>, then zeroes selected non-bias entries of it.
            /// Entries whose index is below <c>_biasCount</c> are never modified by the zeroing pass.
            /// </summary>
            /// <param name="alpha">Step size passed to <c>VectorUtils.AddMultInto</c></param>
            private void GetNextPoint(Float alpha)
            {
                VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);

                if (EnforceNonNegativity)
                {
                    // Non-negativity mode: any negative non-bias entry is set to zero.
                    VBufferUtils.Apply(ref _newX, (int slot, ref Float candidate) =>
                    {
                        if (slot >= _biasCount && candidate < 0.0)
                        {
                            candidate = 0;
                        }
                    });
                    return;
                }

                // Otherwise: a non-bias entry whose sign differs between _x and _newX is set to zero.
                VBufferUtils.ApplyWith(ref _x, ref _newX, (int slot, Float current, ref Float candidate) =>
                {
                    if (slot >= _biasCount && current * candidate < 0.0)
                    {
                        candidate = 0;
                    }
                });
            }
コード例 #4
0
 /// <summary>
 /// Evaluates the wrapped function at the point obtained from <c>_point</c>, <c>_dir</c> and
 /// <paramref name="step"/>, storing that point in <c>_newPoint</c> and the value in <c>_newValue</c>.
 /// </summary>
 /// <param name="step">Multiplier passed to <c>VectorUtils.AddMultInto</c> for the direction</param>
 /// <param name="deriv">Receives the dot product of <c>_dir</c> with <c>_newGrad</c> after the evaluation</param>
 /// <returns>The function value stored in <c>_newValue</c></returns>
 public Float Eval(Float step, out Float deriv)
 {
     // Build the trial point first; the function call below is handed _newGrad to fill.
     VectorUtils.AddMultInto(ref _point, step, ref _dir, ref _newPoint);

     _newValue = _func(ref _newPoint, ref _newGrad, null);

     deriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);

     return _newValue;
 }
コード例 #5
0
            /// <summary>
            /// Appends the latest (point difference, gradient difference) pair and their dot product to the
            /// history lists, then swaps the "new" state into the "current" state and advances <c>Iter</c>.
            /// </summary>
            internal void Shift()
            {
                // While the history is still allowed to grow, check the memory budget (if one is set).
                // Going over it caps _m at the current history size, so the recycling branch below is taken.
                if (_roList.Count < _m && _totalMemLimit > 0 && GC.GetTotalMemory(true) > _totalMemLimit)
                {
                    _m = _roList.Count;
                }

                VBuffer<Float> pointDelta;
                VBuffer<Float> gradDelta;

                if (_roList.Count != _m)
                {
                    // Still room in the history: allocate fresh buffers.
                    pointDelta = CreateWorkingVector();
                    gradDelta = CreateWorkingVector();
                }
                else
                {
                    // History is full: reuse the oldest buffers and slide the rest down by one.
                    // REVIEW: Goofy. Instead somehow consider the array
                    // "circular" in some sense.
                    pointDelta = _sList[0];
                    Array.Copy(_sList, 1, _sList, 0, _m - 1);
                    gradDelta = _yList[0];
                    Array.Copy(_yList, 1, _yList, 0, _m - 1);
                    _roList.RemoveAt(0);
                }

                // pointDelta <- _newX + (-1) * _x, gradDelta <- _newGrad + (-1) * _grad
                VectorUtils.AddMultInto(ref _newX, -1, ref _x, ref pointDelta);
                VectorUtils.AddMultInto(ref _newGrad, -1, ref _grad, ref gradDelta);

                Float ro = VectorUtils.DotProduct(ref pointDelta, ref gradDelta);
                if (ro == 0)
                {
                    throw Ch.Process(new PrematureConvergenceException(this, "ro equals zero. Is your function linear?"));
                }

                // The next free slot in the arrays is the current length of the ro list.
                int slot = _roList.Count;
                _sList[slot] = pointDelta;
                _yList[slot] = gradDelta;
                _roList.Add(ro);

                // Exchange current and new state: values, points and gradients.
                var previousValue = LastValue;
                LastValue = Value;
                Value = previousValue;
                Utils.Swap(ref _x, ref _newX);
                Utils.Swap(ref _grad, ref _newGrad);

                Iter++;
                GradientCalculations = 0;
            }
コード例 #6
0
            /// <summary>
            /// An implementation of the line search for the Wolfe conditions, from Nocedal &amp; Wright.
            /// Trial points along <c>_dir</c> from <c>_x</c> are written to <c>_newX</c> and evaluated with
            /// <c>Eval</c> (which is handed <c>_newGrad</c>); the trial function value is stored in <c>Value</c>.
            /// </summary>
            /// <param name="ch">Channel used to raise check failures and exceptions</param>
            /// <param name="force">If true, the zoom phase is not cut off after a fixed number of steps, and a
            /// step interval that can no longer shrink throws <c>PrematureConvergenceException</c> instead of
            /// returning false</param>
            /// <returns>True if a step passing both the sufficient-decrease and the derivative-magnitude test was
            /// found; false if the zoom phase gave up, which only happens when <paramref name="force"/> is false</returns>
            internal virtual bool LineSearch(IChannel ch, bool force)
            {
                Contracts.AssertValue(ch);
                // Directional derivative at the starting point (step size 0).
                Float dirDeriv = VectorUtils.DotProduct(ref _dir, ref _grad);

                if (dirDeriv == 0)
                {
                    throw ch.Process(new PrematureConvergenceException(this, "Directional derivative is zero. You may be sitting on the optimum."));
                }

                // If a non-descent direction is chosen, the line search will break anyway, so throw here.
                // The most likely reason for this is a bug in your function's gradient computation.
                ch.Check(dirDeriv < 0, "L-BFGS chose a non-descent direction.");

                // Both constants are pre-multiplied by the initial directional derivative (negative at this point):
                // LastValue + c1 * alpha is the sufficient-decrease bound, and -c2 is the positive threshold
                // that |derivative| at a trial point must not exceed.
                Float c1 = (Float)1e-4 * dirDeriv;
                Float c2 = (Float)0.9 * dirDeriv;

                // When Iter == 1 the initial step is 1 / ||_dir||; otherwise the initial step is 1.
                Float alpha = (Iter == 1 ? (1 / VectorUtils.Norm(_dir)) : 1);

                // "last" starts as the point at step 0, with LastValue used as its function value.
                PointValueDeriv last = new PointValueDeriv(0, LastValue, dirDeriv);
                PointValueDeriv aLo  = new PointValueDeriv();
                PointValueDeriv aHi  = new PointValueDeriv();

                // initial bracketing phase: grow alpha until an interval (aLo, aHi) is found or a step is accepted
                while (true)
                {
                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        // Clamp negative entries of the trial point to zero.
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    // A value of +infinity is treated as "step too large": halve it and try again.
                    if (Float.IsPositiveInfinity(Value))
                    {
                        alpha /= 2;
                        continue;
                    }

                    // Any other non-finite value is fatal.
                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }

                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);
                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    // Sufficient decrease violated, or the value stopped decreasing relative to the previous
                    // trial (only checked once a previous trial with a positive step exists):
                    // bracket with the previous point as the low end.
                    if ((curr.V > LastValue + c1 * alpha) || (last.A > 0 && curr.V >= last.V))
                    {
                        aLo = last;
                        aHi = curr;
                        break;
                    }
                    // Sufficient decrease holds and the derivative magnitude is within the threshold: accept.
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        return(true);
                    }
                    // Derivative is no longer negative: bracket with the current point as the low end.
                    else if (curr.D >= 0)
                    {
                        aLo = curr;
                        aHi = last;
                        break;
                    }

                    // No bracket yet: remember this trial and double the step.
                    last = curr;
                    if (alpha == 0)
                    {
                        alpha = Float.Epsilon; // Robust to divisional underflow.
                    }
                    else
                    {
                        alpha *= 2;
                    }
                }

                // minChange: fraction of the interval width the new trial must stay away from either end.
                // maxSteps: number of zoom steps allowed before giving up when force is false.
                Float minChange = (Float)0.01;
                int   maxSteps  = 10;

                // this loop is the "zoom" procedure described in Nocedal & Wright
                for (int step = 0; ; ++step)
                {
                    if (step == maxSteps && !force)
                    {
                        return(false);
                    }

                    // Order the two interval ends by step size (aLo/aHi are ordered by role, not by position).
                    PointValueDeriv left  = aLo.A < aHi.A ? aLo : aHi;
                    PointValueDeriv right = aLo.A < aHi.A ? aHi : aLo;
                    if (left.D > 0 && right.D < 0)
                    {
                        // interpolating cubic would have max in range, not min (can this happen?)
                        // set a to the one with smaller value
                        alpha = aLo.V < aHi.V ? aLo.A : aHi.A;
                    }
                    else
                    {
                        alpha = CubicInterp(aLo, aHi);
                        // Fall back to the midpoint if interpolation did not produce a usable number.
                        if (Float.IsNaN(alpha) || Float.IsInfinity(alpha))
                        {
                            alpha = (aLo.A + aHi.A) / 2;
                        }
                    }

                    // this is to ensure that the new point is within bounds
                    // and that the change is reasonably sized:
                    // alpha is clamped to [lb, ub], each minChange of the interval width inside an end
                    Float ub = (minChange * left.A + (1 - minChange) * right.A);
                    if (alpha > ub)
                    {
                        alpha = ub;
                    }
                    Float lb = (minChange * right.A + (1 - minChange) * left.A);
                    if (alpha < lb)
                    {
                        alpha = lb;
                    }

                    VectorUtils.AddMultInto(ref _x, alpha, ref _dir, ref _newX);
                    if (EnforceNonNegativity)
                    {
                        // Clamp negative entries of the trial point to zero.
                        VBufferUtils.Apply(ref _newX, delegate(int ind, ref Float newXval)
                        {
                            if (newXval < 0.0)
                            {
                                newXval = 0;
                            }
                        });
                    }

                    Value = Eval(ref _newX, ref _newGrad);
                    GradientCalculations++;
                    // Unlike the bracketing phase, any non-finite value (including +infinity) is fatal here.
                    if (!FloatUtils.IsFinite(Value))
                    {
                        throw ch.Except("Optimizer unable to proceed with loss function yielding {0}", Value);
                    }
                    dirDeriv = VectorUtils.DotProduct(ref _dir, ref _newGrad);

                    PointValueDeriv curr = new PointValueDeriv(alpha, Value, dirDeriv);

                    // Sufficient decrease violated, or no improvement over the low end: curr becomes the new high end.
                    if ((curr.V > LastValue + c1 * alpha) || (curr.V >= aLo.V))
                    {
                        // The trial landed exactly on the current high end, so the interval cannot shrink further.
                        if (aHi.A == curr.A)
                        {
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aHi = curr;
                    }
                    // Sufficient decrease holds and the derivative magnitude is within the threshold: accept.
                    else if (Math.Abs(curr.D) <= -c2)
                    {
                        return(true);
                    }
                    else
                    {
                        // curr becomes the new low end; if its derivative has the same sign as the direction
                        // from aLo to aHi (or is zero), the old low end takes over as the high end first.
                        if (curr.D * (aHi.A - aLo.A) >= 0)
                        {
                            aHi = aLo;
                        }
                        // The trial landed exactly on the current low end, so the interval cannot shrink further.
                        if (aLo.A == curr.A)
                        {
                            if (force)
                            {
                                throw ch.Process(new PrematureConvergenceException(this, "Step size interval numerically zero."));
                            }
                            else
                            {
                                return(false);
                            }
                        }
                        aLo = curr;
                    }
                }
            }
コード例 #7
0
        /// <summary>
        /// Tests the gradient reported by <paramref name="f"/> along several random sparse directions,
        /// comparing the analytic directional derivative with a central finite-difference estimate.
        /// </summary>
        /// <param name="f">function to test</param>
        /// <param name="x">point at which to test</param>
        /// <param name="quiet">If false, outputs detailed info.</param>
        /// <returns>maximum normalized difference between analytic and numeric directional derivative over multiple tests;
        /// negative infinity if <paramref name="x"/> has length zero, since no test is run then</returns>
        public static Float Test(DifferentiableFunction f, ref VBuffer<Float> x, bool quiet)
        {
            // REVIEW: Delete this method?
            VBuffer<Float> grad = default(VBuffer<Float>);
            VBuffer<Float> newGrad = default(VBuffer<Float>);
            VBuffer<Float> newX = default(VBuffer<Float>);

            // Evaluate once at x to obtain the analytic gradient; the function value is not needed.
            f(ref x, ref grad, null);

            if (!quiet)
            {
                Console.WriteLine(Header);
            }

            Float maxNormDiff = Float.NegativeInfinity;

            int numIters = Math.Min((int)x.Length, 10);

            // At most half of the coordinates (capped at 100) are perturbed per direction, but never fewer
            // than one: without the lower bound a length-1 vector would get an empty direction whose norm
            // is zero, and the test would report NaN.
            int maxDirCount = Math.Max(1, Math.Min((int)x.Length / 2, 100));

            for (int n = 1; n <= numIters; n++)
            {
                // Later iterations use denser directions, up to maxDirCount non-zero entries.
                int dirCount = Math.Min(n * 10, maxDirCount);
                List<int> indices = new List<int>(dirCount);
                List<Float> values = new List<Float>(dirCount);
                for (int i = 0; i < dirCount; i++)
                {
                    // Draw distinct coordinates; dirCount never exceeds x.Length, so this terminates.
                    int index = _r.Next((int)x.Length);
                    while (indices.IndexOf(index) >= 0)
                    {
                        index = _r.Next((int)x.Length);
                    }
                    indices.Add(index);
                    values.Add(SampleFromGaussian(_r));
                }
                VBuffer<Float> dir = new VBuffer<Float>(x.Length, values.Count, values.ToArray(), indices.ToArray());

                // Normalize so that the perturbation along dir has length Eps.
                Float norm = VectorUtils.Norm(dir);
                VectorUtils.ScaleBy(ref dir, 1 / norm);

                VectorUtils.AddMultInto(ref x, Eps, ref dir, ref newX);
                Float rVal = f(ref newX, ref newGrad, null);

                VectorUtils.AddMultInto(ref x, -Eps, ref dir, ref newX);
                Float lVal = f(ref newX, ref newGrad, null);

                Float dirDeriv = VectorUtils.DotProduct(ref grad, ref dir);
                Float numDeriv = (rVal - lVal) / (2 * Eps);

                Float normDiff = Math.Abs(1 - numDeriv / dirDeriv);
                Float diff = numDeriv - dirDeriv;
                if (!quiet)
                {
                    Console.WriteLine("{0,-9}{1,-18:0.0000e0}{2,-18:0.0000e0}{3,-15:0.0000e0}{4,0:0.0000e0}", n, numDeriv, dirDeriv, diff, normDiff);
                }

                maxNormDiff = Math.Max(maxNormDiff, normDiff);
            }

            return maxNormDiff;
        }