Example #1
 /// <summary>
 /// Creates an SgdOptimizer and sets its optimization parameters
 /// </summary>
 /// <param name="terminate">Termination criterion</param>
 /// <param name="rateSchedule">Annealing schedule type for learning rate</param>
 /// <param name="averaging">If true, all iterates are averaged</param>
 /// <param name="t0">Base for learning rate schedule</param>
 /// <param name="batchSize">Average this number of stochastic gradients for each update</param>
 /// <param name="momentum">Momentum parameter</param>
 /// <param name="maxSteps">Maximum number of updates (0 for no max)</param>
 public SgdOptimizer(DTerminate terminate, RateScheduleType rateSchedule = RateScheduleType.Sqrt, bool averaging = false, Float t0 = 1, int batchSize = 1, Float momentum = 0, int maxSteps = 0)
 {
     _terminate    = terminate;
     _rateSchedule = rateSchedule;
     _averaging    = averaging;
     _t0           = t0;
     _batchSize    = batchSize;
     _momentum     = momentum;
     _maxSteps     = maxSteps;
 }
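
To make the parameter list concrete, here is a minimal construction sketch. It is not part of the source above; it assumes the same Float alias, VBuffer&lt;Float&gt;, and ref-style DTerminate delegate that appear in Example #3, and the termination test shown is deliberately trivial.

 // Never stop early; rely on maxSteps to end the run.
 DTerminate runToMaxSteps = (ref VBuffer<Float> x) => false;

 // Constant learning rate, iterate averaging, mini-batches of 16 gradients,
 // momentum 0.9, and a hard cap of 10,000 updates.
 SgdOptimizer opt = new SgdOptimizer(
     runToMaxSteps,
     SgdOptimizer.RateScheduleType.Constant,
     averaging: true,
     t0: 1,
     batchSize: 16,
     momentum: (Float)0.9,
     maxSteps: 10000);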
Example #2
 /// <summary>
 /// Creates a new GDOptimizer with the given optimization parameters
 /// </summary>
 /// <param name="terminate">Termination criterion</param>
 /// <param name="lineSearch">Line search to use; if null, a default is chosen based on useCG</param>
 /// <param name="useCG">If true, use conjugate gradient with a cubic-interpolation line search by default; otherwise plain gradient descent with a backtracking (Armijo) line search</param>
 /// <param name="maxSteps">Maximum number of updates (0 for no max)</param>
 public GDOptimizer(DTerminate terminate, IDiffLineSearch lineSearch = null, bool useCG = false, int maxSteps = 0)
 {
     Terminate = terminate;
      // Choose a default line search to match the descent method.
      if (lineSearch == null)
     {
         if (useCG)
         {
             LineSearch = new CubicInterpLineSearch((Float)0.01);
         }
         else
         {
             LineSearch = new BacktrackingLineSearch();
         }
     }
     else
     {
         LineSearch = lineSearch;
     }
     _maxSteps = maxSteps;
     UseCG     = useCG;
 }
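
A matching sketch for GDOptimizer, again hypothetical and reusing the delegate shape from Example #3, shows how the useCG flag pairs with the two default line searches:

 DTerminate stop = (ref VBuffer<Float> x) => false;

 // Conjugate gradient; lineSearch is null, so CubicInterpLineSearch((Float)0.01) is used.
 GDOptimizer cg = new GDOptimizer(stop, null, useCG: true, maxSteps: 500);

 // Plain gradient descent with an explicitly supplied backtracking (Armijo) line search.
 GDOptimizer gd = new GDOptimizer(stop, new BacktrackingLineSearch(), useCG: false, maxSteps: 500);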
Example #3
        public static void Main(string[] argv)
        {
            RunTest(QuadTest);
            RunTest(LogTest);

            VBuffer<Float> grad  = VBufferUtils.CreateEmpty<Float>(2);
            int            n     = 0;
            bool           print = false;
            DTerminate     term  =
                (ref VBuffer<Float> x) =>
            {
                // Evaluate the full gradient at x; stop once its norm drops below 1e-5.
                QuadTest2D(ref x, ref grad);
                Float norm = VectorUtils.Norm(grad);
                if (++n % 1000 == 0 || print)
                {
                    Console.WriteLine("{0}\t{1}", n, norm);
                }
                return norm < 1e-5;
            };
            SgdOptimizer   sgdo = new SgdOptimizer(term, SgdOptimizer.RateScheduleType.Constant, false, 100, 1, (Float)0.99);
            VBuffer<Float> init;

            CreateWrapped(out init, 0, 0);
            VBuffer<Float> ans = default(VBuffer<Float>);

            sgdo.Minimize(StochasticQuadTest2D, ref init, ref ans);
            QuadTest2D(ref ans, ref grad);
            Console.WriteLine(VectorUtils.Norm(grad));
            Console.WriteLine();
            Console.WriteLine();
            n = 0;
            // Repeat with full-gradient descent; useCG: true selects conjugate gradient.
            GDOptimizer gdo = new GDOptimizer(term, null, true);

            print = true;
            CreateWrapped(out init, 0, 0);
            gdo.Minimize(QuadTest2D, ref init, ref ans);
            QuadTest2D(ref ans, ref grad);
            Console.WriteLine(VectorUtils.Norm(grad));
        }
Example #4
        /// <summary>
        /// Initialize weights by running SGD up to the specified tolerance.
        /// </summary>
        protected virtual VBuffer<float> InitializeWeightsSgd(IChannel ch, FloatLabelCursor.Factory cursorFactory)
        {
            if (!Quiet)
            {
                ch.Info("Running SGD initialization with tolerance {0}", SgdInitializationTolerance);
            }

            int        numExamples  = 0;
            var        oldWeights   = VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);
            DTerminate terminateSgd =
                (in VBuffer<float> x) =>
            {
                if (++numExamples % 1000 != 0)
                {
                    return false;
                }
                // Measure how far the weights moved since the last check, then snapshot x.
                VectorUtils.AddMult(in x, -1, ref oldWeights);
                float normDiff = VectorUtils.Norm(oldWeights);
                x.CopyTo(ref oldWeights);
                // #if OLD_TRACING // REVIEW: How should this be ported?
                if (!Quiet)
                {
                    Console.Write(".");
                    if (numExamples % 50000 == 0)
                    {
                        Console.WriteLine("\t{0}\t{1}", numExamples, normDiff);
                    }
                }
                // #endif
                return normDiff < SgdInitializationTolerance;
            };

            VBuffer<float>   result = default(VBuffer<float>);
            FloatLabelCursor cursor = null;

            try
            {
                float[] scratch = null;

                SgdOptimizer.DStochasticGradient lossSgd =
                    (in VBuffer<float> x, ref VBuffer<float> grad) =>
                {
                    // Zero out the gradient by sparsifying.
                    grad = new VBuffer<float>(grad.Length, 0, grad.Values, grad.Indices);
                    EnsureBiases(ref grad);

                    // Advance the cursor, recreating it to loop over the data when exhausted.
                    if (cursor == null || !cursor.MoveNext())
                    {
                        if (cursor != null)
                        {
                            cursor.Dispose();
                        }
                        cursor = cursorFactory.Create();
                        if (!cursor.MoveNext())
                        {
                            return;
                        }
                    }
                    AccumulateOneGradient(in cursor.Features, cursor.Label, cursor.Weight, in x, ref grad, ref scratch);
                };

                VBuffer<float> sgdWeights;
                if (DenseOptimizer)
                {
                    sgdWeights = VBufferUtils.CreateDense<float>(BiasCount + WeightCount);
                }
                else
                {
                    sgdWeights = VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);
                }
                SgdOptimizer sgdo = new SgdOptimizer(terminateSgd);
                sgdo.Minimize(lossSgd, ref sgdWeights, ref result);
                // #if OLD_TRACING // REVIEW: How should this be ported?
                if (!Quiet)
                {
                    Console.WriteLine();
                }
                // #endif
                ch.Info("SGD initialization done in {0} rounds", numExamples);
            }
            finally
            {
                if (cursor != null)
                {
                    cursor.Dispose();
                }
            }

            return result;
        }
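
The terminateSgd pattern above is self-contained enough to reuse outside a trainer. A hypothetical standalone version follows; tolerance, checkCalls, and dim are illustrative names, not from the source, and it assumes the in-parameter DTerminate form used in this example.

        float tolerance = 1e-3f;
        int checkCalls = 0;
        var snapshot = VBufferUtils.CreateEmpty<float>(dim);
        DTerminate terminateOnSmallStep =
            (in VBuffer<float> x) =>
            {
                // Only test convergence every 1000 gradient evaluations.
                if (++checkCalls % 1000 != 0)
                    return false;
                // snapshot currently holds the previous iterate; form (prev - x).
                VectorUtils.AddMult(in x, -1, ref snapshot);
                float step = VectorUtils.Norm(snapshot);   // ||x - prev||
                x.CopyTo(ref snapshot);                    // remember x for next time
                return step < tolerance;
            };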