// Example #1
        // Runs a single epoch of training.
        // Walks the training set in fixed-size minibatches; for each batch a gradient
        // closure zeroes the accumulated gradients, runs a forward/backward pass through
        // the model and criterion, and hands the outputs and gradients to the optimizer.
        private static void TrainEpoch(Sequential model, ICriterion criterion, SgdOptimizer optim, DataSet trainingSet, int numInputs, bool useTargetClasses)
        {
            using (new SimpleTimer("Training epoch completed in {0}ms"))
            {
                for (int offset = 0; offset <= trainingSet.inputs.Shape[0] - BatchSize; offset += BatchSize)
                {
                    Console.Write(".");

                    int start = offset; // local copy captured by the closure below
                    var evalGradients = new GradFunc(parameters =>
                    {
                        using (var batchInputs = trainingSet.inputs.Narrow(0, start, BatchSize))
                        using (var batchTargets = trainingSet.targets.Narrow(0, start, BatchSize))
                        using (var batchClasses = trainingSet.targetValues.Narrow(0, start, BatchSize))
                        {
                            // Reset the gradient accumulators before the backward pass.
                            foreach (var g in model.GetGradParameters())
                            {
                                Ops.Fill(g, 0);
                            }

                            // Some criteria train against class indices, others against
                            // one-hot/float targets; the model builder tells us which.
                            var targets = useTargetClasses ? batchClasses : batchTargets;

                            var output = model.Forward(batchInputs, ModelMode.Train);
                            criterion.UpdateOutput(output, targets);
                            var gradInput = criterion.UpdateGradInput(output, targets);
                            model.Backward(batchInputs, gradInput, ModelMode.Train);

                            return new OutputAndGrads()
                            {
                                output = output,
                                grads = model.GetGradParameters().ToArray()
                            };
                        }
                    });

                    optim.Update(evalGradients, model.GetParameters().ToArray());
                }
            }
            Console.WriteLine();
        }
        /// <summary>
        /// Initialize weights by running SGD up to specified tolerance.
        /// </summary>
        /// <param name="ch">Channel used for informational progress messages.</param>
        /// <param name="cursorFactory">Factory producing cursors over the labeled training examples.</param>
        /// <returns>The weight vector produced by the SGD run, intended as a warm start for the main optimizer.</returns>
        protected virtual VBuffer <float> InitializeWeightsSgd(IChannel ch, FloatLabelCursor.Factory cursorFactory)
        {
            if (!Quiet)
            {
                ch.Info("Running SGD initialization with tolerance {0}", SgdInitializationTolerance);
            }

            // Termination test: checked once per SGD step. Every 1000 examples it measures
            // how far the iterate has moved since the previous checkpoint and stops once
            // that distance falls below SgdInitializationTolerance.
            int        numExamples  = 0;
            var        oldWeights   = VBufferUtils.CreateEmpty <float>(BiasCount + WeightCount);
            DTerminate terminateSgd =
                (in VBuffer <float> x) =>
            {
                if (++numExamples % 1000 != 0)
                {
                    return(false);
                }
                // oldWeights -= x, so normDiff is the distance between the current point
                // and the last checkpoint; then x becomes the new checkpoint.
                VectorUtils.AddMult(in x, -1, ref oldWeights);
                float normDiff = VectorUtils.Norm(oldWeights);
                x.CopyTo(ref oldWeights);
                // #if OLD_TRACING // REVIEW: How should this be ported?
                if (!Quiet)
                {
                    Console.Write(".");
                    if (numExamples % 50000 == 0)
                    {
                        Console.WriteLine("\t{0}\t{1}", numExamples, normDiff);
                    }
                }
                // #endif
                return(normDiff < SgdInitializationTolerance);
            };

            VBuffer <float>  result = default(VBuffer <float>);
            FloatLabelCursor cursor = null;

            try
            {
                // Scratch buffer reused across AccumulateOneGradient calls to avoid reallocation.
                float[] scratch = null;

                // Stochastic gradient: each invocation consumes one example from the cursor,
                // transparently reopening the cursor when the data is exhausted so SGD can
                // make multiple passes over the training set.
                SgdOptimizer.DStochasticGradient lossSgd =
                    (in VBuffer <float> x, ref VBuffer <float> grad) =>
                {
                    // Zero out the gradient by sparsifying.
                    grad = new VBuffer <float>(grad.Length, 0, grad.Values, grad.Indices);
                    EnsureBiases(ref grad);

                    if (cursor == null || !cursor.MoveNext())
                    {
                        if (cursor != null)
                        {
                            cursor.Dispose();
                        }
                        cursor = cursorFactory.Create();
                        // An empty data set leaves the gradient at zero (after bias handling).
                        if (!cursor.MoveNext())
                        {
                            return;
                        }
                    }
                    AccumulateOneGradient(in cursor.Features, cursor.Label, cursor.Weight, in x, ref grad, ref scratch);
                };

                // Dense vs. sparse starting point, matching the optimizer configuration.
                VBuffer <float> sgdWeights;
                if (DenseOptimizer)
                {
                    sgdWeights = VBufferUtils.CreateDense <float>(BiasCount + WeightCount);
                }
                else
                {
                    sgdWeights = VBufferUtils.CreateEmpty <float>(BiasCount + WeightCount);
                }
                SgdOptimizer sgdo = new SgdOptimizer(terminateSgd);
                sgdo.Minimize(lossSgd, ref sgdWeights, ref result);
                // #if OLD_TRACING // REVIEW: How should this be ported?
                if (!Quiet)
                {
                    Console.WriteLine();
                }
                // #endif
                // NOTE(review): numExamples counts examples seen, not rounds — the message
                // wording predates this; confirm intended meaning before changing it.
                ch.Info("SGD initialization done in {0} rounds", numExamples);
            }
            finally
            {
                // The cursor is created inside the lossSgd closure; make sure the last one is released.
                if (cursor != null)
                {
                    cursor.Dispose();
                }
            }

            return(result);
        }
// Example #3
        // End of configuration options
        //##########################################################################



        // Entry point: sets up the TensorSharp backend, loads MNIST, builds the chosen
        // model/criterion pair, then alternates training epochs with test-set evaluation.
        static void Main(string[] args)
        {
            // Init TensorSharp: pick a CPU or CUDA allocator depending on AccMode.
            IAllocator allocator;
            if (AccMode == AccelMode.Cpu)
            {
                allocator = new CpuAllocator();
            }
            else
            {
                var cudaContext = new TSCudaContext();
                cudaContext.Precompile(Console.Write);
                cudaContext.CleanUnusedPTX();
                allocator = new CudaAllocator(cudaContext, 0);
            }

            var seedSource = new SeedSource(42); // set seed to a known value - we do this to make the training repeatable

            // Load data
            if (string.IsNullOrEmpty(MnistFolder))
            {
                throw new ApplicationException("MnistFolder should be set to the path containing the MNIST data set");
            }

            Console.WriteLine("loading data sets");
            DataSet trainingSet;
            DataSet testingSet;

            using (new SimpleTimer("data set loading done in {0}ms"))
            {
                MnistDataSetBuilder.BuildDataSets(allocator, MnistFolder, TRAINING_SIZE, TESTING_SIZE, out trainingSet, out testingSet);
            }

            // Construct the model, loss function and optimizer
            int inputCount = MnistParser.ImageSize * MnistParser.ImageSize;

            Sequential model;
            ICriterion criterion;
            bool       useTargetClasses;

            var useCudnn = AccMode == AccelMode.Cudnn;

            switch (MType)
            {
                case ModelType.MLP:
                    ModelBuilder.BuildMLP(allocator, seedSource, BatchSize, useCudnn, out model, out criterion, out useTargetClasses);
                    break;

                case ModelType.MLPSoftmax:
                    ModelBuilder.BuildMLPSoftmax(allocator, seedSource, BatchSize, useCudnn, out model, out criterion, out useTargetClasses);
                    break;

                case ModelType.Cnn:
                    ModelBuilder.BuildCnn(allocator, seedSource, BatchSize, useCudnn, out model, out criterion, out useTargetClasses);
                    break;

                default:
                    throw new InvalidOperationException("Unrecognized model type " + MType);
            }

            var optimizer = new SgdOptimizer(sgdConfig);

            // Train the model, evaluating on the test set after every epoch.
            for (int epoch = 0; epoch < 50; ++epoch)
            {
                TrainEpoch(model, criterion, optimizer, trainingSet, inputCount, useTargetClasses);
                EvaluateModel(model, testingSet, inputCount);
            }
        }