// Runs one training epoch. The training set is walked in steps of BatchSize and,
// for every full mini-batch, the optimizer is handed a callback that computes the
// model output and parameter gradients for that batch. A trailing partial batch
// (fewer than BatchSize rows) is never visited because of the loop bound.
// Note: numInputs is accepted but not read by this method.
private static void TrainEpoch(Sequential model, ICriterion criterion, SgdOptimizer optim, DataSet trainingSet, int numInputs, bool useTargetClasses)
{
    using (new SimpleTimer("Training epoch completed in {0}ms"))
    {
        for (int offset = 0; offset <= trainingSet.inputs.Shape[0] - BatchSize; offset += BatchSize)
        {
            // Progress marker: one dot per mini-batch.
            Console.Write(".");

            GradFunc computeGradients = weights =>
            {
                using (var batchInputs = trainingSet.inputs.Narrow(0, offset, BatchSize))
                using (var batchTargets = trainingSet.targets.Narrow(0, offset, BatchSize))
                using (var batchTargetClasses = trainingSet.targetValues.Narrow(0, offset, BatchSize))
                {
                    // Reset every gradient buffer before the backward pass writes to it.
                    foreach (var gradientBuffer in model.GetGradParameters())
                    {
                        Ops.Fill(gradientBuffer, 0);
                    }

                    // The criterion is fed either the class targets or the plain targets.
                    var expected = useTargetClasses ? batchTargetClasses : batchTargets;

                    var prediction = model.Forward(batchInputs, ModelMode.Train);

                    // UpdateOutput is still invoked before UpdateGradInput; its result is not needed here.
                    criterion.UpdateOutput(prediction, expected);
                    var lossGradient = criterion.UpdateGradInput(prediction, expected);

                    model.Backward(batchInputs, lossGradient, ModelMode.Train);

                    return new OutputAndGrads
                    {
                        output = prediction,
                        grads = model.GetGradParameters().ToArray()
                    };
                }
            };

            var parameterArray = model.GetParameters().ToArray();
            optim.Update(computeGradients, parameterArray);
        }
    }

    // Terminate the line of progress dots.
    Console.WriteLine();
}
/// <summary>
/// Computes initial weights by running SGD until the configured tolerance
/// (<c>SgdInitializationTolerance</c>) is reached.
/// </summary>
/// <param name="ch">Channel that receives the informational start and finish messages.</param>
/// <param name="cursorFactory">Factory used to open a fresh cursor whenever the current one is absent or exhausted.</param>
/// <returns>The buffer passed by reference as the last argument of <c>SgdOptimizer.Minimize</c>.</returns>
protected virtual VBuffer<float> InitializeWeightsSgd(IChannel ch, FloatLabelCursor.Factory cursorFactory)
{
    if (!Quiet)
    {
        ch.Info("Running SGD initialization with tolerance {0}", SgdInitializationTolerance);
    }

    int examplesSeen = 0;
    var previousWeights = VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);

    // Termination callback: counts every invocation, but only evaluates the
    // stopping criterion on every 1000th one.
    DTerminate shouldStop = (in VBuffer<float> current) =>
    {
        examplesSeen++;
        if (examplesSeen % 1000 != 0)
        {
            return false;
        }

        // Fold the current weights into the previous snapshot with factor -1,
        // take the norm of the outcome, then store the current weights as the new snapshot.
        VectorUtils.AddMult(in current, -1, ref previousWeights);
        float deltaNorm = VectorUtils.Norm(previousWeights);
        current.CopyTo(ref previousWeights);

        // REVIEW: This tracing used to be guarded by "#if OLD_TRACING". How should this be ported?
        if (!Quiet)
        {
            Console.Write(".");
            if (examplesSeen % 50000 == 0)
            {
                Console.WriteLine("\t{0}\t{1}", examplesSeen, deltaNorm);
            }
        }

        return deltaNorm < SgdInitializationTolerance;
    };

    VBuffer<float> initialWeights = default(VBuffer<float>);
    FloatLabelCursor activeCursor = null;
    try
    {
        float[] workBuffer = null;

        // Gradient callback: accumulates the gradient of a single example per invocation.
        SgdOptimizer.DStochasticGradient stochasticGradient = (in VBuffer<float> current, ref VBuffer<float> gradient) =>
        {
            // Zero out the gradient by sparsifying.
            gradient = new VBuffer<float>(gradient.Length, 0, gradient.Values, gradient.Indices);
            EnsureBiases(ref gradient);

            bool advanced = activeCursor != null && activeCursor.MoveNext();
            if (!advanced)
            {
                // No cursor yet, or the current one is exhausted: start over with a new one.
                activeCursor?.Dispose();
                activeCursor = cursorFactory.Create();
                if (!activeCursor.MoveNext())
                {
                    // The new cursor yielded nothing; leave the gradient as reset above.
                    return;
                }
            }

            AccumulateOneGradient(in activeCursor.Features, activeCursor.Label, activeCursor.Weight, in current, ref gradient, ref workBuffer);
        };

        VBuffer<float> startWeights = DenseOptimizer
            ? VBufferUtils.CreateDense<float>(BiasCount + WeightCount)
            : VBufferUtils.CreateEmpty<float>(BiasCount + WeightCount);

        SgdOptimizer optimizer = new SgdOptimizer(shouldStop);
        optimizer.Minimize(stochasticGradient, ref startWeights, ref initialWeights);

        // REVIEW: This tracing used to be guarded by "#if OLD_TRACING". How should this be ported?
        if (!Quiet)
        {
            Console.WriteLine();
        }

        ch.Info("SGD initialization done in {0} rounds", examplesSeen);
    }
    finally
    {
        // Release whichever cursor is still open, even when optimization throws.
        activeCursor?.Dispose();
    }

    return initialWeights;
}
// End of configuration options
//##########################################################################

// Program entry point: creates the allocator selected by AccMode, loads the MNIST
// data sets, builds the model selected by MType and then alternates training and
// evaluation for a fixed number of epochs. The args parameter is not read.
static void Main(string[] args)
{
    // Init TensorSharp
    IAllocator allocator;
    if (AccMode != AccelMode.Cpu)
    {
        var gpuContext = new TSCudaContext();
        gpuContext.Precompile(Console.Write);
        gpuContext.CleanUnusedPTX();
        allocator = new CudaAllocator(gpuContext, 0);
    }
    else
    {
        allocator = new CpuAllocator();
    }

    // Seed is fixed to a known value so that training is repeatable.
    var seedSource = new SeedSource(42);

    // Load data
    if (string.IsNullOrEmpty(MnistFolder))
    {
        throw new ApplicationException("MnistFolder should be set to the path containing the MNIST data set");
    }

    Console.WriteLine("loading data sets");
    DataSet trainingSet;
    DataSet testingSet;
    using (new SimpleTimer("data set loading done in {0}ms"))
    {
        MnistDataSetBuilder.BuildDataSets(allocator, MnistFolder, TRAINING_SIZE, TESTING_SIZE, out trainingSet, out testingSet);
    }

    // Construct the model, loss function and optimizer
    int numInputs = MnistParser.ImageSize * MnistParser.ImageSize;
    bool cudnnEnabled = AccMode == AccelMode.Cudnn;

    Sequential model;
    ICriterion criterion;
    bool useTargetClasses;
    switch (MType)
    {
        case ModelType.MLP:
            ModelBuilder.BuildMLP(allocator, seedSource, BatchSize, cudnnEnabled, out model, out criterion, out useTargetClasses);
            break;

        case ModelType.MLPSoftmax:
            ModelBuilder.BuildMLPSoftmax(allocator, seedSource, BatchSize, cudnnEnabled, out model, out criterion, out useTargetClasses);
            break;

        case ModelType.Cnn:
            ModelBuilder.BuildCnn(allocator, seedSource, BatchSize, cudnnEnabled, out model, out criterion, out useTargetClasses);
            break;

        default:
            throw new InvalidOperationException("Unrecognized model type " + MType);
    }

    var optimizer = new SgdOptimizer(sgdConfig);

    // Train the model, evaluating on the test set after every epoch.
    const int epochCount = 50;
    for (int epoch = 0; epoch < epochCount; epoch++)
    {
        TrainEpoch(model, criterion, optimizer, trainingSet, numInputs, useTargetClasses);
        EvaluateModel(model, testingSet, numInputs);
    }
}