/// <summary> /// Create a MNIST trainer (writing recognition) that will be added to an environemnt. /// </summary> /// <param name="sigma">The sigma environemnt this trainer will be assigned to.</param> /// <returns>The newly created trainer.</returns> private static ITrainer CreateMnistTrainer(SigmaEnvironment sigma) { IDataset dataset = Defaults.Datasets.Mnist(); ITrainer trainer = sigma.CreateTrainer("mnist-trainer"); trainer.Network = new Network(); trainer.Network.Architecture = InputLayer.Construct(28, 28) + DropoutLayer.Construct(0.2) + FullyConnectedLayer.Construct(1000, activation: "rel") + DropoutLayer.Construct(0.4) + FullyConnectedLayer.Construct(800, activation: "rel") + DropoutLayer.Construct(0.4) + FullyConnectedLayer.Construct(10, activation: "sigmoid") + OutputLayer.Construct(10) + SoftMaxCrossEntropyCostLayer.Construct(); trainer.TrainingDataIterator = new MinibatchIterator(100, dataset); trainer.AddNamedDataIterator("validation", new UndividedIterator(Defaults.Datasets.MnistValidation())); trainer.Optimiser = new AdagradOptimiser(baseLearningRate: 0.02); trainer.Operator = new CudaSinglethreadedOperator(); trainer.AddInitialiser("*.weights", new GaussianInitialiser(standardDeviation: 0.1)); trainer.AddInitialiser("*.bias*", new GaussianInitialiser(standardDeviation: 0.05)); trainer.AddLocalHook(new ValueReporter("optimiser.cost_total", TimeStep.Every(1, TimeScale.Iteration), reportEpochIteration: true) .On(new ExtremaCriteria("optimiser.cost_total", ExtremaTarget.Min))); trainer.AddLocalHook(new RunningTimeReporter(TimeStep.Every(1, TimeScale.Epoch), 4)); return(trainer); }
private static void SampleMnist() { SigmaEnvironment sigma = SigmaEnvironment.Create("mnist"); sigma.SetRandomSeed(0); IDataset dataset = Defaults.Datasets.Mnist(); ITrainer trainer = sigma.CreateTrainer("mnist-trainer"); trainer.Network = new Network(); trainer.Network.Architecture = InputLayer.Construct(28, 28) + DropoutLayer.Construct(0.2) + FullyConnectedLayer.Construct(1000, activation: "rel") + DropoutLayer.Construct(0.4) + FullyConnectedLayer.Construct(800, activation: "rel") + DropoutLayer.Construct(0.4) + FullyConnectedLayer.Construct(10, activation: "sigmoid") + OutputLayer.Construct(10) + SoftMaxCrossEntropyCostLayer.Construct(); trainer.TrainingDataIterator = new MinibatchIterator(100, dataset); trainer.AddNamedDataIterator("validation", new UndividedIterator(Defaults.Datasets.MnistValidation())); //trainer.Optimiser = new GradientDescentOptimiser(learningRate: 0.01); //trainer.Optimiser = new MomentumGradientOptimiser(learningRate: 0.01, momentum: 0.9); trainer.Optimiser = new AdagradOptimiser(baseLearningRate: 0.02); trainer.Operator = new CudaSinglethreadedOperator(); trainer.AddInitialiser("*.weights", new GaussianInitialiser(standardDeviation: 0.1)); trainer.AddInitialiser("*.bias*", new GaussianInitialiser(standardDeviation: 0.05)); trainer.AddLocalHook(new ValueReporter("optimiser.cost_total", TimeStep.Every(1, TimeScale.Iteration), reportEpochIteration: true) .On(new ExtremaCriteria("optimiser.cost_total", ExtremaTarget.Min))); var validationTimeStep = TimeStep.Every(1, TimeScale.Epoch); trainer.AddHook(new MultiClassificationAccuracyReporter("validation", validationTimeStep, tops: new[] { 1, 2, 3 })); for (int i = 0; i < 10; i++) { trainer.AddGlobalHook(new TargetMaximisationReporter(trainer.Operator.Handler.NDArray(ArrayUtils.OneHot(i, 10), 10), TimeStep.Every(1, TimeScale.Epoch))); } trainer.AddLocalHook(new RunningTimeReporter(TimeStep.Every(10, TimeScale.Iteration), 32)); trainer.AddLocalHook(new RunningTimeReporter(TimeStep.Every(1, TimeScale.Epoch), 4)); trainer.AddHook(new StopTrainingHook(atEpoch: 10)); sigma.PrepareAndRun(); }