protected void CanOptimizeRosenbrock(OptimizerBase<float> optimizer)
{
    var weight = new NeuroWeight<float>(Matrix<float>.Build.Dense(2, 1));
    //_output.WriteLine($"Rosenbrock: {Rosenbrock(weight.Weight)}");
    //_output.WriteLine(weight.Weight.ToMatrixString());

    var watch = new Stopwatch();
    watch.Start();

    for (int i = 0; i < 10000; i++)
    {
        RosenbrockGrad(weight.Weight, weight.Gradient);
        optimizer.Optimize(weight);
        //_output.WriteLine($"Rosenbrock: {Rosenbrock(weight.Weight)}");
        //_output.WriteLine(weight.Weight.ToMatrixString());
    }

    watch.Stop();

    double result = Rosenbrock(weight.Weight);
    result.ShouldBeLessThanOrEqualTo(6e-5);

    _output.WriteLine($"Rosenbrock: {result}");
    _output.WriteLine($"Optimized in {watch.Elapsed}");
}
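// The test above relies on Rosenbrock/RosenbrockGrad helpers that are not shown in this listing.
// A minimal sketch of what they could look like for the standard 2-D Rosenbrock function
// f(x, y) = (1 - x)^2 + 100 * (y - x^2)^2 (hypothetical helpers, assuming MathNet matrices):
private static double Rosenbrock(Matrix<float> w)
{
    float x = w[0, 0], y = w[1, 0];
    return Math.Pow(1 - x, 2) + 100 * Math.Pow(y - x * x, 2);
}

private static void RosenbrockGrad(Matrix<float> w, Matrix<float> grad)
{
    float x = w[0, 0], y = w[1, 0];
    // df/dx = -2(1 - x) - 400x(y - x^2), df/dy = 200(y - x^2)
    grad[0, 0] = -2 * (1 - x) - 400 * x * (y - x * x);
    grad[1, 0] = 200 * (y - x * x);
}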
private GruLayer(GruLayer<T> other) : base(other)
{
    _wxh = other._wxh.Clone();
    _wxr = other._wxr.Clone();
    _wxz = other._wxz.Clone();

    _whh = other._whh.Clone();
    _whr = other._whr.Clone();
    _whz = other._whz.Clone();

    _bxh = other._bxh.Clone();
    _bxr = other._bxr.Clone();
    _bxz = other._bxz.Clone();

    _bhh = other._bhh.Clone();
    _bhr = other._bhr.Clone();
    _bhz = other._bhz.Clone();

    _lastH = other._lastH.CloneMatrix();
    _hiddenOnes = other._hiddenOnes.CloneMatrix();

    Inputs = other.Inputs.Clone();
    Outputs = other.Outputs.Clone();

    _hPropVals = other._hPropVals.Clone();
    _hNewVals = other._hNewVals.Clone();
    _rVals = other._rVals.Clone();
    _zVals = other._zVals.Clone();

    _hSize = other._hSize;

    RegisterWeights();
}
public GruLayer(int xSize, int hSize,
    IMatrixInitializer<T> linearWeightInitializer,
    IMatrixInitializer<T> hiddenWeightInitializer,
    IMatrixInitializer<T> biasInitializer)
{
    _hSize = hSize;

    _wxh = new NeuroWeight<T>(linearWeightInitializer.CreateMatrix(hSize, xSize));
    _wxr = new NeuroWeight<T>(linearWeightInitializer.CreateMatrix(hSize, xSize));
    _wxz = new NeuroWeight<T>(linearWeightInitializer.CreateMatrix(hSize, xSize));

    _whh = new NeuroWeight<T>(hiddenWeightInitializer.CreateMatrix(hSize, hSize));
    _whr = new NeuroWeight<T>(hiddenWeightInitializer.CreateMatrix(hSize, hSize));
    _whz = new NeuroWeight<T>(hiddenWeightInitializer.CreateMatrix(hSize, hSize));

    _bxh = new NeuroWeight<T>(biasInitializer.CreateMatrix(hSize, 1));
    _bxr = new NeuroWeight<T>(biasInitializer.CreateMatrix(hSize, 1));
    _bxz = new NeuroWeight<T>(biasInitializer.CreateMatrix(hSize, 1));
    _bhh = new NeuroWeight<T>(biasInitializer.CreateMatrix(hSize, 1));
    _bhr = new NeuroWeight<T>(biasInitializer.CreateMatrix(hSize, 1));
    _bhz = new NeuroWeight<T>(biasInitializer.CreateMatrix(hSize, 1));

    ResetOptimizer();

    ErrorFunction = new MeanSquareError<T>();

    RegisterWeights();
}
private LinearLayer(LinearLayer<T> other) : base(other)
{
    _weights = other._weights.Clone();
    _bias = other._bias.Clone();

    RegisterWeights(_bias, _weights);
}
public override void AdagradUpdate(Float learningRate, NeuroWeight<Float> weight)
{
    using (var ptrs = new MatrixPointersBag<Float>(weight.Weight, weight.Cache2, weight.Gradient))
    {
        AdagradUpdate(learningRate, ptrs[0], ptrs[1], ptrs[2], weight.Weight.Length());
    }
}
public override void AdamUpdate(float learningRate, float b1, float b2, NeuroWeight<Float> weight)
{
    using (var ptrs = new MatrixPointersBag<Float>(weight.Weight, weight.Cache1, weight.Cache2, weight.Gradient))
    {
        AdamUpdate(learningRate, b1, b2, weight.Timestep, ptrs[0], ptrs[1], ptrs[2], ptrs[3], weight.Weight.Length());
    }
}
public void RMSPropValuesAreEqual(float learningRate, float decayRate, float weightDecay, float momentum)
{
    var local = new NeuroWeight<float>(MatrixFactory.RandomMatrix<float>(10, 10, 1e-2f));
    var remote = local.Clone();

    for (int i = 0; i < 100; i++)
    {
        var grad = MatrixFactory.RandomMatrix<float>(10, 10, 1.0f);
        grad.CopyTo(local.Gradient);
        grad.CopyTo(remote.Gradient);

        MathProvider.GravesRmsPropUpdate(weightDecay, learningRate, decayRate, momentum, local);

        using (var ptrs = new MatrixPointersBag<float>(true, remote.Weight, remote.Gradient, remote.Cache1, remote.Cache2, remote.CacheM))
        {
            Interface.TestRMSPropUpdate(ptrs.Definitions[0], ptrs.Definitions[1], ptrs.Definitions[2], ptrs.Definitions[3], ptrs.Definitions[4],
                learningRate, decayRate, momentum, weightDecay);
        }

        local.Weight.ShouldMatrixEqualWithinError(remote.Weight);
        local.Cache1.ShouldMatrixEqualWithinError(remote.Cache1);
        local.Cache2.ShouldMatrixEqualWithinError(remote.Cache2);
        local.CacheM.ShouldMatrixEqualWithinError(remote.CacheM);
        local.Gradient.ShouldMatrixEqualWithinError(remote.Gradient);
    }
}
public GruLayer(BinaryReader reader) : base(reader)
{
    _bxr = NeuroWeight<T>.Load(reader.BaseStream);
    _bxz = NeuroWeight<T>.Load(reader.BaseStream);
    _bxh = NeuroWeight<T>.Load(reader.BaseStream);

    _bhr = NeuroWeight<T>.Load(reader.BaseStream);
    _bhz = NeuroWeight<T>.Load(reader.BaseStream);
    _bhh = NeuroWeight<T>.Load(reader.BaseStream);

    _wxr = NeuroWeight<T>.Load(reader.BaseStream);
    _wxz = NeuroWeight<T>.Load(reader.BaseStream);
    _wxh = NeuroWeight<T>.Load(reader.BaseStream);

    _whr = NeuroWeight<T>.Load(reader.BaseStream);
    _whz = NeuroWeight<T>.Load(reader.BaseStream);
    _whh = NeuroWeight<T>.Load(reader.BaseStream);

    _lastH = MatrixFactory.Load<T>(reader.BaseStream);
    _hiddenOnes = Matrix<T>.Build.Dense(_hSize, _lastH.ColumnCount, Matrix<T>.One);

    RegisterWeights();
}
public LinearLayer(BinaryReader reader) : base(reader)
{
    _bias = NeuroWeight<T>.Load(reader.BaseStream);
    _weights = NeuroWeight<T>.Load(reader.BaseStream);

    RegisterWeights(_bias, _weights);
}
public LinearLayer(int xSize, int ySize, IMatrixInitializer<T> matrixInitializer)
{
    _weights = new NeuroWeight<T>(matrixInitializer.CreateMatrix(ySize, xSize));
    _bias = new NeuroWeight<T>(matrixInitializer.CreateMatrix(ySize, 1));

    ErrorFunction = new MeanSquareError<T>();

    RegisterWeights(_bias, _weights);
}
public void CanTransferWeightThroughNeuroWeightRowMajor()
{
    var weight = MatrixFactory.RandomMatrix<float>(2, 3, 5.0f);
    var local = new NeuroWeight<float>(weight);
    var remote = local.Clone();

    MutateMatrixRowMajor(local.Weight);
    MutateMatrixRowMajor(local.Gradient);
    MutateMatrixRowMajor(local.Cache1);
    MutateMatrixRowMajor(local.Cache2);
    MutateMatrixRowMajor(local.CacheM);

    using (var ptrs = new WeightDefinitionBag<float>(true, remote))
    {
        GpuInterface.Testing.TestComplexWeightTransferRowMajor(ptrs.Definitions[0]);
    }

    local.Weight.ShouldMatrixEqualWithinError(remote.Weight);
    local.Gradient.ShouldMatrixEqualWithinError(remote.Gradient);
    local.Cache1.ShouldMatrixEqualWithinError(remote.Cache1);
    local.Cache2.ShouldMatrixEqualWithinError(remote.Cache2);
    local.CacheM.ShouldMatrixEqualWithinError(remote.CacheM);
}
/// <summary>
///     Performs Adam update on a weight.
/// </summary>
/// <remarks>See https://arxiv.org/pdf/1412.6980.pdf</remarks>
/// <param name="learningRate">Learning rate.</param>
/// <param name="b1">Decay rate of first order MAV.</param>
/// <param name="b2">Decay rate of second order MAV.</param>
/// <param name="weight">Weight.</param>
public abstract void AdamUpdate(float learningRate, float b1, float b2, NeuroWeight<T> weight);
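// A minimal managed sketch of the standard Adam rule from the paper referenced above, written
// against raw arrays for illustration. It assumes Cache1/Cache2 of NeuroWeight hold the biased
// first/second moment estimates and Timestep counts completed updates (as the CPU override's
// MatrixPointersBag arguments suggest); the actual providers do the same element-wise math natively.
private static void AdamUpdateSketch(float learningRate, float b1, float b2, int timestep,
    float[] weight, float[] cache1, float[] cache2, float[] gradient)
{
    const float epsilon = 1e-8f;
    for (int i = 0; i < weight.Length; i++)
    {
        cache1[i] = b1 * cache1[i] + (1 - b1) * gradient[i];                // first moment estimate
        cache2[i] = b2 * cache2[i] + (1 - b2) * gradient[i] * gradient[i];  // second moment estimate
        float m = cache1[i] / (1 - (float)Math.Pow(b1, timestep));          // bias-corrected moments
        float v = cache2[i] / (1 - (float)Math.Pow(b2, timestep));
        weight[i] -= learningRate * m / ((float)Math.Sqrt(v) + epsilon);
    }
}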
public override void Optimize(NeuroWeight<T> weight)
{
    MathProvider.GravesRmsPropUpdate(_weightDecay, LearningRate, _decayRate, _momentum, weight);
}
/// <summary>
///     Performs Graves' version of RMSProp update on a weight.
/// </summary>
/// <remarks>See http://arxiv.org/pdf/1308.0850v5.pdf, page 23</remarks>
/// <param name="weightDecay">Weight decay rate.</param>
/// <param name="learningRate">Learning rate.</param>
/// <param name="decayRate">Decay rate.</param>
/// <param name="momentum">Momentum.</param>
/// <param name="weight">Weight.</param>
public abstract void GravesRmsPropUpdate(float weightDecay, float learningRate, float decayRate, float momentum, NeuroWeight <T> weight);
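// A minimal sketch of Graves' RMSProp variant (the paper referenced above, p. 23), again over raw
// arrays. The cache mapping (Cache1 = running average of squared gradients, Cache2 = running average
// of gradients, CacheM = momentum/delta term) and the way weightDecay enters the gradient are
// assumptions made for illustration, not the library's exact native kernel.
private static void GravesRmsPropSketch(float weightDecay, float learningRate, float decayRate, float momentum,
    float[] weight, float[] cache1, float[] cache2, float[] cacheM, float[] gradient)
{
    const float epsilon = 1e-4f;
    for (int i = 0; i < weight.Length; i++)
    {
        float g = gradient[i] + weightDecay * weight[i];                  // assumed L2-style decay
        cache1[i] = decayRate * cache1[i] + (1 - decayRate) * g * g;      // n_i
        cache2[i] = decayRate * cache2[i] + (1 - decayRate) * g;          // g-bar_i
        float denom = (float)Math.Sqrt(cache1[i] - cache2[i] * cache2[i] + epsilon);
        cacheM[i] = momentum * cacheM[i] - learningRate * g / denom;      // delta_i
        weight[i] += cacheM[i];
    }
}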
public override void GravesRmsPropUpdate(float weightDecay, float learningRate, float decayRate, float momentum, NeuroWeight<Float> weight)
{
    using (var ptrs = new MatrixPointersBag<Float>(weight.Weight, weight.Cache1, weight.Cache2, weight.CacheM, weight.Gradient))
    {
        GravesRMSPropUpdate(weightDecay, learningRate, decayRate, momentum, ptrs[0], ptrs[1], ptrs[2], ptrs[3], ptrs[4], weight.Weight.Length());
    }
}
public abstract void Optimize(NeuroWeight<T> weight);
/// <summary>
///     Performs Adagrad update on a weight.
/// </summary>
/// <param name="learningRate">Learning rate.</param>
/// <param name="weight">Weight.</param>
public abstract void AdagradUpdate(T learningRate, NeuroWeight<T> weight);
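// A minimal sketch of the classic Adagrad rule, assuming Cache2 accumulates squared gradients
// (as the MatrixPointersBag arguments of the CPU AdagradUpdate override earlier in this listing suggest).
private static void AdagradSketch(float learningRate, float[] weight, float[] cache2, float[] gradient)
{
    const float epsilon = 1e-8f;
    for (int i = 0; i < weight.Length; i++)
    {
        cache2[i] += gradient[i] * gradient[i];                                           // accumulate g^2
        weight[i] -= learningRate * gradient[i] / ((float)Math.Sqrt(cache2[i]) + epsilon); // scaled step
    }
}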
public override void Optimize(NeuroWeight<T> weight)
{
    MathProvider.AdagradUpdate(MathProvider.Scalar(LearningRate), weight);
}
public override void Optimize(NeuroWeight<T> weight)
{
    weight.Timestep++;
    MathProvider.AdamUpdate(LearningRate, _b1, _b2, weight);
}