public override Volume<T> Evaluate(Session<T> session)
{
    if (!this.IsDirty)
    {
        return base.Evaluate(session);
    }
    this.IsDirty = false;

    var y = this.Parents[0].Evaluate(session);

    if (this.Result == null)
    {
        this.Result = this._builder.SameAs(this.Index == -1 ? new Shape(4) : new Shape(1));
    }

    if (this.Index == -1)
    {
        this.Result.Set(0, Ops<T>.Cast(y.Shape.Dimensions[0]));
        this.Result.Set(1, Ops<T>.Cast(y.Shape.Dimensions[1]));
        this.Result.Set(2, Ops<T>.Cast(y.Shape.Dimensions[2]));
        this.Result.Set(3, Ops<T>.Cast(y.Shape.Dimensions[3]));
    }
    else
    {
        this.Result.Set(0, Ops<T>.Cast(y.Shape.Dimensions[this.Index]));
    }

    return base.Evaluate(session);
}
protected virtual void Backward(Volume<T> y)
{
    var chrono = Stopwatch.StartNew();

    var batchSize = y.Shape.Dimensions[3];
    this.Loss = Ops<T>.Divide(this.Net.Backward(y), Ops<T>.Cast(batchSize));

    this.BackwardTimeMs = chrono.Elapsed.TotalMilliseconds / batchSize;
}
protected override void TrainImplem()
{
    var parametersAndGradients = this.Net.GetParametersAndGradients();
    var isMomentumGreaterThanZero = Ops<T>.GreaterThan(this.Momentum, Ops<T>.Zero);

    // initialize lists for accumulators. Will only be done once on first iteration
    if (this.gsum.Count == 0 && isMomentumGreaterThanZero)
    {
        foreach (var t in parametersAndGradients)
        {
            this.gsum.Add(BuilderInstance<T>.Volume.SameAs(t.Volume.Shape));
        }
    }

    T factor = Ops<T>.Divide(this.LearningRate, Ops<T>.Cast(this.BatchSize));

    // perform an update for all sets of weights
    for (var i = 0; i < parametersAndGradients.Count; i++)
    {
        var parametersAndGradient = parametersAndGradients[i];
        var vol = parametersAndGradient.Volume;
        var grad = parametersAndGradient.Gradient;

        // learning rate for some parameters.
        var l2DecayMul = parametersAndGradient.L2DecayMul ?? Ops<T>.One;
        var l1DecayMul = parametersAndGradient.L1DecayMul ?? Ops<T>.One;
        var l2Decay = Ops<T>.Multiply(this.L2Decay, l2DecayMul);
        var l1Decay = Ops<T>.Multiply(this.L1Decay, l1DecayMul);

        // this.L2DecayLoss += l2Decay * vol.Get(j) * vol.Get(j) / 2; // accumulate weight decay loss
        // this.L1DecayLoss += l1Decay * Math.Abs(vol.Get(j));
        // var l1Grad = l1Decay * (vol.Get(j) > 0 ? 1 : -1);
        var l2Grad = vol * l2Decay;

        var gij = grad + l2Grad; // var gij = (l2Grad + l1Grad + vol.GetGradient(j)) / this.BatchSize; // raw batch gradient

        if (isMomentumGreaterThanZero)
        {
            // momentum update
            var dx = this.gsum[i] * this.Momentum + gij * factor; // step
            this.gsum[i] = dx.Clone(); // back this up for next iteration of momentum
            vol.MapInplace((v, d) => d, vol - dx); // apply corrected gradient
        }
        else
        {
            // vanilla sgd
            vol.MapInplace((v, d) => d, vol - gij * factor);
        }

        grad.Clear(); // zero out gradient so that we can begin accumulating anew
    }
}
public override Volume<T> Evaluate(Session<T> session)
{
    var variables = session.LearnableVariables;

    var volumes = new Dictionary<Variable<T>, Volume<T>>();
    var gradients = new Dictionary<Variable<T>, Volume<T>>();

    if (this._updaters.Count == 0)
    {
        foreach (var variable in variables.Values)
        {
            var lr = this.Graph.PlaceHolder("lr"); // learning rate
            var grad = this.Graph.PlaceHolder("grad"); // gradients
            var v = this.Graph.PlaceHolder("v"); // volume

            this._updaters[variable] = v - grad * lr;
        }
    }

    this._learningRate.Set(0, Ops<T>.Divide(this._lr, Ops<T>.Cast(session.BatchSize)));

    // Prepare updated variables
    foreach (var variable in variables.Values)
    {
        volumes[variable] = variable.Evaluate(session);
        gradients[variable] = variable.Derivate.Evaluate(session);
    }

    // Apply updated variables
    foreach (var variable in variables.Values)
    {
        var grad = gradients[variable];
        var v = volumes[variable];

        var variableV = session.Run(this._updaters[variable],
            new Dictionary<string, Volume<T>>
            {
                { "lr", this._learningRate },
                { "grad", grad },
                { "v", v }
            }, false);

        variable.Result.Storage.CopyFrom(variableV.Storage);
        variable.SetDirty();
    }

    return null;
}
public void TestCast()
{
    Assert.AreEqual(5, Ops.Cast<int, int>(5));
    Assert.AreEqual(5L, Ops.Cast<int, long>(5));
    Assert.AreEqual(5, Ops.Cast<long, int>(5));
    Assert.AreEqual("abc", Ops.Cast<string, object>("abc"));
    Assert.AreEqual(new MyLong(5), Ops.Cast<int, MyLong>(5));

    Assert.ThrowsException<NotSupportedException>(() => { Ops.Cast<string, long>("5"); });

    Assert.AreEqual((int)new BigInteger(12), Ops.Cast<BigInteger, int>(new BigInteger(12)));
}
protected override void TrainImplem()
{
    var parametersAndGradients = this.Net.GetParametersAndGradients();
    var isMomentumGreaterThanZero = Ops<T>.GreaterThan(this.Momentum, Ops<T>.Zero);

    // initialize lists for accumulators. Will only be done once on first iteration
    if (this.velocities.Count == 0)
    {
        foreach (var parameter in parametersAndGradients)
        {
            this.velocities.Add(BuilderInstance<T>.Volume.SameAs(parameter.Volume.Shape));
            this.regGrads.Add(BuilderInstance<T>.Volume.SameAs(parameter.Volume.Shape));
        }
    }

    // perform an update for all sets of weights
    for (var i = 0; i < parametersAndGradients.Count; i++)
    {
        var parametersAndGradient = parametersAndGradients[i];
        var parameters = parametersAndGradient.Volume;
        var gradients = parametersAndGradient.Gradient;
        var velocity = this.velocities[i];

        var batchAdjustedLearningRate = Ops<T>.Divide(this.LearningRate, Ops<T>.Cast(this.BatchSize));

        // delta = gradient + regularization;
        gradients.Multiply(batchAdjustedLearningRate, gradients);

        if (isMomentumGreaterThanZero)
        {
            // sgd with momentum update
            velocity.Multiply(this.Momentum, velocity); // step
            velocity.Add(gradients, velocity);
            velocity.SubtractFrom(parameters, parameters); // apply corrected gradient
        }
        else
        {
            // vanilla sgd
            gradients.SubtractFrom(parameters, parameters);
        }

        // zero out gradient so that we can begin accumulating anew
        gradients.Clear();
    }
}
public override Volume<T> Evaluate(Session<T> session)
{
    if (!this.IsDirty)
    {
        return base.Evaluate(session);
    }
    this.IsDirty = false;

    var y = this.Parents[1].Evaluate(session);
    var outputActivation = this.Parents[0].Evaluate(session);

    var loss = Ops<T>.Zero;

    for (var n = 0; n < y.Shape.Dimensions[3]; n++)
    {
        for (var d = 0; d < y.Shape.Dimensions[2]; d++)
        {
            for (var h = 0; h < y.Shape.Dimensions[1]; h++)
            {
                for (var w = 0; w < y.Shape.Dimensions[0]; w++)
                {
                    var expected = y.Get(w, h, d, n);
                    var actual = outputActivation.Get(w, h, d, n);
                    if (Ops<T>.Zero.Equals(actual))
                    {
                        actual = Ops<T>.Epsilon;
                    }

                    var current = Ops<T>.Multiply(expected, Ops<T>.Log(actual));
                    loss = Ops<T>.Add(loss, current);
                }
            }
        }
    }

    var batchSize = outputActivation.Shape.Dimensions[3];
    loss = Ops<T>.Divide(Ops<T>.Negate(loss), Ops<T>.Cast(batchSize));
    this.Result.Set(0, loss);

    return base.Evaluate(session);
}
public override void Backward(Volume<T> y, out T loss)
{
    var dy = this.InputActivationGradients.ReShape(this.OutputActivation.Shape.Dimensions.ToArray());
    y.DoSubtractFrom(this.OutputActivation, dy); // dy = outputActivation - y

    if (this._result == null)
    {
        this._result = BuilderInstance<T>.Volume.SameAs(this.OutputActivation.Shape);
        this._sum = BuilderInstance<T>.Volume.SameAs(new Shape(1));
    }

    this._sum.Clear();

    dy.DoMultiply(dy, this._result); // dy * dy
    var half = (T)Convert.ChangeType(0.5, typeof(T));
    this._result.DoMultiply(this._result, half); // dy * dy * 0.5
    this._result.DoSum(this._sum); // sum over the whole batch

    var batchSize = y.Shape.GetDimension(3);
    loss = Ops<T>.Divide(this._sum.Get(0), Ops<T>.Cast(batchSize)); // average
}
public override Volume<T> Evaluate(Session<T> session)
{
    if (!this.IsDirty)
    {
        return this.Result;
    }
    this.IsDirty = false;

    var y = this.Parents[0].Evaluate(session);

    if (this.Result == null)
    {
        this.Result = this._builder.SameAs(new Shape(4));
    }

    this.Result.Set(0, Ops<T>.Cast(y.Shape.GetDimension(0)));
    this.Result.Set(1, Ops<T>.Cast(y.Shape.GetDimension(1)));
    this.Result.Set(2, Ops<T>.Cast(y.Shape.GetDimension(2)));
    this.Result.Set(3, Ops<T>.Cast(y.Shape.GetDimension(3)));

    return this.Result;
}
public override void Backward(Volume<T> y, out T loss)
{
    var reshape = y.ReShape(new Shape(1, 1, -1, Shape.Keep));
    var dy = this.InputActivationGradients.ReShape(this.OutputActivation.Shape.Dimensions);
    reshape.SubtractFrom(this.OutputActivation, dy);

    if (this._result == null)
    {
        this._result = BuilderInstance<T>.Volume.SameAs(this.OutputActivation.Shape);
        this._sum = BuilderInstance<T>.Volume.SameAs(new Shape(1));
    }

    this._sum.Clear();

    dy.Multiply(dy, this._result); // dy * dy
    var half = (T)Convert.ChangeType(0.5, typeof(T));
    this._result.Multiply(half, this._result); // dy * dy * 0.5
    this._result.Sum(this._sum); // sum over the whole batch

    var batchSize = y.Shape.Dimensions[3];
    loss = Ops<T>.Divide(this._sum.Get(0), Ops<T>.Cast(batchSize)); // average
}
public override Volume<T> Evaluate(Session<T> session)
{
    var variables = session.LearnableVariables;
    var dico = new Dictionary<Variable<T>, Volume<T>>();

    if (this._updaters.Count == 0)
    {
        foreach (var variable in variables.Values)
        {
            var lr = this._cns.PlaceHolder("lr"); // learning rate
            var grad = this._cns.PlaceHolder("grad"); // gradients
            var v = this._cns.PlaceHolder("v"); // volume

            this._updaters[variable] = v - grad * lr;
        }
    }

    this._learningRate.Set(0, Ops<T>.Divide(this._lr, Ops<T>.Cast(session.BatchSize)));

    // Prepare updated variables
    foreach (var variable in variables.Values)
    {
        var grad = variable.Derivate.Evaluate(session);
        var volume = variable.Evaluate(session);

        var gradBatchSize = grad.Shape.GetDimension(3);
        var volumeBatchSize = volume.Shape.GetDimension(3);

        if (gradBatchSize != volumeBatchSize && gradBatchSize != 1)
        {
            // Batch size > 1
            var gradShape = new Shape(grad.Shape);
            gradShape.SetDimension(0, variable.Result.Shape.GetDimension(0));
            gradShape.SetDimension(1, variable.Result.Shape.GetDimension(1));
            gradShape.SetDimension(3, 1);

            Volume<T> tempGrad;
            if (!this._tempGrads.TryGetValue(variable, out tempGrad) || !tempGrad.Shape.Equals(gradShape))
            {
                tempGrad = BuilderInstance<T>.Volume.SameAs(gradShape);
                this._tempGrads[variable] = tempGrad;
            }

            grad.DoSum(tempGrad); // sum gradient over the batch
            grad = tempGrad;
        }

        var variableV = session.Run(this._updaters[variable],
            new Dictionary<string, Volume<T>>
            {
                { "lr", this._learningRate },
                { "grad", grad },
                { "v", volume }
            });

        dico[variable] = variableV;
    }

    // Apply updated variables
    foreach (var pair in dico)
    {
        pair.Key.Result.Storage.CopyFrom(pair.Value.Storage);
    }

    return null;
}
protected override void TrainImplem()
{
    var parametersAndGradients = this.Net.GetParametersAndGradients();

    // initialize lists for accumulators. Will only be done once on first iteration
    if (this.gsum.Count == 0)
    {
        foreach (var t in parametersAndGradients)
        {
            this.gsum.Add(BuilderInstance<T>.Volume.SameAs(t.Volume.Shape));
            this.xsum.Add(BuilderInstance<T>.Volume.SameAs(t.Volume.Shape));
        }
    }

    // perform an update for all sets of weights
    for (var i = 0; i < parametersAndGradients.Count; i++)
    {
        var parametersAndGradient = parametersAndGradients[i];
        var vol = parametersAndGradient.Volume;
        var grad = parametersAndGradient.Gradient;

        grad.Multiply(Ops<T>.Divide(Ops<T>.One, Ops<T>.Cast(this.BatchSize)), grad); // grad *= 1 / BatchSize

        using (var temp1 = BuilderInstance<T>.Volume.SameAs(vol.Shape))
        using (var temp2 = BuilderInstance<T>.Volume.SameAs(vol.Shape))
        using (var gradgrad = BuilderInstance<T>.Volume.SameAs(vol.Shape))
        using (var two = BuilderInstance<T>.Volume.From(new[] { Ops<T>.Cast(2.0) }, new Shape(1)))
        using (var epsilon = BuilderInstance<T>.Volume.From(new[] { this.Eps }, new Shape(1)))
        {
            // momentum update
            // update biased first moment estimate: gsum[i] = gsum[i] * Beta1 + (1 - Beta1) * grad
            this.gsum[i].Multiply(this.Beta1, temp1); // temp1 = this.gsum[i] * this.Beta1
            grad.Multiply(Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(this.Beta1)), this.gsum[i]); // this.gsum[i] = grad * (1 - Beta1)
            temp1.Add(this.gsum[i]); // this.gsum[i] += temp1

            grad.Power(two, gradgrad); // gradgrad = grad * grad

            // update biased second moment estimate: xsum[i] = xsum[i] * Beta2 + (1 - Beta2) * grad * grad
            this.xsum[i].Multiply(this.Beta2, temp1); // temp1 = this.xsum[i] * this.Beta2
            gradgrad.Multiply(Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(this.Beta2)), this.xsum[i]); // this.xsum[i] = gradgrad * (1 - Beta2)
            temp1.Add(this.xsum[i]); // this.xsum[i] += temp1

            var biasCorr1 = temp1;
            var biasCorr2 = temp2;

            this.gsum[i].Multiply(Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(Ops<T>.Pow(this.Beta1, Ops<T>.Cast(this.k)))), biasCorr1); // correct bias first moment estimate
            this.xsum[i].Multiply(Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(Ops<T>.Pow(this.Beta2, Ops<T>.Cast(this.k)))), biasCorr2); // correct bias second moment estimate

            biasCorr2.Sqrt(biasCorr2); // biasCorr2 = sqrt(biasCorr2)
            epsilon.Add(biasCorr2); // biasCorr2 += epsilon

            var dx = biasCorr1;
            dx.Multiply(this.LearningRate, dx);
            dx.Divide(biasCorr2, dx);
            dx.SubtractFrom(vol, vol);
        }

        grad.Clear(); // zero out gradient so that we can begin accumulating anew

        this.k += this.BatchSize;
    }
}
protected override void TrainImplem()
{
    var parametersAndGradients = this.Net.GetParametersAndGradients();

    // initialize lists for accumulators. Will only be done once on first iteration
    if (this.gsum.Count == 0)
    {
        foreach (var t in parametersAndGradients)
        {
            this.gsum.Add(BuilderInstance<T>.Volume.SameAs(t.Volume.Shape));
            this.xsum.Add(BuilderInstance<T>.Volume.SameAs(t.Volume.Shape));
        }
    }

    var factor = Ops<T>.Divide(Ops<T>.One, Ops<T>.Cast(this.BatchSize));

    // perform an update for all sets of weights
    for (var i = 0; i < parametersAndGradients.Count; i++)
    {
        var parametersAndGradient = parametersAndGradients[i];
        var vol = parametersAndGradient.Volume;
        var grad = parametersAndGradient.Gradient;

        // learning rate for some parameters.
        var l2DecayMul = parametersAndGradient.L2DecayMul ?? Ops<T>.One;
        var l1DecayMul = parametersAndGradient.L1DecayMul ?? Ops<T>.One;
        var l2Decay = Ops<T>.Multiply(this.L2Decay, l2DecayMul);
        var l1Decay = Ops<T>.Multiply(this.L1Decay, l1DecayMul);

        // this.L2DecayLoss += l2Decay * vol.Get(j) * vol.Get(j) / 2; // accumulate weight decay loss
        // this.L1DecayLoss += l1Decay * Math.Abs(vol.Get(j));

        var l1Grad = vol.Clone();
        l1Grad.MapInplace(x => Ops<T>.GreaterThan(x, Ops<T>.Zero) ? Ops<T>.One : Ops<T>.Negate(Ops<T>.One));
        l1Grad = l1Grad * l1Decay;

        var l2Grad = vol * l2Decay;

        var gij = (grad + l2Grad + l1Grad) * factor;

        // momentum update
        this.gsum[i] = this.gsum[i] * this.Beta1 + gij * Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(this.Beta1)); // update biased first moment estimate
        var gijgij = gij.Clone();
        gijgij.MapInplace(x => Ops<T>.Multiply(x, x));
        this.xsum[i] = this.xsum[i] * this.Beta2 + gijgij * Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(this.Beta2)); // update biased second moment estimate

        var biasCorr1 = this.gsum[i] * Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(Ops<T>.Pow(this.Beta1, Ops<T>.Cast(this.k)))); // correct bias first moment estimate
        var biasCorr2 = this.xsum[i] * Ops<T>.Add(Ops<T>.One, Ops<T>.Negate(Ops<T>.Pow(this.Beta2, Ops<T>.Cast(this.k)))); // correct bias second moment estimate

        biasCorr2.MapInplace(x => Ops<T>.Add(Ops<T>.Sqrt(x), this.Eps));

        var dx = biasCorr1 * this.LearningRate;
        dx.MapInplace((l, r) => Ops<T>.Divide(l, r), biasCorr2);

        vol.MapInplace((v, d) => d, vol - dx); // apply corrected gradient

        grad.Clear(); // zero out gradient so that we can begin accumulating anew
    }

    this.k += this.BatchSize;
}
public Var<T> Cast<T>() => Ops.Cast<E, T>(Value);
protected override void TrainImplem()
{
    var parametersAndGradients = this.Net.GetParametersAndGradients();
    var isMomentumGreaterThanZero = Ops<T>.GreaterThan(this.Momentum, Ops<T>.Zero);

    // initialize lists for accumulators. Will only be done once on first iteration
    if (this.velocities.Count == 0)
    {
        foreach (var parameter in parametersAndGradients)
        {
            this.velocities.Add(BuilderInstance<T>.Volume.SameAs(parameter.Volume.Shape));
            this.deltas.Add(BuilderInstance<T>.Volume.SameAs(parameter.Volume.Shape));
            this.regGrads.Add(BuilderInstance<T>.Volume.SameAs(parameter.Volume.Shape));
        }
    }

    // perform an update for all sets of weights
    for (var i = 0; i < parametersAndGradients.Count; i++)
    {
        var parametersAndGradient = parametersAndGradients[i];
        var parameters = parametersAndGradient.Volume;
        var gradients = parametersAndGradient.Gradient;
        var delta = this.deltas[i];
        var regularizationGradients = this.regGrads[i];
        var velocity = this.velocities[i];

        // learning rate for some parameters.
        var l2DecayMul = parametersAndGradient.L2DecayMul ?? Ops<T>.One;
        var l1DecayMul = parametersAndGradient.L1DecayMul ?? Ops<T>.One;
        var l2Decay = Ops<T>.Multiply(this.L2Decay, l2DecayMul);
        var l1Decay = Ops<T>.Multiply(this.L1Decay, l1DecayMul);

        // this.L2DecayLoss += l2Decay * vol.Get(j) * vol.Get(j) / 2; // accumulate weight decay loss
        // this.L1DecayLoss += l1Decay * Math.Abs(vol.Get(j));

        // L1 regularization
        if (Ops<T>.GreaterThan(l1Decay, Ops<T>.Zero))
        {
            // l1Grad = l1Grad * l1Decay;
            parameters.Storage.Map(x => Ops<T>.GreaterThan(x, Ops<T>.Zero) ? Ops<T>.One : Ops<T>.Negate(Ops<T>.One), regularizationGradients.Storage);
            regularizationGradients.DoMultiply(delta, l1Decay);
        }
        else
        {
            delta.Clear();
        }

        // L2 regularization
        if (Ops<T>.GreaterThan(l2Decay, Ops<T>.Zero))
        {
            // l2Grad = vol * l2Decay;
            parameters.DoMultiply(regularizationGradients, l2Decay);
            delta.DoAdd(regularizationGradients, delta);
        }

        T batchAdjustedLearningRate = Ops<T>.Divide(this.LearningRate, Ops<T>.Cast(this.BatchSize));

        // delta = gradient + regularization;
        gradients.DoMultiply(gradients, batchAdjustedLearningRate);
        delta.DoMultiply(delta, this.LearningRate);
        delta.DoAdd(gradients, delta);

        if (isMomentumGreaterThanZero)
        {
            // sgd with momentum update
            velocity.DoMultiply(velocity, this.Momentum); // step
            velocity.DoAdd(delta, velocity);
            velocity.DoSubtractFrom(parameters, parameters); // apply corrected gradient
        }
        else
        {
            // vanilla sgd
            delta.DoSubtractFrom(parameters, parameters);
        }

        // zero out gradient so that we can begin accumulating anew
        gradients.Clear();
    }
}
public override Volume<T> Evaluate(Session<T> session)
{
    var variables = session.LearnableVariables;

    var volumes = new Dictionary<Variable<T>, Volume<T>>();
    var gradients = new Dictionary<Variable<T>, Volume<T>>();

    if (this._updaters.Count == 0)
    {
        foreach (var variable in variables.Values)
        {
            var one = this.Graph.Const(Ops<T>.One, "one");
            var epsilon = this.Graph.PlaceHolder("epsilon");
            var beta1 = this.Graph.PlaceHolder("beta1");
            var beta2 = this.Graph.PlaceHolder("beta2");
            var m = this.Graph.Variable(Ops<T>.Zero, "m");
            var v = this.Graph.Variable(Ops<T>.Zero, "v");
            var t = this.Graph.Variable(Ops<T>.Zero, "t");
            var grad = this.Graph.PlaceHolder("grad"); // gradients
            var learningRate = this.Graph.PlaceHolder("lr"); // learning rate

            var m_t = this.Graph.Assign(m, beta1 * m + (one - beta1) * grad); // m_t <- beta1 * m_{t-1} + (1 - beta1) * g
            //m_t.Evaluated += (sender, args) => { Console.WriteLine($"m[{variable}]={ ((Op<T>)sender).Result.Get(0)}"); };

            var v_t = this.Graph.Assign(v, beta2 * v + (one - beta2) * grad * grad); // v_t <- beta2 * v_{t-1} + (1 - beta2) * g * g
            //v_t.Evaluated += (sender, args) => { Console.WriteLine($"v[{variable}]={ ((Op<T>)sender).Result.Get(0)}"); };

            var t_plus_1 = this.Graph.Assign(t, t + one); // t = t + 1
            //t_plus_1.Evaluated += (sender, args) => { Console.WriteLine($"t[{variable}]={ ((Op<T>)sender).Result.Get(0)}"); };

            var lr = learningRate * this.Graph.Sqrt(one - (beta2 ^ t_plus_1)) / (one - (beta1 ^ t_plus_1)); // lr_t <- learning_rate * sqrt(1 - beta2^t) / (1 - beta1^t)
            //lr.Evaluated += (sender, args) => { Console.WriteLine($"lr[{variable}]={ ((Op<T>)sender).Result.Get(0)}"); };

            var vol = this.Graph.PlaceHolder("vol");
            var delta = lr * (m_t / (this.Graph.Sqrt(v_t) + epsilon));
            //delta.Evaluated += (sender, args) => { Console.WriteLine($"delta[{variable}]={ ((Op<T>)sender).Result.Get(0)}"); };

            this._updaters[variable] = vol - delta;
        }
    }

    this._learningRate.Set(0, Ops<T>.Divide(this._lr, Ops<T>.Cast(session.BatchSize)));

    // Prepare updated variables
    foreach (var variable in variables.Values)
    {
        volumes[variable] = variable.Evaluate(session);
        gradients[variable] = variable.Derivate.Evaluate(session);
    }

    // Apply updated variables
    foreach (var variable in variables.Values)
    {
        var grad = gradients[variable];
        var v = volumes[variable];

        var variableV = session.Run(this._updaters[variable],
            new Dictionary<string, Volume<T>>
            {
                { "epsilon", this._epsilon },
                { "beta1", this._beta1 },
                { "beta2", this._beta2 },
                { "lr", this._learningRate },
                { "grad", grad },
                { "vol", v }
            }, false);

        variable.Result.Storage.CopyFrom(variableV.Storage);
        variable.SetDirty();

        // Console.WriteLine("-----------------");
    }

    return null;
}