/// <summary>
/// Applies one AdaMax update step (Kingma &amp; Ba, "Adam", Algorithm 2) to every
/// trainable parameter of <paramref name="layer"/>, using the exponential moving
/// average <c>ms</c> of gradients and the infinity-norm accumulator <c>us</c>.
/// </summary>
/// <param name="iteration">1-based training iteration, used for LR decay and bias correction.</param>
/// <param name="layer">Layer whose parameters are updated in place.</param>
public override void Update(int iteration, BaseLayer layer)
{
    // Time-based decay: lr = lr / (1 + decay * iteration).
    // The original wrote (1 / 1 + DecayRate * iteration), which evaluates to
    // (1 + DecayRate * iteration) and *increases* the learning rate every call.
    if (DecayRate > 0)
    {
        LearningRate = LearningRate * (1 / (1 + DecayRate * iteration));
    }

    float t = iteration + 1;

    // AdaMax bias-corrected step size: lr / (1 - beta1^t).
    // Note: no square root here — that belongs to Adam, not AdaMax.
    float lr_t = Convert.ToSingle(LearningRate / (1f - Math.Pow(Beta1, t)));

    foreach (var item in layer.Params)
    {
        var param = item.Value;

        // Lazily create per-parameter state. Only initialization belongs in this
        // branch; the original closed this brace at the end of the loop body, so
        // parameters were only ever updated on their very first visit.
        if (!ms.ContainsKey(param.Name))
        {
            ms[param.Name] = Tensor.Constant(0, Global.Device, DType.Float32, param.Data.Shape);
            us[param.Name] = Tensor.Constant(0, Global.Device, DType.Float32, param.Data.Shape);
        }

        // m_t: biased first-moment estimate; u_t: exponentially weighted infinity norm.
        var m_t = (Beta1 * ms[param.Name]) + (1 - Beta1) * param.Grad;
        var u_t = TOps.Maximum((Beta2 * us[param.Name]), Abs(param.Grad));

        // EPSILON guards against division by zero when u_t is 0.
        param.Data = param.Data - lr_t * m_t / (u_t + EPSILON);

        ms[param.Name] = m_t;
        us[param.Name] = u_t;
        param.ApplyConstraint();
    }
}
/// <summary>
/// Fills <paramref name="result"/> with exponentially distributed samples (rate
/// <paramref name="lambda"/>). Sampling happens in a temporary CPU buffer which
/// is then copied to the target array's device.
/// </summary>
public void Exponential(NDArray result, int? seed, float lambda)
{
    using (var staging = new NDArray(cpuAllocator, result.ElementType, result.Shape))
    {
        cpuRandom.Exponential(staging, seed, lambda);
        TOps.Copy(result, staging);
    }
}
/// <summary>
/// Fills <paramref name="result"/> with normally distributed samples
/// (<paramref name="mean"/>, <paramref name="stdv"/>). Sampling happens in a
/// temporary CPU buffer which is then copied to the target array's device.
/// </summary>
public void Normal(NDArray result, int? seed, float mean, float stdv)
{
    using (var staging = new NDArray(cpuAllocator, result.ElementType, result.Shape))
    {
        cpuRandom.Normal(staging, seed, mean, stdv);
        TOps.Copy(result, staging);
    }
}
/// <summary>
/// Fills <paramref name="result"/> with uniform samples in
/// [<paramref name="min"/>, <paramref name="max"/>]. Sampling happens in a
/// temporary CPU buffer which is then copied to the target array's device.
/// </summary>
public void Uniform(NDArray result, int? seed, float min, float max)
{
    using (var staging = new NDArray(cpuAllocator, result.ElementType, result.Shape))
    {
        cpuRandom.Uniform(staging, seed, min, max);
        TOps.Copy(result, staging);
    }
}
/// <summary>
/// Fills <paramref name="result"/> with Bernoulli samples (success probability
/// <paramref name="p"/>). Sampling happens in a temporary CPU buffer which is
/// then copied to the target array's device.
/// </summary>
public void Bernoulli(NDArray result, int? seed, float p)
{
    using (var staging = new NDArray(cpuAllocator, result.ElementType, result.Shape))
    {
        cpuRandom.Bernoulli(staging, seed, p);
        TOps.Copy(result, staging);
    }
}
/// <summary>
/// Fills <paramref name="result"/> with Cauchy-distributed samples
/// (<paramref name="median"/>, <paramref name="sigma"/>). Sampling happens in a
/// temporary CPU buffer which is then copied to the target array's device.
/// </summary>
public void Cauchy(NDArray result, int? seed, float median, float sigma)
{
    using (var staging = new NDArray(cpuAllocator, result.ElementType, result.Shape))
    {
        cpuRandom.Cauchy(staging, seed, median, sigma);
        TOps.Copy(result, staging);
    }
}
/// <summary>
/// Fills <paramref name="result"/> with geometrically distributed samples
/// (success probability <paramref name="p"/>). Sampling happens in a temporary
/// CPU tensor which is then copied to the target tensor's device.
/// </summary>
public void Geometric(Tensor result, int? seed, float p)
{
    using (var staging = new Tensor(cpuAllocator, result.ElementType, result.Shape))
    {
        cpuRandom.Geometric(staging, seed, p);
        TOps.Copy(result, staging);
    }
}
/// <summary>
/// Fills <paramref name="result"/> with log-normal samples
/// (<paramref name="mean"/>, <paramref name="stdv"/>). Sampling happens in a
/// temporary CPU tensor which is then copied to the target tensor's device.
/// </summary>
public void LogNormal(Tensor result, int? seed, float mean, float stdv)
{
    using (var staging = new Tensor(cpuAllocator, result.ElementType, result.Shape))
    {
        cpuRandom.LogNormal(staging, seed, mean, stdv);
        TOps.Copy(result, staging);
    }
}
/// <summary>
/// Trains the model for <paramref name="epochs"/> epochs over <paramref name="train"/>,
/// optionally evaluating on <paramref name="val"/> after each epoch, and prints the
/// average training loss per epoch.
/// </summary>
/// <param name="train">Batched training data iterator.</param>
/// <param name="epochs">Number of full passes over the training data.</param>
/// <param name="batchSize">Mini-batch size applied to <paramref name="train"/>.</param>
/// <param name="val">Optional validation data iterator; skipped when null.</param>
public void Fit(IFrameIter train, int epochs, int batchSize, IFrameIter val = null)
{
    train.SetBatchSize(batchSize);

    for (int iteration = 1; iteration <= epochs; iteration++)
    {
        // Fresh accumulators each epoch so the printed loss is this epoch's
        // average, not a running average over every epoch so far.
        List<float> train_losses = new List<float>();
        List<float> val_losses = new List<float>();
        List<float> val_metrics = new List<float>();

        train.Reset();
        while (train.Next())
        {
            var (x, y) = train.GetBatch();
            using (Variable pred = Forward(x))
            using (Tensor lossVal = LossFn.Call(pred.Data, y))
            using (Tensor grad = LossFn.CalcGrad(pred.Data, y))
            using (Tensor reg_loss = ApplyRegularizer(lossVal))
            {
                train_losses.Add(reg_loss.TVar().ToScalar().Evaluate());
                Backward(grad);
                ApplyDeltaRegularizer();
                foreach (var layer in Layers)
                {
                    OptimizerFn.Update(iteration, layer);
                }
            }
            x.Dispose();
            y.Dispose();
        }

        if (val != null)
        {
            // Without this Reset the validation iterator is exhausted after the
            // first epoch and later epochs silently skip validation.
            val.Reset();
            while (val.Next())
            {
                var (x, y) = val.GetBatch();
                // Dispose validation tensors just like the training loop does;
                // the original leaked pred/lossVal/metricVal/x/y every batch.
                using (Variable pred = Forward(x))
                using (Tensor lossVal = LossFn.Call(pred.Data, y))
                using (Tensor metricVal = MetricFn.Call(pred.Data, y))
                {
                    val_losses.Add(TOps.MeanF(lossVal));
                    val_metrics.Add(TOps.MeanF(metricVal));
                }
                x.Dispose();
                y.Dispose();
            }
        }

        Console.WriteLine("Epoch: {0}, Loss: {1}", iteration, train_losses.Average());
    }
}
/// <summary>
/// Applies one Adam (or AMSGrad, when <c>AmsGrad</c> is set) update step to every
/// trainable parameter of <paramref name="layer"/>, maintaining the first-moment
/// (<c>ms</c>), second-moment (<c>vs</c>) and, for AMSGrad, the running-max
/// second-moment (<c>vhats</c>) state per parameter.
/// </summary>
/// <param name="iteration">1-based training iteration, used for LR decay and bias correction.</param>
/// <param name="layer">Layer whose parameters are updated in place.</param>
public override void Update(int iteration, BaseLayer layer)
{
    // Time-based decay: lr = lr / (1 + decay * iteration).
    // The original wrote (1 / 1 + DecayRate * iteration), which evaluates to
    // (1 + DecayRate * iteration) and *increases* the learning rate every call.
    if (DecayRate > 0)
    {
        LearningRate = LearningRate * (1 / (1 + DecayRate * iteration));
    }

    float t = iteration + 1;

    // Adam bias-corrected step size: lr * sqrt(1 - beta2^t) / (1 - beta1^t).
    float lr_t = Convert.ToSingle(LearningRate * Math.Sqrt(1f - Math.Pow(Beta2, t)) / (1f - Math.Pow(Beta1, t)));

    foreach (var item in layer.Params)
    {
        var param = item.Value;

        // Lazily create per-parameter state. Only initialization belongs in this
        // branch; the original closed this brace at the end of the loop body, so
        // parameters were only ever updated on their very first visit.
        if (!ms.ContainsKey(param.Name))
        {
            ms[param.Name] = Tensor.Constant(0, Global.Device, DType.Float32, param.Data.Shape);
            vs[param.Name] = Tensor.Constant(0, Global.Device, DType.Float32, param.Data.Shape);
            if (AmsGrad)
            {
                vhats[param.Name] = Tensor.Constant(0, Global.Device, DType.Float32, param.Data.Shape);
            }
            else
            {
                // Placeholder so the key exists even when AMSGrad is disabled.
                vhats[param.Name] = Tensor.Constant(0, Global.Device, DType.Float32, 1);
            }
        }

        // Biased first- and second-moment estimates.
        var m_t = (Beta1 * ms[param.Name]) + (1 - Beta1) * param.Grad;
        var v_t = (Beta2 * vs[param.Name]) + (1 - Beta2) * Square(param.Grad);

        if (AmsGrad)
        {
            // AMSGrad: use the running maximum of v_t for a non-increasing step size.
            Tensor vhat_t = TOps.Maximum(vhats[param.Name], v_t);
            param.Data = param.Data - lr_t * m_t / (Sqrt(vhat_t) + EPSILON);
            vhats[param.Name] = vhat_t;
        }
        else
        {
            param.Data = param.Data - lr_t * m_t / (Sqrt(v_t) + EPSILON);
        }

        ms[param.Name] = m_t;
        vs[param.Name] = v_t;
        param.ApplyConstraint();
    }
}
/// <summary>
/// Entry point: runs a small tensor sanity check on the GPU, loads the MNIST
/// dataset, builds either a dense or a convolutional model, and trains it.
/// </summary>
static void Main(string[] args)
{
    Global.UseGpu();

    // Quick device sanity check: build a 3x3 tensor and print its diagonal.
    Tensor x = Tensor.FromArray(Global.Device, new float[] { 1, 2, 3, 4, 5, 6, 7, 8, 9 });
    x = x.Reshape(3, 3);
    TOps.Diag(x).Print();

    string datasetFolder = @"C:\dataset\MNIST";
    bool useDenseModel = false;

    // Dense models consume flattened images; conv models keep the 2D layout.
    var ((trainX, trainY), (valX, valY)) = MNISTParser.LoadDataSet(datasetFolder, trainCount: 60000, testCount: 10000, flatten: useDenseModel);
    Console.WriteLine("Train and Test data loaded");

    var trainIter = new DataFrameIter(trainX, trainY);
    var valIter = new DataFrameIter(valX, valY);

    Sequential model = useDenseModel ? BuildFCModel() : BuildConvModel();
    model.Compile(OptimizerType.Adam, LossType.CategorialCrossEntropy, MetricType.Accuracy);
    Console.WriteLine("Model compiled.. initiating training");
    model.EpochEnd += Model_EpochEnd;

    model.Train(trainIter, 10, 32, valIter);
    Console.ReadLine();
}