/// <summary>
/// Runs the model once over <paramref name="data"/>, batch by batch, and returns the
/// perplexity accumulated over the batches that were processed.
/// </summary>
/// <param name="data">Input sequence handed to <c>Data.Iterator</c> for batching.</param>
/// <param name="learningRate">Value passed to <c>Optimizer.Optimize</c>; only used when <c>IsTraining</c> is true.</param>
/// <param name="verbose">When true, progress lines are written to the console at intervals derived from the epoch size.</param>
/// <returns>
/// <c>Math.Exp(costs / iters)</c>. If the iterator yields no batches, <c>iters</c> stays 0 and the
/// result is NaN (0.0 / 0).
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="data"/> is null.</exception>
public double RunEpoch(int[] data, double learningRate = 1.0, bool verbose = false)
{
    if (data == null)
    {
        throw new ArgumentNullException(nameof(data));
    }

    var cfg = Config;
    var isTraining = IsTraining;
    var epochSize = (data.Length / cfg.BatchSize - 1) / cfg.NumSteps;

    // Integer division: this is 0 whenever epochSize < 10. Using it directly as a modulo
    // divisor would throw DivideByZeroException, so it is checked before use below.
    var reportInterval = epochSize / 10;

    var time = Stopwatch.StartNew();
    var costs = 0.0;
    var iters = 0;
    var step = 0;
    var firstBatch = true;

    foreach (var batch in Data.Iterator(data, cfg.NumSteps, cfg.BatchSize))
    {
        Optimizer.AssignTensor(Inputs, batch.Inputs.AsTensor());
        Optimizer.AssignTensor(Targets, batch.Targets.AsTensor());

        // The first batch starts from freshly reset recurrent states; every later batch
        // continues from the states produced by the previous one.
        if (firstBatch)
        {
            ResetStates();
            firstBatch = false;
        }
        else
        {
            CopyStates();
        }

        Optimizer.Forward();
        if (isTraining)
        {
            Optimizer.Backward();
            Optimizer.Optimize(learningRate);
        }

        var loss = Optimizer.GetTensor(Loss.Loss).ToScalar();
        var cost = loss / cfg.BatchSize;
        costs += cost;
        iters += cfg.NumSteps;

        // Profiling reports every step. Otherwise a line is printed when the step's remainder
        // modulo a tenth of the epoch equals 10 (so intervals of 10 or less never report).
        var shouldReport = Profiling
            || (verbose && reportInterval > 0 && step % reportInterval == 10);
        if (shouldReport)
        {
            var perplexity = Math.Exp(costs / iters);
            var wps = (iters * cfg.BatchSize) / time.Elapsed.TotalSeconds;
            Console.WriteLine($"{step:D4}: {step * 1.0 / epochSize:F3} perplexity: {perplexity:F3} speed:{wps:F0} wps cost: {cost:F3}");
        }

        // Profiling runs stop after a handful of steps.
        if (Profiling && step > 5)
        {
            break;
        }

        step++;
    }

    return Math.Exp(costs / iters);
}
/// <summary>
/// Carries the recurrent state forward: for each state pair, reads the output tensor
/// (CY / HY) from the optimizer and assigns it to the matching input (CX / HX).
/// Uses <c>RnnAccelerated</c> when <c>UsingCuDnn</c> is set, otherwise walks the
/// <c>RnnDirect</c> layers.
/// </summary>
public void CopyStates()
{
    if (!UsingCuDnn)
    {
        // One CX/HX pair per layer, indexed up to Config.NumLayers.
        for (var layer = 0; layer < Config.NumLayers; layer++)
        {
            var cell = RnnDirect[layer];

            var cellOut = Optimizer.GetTensor(cell.CY);
            Optimizer.AssignTensor(cell.CX, cellOut);

            var hiddenOut = Optimizer.GetTensor(cell.HY);
            Optimizer.AssignTensor(cell.HX, hiddenOut);
        }

        return;
    }

    // Accelerated path: a single CX/HX pair on RnnAccelerated.
    var rnn = RnnAccelerated;

    var acceleratedCellOut = Optimizer.GetTensor(rnn.CY);
    Optimizer.AssignTensor(rnn.CX, acceleratedCellOut);

    var acceleratedHiddenOut = Optimizer.GetTensor(rnn.HY);
    Optimizer.AssignTensor(rnn.HX, acceleratedHiddenOut);
}
/// <summary>
/// Re-initialises the recurrent state inputs (CX and HX) by assigning each the result of
/// <c>Fill(shape, 0.0f)</c>. On the accelerated path the shape is taken from the existing
/// CX / HX variables; on the direct path it is (Config.BatchSize, layer HiddenSize).
/// </summary>
public void ResetStates()
{
    if (!UsingCuDnn)
    {
        for (var layer = 0; layer < Config.NumLayers; layer++)
        {
            var cell = RnnDirect[layer];

            // Both states of a layer share the same shape; each gets its own Fill.
            var stateShape = Shape.Create(Config.BatchSize, cell.HiddenSize);
            Optimizer.AssignTensor(cell.CX, Fill(stateShape, 0.0f));
            Optimizer.AssignTensor(cell.HX, Fill(stateShape, 0.0f));
        }

        return;
    }

    var rnn = RnnAccelerated;

    // Shapes are read from the state variables themselves on this path.
    var cellShape = Shape.Create(rnn.CX.Shape.AsArray);
    Optimizer.AssignTensor(rnn.CX, Fill(cellShape, 0.0f));

    var hiddenShape = Shape.Create(rnn.HX.Shape.AsArray);
    Optimizer.AssignTensor(rnn.HX, Fill(hiddenShape, 0.0f));
}
/// <summary>
/// Copies the embedding, fully-connected and recurrent weights from <paramref name="o"/>
/// into this model by reading each tensor from the other model's optimizer and assigning
/// it to the corresponding variable here.
/// </summary>
/// <param name="o">
/// Source model. It must use the same recurrent backend as this one (both with
/// <c>UsingCuDnn</c> set, or both without); this is checked via <c>Util.EnsureTrue</c>.
/// </param>
/// <exception cref="ArgumentNullException"><paramref name="o"/> is null.</exception>
public void CopyWeightsFrom(Model o)
{
    if (o == null)
    {
        throw new ArgumentNullException(nameof(o));
    }

    // Check backend compatibility before touching any weights, so a mismatch cannot
    // leave this model with only part of the other model's parameters.
    Util.EnsureTrue(UsingCuDnn == o.UsingCuDnn);

    Optimizer.AssignTensor(Embedding.Weights, o.Optimizer.GetTensor(o.Embedding.Weights));
    Optimizer.AssignTensor(FC.Weights, o.Optimizer.GetTensor(o.FC.Weights));
    Optimizer.AssignTensor(FC.Bias, o.Optimizer.GetTensor(o.FC.Bias));

    if (UsingCuDnn)
    {
        // Accelerated path: a single weight variable on RnnAccelerated.
        Optimizer.AssignTensor(RnnAccelerated.W, o.Optimizer.GetTensor(o.RnnAccelerated.W));
    }
    else
    {
        // Direct path: one weight variable per layer, indexed up to this model's Config.NumLayers.
        for (var i = 0; i < Config.NumLayers; ++i)
        {
            Optimizer.AssignTensor(RnnDirect[i].W, o.Optimizer.GetTensor(o.RnnDirect[i].W));
        }
    }
}