Exemple #1
0
            /// <summary>
            /// Runs the model once over every batch produced from <paramref name="data"/> and
            /// returns the exponential of the accumulated per-batch cost divided by the number
            /// of processed steps (the value logged as "perplexity").
            /// </summary>
            /// <remarks>
            /// For each batch the inputs and targets are assigned, the recurrent states are reset
            /// (first batch) or carried over via <c>CopyStates</c> (later batches), and a forward
            /// pass is executed. The backward pass and the optimizer update only run when
            /// <c>IsTraining</c> is true. When <c>Profiling</c> is set, every step is logged and
            /// the loop stops early after a handful of steps.
            /// </remarks>
            /// <param name="data">Integer sequence handed to <c>Data.Iterator</c> to be cut into batches.</param>
            /// <param name="learningRate">Value passed to <c>Optimizer.Optimize</c>; unused when not training.</param>
            /// <param name="verbose">When true, progress lines are written to the console periodically.</param>
            /// <returns>
            /// <c>Math.Exp(costs / iters)</c>. If the iterator yields no batch this is
            /// <c>Math.Exp(0.0 / 0)</c>, i.e. <see cref="double.NaN"/>.
            /// </returns>
            public double RunEpoch(int[] data, double learningRate = 1.0, bool verbose = false)
            {
                var cfg        = Config;
                var isTraining = IsTraining;
                var epochSize  = (data.Length / cfg.BatchSize - 1) / cfg.NumSteps;

                // Progress is reported about ten times per epoch. The interval is clamped to at
                // least 1 because epochSize / 10 is 0 for short epochs and an integer modulo by
                // zero throws DivideByZeroException. The phase is reduced modulo the interval so
                // that it is reachable even when the interval is <= 10; for intervals above 10 it
                // is simply 10, which matches the usual "step % interval == 10" schedule.
                var reportInterval = Math.Max(1, epochSize / 10);
                var reportPhase    = 10 % reportInterval;

                var time       = Stopwatch.StartNew();
                var costs      = 0.0;
                var iters      = 0;
                var step       = 0;
                var firstBatch = true;

                foreach (var batch in Data.Iterator(data, cfg.NumSteps, cfg.BatchSize))
                {
                    Optimizer.AssignTensor(Inputs, batch.Inputs.AsTensor());
                    Optimizer.AssignTensor(Targets, batch.Targets.AsTensor());

                    // The first batch starts from zeroed states; every later batch continues
                    // from the states left behind by the previous forward pass.
                    if (firstBatch)
                    {
                        ResetStates();
                        firstBatch = false;
                    }
                    else
                    {
                        CopyStates();
                    }

                    Optimizer.Forward();

                    if (isTraining)
                    {
                        Optimizer.Backward();
                        Optimizer.Optimize(learningRate);
                    }

                    var loss = Optimizer.GetTensor(Loss.Loss).ToScalar();
                    var cost = loss / cfg.BatchSize;
                    costs += cost;
                    iters += cfg.NumSteps;

                    if (Profiling || (verbose && step % reportInterval == reportPhase))
                    {
                        var perplexity = Math.Exp(costs / iters);
                        var wps        = (iters * cfg.BatchSize) / (time.Elapsed.TotalMilliseconds / 1000.0);

                        Console.WriteLine($"{step:D4}: {step * 1.0 / epochSize:F3} perplexity: {perplexity:F3} speed:{wps:F0} wps cost: {cost:F3}");
                    }

                    // Profiling runs only need a few steps.
                    if (Profiling && step > 5)
                    {
                        break;
                    }

                    step++;
                }

                return Math.Exp(costs / iters);
            }
Exemple #2
0
 /// <summary>
 /// Carries the recurrent state over: for each RNN in use, the tensor currently held for
 /// <c>CY</c> is assigned to <c>CX</c>, and the tensor held for <c>HY</c> is assigned to <c>HX</c>.
 /// </summary>
 public void CopyStates()
 {
     if (!UsingCuDnn)
     {
         // Direct implementation: one LSTM per layer, each with its own CX/HX/CY/HY.
         for (var layer = 0; layer < Config.NumLayers; layer++)
         {
             var cell = RnnDirect[layer];

             var carriedC = Optimizer.GetTensor(cell.CY);
             Optimizer.AssignTensor(cell.CX, carriedC);

             var carriedH = Optimizer.GetTensor(cell.HY);
             Optimizer.AssignTensor(cell.HX, carriedH);
         }

         return;
     }

     // Accelerated implementation: a single RNN object holds the state tensors.
     var acceleratedC = Optimizer.GetTensor(RnnAccelerated.CY);
     Optimizer.AssignTensor(RnnAccelerated.CX, acceleratedC);

     var acceleratedH = Optimizer.GetTensor(RnnAccelerated.HY);
     Optimizer.AssignTensor(RnnAccelerated.HX, acceleratedH);
 }
Exemple #3
0
 /// <summary>
 /// Overwrites the <c>CX</c> and <c>HX</c> tensors of every RNN in use with tensors filled
 /// with <c>0.0f</c>.
 /// </summary>
 public void ResetStates()
 {
     if (!UsingCuDnn)
     {
         // Direct implementation: each layer's state has shape (BatchSize, HiddenSize).
         for (var layer = 0; layer < Config.NumLayers; layer++)
         {
             var cell       = RnnDirect[layer];
             var stateShape = Shape.Create(Config.BatchSize, cell.HiddenSize);

             Optimizer.AssignTensor(cell.CX, Fill(stateShape, 0.0f));
             Optimizer.AssignTensor(cell.HX, Fill(stateShape, 0.0f));
         }

         return;
     }

     // Accelerated implementation: reuse the shapes already declared on CX and HX.
     var cxShape = Shape.Create(RnnAccelerated.CX.Shape.AsArray);
     Optimizer.AssignTensor(RnnAccelerated.CX, Fill(cxShape, 0.0f));

     var hxShape = Shape.Create(RnnAccelerated.HX.Shape.AsArray);
     Optimizer.AssignTensor(RnnAccelerated.HX, Fill(hxShape, 0.0f));
 }
Exemple #4
0
 /// <summary>
 /// Copies the embedding weights, the fully connected layer's weights and bias, and the RNN
 /// weights from another model into this one.
 /// </summary>
 /// <param name="o">
 /// Model to read the weights from. Its <c>UsingCuDnn</c> setting is checked against this
 /// model's with <c>Util.EnsureTrue</c> before the RNN weights are copied.
 /// </param>
 public void CopyWeightsFrom(Model o)
 {
     // Weights shared by both RNN implementations.
     var embeddingWeights = o.Optimizer.GetTensor(o.Embedding.Weights);
     Optimizer.AssignTensor(Embedding.Weights, embeddingWeights);

     var fcWeights = o.Optimizer.GetTensor(o.FC.Weights);
     Optimizer.AssignTensor(FC.Weights, fcWeights);

     var fcBias = o.Optimizer.GetTensor(o.FC.Bias);
     Optimizer.AssignTensor(FC.Bias, fcBias);

     if (!UsingCuDnn)
     {
         // Direct implementation: one weight tensor per layer.
         Util.EnsureTrue(!o.UsingCuDnn);

         for (var layer = 0; layer < Config.NumLayers; layer++)
         {
             var layerWeights = o.Optimizer.GetTensor(o.RnnDirect[layer].W);
             Optimizer.AssignTensor(RnnDirect[layer].W, layerWeights);
         }

         return;
     }

     // Accelerated implementation: a single weight tensor for the whole RNN.
     Util.EnsureTrue(o.UsingCuDnn);

     var rnnWeights = o.Optimizer.GetTensor(o.RnnAccelerated.W);
     Optimizer.AssignTensor(RnnAccelerated.W, rnnWeights);
 }