Esempio n. 1
0
        /// <summary>
        /// Runs <paramref name="model"/> over <paramref name="eval_data"/> in windows of up to
        /// <paramref name="bptt"/> rows and returns a length-weighted average of the loss.
        /// No optimizer step is taken and gradient tracking is disabled for the whole pass.
        /// </summary>
        /// <param name="eval_data">Tensor whose first dimension is walked in steps of <paramref name="bptt"/>.</param>
        /// <param name="model">Model to evaluate; <c>model.eval()</c> is called on it before the pass.</param>
        /// <param name="criterion">Loss applied to the model output (viewed as <c>(-1, ntokens)</c>) and the targets.</param>
        /// <param name="bptt">Step size through <paramref name="eval_data"/> and size of the initial source mask.</param>
        /// <param name="ntokens">Size of the last dimension the model output is reshaped to before the loss.</param>
        /// <param name="optimizer">Not used by this method; kept so the signature matches existing callers.</param>
        /// <returns>
        /// The sum over batches of <c>data.shape[0] * loss</c>, divided by <c>eval_data.shape[0]</c>.
        /// </returns>
        private static double evaluate(Tensor eval_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer)
        {
            model.eval();

            // Evaluation never calls backward(), so there is no reason to record an autograd
            // graph for every forward pass; no_grad() is restored to the previous mode on dispose.
            using (torch.no_grad())
            using (var d = torch.NewDisposeScope()) {
                var src_mask = model.GenerateSquareSubsequentMask(bptt);

                var total_loss = 0.0f;

                for (int i = 0; i < eval_data.shape[0] - 1; i += bptt)
                {
                    var(data, targets) = GetBatch(eval_data, i, bptt);

                    // A batch with a different row count than bptt needs a mask of matching size.
                    if (data.shape[0] != bptt)
                    {
                        src_mask = model.GenerateSquareSubsequentMask(data.shape[0]);
                    }

                    using (var output = model.forward(data, src_mask)) {
                        var loss = criterion(output.view(-1, ntokens), targets);

                        // Weight each batch's loss by its row count so that batches of
                        // different sizes contribute proportionally to the total.
                        total_loss += data.shape[0] * loss.to(torch.CPU).item <float>();
                    }

                    data.Dispose();
                    targets.Dispose();

                    // Release every tensor created in this iteration except the mask,
                    // which is reused by the next iteration.
                    d.DisposeEverythingBut(src_mask);
                }

                // NOTE(review): the weights above are data.shape[0] per batch, while the divisor is
                // eval_data.shape[0]. If GetBatch yields eval_data.shape[0] - 1 rows in total (the loop
                // bound suggests so), the divisor should arguably be that count - confirm against GetBatch.
                return(total_loss / eval_data.shape[0]);
            }
        }
Esempio n. 2
0
 private static void Train(
     Model model,
     torch.optim.Optimizer optimizer,
     Loss loss,
     Device device,
     IEnumerable <(Tensor, Tensor)> dataLoader,
Esempio n. 3
0
        /// <summary>
        /// Performs one training pass over <paramref name="train_data"/>, walking it in steps of
        /// <paramref name="bptt"/> rows. Each step runs a forward pass, back-propagates the loss,
        /// clips the gradient norm to 0.5 and applies an optimizer step.
        /// </summary>
        /// <param name="epoch">Epoch number; only used in the progress message.</param>
        /// <param name="train_data">Tensor whose first dimension is walked in steps of <paramref name="bptt"/>.</param>
        /// <param name="model">Model being trained; <c>model.train()</c> is called on it before the pass.</param>
        /// <param name="criterion">Loss applied to the model output (viewed as <c>(-1, ntokens)</c>) and the targets.</param>
        /// <param name="bptt">Step size through <paramref name="train_data"/> and size of the initial source mask.</param>
        /// <param name="ntokens">Size of the last dimension the model output is reshaped to before the loss.</param>
        /// <param name="optimizer">Optimizer whose gradients are zeroed and which is stepped once per batch.</param>
        private static void train(int epoch, Tensor train_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer)
        {
            // Number of batches between two progress messages.
            const int reportEvery = 200;

            model.train();

            using (var scope = torch.NewDisposeScope())
            {
                var rowCount    = train_data.shape[0];
                var mask        = model.GenerateSquareSubsequentMask(bptt);
                var runningLoss = 0.0f;

                var step   = 0;
                var offset = 0;

                while (offset < rowCount - 1)
                {
                    var (inputs, expected) = GetBatch(train_data, offset, bptt);
                    optimizer.zero_grad();

                    // A batch with a different row count than bptt needs a mask of matching size.
                    var window = inputs.shape[0];
                    if (window != bptt)
                    {
                        mask = model.GenerateSquareSubsequentMask(window);
                    }

                    using (var prediction = model.forward(inputs, mask))
                    {
                        var batchLoss = criterion(prediction.view(-1, ntokens), expected);
                        batchLoss.backward();
                        torch.nn.utils.clip_grad_norm_(model.parameters().ToArray(), 0.5);
                        optimizer.step();

                        runningLoss += batchLoss.to(torch.CPU).item<float>();
                    }

                    // Report the accumulated loss divided by the interval, then start a new window.
                    if (step > 0 && step % reportEvery == 0)
                    {
                        var meanLoss = runningLoss / reportEvery;
                        Console.WriteLine($"epoch: {epoch} | batch: {step} / {rowCount / bptt} | loss: {meanLoss:0.00}");
                        runningLoss = 0.0f;
                    }

                    // Release every tensor created in this iteration except the mask,
                    // which is reused by the next iteration.
                    scope.DisposeEverythingBut(mask);

                    step++;
                    offset += bptt;
                }
            }
        }