Example #1
        private static double evaluate(TorchTensor eval_data, TransformerModel model, Loss criterion, double lr, int bptt, int ntokens, Optimizer optimizer)
        {
            model.Eval();   // put the model in evaluation mode (disables dropout)

            var total_loss = 0.0f;
            var src_mask   = model.GenerateSquareSubsequentMask(bptt);  // causal attention mask, reused across batches
            var batch      = 0;

            for (int i = 0; i < eval_data.shape[0] - 1; batch++, i += bptt)
            {
                var (data, targets) = GetBatch(eval_data, i, bptt);
                if (data.shape[0] != bptt)
                {
                    // The last batch may be shorter than bptt; rebuild the mask to match.
                    src_mask.Dispose();
                    src_mask = model.GenerateSquareSubsequentMask(data.shape[0]);
                }
                var output = model.forward(data, src_mask);
                var loss   = criterion(output.view(-1, ntokens), targets);
                // Weight each batch's loss by its length so the final average is per-token.
                total_loss += data.shape[0] * loss.to(Device.CPU).DataItem<float>();

                data.Dispose();
                targets.Dispose();

                GC.Collect();   // encourage finalization of tensors (output, loss) not explicitly disposed
            }

            return total_loss / eval_data.shape[0];
        }
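
All four examples call a GetBatch helper that is not shown on this page. Below is a minimal sketch of what it plausibly looks like, written against the newer TorchSharp API used in Examples #2 and #3 (it assumes `using static TorchSharp.torch;` and the usual language-model layout where targets are the input shifted by one token and flattened for the loss):

        // Hypothetical helper; the real sample's implementation may differ.
        private static (Tensor, Tensor) GetBatch(Tensor source, int index, int bptt)
        {
            // The final batch may be shorter than bptt, which is why the callers
            // rebuild src_mask whenever data.shape[0] != bptt.
            var len = (int)Math.Min(bptt, source.shape[0] - 1 - index);

            var data   = source[TensorIndex.Slice(index, index + len)];
            var target = source[TensorIndex.Slice(index + 1, index + 1 + len)].reshape(-1);

            return (data, target);
        }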
Example #2
        private static double evaluate(Tensor eval_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer)
        {
            model.eval();

            using (var d = torch.NewDisposeScope()) {
                var src_mask = model.GenerateSquareSubsequentMask(bptt);

                var total_loss = 0.0f;
                var batch      = 0;


                for (int i = 0; i < eval_data.shape[0] - 1; batch++, i += bptt)
                {
                    var (data, targets) = GetBatch(eval_data, i, bptt);
                    if (data.shape[0] != bptt)
                    {
                        // The last batch may be shorter than bptt; the superseded mask
                        // is reclaimed by the dispose scope at the end of the iteration.
                        src_mask = model.GenerateSquareSubsequentMask(data.shape[0]);
                    }
                    using (var output = model.forward(data, src_mask)) {
                        var loss = criterion(output.view(-1, ntokens), targets);
                        // Weight by batch length so the final average is per-token.
                        total_loss += data.shape[0] * loss.to(torch.CPU).item<float>();
                    }

                    data.Dispose();
                    targets.Dispose();

                    d.DisposeEverythingBut(src_mask);   // free every tensor created this iteration except the mask
                }

                return total_loss / eval_data.shape[0];
            }
        }
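
Example #2 differs from Example #1 mainly in memory management: instead of manual Dispose calls plus GC.Collect(), it wraps the loop in torch.NewDisposeScope() and sweeps each iteration with DisposeEverythingBut. A minimal, self-contained sketch of those scope mechanics (the tensors here are illustrative, not from the sample):

        // Assumes: using static TorchSharp.torch;
        using (var scope = NewDisposeScope())
        {
            // Every tensor created while the scope is active is tracked by it.
            var a    = rand(64, 64);
            var b    = a.matmul(a);   // intermediate result, also tracked
            var keep = b.sum();

            // Dispose a and b now; keep survives until the scope itself closes.
            scope.DisposeEverythingBut(keep);

            Console.WriteLine(keep.item<float>());  // keep is still valid here
        }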
Example #3
        private static void train(int epoch, Tensor train_data, TransformerModel model, Loss criterion, int bptt, int ntokens, torch.optim.Optimizer optimizer)
        {
            model.train();

            using (var d = torch.NewDisposeScope()) {
                var total_loss = 0.0f;

                var batch        = 0;
                var log_interval = 200;

                var src_mask = model.GenerateSquareSubsequentMask(bptt);

                var tdlen = train_data.shape[0];


                for (int i = 0; i < tdlen - 1; batch++, i += bptt)
                {
                    var (data, targets) = GetBatch(train_data, i, bptt);
                    optimizer.zero_grad();

                    if (data.shape[0] != bptt)
                    {
                        // Rebuild the mask for the shorter final batch; the old one is
                        // reclaimed by the dispose scope at the end of the iteration.
                        src_mask = model.GenerateSquareSubsequentMask(data.shape[0]);
                    }

                    using (var output = model.forward(data, src_mask)) {
                        var loss = criterion(output.view(-1, ntokens), targets);
                        loss.backward();
                        // Clip the gradient norm to 0.5 to avoid exploding gradients.
                        torch.nn.utils.clip_grad_norm_(model.parameters().ToArray(), 0.5);
                        optimizer.step();

                        total_loss += loss.to(torch.CPU).item<float>();
                    }

                    if (batch % log_interval == 0 && batch > 0)
                    {
                        var cur_loss = total_loss / log_interval;
                        Console.WriteLine($"epoch: {epoch} | batch: {batch} / {tdlen / bptt} | loss: {cur_loss:0.00}");
                        total_loss = 0;
                    }

                    d.DisposeEverythingBut(src_mask);   // free every tensor created this iteration except the mask
                }
            }
        }
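
Every loop above builds a causal attention mask through model.GenerateSquareSubsequentMask, whose body is not shown here. A plausible implementation inside TransformerModel, following the standard PyTorch recipe (a sketch, not the sample's actual code):

        // Hypothetical: an additive mask with 0 on and below the diagonal and
        // -inf above it, so position i cannot attend to any later position j > i.
        public Tensor GenerateSquareSubsequentMask(long size)
        {
            return torch.triu(torch.ones(size, size) * float.NegativeInfinity, diagonal: 1);
        }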
Example #4
        private static void train(int epoch, TorchTensor train_data, TransformerModel model, Loss criterion, int bptt, int ntokens, Optimizer optimizer)
        {
            model.Train();

            var total_loss = 0.0f;

            var src_mask = model.GenerateSquareSubsequentMask(bptt);

            var batch        = 0;
            var log_interval = 200;

            var tdlen = train_data.shape[0];

            for (int i = 0; i < tdlen - 1; batch++, i += bptt)
            {
                var (data, targets) = GetBatch(train_data, i, bptt);
                optimizer.zero_grad();

                if (data.shape[0] != bptt)
                {
                    // The last batch may be shorter than bptt; rebuild the mask to match.
                    src_mask.Dispose();
                    src_mask = model.GenerateSquareSubsequentMask(data.shape[0]);
                }

                var output = model.forward(data, src_mask);
                var loss   = criterion(output.view(-1, ntokens), targets);

                loss.backward();
                // Clip the gradient norm to 0.5 to avoid exploding gradients.
                model.parameters().clip_grad_norm(0.5);
                optimizer.step();

                total_loss += loss.to(Device.CPU).DataItem<float>();

                GC.Collect();   // encourage finalization of tensors (output, loss) not explicitly disposed

                if (batch % log_interval == 0 && batch > 0)
                {
                    var cur_loss = total_loss / log_interval;
                    Console.WriteLine($"epoch: {epoch} | batch: {batch} / {tdlen / bptt} | loss: {cur_loss:0.00}");
                    total_loss = 0;
                }
            }
        }
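
None of the examples show the code that drives them. Below is a hypothetical epoch loop wiring train and evaluate together against the newer API of Examples #2 and #3; the criterion, the SGD learning rate, and the model, train_data, valid_data, epochs, bptt, and ntokens variables are assumptions for illustration, not taken from this page:

        // Hypothetical driver; model, train_data, valid_data, epochs, bptt and
        // ntokens are assumed to be defined elsewhere.
        var criterion = torch.nn.CrossEntropyLoss();
        var optimizer = torch.optim.SGD(model.parameters(), learningRate: 5.0);

        for (var epoch = 1; epoch <= epochs; epoch++)
        {
            train(epoch, train_data, model, criterion, bptt, ntokens, optimizer);

            var val_loss = evaluate(valid_data, model, criterion, bptt, ntokens, optimizer);
            Console.WriteLine($"epoch: {epoch} | validation loss: {val_loss:0.00}");
        }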