Example #1
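A residual convolution block and a character-level RNN sampler, written against the Gradient (TensorFlow for .NET) binding.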
        object callImpl(IGraphNodeBase inputs, dynamic training)
        {
            IGraphNodeBase result = inputs;

            var batchNormExtraArgs = new PythonDict <string, object>();

            if (training != null)
            {
                batchNormExtraArgs["training"] = training;
            }

            for (int part = 0; part < PartCount; part++)
            {
                result = this.convs[part].apply(result);
                result = this.batchNorms[part].apply(result, kwargs: batchNormExtraArgs);
                if (part + 1 != PartCount)
                {
                    result = tf.nn.relu(result);
                }
            }

            // Residual (skip) connection: add the block input back to the
            // transformed output before the final activation.
            result = (Tensor)result + inputs;

            return(tf.nn.relu(result));
        }
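For orientation: callImpl implements the standard residual block, relu(F(x) + x). A minimal wiring sketch, assuming the enclosing layer class is called ResidualBlock and is applied Keras-style (both the class name and its constructor arguments are assumptions, not part of the sample above):

        // Hypothetical: ResidualBlock is the layer class whose callImpl is shown above.
        var block = new ResidualBlock(partCount: 2, filters: 64);
        IGraphNodeBase features = (IGraphNodeBase)block.apply(inputImages);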
        public string Sample(Session session, dynamic chars, IReadOnlyDictionary <char, int> vocabulary, int num = 200, string prime = "The ", int samplingType = 1)
        {
            dynamic state = this.CreateInitialState(session, vocabulary, prime);

            int WeightedPick(IEnumerable <float32> weights)
            {
                // Build the cumulative distribution of the weights, then draw a
                // uniform sample in [0, total) and locate the bucket it falls into.
                var    sums  = new List <double>();
                double total = 0;
                foreach (float32 weight in weights)
                {
                    total += (double)weight;
                    sums.Add(total);
                }
                int index = Array.BinarySearch(sums.ToArray(), this.random.NextDouble() * total);

                // BinarySearch returns the bitwise complement of the insertion
                // point when there is no exact match.
                return(index < 0 ? ~index : index);
            }

            string ret = prime;
            char   chr = prime.Last();

            for (int i = 0; i < num; i++)
            {
                var x = np.zeros(new TensorShape(1, 1));
                x[0, 0] = vocabulary[chr];
                var feed = new PythonDict <dynamic, dynamic> {
                    [this.inputData]    = x,
                    [this.initialState] = state,
                };
                var outputs = session.run(new dynamic[] { this.probs, this.finalState }, feed);
                var probs   = outputs[0];
                state = outputs[1];
                ndarray computedProbabilities = probs[0];

                dynamic sample;
                switch (samplingType)
                {
                case 1:
                case 2 when chr == ' ':
                    sample = WeightedPick(computedProbabilities.Cast <ndarray>().SelectMany(s => s.Cast <float32>()));
                    break;

                case 0:
                case 2:
                    sample = computedProbabilities.argmax();
                    break;

                default:
                    throw new NotSupportedException();
                }

                var pred = chars[sample];
                ret += pred;
                chr  = pred;
            }
            return(ret);
        }
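A minimal invocation sketch for Sample, assuming a trained model instance plus the chars/vocabulary pair produced during training (all three names are assumptions):

        // Hypothetical usage; model, chars, and vocabulary come from a prior training run.
        new Session().UseSelf(session => {
            string text = model.Sample(session, chars, vocabulary,
                                       num: 500, prime: "The ", samplingType: 1);
            Console.WriteLine(text);
        });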
Example #3
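Loading GPT-2 hyperparameter overrides from JSON, priming a char-RNN's initial state from a text prompt, and a small feed-forward regression network trained with gradient descent.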
        public static HParams LoadHParams(string modelName)
        {
            var    hParams            = DefaultHParams;
            string paramsOverridePath = Path.Combine("models", modelName, "hparams.json");
            var    overrides          = JsonConvert.DeserializeObject <Dictionary <string, object> >(File.ReadAllText(paramsOverridePath));
            var    pyDict             = new PythonDict <object, object>();

            foreach (var entry in overrides)
            {
                pyDict.Add(entry.Key, entry.Value);
            }
            hParams.override_from_dict(pyDict);
            return(hParams);
        }
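Usage is a one-liner; the model name selects a subdirectory of models. For reference, the hparams.json shipped with OpenAI's GPT-2 "117M" release defines n_vocab=50257, n_ctx=1024, n_embd=768, n_head=12, and n_layer=12.

        // Assumes models/117M/hparams.json exists on disk.
        HParams hParams = LoadHParams("117M");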
        private dynamic CreateInitialState(Session session, IReadOnlyDictionary <char, int> vocabulary, string prime)
        {
            var state = session.run(this.rnn.zero_state(1, tf.float32));

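            // Feed every character of the prime except the last one; the final
            // character seeds the sampling loop in Sample.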
            foreach (char chr in prime.Substring(0, prime.Length - 1))
            {
                var x = np.zeros(new TensorShape(1, 1));
                x[0, 0] = vocabulary[chr];
                var feed = new PythonDict <dynamic, dynamic> {
                    [this.inputData]    = x,
                    [this.initialState] = state,
                };
                state = Enumerable.First(session.run(new dynamic[] { this.finalState }, feed));
            }

            return(state);
        }
        static void Main()
        {
            GradientLog.OutputWriter = Console.Out;
            GradientSetup.UseEnvironmentFromVariable();

            var input  = tf.placeholder(tf.float32, new TensorShape(null, 1), name: "x");
            var output = tf.placeholder(tf.float32, new TensorShape(null, 1), name: "y");

            var hiddenLayer = tf.layers.dense(input, hiddenSize,
                                              activation: tf.sigmoid_fn,
                                              kernel_initializer: new ones_initializer(),
                                              bias_initializer: new random_uniform_initializer(minval: -x1, maxval: -x0),
                                              name: "hidden");

            var model = tf.layers.dense(hiddenLayer, units: 1, name: "output");

            var cost = tf.losses.mean_squared_error(output, model);

            var training = new GradientDescentOptimizer(learning_rate: learningRate).minimize(cost);

            dynamic init = tf.global_variables_initializer();

            new Session().UseSelf(session => {
                session.run(new[] { init });

                foreach (int iteration in Enumerable.Range(0, iterations))
                {
                    var(trainInputs, trainOutputs) = GenerateTestValues();
                    var iterationDataset           = new PythonDict <dynamic, object> {
                        [input]  = trainInputs,
                        [output] = trainOutputs,
                    };
                    session.run(new[] { training }, feed_dict: iterationDataset);

                    if (iteration % 100 == 99)
                    {
                        Console.WriteLine($"cost = {session.run(new[] { cost }, feed_dict: iterationDataset)}");
                    }
                }

                var(testInputs, testOutputs) = GenerateTestValues();

                var testValues = session.run(new[] { model }, feed_dict: new PythonDict <dynamic, object> {
                    [input] = testInputs,
                });

                new variable_scope("hidden", reuse: true).UseSelf(_ => {
                    Variable w = tf.get_variable("kernel");
                    Variable b = tf.get_variable("bias");
                    Console.WriteLine("hidden:");
                    Console.WriteLine($"kernel= {w.eval()}");
                    Console.WriteLine($"bias  = {b.eval()}");
                });

                new variable_scope("output", reuse: true).UseSelf(_ => {
                    Variable w = tf.get_variable("kernel");
                    Variable b = tf.get_variable("bias");
                    Console.WriteLine("hidden:");
                    Console.WriteLine($"kernel= {w.eval()}");
                    Console.WriteLine($"bias  = {b.eval()}");
                });
            });
        }
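Main references hiddenSize, learningRate, iterations, x0, x1, and GenerateTestValues, none of which appear above. One plausible set of definitions, fitting sin(x) on [x0, x1]; every value and the target function below are illustrative assumptions:

        const int hiddenSize = 10;
        const float learningRate = 0.01f;
        const int iterations = 1000;
        const float x0 = -1, x1 = 1;
        static readonly Random random = new Random();

        // Hypothetical data generator: (x, sin(x)) pairs shaped [count, 1].
        static (ndarray, ndarray) GenerateTestValues(int count = 100)
        {
            var inputs  = new float[count, 1];
            var outputs = new float[count, 1];
            for (int i = 0; i < count; i++)
            {
                float x = (float)(x0 + random.NextDouble() * (x1 - x0));
                inputs[i, 0]  = x;
                outputs[i, 0] = (float)Math.Sin(x);
            }
            return (np.array(inputs), np.array(outputs));
        }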
Example #6
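A GPT-2 fine-tuning loop: builds the next-token loss over model variables only, restores a checkpoint, then alternates Adam steps with periodic checkpoint saves and sample generation.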
        public void Train(string checkpoint, string run, int? counter, dynamic sessionConfig = null, CancellationToken cancellation = default)
        {
            // Honor an explicit session config when one is provided.
            Session sess = sessionConfig == null
                ? new Session()
                : Session.NewDyn(config: sessionConfig);

            sess.UseSelf(session => {
                var context   = tf.placeholder(tf.int32, new TensorShape(this.batchSize, null));
                var output    = Gpt2Model.Model(this.hParams, input: context);
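                // Next-token objective: labels drop the first token and logits
                // drop the last position, so logits[:, t] predicts context[:, t + 1].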
                Tensor labels = context[Range.All, Range.StartAt(1)];
                Tensor logits = output["logits"][Range.All, Range.EndAt(new Index(1, fromEnd: true))];
                var loss      = tf.reduce_mean(
                    tf.nn.sparse_softmax_cross_entropy_with_logits_dyn(
                        labels: labels,
                        logits: logits));

                var sample = Gpt2Sampler.SampleSequence(
                    this.hParams,
                    length: this.sampleLength,
                    context: context,
                    batchSize: this.batchSize,
                    temperature: 1.0f,
                    topK: 40);

                var trainVars = tf.trainable_variables().Where((dynamic var) => var.name.Contains("model"));
                var optimizer = new AdamOptimizer(learning_rate: 0.0002).minimize(loss, var_list: trainVars);

                var saver = new Saver(
                    var_list: trainVars,
                    max_to_keep: 5,
                    keep_checkpoint_every_n_hours: 1);

                session.run(tf.global_variables_initializer());

                Console.WriteLine("Loading checkpoint " + checkpoint);
                saver.restore(session, checkpoint);

                Console.WriteLine("Loading dataset...");
                var sampler = new TrainingSampler(this.dataset, this.random);
                Console.WriteLine($"Dataset has {sampler.TokenCount} tokens");

                string counterFile = Path.Combine(Gpt2Checkpoints.CheckpointDir, run, "counter");
                if (counter == null && File.Exists(counterFile))
                {
                    counter = int.Parse(File.ReadAllText(counterFile), CultureInfo.InvariantCulture) + 1;
                }
                counter = counter ?? 1;

                string runCheckpointDir = Path.Combine(Gpt2Checkpoints.CheckpointDir, run);
                string runSampleDir     = Path.Combine(SampleDir, run);

                void Save()
                {
                    Directory.CreateDirectory(runCheckpointDir);
                    Console.WriteLine("Saving " + Path.Combine(runCheckpointDir, Invariant($"model-{counter}")));
                    saver.save(session,
                               Path.Combine(runCheckpointDir, "model"),
                               global_step: counter.Value);
                    File.WriteAllText(path: counterFile, contents: Invariant($"{counter}"));
                }

                void GenerateSamples()
                {
                    var contextTokens = np.array(new[] { this.encoder.EncodedEndOfText });
                    var allText       = new List <string>();
                    int index         = 0;
                    string text       = null;
                    while (index < this.SampleNum)
                    {
                        var @out = session.run(sample, feed_dict: new PythonDict <object, object> {
                            [context] = Enumerable.Repeat(contextTokens, this.batchSize),
                        });
                        foreach (int i in Enumerable.Range(0, Math.Min(this.SampleNum - index, this.batchSize)))
                        {
                            text = this.encoder.Decode(@out[i]);
                            text = Invariant($"======== SAMPLE {index + 1} ========\n{text}\n");
                            allText.Add(text);
                            index++;
                        }
                    }
                    Console.WriteLine(text);
                    Directory.CreateDirectory(runSampleDir);
                    File.WriteAllLines(
                        path: Path.Combine(runSampleDir, Invariant($"samples-{counter}")),
                        contents: allText);
                }

                var avgLoss   = (0.0, 0.0);
                var startTime = DateTime.Now;

                while (!cancellation.IsCancellationRequested)
                {
                    if (counter % this.SaveEvery == 0)
                    {
                        Save();
                    }
                    if (counter % this.SampleEvery == 0)
                    {
                        GenerateSamples();
                    }

                    var batch = Enumerable.Range(0, this.batchSize)
                                .Select(_ => sampler.Sample(1024))
                                .ToArray();

                    var placeholderValues = new PythonDict <object, object> {
                        [context] = batch,
                    };
                    var tuple = session.run_dyn((optimizer, loss), feed_dict: placeholderValues);

                    var lv = tuple.Item2;

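                    // Exponentially decaying average: numerator and effective count
                    // both decay by 0.99 per step, so recent batches dominate.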
                    avgLoss = (avgLoss.Item1 * 0.99 + lv, avgLoss.Item2 * 0.99 + 1);

                    Console.WriteLine($"[{counter} | {DateTime.Now-startTime}] loss={lv} avg={avgLoss.Item1/avgLoss.Item2}");

                    counter++;
                }

                Console.WriteLine("Interrupted");
                Save();
            });
        }
Example #7
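A linear SVM on the first two iris features (loaded through Python interop with scikit-learn), followed by a char-RNN training loop with checkpointing and TensorBoard summaries.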
        public int Run()
        {
            dynamic datasets = Py.Import("sklearn.datasets");
            dynamic slice    = PythonEngine.Eval("slice");
            var     iris     = datasets.load_iris();
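            // Python index tuple (slice(None), slice(None, 2)): the equivalent
            // of iris.data[:, :2], keeping only the first two features.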
            dynamic firstTwoFeaturesIndex = new PyTuple(new PyObject[] {
                slice(null),
                slice(null, 2)
            });
            var         input          = iris.data.__getitem__(firstTwoFeaturesIndex);
            IEnumerable target         = iris.target;
            var         expectedOutput = target.Cast <dynamic>()
                                         .Select(l => (int)l == 0 ? 1 : -1)
                                         .ToArray();
            int trainCount = expectedOutput.Length * 4 / 5;
            var trainIn    = numpy.np.array(((IEnumerable)input).Cast <dynamic>().Take(trainCount));
            var trainOut   = numpy.np.array(expectedOutput.Take(trainCount));
            var testIn     = numpy.np.array(((IEnumerable)input).Cast <dynamic>().Skip(trainCount));
            var testOut    = numpy.np.array(expectedOutput.Skip(trainCount));

            var inPlace  = tf.placeholder(shape: new TensorShape(null, input.shape[1]), dtype: tf.float32);
            var outPlace = tf.placeholder(shape: new TensorShape(null, 1), dtype: tf.float32);
            var w        = new Variable(tf.random_normal(shape: new TensorShape((int)input.shape[1], 1)));
            var b        = new Variable(tf.random_normal(shape: new TensorShape(1, 1)));

            var totalLoss = Loss(w, b, inPlace, outPlace);
            var accuracy  = Inference(w, b, inPlace, outPlace);

            var trainOp = new GradientDescentOptimizer(this.flags.InitialLearningRate).minimize(totalLoss);

            var expectedTrainOut = trainOut.reshape((trainOut.Length, 1));
            var expectedTestOut  = testOut.reshape((testOut.Length, 1));

            new Session().UseSelf(sess =>
            {
                var init = tensorflow.tf.global_variables_initializer();
                sess.run(init);
                for (int step = 0; step < this.flags.StepCount; step++)
                {
                    (numpy.ndarray @in, numpy.ndarray @out) = NextBatch(trainIn, trainOut, sampleCount: this.flags.BatchSize);
                    var feed = new PythonDict <object, object> {
                        [inPlace]  = @in,
                        [outPlace] = @out,
                    };
                    sess.run(trainOp, feed_dict: feed);

                    var loss     = sess.run(totalLoss, feed_dict: feed);
                    var trainAcc = sess.run(accuracy, new PythonDict <object, object>
                    {
                        [inPlace]  = trainIn,
                        [outPlace] = expectedTrainOut,
                    });
                    var testAcc = sess.run(accuracy, new PythonDict <object, object>
                    {
                        [inPlace]  = testIn,
                        [outPlace] = expectedTestOut,
                    });

                    if ((step + 1) % 100 == 0)
                    {
                        Console.WriteLine($"Step{step}: test acc {testAcc}, train acc {trainAcc}");
                    }
                }

            });

            return(0);
        }
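Loss, Inference, and NextBatch are used above but not shown. A sketch of what a soft-margin linear SVM loss could look like in the same API; the hinge-loss formulation, the dynamic parameter types, and the 0.01 regularization constant are all assumptions:

        // Hypothetical: f(x) = x*w - b with labels in {+1, -1};
        // hinge loss max(0, 1 - y*f(x)) plus L2 weight regularization.
        static dynamic Loss(dynamic w, dynamic b, dynamic input, dynamic expectedOutput)
        {
            var modelOutput = tf.subtract(tf.matmul(input, w), b);
            var l2Norm = tf.reduce_sum(tf.square(w));
            var hinge = tf.reduce_mean(tf.maximum(
                tf.constant(0.0f),
                tf.subtract(tf.constant(1.0f), tf.multiply(modelOutput, expectedOutput))));
            return tf.add(hinge, tf.multiply(tf.constant(0.01f), l2Norm));
        }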
        static int Train(CharRNNTrainingParameters args)
        {
            var dataLoader = new TextLoader(args.dataDir, args.BatchSize, args.SeqLength);

            args.VocabularySize = dataLoader.vocabularySize;
            dynamic checkpoint = null;

            if (!string.IsNullOrEmpty(args.initFrom))
            {
                checkpoint = tf.train.latest_checkpoint(args.initFrom);
                var savedArgs =
                    JsonConvert.DeserializeObject <CharRNNModelParameters>(
                        File.ReadAllText(Path.Combine(args.initFrom, ConfigFileName)));
                Trace.Assert(savedArgs.ModelType == args.ModelType);
                Trace.Assert(savedArgs.RNNSize == args.RNNSize);
                Trace.Assert(savedArgs.LayerCount == args.LayerCount);
                Trace.Assert(savedArgs.SeqLength == args.SeqLength);

                var(chars, vocabulary) = LoadCharsVocabulary(Path.Combine(args.saveDir, CharsVocabularyFileName));
                Trace.Assert(dataLoader.chars.SequenceEqual(chars));
                Trace.Assert(dataLoader.vocabulary.SequenceEqual(vocabulary));
            }

            Directory.CreateDirectory(args.saveDir);
            File.WriteAllText(Path.Combine(args.saveDir, ConfigFileName), JsonConvert.SerializeObject(args));
            File.WriteAllText(Path.Combine(args.saveDir, CharsVocabularyFileName), JsonConvert.SerializeObject((dataLoader.chars, dataLoader.vocabulary)));

            var model = new CharRNNModel(args, training: true);

            new Session().UseSelf(session => {
                var summaries = tf.summary.merge_all();
                var writer    = new FileWriter(Path.Combine(args.logDir, DateTime.Now.ToString("s").Replace(':', '-')));
                writer.add_graph(session.graph);

                session.run(new dynamic[] { tf.global_variables_initializer() });
                var globals = tf.global_variables();
                var saver   = new Saver(globals);
                if (checkpoint != null)
                {
                    saver.restore(session, checkpoint);
                }

                int totalNumberOfBatches = args.epochs * dataLoader.batchCount;

                for (int epoch = 0; epoch < args.epochs; epoch++)
                {
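                    // Exponential learning-rate decay: lr * decayRate^epoch,
                    // assigned once at the start of each epoch.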
                    session.run(new[] { tf.assign(
                                            model.learningRate,
                                            tf.constant(args.learningRate * Math.Pow(args.decayRate, epoch))) });
                    dataLoader.ResetBatchPointer();
                    var state     = session.run(model.initialState.Items().Cast <object>());
                    var stopwatch = Stopwatch.StartNew();
                    for (int batch = 0; batch < dataLoader.batchCount; batch++)
                    {
                        stopwatch.Restart();
                        var(input, targets) = dataLoader.NextBatch();
                        var feed            = new PythonDict <dynamic, dynamic> {
                            [model.inputData] = input,
                            [model.targets]   = targets,
                        };
                        foreach (var(i, tuple) in model.initialState.Items().Enumerate())
                        {
                            feed[tuple.c] = state[i].c;
                            feed[tuple.h] = state[i].h;
                        }

                        var step = session.run(new dynamic[] { summaries, model.cost, model.finalState, model.trainOp }, feed);

                        int currentBatch = (epoch * dataLoader.batchCount) + batch;
                        writer.add_summary(step[0], currentBatch);

                        var time = stopwatch.Elapsed;
                        Console.WriteLine(
                            $"{currentBatch}/{totalNumberOfBatches} " +
                            $"(epoch {epoch}), " +
                            $"train loss = {step[1]}, time/batch = {time}");
                        if ((currentBatch % args.saveEvery == 0) ||
                            (epoch == args.epochs - 1 && batch == dataLoader.batchCount - 1))
                        {
                            string checkpointPath = Path.Combine(args.saveDir, "model.ckpt");
                            saver.save(session, checkpointPath, global_step: currentBatch);
                            Console.WriteLine("model saved to " + checkpointPath);
                        }
                    }
                }
            });
            return(0);
        }