// Residual block: a stack of conv + batch-norm parts with a skip connection around the whole block.
object callImpl(IGraphNodeBase inputs, dynamic training) {
    IGraphNodeBase result = inputs;
    var batchNormExtraArgs = new PythonDict<string, object>();
    if (training != null)
        batchNormExtraArgs["training"] = training;

    for (int part = 0; part < PartCount; part++) {
        result = this.convs[part].apply(result);
        result = this.batchNorms[part].apply(result, kwargs: batchNormExtraArgs);
        // no activation after the last part: ReLU is applied after the skip connection instead
        if (part + 1 != PartCount)
            result = tf.nn.relu(result);
    }

    // skip connection: add the block input to the block output
    result = (Tensor)result + inputs;
    return tf.nn.relu(result);
}
public string Sample(Session session, dynamic chars, IReadOnlyDictionary<char, int> vocabulary,
                     int num = 200, string prime = "The ", int samplingType = 1) {
    dynamic state = this.CreateInitialState(session, vocabulary, prime);

    // picks an index at random, with probability proportional to its weight:
    // builds the cumulative sums, then binary-searches a uniform draw scaled to the total
    int WeightedPick(IEnumerable<float32> weights) {
        double[] sums = weights.Aggregate(
            (sum: 0.0, sums: new List<double>()),
            (acc, value) => {
                acc.sum += (double)value;
                acc.sums.Add(acc.sum);
                return (acc.sum, acc.sums);
            }).sums.ToArray();
        int index = Array.BinarySearch(sums, this.random.NextDouble() * sums.Last());
        return index < 0 ? ~index : index;
    }

    string ret = prime;
    char chr = prime.Last();
    for (int i = 0; i < num; i++) {
        var x = np.zeros(new TensorShape(1, 1));
        x[0, 0] = vocabulary[chr];
        var feed = new PythonDict<dynamic, dynamic> {
            [this.inputData] = x,
            [this.initialState] = state,
        };
        var outputs = session.run(new dynamic[] { this.probs, this.finalState }, feed);
        var probs = outputs[0];
        state = outputs[1];
        ndarray computedProbabilities = probs[0];
        dynamic sample;
        switch (samplingType) {
        case 1:
        case 2 when chr == ' ':
            // sample the next character from the predicted distribution
            sample = WeightedPick(computedProbabilities.Cast<ndarray>().SelectMany(s => s.Cast<float32>()));
            break;
        case 0:
        case 2:
            // greedy: always take the most likely character
            sample = computedProbabilities.argmax();
            break;
        default:
            throw new NotSupportedException();
        }

        var pred = chars[sample];
        ret += pred;
        chr = pred;
    }
    return ret;
}
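// Usage sketch (an assumption, not part of the original sample): given a trained CharRNNModel,
// an open Session with its checkpoint restored, and the chars/vocabulary pair the model was
// trained with, text generation reduces to a single Sample call.
static string GenerateText(CharRNNModel model, Session session,
                           dynamic chars, IReadOnlyDictionary<char, int> vocabulary) {
    // 500 characters, seeded with "The ", weighted sampling on every step
    return model.Sample(session, chars, vocabulary, num: 500, prime: "The ", samplingType: 1);
}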
// Loads the default hyperparameters and overrides them from models/<modelName>/hparams.json.
public static HParams LoadHParams(string modelName) {
    var hParams = DefaultHParams;
    string paramsOverridePath = Path.Combine("models", modelName, "hparams.json");
    var overrides = JsonConvert.DeserializeObject<Dictionary<string, object>>(
        File.ReadAllText(paramsOverridePath));
    var pyDict = new PythonDict<object, object>();
    foreach (var entry in overrides)
        pyDict.Add(entry.Key, entry.Value);
    hParams.override_from_dict(pyDict);
    return hParams;
}
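// Usage sketch (assumed directory layout, following the path convention above): load the
// hyperparameters for a model stored under models/117M/hparams.json. "117M" is only an
// illustrative model name; nothing in LoadHParams prescribes it.
var hParams = LoadHParams("117M");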
private dynamic CreateInitialState(Session session, IReadOnlyDictionary<char, int> vocabulary, string prime) {
    var state = session.run(this.rnn.zero_state(1, tf.float32));
    // warm up the RNN state by feeding the priming text, except its last character
    foreach (char chr in prime.Substring(0, prime.Length - 1)) {
        var x = np.zeros(new TensorShape(1, 1));
        x[0, 0] = vocabulary[chr];
        var feed = new PythonDict<dynamic, dynamic> {
            [this.inputData] = x,
            [this.initialState] = state,
        };
        state = Enumerable.First(session.run(new dynamic[] { this.finalState }, feed));
    }
    return state;
}
static void Main() {
    GradientLog.OutputWriter = Console.Out;
    GradientSetup.UseEnvironmentFromVariable();

    var input = tf.placeholder(tf.float32, new TensorShape(null, 1), name: "x");
    var output = tf.placeholder(tf.float32, new TensorShape(null, 1), name: "y");

    var hiddenLayer = tf.layers.dense(input, hiddenSize,
        activation: tf.sigmoid_fn,
        kernel_initializer: new ones_initializer(),
        bias_initializer: new random_uniform_initializer(minval: -x1, maxval: -x0),
        name: "hidden");
    var model = tf.layers.dense(hiddenLayer, units: 1, name: "output");
    var cost = tf.losses.mean_squared_error(output, model);
    var training = new GradientDescentOptimizer(learning_rate: learningRate).minimize(cost);

    dynamic init = tf.global_variables_initializer();
    new Session().UseSelf(session => {
        session.run(new[] { init });

        foreach (int iteration in Enumerable.Range(0, iterations)) {
            var (trainInputs, trainOutputs) = GenerateTestValues();
            var iterationDataset = new PythonDict<dynamic, object> {
                [input] = trainInputs,
                [output] = trainOutputs,
            };
            session.run(new[] { training }, feed_dict: iterationDataset);

            if (iteration % 100 == 99)
                Console.WriteLine($"cost = {session.run(new[] { cost }, feed_dict: iterationDataset)}");
        }

        var (testInputs, testOutputs) = GenerateTestValues();
        var testValues = session.run(new[] { model }, feed_dict: new PythonDict<dynamic, object> {
            [input] = testInputs,
        });

        // print the learned weights of both layers
        new variable_scope("hidden", reuse: true).UseSelf(_ => {
            Variable w = tf.get_variable("kernel");
            Variable b = tf.get_variable("bias");
            Console.WriteLine("hidden:");
            Console.WriteLine($"kernel= {w.eval()}");
            Console.WriteLine($"bias = {b.eval()}");
        });
        new variable_scope("output", reuse: true).UseSelf(_ => {
            Variable w = tf.get_variable("kernel");
            Variable b = tf.get_variable("bias");
            Console.WriteLine("output:");
            Console.WriteLine($"kernel= {w.eval()}");
            Console.WriteLine($"bias = {b.eval()}");
        });
    });
}
public void Train(string checkpoint, string run, int? counter,
                  dynamic sessionConfig = null, CancellationToken cancellation = default) {
    Session sess = sessionConfig == null
        ? new Session()
        : Session.NewDyn(config: sessionConfig);
    sess.UseSelf(session => {
        var context = tf.placeholder(tf.int32, new TensorShape(this.batchSize, null));
        var output = Gpt2Model.Model(this.hParams, input: context);
        // next-token prediction: labels are the context shifted one position left,
        // and the logits for the last position are dropped
        Tensor labels = context[Range.All, Range.StartAt(1)];
        Tensor logits = output["logits"][Range.All, Range.EndAt(new Index(1, fromEnd: true))];
        var loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits_dyn(
                labels: labels, logits: logits));

        var sample = Gpt2Sampler.SampleSequence(
            this.hParams,
            length: this.sampleLength,
            context: context,
            batchSize: this.batchSize,
            temperature: 1.0f,
            topK: 40);

        var trainVars = tf.trainable_variables().Where((dynamic var) => var.name.Contains("model"));
        var optimizer = new AdamOptimizer(learning_rate: 0.0002).minimize(loss, var_list: trainVars);

        var saver = new Saver(
            var_list: trainVars,
            max_to_keep: 5,
            keep_checkpoint_every_n_hours: 1);

        session.run(tf.global_variables_initializer());

        Console.WriteLine("Loading checkpoint " + checkpoint);
        saver.restore(session, checkpoint);

        Console.WriteLine("Loading dataset...");
        var sampler = new TrainingSampler(this.dataset, this.random);
        Console.WriteLine($"Dataset has {sampler.TokenCount} tokens");

        // resume the step counter from the previous run, if any
        string counterFile = Path.Combine(Gpt2Checkpoints.CheckpointDir, run, "counter");
        if (counter == null && File.Exists(counterFile))
            counter = int.Parse(File.ReadAllText(counterFile), CultureInfo.InvariantCulture) + 1;
        counter = counter ?? 1;

        string runCheckpointDir = Path.Combine(Gpt2Checkpoints.CheckpointDir, run);
        string runSampleDir = Path.Combine(SampleDir, run);

        void Save() {
            Directory.CreateDirectory(runCheckpointDir);
            Console.WriteLine("Saving " + Path.Combine(runCheckpointDir, Invariant($"model-{counter}")));
            saver.save(session, Path.Combine(runCheckpointDir, "model"), global_step: counter.Value);
            File.WriteAllText(path: counterFile, contents: Invariant($"{counter}"));
        }

        void GenerateSamples() {
            var contextTokens = np.array(new[] { this.encoder.EncodedEndOfText });
            var allText = new List<string>();
            int index = 0;
            string text = null;
            while (index < this.SampleNum) {
                var @out = session.run(sample, feed_dict: new PythonDict<object, object> {
                    [context] = Enumerable.Repeat(contextTokens, this.batchSize),
                });
                foreach (int i in Enumerable.Range(0, Math.Min(this.SampleNum - index, this.batchSize))) {
                    text = this.encoder.Decode(@out[i]);
                    text = Invariant($"======== SAMPLE {index + 1} ========\n{text}\n");
                    allText.Add(text);
                    index++;
                }
            }
            Console.WriteLine(text);
            Directory.CreateDirectory(runSampleDir);
            File.WriteAllLines(
                path: Path.Combine(runSampleDir, Invariant($"samples-{counter}")),
                contents: allText);
        }

        var avgLoss = (0.0, 0.0);
        var startTime = DateTime.Now;

        while (!cancellation.IsCancellationRequested) {
            if (counter % this.SaveEvery == 0)
                Save();
            if (counter % this.SampleEvery == 0)
                GenerateSamples();

            var batch = Enumerable.Range(0, this.batchSize)
                .Select(_ => sampler.Sample(1024))
                .ToArray();

            var placeholderValues = new PythonDict<object, object> {
                [context] = batch,
            };
            var tuple = session.run_dyn((optimizer, loss), feed_dict: placeholderValues);
            var lv = tuple.Item2;

            // exponentially decaying running average of the loss
            avgLoss = (avgLoss.Item1 * 0.99 + lv, avgLoss.Item2 * 0.99 + 1);

            Console.WriteLine($"[{counter} | {DateTime.Now - startTime}] loss={lv} avg={avgLoss.Item1 / avgLoss.Item2}");

            counter++;
        }
        Console.WriteLine("Interrupted");
        Save();
    });
}
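// Usage sketch (hypothetical wiring, not from the sample): Train runs until the CancellationToken
// fires, so a console host could map Ctrl+C to cancellation. `trainer`, `checkpointPath`, and
// `runName` are placeholders for objects the host would construct elsewhere.
var cancellationSource = new CancellationTokenSource();
Console.CancelKeyPress += (_, eventArgs) => {
    eventArgs.Cancel = true;        // keep the process alive so Train can save a final checkpoint
    cancellationSource.Cancel();    // Train exits its loop and calls Save()
};
trainer.Train(checkpointPath, runName, counter: null, cancellation: cancellationSource.Token);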
public int Run() {
    dynamic datasets = Py.Import("sklearn.datasets");
    dynamic slice = PythonEngine.Eval("slice");
    var iris = datasets.load_iris();
    // take only the first two features of every sample: iris.data[:, :2]
    dynamic firstTwoFeaturesIndex = new PyTuple(new PyObject[] { slice(null), slice(null, 2) });
    var input = iris.data.__getitem__(firstTwoFeaturesIndex);
    IEnumerable target = iris.target;
    // binary labels: +1 for class 0 (setosa), -1 for everything else
    var expectedOutput = target.Cast<dynamic>()
        .Select(l => (int)l == 0 ? 1 : -1)
        .ToArray();

    // 80/20 train/test split
    int trainCount = expectedOutput.Length * 4 / 5;
    var trainIn = numpy.np.array(((IEnumerable)input).Cast<dynamic>().Take(trainCount));
    var trainOut = numpy.np.array(expectedOutput.Take(trainCount));
    var testIn = numpy.np.array(((IEnumerable)input).Cast<dynamic>().Skip(trainCount));
    var testOut = numpy.np.array(expectedOutput.Skip(trainCount));

    var inPlace = tf.placeholder(shape: new TensorShape(null, input.shape[1]), dtype: tf.float32);
    var outPlace = tf.placeholder(shape: new TensorShape(null, 1), dtype: tf.float32);

    var w = new Variable(tf.random_normal(shape: new TensorShape((int)input.shape[1], 1)));
    var b = new Variable(tf.random_normal(shape: new TensorShape(1, 1)));

    var totalLoss = Loss(w, b, inPlace, outPlace);
    var accuracy = Inference(w, b, inPlace, outPlace);
    var trainOp = new GradientDescentOptimizer(this.flags.InitialLearningRate).minimize(totalLoss);

    var expectedTrainOut = trainOut.reshape((trainOut.Length, 1));
    var expectedTestOut = testOut.reshape((testOut.Length, 1));

    new Session().UseSelf(sess => {
        var init = tf.global_variables_initializer();
        sess.run(init);

        for (int step = 0; step < this.flags.StepCount; step++) {
            (numpy.ndarray @in, numpy.ndarray @out) = NextBatch(trainIn, trainOut, sampleCount: this.flags.BatchSize);

            var feed = new PythonDict<object, object> {
                [inPlace] = @in,
                [outPlace] = @out,
            };
            sess.run(trainOp, feed_dict: feed);

            var loss = sess.run(totalLoss, feed_dict: feed);
            var trainAcc = sess.run(accuracy, new PythonDict<object, object> {
                [inPlace] = trainIn,
                [outPlace] = expectedTrainOut,
            });
            var testAcc = sess.run(accuracy, new PythonDict<object, object> {
                [inPlace] = testIn,
                [outPlace] = expectedTestOut,
            });

            if ((step + 1) % 100 == 0)
                Console.WriteLine($"Step {step}: test acc {testAcc}, train acc {trainAcc}");
        }

        //if (this.flags.IsEvaluation)
        //{
        //}
    });
    return 0;
}
static int Train(CharRNNTrainingParameters args) {
    var dataLoader = new TextLoader(args.dataDir, args.BatchSize, args.SeqLength);
    args.VocabularySize = dataLoader.vocabularySize;

    dynamic checkpoint = null;
    if (!string.IsNullOrEmpty(args.initFrom)) {
        // resuming: the saved model must be compatible with the current settings
        checkpoint = tf.train.latest_checkpoint(args.initFrom);
        var savedArgs = JsonConvert.DeserializeObject<CharRNNModelParameters>(
            File.ReadAllText(Path.Combine(args.initFrom, ConfigFileName)));
        Trace.Assert(savedArgs.ModelType == args.ModelType);
        Trace.Assert(savedArgs.RNNSize == args.RNNSize);
        Trace.Assert(savedArgs.LayerCount == args.LayerCount);
        Trace.Assert(savedArgs.SeqLength == args.SeqLength);

        var (chars, vocabulary) = LoadCharsVocabulary(Path.Combine(args.saveDir, CharsVocabularyFileName));
        Trace.Assert(dataLoader.chars.SequenceEqual(chars));
        Trace.Assert(dataLoader.vocabulary.SequenceEqual(vocabulary));
    }

    Directory.CreateDirectory(args.saveDir);
    File.WriteAllText(Path.Combine(args.saveDir, ConfigFileName), JsonConvert.SerializeObject(args));
    File.WriteAllText(Path.Combine(args.saveDir, CharsVocabularyFileName),
        JsonConvert.SerializeObject((dataLoader.chars, dataLoader.vocabulary)));

    var model = new CharRNNModel(args, training: true);

    new Session().UseSelf(session => {
        var summaries = tf.summary.merge_all();
        var writer = new FileWriter(Path.Combine(args.logDir, DateTime.Now.ToString("s").Replace(':', '-')));
        writer.add_graph(session.graph);

        session.run(new dynamic[] { tf.global_variables_initializer() });
        var globals = tf.global_variables();
        var saver = new Saver(globals);
        if (checkpoint != null)
            saver.restore(session, checkpoint);

        int totalNumberOfBatches = args.epochs * dataLoader.batchCount;
        for (int epoch = 0; epoch < args.epochs; epoch++) {
            // exponential learning rate decay per epoch
            session.run(new[] { tf.assign(
                model.learningRate,
                tf.constant(args.learningRate * Math.Pow(args.decayRate, epoch))) });
            dataLoader.ResetBatchPointer();
            var state = session.run(model.initialState.Items().Cast<object>());
            var stopwatch = Stopwatch.StartNew();
            for (int batch = 0; batch < dataLoader.batchCount; batch++) {
                stopwatch.Restart();
                var (input, targets) = dataLoader.NextBatch();
                var feed = new PythonDict<dynamic, dynamic> {
                    [model.inputData] = input,
                    [model.targets] = targets,
                };
                // feed the (c, h) pair of every LSTM layer's initial state
                foreach (var (i, tuple) in model.initialState.Items().Enumerate()) {
                    feed[tuple.c] = state[i].c;
                    feed[tuple.h] = state[i].h;
                }

                var step = session.run(new dynamic[] { summaries, model.cost, model.finalState, model.trainOp }, feed);

                int currentBatch = (epoch * dataLoader.batchCount) + batch;
                writer.add_summary(step[0], currentBatch);

                var time = stopwatch.Elapsed;
                Console.WriteLine(
                    $"{currentBatch}/{totalNumberOfBatches} " +
                    $"(epoch {epoch}), " +
                    $"train loss = {step[1]}, time/batch = {time}");

                if ((currentBatch % args.saveEvery == 0)
                    || (epoch == args.epochs - 1 && batch == dataLoader.batchCount - 1)) {
                    string checkpointPath = Path.Combine(args.saveDir, "model.ckpt");
                    saver.save(session, checkpointPath, global_step: currentBatch);
                    Console.WriteLine("model saved to " + checkpointPath);
                }
            }
        }
    });
    return 0;
}