public static (Tensor, Tensor) dynamic_rnn(RNNCell cell, Tensor inputs_tensor,
    Tensor sequence_length = null, Tensor initial_state = null,
    TF_DataType dtype = TF_DataType.DtInvalid, int? parallel_iterations = null,
    bool swap_memory = false, bool time_major = false)
{
    (Tensor, Tensor) result = (null, null);

    with(tf.variable_scope("rnn"), scope =>
    {
        VariableScope varscope = scope;
        var flat_input = nest.flatten(inputs_tensor);

        // Inputs arrive batch-major ([batch, time, depth]) by default; the internal
        // loop expects time-major ([time, batch, depth]), so transpose here.
        if (!time_major)
        {
            flat_input = flat_input.Select(x => ops.convert_to_tensor(x)).ToList();
            flat_input = flat_input.Select(x => _transpose_batch_time(x)).ToList();
        }

        parallel_iterations = parallel_iterations ?? 32;

        if (sequence_length != null)
            throw new NotImplementedException("dynamic_rnn sequence_length has value");

        var batch_size = _best_effort_input_batch_size(flat_input);

        // Use the caller-provided initial state, or ask the cell for a zero state.
        Tensor state = initial_state ?? cell.get_initial_state(batch_size: batch_size, dtype: dtype);

        var inputs = nest.pack_sequence_as(structure: inputs_tensor, flat_sequence: flat_input);

        result = _dynamic_rnn_loop(
            cell,
            inputs as Tensor,
            state,
            parallel_iterations: parallel_iterations.Value,
            swap_memory: swap_memory,
            sequence_length: sequence_length,
            dtype: dtype);
    });

    return result;
}
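For reference, a call to `dynamic_rnn` as ported above might look like the following minimal sketch. The cell type, placeholder shapes, and dtype argument are illustrative assumptions, not taken from the code itself.

// Minimal usage sketch; the cell type and shapes are assumptions.
// Inputs are batch-major [batch, time, depth] because time_major defaults to false.
var cell = new BasicRnnCell(num_units: 64);                       // assumed cell type
var inputs = tf.placeholder(tf.float32, new TensorShape(32, 10, 8)); // batch=32, time=10, depth=8

var (outputs, final_state) = dynamic_rnn(
    cell,
    inputs,
    dtype: TF_DataType.TF_FLOAT, // needed so the cell can build a zero initial state
    swap_memory: true);          // trade host/device copies for lower peak GPU memory
// outputs: [32, 10, 64]; final_state: [32, 64]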
public CharRNNModel(CharRNNModelParameters parameters, bool training = true)
{
    this.parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));

    if (!training)
    {
        this.parameters.BatchSize = 1;
        this.parameters.SeqLength = 1;
    }

    if (!ModelTypeToCellFunction.TryGetValue(parameters.ModelType, out this.cellFactory))
        throw new NotSupportedException(parameters.ModelType.ToString());

    // Stack LayerCount cells, wrapping each in dropout during training if requested.
    for (int i = 0; i < parameters.LayerCount; i++)
    {
        RNNCell cell = this.cellFactory(parameters.RNNSize);
        if (training && (parameters.KeepOutputProbability < 1 || parameters.KeepInputProbability < 1))
        {
            cell = new DropoutWrapper(cell,
                input_keep_prob: parameters.KeepInputProbability,
                output_keep_prob: parameters.KeepOutputProbability);
        }
        this.cells.Add(cell);
    }
    this.rnn = new MultiRNNCell(this.cells, state_is_tuple: true);

    this.inputData = tf.placeholder(tf.int32, new TensorShape(parameters.BatchSize, parameters.SeqLength));
    this.targets = tf.placeholder(tf.int32, new TensorShape(parameters.BatchSize, parameters.SeqLength));
    this.initialState = this.rnn.zero_state(parameters.BatchSize, tf.float32);

    // Output projection from RNN size to vocabulary logits.
    Variable softmax_W = null, softmax_b = null;
    new variable_scope("rnnlm").UseSelf(_ =>
    {
        softmax_W = tf.get_variable("softmax_w", new TensorShape(parameters.RNNSize, parameters.VocabularySize));
        softmax_b = tf.get_variable("softmax_b", new TensorShape(parameters.VocabularySize));
    });

    Variable embedding = tf.get_variable("embedding", new TensorShape(parameters.VocabularySize, parameters.RNNSize));
    Tensor input = tf.nn.embedding_lookup(embedding, this.inputData);

    // dropout beta testing: double check which one should affect next line
    if (training && parameters.KeepOutputProbability < 1)
        input = tf.nn.dropout(input, parameters.KeepOutputProbability);

    // Split [batch, seq, rnnSize] into SeqLength tensors of [batch, rnnSize].
    IList<Tensor> inputs = tf.split(input, parameters.SeqLength, axis: 1);
    inputs = inputs.Select(i => (Tensor)tf.squeeze(i, axis: 1)).ToList();

    // At sampling time, feed each step's most likely symbol back in as the next input.
    dynamic Loop(dynamic prev, dynamic _)
    {
        prev = tf.matmul(prev, softmax_W) + softmax_b;
        var prevSymbol = tf.stop_gradient(tf.argmax(prev, 1));
        return tf.nn.embedding_lookup(embedding, prevSymbol);
    }

    var decoder = tensorflow.contrib.legacy_seq2seq.legacy_seq2seq.rnn_decoder_dyn(
        decoder_inputs: inputs,
        initial_state: this.initialState.Items(),
        cell: this.rnn,
        loop_function: training ? null : PythonFunctionContainer.Of(new Func<dynamic, dynamic, dynamic>(Loop)),
        scope: "rnnlm");
    IList<Tensor> outputs = decoder.Item1;
    var lastState = (seq2seqState)decoder.Item2;

    dynamic concatenatedOutputs = tf.concat(outputs, 1);
    var output = tensorflow.tf.reshape(concatenatedOutputs, new[] { -1, parameters.RNNSize });

    this.logits = tf.matmul(output, softmax_W) + softmax_b;
    this.probs = tf.nn.softmax(new[] { this.logits });
    this.loss = tensorflow.contrib.legacy_seq2seq.legacy_seq2seq.sequence_loss_by_example_dyn(
        logits: new[] { this.logits },
        targets: new[] { tf.reshape(this.targets, new[] { -1 }) },
        weights: new[] { tf.ones(new[] { parameters.BatchSize * parameters.SeqLength }) });

    Tensor cost = null;
    new name_scope("cost").UseSelf(_ =>
    {
        cost = tf.reduce_sum(this.loss) / parameters.BatchSize / parameters.SeqLength;
    });
    this.cost = cost;
    this.finalState = lastState;

    this.learningRate = new Variable(0.0, trainable: false);
    var tvars = tf.trainable_variables();
    // Clip gradients by global norm before applying them.
    IEnumerable<object> grads = tf.clip_by_global_norm(tf.gradients(this.cost, tvars), parameters.GradientClip).Item1;
    AdamOptimizer optimizer = null;
    new name_scope("optimizer").UseSelf(_ => optimizer = new AdamOptimizer(this.learningRate));
    this.trainOp = optimizer.apply_gradients(grads.Zip(tvars, (grad, @var) => (dynamic)(grad, @var)));

    tf.summary.histogram("logits", new[] { this.logits });
    tf.summary.histogram("loss", new[] { this.loss });
    tf.summary.histogram("train_loss", new[] { this.cost });
}
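A rough sketch of driving this model for one training step follows. The parameter values, the `CharRNNModelType` enum name, the `x`/`y` batches, and the session plumbing are all assumptions; the fields used would also need to be exposed by the class.

// Hypothetical training-step sketch; parameter values, x/y batches, enum name,
// and field visibility are assumptions rather than part of the class above.
var parameters = new CharRNNModelParameters
{
    BatchSize = 50, SeqLength = 50, RNNSize = 128, LayerCount = 2,
    VocabularySize = 65, KeepInputProbability = 1.0, KeepOutputProbability = 0.8,
    ModelType = CharRNNModelType.LSTM, // assumed enum value
};
var model = new CharRNNModel(parameters, training: true);

new Session().UseSelf(session =>
{
    session.run(tf.global_variables_initializer());
    // x, y: int32 [BatchSize, SeqLength] batches from a (hypothetical) data loader.
    session.run(model.trainOp, feed_dict: new Dictionary<object, object>
    {
        [model.inputData] = x,
        [model.targets] = y,
    });
});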
/// <summary>
/// Internal implementation of Dynamic RNN.
/// </summary>
/// <param name="cell"></param>
/// <param name="inputs"></param>
/// <param name="initial_state"></param>
/// <param name="parallel_iterations"></param>
/// <param name="swap_memory"></param>
/// <param name="sequence_length"></param>
/// <param name="dtype"></param>
/// <returns></returns>
private static (Tensor, Tensor) _dynamic_rnn_loop(RNNCell cell, Tensor inputs, Tensor initial_state,
    int parallel_iterations, bool swap_memory, Tensor sequence_length = null,
    TF_DataType dtype = TF_DataType.DtInvalid)
{
    var state = initial_state;
    var state_size = cell.state_size;

    var flat_input = nest.flatten(inputs);
    var flat_output_size = nest.flatten(cell.output_size);

    // Construct an initial output. Inputs are time-major here: [time, batch, depth].
    var input_shape = array_ops.shape(flat_input[0]);
    var time_steps = input_shape.slice(0);
    var batch_size = _best_effort_input_batch_size(flat_input);
    var inputs_got_shape = flat_input.Select(input_ => input_.TensorShape.with_rank_at_least(3)).ToArray();

    var dims = inputs_got_shape[0].dims.Take(2).ToArray();
    var (const_time_steps, const_batch_size) = (dims[0], dims[1]);

    // All inputs must agree on the statically known time and batch dimensions.
    foreach (var shape in inputs_got_shape)
    {
        if (shape.dims[2] == -1)
            throw new ValueError("Input size (depth of inputs) must be accessible via shape inference, " +
                "but saw value None.");

        var got_time_steps = shape.dims[0];
        var got_batch_size = shape.dims[1];

        if (const_time_steps != got_time_steps)
            throw new ValueError("Time steps is not the same for all the elements in the input in a batch.");

        if (const_batch_size != got_batch_size)
            throw new ValueError("Batch_size is not the same for all the elements in the input.");
    }

    // Builds a zero tensor of shape [batch_size, size_], used to pad outputs
    // past the end of a sequence.
    Func<int, Tensor> _create_zero_arrays = size_ =>
    {
        var size = rnn_cell_impl._concat(batch_size, size_);
        return array_ops.zeros(
            array_ops.stack(size),
            dtype: _infer_state_dtype(dtype, state));
    };

    // Prepare dynamic conditional copying of state & output.
    var flat_zero_output = flat_output_size.Select(output => _create_zero_arrays(output)).ToArray();
    var zero_output = nest.pack_sequence_as(structure: cell.output_size, flat_sequence: flat_zero_output);

    Tensor min_sequence_length = null, max_sequence_length = null;
    if (sequence_length != null)
    {
        min_sequence_length = math_ops.reduce_min(sequence_length);
        max_sequence_length = math_ops.reduce_max(sequence_length);
    }
    else
    {
        max_sequence_length = time_steps;
    }

    var time = array_ops.constant(0, dtype: dtypes.int32, name: "time");

    string base_name = null;
    tf_with(ops.name_scope("dynamic_rnn"), scope => base_name = scope);

    Func<string, TensorShape, TF_DataType, TensorArray> _create_ta = (name, element_shape, dtype_) =>
    {
        var ta = new TensorArray(dtype: dtype_,
            size: time_steps,
            element_shape: new[] { element_shape },
            tensor_array_name: base_name + name);
        return ta;
    };

    bool in_graph_mode = true;
    var output_ta = new List<TensorArray>();
    var input_ta = new List<TensorArray>();

    if (in_graph_mode)
    {
        // One TensorArray per flat output, each element shaped [batch, output_size].
        foreach (var (i, out_size) in enumerate(flat_output_size))
            output_ta.Add(_create_ta($"output_{i}",
                new TensorShape(const_batch_size).concatenate(
                    _maybe_tensor_shape_from_tensor(out_size)),
                _infer_state_dtype(dtype, state)));

        // One TensorArray per flat input, each element shaped like one time slice.
        foreach (var (i, flat_input_i) in enumerate(flat_input))
            input_ta.Add(_create_ta($"input_{i}",
                new TensorShape(flat_input_i.dims.Skip(1).ToArray()),
                flat_input_i.dtype));

        // Unstack each time-major input into its TensorArray, one element per
        // time step (mirrors `ta.unstack(input_)` in the Python implementation).
        for (int i = 0; i < input_ta.Count; i++)
        {
            var (ta, input_) = (input_ta[i], flat_input[i]);
            input_ta[i] = ta.unstack(input_);
        }
    }

    // Make sure that we run at least 1 step, if necessary, to ensure
    // the TensorArrays pick up the dynamic shape.
    Tensor loop_bound;
    if (in_graph_mode)
        loop_bound = math_ops.minimum(
            time_steps, math_ops.maximum(1, max_sequence_length));

    // A while_loop over a cond/body pair goes here (see the sketch below).
    throw new NotImplementedException("");
}
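The `NotImplementedException` above marks where the step loop would go. The Python implementation this port mirrors drives the recurrence with `control_flow_ops.while_loop`, carrying `(time, output_ta, state)` through a `_time_step` body. The sketch below shows that shape; the `while_loop` overload and the body's contents are assumptions modeled on the Python original, not existing port code.

// Hypothetical completion sketch, modeled on Python's rnn._dynamic_rnn_loop.
// For brevity it threads only `time`; the real loop also carries output_ta and state.
Func<Tensor, Tensor> cond = ctime => math_ops.less(ctime, loop_bound);

Func<Tensor, Tensor> body = ctime =>
{
    // Per step: read input_ta[i].read(ctime) for each flat input, run
    // cell(input_t, state) -> (output_t, state), substitute zero_output past
    // each sequence's length, write output_t into output_ta, return ctime + 1.
    throw new NotImplementedException();
};

var final_time = control_flow_ops.while_loop(
    cond: cond,
    body: body,
    loop_vars: new[] { time },
    parallel_iterations: parallel_iterations,
    swap_memory: swap_memory);

// Afterwards the outputs are stacked from output_ta, giving a time-major
// [time, batch, output_size] tensor that dynamic_rnn transposes back if needed.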