Example #1
        public static (Tensor, Tensor) dynamic_rnn(RNNCell cell, Tensor inputs_tensor,
                                                   Tensor sequence_length  = null, Tensor initial_state = null,
                                                   TF_DataType dtype       = TF_DataType.DtInvalid,
                                                   int? parallel_iterations = null, bool swap_memory = false, bool time_major = false)
        {
            Tensor outputs = null, final_state = null;

            with(tf.variable_scope("rnn"), scope =>
            {
                VariableScope varscope = scope;
                var flat_input         = nest.flatten(inputs_tensor);

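                // time_major == false means inputs arrive batch-major as (B, T, D);
                // the internal loop is time-major, so transpose to (T, B, D) up front.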
                if (!time_major)
                {
                    flat_input = flat_input.Select(x => ops.convert_to_tensor(x)).ToList();
                    flat_input = flat_input.Select(x => _transpose_batch_time(x)).ToList();
                }

                parallel_iterations = parallel_iterations ?? 32;

                if (sequence_length != null)
                {
                    throw new NotImplementedException("dynamic_rnn sequence_length has value");
                }

                var batch_size = _best_effort_input_batch_size(flat_input);

                Tensor state = null;
                if (initial_state != null)
                {
                    state = initial_state;
                }
                else
                {
                    state = cell.get_initial_state(batch_size: batch_size, dtype: dtype);
                }

                var inputs = nest.pack_sequence_as(structure: inputs_tensor, flat_sequence: flat_input);

                (outputs, final_state) = _dynamic_rnn_loop(
                    cell,
                    inputs as Tensor,
                    state,
                    parallel_iterations: parallel_iterations.Value,
                    swap_memory: swap_memory,
                    sequence_length: sequence_length,
                    dtype: dtype);
            });

            return (outputs, final_state);
        }
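
A call-site sketch for the overload above, assuming a BasicRnnCell-style cell and the placeholder/TensorShape helpers used throughout these examples; the cell type, shapes, and sizes are illustrative, not taken from a specific project:

        // Hypothetical setup: batch-major float inputs of shape [batch, time, depth].
        RNNCell cell   = new BasicRnnCell(num_units: 128);
        Tensor  inputs = tf.placeholder(tf.float32, shape: new TensorShape(32, 20, 64));

        // time_major defaults to false, so the inputs above are transposed to
        // [time, batch, depth] inside dynamic_rnn before the loop runs.
        var (outputs, final_state) = dynamic_rnn(cell, inputs,
                                                 dtype: TF_DataType.TF_FLOAT,
                                                 time_major: false);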
Example #2
        /// <summary>
        /// Internal implementation of Dynamic RNN.
        /// </summary>
        /// <param name="cell">The RNN cell stepped once per time step.</param>
        /// <param name="inputs">Time-major inputs, shape [time, batch, depth].</param>
        /// <param name="initial_state">Initial state of the cell.</param>
        /// <param name="parallel_iterations">Number of loop iterations allowed to run in parallel.</param>
        /// <param name="swap_memory">Whether to swap loop tensors between GPU and CPU memory.</param>
        /// <param name="sequence_length">Optional per-example sequence lengths.</param>
        /// <param name="dtype">Expected dtype of state and output when it cannot be inferred.</param>
        /// <returns>A tuple of (outputs, final_state).</returns>
        private static (Tensor, Tensor) _dynamic_rnn_loop(RNNCell cell, Tensor inputs, Tensor initial_state,
                                                          int parallel_iterations, bool swap_memory, Tensor sequence_length = null, TF_DataType dtype = TF_DataType.DtInvalid)
        {
            var state      = initial_state;
            var state_size = cell.state_size;

            var flat_input       = nest.flatten(inputs);
            var flat_output_size = nest.flatten(cell.output_size);

            // Construct an initial output
            var input_shape      = array_ops.shape(flat_input[0]);
            var time_steps       = input_shape.slice(0);
            var batch_size       = _best_effort_input_batch_size(flat_input);
            var inputs_got_shape = flat_input.Select(input_ => input_.TensorShape.with_rank_at_least(3)).ToArray();

            var dims = inputs_got_shape[0].dims.Take(2).ToArray();

            var(const_time_steps, const_batch_size) = (dims[0], dims[1]);

            foreach (var shape in inputs_got_shape)
            {
                if (shape.dims[2] == -1)
                {
                    throw new ValueError("Input size (depth of inputs) must be accessible via shape inference," +
                                         " but saw value None.");
                }

                var got_time_steps = shape.dims[0];
                var got_batch_size = shape.dims[1];

                if (const_time_steps != got_time_steps)
                {
                    throw new ValueError("Time steps is not the same for all the elements in the input in a " +
                                         "batch.");
                }

                if (const_batch_size != got_batch_size)
                {
                    throw new ValueError("Batch_size is not the same for all the elements in the input.");
                }
            }

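            // Builds a zero-filled tensor of shape [batch_size, size_]; these zeros
            // pad the output for batch entries already past their sequence length.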
            Func <int, Tensor> _create_zero_arrays = (size_) =>
            {
                var size = rnn_cell_impl._concat(batch_size, size_);
                return(array_ops.zeros(
                           array_ops.stack(size), dtype: _infer_state_dtype(dtype, state)));
            };

            // Prepare dynamic conditional copying of state & output
            var flat_zero_output = flat_output_size.Select(output => _create_zero_arrays(output)).ToArray();
            var zero_output      = nest.pack_sequence_as(structure: cell.output_size, flat_sequence: flat_zero_output);

            Tensor min_sequence_length = null, max_sequence_length = null;

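            // The extremes of sequence_length let the loop stop at max_sequence_length
            // and switch to copy-through behavior for examples shorter than the batch
            // maximum once time >= min_sequence_length.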
            if (sequence_length != null)
            {
                min_sequence_length = math_ops.reduce_min(sequence_length);
                max_sequence_length = math_ops.reduce_max(sequence_length);
            }
            else
            {
                max_sequence_length = time_steps;
            }

            var time = array_ops.constant(0, dtype: dtypes.int32, name: "time");

            string base_name = null;

            with(ops.name_scope("dynamic_rnn"), scope => base_name = scope);

            Func <string, TensorShape, TF_DataType, TensorArray> _create_ta = (name, element_shape, dtype_) =>
            {
                var ta = new TensorArray(dtype: dtype_,
                                         size: time_steps,
                                         element_shape: element_shape,
                                         tensor_array_name: base_name + name);
                return(ta);
            };

            bool in_graph_mode = true;
            var  output_ta     = new List <TensorArray>();

            if (in_graph_mode)
            {
                foreach (var(i, out_size) in enumerate(flat_output_size))
                {
                    output_ta.Add(_create_ta($"output_{i}",
                                             new TensorShape(const_batch_size).concatenate(
                                                 _maybe_tensor_shape_from_tensor(out_size)),
                                             _infer_state_dtype(dtype, state)));
                }
            }

            throw new NotImplementedException("");
        }
Example #3
        public CharRNNModel(CharRNNModelParameters parameters, bool training = true)
        {
            this.parameters = parameters ?? throw new ArgumentNullException(nameof(parameters));
            if (!training)
            {
                this.parameters.BatchSize = 1;
                this.parameters.SeqLength = 1;
            }

            if (!ModelTypeToCellFunction.TryGetValue(parameters.ModelType, out this.cellFactory))
            {
                throw new NotSupportedException(parameters.ModelType.ToString());
            }

            for (int i = 0; i < parameters.LayerCount; i++)
            {
                RNNCell cell = this.cellFactory(parameters.RNNSize);
                if (training && (parameters.KeepOutputProbability < 1 || parameters.KeepInputProbability < 1))
                {
                    cell = new DropoutWrapper(cell,
                                              input_keep_prob: parameters.KeepInputProbability,
                                              output_keep_prob: parameters.KeepOutputProbability);
                }
                this.cells.Add(cell);
            }
            this.rnn          = new MultiRNNCell(this.cells, state_is_tuple: true);
            this.inputData    = tf.placeholder(tf.int32, new TensorShape(parameters.BatchSize, parameters.SeqLength));
            this.targets      = tf.placeholder(tf.int32, new TensorShape(parameters.BatchSize, parameters.SeqLength));
            this.initialState = this.rnn.zero_state(parameters.BatchSize, tf.float32);

            Variable softmax_W = null, softmax_b = null;

            new variable_scope("rnnlm").UseSelf(_ => {
                softmax_W = tf.get_variable("softmax_w", new TensorShape(parameters.RNNSize, parameters.VocabularySize));
                softmax_b = tf.get_variable("softmax_b", new TensorShape(parameters.VocabularySize));
            });

            Variable embedding = tf.get_variable("embedding", new TensorShape(parameters.VocabularySize, parameters.RNNSize));
            Tensor   input     = tf.nn.embedding_lookup(embedding, this.inputData);

            // dropout beta testing: double check which one should affect next line
            if (training && parameters.KeepOutputProbability < 1)
            {
                input = tf.nn.dropout(input, parameters.KeepOutputProbability);
            }

            IList <Tensor> inputs = tf.split(input, parameters.SeqLength, axis: 1);

            inputs = inputs.Select(i => (Tensor)tf.squeeze(i, axis: 1)).ToList();

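            // Used only when sampling (training == false): feeds the previous step's
            // prediction back in as the next input instead of the ground-truth token.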
            dynamic Loop(dynamic prev, dynamic _)
            {
                prev = tf.matmul(prev, softmax_W) + softmax_b;
                var prevSymbol = tf.stop_gradient(tf.argmax(prev, 1));

                return(tf.nn.embedding_lookup(embedding, prevSymbol));
            }

            var decoder = tensorflow.contrib.legacy_seq2seq.legacy_seq2seq.rnn_decoder_dyn(
                decoder_inputs: inputs,
                initial_state: this.initialState.Items(),
                cell: this.rnn,
                loop_function: training ? null : PythonFunctionContainer.Of(new Func <dynamic, dynamic, dynamic>(Loop)), scope: "rnnlm");
            IList <Tensor> outputs             = decoder.Item1;
            var            lastState           = (seq2seqState)decoder.Item2;
            dynamic        concatenatedOutputs = tf.concat(outputs, 1);
            var            output = tensorflow.tf.reshape(concatenatedOutputs, new[] { -1, parameters.RNNSize });

            this.logits = tf.matmul(output, softmax_W) + softmax_b;
            this.probs  = tf.nn.softmax(new[] { this.logits });
            this.loss   = tensorflow.contrib.legacy_seq2seq.legacy_seq2seq.sequence_loss_by_example_dyn(
                logits: new[] { this.logits },
                targets: new[] { tf.reshape(this.targets, new[] { -1 }) },
                weights: new[] { tf.ones(new[] { parameters.BatchSize * parameters.SeqLength }) });

            Tensor cost = null;

            new name_scope("cost").UseSelf(_ => {
                cost = tf.reduce_sum(this.loss) / parameters.BatchSize / parameters.SeqLength;
            });
            this.cost         = cost;
            this.finalState   = lastState;
            this.learningRate = new Variable(0.0, trainable: false);
            var tvars = tf.trainable_variables();

            IEnumerable <object> grads     = tf.clip_by_global_norm(tf.gradients(this.cost, tvars), parameters.GradientClip).Item1;
            AdamOptimizer        optimizer = null;

            new name_scope("optimizer").UseSelf(_ => optimizer = new AdamOptimizer(this.learningRate));
            this.trainOp = optimizer.apply_gradients(grads.Zip(tvars, (grad, @var) => (dynamic)(grad, @var)));

            tf.summary.histogram("logits", new[] { this.logits });
            tf.summary.histogram("loss", new[] { this.loss });
            tf.summary.histogram("train_loss", new[] { this.cost });
        }
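
A construction sketch for the model above; CharRNNModelParameters is assumed to expose the properties the constructor reads (BatchSize, SeqLength, RNNSize, LayerCount, VocabularySize, the keep probabilities, GradientClip, and a ModelType known to ModelTypeToCellFunction), and every value below is a placeholder:

        var parameters = new CharRNNModelParameters {
            BatchSize             = 50,
            SeqLength             = 50,
            RNNSize               = 128,
            LayerCount            = 2,
            VocabularySize        = 65,
            KeepInputProbability  = 0.8,
            KeepOutputProbability = 0.8,
            GradientClip          = 5,
            // ModelType must map to an entry in ModelTypeToCellFunction,
            // or the constructor throws NotSupportedException.
        };

        // training: false collapses BatchSize/SeqLength to 1 and installs the
        // sampling loop_function; training: true keeps teacher forcing.
        var model = new CharRNNModel(parameters, training: true);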
Example #4
        /// <summary>
        /// Internal implementation of Dynamic RNN.
        /// </summary>
        /// <param name="cell">The RNN cell stepped once per time step.</param>
        /// <param name="inputs">Time-major inputs, shape [time, batch, depth].</param>
        /// <param name="initial_state">Initial state of the cell.</param>
        /// <param name="parallel_iterations">Number of loop iterations allowed to run in parallel.</param>
        /// <param name="swap_memory">Whether to swap loop tensors between GPU and CPU memory.</param>
        /// <param name="sequence_length">Optional per-example sequence lengths.</param>
        /// <param name="dtype">Expected dtype of state and output when it cannot be inferred.</param>
        /// <returns>A tuple of (outputs, final_state).</returns>
        private static (Tensor, Tensor) _dynamic_rnn_loop(RNNCell cell, Tensor inputs, Tensor initial_state,
                                                          int parallel_iterations, bool swap_memory, Tensor sequence_length = null, TF_DataType dtype = TF_DataType.DtInvalid)
        {
            var state      = initial_state;
            var state_size = cell.state_size;

            var flat_input       = nest.flatten(inputs);
            var flat_output_size = nest.flatten(cell.output_size);

            // Construct an initial output
            var input_shape      = array_ops.shape(flat_input[0]);
            var time_steps       = input_shape.slice(0);
            var batch_size       = _best_effort_input_batch_size(flat_input);
            var inputs_got_shape = flat_input.Select(input_ => input_.TensorShape.with_rank_at_least(3)).ToArray();

            var dims = inputs_got_shape[0].dims.Take(2).ToArray();

            var(const_time_steps, const_batch_size) = (dims[0], dims[1]);

            foreach (var shape in inputs_got_shape)
            {
                if (shape.dims[2] == -1)
                {
                    throw new ValueError("Input size (depth of inputs) must be accessible via shape inference," +
                                         " but saw value None.");
                }

                var got_time_steps = shape.dims[0];
                var got_batch_size = shape.dims[1];

                if (const_time_steps != got_time_steps)
                {
                    throw new ValueError("Time steps is not the same for all the elements in the input in a " +
                                         "batch.");
                }

                if (const_batch_size != got_batch_size)
                {
                    throw new ValueError("Batch_size is not the same for all the elements in the input.");
                }
            }

            Func <int, Tensor> _create_zero_arrays = (size_) =>
            {
                var size = rnn_cell_impl._concat(batch_size, size_);
                return(array_ops.zeros(
                           array_ops.stack(size), dtype: _infer_state_dtype(dtype, state)));
            };

            // Prepare dynamic conditional copying of state & output
            var flat_zero_output = flat_output_size.Select(output => _create_zero_arrays(output)).ToArray();
            var zero_output      = nest.pack_sequence_as(structure: cell.output_size, flat_sequence: flat_zero_output);

            Tensor min_sequence_length = null, max_sequence_length = null;

            if (sequence_length != null)
            {
                min_sequence_length = math_ops.reduce_min(sequence_length);
                max_sequence_length = math_ops.reduce_max(sequence_length);
            }
            else
            {
                max_sequence_length = time_steps;
            }

            var time = array_ops.constant(0, dtype: dtypes.int32, name: "time");

            string base_name = null;

            tf_with(ops.name_scope("dynamic_rnn"), scope => base_name = scope);

            Func <string, TensorShape, TF_DataType, TensorArray> _create_ta = (name, element_shape, dtype_) =>
            {
                var ta = new TensorArray(dtype: dtype_,
                                         size: time_steps,
                                         element_shape: new[] { element_shape },
                                         tensor_array_name: base_name + name);
                return(ta);
            };

            bool in_graph_mode = true;
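            // In graph mode each flat output and each flat input gets its own
            // TensorArray so the while_loop body can read or write one time step at a time.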
            var  output_ta     = new List <TensorArray>();
            var  input_ta      = new List <TensorArray>();

            if (in_graph_mode)
            {
                foreach (var(i, out_size) in enumerate(flat_output_size))
                {
                    output_ta.Add(_create_ta($"output_{i}",
                                             new TensorShape(const_batch_size).concatenate(
                                                 _maybe_tensor_shape_from_tensor(out_size)),
                                             _infer_state_dtype(dtype, state)));
                }

                foreach (var(i, flat_input_i) in enumerate(flat_input))
                {
                    input_ta.Add(_create_ta($"input_{i}",
                                            new TensorShape(flat_input_i.dims.Skip(1).ToArray()),
                                            flat_input_i.dtype));
                }

                for (int i = 0; i < input_ta.Count; i++)
                {
                    var(ta, input_) = (input_ta[i], flat_input[i]);
                }
            }

            // Make sure that we run at least 1 step, if necessary, to ensure
            // the TensorArrays pick up the dynamic shape.
            Tensor loop_bound;

            if (in_graph_mode)
            {
                loop_bound = math_ops.minimum(
                    time_steps, math_ops.maximum(1, max_sequence_length));
            }

            /* Placeholder for the loop that still has to be wired up:
             * Func<Tensor, Tensor> cond = (ctime) =>
             * {
             *     return null;
             * };
             *
             * control_flow_ops.while_loop(
             *     cond: cond,
             *     body: ...);
             */

            throw new NotImplementedException("");
        }
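
The commented-out block above hints at the missing control flow. A rough sketch of that remaining wiring, modeled on the Python tf.nn.dynamic_rnn implementation; the while_loop overload, the _time_step body, and the loop-variable packing are all assumptions about APIs this port has not implemented yet:

        // Hypothetical continuation: step the cell until loop_bound is reached.
        // _time_step (assumed helper) would read input_ta at `time`, run the cell,
        // write output_ta, and return (time + 1, output_ta, new_state).
        Func<Tensor, Tensor> cond = (ctime) => ctime < loop_bound;

        var results = control_flow_ops.while_loop(
            cond: cond,
            body: _time_step,
            loop_vars: (time, output_ta, state),
            parallel_iterations: parallel_iterations,
            swap_memory: swap_memory);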