Beispiel #1
0
        /// <summary>
        /// Builds the unrolled LSTM network inside <paramref name="net_param"/>, emitting one group of
        /// layers for each timestep from 1 through m_nT.
        /// </summary>
        /// <remarks>
        /// The generated layers consume the blobs "x" and "cont" (and "x_static" when m_bStaticInput is set),
        /// start from the INPUT layer tops "c_0" and "h_0", and end with "c_T" (the last cell blob routed
        /// through a SPLIT layer) and "h" (the blobs h_1..h_T concatenated along axis 0).
        /// </remarks>
        /// <param name="net_param">Network description that receives the generated layers, in order.</param>
        protected override void FillUnrolledNet(NetParameter net_param)
        {
            uint nHiddenSize = m_param.recurrent_param.num_output;
            m_log.CHECK_GT(nHiddenSize, 0, "num_output must be positive.");

            FillerParameter fillerWeights = m_param.recurrent_param.weight_filler;
            FillerParameter fillerBias    = m_param.recurrent_param.bias_filler;

            //--- Reusable layer templates (no bottoms/tops yet); each use below works on a clone. ---

            // Inner product without bias; its output is four times num_output wide.
            LayerParameter ipTemplate = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
            ipTemplate.inner_product_param.axis          = 2;
            ipTemplate.inner_product_param.bias_term     = false;
            ipTemplate.inner_product_param.num_output    = nHiddenSize * 4;
            ipTemplate.inner_product_param.weight_filler = fillerWeights.Clone();

            // Same inner product, but with a bias term and its filler.
            LayerParameter ipBiasTemplate = ipTemplate.Clone(false);
            ipBiasTemplate.inner_product_param.bias_term   = true;
            ipBiasTemplate.inner_product_param.bias_filler = fillerBias.Clone();

            LayerParameter sliceTemplate = new LayerParameter(LayerParameter.LayerType.SLICE);
            sliceTemplate.slice_param.axis = 0;

            LayerParameter splitTemplate = new LayerParameter(LayerParameter.LayerType.SPLIT);

            LayerParameter scaleTemplate = new LayerParameter(LayerParameter.LayerType.SCALE);
            scaleTemplate.scale_param.axis = 0;

            LayerParameter sumTemplate = new LayerParameter(LayerParameter.LayerType.ELTWISE);
            sumTemplate.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

            List<BlobShape> rgShapes = new List<BlobShape>();
            RecurrentInputShapes(rgShapes);
            m_log.CHECK_EQ(2, rgShapes.Count, "There should be 2 input shapes.");


            //--- Emit the layers ---

            // Input layer exposing the initial cell blob (shape 0) and hidden blob (shape 1).
            LayerParameter inputLayer = new LayerParameter(LayerParameter.LayerType.INPUT);
            inputLayer.top.Add("c_0");
            inputLayer.input_param.shape.Add(rgShapes[0].Clone());
            inputLayer.top.Add("h_0");
            inputLayer.input_param.shape.Add(rgShapes[1].Clone());
            net_param.layer.Add(inputLayer);

            // Slices "cont" along axis 0; one top per timestep is appended inside the loop below,
            // after this layer has already been added to the net.
            LayerParameter contSlicer = sliceTemplate.Clone(false);
            contSlicer.bottom.Add("cont");
            contSlicer.name = "cont_slice";
            contSlicer.slice_param.axis = 0;
            net_param.layer.Add(contSlicer);

            // Transform all timesteps of x in a single layer:
            //  W_xc_x = W_xc * x + b_c
            {
                LayerParameter xProjection = ipBiasTemplate.Clone(false);
                xProjection.name = "x_transform";
                xProjection.bottom.Add("x");
                xProjection.top.Add("W_xc_x");
                xProjection.parameters.Add(new ParamSpec("W_xc"));
                xProjection.parameters.Add(new ParamSpec("b_c"));
                xProjection.propagate_down.Add(true);
                net_param.layer.Add(xProjection);
            }

            if (m_bStaticInput)
            {
                // Transform the static input with the bias-free template on axis 1:
                //  W_xc_x_static = W_xc_static * x_static
                LayerParameter staticProjection = ipTemplate.Clone(false);
                staticProjection.name = "W_xc_x_static";
                staticProjection.inner_product_param.axis = 1;
                staticProjection.bottom.Add("x_static");
                staticProjection.top.Add("W_xc_x_static_preshape");
                staticProjection.parameters.Add(new ParamSpec("W_xc_static"));
                staticProjection.propagate_down.Add(true);
                net_param.layer.Add(staticProjection);

                // Reshape the result to (1, -1, num_output of the transform above).
                LayerParameter staticReshape = new LayerParameter(LayerParameter.LayerType.RESHAPE);
                staticReshape.name = "W_xc_x_static_reshape";

                BlobShape shapeTarget = staticReshape.reshape_param.shape;
                shapeTarget.dim.Add(1);   // One timestep.
                shapeTarget.dim.Add(-1);  // Left to be inferred (should come out as m_nN) so we can reshape on batch size.
                shapeTarget.dim.Add((int)staticProjection.inner_product_param.num_output);

                staticReshape.bottom.Add("W_xc_x_static_preshape");
                staticReshape.top.Add("W_xc_x_static");
                net_param.layer.Add(staticReshape);
            }

            // Slices W_xc_x along axis 0; its per-timestep tops are appended inside the loop below.
            LayerParameter xSlicer = sliceTemplate.Clone(false);
            xSlicer.bottom.Add("W_xc_x");
            xSlicer.name = "W_xc_x_slice";
            net_param.layer.Add(xSlicer);

            // Collects every h_t; a clone of it is added to the net only after the loop.
            LayerParameter hConcat = new LayerParameter(LayerParameter.LayerType.CONCAT) { name = "h_concat" };
            hConcat.concat_param.axis = 0;
            hConcat.top.Add("h");

            for (int nPrev = 0; nPrev < m_nT; nPrev++)
            {
                string strPrev = nPrev.ToString();          // t-1
                string strCur  = (nPrev + 1).ToString();    // t

                // Blob names used by this timestep.
                string strCont      = "cont_" + strCur;
                string strXt        = "W_xc_x_" + strCur;
                string strHPrev     = "h_" + strPrev;
                string strHCur      = "h_" + strCur;
                string strHConted   = "h_conted_" + strPrev;
                string strWhH       = "W_hc_h_" + strPrev;
                string strGateInput = "gate_input_" + strCur;

                // Register this timestep with the two slice layers and the output concat.
                contSlicer.top.Add(strCont);
                xSlicer.top.Add(strXt);
                hConcat.bottom.Add(strHCur);

                // Flush the hidden state when a new sequence begins, as indicated by cont_t:
                //      h_conted_{t-1} := cont_t * h_{t-1}
                //
                //  Normally, cont_t is binary (i.e., 0 or 1), so:
                //      h_conted_{t-1} := h_{t-1} if cont_t == 1
                //                        0 otherwise.
                {
                    LayerParameter gatedHidden = scaleTemplate.Clone(false);
                    gatedHidden.name        = strHConted;
                    gatedHidden.group_start = true;
                    gatedHidden.bottom.Add(strHPrev);
                    gatedHidden.bottom.Add(strCont);
                    gatedHidden.top.Add(strHConted);
                    net_param.layer.Add(gatedHidden);
                }

                // Hidden-to-gate transform; every timestep uses the same ParamSpec name "W_hc":
                //     W_hc_h_{t-1} := W_hc * h_conted_{t-1}
                {
                    LayerParameter hiddenProjection = ipTemplate.Clone(false);
                    hiddenProjection.name = "transform_" + strCur;
                    hiddenProjection.inner_product_param.axis = 2;
                    hiddenProjection.bottom.Add(strHConted);
                    hiddenProjection.top.Add(strWhH);
                    hiddenProjection.parameters.Add(new ParamSpec("W_hc"));
                    net_param.layer.Add(hiddenProjection);
                }

                // Sum the linear transforms to obtain the gate input:
                //  gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
                //                = W_hc_h_{t-1} + W_xc_x_t + b_c
                //  (W_xc_x_static is added as a third term when static input is used.)
                {
                    LayerParameter gateSum = sumTemplate.Clone(false);
                    gateSum.name = strGateInput;
                    gateSum.bottom.Add(strWhH);
                    gateSum.bottom.Add(strXt);

                    if (m_bStaticInput)
                        gateSum.bottom.Add("W_xc_x_static");

                    gateSum.top.Add(strGateInput);
                    net_param.layer.Add(gateSum);
                }

                // LSTMUnit layer computing the cell & hidden vectors c_t and h_t.
                //  Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
                //  Outputs: c_t, h_t
                //      [ i_t' ]
                //      [ f_t' ] := gate_input_t
                //      [ o_t' ]
                //      [ g_t' ]
                //          i_t := \sigmoid[i_t']
                //          f_t := \sigmoid[f_t']
                //          o_t := \sigmoid[o_t']
                //          g_t := \tanh[g_t']
                //          c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
                //          h_t := o_t .* \tanh[c_t]
                {
                    LayerParameter lstmUnit = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
                    lstmUnit.name = "unit_" + strCur;
                    lstmUnit.bottom.Add("c_" + strPrev);
                    lstmUnit.bottom.Add(strGateInput);
                    lstmUnit.bottom.Add(strCont);
                    lstmUnit.top.Add("c_" + strCur);
                    lstmUnit.top.Add(strHCur);
                    net_param.layer.Add(lstmUnit);
                }
            }

            // Route the cell blob of the last timestep through a SPLIT layer as "c_T".
            {
                LayerParameter finalCellSplit = splitTemplate.Clone(false);
                finalCellSplit.top.Add("c_T");
                finalCellSplit.bottom.Add("c_" + m_nT.ToString());
                net_param.layer.Add(finalCellSplit);
            }

            // The concat layer goes last, now that all of its bottoms are known.
            net_param.layer.Add(hConcat.Clone(false));
        }
Beispiel #2
0
        /// <summary>
        /// Builds the unrolled RNN network inside <paramref name="net_param"/>, emitting one group of
        /// layers for each timestep from 1 through m_nT.
        /// </summary>
        /// <remarks>
        /// The generated layers consume the blobs "x" and "cont" (and "x_static" when m_bStaticInput is set),
        /// start from the INPUT layer top "h_0", and end with "o" (the blobs o_1..o_T concatenated
        /// along axis 0).
        /// </remarks>
        /// <param name="net_param">Network description that receives the generated layers, in order.</param>
        protected override void FillUnrolledNet(NetParameter net_param)
        {
            uint nHiddenSize = m_param.recurrent_param.num_output;
            m_log.CHECK_GT(nHiddenSize, 0, "num_output must be positive.");

            FillerParameter fillerWeights = m_param.recurrent_param.weight_filler;
            FillerParameter fillerBias    = m_param.recurrent_param.bias_filler;

            //--- Reusable layer templates (no bottoms/tops yet); each use below works on a clone. ---

            // Inner product without bias, num_output wide.
            LayerParameter ipTemplate = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
            ipTemplate.inner_product_param.axis          = 2;
            ipTemplate.inner_product_param.bias_term     = false;
            ipTemplate.inner_product_param.num_output    = nHiddenSize;
            ipTemplate.inner_product_param.weight_filler = fillerWeights.Clone();

            // Same inner product, but with a bias term and its filler.
            LayerParameter ipBiasTemplate = ipTemplate.Clone(false);
            ipBiasTemplate.inner_product_param.bias_term   = true;
            ipBiasTemplate.inner_product_param.bias_filler = fillerBias.Clone();

            LayerParameter sliceTemplate = new LayerParameter(LayerParameter.LayerType.SLICE);
            sliceTemplate.slice_param.axis = 0;

            LayerParameter scaleTemplate = new LayerParameter(LayerParameter.LayerType.SCALE);
            scaleTemplate.scale_param.axis = 0;

            LayerParameter tanhTemplate = new LayerParameter(LayerParameter.LayerType.TANH);

            LayerParameter sumTemplate = new LayerParameter(LayerParameter.LayerType.ELTWISE);
            sumTemplate.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

            List<BlobShape> rgShapes = new List<BlobShape>();
            RecurrentInputShapes(rgShapes);
            m_log.CHECK_EQ(1, rgShapes.Count, "There should only be one input shape.");


            //--- Emit the layers ---

            // Input layer exposing the initial hidden blob.
            LayerParameter inputLayer = new LayerParameter(LayerParameter.LayerType.INPUT);
            inputLayer.input_param.shape.Add(rgShapes[0]);
            inputLayer.top.Add("h_0");
            net_param.layer.Add(inputLayer);

            // Slices "cont" along axis 0; one top per timestep is appended inside the loop below,
            // after this layer has already been added to the net.
            LayerParameter contSlicer = sliceTemplate.Clone(false);
            contSlicer.bottom.Add("cont");
            contSlicer.name = "cont_slice";
            contSlicer.slice_param.axis = 0;
            net_param.layer.Add(contSlicer);

            // Transform all timesteps of x in a single layer:
            //  W_xh_x = W_xh * x + b_h
            {
                LayerParameter xProjection = ipBiasTemplate.Clone(false);
                xProjection.name = "x_transform";
                xProjection.bottom.Add("x");
                xProjection.top.Add("W_xh_x");
                xProjection.parameters.Add(new ParamSpec("W_xh"));
                xProjection.parameters.Add(new ParamSpec("b_h"));
                xProjection.propagate_down.Add(true);
                net_param.layer.Add(xProjection);
            }

            if (m_bStaticInput)
            {
                // Transform the static input with the bias-free template on axis 1:
                //  W_xh_x_static = W_xh_static * x_static
                LayerParameter staticProjection = ipTemplate.Clone(false);
                staticProjection.name = "W_xh_x_static";
                staticProjection.inner_product_param.axis = 1;
                staticProjection.bottom.Add("x_static");
                staticProjection.top.Add("W_xh_x_static_preshape");
                staticProjection.parameters.Add(new ParamSpec("W_xh_static"));
                staticProjection.propagate_down.Add(true);
                net_param.layer.Add(staticProjection);

                // Reshape the result to (1, -1, num_output of the transform above).
                LayerParameter staticReshape = new LayerParameter(LayerParameter.LayerType.RESHAPE);
                staticReshape.name = "W_xh_x_static_reshape";

                BlobShape shapeTarget = staticReshape.reshape_param.shape;
                shapeTarget.dim.Add(1);   // One timestep.
                shapeTarget.dim.Add(-1);  // Left to be inferred (should come out as m_nN) so we can reshape on batch size.
                shapeTarget.dim.Add((int)staticProjection.inner_product_param.num_output);

                staticReshape.bottom.Add("W_xh_x_static_preshape");
                staticReshape.top.Add("W_xh_x_static");
                net_param.layer.Add(staticReshape);
            }

            // Slices W_xh_x along axis 0; its per-timestep tops are appended inside the loop below.
            LayerParameter xSlicer = sliceTemplate.Clone(false);
            xSlicer.bottom.Add("W_xh_x");
            xSlicer.name = "W_xh_x_slice";
            net_param.layer.Add(xSlicer);

            // Collects every o_t; a clone of it is added to the net only after the loop.
            LayerParameter oConcat = new LayerParameter(LayerParameter.LayerType.CONCAT) { name = "o_concat" };
            oConcat.concat_param.axis = 0;
            oConcat.top.Add("o");

            for (int nPrev = 0; nPrev < m_nT; nPrev++)
            {
                string strPrev = nPrev.ToString();          // t-1
                string strCur  = (nPrev + 1).ToString();    // t

                // Blob names used by this timestep.
                string strCont        = "cont_" + strCur;
                string strXt          = "W_xh_x_" + strCur;
                string strHPrev       = "h_" + strPrev;
                string strHCur        = "h_" + strCur;
                string strHConted     = "h_conted_" + strPrev;
                string strWhhH        = "W_hh_h_" + strPrev;
                string strNeuronInput = "h_neuron_input_" + strCur;
                string strWhoH        = "W_ho_h_" + strCur;
                string strOCur        = "o_" + strCur;

                // Register this timestep with the two slice layers and the output concat.
                contSlicer.top.Add(strCont);
                xSlicer.top.Add(strXt);
                oConcat.bottom.Add(strOCur);

                // Flush the hidden state when a new sequence begins, as indicated by cont_t:
                //      h_conted_{t-1} := cont_t * h_{t-1}
                //
                //  Normally, cont_t is binary (i.e., 0 or 1), so:
                //      h_conted_{t-1} := h_{t-1} if cont_t == 1
                //                        0 otherwise.
                {
                    LayerParameter gatedHidden = scaleTemplate.Clone(false);
                    gatedHidden.name = strHConted;
                    gatedHidden.bottom.Add(strHPrev);
                    gatedHidden.bottom.Add(strCont);
                    gatedHidden.top.Add(strHConted);
                    net_param.layer.Add(gatedHidden);
                }

                // Hidden-to-hidden transform; every timestep uses the same ParamSpec name "W_hh":
                //     W_hh_h_{t-1} := W_hh * h_conted_{t-1}
                {
                    LayerParameter hiddenProjection = ipTemplate.Clone(false);
                    hiddenProjection.name = strWhhH;
                    hiddenProjection.inner_product_param.axis = 2;
                    hiddenProjection.bottom.Add(strHConted);
                    hiddenProjection.top.Add(strWhhH);
                    hiddenProjection.parameters.Add(new ParamSpec("W_hh"));
                    net_param.layer.Add(hiddenProjection);
                }

                // Two layers (sum, then tanh) computing
                //      h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
                //           = \tanh( W_hh_h_{t-1} + W_xh_x_t )
                //  (W_xh_x_static is added as a third term of the sum when static input is used.)
                {
                    LayerParameter hiddenSum = sumTemplate.Clone(false);
                    hiddenSum.name = "h_input_sum_" + strCur;
                    hiddenSum.bottom.Add(strWhhH);
                    hiddenSum.bottom.Add(strXt);

                    if (m_bStaticInput)
                        hiddenSum.bottom.Add("W_xh_x_static");

                    hiddenSum.top.Add(strNeuronInput);
                    net_param.layer.Add(hiddenSum);
                }
                {
                    // This layer's name is the same string as its bottom blob's name.
                    LayerParameter hiddenTanh = tanhTemplate.Clone(false);
                    hiddenTanh.name = strNeuronInput;
                    hiddenTanh.bottom.Add(strNeuronInput);
                    hiddenTanh.top.Add(strHCur);
                    net_param.layer.Add(hiddenTanh);
                }

                // Hidden-to-output transform; every timestep uses the ParamSpec names "W_ho" and "b_o":
                //      W_ho_h_t := W_ho * h_t + b_o
                {
                    LayerParameter outputProjection = ipBiasTemplate.Clone(false);
                    outputProjection.name = strWhoH;
                    outputProjection.inner_product_param.axis = 2;
                    outputProjection.bottom.Add(strHCur);
                    outputProjection.top.Add(strWhoH);
                    outputProjection.parameters.Add(new ParamSpec("W_ho"));
                    outputProjection.parameters.Add(new ParamSpec("b_o"));
                    net_param.layer.Add(outputProjection);
                }

                // Output activation:
                //      o_t := \tanh( W_ho * h_t + b_o )
                //           = \tanh( W_ho_h_t )
                {
                    LayerParameter outputTanh = tanhTemplate.Clone(false);
                    outputTanh.name = "o_neuron_" + strCur;
                    outputTanh.bottom.Add(strWhoH);
                    outputTanh.top.Add(strOCur);
                    net_param.layer.Add(outputTanh);
                }
            }

            // The concat layer goes last, now that all of its bottoms are known.
            net_param.layer.Add(oConcat.Clone(false));
        }