Exemple #1
0
        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop)
        {
            m_log.CHECK(colTop[0] != colBottom[0], type.ToString() + " Layer does not allow in-place computation.");
            m_nInferredAxis  = -1;
            m_nConstantCount = 1;
            m_rgCopyAxes.Clear();

            BlobShape top_blob_shape = m_param.reshape_param.shape;
            int       top_num_axes   = top_blob_shape.dim.Count();

            for (int i = 0; i < top_num_axes; i++)
            {
                int top_dim = top_blob_shape.dim[i];

                if (top_dim == 0)
                {
                    m_rgCopyAxes.Add(i);
                }
                else if (top_dim == -1)
                {
                    m_log.CHECK_EQ(m_nInferredAxis, -1, "new shape contains multiple -1 dims; at most a single (1) value of -1 may be specified.");
                    m_nInferredAxis = i;
                }
                else
                {
                    m_nConstantCount *= top_dim;
                }
            }
        }
        /// <summary>
        /// Parses the parameter from a RawProto.
        /// </summary>
        /// <param name="rp">Specifies the RawProto to parse.</param>
        /// <returns>A new instance of the parameter is returned.</returns>
        public static TransposeParameter FromProto(RawProto rp)
        {
            TransposeParameter p = new TransposeParameter();

            p.m_shape = BlobShape.FromProto(rp);

            return(p);
        }
        /// <summary>
        /// Parses the parameter from a RawProto.
        /// </summary>
        /// <param name="rp">Specifies the RawProto to parse.</param>
        /// <returns>A new instance of the parameter is returned.</returns>
        public static SqueezeParameter FromProto(RawProto rp)
        {
            SqueezeParameter p = new SqueezeParameter();

            p.m_shape = BlobShape.FromProto(rp);

            return(p);
        }
Exemple #4
0
        /// <summary>
        /// Setup the layer.
        /// </summary>
        /// <param name="colBottom">Specifies the input blobs, which are not used.</param>
        /// <param name="colTop">Specifies the output blobs.</param>
        public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop)
        {
            m_shape = m_param.constant_param.output_shape;
            m_rgF   = m_param.constant_param.values_f;

            if (!string.IsNullOrEmpty(m_param.constant_param.binary_data_file))
            {
                m_log.CHECK(File.Exists(m_param.constant_param.binary_data_file), "The 'binary_data_file' specified ('" + m_param.constant_param.binary_data_file + "') could not be found!");

                using (FileStream fs = new FileStream(m_param.constant_param.binary_data_file, FileMode.Open, FileAccess.Read))
                    using (BinaryReader br = new BinaryReader(fs))
                    {
                        BlobProto proto = BlobProto.Load(br);
                        m_rgF = proto.data;
                    }
            }
        }
Exemple #5
0
        /// <summary>
        /// Initialize the gym with the specified properties.
        /// </summary>
        /// <param name="log">Specifies the output log to use.</param>
        /// <param name="properties">Specifies the properties containing Gym specific initialization parameters.</param>
        /// <remarks>
        /// The ModelGym uses the following initialization properties.
        ///
        /// 'GpuID' - the GPU to run on.
        /// 'ModelDescription' - the model description of the model to use.
        /// 'Dataset' - the name of the dataset to use.
        /// 'Weights' - the model trained weights.
        /// 'CudaPath' - the path of the CudaDnnDLL to use.
        /// 'BatchSize' - the batch size used when running images through the model (default = 16).
        /// 'RecreateData' - when 'True' the data is re-run through the model, otherwise if already run the data is loaded from file (faster).
        /// </remarks>
        public void Initialize(Log log, PropertySet properties)
        {
            m_nGpuID        = properties.GetPropertyAsInt("GpuID");
            m_strModelDesc  = properties.GetProperty("ModelDescription");
            m_strDataset    = properties.GetProperty("Dataset");
            m_rgWeights     = properties.GetPropertyBlob("Weights");
            m_nBatchSize    = properties.GetPropertyAsInt("BatchSize", 16);
            m_bRecreateData = properties.GetPropertyAsBool("RecreateData", false);
            m_strProject    = properties.GetProperty("ProjectName");
            if (string.IsNullOrEmpty(m_strProject))
            {
                m_strProject = "default";
            }

            string strCudaPath = properties.GetProperty("CudaPath");

            SettingsCaffe s = new SettingsCaffe();

            s.GpuIds            = m_nGpuID.ToString();
            s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ON_DEMAND_BACKGROUND;

            m_imgdb = new MyCaffeImageDatabase2(log);
            m_imgdb.InitializeWithDsName1(s, m_strDataset);
            m_ds = m_imgdb.GetDatasetByName(m_strDataset);

            SimpleDatum sd    = m_imgdb.QueryImage(m_ds.TrainingSource.ID, 0, IMGDB_LABEL_SELECTION_METHOD.NONE, IMGDB_IMAGE_SELECTION_METHOD.NONE);
            BlobShape   shape = new BlobShape(1, sd.Channels, sd.Height, sd.Width);

            if (m_evtCancel == null)
            {
                m_evtCancel = new CancelEvent();
            }

            m_mycaffe = new MyCaffeControl <float>(s, log, m_evtCancel, null, null, null, null, strCudaPath);
            m_mycaffe.LoadToRun(m_strModelDesc, m_rgWeights, shape);

            m_log = log;
        }
Exemple #6
0
        /// <summary>
        /// Reshape the bottom (input) and top (output) blobs.
        /// </summary>
        /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
        /// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
        public override void Reshape(BlobCollection <T> colBottom, BlobCollection <T> colTop)
        {
            int input_start_axis = m_param.reshape_param.axis;
            int start_axis       = (input_start_axis >= 0) ? input_start_axis : colBottom[0].num_axes + input_start_axis + 1;

            m_log.CHECK_GE(start_axis, 0, "axis " + input_start_axis.ToString() + " out of range");
            m_log.CHECK_LE(start_axis, colBottom[0].num_axes, "axis " + input_start_axis.ToString() + " out of range for " + colBottom[0].num_axes.ToString() + "-D input blob");

            int num_axes = m_param.reshape_param.num_axes;

            m_log.CHECK_GE(num_axes, -1, "num_axes must be >= 0, or -1 for all");

            int end_axis = (num_axes == -1) ? colBottom[0].num_axes : (start_axis + num_axes);

            m_log.CHECK_LE(end_axis, colBottom[0].num_axes, "end_axis = axis + num_axes is out of range");

            int        num_axes_replaced = end_axis - start_axis;
            int        num_axes_retained = colBottom[0].num_axes - num_axes_replaced;
            BlobShape  top_blob_shape    = m_param.reshape_param.shape;
            int        num_new_axes      = top_blob_shape.dim.Count;
            List <int> rgTopShape        = new List <int>();
            int        top_shape_index   = 0;

            for (int i = 0; i < start_axis; i++)
            {
                rgTopShape.Add(colBottom[0].shape(i));
                top_shape_index++;
            }

            for (int i = 0; i < num_new_axes; i++)
            {
                rgTopShape.Add(top_blob_shape.dim[i]);
                top_shape_index++;
            }

            for (int i = end_axis; i < colBottom[0].num_axes; i++)
            {
                rgTopShape.Add(colBottom[0].shape(i));
                top_shape_index++;
            }

            m_log.CHECK_EQ(top_shape_index, rgTopShape.Count, "The top shape count should equal the top_shape_index.");

            for (int i = 0; i < m_rgCopyAxes.Count; i++)
            {
                int copy_axis_index = m_rgCopyAxes[i];
                m_log.CHECK_GT(colBottom[0].num_axes, start_axis + copy_axis_index, "new shape contains a 0, but there was no corresponding bottom axis to copy");
                rgTopShape[start_axis + copy_axis_index] = colBottom[0].shape(start_axis + copy_axis_index);
            }

            if (m_nInferredAxis >= 0)
            {
                // A -1 dim was specified; infer the correct dimension by computing the
                // product of the other dimensions.
                int explicit_count = m_nConstantCount;
                explicit_count *= colBottom[0].count(0, start_axis);
                explicit_count *= colBottom[0].count(end_axis);

                for (int i = 0; i < m_rgCopyAxes.Count; i++)
                {
                    int copy_axis_index = m_rgCopyAxes[i];
                    explicit_count *= rgTopShape[start_axis + copy_axis_index];
                }

                m_log.CHECK_EQ(0, colBottom[0].count() % explicit_count, "bottom count (" + colBottom[0].count().ToString() + ") must be divisible by the product of the specified dimensions( " + explicit_count.ToString() + ")");

                int inferred_dim = colBottom[0].count() / explicit_count;
                rgTopShape[start_axis + m_nInferredAxis] = inferred_dim;
            }

            colTop[0].Reshape(rgTopShape);
            m_log.CHECK_EQ(colTop[0].count(), colBottom[0].count(), "output count must match input count");

            colTop[0].ShareData(colBottom[0]);
            colTop[0].ShareDiff(colBottom[0]);
        }
Exemple #7
0
        /// <summary>
        /// Fills the NetParameter  with the LSTM network architecture.
        /// </summary>
        /// <param name="net_param"></param>
        protected override void FillUnrolledNet(NetParameter net_param)
        {
            uint nNumOutput = m_param.recurrent_param.num_output;

            m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");
            FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
            FillerParameter bias_filler   = m_param.recurrent_param.bias_filler;

            // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
            // use to save redundant code.
            LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);

            hidden_param.inner_product_param.num_output    = nNumOutput * 4;
            hidden_param.inner_product_param.bias_term     = false;
            hidden_param.inner_product_param.axis          = 2;
            hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

            LayerParameter biased_hidden_param = hidden_param.Clone(false);

            biased_hidden_param.inner_product_param.bias_term   = true;
            biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

            LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);

            sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

            LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);

            scale_param.scale_param.axis = 0;

            LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);

            slice_param.slice_param.axis = 0;

            LayerParameter split_param = new LayerParameter(LayerParameter.LayerType.SPLIT);

            List <BlobShape> rgInputShapes = new List <BlobShape>();

            RecurrentInputShapes(rgInputShapes);
            m_log.CHECK_EQ(2, rgInputShapes.Count, "There should be 2 input shapes.");


            //--- Add the layers ---

            LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);

            input_layer_param.top.Add("c_0");
            input_layer_param.input_param.shape.Add(rgInputShapes[0].Clone());
            input_layer_param.top.Add("h_0");
            input_layer_param.input_param.shape.Add(rgInputShapes[1].Clone());
            net_param.layer.Add(input_layer_param);

            LayerParameter cont_slice_param = slice_param.Clone(false);

            cont_slice_param.name = "cont_slice";
            cont_slice_param.bottom.Add("cont");
            cont_slice_param.slice_param.axis = 0;
            net_param.layer.Add(cont_slice_param);

            // Add layer to transform all timesteps of x to the hidden state dimension.
            //  W_xc_x = W_xc * x + b_c
            {
                LayerParameter x_transform_param = biased_hidden_param.Clone(false);
                x_transform_param.name = "x_transform";
                x_transform_param.parameters.Add(new ParamSpec("W_xc"));
                x_transform_param.parameters.Add(new ParamSpec("b_c"));
                x_transform_param.bottom.Add("x");
                x_transform_param.top.Add("W_xc_x");
                x_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_transform_param);
            }

            if (m_bStaticInput)
            {
                // Add layer to transform x_static to the hidden state dimension.
                //  W_xc_x_static = W_xc_static * x_static
                LayerParameter x_static_transform_param = hidden_param.Clone(false);
                x_static_transform_param.inner_product_param.axis = 1;
                x_static_transform_param.name = "W_xc_x_static";
                x_static_transform_param.parameters.Add(new ParamSpec("W_xc_static"));
                x_static_transform_param.bottom.Add("x_static");
                x_static_transform_param.top.Add("W_xc_x_static_preshape");
                x_static_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_static_transform_param);

                LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
                BlobShape      new_shape     = reshape_param.reshape_param.shape;
                new_shape.dim.Add(1);   // One timestep.
                new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
                new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
                reshape_param.name = "W_xc_x_static_reshape";
                reshape_param.bottom.Add("W_xc_x_static_preshape");
                reshape_param.top.Add("W_xc_x_static");
                net_param.layer.Add(reshape_param);
            }

            LayerParameter x_slice_param = slice_param.Clone(false);

            x_slice_param.name = "W_xc_x_slice";
            x_slice_param.bottom.Add("W_xc_x");
            net_param.layer.Add(x_slice_param);

            LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);

            output_concat_layer.name = "h_concat";
            output_concat_layer.top.Add("h");
            output_concat_layer.concat_param.axis = 0;

            for (int t = 1; t <= m_nT; t++)
            {
                string tm1s = (t - 1).ToString();
                string ts   = t.ToString();

                cont_slice_param.top.Add("cont_" + ts);
                x_slice_param.top.Add("W_xc_x_" + ts);


                // Add layer to flush the hidden state when beginning a new sequence,
                //  as indicated by cont_t.
                //      h_conted_{t-1} := cont_t * h_{t-1}
                //
                //  Normally, cont_t is binary (i.e., 0 or 1), so:
                //      h_conted_{t-1} := h_{t-1} if cont_t == 1
                //                        0 otherwise.
                {
                    LayerParameter cont_h_param = scale_param.Clone(false);
                    cont_h_param.group_start = true;
                    cont_h_param.name        = "h_conted_" + tm1s;
                    cont_h_param.bottom.Add("h_" + tm1s);
                    cont_h_param.bottom.Add("cont_" + ts);
                    cont_h_param.top.Add("h_conted_" + tm1s);
                    net_param.layer.Add(cont_h_param);
                }

                // Add layer to compute
                //     W_hc_h_{t-1} := W_hc * h_conted_{t-1}
                {
                    LayerParameter w_param = hidden_param.Clone(false);
                    w_param.name = "transform_" + ts;
                    w_param.parameters.Add(new ParamSpec("W_hc"));
                    w_param.bottom.Add("h_conted_" + tm1s);
                    w_param.top.Add("W_hc_h_" + tm1s);
                    w_param.inner_product_param.axis = 2;
                    net_param.layer.Add(w_param);
                }

                // Add the outputs of the linear transformations to compute the gate input.
                //  get_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
                //               = W_hc_h_{t-1} + W_xc_x_t + b_c
                {
                    LayerParameter input_sum_layer = sum_param.Clone(false);
                    input_sum_layer.name = "gate_input_" + ts;
                    input_sum_layer.bottom.Add("W_hc_h_" + tm1s);
                    input_sum_layer.bottom.Add("W_xc_x_" + ts);
                    if (m_bStaticInput)
                    {
                        input_sum_layer.bottom.Add("W_xc_x_static");
                    }
                    input_sum_layer.top.Add("gate_input_" + ts);
                    net_param.layer.Add(input_sum_layer);
                }

                // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t.
                //  Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
                //  Outputs: c_t, h_t
                //      [ i_t' ]
                //      [ f_t' ] := gate_input_t
                //      [ o_t' ]
                //      [ g_t' ]
                //          i_t := \sigmoid[i_t']
                //          f_t := \sigmoid[f_t']
                //          o_t := \sigmoid[o_t']
                //          g_t := \tanh[g_t']
                //          c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
                //          h_t := o_t .* \tanh[c_t]
                {
                    LayerParameter lstm_unit_param = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
                    lstm_unit_param.bottom.Add("c_" + tm1s);
                    lstm_unit_param.bottom.Add("gate_input_" + ts);
                    lstm_unit_param.bottom.Add("cont_" + ts);
                    lstm_unit_param.top.Add("c_" + ts);
                    lstm_unit_param.top.Add("h_" + ts);
                    lstm_unit_param.name = "unit_" + ts;
                    net_param.layer.Add(lstm_unit_param);
                }

                output_concat_layer.bottom.Add("h_" + ts);
            }

            {
                LayerParameter c_T_copy_param = split_param.Clone(false);
                c_T_copy_param.bottom.Add("c_" + m_nT.ToString());
                c_T_copy_param.top.Add("c_T");
                net_param.layer.Add(c_T_copy_param);
            }

            net_param.layer.Add(output_concat_layer.Clone(false));
        }
        /// <summary>
        /// Fills the NetParameter  with the RNN network architecture.
        /// </summary>
        /// <param name="net_param"></param>
        protected override void FillUnrolledNet(NetParameter net_param)
        {
            uint nNumOutput = m_param.recurrent_param.num_output;

            m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");
            FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
            FillerParameter bias_filler   = m_param.recurrent_param.bias_filler;

            // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
            // use to save redundant code.
            LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);

            hidden_param.inner_product_param.num_output    = nNumOutput;
            hidden_param.inner_product_param.bias_term     = false;
            hidden_param.inner_product_param.axis          = 2;
            hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

            LayerParameter biased_hidden_param = hidden_param.Clone(false);

            biased_hidden_param.inner_product_param.bias_term   = true;
            biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

            LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);

            sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

            LayerParameter tanh_param = new LayerParameter(LayerParameter.LayerType.TANH);

            LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);

            scale_param.scale_param.axis = 0;

            LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);

            slice_param.slice_param.axis = 0;

            List <BlobShape> rgInputShapes = new List <BlobShape>();

            RecurrentInputShapes(rgInputShapes);
            m_log.CHECK_EQ(1, rgInputShapes.Count, "There should only be one input shape.");


            //--- Add the layers ---

            LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);

            input_layer_param.top.Add("h_0");
            input_layer_param.input_param.shape.Add(rgInputShapes[0]);
            net_param.layer.Add(input_layer_param);

            LayerParameter cont_slice_param = slice_param.Clone(false);

            cont_slice_param.name = "cont_slice";
            cont_slice_param.bottom.Add("cont");
            cont_slice_param.slice_param.axis = 0;
            net_param.layer.Add(cont_slice_param);

            // Add layer to transform all timesteps of x to the hidden state dimension.
            //  W_xh_x = W_xh * x + b_h
            {
                LayerParameter x_transform_param = biased_hidden_param.Clone(false);
                x_transform_param.name = "x_transform";
                x_transform_param.parameters.Add(new ParamSpec("W_xh"));
                x_transform_param.parameters.Add(new ParamSpec("b_h"));
                x_transform_param.bottom.Add("x");
                x_transform_param.top.Add("W_xh_x");
                x_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_transform_param);
            }

            if (m_bStaticInput)
            {
                // Add layer to transform x_static to the hidden state dimension.
                //  W_xh_x_static = W_xh_static * x_static
                LayerParameter x_static_transform_param = hidden_param.Clone(false);
                x_static_transform_param.inner_product_param.axis = 1;
                x_static_transform_param.name = "W_xh_x_static";
                x_static_transform_param.parameters.Add(new ParamSpec("W_xh_static"));
                x_static_transform_param.bottom.Add("x_static");
                x_static_transform_param.top.Add("W_xh_x_static_preshape");
                x_static_transform_param.propagate_down.Add(true);
                net_param.layer.Add(x_static_transform_param);

                LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
                BlobShape      new_shape     = reshape_param.reshape_param.shape;
                new_shape.dim.Add(1);   // One timestep.
                new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
                new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
                reshape_param.name = "W_xh_x_static_reshape";
                reshape_param.bottom.Add("W_xh_x_static_preshape");
                reshape_param.top.Add("W_xh_x_static");
                net_param.layer.Add(reshape_param);
            }

            LayerParameter x_slice_param = slice_param.Clone(false);

            x_slice_param.name = "W_xh_x_slice";
            x_slice_param.bottom.Add("W_xh_x");
            net_param.layer.Add(x_slice_param);

            LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);

            output_concat_layer.name = "o_concat";
            output_concat_layer.top.Add("o");
            output_concat_layer.concat_param.axis = 0;

            for (int t = 1; t <= m_nT; t++)
            {
                string tm1s = (t - 1).ToString();
                string ts   = t.ToString();

                cont_slice_param.top.Add("cont_" + ts);
                x_slice_param.top.Add("W_xh_x_" + ts);


                // Add layer to flush the hidden state when beginning a new sequence,
                //  as indicated by cont_t.
                //      h_conted_{t-1} := cont_t * h_{t-1}
                //
                //  Normally, cont_t is binary (i.e., 0 or 1), so:
                //      h_conted_{t-1} := h_{t-1} if cont_t == 1
                //                        0 otherwise.
                {
                    LayerParameter cont_h_param = scale_param.Clone(false);
                    cont_h_param.name = "h_conted_" + tm1s;
                    cont_h_param.bottom.Add("h_" + tm1s);
                    cont_h_param.bottom.Add("cont_" + ts);
                    cont_h_param.top.Add("h_conted_" + tm1s);
                    net_param.layer.Add(cont_h_param);
                }

                // Add layer to compute
                //     W_hh_h_{t-1} := W_hh * h_conted_{t-1}
                {
                    LayerParameter w_param = hidden_param.Clone(false);
                    w_param.name = "W_hh_h_" + tm1s;
                    w_param.parameters.Add(new ParamSpec("W_hh"));
                    w_param.bottom.Add("h_conted_" + tm1s);
                    w_param.top.Add("W_hh_h_" + tm1s);
                    w_param.inner_product_param.axis = 2;
                    net_param.layer.Add(w_param);
                }

                // Add layers to compute
                //      h_t := \tanh( W_hh * h_conted_t{t-1} + W_xh * x_t + b_h )
                //           = \tanh( W_hh_h_{t-1} + W_xh_t )
                {
                    LayerParameter h_input_sum_param = sum_param.Clone(false);
                    h_input_sum_param.name = "h_input_sum_" + ts;
                    h_input_sum_param.bottom.Add("W_hh_h_" + tm1s);
                    h_input_sum_param.bottom.Add("W_xh_x_" + ts);

                    if (m_bStaticInput)
                    {
                        h_input_sum_param.bottom.Add("W_xh_x_static");
                    }

                    h_input_sum_param.top.Add("h_neuron_input_" + ts);
                    net_param.layer.Add(h_input_sum_param);
                }
                {
                    LayerParameter h_neuron_param = tanh_param.Clone(false);
                    h_neuron_param.name = "h_neuron_input_" + ts;
                    h_neuron_param.bottom.Add("h_neuron_input_" + ts);
                    h_neuron_param.top.Add("h_" + ts);
                    net_param.layer.Add(h_neuron_param);
                }

                // Add layer to compute
                //      W_ho_h_t := W_ho * h_t + b_o
                {
                    LayerParameter w_param = biased_hidden_param.Clone(false);
                    w_param.name = "W_ho_h_" + ts;
                    w_param.parameters.Add(new ParamSpec("W_ho"));
                    w_param.parameters.Add(new ParamSpec("b_o"));
                    w_param.bottom.Add("h_" + ts);
                    w_param.top.Add("W_ho_h_" + ts);
                    w_param.inner_product_param.axis = 2;
                    net_param.layer.Add(w_param);
                }

                // Add layer to compute
                //      o_t := \tanh( W_ho * h_t + b_o
                //           = \tanh( W_ho_h_t )
                {
                    LayerParameter o_neuron_param = tanh_param.Clone(false);
                    o_neuron_param.name = "o_neuron_" + ts;
                    o_neuron_param.bottom.Add("W_ho_h_" + ts);
                    o_neuron_param.top.Add("o_" + ts);
                    net_param.layer.Add(o_neuron_param);
                }

                output_concat_layer.bottom.Add("o_" + ts);
            }

            net_param.layer.Add(output_concat_layer.Clone(false));
        }
Exemple #9
0
        private void layerSetUpCaffe(BlobCollection <T> colBottom, BlobCollection <T> colTop)
        {
            // Get (recurrent) input/output names.
            List <string> rgOutputNames = new List <string>();

            OutputBlobNames(rgOutputNames);

            List <string> rgRecurInputNames = new List <string>();

            RecurrentInputBlobNames(rgRecurInputNames);

            List <string> rgRecurOutputNames = new List <string>();

            RecurrentOutputBlobNames(rgRecurOutputNames);

            int nNumRecurBlobs = rgRecurInputNames.Count;

            m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");

            // If provided, bottom[2] is a static input to the recurrent net.
            int nNumHiddenExposed = (m_bExposeHidden) ? nNumRecurBlobs : 0;

            m_bStaticInput = (colBottom.Count > 2 + nNumHiddenExposed) ? true : false;

            if (m_bStaticInput)
            {
                m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
                m_log.CHECK_EQ(m_nN, colBottom[2].shape(0), "When static input is present, the bottom[2].shape(0) must = N which is " + m_nN.ToString());
            }

            // Create a NetParameter; setup the inputs that aren't unique to particular
            // recurrent architectures.
            NetParameter net_param = new NetParameter();

            LayerParameter input_layer = new LayerParameter(LayerParameter.LayerType.INPUT);

            input_layer.top.Add("x");
            BlobShape input_shape1 = new param.BlobShape();

            for (int i = 0; i < colBottom[0].num_axes; i++)
            {
                input_shape1.dim.Add(colBottom[0].shape(i));
            }
            input_layer.input_param.shape.Add(input_shape1);

            input_layer.top.Add("cont");
            BlobShape input_shape2 = new param.BlobShape();

            for (int i = 0; i < colBottom[1].num_axes; i++)
            {
                input_shape2.dim.Add(colBottom[1].shape(i));
            }
            input_layer.input_param.shape.Add(input_shape2);

            if (m_bStaticInput)
            {
                input_layer.top.Add("x_static");
                BlobShape input_shape3 = new BlobShape();
                for (int i = 0; i < colBottom[2].num_axes; i++)
                {
                    input_shape3.dim.Add(colBottom[2].shape(i));
                }
                input_layer.input_param.shape.Add(input_shape3);
            }

            net_param.layer.Add(input_layer);

            // Call the child's FillUnrolledNet implementation to specify the unrolled
            // recurrent architecture.
            FillUnrolledNet(net_param);

            // Prepend this layer's name to the names of each layer in the unrolled net.
            string strLayerName = m_param.name;

            if (strLayerName.Length > 0)
            {
                for (int i = 0; i < net_param.layer.Count; i++)
                {
                    LayerParameter layer = net_param.layer[i];
                    layer.name = strLayerName + "_" + layer.name;
                }
            }

            // Add 'pseudo-losses' to all outputs to force backpropagation.
            // (Setting force_backward is too agressive as we may not need to backprop to
            // all inputs, e.g., the sequence continuation indicators.)
            List <string> rgPseudoLosses = new List <string>();

            for (int i = 0; i < rgOutputNames.Count; i++)
            {
                rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");
                LayerParameter layer = new LayerParameter(LayerParameter.LayerType.REDUCTION, rgPseudoLosses[i]);
                layer.bottom.Add(rgOutputNames[i]);
                layer.top.Add(rgPseudoLosses[i]);
                layer.loss_weight.Add(1.0);
                net_param.layer.Add(layer);
            }

            // Create the unrolled net.
            Net <T> sharedNet = null;

            if (m_param is LayerParameterEx <T> )
            {
                RecurrentLayer <T> sharedLayer = ((LayerParameterEx <T>)m_param).SharedLayer as RecurrentLayer <T>;
                if (sharedLayer != null)
                {
                    sharedNet = sharedLayer.m_unrolledNet;
                }
            }

            m_unrolledNet = new Net <T>(m_cuda, m_log, net_param, m_evtCancel, null, m_phase, null, sharedNet);
            m_unrolledNet.set_debug_info(m_param.recurrent_param.debug_info);

            // Setup pointers to the inputs.
            m_blobXInputBlob    = m_unrolledNet.blob_by_name("x");
            m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");

            if (m_bStaticInput)
            {
                m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");
            }

            // Setup pointers to paired recurrent inputs/outputs.
            m_colRecurInputBlobs  = new common.BlobCollection <T>();
            m_colRecurOutputBlobs = new common.BlobCollection <T>();

            for (int i = 0; i < nNumRecurBlobs; i++)
            {
                m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
                m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));
            }

            // Setup pointers to outputs.
            m_log.CHECK_EQ(colTop.Count() - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");
            m_colOutputBlobs = new common.BlobCollection <T>();
            for (int i = 0; i < rgOutputNames.Count; i++)
            {
                m_colOutputBlobs.Add(m_unrolledNet.blob_by_name(rgOutputNames[i]));
            }

            // We should have 2 inputs (x and cont), plus a number of recurrent inputs,
            // plus maybe a static input.
            int nStaticInput = (m_bStaticInput) ? 1 : 0;

            m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");

            // This layer's parameters are any parameters in the layers of the unrolled
            // net.  We only want one copy of each parameter, so check that the parameter
            // is 'owned' by the layer, rather than shared with another.
            blobs.Clear();
            for (int i = 0; i < m_unrolledNet.parameters.Count; i++)
            {
                if (m_unrolledNet.param_owners[i] == -1)
                {
                    m_log.WriteLine("Adding parameter " + i.ToString() + ": " + m_unrolledNet.param_display_names[i]);
                    blobs.Add(m_unrolledNet.parameters[i]);
                }
            }

            // Check that param_propagate_down is set for all of the parameters in the
            // unrolled net; set param_propagate_down to true in this layer.
            for (int i = 0; i < m_unrolledNet.layers.Count; i++)
            {
                for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
                {
                    m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());
                }
            }
            m_rgbParamPropagateDown = new DictionaryMap <bool>(blobs.Count, true);

            // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across
            // batches.
            for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
            {
                m_colRecurOutputBlobs[i].SetDiff(0);
            }

            // Check that the last output_names.count layers are the pseudo-losses;
            // set last_layer_index so that we don't actually run these layers.
            List <string> rgLayerNames = m_unrolledNet.layer_names;

            m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;
            for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
            {
                m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);
            }
        }
        /** @copydoc LayerParameterBase::Copy */
        public override void Copy(LayerParameterBase src)
        {
            TransposeParameter p = (TransposeParameter)src;

            m_shape = p.m_shape.Clone();
        }
        /** @copydoc LayerParameterBase::Copy */
        public override void Copy(LayerParameterBase src)
        {
            SqueezeParameter p = (SqueezeParameter)src;

            m_shape = p.m_shape.Clone();
        }