/// <summary>
/// Calculates the kernel and stride dimensions for the pooling layer and returns
/// a correctly configured LayerParameter for a PoolingLayer.
/// </summary>
/// <param name="nPyramidLevel">Specifies the pyramid level.</param>
/// <param name="nBottomH">Specifies the bottom height.</param>
/// <param name="nBottomW">Specifies the bottom width.</param>
/// <param name="spp_param">Specifies the SPPParameter used.</param>
/// <returns>The pooling parameter is returned.</returns>
protected virtual LayerParameter getPoolingParam(int nPyramidLevel, int nBottomH, int nBottomW, SPPParameter spp_param)
{
    LayerParameter pool_param = new param.LayerParameter(LayerParameter.LayerType.POOLING);
    int nNumBins = (int)Math.Pow(2, nPyramidLevel);

    // Find the padding and kernel size so that the pooling is
    // performed across the entire image.
    int nKernelH = (int)Math.Ceiling(nBottomH / (double)nNumBins);
    int nKernelW = (int)Math.Ceiling(nBottomW / (double)nNumBins);

    // remainder_H is the minimum number of pixels that need to be padded before
    // the entire image height is pooled over with the chosen kernel dimension.
    int nRemainderH = nKernelH * nNumBins - nBottomH;
    int nRemainderW = nKernelW * nNumBins - nBottomW;

    // The pooling layer pads (2 * pad_h) pixels on the top and bottom of the image.
    int nPadH = (nRemainderH + 1) / 2;
    int nPadW = (nRemainderW + 1) / 2;

    pool_param.pooling_param.pad_h = (uint)nPadH;
    pool_param.pooling_param.pad_w = (uint)nPadW;
    pool_param.pooling_param.kernel_h = (uint)nKernelH;
    pool_param.pooling_param.kernel_w = (uint)nKernelW;
    pool_param.pooling_param.stride_h = (uint)nKernelH;
    pool_param.pooling_param.stride_w = (uint)nKernelW;
    pool_param.pooling_param.pool = spp_param.pool;

    return pool_param;
}
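// A minimal worked example of the arithmetic above (illustrative only, not part of
// the layer source; DemoSppArithmetic is a hypothetical name): for pyramid level 2
// on a 13 x 13 bottom, the stride-4 windows starting at -nPadH cover the full
// 13-pixel extent in exactly 4 bins per spatial dimension.
private static void DemoSppArithmetic()
{
    int nBottomH = 13;
    int nPyramidLevel = 2;
    int nNumBins = (int)Math.Pow(2, nPyramidLevel);                // 2^2 = 4 bins per side.
    int nKernelH = (int)Math.Ceiling(nBottomH / (double)nNumBins); // ceil(13 / 4) = 4.
    int nRemainderH = nKernelH * nNumBins - nBottomH;              // 4 * 4 - 13 = 3.
    int nPadH = (nRemainderH + 1) / 2;                             // (3 + 1) / 2 = 2.
    // Kernel 4, stride 4, pad 2: four windows starting at -2 span the 13-pixel height.
}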
/// <summary>
/// The BinaryHashLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter of type BINARYHASH.</param>
public BinaryHashLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    m_type = LayerParameter.LayerType.BINARYHASH;

    m_blobDebug = new common.Blob<T>(cuda, log, false);
    m_blobDebug.Name = "debug";
    m_blobWork = new common.Blob<T>(cuda, log);
    m_blobWork.Name = "work";
    m_blobNormalized = new common.Blob<T>(cuda, log);
    m_blobNormalized.Name = "normalized";

    LayerParameter paramSigmoid = new param.LayerParameter(LayerParameter.LayerType.SIGMOID);
    paramSigmoid.sigmoid_param.engine = EngineParameter.Engine.CUDNN;
}
/// <summary>
/// Fills the NetParameter with the LSTM network architecture.
/// </summary>
/// <param name="net_param">Specifies the NetParameter to fill.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nNumOutput = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");

    FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
    FillerParameter bias_filler = m_param.recurrent_param.bias_filler;

    // Add generic LayerParameters (without bottoms/tops) of layer types we'll
    // use to save redundant code.
    LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    hidden_param.inner_product_param.num_output = nNumOutput * 4;
    hidden_param.inner_product_param.bias_term = false;
    hidden_param.inner_product_param.axis = 2;
    hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

    LayerParameter biased_hidden_param = hidden_param.Clone(false);
    biased_hidden_param.inner_product_param.bias_term = true;
    biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

    LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);
    sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
    scale_param.scale_param.axis = 0;

    LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
    slice_param.slice_param.axis = 0;

    LayerParameter split_param = new LayerParameter(LayerParameter.LayerType.SPLIT);

    List<BlobShape> rgInputShapes = new List<BlobShape>();
    RecurrentInputShapes(rgInputShapes);
    m_log.CHECK_EQ(2, rgInputShapes.Count, "There should be 2 input shapes.");

    //--- Add the layers ---

    LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer_param.top.Add("c_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[0].Clone());
    input_layer_param.top.Add("h_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[1].Clone());
    net_param.layer.Add(input_layer_param);

    LayerParameter cont_slice_param = slice_param.Clone(false);
    cont_slice_param.name = "cont_slice";
    cont_slice_param.bottom.Add("cont");
    cont_slice_param.slice_param.axis = 0;
    net_param.layer.Add(cont_slice_param);

    // Add layer to transform all timesteps of x to the hidden state dimension.
    //     W_xc_x = W_xc * x + b_c
    {
        LayerParameter x_transform_param = biased_hidden_param.Clone(false);
        x_transform_param.name = "x_transform";
        x_transform_param.parameters.Add(new ParamSpec("W_xc"));
        x_transform_param.parameters.Add(new ParamSpec("b_c"));
        x_transform_param.bottom.Add("x");
        x_transform_param.top.Add("W_xc_x");
        x_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_transform_param);
    }

    if (m_bStaticInput)
    {
        // Add layer to transform x_static to the hidden state dimension.
        //     W_xc_x_static = W_xc_static * x_static
        LayerParameter x_static_transform_param = hidden_param.Clone(false);
        x_static_transform_param.inner_product_param.axis = 1;
        x_static_transform_param.name = "W_xc_x_static";
        x_static_transform_param.parameters.Add(new ParamSpec("W_xc_static"));
        x_static_transform_param.bottom.Add("x_static");
        x_static_transform_param.top.Add("W_xc_x_static_preshape");
        x_static_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_static_transform_param);

        LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape new_shape = reshape_param.reshape_param.shape;
        new_shape.dim.Add(1);  // One timestep.
        new_shape.dim.Add(-1); // Should infer m_nN as the dimension so we can reshape on batch size.
        new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
        reshape_param.name = "W_xc_x_static_reshape";
        reshape_param.bottom.Add("W_xc_x_static_preshape");
        reshape_param.top.Add("W_xc_x_static");
        net_param.layer.Add(reshape_param);
    }

    LayerParameter x_slice_param = slice_param.Clone(false);
    x_slice_param.name = "W_xc_x_slice";
    x_slice_param.bottom.Add("W_xc_x");
    net_param.layer.Add(x_slice_param);

    LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
    output_concat_layer.name = "h_concat";
    output_concat_layer.top.Add("h");
    output_concat_layer.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string tm1s = (t - 1).ToString();
        string ts = t.ToString();

        cont_slice_param.top.Add("cont_" + ts);
        x_slice_param.top.Add("W_xc_x_" + ts);

        // Add layer to flush the hidden state when beginning a new sequence,
        // as indicated by cont_t.
        //     h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //     h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                       0 otherwise.
        {
            LayerParameter cont_h_param = scale_param.Clone(false);
            cont_h_param.group_start = true;
            cont_h_param.name = "h_conted_" + tm1s;
            cont_h_param.bottom.Add("h_" + tm1s);
            cont_h_param.bottom.Add("cont_" + ts);
            cont_h_param.top.Add("h_conted_" + tm1s);
            net_param.layer.Add(cont_h_param);
        }

        // Add layer to compute
        //     W_hc_h_{t-1} := W_hc * h_conted_{t-1}
        {
            LayerParameter w_param = hidden_param.Clone(false);
            w_param.name = "transform_" + ts;
            w_param.parameters.Add(new ParamSpec("W_hc"));
            w_param.bottom.Add("h_conted_" + tm1s);
            w_param.top.Add("W_hc_h_" + tm1s);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add the outputs of the linear transformations to compute the gate input.
        //     gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
        //                   = W_hc_h_{t-1} + W_xc_x_t + b_c
        {
            LayerParameter input_sum_layer = sum_param.Clone(false);
            input_sum_layer.name = "gate_input_" + ts;
            input_sum_layer.bottom.Add("W_hc_h_" + tm1s);
            input_sum_layer.bottom.Add("W_xc_x_" + ts);

            if (m_bStaticInput)
                input_sum_layer.bottom.Add("W_xc_x_static");

            input_sum_layer.top.Add("gate_input_" + ts);
            net_param.layer.Add(input_sum_layer);
        }

        // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t.
        // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
        // Outputs: c_t, h_t
        //     [ i_t' ]
        //     [ f_t' ] := gate_input_t
        //     [ o_t' ]
        //     [ g_t' ]
        //         i_t := \sigmoid[i_t']
        //         f_t := \sigmoid[f_t']
        //         o_t := \sigmoid[o_t']
        //         g_t := \tanh[g_t']
        //         c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
        //         h_t := o_t .* \tanh[c_t]
        {
            LayerParameter lstm_unit_param = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
            lstm_unit_param.bottom.Add("c_" + tm1s);
            lstm_unit_param.bottom.Add("gate_input_" + ts);
            lstm_unit_param.bottom.Add("cont_" + ts);
            lstm_unit_param.top.Add("c_" + ts);
            lstm_unit_param.top.Add("h_" + ts);
            lstm_unit_param.name = "unit_" + ts;
            net_param.layer.Add(lstm_unit_param);
        }

        output_concat_layer.bottom.Add("h_" + ts);
    }

    {
        LayerParameter c_T_copy_param = split_param.Clone(false);
        c_T_copy_param.bottom.Add("c_" + m_nT.ToString());
        c_T_copy_param.top.Add("c_T");
        net_param.layer.Add(c_T_copy_param);
    }

    net_param.layer.Add(output_concat_layer.Clone(false));
}
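// A minimal scalar sketch (illustrative only, not part of the layer source;
// LstmUnitStep is a hypothetical name) of the LSTMUnit math described in the
// comments above. Each gate argument holds one pre-activation from the
// gate_input_t blob produced by the eltwise SUM layer.
private static void LstmUnitStep(double dfCont, double dfCPrev,
    double dfIPre, double dfFPre, double dfOPre, double dfGPre,
    out double dfC, out double dfH)
{
    Func<double, double> sigmoid = x => 1.0 / (1.0 + Math.Exp(-x));
    double dfI = sigmoid(dfIPre);   // Input gate:     i_t := sigmoid[i_t']
    double dfF = sigmoid(dfFPre);   // Forget gate:    f_t := sigmoid[f_t']
    double dfO = sigmoid(dfOPre);   // Output gate:    o_t := sigmoid[o_t']
    double dfG = Math.Tanh(dfGPre); // Candidate cell: g_t := tanh[g_t']

    dfC = dfCont * (dfF * dfCPrev) + (dfI * dfG); // c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
    dfH = dfO * Math.Tanh(dfC);                   // h_t := o_t .* tanh[c_t]
}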
/// <summary>
/// Fills the NetParameter with the RNN network architecture.
/// </summary>
/// <param name="net_param">Specifies the NetParameter to fill.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nNumOutput = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");

    FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
    FillerParameter bias_filler = m_param.recurrent_param.bias_filler;

    // Add generic LayerParameters (without bottoms/tops) of layer types we'll
    // use to save redundant code.
    LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    hidden_param.inner_product_param.num_output = nNumOutput;
    hidden_param.inner_product_param.bias_term = false;
    hidden_param.inner_product_param.axis = 2;
    hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

    LayerParameter biased_hidden_param = hidden_param.Clone(false);
    biased_hidden_param.inner_product_param.bias_term = true;
    biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

    LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);
    sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter tanh_param = new LayerParameter(LayerParameter.LayerType.TANH);

    LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
    scale_param.scale_param.axis = 0;

    LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
    slice_param.slice_param.axis = 0;

    List<BlobShape> rgInputShapes = new List<BlobShape>();
    RecurrentInputShapes(rgInputShapes);
    m_log.CHECK_EQ(1, rgInputShapes.Count, "There should only be one input shape.");

    //--- Add the layers ---

    LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer_param.top.Add("h_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[0]);
    net_param.layer.Add(input_layer_param);

    LayerParameter cont_slice_param = slice_param.Clone(false);
    cont_slice_param.name = "cont_slice";
    cont_slice_param.bottom.Add("cont");
    cont_slice_param.slice_param.axis = 0;
    net_param.layer.Add(cont_slice_param);

    // Add layer to transform all timesteps of x to the hidden state dimension.
    //     W_xh_x = W_xh * x + b_h
    {
        LayerParameter x_transform_param = biased_hidden_param.Clone(false);
        x_transform_param.name = "x_transform";
        x_transform_param.parameters.Add(new ParamSpec("W_xh"));
        x_transform_param.parameters.Add(new ParamSpec("b_h"));
        x_transform_param.bottom.Add("x");
        x_transform_param.top.Add("W_xh_x");
        x_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_transform_param);
    }

    if (m_bStaticInput)
    {
        // Add layer to transform x_static to the hidden state dimension.
        //     W_xh_x_static = W_xh_static * x_static
        LayerParameter x_static_transform_param = hidden_param.Clone(false);
        x_static_transform_param.inner_product_param.axis = 1;
        x_static_transform_param.name = "W_xh_x_static";
        x_static_transform_param.parameters.Add(new ParamSpec("W_xh_static"));
        x_static_transform_param.bottom.Add("x_static");
        x_static_transform_param.top.Add("W_xh_x_static_preshape");
        x_static_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_static_transform_param);

        LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape new_shape = reshape_param.reshape_param.shape;
        new_shape.dim.Add(1);  // One timestep.
        new_shape.dim.Add(-1); // Should infer m_nN as the dimension so we can reshape on batch size.
        new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
        reshape_param.name = "W_xh_x_static_reshape";
        reshape_param.bottom.Add("W_xh_x_static_preshape");
        reshape_param.top.Add("W_xh_x_static");
        net_param.layer.Add(reshape_param);
    }

    LayerParameter x_slice_param = slice_param.Clone(false);
    x_slice_param.name = "W_xh_x_slice";
    x_slice_param.bottom.Add("W_xh_x");
    net_param.layer.Add(x_slice_param);

    LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
    output_concat_layer.name = "o_concat";
    output_concat_layer.top.Add("o");
    output_concat_layer.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string tm1s = (t - 1).ToString();
        string ts = t.ToString();

        cont_slice_param.top.Add("cont_" + ts);
        x_slice_param.top.Add("W_xh_x_" + ts);

        // Add layer to flush the hidden state when beginning a new sequence,
        // as indicated by cont_t.
        //     h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //     h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                       0 otherwise.
        {
            LayerParameter cont_h_param = scale_param.Clone(false);
            cont_h_param.name = "h_conted_" + tm1s;
            cont_h_param.bottom.Add("h_" + tm1s);
            cont_h_param.bottom.Add("cont_" + ts);
            cont_h_param.top.Add("h_conted_" + tm1s);
            net_param.layer.Add(cont_h_param);
        }

        // Add layer to compute
        //     W_hh_h_{t-1} := W_hh * h_conted_{t-1}
        {
            LayerParameter w_param = hidden_param.Clone(false);
            w_param.name = "W_hh_h_" + tm1s;
            w_param.parameters.Add(new ParamSpec("W_hh"));
            w_param.bottom.Add("h_conted_" + tm1s);
            w_param.top.Add("W_hh_h_" + tm1s);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layers to compute
        //     h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
        //          = \tanh( W_hh_h_{t-1} + W_xh_x_t )
        {
            LayerParameter h_input_sum_param = sum_param.Clone(false);
            h_input_sum_param.name = "h_input_sum_" + ts;
            h_input_sum_param.bottom.Add("W_hh_h_" + tm1s);
            h_input_sum_param.bottom.Add("W_xh_x_" + ts);

            if (m_bStaticInput)
                h_input_sum_param.bottom.Add("W_xh_x_static");

            h_input_sum_param.top.Add("h_neuron_input_" + ts);
            net_param.layer.Add(h_input_sum_param);
        }
        {
            LayerParameter h_neuron_param = tanh_param.Clone(false);
            h_neuron_param.name = "h_neuron_input_" + ts;
            h_neuron_param.bottom.Add("h_neuron_input_" + ts);
            h_neuron_param.top.Add("h_" + ts);
            net_param.layer.Add(h_neuron_param);
        }

        // Add layer to compute
        //     W_ho_h_t := W_ho * h_t + b_o
        {
            LayerParameter w_param = biased_hidden_param.Clone(false);
            w_param.name = "W_ho_h_" + ts;
            w_param.parameters.Add(new ParamSpec("W_ho"));
            w_param.parameters.Add(new ParamSpec("b_o"));
            w_param.bottom.Add("h_" + ts);
            w_param.top.Add("W_ho_h_" + ts);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layer to compute
        //     o_t := \tanh( W_ho * h_t + b_o )
        //          = \tanh( W_ho_h_t )
        {
            LayerParameter o_neuron_param = tanh_param.Clone(false);
            o_neuron_param.name = "o_neuron_" + ts;
            o_neuron_param.bottom.Add("W_ho_h_" + ts);
            o_neuron_param.top.Add("o_" + ts);
            net_param.layer.Add(o_neuron_param);
        }

        output_concat_layer.bottom.Add("o_" + ts);
    }

    net_param.layer.Add(output_concat_layer.Clone(false));
}
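// A minimal scalar sketch (illustrative only, not part of the layer source;
// RnnStep is a hypothetical name) of the single-timestep recurrence the layers
// above assemble, with scalar weights standing in for the InnerProduct layers.
private static void RnnStep(double dfCont, double dfHPrev, double dfX,
    double dfWhh, double dfWxh, double dfBh, double dfWho, double dfBo,
    out double dfH, out double dfO)
{
    double dfHConted = dfCont * dfHPrev;                     // Flush state on new sequences.
    dfH = Math.Tanh(dfWhh * dfHConted + dfWxh * dfX + dfBh); // h_t := tanh(W_hh * h_conted_{t-1} + W_xh * x_t + b_h)
    dfO = Math.Tanh(dfWho * dfH + dfBo);                     // o_t := tanh(W_ho * h_t + b_o)
}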
/// <summary>
/// Setup the layer.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    m_nNum = colBottom[0].num;
    m_nChannels = colBottom[0].channels;
    m_nBottomH = colBottom[0].height;
    m_nBottomW = colBottom[0].width;
    m_bReshapedFirstTime = false;

    m_log.CHECK_GT(m_nBottomH, 0, "Input dimensions cannot be zero.");
    m_log.CHECK_GT(m_nBottomW, 0, "Input dimensions cannot be zero.");

    m_nPyramidHeight = (int)m_param.spp_param.pyramid_height;

    m_colBlobSplitTopVec = new BlobCollection<T>();
    m_rgPoolingBottomVec = new List<BlobCollection<T>>();
    m_rgPoolingLayers = new List<PoolingLayer<T>>();
    m_rgPoolingTopVecs = new List<BlobCollection<T>>();
    m_colBlobPoolingOutputs = new BlobCollection<T>();
    m_rgFlattenLayers = new List<FlattenLayer<T>>();
    m_rgFlattenLayerTopVecs = new List<BlobCollection<T>>();
    m_colBlobFlattenOutputs = new BlobCollection<T>();
    m_colBlobConcatBottomVec = new BlobCollection<T>();

    if (m_nPyramidHeight == 1)
    {
        // pooling layer setup
        LayerParameter pp = getPoolingParam(0, m_nBottomH, m_nBottomW, m_param.spp_param);
        m_rgPoolingLayers.Add(new PoolingLayer<T>(m_cuda, m_log, pp));
        m_rgPoolingLayers[0].Setup(colBottom, colTop);
        return;
    }

    // split layer output holders setup
    for (int i = 0; i < m_nPyramidHeight; i++)
    {
        m_colBlobSplitTopVec.Add(new Blob<T>(m_cuda, m_log));
    }

    // split layer setup
    LayerParameter split_param = new param.LayerParameter(LayerParameter.LayerType.SPLIT);
    m_split_layer = new SplitLayer<T>(m_cuda, m_log, split_param);
    m_split_layer.Setup(colBottom, m_colBlobSplitTopVec);

    for (int i = 0; i < m_nPyramidHeight; i++)
    {
        // pooling layer input holders setup
        m_rgPoolingBottomVec.Add(new BlobCollection<T>());
        m_rgPoolingBottomVec[i].Add(m_colBlobSplitTopVec[i]);

        // pooling layer output holders setup
        m_colBlobPoolingOutputs.Add(new Blob<T>(m_cuda, m_log));
        m_rgPoolingTopVecs.Add(new BlobCollection<T>());
        m_rgPoolingTopVecs[i].Add(m_colBlobPoolingOutputs[i]);

        // pooling layer setup
        LayerParameter pooling_param = getPoolingParam(i, m_nBottomH, m_nBottomW, m_param.spp_param);
        m_rgPoolingLayers.Add(new PoolingLayer<T>(m_cuda, m_log, pooling_param));
        m_rgPoolingLayers[i].Setup(m_rgPoolingBottomVec[i], m_rgPoolingTopVecs[i]);

        // flatten layer output holders setup
        m_colBlobFlattenOutputs.Add(new Blob<T>(m_cuda, m_log));
        m_rgFlattenLayerTopVecs.Add(new BlobCollection<T>());
        m_rgFlattenLayerTopVecs[i].Add(m_colBlobFlattenOutputs[i]);

        // flatten layer setup
        LayerParameter flatten_param = new LayerParameter(LayerParameter.LayerType.FLATTEN);
        m_rgFlattenLayers.Add(new FlattenLayer<T>(m_cuda, m_log, flatten_param));
        m_rgFlattenLayers[i].Setup(m_rgPoolingTopVecs[i], m_rgFlattenLayerTopVecs[i]);

        // concat layer input holders setup
        m_colBlobConcatBottomVec.Add(m_colBlobFlattenOutputs[i]);
    }

    // concat layer setup
    LayerParameter concat_param = new LayerParameter(LayerParameter.LayerType.CONCAT);
    m_concat_layer = new ConcatLayer<T>(m_cuda, m_log, concat_param);
    m_concat_layer.Setup(m_colBlobConcatBottomVec, colTop);
}
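// Illustrative sketch (not part of the layer source; SppBinsPerChannel is a
// hypothetical name): since pyramid level l pools into a 2^l x 2^l grid and the
// per-level outputs are flattened and concatenated, a pyramid_height of P yields
// sum_{l=0}^{P-1} 4^l bins per channel, e.g. P = 3 gives 1 + 4 + 16 = 21.
private static int SppBinsPerChannel(int nPyramidHeight)
{
    int nTotal = 0;
    for (int l = 0; l < nPyramidHeight; l++)
    {
        int nBinsPerSide = (int)Math.Pow(2, l); // 2^l bins per spatial dimension.
        nTotal += nBinsPerSide * nBinsPerSide;  // (2^l)^2 bins at this level.
    }
    return nTotal;
}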