/// <summary>
/// The SoftmaxCrossEntropyLossLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter of type SOFTMAXCROSSENTROPY_LOSS.</param>
public SoftmaxCrossEntropyLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    m_type = LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS;

    m_blobSoftmaxOutput = new Blob<T>(cuda, log);
    m_blobSoftmaxOutput.Name = m_param.name + " softmax out";
    m_blobLoss = new Blob<T>(cuda, log);
    m_blobLoss.Name = m_param.name + " loss";

    // Clone the parameter without its blobs and clear the loss weights so the
    // internal softmax layer does not itself act as a loss output.
    LayerParameter param_softmax = p.Clone(false);
    param_softmax.loss_weight.Clear();
    m_softmaxLayer = new SoftmaxLayer<T>(cuda, log, param_softmax);
}
/// <summary>
/// The SigmoidCrossEntropyLossLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter of type SIGMOIDCROSSENTROPY_LOSS.</param>
public SigmoidCrossEntropyLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    m_type = LayerParameter.LayerType.SIGMOIDCROSSENTROPY_LOSS;

    m_blobSigmoidOutput = new Blob<T>(cuda, log);
    m_blobSigmoidOutput.Name = m_param.name + " sigmoid out";
    m_blobLoss = new Blob<T>(cuda, log);
    m_blobLoss.Name = m_param.name + " loss";

    // Clone the parameter without its blobs and clear the loss weights so the
    // internal sigmoid layer does not itself act as a loss output.
    LayerParameter param_sigmoid = p.Clone(false);
    param_sigmoid.loss_weight.Clear();
    m_sigmoidLayer = new SigmoidLayer<T>(cuda, log, param_sigmoid);
}
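// Both constructors above share one pattern: the incoming LayerParameter is
// cloned without its learned blobs (Clone(false)) and the clone's loss_weight
// list is cleared, so the internal activation layer (softmax or sigmoid) runs
// as a plain transform rather than reporting a loss of its own. A minimal
// sketch of that pattern, factored into a hypothetical helper for clarity
// (CloneForInnerLayer is not part of the library API):
private static LayerParameter CloneForInnerLayer(LayerParameter p)
{
    LayerParameter param_inner = p.Clone(false); // copy settings only, no blobs
    param_inner.loss_weight.Clear();             // inner layer adds no loss term
    return param_inner;
}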
/// <summary>
/// The Layer constructor.
/// </summary>
/// <remarks>
/// Setup code for derivative classes should go into an override of the LayerSetUp function where the
/// dimensions of the Blob%s are provided to the Layer.
/// </remarks>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter that contains the settings of the Layer.</param>
public Layer(CudaDnn<T> cuda, Log log, LayerParameter p)
{
    m_cuda = cuda;
    m_log = log;
    m_param = p.Clone(true);
    m_phase = p.phase;
    m_rgbParamPropagateDown = new DictionaryMap<bool>(false);
    m_rgLoss = new DictionaryMap<double>(0.0);
    m_colBlobs = new BlobCollection<T>();

    for (int i = 0; i < p.blobs.Count; i++)
    {
        m_colBlobs.Add(new Blob<T>(cuda, log, p.blobs[i]));
    }

    m_tOne = (T)Convert.ChangeType(1, typeof(T));
    m_tZero = (T)Convert.ChangeType(0, typeof(T));
}
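// As the remarks above note, derived classes keep the constructor to simple
// bookkeeping and put shape-dependent setup into a LayerSetUp override. A
// minimal sketch of that pattern follows; IdentityLayer is a hypothetical
// example, and the forward/backward overrides Layer<T> also requires are
// omitted for brevity:
public class IdentityLayer<T> : Layer<T>
{
    public IdentityLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
        : base(cuda, log, p)
    {
        // No shapes are known yet; only bookkeeping belongs here.
    }

    public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
    {
        // Shape-dependent, one-time setup goes here, once the bottom Blobs exist.
        m_log.CHECK_EQ(colBottom.Count, colTop.Count, "Expected one top per bottom.");
    }

    public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
    {
        for (int i = 0; i < colBottom.Count; i++)
        {
            colTop[i].ReshapeLike(colBottom[i]);
        }
    }
}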
/// <summary>
/// Setup the layer.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    base.LayerSetUp(colBottom, colTop);

    LayerParameter p = m_param.Clone(false);
    p.SetType(LayerParameter.LayerType.SOFTMAX);
    LayerParameter param_softmax = p.Clone(false);
    param_softmax.loss_weight.Clear();

    m_softmaxLayer = new SoftmaxLayer<T>(m_cuda, m_log, param_softmax);
    m_colSoftmaxBottom = new BlobCollection<T>();
    m_colSoftmaxTop = new BlobCollection<T>();
    m_colSoftmaxBottom.Add(colBottom[0]);
    m_colSoftmaxTop.Add(m_blobProb);
    m_softmaxLayer.Setup(m_colSoftmaxBottom, m_colSoftmaxTop);

    m_nIgnoreLabel = m_param.loss_param.ignore_label;
}
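// The setup above wires an internal SoftmaxLayer whose probabilities land in
// m_blobProb, with ignore_label taken from the loss parameter. For reference,
// a minimal CPU sketch of softmax followed by cross-entropy for one integer
// label (illustrative only, not the layer's GPU implementation; assumes
// System and System.Linq; the averaging over non-ignored items is omitted):
static double SoftmaxCrossEntropy(double[] rgLogits, int nLabel, int? nIgnoreLabel)
{
    if (nIgnoreLabel.HasValue && nLabel == nIgnoreLabel.Value)
        return 0;  // Ignored labels contribute no loss.

    // Numerically stable softmax: subtract the max before exponentiating.
    double dfMax = rgLogits.Max();
    double dfSum = rgLogits.Sum(x => Math.Exp(x - dfMax));
    double dfProb = Math.Exp(rgLogits[nLabel] - dfMax) / dfSum;

    return -Math.Log(dfProb);
}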
/// <summary>
/// Fills the NetParameter with the LSTM network architecture.
/// </summary>
/// <param name="net_param">Specifies the NetParameter to fill.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nNumOutput = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");

    FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
    FillerParameter bias_filler = m_param.recurrent_param.bias_filler;

    // Add generic LayerParameters (without bottoms/tops) of layer types we'll
    // use to save redundant code.
    LayerParameter hidden_param = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    hidden_param.inner_product_param.num_output = nNumOutput * 4;
    hidden_param.inner_product_param.bias_term = false;
    hidden_param.inner_product_param.axis = 2;
    hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

    LayerParameter biased_hidden_param = hidden_param.Clone(false);
    biased_hidden_param.inner_product_param.bias_term = true;
    biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

    LayerParameter sum_param = new LayerParameter(LayerParameter.LayerType.ELTWISE);
    sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
    scale_param.scale_param.axis = 0;

    LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
    slice_param.slice_param.axis = 0;

    LayerParameter split_param = new LayerParameter(LayerParameter.LayerType.SPLIT);

    List<BlobShape> rgInputShapes = new List<BlobShape>();
    RecurrentInputShapes(rgInputShapes);
    m_log.CHECK_EQ(2, rgInputShapes.Count, "There should be 2 input shapes.");

    //--- Add the layers ---

    LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer_param.top.Add("c_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[0].Clone());
    input_layer_param.top.Add("h_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[1].Clone());
    net_param.layer.Add(input_layer_param);

    LayerParameter cont_slice_param = slice_param.Clone(false);
    cont_slice_param.name = "cont_slice";
    cont_slice_param.bottom.Add("cont");
    cont_slice_param.slice_param.axis = 0;
    net_param.layer.Add(cont_slice_param);

    // Add layer to transform all timesteps of x to the hidden state dimension.
    //     W_xc_x = W_xc * x + b_c
    {
        LayerParameter x_transform_param = biased_hidden_param.Clone(false);
        x_transform_param.name = "x_transform";
        x_transform_param.parameters.Add(new ParamSpec("W_xc"));
        x_transform_param.parameters.Add(new ParamSpec("b_c"));
        x_transform_param.bottom.Add("x");
        x_transform_param.top.Add("W_xc_x");
        x_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_transform_param);
    }

    if (m_bStaticInput)
    {
        // Add layer to transform x_static to the hidden state dimension.
        //     W_xc_x_static = W_xc_static * x_static
        LayerParameter x_static_transform_param = hidden_param.Clone(false);
        x_static_transform_param.inner_product_param.axis = 1;
        x_static_transform_param.name = "W_xc_x_static";
        x_static_transform_param.parameters.Add(new ParamSpec("W_xc_static"));
        x_static_transform_param.bottom.Add("x_static");
        x_static_transform_param.top.Add("W_xc_x_static_preshape");
        x_static_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_static_transform_param);

        LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape new_shape = reshape_param.reshape_param.shape;
        new_shape.dim.Add(1);   // One timestep.
        new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
        new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
        reshape_param.name = "W_xc_x_static_reshape";
        reshape_param.bottom.Add("W_xc_x_static_preshape");
        reshape_param.top.Add("W_xc_x_static");
        net_param.layer.Add(reshape_param);
    }

    LayerParameter x_slice_param = slice_param.Clone(false);
    x_slice_param.name = "W_xc_x_slice";
    x_slice_param.bottom.Add("W_xc_x");
    net_param.layer.Add(x_slice_param);

    LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
    output_concat_layer.name = "h_concat";
    output_concat_layer.top.Add("h");
    output_concat_layer.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string tm1s = (t - 1).ToString();
        string ts = t.ToString();

        cont_slice_param.top.Add("cont_" + ts);
        x_slice_param.top.Add("W_xc_x_" + ts);

        // Add layer to flush the hidden state when beginning a new sequence,
        // as indicated by cont_t.
        //     h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //     h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                       0       otherwise.
        {
            LayerParameter cont_h_param = scale_param.Clone(false);
            cont_h_param.group_start = true;
            cont_h_param.name = "h_conted_" + tm1s;
            cont_h_param.bottom.Add("h_" + tm1s);
            cont_h_param.bottom.Add("cont_" + ts);
            cont_h_param.top.Add("h_conted_" + tm1s);
            net_param.layer.Add(cont_h_param);
        }

        // Add layer to compute
        //     W_hc_h_{t-1} := W_hc * h_conted_{t-1}
        {
            LayerParameter w_param = hidden_param.Clone(false);
            w_param.name = "transform_" + ts;
            w_param.parameters.Add(new ParamSpec("W_hc"));
            w_param.bottom.Add("h_conted_" + tm1s);
            w_param.top.Add("W_hc_h_" + tm1s);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add the outputs of the linear transformations to compute the gate input.
        //     gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
        //                   = W_hc_h_{t-1} + W_xc_x_t + b_c
        {
            LayerParameter input_sum_layer = sum_param.Clone(false);
            input_sum_layer.name = "gate_input_" + ts;
            input_sum_layer.bottom.Add("W_hc_h_" + tm1s);
            input_sum_layer.bottom.Add("W_xc_x_" + ts);
            if (m_bStaticInput)
            {
                input_sum_layer.bottom.Add("W_xc_x_static");
            }
            input_sum_layer.top.Add("gate_input_" + ts);
            net_param.layer.Add(input_sum_layer);
        }

        // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t.
        // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
        // Outputs: c_t, h_t
        //     [ i_t' ]
        //     [ f_t' ] := gate_input_t
        //     [ o_t' ]
        //     [ g_t' ]
        //         i_t := \sigmoid[i_t']
        //         f_t := \sigmoid[f_t']
        //         o_t := \sigmoid[o_t']
        //         g_t := \tanh[g_t']
        //         c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
        //         h_t := o_t .* \tanh[c_t]
        {
            LayerParameter lstm_unit_param = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
            lstm_unit_param.bottom.Add("c_" + tm1s);
            lstm_unit_param.bottom.Add("gate_input_" + ts);
            lstm_unit_param.bottom.Add("cont_" + ts);
            lstm_unit_param.top.Add("c_" + ts);
            lstm_unit_param.top.Add("h_" + ts);
            lstm_unit_param.name = "unit_" + ts;
            net_param.layer.Add(lstm_unit_param);
        }

        output_concat_layer.bottom.Add("h_" + ts);
    }

    {
        LayerParameter c_T_copy_param = split_param.Clone(false);
        c_T_copy_param.bottom.Add("c_" + m_nT.ToString());
        c_T_copy_param.top.Add("c_T");
        net_param.layer.Add(c_T_copy_param);
    }

    net_param.layer.Add(output_concat_layer.Clone(false));
}
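// The LSTM_UNIT layer added in the loop above implements the gate math listed
// in its comment. As a reference, a scalar sketch of those same equations
// (illustrative only; the real layer operates on whole blobs of gate
// activations on the GPU, and LstmUnitStep/Sigmoid are hypothetical names):
static void LstmUnitStep(double i_, double f_, double o_, double g_,
    double cPrev, double cont, out double c, out double h)
{
    double i = Sigmoid(i_);    // input gate
    double f = Sigmoid(f_);    // forget gate
    double o = Sigmoid(o_);    // output gate
    double g = Math.Tanh(g_);  // candidate cell value

    c = cont * (f * cPrev) + (i * g);  // cont flushes the cell at sequence start
    h = o * Math.Tanh(c);
}

static double Sigmoid(double x)
{
    return 1.0 / (1.0 + Math.Exp(-x));
}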
/// <summary>
/// Fills the NetParameter with the RNN network architecture.
/// </summary>
/// <param name="net_param">Specifies the NetParameter to fill.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nNumOutput = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");

    FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
    FillerParameter bias_filler = m_param.recurrent_param.bias_filler;

    // Add generic LayerParameters (without bottoms/tops) of layer types we'll
    // use to save redundant code.
    LayerParameter hidden_param = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    hidden_param.inner_product_param.num_output = nNumOutput;
    hidden_param.inner_product_param.bias_term = false;
    hidden_param.inner_product_param.axis = 2;
    hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

    LayerParameter biased_hidden_param = hidden_param.Clone(false);
    biased_hidden_param.inner_product_param.bias_term = true;
    biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

    LayerParameter sum_param = new LayerParameter(LayerParameter.LayerType.ELTWISE);
    sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter tanh_param = new LayerParameter(LayerParameter.LayerType.TANH);

    LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
    scale_param.scale_param.axis = 0;

    LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
    slice_param.slice_param.axis = 0;

    List<BlobShape> rgInputShapes = new List<BlobShape>();
    RecurrentInputShapes(rgInputShapes);
    m_log.CHECK_EQ(1, rgInputShapes.Count, "There should only be one input shape.");

    //--- Add the layers ---

    LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer_param.top.Add("h_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[0]);
    net_param.layer.Add(input_layer_param);

    LayerParameter cont_slice_param = slice_param.Clone(false);
    cont_slice_param.name = "cont_slice";
    cont_slice_param.bottom.Add("cont");
    cont_slice_param.slice_param.axis = 0;
    net_param.layer.Add(cont_slice_param);

    // Add layer to transform all timesteps of x to the hidden state dimension.
    //     W_xh_x = W_xh * x + b_h
    {
        LayerParameter x_transform_param = biased_hidden_param.Clone(false);
        x_transform_param.name = "x_transform";
        x_transform_param.parameters.Add(new ParamSpec("W_xh"));
        x_transform_param.parameters.Add(new ParamSpec("b_h"));
        x_transform_param.bottom.Add("x");
        x_transform_param.top.Add("W_xh_x");
        x_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_transform_param);
    }

    if (m_bStaticInput)
    {
        // Add layer to transform x_static to the hidden state dimension.
        //     W_xh_x_static = W_xh_static * x_static
        LayerParameter x_static_transform_param = hidden_param.Clone(false);
        x_static_transform_param.inner_product_param.axis = 1;
        x_static_transform_param.name = "W_xh_x_static";
        x_static_transform_param.parameters.Add(new ParamSpec("W_xh_static"));
        x_static_transform_param.bottom.Add("x_static");
        x_static_transform_param.top.Add("W_xh_x_static_preshape");
        x_static_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_static_transform_param);

        LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape new_shape = reshape_param.reshape_param.shape;
        new_shape.dim.Add(1);   // One timestep.
        new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
        new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
        reshape_param.name = "W_xh_x_static_reshape";
        reshape_param.bottom.Add("W_xh_x_static_preshape");
        reshape_param.top.Add("W_xh_x_static");
        net_param.layer.Add(reshape_param);
    }

    LayerParameter x_slice_param = slice_param.Clone(false);
    x_slice_param.name = "W_xh_x_slice";
    x_slice_param.bottom.Add("W_xh_x");
    net_param.layer.Add(x_slice_param);

    LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
    output_concat_layer.name = "o_concat";
    output_concat_layer.top.Add("o");
    output_concat_layer.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string tm1s = (t - 1).ToString();
        string ts = t.ToString();

        cont_slice_param.top.Add("cont_" + ts);
        x_slice_param.top.Add("W_xh_x_" + ts);

        // Add layer to flush the hidden state when beginning a new sequence,
        // as indicated by cont_t.
        //     h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //     h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                       0       otherwise.
        {
            LayerParameter cont_h_param = scale_param.Clone(false);
            cont_h_param.name = "h_conted_" + tm1s;
            cont_h_param.bottom.Add("h_" + tm1s);
            cont_h_param.bottom.Add("cont_" + ts);
            cont_h_param.top.Add("h_conted_" + tm1s);
            net_param.layer.Add(cont_h_param);
        }

        // Add layer to compute
        //     W_hh_h_{t-1} := W_hh * h_conted_{t-1}
        {
            LayerParameter w_param = hidden_param.Clone(false);
            w_param.name = "W_hh_h_" + tm1s;
            w_param.parameters.Add(new ParamSpec("W_hh"));
            w_param.bottom.Add("h_conted_" + tm1s);
            w_param.top.Add("W_hh_h_" + tm1s);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layers to compute
        //     h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
        //          = \tanh( W_hh_h_{t-1} + W_xh_x_t )
        {
            LayerParameter h_input_sum_param = sum_param.Clone(false);
            h_input_sum_param.name = "h_input_sum_" + ts;
            h_input_sum_param.bottom.Add("W_hh_h_" + tm1s);
            h_input_sum_param.bottom.Add("W_xh_x_" + ts);
            if (m_bStaticInput)
            {
                h_input_sum_param.bottom.Add("W_xh_x_static");
            }
            h_input_sum_param.top.Add("h_neuron_input_" + ts);
            net_param.layer.Add(h_input_sum_param);
        }
        {
            LayerParameter h_neuron_param = tanh_param.Clone(false);
            h_neuron_param.name = "h_neuron_input_" + ts;
            h_neuron_param.bottom.Add("h_neuron_input_" + ts);
            h_neuron_param.top.Add("h_" + ts);
            net_param.layer.Add(h_neuron_param);
        }

        // Add layer to compute
        //     W_ho_h_t := W_ho * h_t + b_o
        {
            LayerParameter w_param = biased_hidden_param.Clone(false);
            w_param.name = "W_ho_h_" + ts;
            w_param.parameters.Add(new ParamSpec("W_ho"));
            w_param.parameters.Add(new ParamSpec("b_o"));
            w_param.bottom.Add("h_" + ts);
            w_param.top.Add("W_ho_h_" + ts);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layer to compute
        //     o_t := \tanh( W_ho * h_t + b_o )
        //          = \tanh( W_ho_h_t )
        {
            LayerParameter o_neuron_param = tanh_param.Clone(false);
            o_neuron_param.name = "o_neuron_" + ts;
            o_neuron_param.bottom.Add("W_ho_h_" + ts);
            o_neuron_param.top.Add("o_" + ts);
            net_param.layer.Add(o_neuron_param);
        }

        output_concat_layer.bottom.Add("o_" + ts);
    }

    net_param.layer.Add(output_concat_layer.Clone(false));
}
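// The unrolled graph above realizes the recurrence in its comments through
// InnerProduct, Eltwise, and TanH layers. A scalar sketch of one timestep
// (illustrative only; RnnStep is a hypothetical name, and the real net works
// on blobs with weight matrices rather than scalar weights):
static void RnnStep(double whh, double wxh, double bh, double who, double bo,
    double x, double hPrev, double cont, out double h, out double o)
{
    double hConted = cont * hPrev;                // flush state at sequence start
    h = Math.Tanh(whh * hConted + wxh * x + bh);  // hidden state h_t
    o = Math.Tanh(who * h + bo);                  // output o_t
}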
/// <summary>
/// Setup the layer.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    m_dfAlphaIn = m_param.convolution_octave_param.alpha_in;
    m_dfAlphaOut = m_param.convolution_octave_param.alpha_out;

    m_log.CHECK_GE(m_dfAlphaIn, 0, "The alpha in must be >= 0.");
    m_log.CHECK_LE(m_dfAlphaIn, 1, "The alpha in must be <= 1.");
    m_log.CHECK_GE(m_dfAlphaOut, 0, "The alpha out must be >= 0.");
    m_log.CHECK_LT(m_dfAlphaOut, 1, "The alpha out must be < 1.");

    m_nStride = (int)m_param.convolution_param.stride[0];
    m_log.CHECK_GE(m_nStride, 1, "The stride should be >= 1.");
    m_log.CHECK_LE(m_nStride, 2, "The stride should be <= 2.");

    //--------------------------------------------
    // Create the blobs.
    //--------------------------------------------

    // process high frequency.
    m_blob_x_h = new Blob<T>(m_cuda, m_log);
    m_blob_x_h.Name = "x_h";
    m_blob_x_h2h = new Blob<T>(m_cuda, m_log);
    m_blob_x_h2h.Name = "x_h2h";

    if (m_dfAlphaOut > 0)
    {
        m_blob_x_h_ds = new Blob<T>(m_cuda, m_log);
        m_blob_x_h_ds.Name = "x_h_ds";
        m_blob_x_h2l = new Blob<T>(m_cuda, m_log);
        m_blob_x_h2l.Name = "x_h2l";
    }

    // process low frequency.
    if (colBottom.Count > 1)
    {
        m_blob_x_l = new Blob<T>(m_cuda, m_log);
        m_blob_x_l.Name = "x_l";
        m_blob_x_l_ds = new Blob<T>(m_cuda, m_log);
        m_blob_x_l_ds.Name = "x_l_ds";
        m_blob_x_l2h = new Blob<T>(m_cuda, m_log);
        m_blob_x_l2h.Name = "x_l2h";
        m_blob_x_l2h_us = new Blob<T>(m_cuda, m_log);
        m_blob_x_l2h_us.Name = "x_l2h_us";
        m_blob_x_l2l = new Blob<T>(m_cuda, m_log);
        m_blob_x_l2l.Name = "x_l2l";
    }

    //--------------------------------------------
    // Create the internal layers.
    //--------------------------------------------

    LayerParameter poolParam = new LayerParameter(LayerParameter.LayerType.POOLING, "downsample");
    poolParam.pooling_param.kernel_size.Add(2);
    poolParam.pooling_param.stride.Add(2);
    poolParam.pooling_param.pool = PoolingParameter.PoolingMethod.AVE;
    poolParam.pooling_param.engine = m_param.convolution_param.engine;

    if (m_nStride == 2)
    {
        m_downsampleLayer1 = Layer<T>.Create(m_cuda, m_log, poolParam, null);
        setupBtmTop(colBottom[0], m_blob_x_h);
        m_downsampleLayer1.LayerSetUp(m_rgBtm, m_rgTop);
        m_downsampleLayer1.Reshape(m_rgBtm, m_rgTop);
    }
    else
    {
        m_blob_x_h.ReshapeLike(colBottom[0]);
    }

    LayerParameter convParamBase = new LayerParameter(LayerParameter.LayerType.CONVOLUTION);
    convParamBase.convolution_param.engine = m_param.convolution_param.engine;
    convParamBase.convolution_param.kernel_size = m_param.convolution_param.kernel_size;
    convParamBase.convolution_param.stride.Add(1);
    convParamBase.convolution_param.pad = m_param.convolution_param.pad;
    convParamBase.convolution_param.dilation = m_param.convolution_param.dilation;
    convParamBase.convolution_param.bias_filler = m_param.convolution_param.bias_filler;
    convParamBase.convolution_param.bias_term = m_param.convolution_param.bias_term;

    int nInChannels = colBottom[0].channels;
    uint nOutChannels = m_param.convolution_param.num_output;
    uint nGroup = m_param.convolution_param.group;
    uint nGroupTmp;

    // h2h Layer
    {
        LayerParameter convParam = convParamBase.Clone(false);
        convParam.name = "h2h conv";
        convParam.convolution_param.num_output = nOutChannels - (uint)(m_dfAlphaOut * nOutChannels);
        nGroupTmp = (uint)Math.Ceiling(nGroup - m_dfAlphaIn * nGroup);
        convParam.convolution_param.group = (nInChannels % nGroupTmp == 0) ? nGroupTmp : 1;

        m_conv_h2h = Layer<T>.Create(m_cuda, m_log, convParam, null);
        m_conv_h2h.OnGetWorkspace += layer_OnGetWorkspace;
        m_conv_h2h.OnSetWorkspace += layer_OnSetWorkspace;

        setupBtmTop(m_blob_x_h, m_blob_x_h2h);
        m_conv_h2h.LayerSetUp(m_rgBtm, m_rgTop);
        m_conv_h2h.Reshape(m_rgBtm, m_rgTop);
        m_colBlobs.Add(m_conv_h2h.blobs);
    }

    // h2l Layer
    if (m_dfAlphaOut > 0)
    {
        m_downsampleLayer2 = Layer<T>.Create(m_cuda, m_log, poolParam, null);
        setupBtmTop(m_blob_x_h, m_blob_x_h_ds);
        m_downsampleLayer2.LayerSetUp(m_rgBtm, m_rgTop);
        m_downsampleLayer2.Reshape(m_rgBtm, m_rgTop);

        LayerParameter convParam = convParamBase.Clone(false);
        convParam.name = "h2l conv";
        convParam.convolution_param.num_output = (uint)(m_dfAlphaOut * nOutChannels);
        convParam.convolution_param.group = (convParam.convolution_param.num_output % nGroup == 0) ? nGroup : 1;

        m_conv_h2l = Layer<T>.Create(m_cuda, m_log, convParam, null);
        m_conv_h2l.OnGetWorkspace += layer_OnGetWorkspace;
        m_conv_h2l.OnSetWorkspace += layer_OnSetWorkspace;

        setupBtmTop(m_blob_x_h_ds, m_blob_x_h2l);
        m_conv_h2l.LayerSetUp(m_rgBtm, m_rgTop);
        m_conv_h2l.Reshape(m_rgBtm, m_rgTop);
        m_colBlobs.Add(m_conv_h2l.blobs);
    }

    if (colBottom.Count > 1)
    {
        m_blob_x_l.ReshapeLike(colBottom[1]);

        // downsample3 Layer
        if (m_nStride == 2)
        {
            m_downsampleLayer3 = Layer<T>.Create(m_cuda, m_log, poolParam, null);
            setupBtmTop(colBottom[1], m_blob_x_l_ds);
            m_downsampleLayer3.LayerSetUp(m_rgBtm, m_rgTop);
            m_downsampleLayer3.Reshape(m_rgBtm, m_rgTop);
        }
        else
        {
            m_blob_x_l_ds.ReshapeLike(m_blob_x_l);
        }

        // l2l layer
        if (m_dfAlphaOut > 0)
        {
            LayerParameter convParam = convParamBase.Clone(false);
            convParam.name = "l2l conv";
            convParam.convolution_param.num_output = (uint)(m_dfAlphaOut * nOutChannels);
            nGroupTmp = (uint)Math.Ceiling(m_dfAlphaIn * nGroup);
            convParam.convolution_param.group = (convParam.convolution_param.num_output % nGroupTmp == 0) ? nGroupTmp : 1;

            m_conv_l2l = Layer<T>.Create(m_cuda, m_log, convParam, null);
            m_conv_l2l.OnGetWorkspace += layer_OnGetWorkspace;
            m_conv_l2l.OnSetWorkspace += layer_OnSetWorkspace;

            setupBtmTop(m_blob_x_l_ds, m_blob_x_l2l);
            m_conv_l2l.LayerSetUp(m_rgBtm, m_rgTop);
            m_conv_l2l.Reshape(m_rgBtm, m_rgTop);
            m_colBlobs.Add(m_conv_l2l.blobs);
        }

        // l2h Layer
        {
            LayerParameter convParam = convParamBase.Clone(false);
            convParam.name = "l2h conv";
            convParam.convolution_param.num_output = nOutChannels - (uint)(m_dfAlphaOut * nOutChannels);
            convParam.convolution_param.group = (convParam.convolution_param.num_output % nGroup == 0) ? nGroup : 1;

            m_conv_l2h = Layer<T>.Create(m_cuda, m_log, convParam, null);
            m_conv_l2h.OnGetWorkspace += layer_OnGetWorkspace;
            m_conv_l2h.OnSetWorkspace += layer_OnSetWorkspace;

            setupBtmTop(m_blob_x_l, m_blob_x_l2h);
            m_conv_l2h.LayerSetUp(m_rgBtm, m_rgTop);
            m_conv_l2h.Reshape(m_rgBtm, m_rgTop);
            m_colBlobs.Add(m_conv_l2h.blobs);
        }

        // upsample Layer
        if (m_nStride == 1)
        {
            LayerParameter interpParam = new LayerParameter(LayerParameter.LayerType.INTERP, "upsample");
            interpParam.interp_param.zoom_factor = 2;
            m_upsampleLayer = Layer<T>.Create(m_cuda, m_log, interpParam, null);
            setupBtmTop(m_blob_x_l2h, m_blob_x_l2h_us);
            m_upsampleLayer.LayerSetUp(m_rgBtm, m_rgTop);
            m_upsampleLayer.Reshape(m_rgBtm, m_rgTop);
        }
        else
        {
            m_blob_x_l2h_us.ReshapeLike(m_blob_x_l2h);
        }

        // add Layer
        LayerParameter eltAdd = new LayerParameter(LayerParameter.LayerType.ELTWISE);
        eltAdd.name = "eltadd";
        eltAdd.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

        m_add = Layer<T>.Create(m_cuda, m_log, eltAdd, null);
        setupBtmTop(m_blob_x_l2h_us, m_blob_x_h2h, colTop[0]);
        m_add.LayerSetUp(m_rgBtm, m_rgTop);
        m_add.Reshape(m_rgBtm, m_rgTop);

        if (m_dfAlphaOut > 0)
        {
            setupBtmTop(m_blob_x_h2l, m_blob_x_l2l, colTop[1]);
            m_add.Reshape(m_rgBtm, m_rgTop);
        }
    }
}
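// The num_output splits above follow the octave convolution channel budget:
// a fraction alpha_out of the output channels carries the low-frequency path
// (h2l, l2l) and the remainder carries the high-frequency path (h2h, l2h).
// A small sketch of that arithmetic, mirroring the truncating casts used
// above (SplitChannels is a hypothetical helper, not part of the layer):
static void SplitChannels(uint nOutChannels, double dfAlphaOut,
    out uint nHighFreq, out uint nLowFreq)
{
    nLowFreq = (uint)(dfAlphaOut * nOutChannels);  // h2l and l2l outputs
    nHighFreq = nOutChannels - nLowFreq;           // h2h and l2h outputs
}
// For example, with num_output = 64 and alpha_out = 0.25, the low-frequency
// branch gets 16 channels and the high-frequency branch gets the other 48.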