/// <summary>
/// Setup the layer.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    m_log.CHECK(colTop[0] != colBottom[0], type.ToString() + " Layer does not allow in-place computation.");

    m_nInferredAxis = -1;
    m_nConstantCount = 1;
    m_rgCopyAxes.Clear();

    BlobShape top_blob_shape = m_param.reshape_param.shape;
    int top_num_axes = top_blob_shape.dim.Count();

    for (int i = 0; i < top_num_axes; i++)
    {
        int top_dim = top_blob_shape.dim[i];

        if (top_dim == 0)
        {
            m_rgCopyAxes.Add(i);
        }
        else if (top_dim == -1)
        {
            m_log.CHECK_EQ(m_nInferredAxis, -1, "new shape contains multiple -1 dims; at most a single (1) value of -1 may be specified.");
            m_nInferredAxis = i;
        }
        else
        {
            m_nConstantCount *= top_dim;
        }
    }
}
/// <summary>
/// Parses the parameter from a RawProto.
/// </summary>
/// <param name="rp">Specifies the RawProto to parse.</param>
/// <returns>A new instance of the parameter is returned.</returns>
public static TransposeParameter FromProto(RawProto rp)
{
    TransposeParameter p = new TransposeParameter();
    p.m_shape = BlobShape.FromProto(rp);
    return p;
}
/// <summary>
/// Parses the parameter from a RawProto.
/// </summary>
/// <param name="rp">Specifies the RawProto to parse.</param>
/// <returns>A new instance of the parameter is returned.</returns>
public static SqueezeParameter FromProto(RawProto rp)
{
    SqueezeParameter p = new SqueezeParameter();
    p.m_shape = BlobShape.FromProto(rp);
    return p;
}
/// <summary>
/// Setup the layer.
/// </summary>
/// <param name="colBottom">Specifies the input blobs, which are not used.</param>
/// <param name="colTop">Specifies the output blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    m_shape = m_param.constant_param.output_shape;
    m_rgF = m_param.constant_param.values_f;

    if (!string.IsNullOrEmpty(m_param.constant_param.binary_data_file))
    {
        m_log.CHECK(File.Exists(m_param.constant_param.binary_data_file), "The 'binary_data_file' specified ('" + m_param.constant_param.binary_data_file + "') could not be found!");

        using (FileStream fs = new FileStream(m_param.constant_param.binary_data_file, FileMode.Open, FileAccess.Read))
        using (BinaryReader br = new BinaryReader(fs))
        {
            BlobProto proto = BlobProto.Load(br);
            m_rgF = proto.data;
        }
    }
}
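// A minimal sketch (not part of the layer) of how a file consumed by the
// 'binary_data_file' setting above might be produced. It assumes BlobProto
// exposes a Save(BinaryWriter) counterpart to the BlobProto.Load(BinaryReader)
// call used in LayerSetUp - verify against the actual BlobProto API before use.
private static void SaveConstantData(string strFile, List<float> rgData)
{
    BlobProto proto = new BlobProto();
    proto.data = rgData;    // Values the ConstantLayer will output in place of 'values_f'.

    using (FileStream fs = new FileStream(strFile, FileMode.Create, FileAccess.Write))
    using (BinaryWriter bw = new BinaryWriter(fs))
    {
        proto.Save(bw);     // Assumed counterpart of the BlobProto.Load call above.
    }
}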
/// <summary>
/// Initialize the gym with the specified properties.
/// </summary>
/// <param name="log">Specifies the output log to use.</param>
/// <param name="properties">Specifies the properties containing Gym specific initialization parameters.</param>
/// <remarks>
/// The ModelGym uses the following initialization properties.
///
///   'GpuID' - the GPU to run on.
///   'ModelDescription' - the model description of the model to use.
///   'Dataset' - the name of the dataset to use.
///   'Weights' - the model trained weights.
///   'CudaPath' - the path of the CudaDnnDLL to use.
///   'BatchSize' - the batch size used when running images through the model (default = 16).
///   'RecreateData' - when 'True' the data is re-run through the model, otherwise if already run the data is loaded from file (faster).
/// </remarks>
public void Initialize(Log log, PropertySet properties)
{
    m_nGpuID = properties.GetPropertyAsInt("GpuID");
    m_strModelDesc = properties.GetProperty("ModelDescription");
    m_strDataset = properties.GetProperty("Dataset");
    m_rgWeights = properties.GetPropertyBlob("Weights");
    m_nBatchSize = properties.GetPropertyAsInt("BatchSize", 16);
    m_bRecreateData = properties.GetPropertyAsBool("RecreateData", false);
    m_strProject = properties.GetProperty("ProjectName");

    if (string.IsNullOrEmpty(m_strProject))
    {
        m_strProject = "default";
    }

    string strCudaPath = properties.GetProperty("CudaPath");

    SettingsCaffe s = new SettingsCaffe();
    s.GpuIds = m_nGpuID.ToString();
    s.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ON_DEMAND_BACKGROUND;

    m_imgdb = new MyCaffeImageDatabase2(log);
    m_imgdb.InitializeWithDsName1(s, m_strDataset);
    m_ds = m_imgdb.GetDatasetByName(m_strDataset);

    SimpleDatum sd = m_imgdb.QueryImage(m_ds.TrainingSource.ID, 0, IMGDB_LABEL_SELECTION_METHOD.NONE, IMGDB_IMAGE_SELECTION_METHOD.NONE);
    BlobShape shape = new BlobShape(1, sd.Channels, sd.Height, sd.Width);

    if (m_evtCancel == null)
    {
        m_evtCancel = new CancelEvent();
    }

    m_mycaffe = new MyCaffeControl<float>(s, log, m_evtCancel, null, null, null, null, strCudaPath);
    m_mycaffe.LoadToRun(m_strModelDesc, m_rgWeights, shape);

    m_log = log;
}
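// Hypothetical caller sketch showing the property names documented in the
// remarks above. The SetProperty/SetPropertyBlob-style setters and the
// parameterless ModelGym constructor are assumptions - check the actual
// PropertySet and ModelGym APIs; only the property names come from the source.
Log log = new Log("ModelGymTest");
PropertySet properties = new PropertySet();
properties.SetProperty("GpuID", "0");
properties.SetProperty("ModelDescription", strModelDesc);   // strModelDesc: model description text, loaded elsewhere.
properties.SetProperty("Dataset", "MNIST");
properties.SetProperty("CudaPath", strCudaDnnDllPath);       // strCudaDnnDllPath: path to the CudaDnnDLL.
properties.SetProperty("BatchSize", "16");
properties.SetProperty("RecreateData", "False");
properties.SetPropertyBlob("Weights", rgWeights);            // rgWeights: byte[] of trained weights (assumed setter).

ModelGym gym = new ModelGym();
gym.Initialize(log, properties);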
/// <summary>
/// Reshape the bottom (input) and top (output) blobs.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void Reshape(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    int input_start_axis = m_param.reshape_param.axis;
    int start_axis = (input_start_axis >= 0) ? input_start_axis : colBottom[0].num_axes + input_start_axis + 1;

    m_log.CHECK_GE(start_axis, 0, "axis " + input_start_axis.ToString() + " out of range");
    m_log.CHECK_LE(start_axis, colBottom[0].num_axes, "axis " + input_start_axis.ToString() + " out of range for " + colBottom[0].num_axes.ToString() + "-D input blob");

    int num_axes = m_param.reshape_param.num_axes;
    m_log.CHECK_GE(num_axes, -1, "num_axes must be >= 0, or -1 for all");

    int end_axis = (num_axes == -1) ? colBottom[0].num_axes : (start_axis + num_axes);
    m_log.CHECK_LE(end_axis, colBottom[0].num_axes, "end_axis = axis + num_axes is out of range");

    int num_axes_replaced = end_axis - start_axis;
    int num_axes_retained = colBottom[0].num_axes - num_axes_replaced;
    BlobShape top_blob_shape = m_param.reshape_param.shape;
    int num_new_axes = top_blob_shape.dim.Count;
    List<int> rgTopShape = new List<int>();
    int top_shape_index = 0;

    // Copy the axes before 'start_axis' directly from the bottom blob.
    for (int i = 0; i < start_axis; i++)
    {
        rgTopShape.Add(colBottom[0].shape(i));
        top_shape_index++;
    }

    // Insert the new axes specified by reshape_param.shape.
    for (int i = 0; i < num_new_axes; i++)
    {
        rgTopShape.Add(top_blob_shape.dim[i]);
        top_shape_index++;
    }

    // Copy the axes after 'end_axis' directly from the bottom blob.
    for (int i = end_axis; i < colBottom[0].num_axes; i++)
    {
        rgTopShape.Add(colBottom[0].shape(i));
        top_shape_index++;
    }

    m_log.CHECK_EQ(top_shape_index, rgTopShape.Count, "The top shape count should equal the top_shape_index.");

    // Resolve any 0 dims by copying the corresponding bottom dimension.
    for (int i = 0; i < m_rgCopyAxes.Count; i++)
    {
        int copy_axis_index = m_rgCopyAxes[i];
        m_log.CHECK_GT(colBottom[0].num_axes, start_axis + copy_axis_index, "new shape contains a 0, but there was no corresponding bottom axis to copy");
        rgTopShape[start_axis + copy_axis_index] = colBottom[0].shape(start_axis + copy_axis_index);
    }

    if (m_nInferredAxis >= 0)
    {
        // A -1 dim was specified; infer the correct dimension by computing the
        // product of the other dimensions.
        int explicit_count = m_nConstantCount;
        explicit_count *= colBottom[0].count(0, start_axis);
        explicit_count *= colBottom[0].count(end_axis);

        for (int i = 0; i < m_rgCopyAxes.Count; i++)
        {
            int copy_axis_index = m_rgCopyAxes[i];
            explicit_count *= rgTopShape[start_axis + copy_axis_index];
        }

        m_log.CHECK_EQ(0, colBottom[0].count() % explicit_count, "bottom count (" + colBottom[0].count().ToString() + ") must be divisible by the product of the specified dimensions (" + explicit_count.ToString() + ")");

        int inferred_dim = colBottom[0].count() / explicit_count;
        rgTopShape[start_axis + m_nInferredAxis] = inferred_dim;
    }

    colTop[0].Reshape(rgTopShape);
    m_log.CHECK_EQ(colTop[0].count(), colBottom[0].count(), "output count must match input count");

    // The reshape changes only the shape; the top blob shares the bottom blob's memory.
    colTop[0].ShareData(colBottom[0]);
    colTop[0].ShareDiff(colBottom[0]);
}
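// Standalone sketch (not the layer itself) of the dimension rules applied above,
// assuming axis = 0 and num_axes = -1 so the new dims replace all bottom axes:
// a 0 dim copies the corresponding bottom axis, a single -1 dim is inferred so
// the total count is preserved, and any other value is taken as-is.
static List<int> InferReshape(List<int> rgBottomShape, List<int> rgNewDims)
{
    int nBottomCount = 1;
    foreach (int nDim in rgBottomShape)
        nBottomCount *= nDim;

    List<int> rgTop = new List<int>();
    int nInferredAxis = -1;
    int nExplicitCount = 1;

    for (int i = 0; i < rgNewDims.Count; i++)
    {
        // 0 copies the bottom axis; -1 is marked for inference; other values are constants.
        int nDim = (rgNewDims[i] == 0) ? rgBottomShape[i] : rgNewDims[i];

        if (nDim == -1)
            nInferredAxis = i;
        else
            nExplicitCount *= nDim;

        rgTop.Add(nDim);
    }

    if (nInferredAxis >= 0)
        rgTop[nInferredAxis] = nBottomCount / nExplicitCount;

    return rgTop;
}

// e.g., bottom shape (2,3,4,5) with new dims {0,-1,10} -> top shape (2,6,10); both count to 120.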
/// <summary>
/// Fills the NetParameter with the LSTM network architecture.
/// </summary>
/// <param name="net_param">Specifies the NetParameter to be filled with the unrolled LSTM architecture.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nNumOutput = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");

    FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
    FillerParameter bias_filler = m_param.recurrent_param.bias_filler;

    // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
    // use to save redundant code.
    LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    hidden_param.inner_product_param.num_output = nNumOutput * 4;
    hidden_param.inner_product_param.bias_term = false;
    hidden_param.inner_product_param.axis = 2;
    hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

    LayerParameter biased_hidden_param = hidden_param.Clone(false);
    biased_hidden_param.inner_product_param.bias_term = true;
    biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

    LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);
    sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
    scale_param.scale_param.axis = 0;

    LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
    slice_param.slice_param.axis = 0;

    LayerParameter split_param = new LayerParameter(LayerParameter.LayerType.SPLIT);

    List<BlobShape> rgInputShapes = new List<BlobShape>();
    RecurrentInputShapes(rgInputShapes);
    m_log.CHECK_EQ(2, rgInputShapes.Count, "There should be 2 input shapes.");

    //--- Add the layers ---

    LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer_param.top.Add("c_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[0].Clone());
    input_layer_param.top.Add("h_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[1].Clone());
    net_param.layer.Add(input_layer_param);

    LayerParameter cont_slice_param = slice_param.Clone(false);
    cont_slice_param.name = "cont_slice";
    cont_slice_param.bottom.Add("cont");
    cont_slice_param.slice_param.axis = 0;
    net_param.layer.Add(cont_slice_param);

    // Add layer to transform all timesteps of x to the hidden state dimension.
    //     W_xc_x = W_xc * x + b_c
    {
        LayerParameter x_transform_param = biased_hidden_param.Clone(false);
        x_transform_param.name = "x_transform";
        x_transform_param.parameters.Add(new ParamSpec("W_xc"));
        x_transform_param.parameters.Add(new ParamSpec("b_c"));
        x_transform_param.bottom.Add("x");
        x_transform_param.top.Add("W_xc_x");
        x_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_transform_param);
    }

    if (m_bStaticInput)
    {
        // Add layer to transform x_static to the hidden state dimension.
        //     W_xc_x_static = W_xc_static * x_static
        LayerParameter x_static_transform_param = hidden_param.Clone(false);
        x_static_transform_param.inner_product_param.axis = 1;
        x_static_transform_param.name = "W_xc_x_static";
        x_static_transform_param.parameters.Add(new ParamSpec("W_xc_static"));
        x_static_transform_param.bottom.Add("x_static");
        x_static_transform_param.top.Add("W_xc_x_static_preshape");
        x_static_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_static_transform_param);

        LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape new_shape = reshape_param.reshape_param.shape;
        new_shape.dim.Add(1);   // One timestep.
        new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
        new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
        reshape_param.name = "W_xc_x_static_reshape";
        reshape_param.bottom.Add("W_xc_x_static_preshape");
        reshape_param.top.Add("W_xc_x_static");
        net_param.layer.Add(reshape_param);
    }

    LayerParameter x_slice_param = slice_param.Clone(false);
    x_slice_param.name = "W_xc_x_slice";
    x_slice_param.bottom.Add("W_xc_x");
    net_param.layer.Add(x_slice_param);

    LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
    output_concat_layer.name = "h_concat";
    output_concat_layer.top.Add("h");
    output_concat_layer.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string tm1s = (t - 1).ToString();
        string ts = t.ToString();

        cont_slice_param.top.Add("cont_" + ts);
        x_slice_param.top.Add("W_xc_x_" + ts);

        // Add layer to flush the hidden state when beginning a new sequence,
        // as indicated by cont_t.
        //     h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //     h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                       0 otherwise.
        {
            LayerParameter cont_h_param = scale_param.Clone(false);
            cont_h_param.group_start = true;
            cont_h_param.name = "h_conted_" + tm1s;
            cont_h_param.bottom.Add("h_" + tm1s);
            cont_h_param.bottom.Add("cont_" + ts);
            cont_h_param.top.Add("h_conted_" + tm1s);
            net_param.layer.Add(cont_h_param);
        }

        // Add layer to compute
        //     W_hc_h_{t-1} := W_hc * h_conted_{t-1}
        {
            LayerParameter w_param = hidden_param.Clone(false);
            w_param.name = "transform_" + ts;
            w_param.parameters.Add(new ParamSpec("W_hc"));
            w_param.bottom.Add("h_conted_" + tm1s);
            w_param.top.Add("W_hc_h_" + tm1s);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add the outputs of the linear transformations to compute the gate input.
        //     gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
        //                   = W_hc_h_{t-1} + W_xc_x_t + b_c
        {
            LayerParameter input_sum_layer = sum_param.Clone(false);
            input_sum_layer.name = "gate_input_" + ts;
            input_sum_layer.bottom.Add("W_hc_h_" + tm1s);
            input_sum_layer.bottom.Add("W_xc_x_" + ts);

            if (m_bStaticInput)
            {
                input_sum_layer.bottom.Add("W_xc_x_static");
            }

            input_sum_layer.top.Add("gate_input_" + ts);
            net_param.layer.Add(input_sum_layer);
        }

        // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t.
        // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
        // Outputs: c_t, h_t
        //     [ i_t' ]
        //     [ f_t' ] := gate_input_t
        //     [ o_t' ]
        //     [ g_t' ]
        //         i_t := \sigmoid[i_t']
        //         f_t := \sigmoid[f_t']
        //         o_t := \sigmoid[o_t']
        //         g_t := \tanh[g_t']
        //         c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
        //         h_t := o_t .* \tanh[c_t]
        {
            LayerParameter lstm_unit_param = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
            lstm_unit_param.bottom.Add("c_" + tm1s);
            lstm_unit_param.bottom.Add("gate_input_" + ts);
            lstm_unit_param.bottom.Add("cont_" + ts);
            lstm_unit_param.top.Add("c_" + ts);
            lstm_unit_param.top.Add("h_" + ts);
            lstm_unit_param.name = "unit_" + ts;
            net_param.layer.Add(lstm_unit_param);
        }

        output_concat_layer.bottom.Add("h_" + ts);
    }

    {
        LayerParameter c_T_copy_param = split_param.Clone(false);
        c_T_copy_param.bottom.Add("c_" + m_nT.ToString());
        c_T_copy_param.top.Add("c_T");
        net_param.layer.Add(c_T_copy_param);
    }

    net_param.layer.Add(output_concat_layer.Clone(false));
}
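// Scalar sketch of the LSTMUnit math described in the comments above, taking the
// pre-activation gate values i_t', f_t', o_t', g_t' as inputs (per element; the
// actual LSTM_UNIT layer applies this to whole blobs). Illustrative only.
static void LstmUnitStep(double dfCPrev, double dfI, double dfF, double dfO, double dfG, double dfCont,
                         out double dfC, out double dfH)
{
    Func<double, double> sigmoid = x => 1.0 / (1.0 + Math.Exp(-x));

    double dfIGate = sigmoid(dfI);      // i_t := \sigmoid[i_t']
    double dfFGate = sigmoid(dfF);      // f_t := \sigmoid[f_t']
    double dfOGate = sigmoid(dfO);      // o_t := \sigmoid[o_t']
    double dfGGate = Math.Tanh(dfG);    // g_t := \tanh[g_t']

    dfC = dfCont * (dfFGate * dfCPrev) + (dfIGate * dfGGate);  // c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
    dfH = dfOGate * Math.Tanh(dfC);                            // h_t := o_t .* \tanh[c_t]
}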
/// <summary>
/// Fills the NetParameter with the RNN network architecture.
/// </summary>
/// <param name="net_param">Specifies the NetParameter to be filled with the unrolled RNN architecture.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nNumOutput = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");

    FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
    FillerParameter bias_filler = m_param.recurrent_param.bias_filler;

    // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
    // use to save redundant code.
    LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    hidden_param.inner_product_param.num_output = nNumOutput;
    hidden_param.inner_product_param.bias_term = false;
    hidden_param.inner_product_param.axis = 2;
    hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

    LayerParameter biased_hidden_param = hidden_param.Clone(false);
    biased_hidden_param.inner_product_param.bias_term = true;
    biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

    LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);
    sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter tanh_param = new LayerParameter(LayerParameter.LayerType.TANH);

    LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
    scale_param.scale_param.axis = 0;

    LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
    slice_param.slice_param.axis = 0;

    List<BlobShape> rgInputShapes = new List<BlobShape>();
    RecurrentInputShapes(rgInputShapes);
    m_log.CHECK_EQ(1, rgInputShapes.Count, "There should only be one input shape.");

    //--- Add the layers ---

    LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer_param.top.Add("h_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[0]);
    net_param.layer.Add(input_layer_param);

    LayerParameter cont_slice_param = slice_param.Clone(false);
    cont_slice_param.name = "cont_slice";
    cont_slice_param.bottom.Add("cont");
    cont_slice_param.slice_param.axis = 0;
    net_param.layer.Add(cont_slice_param);

    // Add layer to transform all timesteps of x to the hidden state dimension.
    //     W_xh_x = W_xh * x + b_h
    {
        LayerParameter x_transform_param = biased_hidden_param.Clone(false);
        x_transform_param.name = "x_transform";
        x_transform_param.parameters.Add(new ParamSpec("W_xh"));
        x_transform_param.parameters.Add(new ParamSpec("b_h"));
        x_transform_param.bottom.Add("x");
        x_transform_param.top.Add("W_xh_x");
        x_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_transform_param);
    }

    if (m_bStaticInput)
    {
        // Add layer to transform x_static to the hidden state dimension.
        //     W_xh_x_static = W_xh_static * x_static
        LayerParameter x_static_transform_param = hidden_param.Clone(false);
        x_static_transform_param.inner_product_param.axis = 1;
        x_static_transform_param.name = "W_xh_x_static";
        x_static_transform_param.parameters.Add(new ParamSpec("W_xh_static"));
        x_static_transform_param.bottom.Add("x_static");
        x_static_transform_param.top.Add("W_xh_x_static_preshape");
        x_static_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_static_transform_param);

        LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape new_shape = reshape_param.reshape_param.shape;
        new_shape.dim.Add(1);   // One timestep.
        new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
        new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
        reshape_param.name = "W_xh_x_static_reshape";
        reshape_param.bottom.Add("W_xh_x_static_preshape");
        reshape_param.top.Add("W_xh_x_static");
        net_param.layer.Add(reshape_param);
    }

    LayerParameter x_slice_param = slice_param.Clone(false);
    x_slice_param.name = "W_xh_x_slice";
    x_slice_param.bottom.Add("W_xh_x");
    net_param.layer.Add(x_slice_param);

    LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
    output_concat_layer.name = "o_concat";
    output_concat_layer.top.Add("o");
    output_concat_layer.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string tm1s = (t - 1).ToString();
        string ts = t.ToString();

        cont_slice_param.top.Add("cont_" + ts);
        x_slice_param.top.Add("W_xh_x_" + ts);

        // Add layer to flush the hidden state when beginning a new sequence,
        // as indicated by cont_t.
        //     h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //     h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                       0 otherwise.
        {
            LayerParameter cont_h_param = scale_param.Clone(false);
            cont_h_param.name = "h_conted_" + tm1s;
            cont_h_param.bottom.Add("h_" + tm1s);
            cont_h_param.bottom.Add("cont_" + ts);
            cont_h_param.top.Add("h_conted_" + tm1s);
            net_param.layer.Add(cont_h_param);
        }

        // Add layer to compute
        //     W_hh_h_{t-1} := W_hh * h_conted_{t-1}
        {
            LayerParameter w_param = hidden_param.Clone(false);
            w_param.name = "W_hh_h_" + tm1s;
            w_param.parameters.Add(new ParamSpec("W_hh"));
            w_param.bottom.Add("h_conted_" + tm1s);
            w_param.top.Add("W_hh_h_" + tm1s);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layers to compute
        //     h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
        //          = \tanh( W_hh_h_{t-1} + W_xh_x_t )
        {
            LayerParameter h_input_sum_param = sum_param.Clone(false);
            h_input_sum_param.name = "h_input_sum_" + ts;
            h_input_sum_param.bottom.Add("W_hh_h_" + tm1s);
            h_input_sum_param.bottom.Add("W_xh_x_" + ts);

            if (m_bStaticInput)
            {
                h_input_sum_param.bottom.Add("W_xh_x_static");
            }

            h_input_sum_param.top.Add("h_neuron_input_" + ts);
            net_param.layer.Add(h_input_sum_param);
        }
        {
            LayerParameter h_neuron_param = tanh_param.Clone(false);
            h_neuron_param.name = "h_neuron_input_" + ts;
            h_neuron_param.bottom.Add("h_neuron_input_" + ts);
            h_neuron_param.top.Add("h_" + ts);
            net_param.layer.Add(h_neuron_param);
        }

        // Add layer to compute
        //     W_ho_h_t := W_ho * h_t + b_o
        {
            LayerParameter w_param = biased_hidden_param.Clone(false);
            w_param.name = "W_ho_h_" + ts;
            w_param.parameters.Add(new ParamSpec("W_ho"));
            w_param.parameters.Add(new ParamSpec("b_o"));
            w_param.bottom.Add("h_" + ts);
            w_param.top.Add("W_ho_h_" + ts);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layer to compute
        //     o_t := \tanh( W_ho * h_t + b_o )
        //          = \tanh( W_ho_h_t )
        {
            LayerParameter o_neuron_param = tanh_param.Clone(false);
            o_neuron_param.name = "o_neuron_" + ts;
            o_neuron_param.bottom.Add("W_ho_h_" + ts);
            o_neuron_param.top.Add("o_" + ts);
            net_param.layer.Add(o_neuron_param);
        }

        output_concat_layer.bottom.Add("o_" + ts);
    }

    net_param.layer.Add(output_concat_layer.Clone(false));
}
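// Scalar sketch of the per-timestep recurrence built above (per element; the
// real net applies it to whole blobs via the INNERPRODUCT, ELTWISE and TANH
// layers). Illustrative only.
static void RnnStep(double dfHPrev, double dfWxhX, double dfWhh, double dfBh,
                    double dfWho, double dfBo, double dfCont,
                    out double dfH, out double dfO)
{
    double dfHConted = dfCont * dfHPrev;                 // h_conted_{t-1} := cont_t * h_{t-1}
    dfH = Math.Tanh(dfWhh * dfHConted + dfWxhX + dfBh);  // h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
    dfO = Math.Tanh(dfWho * dfH + dfBo);                 // o_t := \tanh( W_ho * h_t + b_o )
}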
private void layerSetUpCaffe(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    // Get (recurrent) input/output names.
    List<string> rgOutputNames = new List<string>();
    OutputBlobNames(rgOutputNames);

    List<string> rgRecurInputNames = new List<string>();
    RecurrentInputBlobNames(rgRecurInputNames);

    List<string> rgRecurOutputNames = new List<string>();
    RecurrentOutputBlobNames(rgRecurOutputNames);

    int nNumRecurBlobs = rgRecurInputNames.Count;
    m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");

    // If provided, bottom[2] is a static input to the recurrent net.
    int nNumHiddenExposed = (m_bExposeHidden) ? nNumRecurBlobs : 0;
    m_bStaticInput = (colBottom.Count > 2 + nNumHiddenExposed) ? true : false;

    if (m_bStaticInput)
    {
        m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
        m_log.CHECK_EQ(m_nN, colBottom[2].shape(0), "When static input is present, the bottom[2].shape(0) must = N which is " + m_nN.ToString());
    }

    // Create a NetParameter; setup the inputs that aren't unique to particular
    // recurrent architectures.
    NetParameter net_param = new NetParameter();

    LayerParameter input_layer = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer.top.Add("x");
    BlobShape input_shape1 = new param.BlobShape();
    for (int i = 0; i < colBottom[0].num_axes; i++)
    {
        input_shape1.dim.Add(colBottom[0].shape(i));
    }
    input_layer.input_param.shape.Add(input_shape1);

    input_layer.top.Add("cont");
    BlobShape input_shape2 = new param.BlobShape();
    for (int i = 0; i < colBottom[1].num_axes; i++)
    {
        input_shape2.dim.Add(colBottom[1].shape(i));
    }
    input_layer.input_param.shape.Add(input_shape2);

    if (m_bStaticInput)
    {
        input_layer.top.Add("x_static");
        BlobShape input_shape3 = new BlobShape();
        for (int i = 0; i < colBottom[2].num_axes; i++)
        {
            input_shape3.dim.Add(colBottom[2].shape(i));
        }
        input_layer.input_param.shape.Add(input_shape3);
    }

    net_param.layer.Add(input_layer);

    // Call the child's FillUnrolledNet implementation to specify the unrolled
    // recurrent architecture.
    FillUnrolledNet(net_param);

    // Prepend this layer's name to the names of each layer in the unrolled net.
    string strLayerName = m_param.name;
    if (strLayerName.Length > 0)
    {
        for (int i = 0; i < net_param.layer.Count; i++)
        {
            LayerParameter layer = net_param.layer[i];
            layer.name = strLayerName + "_" + layer.name;
        }
    }

    // Add 'pseudo-losses' to all outputs to force backpropagation.
    // (Setting force_backward is too aggressive as we may not need to backprop to
    // all inputs, e.g., the sequence continuation indicators.)
    List<string> rgPseudoLosses = new List<string>();
    for (int i = 0; i < rgOutputNames.Count; i++)
    {
        rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");
        LayerParameter layer = new LayerParameter(LayerParameter.LayerType.REDUCTION, rgPseudoLosses[i]);
        layer.bottom.Add(rgOutputNames[i]);
        layer.top.Add(rgPseudoLosses[i]);
        layer.loss_weight.Add(1.0);
        net_param.layer.Add(layer);
    }

    // Create the unrolled net.
    Net<T> sharedNet = null;
    if (m_param is LayerParameterEx<T>)
    {
        RecurrentLayer<T> sharedLayer = ((LayerParameterEx<T>)m_param).SharedLayer as RecurrentLayer<T>;
        if (sharedLayer != null)
        {
            sharedNet = sharedLayer.m_unrolledNet;
        }
    }

    m_unrolledNet = new Net<T>(m_cuda, m_log, net_param, m_evtCancel, null, m_phase, null, sharedNet);
    m_unrolledNet.set_debug_info(m_param.recurrent_param.debug_info);

    // Setup pointers to the inputs.
    m_blobXInputBlob = m_unrolledNet.blob_by_name("x");
    m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");

    if (m_bStaticInput)
    {
        m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");
    }

    // Setup pointers to paired recurrent inputs/outputs.
    m_colRecurInputBlobs = new common.BlobCollection<T>();
    m_colRecurOutputBlobs = new common.BlobCollection<T>();
    for (int i = 0; i < nNumRecurBlobs; i++)
    {
        m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
        m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));
    }

    // Setup pointers to outputs.
    m_log.CHECK_EQ(colTop.Count() - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");
    m_colOutputBlobs = new common.BlobCollection<T>();
    for (int i = 0; i < rgOutputNames.Count; i++)
    {
        m_colOutputBlobs.Add(m_unrolledNet.blob_by_name(rgOutputNames[i]));
    }

    // We should have 2 inputs (x and cont), plus a number of recurrent inputs,
    // plus maybe a static input.
    int nStaticInput = (m_bStaticInput) ? 1 : 0;
    m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");

    // This layer's parameters are any parameters in the layers of the unrolled
    // net. We only want one copy of each parameter, so check that the parameter
    // is 'owned' by the layer, rather than shared with another.
    blobs.Clear();
    for (int i = 0; i < m_unrolledNet.parameters.Count; i++)
    {
        if (m_unrolledNet.param_owners[i] == -1)
        {
            m_log.WriteLine("Adding parameter " + i.ToString() + ": " + m_unrolledNet.param_display_names[i]);
            blobs.Add(m_unrolledNet.parameters[i]);
        }
    }

    // Check that param_propagate_down is set for all of the parameters in the
    // unrolled net; set param_propagate_down to true in this layer.
    for (int i = 0; i < m_unrolledNet.layers.Count; i++)
    {
        for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
        {
            m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());
        }
    }

    m_rgbParamPropagateDown = new DictionaryMap<bool>(blobs.Count, true);

    // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across
    // batches.
    for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
    {
        m_colRecurOutputBlobs[i].SetDiff(0);
    }

    // Check that the last output_names.count layers are the pseudo-losses;
    // set last_layer_index so that we don't actually run these layers.
    List<string> rgLayerNames = m_unrolledNet.layer_names;
    m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;
    for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
    {
        m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);
    }
}
/** @copydoc LayerParameterBase::Copy */
public override void Copy(LayerParameterBase src)
{
    TransposeParameter p = (TransposeParameter)src;
    m_shape = p.m_shape.Clone();
}
/** @copydoc LayerParameterBase::Copy */
public override void Copy(LayerParameterBase src)
{
    SqueezeParameter p = (SqueezeParameter)src;
    m_shape = p.m_shape.Clone();
}