/// <summary>
/// Creates a new GramLayer and marks it with the GRAM layer type.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Specifies the LayerParameter forwarded to the base layer.</param>
public GramLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.GRAM;
}
/// <summary>
/// Creates a new TanhLayer and marks it with the TANH layer type.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Specifies the LayerParameter of type TANH with parameter tanh_param,
/// with options:
///   - engine. The engine to use, either Engine.CAFFE, or Engine.CUDNN.
/// </param>
public TanhLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.TANH;
}
/// <summary>
/// Creates a new ConvolutionOctaveLayer and marks it with the CONVOLUTION_OCTAVE layer type.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Specifies the LayerParameter forwarded to the base layer.</param>
public ConvolutionOctaveLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.CONVOLUTION_OCTAVE;
}
/// <summary>
/// Creates a new DeconvolutionLayer and marks it with the DECONVOLUTION layer type.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">
/// Provides ConvolutionParameter convolution_param with DeconvolutionLayer options:
///  - num_output. The number of filters.
///
///  - kernel_size / kernel_h / kernel_w. The filter dimensions, given by
///  kernel_size for square filters or kernel_h and kernel_w for rectangular
///  filters.
///
///  - stride / stride_h / stride_w (\b optional, default 1). The filter
///  stride, given by stride_size for equal dimensions or stride_h and stride_w
///  for different strides. By default the convolution is dense with stride 1.
///
///  - pad / pad_h / pad_w (\b optional, default 0). The zero-padding for
///  convolution, given by pad for equal dimensions or pad_h and pad_w for
///  different padding. Input padding is computed implicitly instead of
///  actually padding.
///
///  - dilation (\b optional, default 1). The filter dilation, given by
///  dilation_size for equal dimensions. By default the convolution has
///  dilation 1.
///
///  - group (\b optional, default 1). The number of filter groups. Group
///  convolution is a method for reducing parameterization by selectively
///  connecting input and output channels. The input and output channel dimensions
///  must be divisible by the number of groups. For a group count of 1 or more, the
///  convolution filters' input and output channels are separated such that each
///  group takes 1/group of the input channels and makes 1/group of the
///  output channels. Concretely, 4 input channels, 8 output channels, and
///  2 groups separate input channels 1-2 and output channels 1-4 into the
///  first group and input channels 3-4 and output channels 5-8 into the second
///  group.
///
///  - bias_term (\b optional, default true). Whether to have a bias.
///
///  - engine: convolution has Engine.CAFFE (matrix multiplication) and Engine.CUDNN (library
///  kernels + stream parallelism) engines.
/// </param>
public DeconvolutionLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.DECONVOLUTION;
}
/// <summary>
/// Creates a new DebugLayer, marks it with the DEBUG layer type and caches
/// the maximum number of batches to store.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Provides DebugParameter debug_param with options:
///  - max_stored_batches. Specifies the number of batches that the DebugLayer should store.
/// </param>
public DebugLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.DEBUG;

    // Keep a local copy of the configured batch limit.
    this.m_nMaxBatches = p.debug_param.max_stored_batches;
}
/// <summary>
/// Creates a new ClipLayer and marks it with the CLIP layer type.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Specifies the LayerParameter of type CLIP with parameter clip_param,
/// with options:
///  - min the value @f$ \min @f$
///  - max the value @f$ \max @f$
/// </param>
public ClipLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.CLIP;
}
/// <summary>
/// Creates a new EventLayer and marks it with the EVENT layer type.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log used for output.</param>
/// <param name="p">Specifies the LayerParameter forwarded to the base layer.</param>
public EventLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.EVENT;
}
/// <summary>
/// Fills the NetParameter with the unrolled LSTM network architecture.
/// </summary>
/// <remarks>
/// The layers are appended to <paramref name="net_param"/> in this order: the input layer
/// producing 'c_0' and 'h_0', the 'cont' slice, the x transform, the optional static input
/// transform and reshape, the slice of the transformed x, then for every time-step the
/// scale / inner-product / eltwise-sum / LSTM unit layers, and finally the split producing
/// 'c_T' and the concat producing 'h'.
/// </remarks>
/// <param name="net_param">Specifies the NetParameter that receives the layers of the unrolled net.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nHiddenCount = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nHiddenCount, 0, "num_output must be positive.");

    FillerParameter fillerWeights = m_param.recurrent_param.weight_filler;
    FillerParameter fillerBias = m_param.recurrent_param.bias_filler;

    // Prototype layers (no bottoms/tops yet); each concrete layer below is cloned
    // from one of these so that the shared settings are only written once.
    LayerParameter protoHidden = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    protoHidden.inner_product_param.num_output = nHiddenCount * 4;
    protoHidden.inner_product_param.bias_term = false;
    protoHidden.inner_product_param.axis = 2;
    protoHidden.inner_product_param.weight_filler = fillerWeights.Clone();

    LayerParameter protoBiasedHidden = protoHidden.Clone(false);
    protoBiasedHidden.inner_product_param.bias_term = true;
    protoBiasedHidden.inner_product_param.bias_filler = fillerBias.Clone();

    LayerParameter protoSum = new LayerParameter(LayerParameter.LayerType.ELTWISE);
    protoSum.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter protoScale = new LayerParameter(LayerParameter.LayerType.SCALE);
    protoScale.scale_param.axis = 0;

    LayerParameter protoSlice = new LayerParameter(LayerParameter.LayerType.SLICE);
    protoSlice.slice_param.axis = 0;

    LayerParameter protoSplit = new LayerParameter(LayerParameter.LayerType.SPLIT);

    List<BlobShape> rgRecurrentShapes = new List<BlobShape>();
    RecurrentInputShapes(rgRecurrentShapes);
    m_log.CHECK_EQ(2, rgRecurrentShapes.Count, "There should be 2 input shapes.");

    //--- Add the layers ---

    // Input layer supplying the initial cell state 'c_0' and hidden state 'h_0'.
    LayerParameter layerInput = new LayerParameter(LayerParameter.LayerType.INPUT);
    layerInput.top.Add("c_0");
    layerInput.top.Add("h_0");
    layerInput.input_param.shape.Add(rgRecurrentShapes[0].Clone());
    layerInput.input_param.shape.Add(rgRecurrentShapes[1].Clone());
    net_param.layer.Add(layerInput);

    // Slice of 'cont'; its per time-step tops are appended inside the loop below.
    LayerParameter layerContSlice = protoSlice.Clone(false);
    layerContSlice.name = "cont_slice";
    layerContSlice.bottom.Add("cont");
    layerContSlice.slice_param.axis = 0;
    net_param.layer.Add(layerContSlice);

    // Transform all time-steps of x to the hidden state dimension.
    //   W_xc_x = W_xc * x + b_c
    LayerParameter layerXTransform = protoBiasedHidden.Clone(false);
    layerXTransform.name = "x_transform";
    layerXTransform.parameters.Add(new ParamSpec("W_xc"));
    layerXTransform.parameters.Add(new ParamSpec("b_c"));
    layerXTransform.bottom.Add("x");
    layerXTransform.top.Add("W_xc_x");
    layerXTransform.propagate_down.Add(true);
    net_param.layer.Add(layerXTransform);

    if (m_bStaticInput)
    {
        // Transform x_static to the hidden state dimension.
        //   W_xc_x_static = W_xc_static * x_static
        LayerParameter layerStaticTransform = protoHidden.Clone(false);
        layerStaticTransform.inner_product_param.axis = 1;
        layerStaticTransform.name = "W_xc_x_static";
        layerStaticTransform.parameters.Add(new ParamSpec("W_xc_static"));
        layerStaticTransform.bottom.Add("x_static");
        layerStaticTransform.top.Add("W_xc_x_static_preshape");
        layerStaticTransform.propagate_down.Add(true);
        net_param.layer.Add(layerStaticTransform);

        // Reshape the static transform output to (1, -1, num_output).
        LayerParameter layerStaticReshape = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape shapeStatic = layerStaticReshape.reshape_param.shape;
        shapeStatic.dim.Add(1);   // One timestep.
        shapeStatic.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
        shapeStatic.dim.Add((int)layerStaticTransform.inner_product_param.num_output);
        layerStaticReshape.name = "W_xc_x_static_reshape";
        layerStaticReshape.bottom.Add("W_xc_x_static_preshape");
        layerStaticReshape.top.Add("W_xc_x_static");
        net_param.layer.Add(layerStaticReshape);
    }

    // Slice of the transformed x; its per time-step tops are appended inside the loop below.
    LayerParameter layerXSlice = protoSlice.Clone(false);
    layerXSlice.name = "W_xc_x_slice";
    layerXSlice.bottom.Add("W_xc_x");
    net_param.layer.Add(layerXSlice);

    // Concat gathering every h_t into 'h'; a clone of it is added to the net after the loop.
    LayerParameter layerHConcat = new LayerParameter(LayerParameter.LayerType.CONCAT);
    layerHConcat.name = "h_concat";
    layerHConcat.top.Add("h");
    layerHConcat.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string strPrev = (t - 1).ToString();
        string strCur = t.ToString();

        layerContSlice.top.Add("cont_" + strCur);
        layerXSlice.top.Add("W_xc_x_" + strCur);

        // Flush the hidden state when beginning a new sequence, as indicated by cont_t.
        //   h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //   h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                     0       otherwise.
        LayerParameter layerContH = protoScale.Clone(false);
        layerContH.group_start = true;
        layerContH.name = "h_conted_" + strPrev;
        layerContH.bottom.Add("h_" + strPrev);
        layerContH.bottom.Add("cont_" + strCur);
        layerContH.top.Add("h_conted_" + strPrev);
        net_param.layer.Add(layerContH);

        // Compute
        //   W_hc_h_{t-1} := W_hc * h_conted_{t-1}
        LayerParameter layerHiddenTransform = protoHidden.Clone(false);
        layerHiddenTransform.name = "transform_" + strCur;
        layerHiddenTransform.parameters.Add(new ParamSpec("W_hc"));
        layerHiddenTransform.bottom.Add("h_conted_" + strPrev);
        layerHiddenTransform.top.Add("W_hc_h_" + strPrev);
        layerHiddenTransform.inner_product_param.axis = 2;
        net_param.layer.Add(layerHiddenTransform);

        // Sum the outputs of the linear transformations to compute the gate input.
        //   gate_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c
        //                 = W_hc_h_{t-1} + W_xc_x_t + b_c
        LayerParameter layerGateSum = protoSum.Clone(false);
        layerGateSum.name = "gate_input_" + strCur;
        layerGateSum.bottom.Add("W_hc_h_" + strPrev);
        layerGateSum.bottom.Add("W_xc_x_" + strCur);

        if (m_bStaticInput)
        {
            layerGateSum.bottom.Add("W_xc_x_static");
        }

        layerGateSum.top.Add("gate_input_" + strCur);
        net_param.layer.Add(layerGateSum);

        // LSTMUnit layer computing the cell & hidden vectors c_t and h_t.
        //   Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t
        //   Outputs: c_t, h_t
        //      [ i_t' ]
        //      [ f_t' ] := gate_input_t
        //      [ o_t' ]
        //      [ g_t' ]
        //          i_t := \sigmoid[i_t']
        //          f_t := \sigmoid[f_t']
        //          o_t := \sigmoid[o_t']
        //          g_t := \tanh[g_t']
        //          c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
        //          h_t := o_t .* \tanh[c_t]
        LayerParameter layerUnit = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT);
        layerUnit.name = "unit_" + strCur;
        layerUnit.bottom.Add("c_" + strPrev);
        layerUnit.bottom.Add("gate_input_" + strCur);
        layerUnit.bottom.Add("cont_" + strCur);
        layerUnit.top.Add("c_" + strCur);
        layerUnit.top.Add("h_" + strCur);
        net_param.layer.Add(layerUnit);

        layerHConcat.bottom.Add("h_" + strCur);
    }

    // Copy the final cell state c_{T} out as 'c_T'.
    LayerParameter layerCellCopy = protoSplit.Clone(false);
    layerCellCopy.bottom.Add("c_" + m_nT.ToString());
    layerCellCopy.top.Add("c_T");
    net_param.layer.Add(layerCellCopy);

    net_param.layer.Add(layerHConcat.Clone(false));
}
/// <summary>
/// Create the model used to train the Encoder/Decoder using the TextData Layer as input.
/// The model feeds two LSTM encoders (one reading 'data', one reading 'datar') whose outputs
/// are concatenated into 'encoded' and consumed by an LSTM_ATTENTION decoder.
/// </summary>
/// <param name="strInputFile">Specifies the input data (used as the encoder source).</param>
/// <param name="strTargetFile">Specifies the target data (used as the decoder source).</param>
/// <param name="nHiddenCount">Specifies hidden data count.</param>
/// <param name="nWordSize">Specifies the size of the word embeddings.</param>
/// <param name="bUseSoftmax">When true, a SOFTMAXCROSSENTROPY_LOSS layer is used as the loss, otherwise a MEMORY_LOSS layer is used (only applies when the phase is not RUN).</param>
/// <param name="bUseExternalIp">When true, the decoder outputs 'lstm3' and a separate INNERPRODUCT layer produces 'ip1'; otherwise the decoder produces 'ip1' itself.</param>
/// <param name="phase">Specifies phase of the model to create.</param>
/// <returns>The NetParameter of the model is returned.</returns>
public NetParameter CreateModel(string strInputFile, string strTargetFile, int nHiddenCount, int nWordSize, bool bUseSoftmax, bool bUseExternalIp, Phase phase = Phase.TRAIN)
{
    m_nHidden = nHiddenCount;

    bool bRunPhase = (phase == Phase.RUN);
    NetParameter netParam = new NetParameter();

    // Text data layer that loads the inputs and feeds them to the network.
    LayerParameter textData = new LayerParameter(LayerParameter.LayerType.TEXT_DATA);
    textData.name = "data";
    textData.text_data_param.time_steps = (uint)m_nTimeSteps;
    textData.text_data_param.batch_size = (uint)m_nBatch;
    textData.text_data_param.enable_normal_encoder_output = true;
    textData.text_data_param.enable_reverse_encoder_output = true;
    textData.text_data_param.encoder_source = strInputFile;
    textData.text_data_param.decoder_source = strTargetFile;
    textData.text_data_param.sample_size = (uint)m_nSampleSize;
    textData.text_data_param.shuffle = true;

    if (bRunPhase)
    {
        // Loaded with TextDataLayer.PreProcessInput:
        //   idec  - decoder input
        //   ienc  - encoder input
        //   iencr - encoder input 'r' (presumably the reversed input - confirm)
        //   iencc - encoder clip
        foreach (string strBottom in new string[] { "idec", "ienc", "iencr", "iencc" })
        {
            textData.bottom.Add(strBottom);
        }
    }

    foreach (string strTop in new string[] { "dec_input", "clipD", "data", "datar", "clipE", "vocabcount", "label" })
    {
        textData.top.Add(strTop);
    }

    netParam.layer.Add(textData);

    // Embedding of the 'data' word indexes into vectors of size nWordSize.
    // input_dim is a placeholder here; the original note states the vocabulary
    // size is supplied at run-time (see the 'vocabcount' bottom).
    LayerParameter embedData = new LayerParameter(LayerParameter.LayerType.EMBED);
    embedData.embed_param.input_dim = 1;
    embedData.embed_param.num_output = (uint)nWordSize;
    embedData.embed_param.bias_term = true;
    embedData.embed_param.weight_filler = m_fillerParam;
    embedData.parameters.Add(new ParamSpec("embed_wts"));
    embedData.parameters.Add(new ParamSpec("embed_bias"));
    embedData.bottom.Add("data");
    embedData.bottom.Add("vocabcount");
    embedData.top.Add("embed1");
    netParam.layer.Add(embedData);

    // First encoder LSTM, reading the 'embed1' embedding.
    LayerParameter encoderFwd = new LayerParameter(LayerParameter.LayerType.LSTM);
    encoderFwd.name = "encoder1";
    encoderFwd.recurrent_param.bias_filler = new FillerParameter("constant", 0);
    encoderFwd.recurrent_param.weight_filler = m_fillerParam;
    encoderFwd.recurrent_param.engine = EngineParameter.Engine.CUDNN;
    encoderFwd.recurrent_param.num_output = (uint)m_nHidden;
    encoderFwd.recurrent_param.num_layers = 2;
    encoderFwd.recurrent_param.dropout_ratio = 0.1;
    encoderFwd.bottom.Add("embed1");
    encoderFwd.bottom.Add("clipE");
    encoderFwd.top.Add("lstm1");
    netParam.layer.Add(encoderFwd);

    // Embedding of the 'datar' word indexes; uses the same ParamSpec names
    // ('embed_wts', 'embed_bias') as the 'data' embedding.
    LayerParameter embedDataR = new LayerParameter(LayerParameter.LayerType.EMBED);
    embedDataR.embed_param.input_dim = 1;
    embedDataR.embed_param.num_output = (uint)nWordSize;
    embedDataR.embed_param.bias_term = true;
    embedDataR.embed_param.weight_filler = m_fillerParam;
    embedDataR.parameters.Add(new ParamSpec("embed_wts"));
    embedDataR.parameters.Add(new ParamSpec("embed_bias"));
    embedDataR.bottom.Add("datar");
    embedDataR.bottom.Add("vocabcount");
    embedDataR.top.Add("embed2");
    netParam.layer.Add(embedDataR);

    // Second encoder LSTM, reading the 'embed2' embedding.
    LayerParameter encoderRev = new LayerParameter(LayerParameter.LayerType.LSTM);
    encoderRev.name = "encoder2";
    encoderRev.recurrent_param.bias_filler = new FillerParameter("constant", 0);
    encoderRev.recurrent_param.weight_filler = m_fillerParam;
    encoderRev.recurrent_param.engine = EngineParameter.Engine.CUDNN;
    encoderRev.recurrent_param.num_output = (uint)m_nHidden;
    encoderRev.recurrent_param.num_layers = 2;
    encoderRev.recurrent_param.dropout_ratio = 0.1;
    encoderRev.bottom.Add("embed2");
    encoderRev.bottom.Add("clipE");
    encoderRev.top.Add("lstm2");
    netParam.layer.Add(encoderRev);

    // Join both encoder outputs along axis 2 into 'encoded'.
    LayerParameter encodedConcat = new LayerParameter(LayerParameter.LayerType.CONCAT);
    encodedConcat.concat_param.axis = 2;
    encodedConcat.bottom.Add("lstm1");
    encodedConcat.bottom.Add("lstm2");
    encodedConcat.top.Add("encoded");
    netParam.layer.Add(encodedConcat);

    // Embedding for the decoder input.
    LayerParameter embedDecoder = new LayerParameter(LayerParameter.LayerType.EMBED);
    embedDecoder.name = "dec_input_embed";
    embedDecoder.embed_param.input_dim = 1;
    embedDecoder.embed_param.num_output = (uint)nWordSize;
    embedDecoder.embed_param.bias_term = true;
    embedDecoder.embed_param.weight_filler = m_fillerParam;
    embedDecoder.bottom.Add("dec_input");
    embedDecoder.bottom.Add("vocabcount");
    embedDecoder.top.Add("dec_input_embed");
    netParam.layer.Add(embedDecoder);

    // Attention decoder. num_output_ip is 0 when the inner product is external,
    // otherwise 1 (a placeholder; see the 'vocabcount' bottom added below).
    uint nOutputIp = 1;
    if (bUseExternalIp)
    {
        nOutputIp = 0;
    }

    LayerParameter decoder = new LayerParameter(LayerParameter.LayerType.LSTM_ATTENTION);
    decoder.name = "decoder";
    decoder.lstm_attention_param.bias_filler = new FillerParameter("constant", 0);
    decoder.lstm_attention_param.weight_filler = m_fillerParam;
    decoder.lstm_attention_param.num_output = (uint)m_nHidden;
    decoder.lstm_attention_param.num_output_ip = nOutputIp;
    decoder.lstm_attention_param.enable_attention = true;
    decoder.bottom.Add("dec_input_embed");
    decoder.bottom.Add("clipD");
    decoder.bottom.Add("encoded");
    decoder.bottom.Add("clipE");

    if (bUseExternalIp)
    {
        decoder.top.Add("lstm3");
    }
    else
    {
        decoder.bottom.Add("vocabcount");
        decoder.top.Add("ip1");
    }

    netParam.layer.Add(decoder);

    if (bUseExternalIp)
    {
        // Separate inner product turning the decoder output 'lstm3' into 'ip1'.
        LayerParameter externalIp = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
        externalIp.inner_product_param.axis = 2;
        externalIp.inner_product_param.bias_filler = new FillerParameter("constant", 0);
        externalIp.inner_product_param.weight_filler = m_fillerParam;
        externalIp.inner_product_param.bias_term = true;
        externalIp.bottom.Add("lstm3");
        externalIp.bottom.Add("vocabcount");
        externalIp.top.Add("ip1");
        netParam.layer.Add(externalIp);
    }

    if (bRunPhase)
    {
        // When running, 'ip1' is converted with a softmax along axis 2.
        LayerParameter softmaxOut = new LayerParameter(LayerParameter.LayerType.SOFTMAX);
        softmaxOut.softmax_param.axis = 2;
        softmaxOut.bottom.Add("ip1");
        softmaxOut.top.Add("softmax");
        netParam.layer.Add(softmaxOut);
    }
    else
    {
        // When not running, add the selected loss followed by the TEST-only accuracy.
        LayerParameter lossLayer;

        if (bUseSoftmax)
        {
            lossLayer = new LayerParameter(LayerParameter.LayerType.SOFTMAXCROSSENTROPY_LOSS);
            lossLayer.softmax_param.axis = 2;
        }
        else
        {
            lossLayer = new LayerParameter(LayerParameter.LayerType.MEMORY_LOSS);
            lossLayer.loss_param.normalization = LossParameter.NormalizationMode.NONE;
        }

        lossLayer.name = "loss";
        lossLayer.bottom.Add("ip1");
        lossLayer.bottom.Add("label");
        lossLayer.top.Add("loss");
        netParam.layer.Add(lossLayer);

        LayerParameter accuracyLayer = new LayerParameter(LayerParameter.LayerType.ACCURACY);
        accuracyLayer.accuracy_param.axis = 2;
        accuracyLayer.accuracy_param.ignore_label = 0;
        accuracyLayer.bottom.Add("ip1");
        accuracyLayer.bottom.Add("label");
        accuracyLayer.top.Add("accuracy");
        accuracyLayer.include.Add(new NetStateRule(Phase.TEST));
        netParam.layer.Add(accuracyLayer);
    }

    // Silence layer consuming 'label', included for the RUN phase only.
    LayerParameter silenceLabel = new LayerParameter(LayerParameter.LayerType.SILENCE);
    silenceLabel.bottom.Add("label");
    silenceLabel.include.Add(new NetStateRule(Phase.RUN));
    netParam.layer.Add(silenceLabel);

    return netParam;
}
/// <summary>
/// Setup the layer: validates the bottoms, creates (or shares) the learnable weight blobs,
/// sizes the internal state blobs and, when attention is enabled, creates the internal
/// AttentionLayer and appends its blobs to this layer's blobs.
/// </summary>
/// <remarks>
/// With attention enabled, 4 or 5 bottoms are expected (x, xClip, encoding, encodingClip and
/// optionally vocabcount); when a fifth bottom is present and num_output_ip is non-zero,
/// num_output_ip is replaced with the first value of that bottom. Without attention,
/// 1 or 2 bottoms are expected (x and optionally clip).
/// </remarks>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    LSTMAttentionParameter p = m_param.lstm_attention_param;

    if (m_param.lstm_attention_param.enable_attention)
    {
        m_log.CHECK_GE(colBottom.Count, 4, "When using attention, four bottoms are required: x, xClip, encoding, encodingClip.");
        m_log.CHECK_LE(colBottom.Count, 5, "When using attention, four bottoms are required: x, xClip, encoding, encodingClip, vocabcount (optional).");

        if (colBottom.Count == 5)
        {
            // A non-zero num_output_ip is only a placeholder; the actual value
            // is read from the first item of the fifth bottom.
            if (p.num_output_ip != 0)
            {
                p.num_output_ip = (uint)convertF(colBottom[4].GetData(0));
            }
        }
    }
    else
    {
        m_log.CHECK_GE(colBottom.Count, 1, "When not using attention, at least one bottom is required: x.");
        m_log.CHECK_LE(colBottom.Count, 2, "When not using attention, no more than two bottoms is required: x, clip.");
    }

    m_dfClippingThreshold = p.clipping_threshold;
    m_nN = colBottom[0].channels;
    m_nH = (int)p.num_output;       // number of hidden units.
    m_nI = colBottom[0].count(2);   // input dimension.

    // Check if we need to set up the weights.
    if (m_colBlobs.Count > 0)
    {
        m_log.WriteLine("Skipping parameter initialization.");
    }
    else
    {
        m_colBlobs = new BlobCollection<T>();

        Filler<T> weight_filler = Filler<T>.Create(m_cuda, m_log, p.weight_filler);
        Filler<T> bias_filler = Filler<T>.Create(m_cuda, m_log, p.bias_filler);

        // input-to-hidden weights
        // Initialize the weight.
        List<int> rgShape1 = new List<int>() { 4 * m_nH, m_nI };
        Blob<T> blobWeights_I_H = new Blob<T>(m_cuda, m_log);
        blobWeights_I_H.Name = m_param.name + " weights I to H";
        blobWeights_I_H.type = BLOB_TYPE.WEIGHT;

        if (!shareParameter(blobWeights_I_H, rgShape1))
        {
            blobWeights_I_H.Reshape(rgShape1);
            weight_filler.Fill(blobWeights_I_H);
        }
        m_nWeightItoHidx = m_colBlobs.Count;
        m_colBlobs.Add(blobWeights_I_H);

        // hidden-to-hidden weights
        // Initialize the weight.
        List<int> rgShape2 = new List<int>() { 4 * m_nH, m_nH };
        Blob<T> blobWeights_H_H = new Blob<T>(m_cuda, m_log);
        blobWeights_H_H.Name = m_param.name + " weights H to H";
        blobWeights_H_H.type = BLOB_TYPE.WEIGHT;

        if (!shareParameter(blobWeights_H_H, rgShape2))
        {
            blobWeights_H_H.Reshape(rgShape2);
            weight_filler.Fill(blobWeights_H_H);
        }
        m_nWeightHtoHidx = m_colBlobs.Count;
        m_colBlobs.Add(blobWeights_H_H);

        // Initialize and fill the bias term.
        List<int> rgShape3 = new List<int>() { 4 * m_nH };
        Blob<T> blobBias = new Blob<T>(m_cuda, m_log);
        blobBias.Name = m_param.name + " bias weights";
        blobBias.type = BLOB_TYPE.WEIGHT;

        if (!shareParameter(blobBias, rgShape3))
        {
            blobBias.Reshape(rgShape3);
            bias_filler.Fill(blobBias);
        }
        m_nWeightBiasidx = m_colBlobs.Count;
        m_colBlobs.Add(blobBias);

        // Initialize the bias for the forget gate to 5.0 as described in the
        // Clockwork RNN paper:
        // [1] Koutnik, J., Greff, K., Gomez, F., Schmidhuber, J., 'A Clockwork RNN', 2014"
        // Only the items in the range [m_nH, 2 * m_nH) of the bias are overwritten.
        if (p.enable_clockwork_forgetgate_bias)
        {
            double[] rgBias = convertD(blobBias.mutable_cpu_data);

            for (int i = m_nH; i < 2 * m_nH; i++)
            {
                rgBias[i] = 5.0;
            }

            blobBias.mutable_cpu_data = convert(rgBias);
        }

        if (m_param.lstm_attention_param.num_output_ip > 0)
        {
            // Weights of shape (m_nH, num_output_ip).
            Blob<T> blobWeightWhd = new Blob<T>(m_cuda, m_log);
            blobWeightWhd.Name = m_param.name + " weights Whd";
            blobWeightWhd.type = BLOB_TYPE.WEIGHT;

            List<int> rgShapeWhd = new List<int>() { m_nH, (int)m_param.lstm_attention_param.num_output_ip };
            if (!shareParameter(blobWeightWhd, rgShapeWhd))
            {
                blobWeightWhd.Reshape(rgShapeWhd);
                weight_filler.Fill(blobWeightWhd);
            }
            m_nWeightWhdidx = m_colBlobs.Count;
            m_colBlobs.Add(blobWeightWhd);

            // Bias of shape (1, num_output_ip).
            Blob<T> blobWeightWhdb = new Blob<T>(m_cuda, m_log);
            blobWeightWhdb.Name = m_param.name + " weights Whdb";
            blobWeightWhdb.type = BLOB_TYPE.WEIGHT;

            List<int> rgShapeWhdb = new List<int>() { 1, (int)m_param.lstm_attention_param.num_output_ip };
            // Share against this blob's own shape (the one it is reshaped to below),
            // as every other shareParameter call in this method does - not the
            // input-to-hidden weight shape.
            if (!shareParameter(blobWeightWhdb, rgShapeWhdb))
            {
                blobWeightWhdb.Reshape(rgShapeWhdb);
                bias_filler.Fill(blobWeightWhdb);
            }
            m_nWeightWhdbidx = m_colBlobs.Count;
            m_colBlobs.Add(blobWeightWhdb);
        }

        if (m_param.lstm_attention_param.enable_attention)
        {
            // context-to-hidden weights
            // Initialize the weight.
            Blob<T> blobWeights_C_H = new Blob<T>(m_cuda, m_log);
            blobWeights_C_H.Name = m_param.name + " weights C to H";
            blobWeights_C_H.type = BLOB_TYPE.WEIGHT;

            if (!shareParameter(blobWeights_C_H, rgShape1))
            {
                blobWeights_C_H.Reshape(rgShape1); // same shape as I to H
                weight_filler.Fill(blobWeights_C_H);
            }
            m_nWeightCtoHidx = m_colBlobs.Count;
            m_colBlobs.Add(blobWeights_C_H);
        }
    }

    // NOTE(review): the map is sized before the attention layer blobs are appended
    // to m_colBlobs further below - confirm that this is intended.
    m_rgbParamPropagateDown = new DictionaryMap<bool>(m_colBlobs.Count, true);

    // Cell/hidden state blobs are all sized (m_nN, m_nH).
    List<int> rgCellShape = new List<int>() { m_nN, m_nH };
    m_blob_C_0.Reshape(rgCellShape);
    m_blob_H_0.Reshape(rgCellShape);
    m_blob_C_T.Reshape(rgCellShape);
    m_blob_H_T.Reshape(rgCellShape);
    m_blob_H_to_H.Reshape(rgCellShape);

    // Gate blobs are sized (m_nN, 4, m_nH).
    List<int> rgGateShape = new List<int>() { m_nN, 4, m_nH };
    m_blob_H_to_Gate.Reshape(rgGateShape);

    // Attention settings
    if (m_param.lstm_attention_param.enable_attention)
    {
        m_blob_C_to_Gate = new Blob<T>(m_cuda, m_log, false);
        m_blob_C_to_Gate.Name = m_param.name + "c_to_gate";
        m_blob_C_to_Gate.Reshape(rgGateShape);

        m_blobContext = new Blob<T>(m_cuda, m_log);
        m_blobContext.Name = "context_out";

        m_blobContextFull = new Blob<T>(m_cuda, m_log);
        m_blobContextFull.Name = "context_full";

        m_blobPrevCt = new Blob<T>(m_cuda, m_log);
        m_blobPrevCt.Name = "prev_ct";

        // The internal attention layer uses the same fillers and output size as this layer.
        LayerParameter attentionParam = new LayerParameter(LayerParameter.LayerType.ATTENTION);
        attentionParam.attention_param.axis = 2;
        attentionParam.attention_param.dim = m_param.lstm_attention_param.num_output;
        attentionParam.attention_param.weight_filler = m_param.lstm_attention_param.weight_filler;
        attentionParam.attention_param.bias_filler = m_param.lstm_attention_param.bias_filler;

        // Carry the shared blobs/layer over to the attention layer parameter.
        if (m_param is LayerParameterEx<T>)
        {
            LayerParameterEx<T> pEx = m_param as LayerParameterEx<T>;
            attentionParam = new LayerParameterEx<T>(attentionParam, pEx.SharedBlobs, pEx.SharedLayerBlobs, pEx.SharedLayer);
        }

        m_attention = new AttentionLayer<T>(m_cuda, m_log, attentionParam);

        // The attention layer is set up on (encoding, C_T, encodingClip) -> context.
        Blob<T> blobEncoding = colBottom[2];
        Blob<T> blobEncodingClip = colBottom[3];
        addInternal(new List<Blob<T>>() { blobEncoding, m_blob_C_T, blobEncodingClip }, m_blobContext);
        m_attention.Setup(m_colInternalBottom, m_colInternalTop);

        foreach (Blob<T> b in m_attention.blobs)
        {
            m_colBlobs.Add(b);
        }
    }
}
/// <summary>
/// Sample entry point: loads the siamese MNIST solver and model descriptors, sets the
/// DECODE layer target to CENTROID, trains the model for 4000 iterations, tests it for
/// 100 iterations and writes the resulting accuracy to the log.
/// </summary>
/// <param name="args">Specifies the command line arguments (not used).</param>
/// <exception cref="InvalidOperationException">Thrown when the loaded model has no DECODE layer.</exception>
static void Main(string[] args)
{
    if (!sqlCheck())
    {
        return;
    }

    Log log = new Log("test");
    log.OnWriteLine += Log_OnWriteLine;
    CancelEvent cancel = new CancelEvent();
    SettingsCaffe settings = new SettingsCaffe();

    // Load all images into memory before training.
    settings.ImageDbLoadMethod = IMAGEDB_LOAD_METHOD.LOAD_ALL;
    // Use GPU ID = 0
    settings.GpuIds = "0";

    // Load the descriptors from their respective files
    string strSolver = load_file("C:\\ProgramData\\MyCaffe\\test_data\\models\\siamese\\mnist\\solver.prototxt");
    string strModel = load_file("C:\\ProgramData\\MyCaffe\\test_data\\models\\siamese\\mnist\\train_val.prototxt");

    // Change the DECODE layer target to CENTROID and write the model description back out.
    RawProto proto = RawProto.Parse(strModel);
    NetParameter net_param = NetParameter.FromProto(proto);
    LayerParameter layer = net_param.FindLayer(LayerParameter.LayerType.DECODE);

    // Fail with a clear message instead of a NullReferenceException
    // (assumes FindLayer returns null when no layer matches - TODO confirm).
    if (layer == null)
    {
        throw new InvalidOperationException("The model does not contain a DECODE layer.");
    }

    layer.decode_param.target = DecodeParameter.TARGET.CENTROID;
    proto = net_param.ToProto("root");
    strModel = proto.ToString();

    // Load the MNIST data descriptor.
    DatasetFactory factory = new DatasetFactory();
    DatasetDescriptor ds = factory.LoadDataset("MNIST");

    // Create a test project with the dataset and descriptors
    ProjectEx project = new ProjectEx("Test");
    project.SetDataset(ds);
    project.ModelDescription = strModel;
    project.SolverDescription = strSolver;

    // Create the MyCaffeControl (with the 'float' base type)
    string strCudaPath = "C:\\Program Files\\SignalPop\\MyCaffe\\cuda_11.3\\CudaDnnDll.11.3.dll";
    MyCaffeControl<float> mycaffe = new MyCaffeControl<float>(settings, log, cancel, null, null, null, null, strCudaPath);

    try
    {
        // Load the project, using the TRAIN phase.
        mycaffe.Load(Phase.TRAIN, project);

        // Train the model for 4000 iterations
        // (which uses the internal solver and internal training net)
        int nIterations = 4000;
        mycaffe.Train(nIterations);

        // Test the model for 100 iterations
        // (which uses the internal testing net)
        nIterations = 100;
        double dfAccuracy = mycaffe.Test(nIterations);

        // Report the testing accuracy.
        log.WriteLine("Accuracy = " + dfAccuracy.ToString("P"));
    }
    finally
    {
        // Always dispose the control, even when loading, training or testing throws.
        mycaffe.Dispose();
    }

    Console.Write("Press any key...");
    Console.ReadKey();
}
/// <summary>
/// Creates a new MemoryLossLayer and marks it with the MEMORY_LOSS layer type.
/// </summary>
/// <param name="cuda">Cuda engine.</param>
/// <param name="log">General log.</param>
/// <param name="p">Provides LossParameter loss_param, with options:
///  - ignore_label (optional)
///    Specify a label value that should be ignored when computing the loss.
///  - normalize (optional, default true)
///    If true, the loss is normalized by the number of (non-ignored) labels
///    present; otherwise the loss is simply summed over spatial locations.
/// </param>
public MemoryLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.MEMORY_LOSS;
}
/// <summary>
/// Create the Encoder/Decoder Seq2Seq model description: two embedded LSTM encoders
/// (fed by 'data' and 'datar') are concatenated into 'encoded', which an LSTM_ATTENTION
/// decoder consumes to produce 'ip1'.
/// </summary>
/// <param name="nHiddenCount">Specifies the LSTM num_output; the value is also stored in m_nHidden.</param>
/// <param name="nWordSize">Specifies the num_output (word size) of each embedding layer.</param>
/// <param name="nVocabCount">Specifies the vocabulary count; the embedding input_dim and decoder num_output_ip are nVocabCount + 2.</param>
/// <param name="phase">Specifies the phase. For Phase.RUN the 'label' input, loss and accuracy layers are omitted and a SOFTMAX layer is appended instead.</param>
/// <returns>The NetParameter of the model is returned.</returns>
public NetParameter CreateModel(int nHiddenCount, int nWordSize, int nVocabCount, Phase phase = Phase.TRAIN)
{
    m_nHidden = nHiddenCount;

    bool bHasLabel = (phase != Phase.RUN);
    uint nVocabDim = (uint)nVocabCount + 2;
    NetParameter model = new NetParameter();

    // Encoder data inputs ('data' and 'datar').
    LayerParameter encData = new LayerParameter(LayerParameter.LayerType.INPUT);
    encData.top.Add("data");
    encData.top.Add("datar");
    encData.input_param.shape.Add(new BlobShape(new List<int>() { m_nTimeSteps, m_nBatch, 1 }));
    encData.input_param.shape.Add(new BlobShape(new List<int>() { m_nTimeSteps, m_nBatch, 1 }));
    model.layer.Add(encData);

    // Encoder clip input.
    LayerParameter encClip = new LayerParameter(LayerParameter.LayerType.INPUT);
    encClip.top.Add("clipE");
    encClip.input_param.shape.Add(new BlobShape(new List<int>() { m_nTimeSteps, m_nBatch }));
    model.layer.Add(encClip);

    // Decoder data inputs; the label is only present outside of the RUN phase.
    LayerParameter decData = new LayerParameter(LayerParameter.LayerType.INPUT);
    decData.top.Add("dec_input");
    decData.input_param.shape.Add(new BlobShape(new List<int>() { 1, m_nBatch, 1 }));
    if (bHasLabel)
    {
        decData.top.Add("label");
        decData.input_param.shape.Add(new BlobShape(new List<int>() { 1, m_nBatch, 1 }));
    }
    model.layer.Add(decData);

    // Decoder clip input.
    LayerParameter decClip = new LayerParameter(LayerParameter.LayerType.INPUT);
    decClip.top.Add("clipD");
    decClip.input_param.shape.Add(new BlobShape(new List<int>() { 1, m_nBatch }));
    model.layer.Add(decClip);

    // Embedding of 'data' into vectors of size nWordSize; the parameters are named
    // 'embed_wts'/'embed_bias', the same names used by the 'datar' embedding below.
    LayerParameter embedData = new LayerParameter(LayerParameter.LayerType.EMBED);
    embedData.bottom.Add("data");
    embedData.top.Add("embed1");
    embedData.embed_param.input_dim = nVocabDim;
    embedData.embed_param.num_output = (uint)nWordSize;
    embedData.embed_param.bias_term = true;
    embedData.embed_param.weight_filler = m_fillerParam;
    embedData.parameters.Add(new ParamSpec("embed_wts"));
    embedData.parameters.Add(new ParamSpec("embed_bias"));
    model.layer.Add(embedData);

    // First encoder LSTM over 'embed1', clipped by 'clipE'.
    LayerParameter encLstm1 = new LayerParameter(LayerParameter.LayerType.LSTM);
    encLstm1.name = "encoder1";
    encLstm1.bottom.Add("embed1");
    encLstm1.bottom.Add("clipE");
    encLstm1.top.Add("lstm1");
    encLstm1.recurrent_param.bias_filler = new FillerParameter("constant", 0);
    encLstm1.recurrent_param.weight_filler = m_fillerParam;
    encLstm1.recurrent_param.engine = EngineParameter.Engine.CUDNN;
    encLstm1.recurrent_param.num_output = (uint)m_nHidden;
    model.layer.Add(encLstm1);

    // Embedding of 'datar' into vectors of size nWordSize.
    LayerParameter embedDataR = new LayerParameter(LayerParameter.LayerType.EMBED);
    embedDataR.bottom.Add("datar");
    embedDataR.top.Add("embed2");
    embedDataR.embed_param.input_dim = nVocabDim;
    embedDataR.embed_param.num_output = (uint)nWordSize;
    embedDataR.embed_param.bias_term = true;
    embedDataR.embed_param.weight_filler = m_fillerParam;
    embedDataR.parameters.Add(new ParamSpec("embed_wts"));
    embedDataR.parameters.Add(new ParamSpec("embed_bias"));
    model.layer.Add(embedDataR);

    // Second encoder LSTM over 'embed2', clipped by 'clipE'.
    LayerParameter encLstm2 = new LayerParameter(LayerParameter.LayerType.LSTM);
    encLstm2.name = "encoder2";
    encLstm2.bottom.Add("embed2");
    encLstm2.bottom.Add("clipE");
    encLstm2.top.Add("lstm2");
    encLstm2.recurrent_param.bias_filler = new FillerParameter("constant", 0);
    encLstm2.recurrent_param.weight_filler = m_fillerParam;
    encLstm2.recurrent_param.engine = EngineParameter.Engine.CUDNN;
    encLstm2.recurrent_param.num_output = (uint)m_nHidden;
    model.layer.Add(encLstm2);

    // Join both encoder outputs along axis 2 into 'encoded'.
    LayerParameter joinEncoders = new LayerParameter(LayerParameter.LayerType.CONCAT);
    joinEncoders.concat_param.axis = 2;
    joinEncoders.bottom.Add("lstm1");
    joinEncoders.bottom.Add("lstm2");
    joinEncoders.top.Add("encoded");
    model.layer.Add(joinEncoders);

    // Embedding for the decoder input.
    LayerParameter embedDec = new LayerParameter(LayerParameter.LayerType.EMBED);
    embedDec.name = "dec_input_embed";
    embedDec.bottom.Add("dec_input");
    embedDec.top.Add("dec_input_embed");
    embedDec.embed_param.input_dim = nVocabDim;
    embedDec.embed_param.num_output = (uint)nWordSize;
    embedDec.embed_param.bias_term = true;
    embedDec.embed_param.weight_filler = m_fillerParam;
    model.layer.Add(embedDec);

    // Attention decoder producing 'ip1'.
    LayerParameter decoder = new LayerParameter(LayerParameter.LayerType.LSTM_ATTENTION);
    decoder.name = "decoder";
    decoder.bottom.Add("dec_input_embed");
    decoder.bottom.Add("clipD");
    decoder.bottom.Add("encoded");
    decoder.bottom.Add("clipE");
    decoder.top.Add("ip1");
    decoder.lstm_attention_param.bias_filler = new FillerParameter("constant", 0);
    decoder.lstm_attention_param.weight_filler = m_fillerParam;
    decoder.lstm_attention_param.num_output = (uint)m_nHidden;
    decoder.lstm_attention_param.num_output_ip = nVocabDim;
    decoder.lstm_attention_param.enable_attention = true;
    model.layer.Add(decoder);

    if (bHasLabel)
    {
        // Loss over 'ip1' and 'label'.
        LayerParameter lossLayer = new LayerParameter(LayerParameter.LayerType.MEMORY_LOSS);
        lossLayer.name = "loss";
        lossLayer.bottom.Add("ip1");
        lossLayer.bottom.Add("label");
        lossLayer.top.Add("loss");
        lossLayer.loss_param.normalization = LossParameter.NormalizationMode.NONE;
        model.layer.Add(lossLayer);

        // Accuracy, included in the TEST phase only.
        LayerParameter accuracyLayer = new LayerParameter(LayerParameter.LayerType.ACCURACY);
        accuracyLayer.bottom.Add("ip1");
        accuracyLayer.bottom.Add("label");
        accuracyLayer.top.Add("accuracy");
        accuracyLayer.accuracy_param.axis = 2;
        accuracyLayer.accuracy_param.ignore_label = 0;
        accuracyLayer.include.Add(new NetStateRule(Phase.TEST));
        model.layer.Add(accuracyLayer);
    }
    else
    {
        // RUN phase: softmax over axis 2 of 'ip1'.
        LayerParameter softmaxLayer = new LayerParameter(LayerParameter.LayerType.SOFTMAX);
        softmaxLayer.bottom.Add("ip1");
        softmaxLayer.top.Add("softmax");
        softmaxLayer.softmax_param.axis = 2;
        model.layer.Add(softmaxLayer);
    }

    return model;
}
/// <summary>
/// The SigmoidLayer constructor; marks the layer as type SIGMOID.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Specifies the LayerParameter of type SIGMOID with parameter sigmoid_param,
/// with options:
///   - engine. The engine to use, either Engine.CAFFE, or Engine.CUDNN.
/// </param>
public SigmoidLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.SIGMOID;
}
/// <summary>
/// The AttentionLayer constructor. Creates the internal layers (clip transpose, the
/// 'ipUa', 'ipWa' and 'ipV' inner products, the 'add' eltwise sum and 'tanh') together
/// with the named work blobs used by the layer.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Specifies the LayerParameter; attention_param.dim, attention_param.weight_filler
/// and attention_param.bias_filler are used to configure the internal inner product layers.
/// </param>
public AttentionLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    m_type = LayerParameter.LayerType.ATTENTION;

    // Non-null only when the layer was created with shared-blob information,
    // in which case that information is forwarded to each inner product layer.
    LayerParameterEx<T> sharedParam = m_param as LayerParameterEx<T>;

    // Transpose for the clip input (swaps the first two axes).
    LayerParameter clipTranspose = new LayerParameter(LayerParameter.LayerType.TRANSPOSE);
    clipTranspose.transpose_param.dim = new List<int>(new List<int>() { 1, 0 });
    m_transposeClip = new TransposeLayer<T>(cuda, log, clipTranspose);

    // Ua inner product (axis 2).
    LayerParameter uaParam = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    uaParam.name = "ipUa";
    uaParam.inner_product_param.axis = 2;
    uaParam.inner_product_param.num_output = m_param.attention_param.dim;
    uaParam.inner_product_param.weight_filler = m_param.attention_param.weight_filler;
    uaParam.inner_product_param.bias_filler = m_param.attention_param.bias_filler;
    if (sharedParam != null)
        uaParam = new LayerParameterEx<T>(uaParam, sharedParam.SharedBlobs, sharedParam.SharedLayerBlobs, sharedParam.SharedLayer);
    m_ipUa = new InnerProductLayer<T>(cuda, log, uaParam);

    // Wa inner product (axis 1).
    LayerParameter waParam = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    waParam.name = "ipWa";
    waParam.inner_product_param.axis = 1;
    waParam.inner_product_param.num_output = m_param.attention_param.dim;
    waParam.inner_product_param.weight_filler = m_param.attention_param.weight_filler;
    waParam.inner_product_param.bias_filler = m_param.attention_param.bias_filler;
    if (sharedParam != null)
        waParam = new LayerParameterEx<T>(waParam, sharedParam.SharedBlobs, sharedParam.SharedLayerBlobs, sharedParam.SharedLayer);
    m_ipWa = new InnerProductLayer<T>(cuda, log, waParam);

    // Element-wise sum.
    LayerParameter sumParam = new LayerParameter(LayerParameter.LayerType.ELTWISE);
    sumParam.name = "add";
    sumParam.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;
    m_add1 = new EltwiseLayer<T>(cuda, log, sumParam);

    // Tanh activation using the CUDNN engine.
    LayerParameter activationParam = new LayerParameter(LayerParameter.LayerType.TANH);
    activationParam.name = "tanh";
    activationParam.tanh_param.engine = EngineParameter.Engine.CUDNN;
    m_tanh = new TanhLayer<T>(cuda, log, activationParam);

    // V inner product (axis 2, single output, no bias).
    LayerParameter vParam = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    vParam.name = "ipV";
    vParam.inner_product_param.axis = 2;
    vParam.inner_product_param.num_output = 1;
    vParam.inner_product_param.bias_term = false;
    vParam.inner_product_param.weight_filler = m_param.attention_param.weight_filler;
    if (sharedParam != null)
        vParam = new LayerParameterEx<T>(vParam, sharedParam.SharedBlobs, sharedParam.SharedLayerBlobs, sharedParam.SharedLayer);
    m_ipV = new InnerProductLayer<T>(cuda, log, vParam);

    // Work blobs, created in the same order as before.
    m_blobX = new Blob<T>(cuda, log) { Name = "x" };
    m_blobClip = new Blob<T>(cuda, log) { Name = "clip" };
    m_blobX1 = new Blob<T>(cuda, log) { Name = "x1" };
    m_blobState = new Blob<T>(cuda, log) { Name = "state" };
    m_blobUh = new Blob<T>(cuda, log) { Name = "Uh" };
    m_blobWc = new Blob<T>(cuda, log) { Name = "Wc" };
    m_blobFullWc = new Blob<T>(cuda, log) { Name = "Full Wc" };
    m_blobAddOutput = new Blob<T>(cuda, log) { Name = "addOut" };
    m_blobGG = new Blob<T>(cuda, log) { Name = "gg" };
    m_blobAA = new Blob<T>(cuda, log) { Name = "aa" };
    m_blobScale = new Blob<T>(cuda, log, false) { Name = "scale" };
    m_blobSoftmax = new Blob<T>(cuda, log) { Name = "softmax" };
    m_blobFocusedInput = new Blob<T>(cuda, log) { Name = "softmax_full" };
    m_blobContext = new Blob<T>(cuda, log) { Name = "context" };
    m_blobWork = new Blob<T>(cuda, log) { Name = "work" };
}
/// <summary>
/// Create the model used to train the Encoder/Decoder Seq2Seq model using two LSTM
/// layers ('encoder' and 'decoder'), an inner product ('ip1') and a Euclidean loss.
/// </summary>
/// <param name="nInputData">Specifies the count of the input data (last axis of the 'data' input); stored in m_nInputData.</param>
/// <param name="nInputLabel">Specifies the count of the label data (last axis of the 'label' input and num_output of 'ip1'); stored in m_nInputLabel.</param>
/// <param name="nBatchOverride">Optionally specifies an override for the batch count; m_nBatch is used when null.</param>
/// <param name="nTimeStepOverride">Optionally specifies an override for the time-step count; m_nTimeSteps is used when null.</param>
/// <returns>The NetParameter of the model is returned.</returns>
public NetParameter CreateModel(int nInputData, int nInputLabel, int? nBatchOverride = null, int? nTimeStepOverride = null)
{
    NetParameter model = new NetParameter();

    int nBatchCount = nBatchOverride ?? m_nBatch;
    int nStepCount = nTimeStepOverride ?? m_nTimeSteps;

    m_nInputData = nInputData;
    m_nInputLabel = nInputLabel;

    // Data input of shape { steps, batch, nInputData }.
    LayerParameter dataInput = new LayerParameter(LayerParameter.LayerType.INPUT);
    dataInput.top.Add("data");
    dataInput.input_param.shape.Add(new BlobShape(new List<int>() { nStepCount, nBatchCount, nInputData }));
    model.layer.Add(dataInput);

    // Label input of shape { steps, batch, nInputLabel }.
    LayerParameter labelInput = new LayerParameter(LayerParameter.LayerType.INPUT);
    labelInput.top.Add("label");
    labelInput.input_param.shape.Add(new BlobShape(new List<int>() { nStepCount, nBatchCount, nInputLabel }));
    model.layer.Add(labelInput);

    // Clip input of shape { steps, batch } (0 for first batch, then all 1's).
    LayerParameter clipInput = new LayerParameter(LayerParameter.LayerType.INPUT);
    clipInput.top.Add("clip1");
    clipInput.input_param.shape.Add(new BlobShape(new List<int>() { nStepCount, nBatchCount }));
    model.layer.Add(clipInput);

    // Encoder LSTM over 'data', clipped by 'clip1'.
    LayerParameter encoder = new LayerParameter(LayerParameter.LayerType.LSTM);
    encoder.name = "encoder";
    encoder.bottom.Add("data");
    encoder.bottom.Add("clip1");
    encoder.top.Add("lstm1");
    if (encoder.recurrent_param != null)
    {
        encoder.recurrent_param.dropout_ratio = m_dfDropout;
        encoder.recurrent_param.engine = m_lstmEngine;
        encoder.recurrent_param.num_layers = (uint)m_nLayers;
        encoder.recurrent_param.num_output = (uint)m_nHidden;
        encoder.recurrent_param.weight_filler = new FillerParameter("gaussian", 0, 0, 0.5);
        encoder.recurrent_param.bias_filler = new FillerParameter("constant", 0);
    }
    model.layer.Add(encoder);

    // Decoder LSTM over the encoder output, used to decode the input encoding to the
    // data representing a section of the Sin curve.
    LayerParameter decoder = new LayerParameter(LayerParameter.LayerType.LSTM);
    decoder.name = "decoder";
    decoder.bottom.Add("lstm1");
    decoder.bottom.Add("clip1");
    decoder.top.Add("lstm2");
    decoder.recurrent_param.dropout_ratio = m_dfDropout;
    decoder.recurrent_param.engine = m_lstmEngine;
    decoder.recurrent_param.num_layers = (uint)m_nLayers;
    decoder.recurrent_param.num_output = (uint)m_nHidden;
    decoder.recurrent_param.weight_filler = new FillerParameter("gaussian", 0, 0, 0.5);
    decoder.recurrent_param.bias_filler = new FillerParameter("constant", 0);
    model.layer.Add(decoder);

    // Combine the decoder output down to the input label count per step,
    // which are the number of items in the Sin curve section.
    LayerParameter projection = new LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    projection.name = "ip1";
    projection.bottom.Add("lstm2");
    projection.top.Add("ip1");
    projection.inner_product_param.num_output = (uint)nInputLabel;
    projection.inner_product_param.axis = 2;
    projection.inner_product_param.bias_term = true;
    projection.inner_product_param.weight_filler = new FillerParameter("gaussian", 0, 0, 0.1);
    model.layer.Add(projection);

    // Calculate the loss between 'ip1' and 'label'.
    LayerParameter lossLayer = new LayerParameter(LayerParameter.LayerType.EUCLIDEAN_LOSS);
    lossLayer.bottom.Add("ip1");
    lossLayer.bottom.Add("label");
    lossLayer.top.Add("loss");
    model.layer.Add(lossLayer);

    return model;
}
/// <summary>
/// The CopyLayer constructor; marks the layer as type COPY.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Specifies the LayerParameter of type COPY.</param>
public CopyLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.COPY;
}
/// <summary>
/// Setup the layer. With a single bottom the scale is a learned parameter that is created
/// (or shared) here unless blobs already exist; when scale_param.bias_term is set, an
/// internal BiasLayer is created as well and its first blob is added to the layer blobs.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    ScaleParameter scaleParam = m_param.scale_param;

    if (colBottom.Count == 1)
    {
        if (blobs.Count > 0)
        {
            m_log.WriteLine("Skipping parameter initialization.");
        }
        else
        {
            // scale is a learned parameter; initialize it.
            m_nAxis = colBottom[0].CanonicalAxisIndex(scaleParam.axis);
            int nAxisCount = scaleParam.num_axes;
            m_log.CHECK_GE(nAxisCount, -1, "num_axes must be non-negative, or -1 to extend to the end of bottom[0].");

            if (nAxisCount >= 0)
                m_log.CHECK_GE(colBottom[0].num_axes, m_nAxis + nAxisCount, "scale blob's shape extends past bottom[0]'s shape when applied starting with bottom[0] axis = " + m_nAxis.ToString());

            m_colBlobs = new BlobCollection<T>();

            // The scale shape covers bottom[0] axes [m_nAxis, nLastAxis).
            int nLastAxis;
            if (nAxisCount == -1)
                nLastAxis = colBottom[0].shape().Count;
            else
                nLastAxis = m_nAxis + nAxisCount;

            List<int> rgScaleShape = new List<int>();
            for (int nAxis = m_nAxis; nAxis < nLastAxis; nAxis++)
            {
                rgScaleShape.Add(colBottom[0].shape(nAxis));
            }

            Blob<T> blobScale = new Blob<T>(m_cuda, m_log);
            blobScale.Name = m_param.name + " scale";

            if (!shareParameter(blobScale, rgScaleShape))
            {
                blobScale.Reshape(rgScaleShape);

                // Default to unit (1) filler for identity operation.
                FillerParameter fillerParam = scaleParam.filler ?? new FillerParameter("constant", 1.0);
                Filler<T> scaleFiller = Filler<T>.Create(m_cuda, m_log, fillerParam);
                scaleFiller.Fill(blobScale);
            }

            m_colBlobs.Add(blobScale);
        }
    }

    if (scaleParam.bias_term)
    {
        LayerParameter biasParam = new LayerParameter(LayerParameter.LayerType.BIAS);
        biasParam.bias_param.axis = scaleParam.axis;

        // With a second bottom, its axis count is used for the bias num_axes.
        if (colBottom.Count > 1)
            biasParam.bias_param.num_axes = colBottom[1].num_axes;
        else
            biasParam.bias_param.num_axes = scaleParam.num_axes;

        biasParam.bias_param.filler = scaleParam.bias_filler;

        m_colBiasBottomVec = new BlobCollection<T>();
        m_colBiasBottomVec.Add(colBottom[0]);

        m_biasLayer = new BiasLayer<T>(m_cuda, m_log, biasParam);
        m_biasLayer.Setup(m_colBiasBottomVec, colTop);
        shareLayerBlobs(m_biasLayer);

        // The bias blob is appended after any blobs already present.
        m_nBiasParamId = m_colBlobs.Count;
        m_colBlobs.Add(m_biasLayer.blobs[0]);
        m_rgbBiasPropagateDown = Utility.Create<bool>(1, false);
    }

    m_rgbParamPropagateDown = new DictionaryMap<bool>(m_colBlobs.Count(), true);
}
/// <summary>
/// The DummyDataLayer constructor; marks the layer as type DUMMYDATA.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">
/// Provides DummyDataParameter dummy_data_param with options:
///  - data_filler. A list of Fillers to use.
///
///  - shape. A list of shapes to use.
/// </param>
public DummyDataLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.DUMMYDATA;
}
/// <summary>
/// The ConstantLayer constructor; marks the layer as type CONSTANT.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Specifies the LayerParameter of type CONSTANT with parameter constant_param.</param>
public ConstantLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.CONSTANT;
}
/// <summary>
/// The ReLULayer constructor; marks the layer as type RELU.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Specifies the LayerParameter of type RELU with parameter relu_param,
/// with options:
///   - engine. The engine to use, either Engine.CAFFE, or Engine.CUDNN.
///
///   - negative_slope (\b optional, default = 0). The negative slope. Allow non-zero slope for negative inputs to speed up optimization.
/// </param>
public ReLULayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.RELU;
}
/// <summary>
/// The TripletLossLayer constructor; marks the layer as type TRIPLET_LOSS.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Specifies the LayerParameter of type TRIPLET_LOSS with parameter triplet_loss_param.</param>
public TripletLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.TRIPLET_LOSS;
}
/// <summary>
/// The FlattenLayer constructor; marks the layer as type FLATTEN.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Provides FlattenParameter flatten_param.</param>
public FlattenLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.FLATTEN;
}
/// <summary>
/// Fills the NetParameter with the RNN network architecture, unrolled over m_nT timesteps.
/// </summary>
/// <remarks>
/// Layers are appended to net_param in this order: the 'h_0' input, the 'cont' slice, the
/// 'x_transform' inner product, the optional static-input transform and reshape (when
/// m_bStaticInput is set), the 'W_xh_x' slice, the per-timestep layers, and finally a clone of
/// the 'o_concat' layer whose bottoms are collected inside the timestep loop.
/// </remarks>
/// <param name="net_param">Specifies the NetParameter that receives the unrolled layers.</param>
protected override void FillUnrolledNet(NetParameter net_param)
{
    uint nNumOutput = m_param.recurrent_param.num_output;
    m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive.");
    FillerParameter weight_filler = m_param.recurrent_param.weight_filler;
    FillerParameter bias_filler = m_param.recurrent_param.bias_filler;

    // Add generic LayerParameter's (without bottoms/tops) of layer types we'll
    // use to save redundant code.
    LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT);
    hidden_param.inner_product_param.num_output = nNumOutput;
    hidden_param.inner_product_param.bias_term = false;
    hidden_param.inner_product_param.axis = 2;
    hidden_param.inner_product_param.weight_filler = weight_filler.Clone();

    LayerParameter biased_hidden_param = hidden_param.Clone(false);
    biased_hidden_param.inner_product_param.bias_term = true;
    biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone();

    LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE);
    sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;

    LayerParameter tanh_param = new LayerParameter(LayerParameter.LayerType.TANH);

    LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE);
    scale_param.scale_param.axis = 0;

    LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE);
    slice_param.slice_param.axis = 0;

    List<BlobShape> rgInputShapes = new List<BlobShape>();
    RecurrentInputShapes(rgInputShapes);
    m_log.CHECK_EQ(1, rgInputShapes.Count, "There should only be one input shape.");


    //--- Add the layers ---

    // Input layer exposing the initial hidden state 'h_0'.
    LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT);
    input_layer_param.top.Add("h_0");
    input_layer_param.input_param.shape.Add(rgInputShapes[0]);
    net_param.layer.Add(input_layer_param);

    // Slice of 'cont'; its tops ('cont_t') are added inside the timestep loop below,
    // after the layer has already been added to the net.
    // NOTE(review): this presumably relies on net_param.layer holding a reference to
    // the same LayerParameter instance - confirm.
    LayerParameter cont_slice_param = slice_param.Clone(false);
    cont_slice_param.name = "cont_slice";
    cont_slice_param.bottom.Add("cont");
    cont_slice_param.slice_param.axis = 0;
    net_param.layer.Add(cont_slice_param);

    // Add layer to transform all timesteps of x to the hidden state dimension.
    //    W_xh_x = W_xh * x + b_h
    {
        LayerParameter x_transform_param = biased_hidden_param.Clone(false);
        x_transform_param.name = "x_transform";
        x_transform_param.parameters.Add(new ParamSpec("W_xh"));
        x_transform_param.parameters.Add(new ParamSpec("b_h"));
        x_transform_param.bottom.Add("x");
        x_transform_param.top.Add("W_xh_x");
        x_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_transform_param);
    }

    if (m_bStaticInput)
    {
        // Add layer to transform x_static to the hidden state dimension.
        //    W_xh_x_static = W_xh_static * x_static
        LayerParameter x_static_transform_param = hidden_param.Clone(false);
        x_static_transform_param.inner_product_param.axis = 1;
        x_static_transform_param.name = "W_xh_x_static";
        x_static_transform_param.parameters.Add(new ParamSpec("W_xh_static"));
        x_static_transform_param.bottom.Add("x_static");
        x_static_transform_param.top.Add("W_xh_x_static_preshape");
        x_static_transform_param.propagate_down.Add(true);
        net_param.layer.Add(x_static_transform_param);

        // Reshape the static transform output to { 1, -1, num_output }.
        LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE);
        BlobShape new_shape = reshape_param.reshape_param.shape;
        new_shape.dim.Add(1);   // One timestep.
        new_shape.dim.Add(-1);  // Should infer m_nN as the dimension so we can reshape on batch size.
        new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output);
        reshape_param.name = "W_xh_x_static_reshape";
        reshape_param.bottom.Add("W_xh_x_static_preshape");
        reshape_param.top.Add("W_xh_x_static");
        net_param.layer.Add(reshape_param);
    }

    // Slice of 'W_xh_x'; as with 'cont_slice', its tops ('W_xh_x_t') are added in the loop below.
    LayerParameter x_slice_param = slice_param.Clone(false);
    x_slice_param.name = "W_xh_x_slice";
    x_slice_param.bottom.Add("W_xh_x");
    net_param.layer.Add(x_slice_param);

    // Concat of all per-timestep outputs 'o_t' into 'o'; a clone of this layer is added
    // to the net only after the loop, once all of its bottoms are known.
    LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT);
    output_concat_layer.name = "o_concat";
    output_concat_layer.top.Add("o");
    output_concat_layer.concat_param.axis = 0;

    for (int t = 1; t <= m_nT; t++)
    {
        string tm1s = (t - 1).ToString();
        string ts = t.ToString();

        cont_slice_param.top.Add("cont_" + ts);
        x_slice_param.top.Add("W_xh_x_" + ts);

        // Add layer to flush the hidden state when beginning a new sequence,
        // as indicated by cont_t.
        //    h_conted_{t-1} := cont_t * h_{t-1}
        //
        // Normally, cont_t is binary (i.e., 0 or 1), so:
        //    h_conted_{t-1} := h_{t-1} if cont_t == 1
        //                      0 otherwise.
        {
            LayerParameter cont_h_param = scale_param.Clone(false);
            cont_h_param.name = "h_conted_" + tm1s;
            cont_h_param.bottom.Add("h_" + tm1s);
            cont_h_param.bottom.Add("cont_" + ts);
            cont_h_param.top.Add("h_conted_" + tm1s);
            net_param.layer.Add(cont_h_param);
        }

        // Add layer to compute
        //    W_hh_h_{t-1} := W_hh * h_conted_{t-1}
        {
            LayerParameter w_param = hidden_param.Clone(false);
            w_param.name = "W_hh_h_" + tm1s;
            w_param.parameters.Add(new ParamSpec("W_hh"));
            w_param.bottom.Add("h_conted_" + tm1s);
            w_param.top.Add("W_hh_h_" + tm1s);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layers to compute
        //    h_t := \tanh( W_hh * h_conted_{t-1} + W_xh * x_t + b_h )
        //         = \tanh( W_hh_h_{t-1} + W_xh_t )
        // (the static input transform is summed in as well when m_bStaticInput is set).
        {
            LayerParameter h_input_sum_param = sum_param.Clone(false);
            h_input_sum_param.name = "h_input_sum_" + ts;
            h_input_sum_param.bottom.Add("W_hh_h_" + tm1s);
            h_input_sum_param.bottom.Add("W_xh_x_" + ts);

            if (m_bStaticInput)
            {
                h_input_sum_param.bottom.Add("W_xh_x_static");
            }

            h_input_sum_param.top.Add("h_neuron_input_" + ts);
            net_param.layer.Add(h_input_sum_param);
        }
        {
            // Note: this tanh layer's name equals the name of its bottom blob.
            LayerParameter h_neuron_param = tanh_param.Clone(false);
            h_neuron_param.name = "h_neuron_input_" + ts;
            h_neuron_param.bottom.Add("h_neuron_input_" + ts);
            h_neuron_param.top.Add("h_" + ts);
            net_param.layer.Add(h_neuron_param);
        }

        // Add layer to compute
        //    W_ho_h_t := W_ho * h_t + b_o
        {
            LayerParameter w_param = biased_hidden_param.Clone(false);
            w_param.name = "W_ho_h_" + ts;
            w_param.parameters.Add(new ParamSpec("W_ho"));
            w_param.parameters.Add(new ParamSpec("b_o"));
            w_param.bottom.Add("h_" + ts);
            w_param.top.Add("W_ho_h_" + ts);
            w_param.inner_product_param.axis = 2;
            net_param.layer.Add(w_param);
        }

        // Add layer to compute
        //    o_t := \tanh( W_ho * h_t + b_o )
        //         = \tanh( W_ho_h_t )
        {
            LayerParameter o_neuron_param = tanh_param.Clone(false);
            o_neuron_param.name = "o_neuron_" + ts;
            o_neuron_param.bottom.Add("W_ho_h_" + ts);
            o_neuron_param.top.Add("o_" + ts);
            net_param.layer.Add(o_neuron_param);
        }

        output_concat_layer.bottom.Add("o_" + ts);
    }

    net_param.layer.Add(output_concat_layer.Clone(false));
}
/// <summary>
/// The HingeLossLayer constructor; marks the layer as type HINGE_LOSS.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Provides LossParameter loss_param, with options:
///  - ignore_label (optional)
///    Specify a label value that should be ignored when computing the loss.
///  - normalize (optional, default true)
///    If true, the loss is normalized by the number of (non-ignored) labels
///    present; otherwise the loss is simply summed over spatial locations.
/// </param>
public HingeLossLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.HINGE_LOSS;
}
/// <summary>
/// The RNNLayer constructor; marks the layer as type RNN.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn instance handed to the base layer.</param>
/// <param name="log">Specifies the Log handed to the base layer.</param>
/// <param name="p">Specifies the LayerParameter of type RNN with parameter recurrent_param,
/// with options:
///   - num_output. The dimension of the output (and usually hidden state) representation -- must be explicitly set to non-zero.
///
///   - weight_filler (\b optional, default = "gaussian"). The weight filler used to initialize the weights.
///
///   - bias_filler (\b optional, default = "constant, 1.0"). The bias filler used to initialize the bias values.
///
///   - debug_info (\b optional, default = false). Whether or not to output extra debug information.
///
///   - expose_hidden (\b optional, default = false). Whether to add as additional bottoms (inputs) the initial hidden state
///     Blobs, and add as additional tops (outputs) the final timestep hidden state Blobs. The RNN architecture adds
///     1 additional Blob.
/// </param>
/// <param name="evtCancel">Specifies the CancelEvent used to cancel training operations.</param>
public RNNLayer(CudaDnn<T> cuda, Log log, LayerParameter p, CancelEvent evtCancel)
    : base(cuda, log, p, evtCancel)
{
    this.m_type = LayerParameter.LayerType.RNN;
}
/// <summary>
/// Setup the layer: validates alpha_in/alpha_out and the stride, creates the internal
/// blobs, and creates and sets up the internal pooling (downsample), convolution
/// (h2h, h2l, l2l, l2h), interp (upsample) and eltwise (add) layers.
/// </summary>
/// <remarks>
/// The group count of the h2h and l2l convolutions is derived from alpha_in and can
/// evaluate to zero (alpha_in == 1 for h2h, alpha_in == 0 for l2l); in that case the
/// group falls back to 1 instead of raising a DivideByZeroException. When no stride is
/// specified the stride defaults to 1.
/// </remarks>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs; a second bottom enables the low frequency path.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    m_dfAlphaIn = m_param.convolution_octave_param.alpha_in;
    m_dfAlphaOut = m_param.convolution_octave_param.alpha_out;

    m_log.CHECK_GE(m_dfAlphaIn, 0, "The alpha in must be >= 0.");
    m_log.CHECK_LE(m_dfAlphaIn, 1, "The alpha in must be <= 1.");
    m_log.CHECK_GE(m_dfAlphaOut, 0, "The alpha out must be >= 0.");
    m_log.CHECK_LT(m_dfAlphaOut, 1, "The alpha out must be < 1.");

    // An empty stride list means 'not specified'; use a stride of 1 in that case.
    m_nStride = (m_param.convolution_param.stride.Count > 0) ? (int)m_param.convolution_param.stride[0] : 1;
    m_log.CHECK_GE(m_nStride, 1, "The stride should be >= 1.");
    m_log.CHECK_LE(m_nStride, 2, "The stride should be <= 2.");


    //--------------------------------------------
    //  Create the blobs.
    //--------------------------------------------

    // process high frequency.
    m_blob_x_h = new Blob<T>(m_cuda, m_log);
    m_blob_x_h.Name = "x_h";
    m_blob_x_h2h = new Blob<T>(m_cuda, m_log);
    m_blob_x_h2h.Name = "x_h2h";

    if (m_dfAlphaOut > 0)
    {
        m_blob_x_h_ds = new Blob<T>(m_cuda, m_log);
        m_blob_x_h_ds.Name = "x_h_ds";
        m_blob_x_h2l = new Blob<T>(m_cuda, m_log);
        m_blob_x_h2l.Name = "x_h2l";
    }

    // process low frequency.
    if (colBottom.Count > 1)
    {
        m_blob_x_l = new Blob<T>(m_cuda, m_log);
        m_blob_x_l.Name = "x_l";
        m_blob_x_l_ds = new Blob<T>(m_cuda, m_log);
        m_blob_x_l_ds.Name = "x_l_ds";
        m_blob_x_l2h = new Blob<T>(m_cuda, m_log);
        m_blob_x_l2h.Name = "x_l2h";
        m_blob_x_l2h_us = new Blob<T>(m_cuda, m_log);
        m_blob_x_l2h_us.Name = "x_l2h_us";
        m_blob_x_l2l = new Blob<T>(m_cuda, m_log);
        m_blob_x_l2l.Name = "x_l2l";
    }


    //--------------------------------------------
    //  Create the internal layers.
    //--------------------------------------------

    // 2x2 average pooling with stride 2, shared by all downsample layers.
    LayerParameter poolParam = new LayerParameter(LayerParameter.LayerType.POOLING, "downsample");
    poolParam.pooling_param.kernel_size.Add(2);
    poolParam.pooling_param.stride.Add(2);
    poolParam.pooling_param.pool = PoolingParameter.PoolingMethod.AVE;
    poolParam.pooling_param.engine = m_param.convolution_param.engine;

    if (m_nStride == 2)
    {
        m_downsampleLayer1 = Layer<T>.Create(m_cuda, m_log, poolParam, null);

        setupBtmTop(colBottom[0], m_blob_x_h);
        m_downsampleLayer1.LayerSetUp(m_rgBtm, m_rgTop);
        m_downsampleLayer1.Reshape(m_rgBtm, m_rgTop);
    }
    else
    {
        m_blob_x_h.ReshapeLike(colBottom[0]);
    }

    // Base convolution settings cloned for each internal convolution (stride fixed at 1).
    LayerParameter convParamBase = new LayerParameter(LayerParameter.LayerType.CONVOLUTION);
    convParamBase.convolution_param.engine = m_param.convolution_param.engine;
    convParamBase.convolution_param.kernel_size = m_param.convolution_param.kernel_size;
    convParamBase.convolution_param.stride.Add(1);
    convParamBase.convolution_param.pad = m_param.convolution_param.pad;
    convParamBase.convolution_param.dilation = m_param.convolution_param.dilation;
    convParamBase.convolution_param.bias_filler = m_param.convolution_param.bias_filler;
    convParamBase.convolution_param.bias_term = m_param.convolution_param.bias_term;

    int nInChannels = colBottom[0].channels;
    uint nOutChannels = m_param.convolution_param.num_output;
    uint nGroup = m_param.convolution_param.group;
    uint nGroupTmp;

    // h2h Layer
    {
        LayerParameter convParam = convParamBase.Clone(false);
        convParam.name = "h2h conv";
        convParam.convolution_param.num_output = nOutChannels - (uint)(m_dfAlphaOut * nOutChannels);
        nGroupTmp = (uint)Math.Ceiling(nGroup - m_dfAlphaIn * nGroup);
        // nGroupTmp is 0 when alpha_in == 1; guard the modulo and fall back to a single group.
        convParam.convolution_param.group = (nGroupTmp > 0 && nInChannels % nGroupTmp == 0) ? nGroupTmp : 1;
        m_conv_h2h = Layer<T>.Create(m_cuda, m_log, convParam, null);
        m_conv_h2h.OnGetWorkspace += layer_OnGetWorkspace;
        m_conv_h2h.OnSetWorkspace += layer_OnSetWorkspace;

        setupBtmTop(m_blob_x_h, m_blob_x_h2h);
        m_conv_h2h.LayerSetUp(m_rgBtm, m_rgTop);
        m_conv_h2h.Reshape(m_rgBtm, m_rgTop);
        m_colBlobs.Add(m_conv_h2h.blobs);
    }

    // h2l Layer
    if (m_dfAlphaOut > 0)
    {
        m_downsampleLayer2 = Layer<T>.Create(m_cuda, m_log, poolParam, null);

        setupBtmTop(m_blob_x_h, m_blob_x_h_ds);
        m_downsampleLayer2.LayerSetUp(m_rgBtm, m_rgTop);
        m_downsampleLayer2.Reshape(m_rgBtm, m_rgTop);

        LayerParameter convParam = convParamBase.Clone(false);
        convParam.name = "h2l conv";
        convParam.convolution_param.num_output = (uint)(m_dfAlphaOut * nOutChannels);
        convParam.convolution_param.group = (convParam.convolution_param.num_output % nGroup == 0) ? nGroup : 1;
        m_conv_h2l = Layer<T>.Create(m_cuda, m_log, convParam, null);
        m_conv_h2l.OnGetWorkspace += layer_OnGetWorkspace;
        m_conv_h2l.OnSetWorkspace += layer_OnSetWorkspace;

        setupBtmTop(m_blob_x_h_ds, m_blob_x_h2l);
        m_conv_h2l.LayerSetUp(m_rgBtm, m_rgTop);
        m_conv_h2l.Reshape(m_rgBtm, m_rgTop);
        m_colBlobs.Add(m_conv_h2l.blobs);
    }

    if (colBottom.Count > 1)
    {
        m_blob_x_l.ReshapeLike(colBottom[1]);

        // downsample3 Layer
        if (m_nStride == 2)
        {
            m_downsampleLayer3 = Layer<T>.Create(m_cuda, m_log, poolParam, null);

            setupBtmTop(colBottom[1], m_blob_x_l_ds);
            m_downsampleLayer3.LayerSetUp(m_rgBtm, m_rgTop);
            m_downsampleLayer3.Reshape(m_rgBtm, m_rgTop);
        }
        else
        {
            m_blob_x_l_ds.ReshapeLike(m_blob_x_l);
        }

        // l2l layer
        if (m_dfAlphaOut > 0)
        {
            LayerParameter convParam = convParamBase.Clone(false);
            convParam.name = "l2l conv";
            convParam.convolution_param.num_output = (uint)(m_dfAlphaOut * nOutChannels);
            nGroupTmp = (uint)Math.Ceiling(m_dfAlphaIn * nGroup);
            // nGroupTmp is 0 when alpha_in == 0; guard the modulo and fall back to a single group.
            convParam.convolution_param.group = (nGroupTmp > 0 && convParam.convolution_param.num_output % nGroupTmp == 0) ? nGroupTmp : 1;
            m_conv_l2l = Layer<T>.Create(m_cuda, m_log, convParam, null);
            m_conv_l2l.OnGetWorkspace += layer_OnGetWorkspace;
            m_conv_l2l.OnSetWorkspace += layer_OnSetWorkspace;

            setupBtmTop(m_blob_x_l_ds, m_blob_x_l2l);
            m_conv_l2l.LayerSetUp(m_rgBtm, m_rgTop);
            m_conv_l2l.Reshape(m_rgBtm, m_rgTop);
            m_colBlobs.Add(m_conv_l2l.blobs);
        }

        // l2h Layer
        {
            LayerParameter convParam = convParamBase.Clone(false);
            convParam.name = "l2h conv";
            convParam.convolution_param.num_output = nOutChannels - (uint)(m_dfAlphaOut * nOutChannels);
            convParam.convolution_param.group = (convParam.convolution_param.num_output % nGroup == 0) ? nGroup : 1;
            m_conv_l2h = Layer<T>.Create(m_cuda, m_log, convParam, null);
            m_conv_l2h.OnGetWorkspace += layer_OnGetWorkspace;
            m_conv_l2h.OnSetWorkspace += layer_OnSetWorkspace;

            setupBtmTop(m_blob_x_l, m_blob_x_l2h);
            m_conv_l2h.LayerSetUp(m_rgBtm, m_rgTop);
            m_conv_l2h.Reshape(m_rgBtm, m_rgTop);
            m_colBlobs.Add(m_conv_l2h.blobs);
        }

        // upsample Layer (only used with a stride of 1; otherwise x_l2h_us just mirrors x_l2h's shape).
        if (m_nStride == 1)
        {
            LayerParameter interpParam = new LayerParameter(LayerParameter.LayerType.INTERP, "upsample");
            interpParam.interp_param.zoom_factor = 2;
            m_upsampleLayer = Layer<T>.Create(m_cuda, m_log, interpParam, null);

            setupBtmTop(m_blob_x_l2h, m_blob_x_l2h_us);
            m_upsampleLayer.LayerSetUp(m_rgBtm, m_rgTop);
            m_upsampleLayer.Reshape(m_rgBtm, m_rgTop);
        }
        else
        {
            m_blob_x_l2h_us.ReshapeLike(m_blob_x_l2h);
        }

        // add Layer
        LayerParameter eltAdd = new LayerParameter(LayerParameter.LayerType.ELTWISE);
        eltAdd.name = "eltadd";
        eltAdd.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM;
        m_add = Layer<T>.Create(m_cuda, m_log, eltAdd, null);

        setupBtmTop(m_blob_x_l2h_us, m_blob_x_h2h, colTop[0]);
        m_add.LayerSetUp(m_rgBtm, m_rgTop);
        m_add.Reshape(m_rgBtm, m_rgTop);

        // The same add layer is reshaped for the second (low frequency) top as well.
        if (m_dfAlphaOut > 0)
        {
            setupBtmTop(m_blob_x_h2l, m_blob_x_l2l, colTop[1]);
            m_add.Reshape(m_rgBtm, m_rgTop);
        }
    }
}
/// <summary>
/// Set up the layer: create the input transpose layer, run Setup on each internal layer in
/// sequence, size the working blobs and the top blob, and gather the internal layers' blobs
/// into this layer's blobs.
/// </summary>
/// <param name="colBottom">Specifies the collection of bottom (input) Blobs; [0] is the input x, [1] is the blob used to shape the internal state blob, and [2] is the clip.</param>
/// <param name="colTop">Specifies the collection of top (output) Blobs; colTop[0] is reshaped here.</param>
public override void LayerSetUp(BlobCollection<T> colBottom, BlobCollection<T> colTop)
{
    Blob<T> blobInput = colBottom[0];
    Blob<T> blobStateSrc = colBottom[1];
    Blob<T> blobClipSrc = colBottom[2];

    // Axis 1 of the input must have size 1 (reported to the user as the batch size).
    m_log.CHECK_EQ(blobInput.shape(1), 1, "Currently, only batch size = 1 is supported.");

    m_rgbParamPropagateDown = new DictionaryMap<bool>(m_colBlobs.Count, true);

    // Axis order for the input transpose: the first two axes swapped, every
    // remaining axis left in its original position, i.e. { 1, 0, 2, 3, ... }.
    List<int> rgAxisOrder = new List<int>() { 1, 0 };
    for (int nAxis = 2; nAxis < blobInput.num_axes; nAxis++)
    {
        rgAxisOrder.Add(nAxis);
    }

    LayerParameter paramTransposeX = new LayerParameter(LayerParameter.LayerType.TRANSPOSE);
    paramTransposeX.transpose_param.dim = new List<int>(rgAxisOrder);
    m_transposeX = new TransposeLayer<T>(m_cuda, m_log, paramTransposeX);

    // NOTE(review): every internal layer other than m_transposeX is used here without being
    // created, so they are presumably constructed elsewhere (e.g. the constructor) - confirm.
    // Each addInternal call stages the bottom/top pair consumed by the Setup call that follows,
    // so the order of these statements must not change.

    // x -> transposed x
    addInternal(blobInput, m_blobX);
    m_transposeX.Setup(m_colInternalBottom, m_colInternalTop);
    m_blobX1.ReshapeLike(m_blobX);

    // transposed x -> Uh
    addInternal(m_blobX, m_blobUh);
    m_ipUa.Setup(m_colInternalBottom, m_colInternalTop);

    // clip -> transposed clip
    addInternal(blobClipSrc, m_blobClip);
    m_transposeClip.Setup(m_colInternalBottom, m_colInternalTop);
    // Make sure the first item is set to 1.
    m_blobClip.SetData(1, 0);

    // state -> Wc
    m_blobState.ReshapeLike(blobStateSrc);
    addInternal(m_blobState, m_blobWc);
    m_ipWa.Setup(m_colInternalBottom, m_colInternalTop);

    // (Uh, full Wc) -> add output
    m_blobFullWc.ReshapeLike(m_blobUh);
    List<Blob<T>> rgAddInputs = new List<Blob<T>>();
    rgAddInputs.Add(m_blobUh);
    rgAddInputs.Add(m_blobFullWc);
    addInternal(rgAddInputs, m_blobAddOutput);
    m_add1.Setup(m_colInternalBottom, m_colInternalTop);

    // add output -> GG
    addInternal(m_blobAddOutput, m_blobGG);
    m_tanh.Setup(m_colInternalBottom, m_colInternalTop);

    // GG -> AA
    addInternal(m_blobGG, m_blobAA);
    m_ipV.Setup(m_colInternalBottom, m_colInternalTop);

    // Focused input: the input shape with its first two axes exchanged.
    List<int> rgShapeFocus = Utility.Clone<int>(blobInput.shape());
    rgShapeFocus[0] = blobInput.shape(1);
    rgShapeFocus[1] = blobInput.shape(0);
    m_blobFocusedInput.Reshape(rgShapeFocus);

    // Context: the input's axis 1 moves to axis 0 and axis 1 becomes 1.
    List<int> rgShapeContext = Utility.Clone<int>(blobInput.shape());
    rgShapeContext[0] = rgShapeContext[1];
    rgShapeContext[1] = 1;
    m_blobContext.Reshape(rgShapeContext);

    // Top: the context shape with its first two axes exchanged.
    List<int> rgShapeTop = Utility.Clone<int>(m_blobContext.shape());
    rgShapeTop[0] = m_blobContext.shape(1);
    rgShapeTop[1] = m_blobContext.shape(0);
    colTop[0].Reshape(rgShapeTop);

    // Rebuild this layer's blobs: all m_ipUa blobs, then all m_ipWa blobs,
    // then only the first m_ipV blob.
    blobs.Clear();

    for (int i = 0; i < m_ipUa.blobs.Count; i++)
    {
        blobs.Add(m_ipUa.blobs[i]);
    }

    for (int i = 0; i < m_ipWa.blobs.Count; i++)
    {
        blobs.Add(m_ipWa.blobs[i]);
    }

    // V
    blobs.Add(m_ipV.blobs[0]);
}
/// <summary>
/// The AccuracyLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Provides AccuracyParameter accuracy_param,
/// with AccuracyLayer options:
///   - top_k (optional, default 1).
///     Sets the maximum rank k at which a prediction is considered
///     correct.  For example, if k = 5, a prediction is counted
///     correct if the correct label is among the top 5 predicted labels.
/// </param>
public AccuracyLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.ACCURACY;

    // The nums buffer blob is created with the same cuda and log handed to the base layer.
    this.m_blobNumsBuffer = new Blob<T>(cuda, log);
}
/// <summary>
/// The ArgMaxLayer constructor.
/// </summary>
/// <param name="cuda">Specifies the CudaDnn connection to Cuda.</param>
/// <param name="log">Specifies the Log for output.</param>
/// <param name="p">Provides ArgMaxParameter argmax_param,
/// with ArgMaxLayer options:
///   - top_k (optional uint, default 1).
///     The number K of maximal items to output.
///   - out_max_val (optional bool, default false).
///     If set, output a vector of pairs (max_ind, max_val), unless axis is set, in which
///     case output max_val along the specified axis.
///   - axis (optional int).
///     If set, maximise along the specified axis; otherwise maximise the flattened
///     trailing dimensions for each index of the first / num dimension.
/// </param>
public ArgMaxLayer(CudaDnn<T> cuda, Log log, LayerParameter p)
    : base(cuda, log, p)
{
    this.m_type = LayerParameter.LayerType.ARGMAX;
}