/// <summary> /// Create a new Filler instance. /// </summary> /// <param name="cuda">Specifies the CudaDnn instance.</param> /// <param name="log">Specifies the log for output.</param> /// <param name="p">Specifies the filler parameter.</param> /// <returns></returns> public static Filler <T> Create(CudaDnn <T> cuda, Log log, FillerParameter p) { switch (p.type) { case "constant": return(new ConstantFiller <T>(cuda, log, p)); case "sequence": return(new SequenceFiller <T>(cuda, log, p)); case "gaussian": return(new GaussianFiller <T>(cuda, log, p)); case "uniform": return(new UniformFiller <T>(cuda, log, p)); case "positive_unitball": return(new PositiveUnitballFiller <T>(cuda, log, p)); case "xavier": return(new XavierFiller <T>(cuda, log, p)); case "msra": return(new MsraFiller <T>(cuda, log, p)); case "bilinear": return(new BilinearFiller <T>(cuda, log, p)); default: log.FAIL("Unknown filler type: " + p.type); return(null); } }
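A minimal usage sketch of the factory above, written in the same generic context <T> used by the snippets in this section; it assumes the surrounding CudaDnn<T> and Log instances (m_cuda, m_log) already exist and only uses calls that appear below (FillerParameter min/max, Filler<T>.Create, Fill).

// Build a filler parameter, let the factory pick the concrete filler type,
// and fill a blob with uniform random values in [-0.05, 0.05].
FillerParameter fp = new FillerParameter("uniform");
fp.min = -0.05;
fp.max = 0.05;

Filler<T> filler = Filler<T>.Create(m_cuda, m_log, fp);

Blob<T> blob = new Blob<T>(m_cuda, m_log, new List<int>() { 2, 3, 4, 5 });
filler.Fill(blob);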
/// <summary> /// Parses the parameter from a RawProto. /// </summary> /// <param name="rp">Specifies the RawProto to parse.</param> /// <returns>A new instance of the parameter is returned.</returns> public static Normalization2Parameter FromProto(RawProto rp) { string strVal; Normalization2Parameter p = new Normalization2Parameter(); if ((strVal = rp.FindValue("across_spatial")) != null) { p.across_spatial = bool.Parse(strVal); } if ((strVal = rp.FindValue("channel_shared")) != null) { p.channel_shared = bool.Parse(strVal); } if ((strVal = rp.FindValue("eps")) != null) { p.eps = ParseFloat(strVal); } RawProto rgScaleFiller = rp.FindChild("scale_filler"); if (rgScaleFiller != null) { p.scale_filler = FillerParameter.FromProto(rgScaleFiller); } return(p); }
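A sketch of how FromProto is typically reached from prototxt-style text; RawProto.Parse is assumed here to be the entry point that produces the RawProto node, and the field names follow the FindValue calls above.

string strProto = "across_spatial: false\n" +
                  "channel_shared: true\n" +
                  "eps: 1e-10\n" +
                  "scale_filler { type: \"constant\" value: 20 }";

RawProto rp = RawProto.Parse(strProto);                           // assumed entry point.
Normalization2Parameter p = Normalization2Parameter.FromProto(rp);
// Expected: p.across_spatial == false, p.channel_shared == true,
// p.eps == 1e-10f, and p.scale_filler is a "constant" filler with value 20.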
/// <summary> /// Copy one parameter to another. /// </summary> /// <param name="src">Specifies the parameter to copy.</param> public override void Copy(LayerParameterBase src) { Normalization2Parameter p = (Normalization2Parameter)src; m_bAcrossSpatial = p.m_bAcrossSpatial; m_bChannelShared = p.m_bChannelShared; m_fEps = p.m_fEps; m_scaleFiller = p.m_scaleFiller.Clone(); }
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { if (colBottom.Count == 1 && m_colBlobs.Count > 0) { m_log.WriteLine("Skipping parameter initialization."); } else if (colBottom.Count == 1) { // bias is a learned parameter; initialize it. BiasParameter p = m_param.bias_param; int nAxis = colBottom[0].CanonicalAxisIndex(p.axis); int nNumAxes = p.num_axes; m_log.CHECK_GE(nNumAxes, -1, "num_axes must be non-negative, or -1 to extend to end of bottom[0]."); if (nNumAxes >= 0) { m_log.CHECK_GE(colBottom[0].num_axes, nAxis + nNumAxes, "bias blob's shape extends past bottom[0]'s shape when applied starting with bottom[0] axis = " + nAxis.ToString()); } m_colBlobs = new BlobCollection <T>(); List <int> rgBiasShape = new List <int>(); int nStart = nAxis; int nEnd = (nNumAxes == -1) ? colBottom[0].shape().Count : nStart + nNumAxes; for (int i = nStart; i < nEnd; i++) { rgBiasShape.Add(colBottom[0].shape(i)); } Blob <T> blobBias = new Blob <T>(m_cuda, m_log); blobBias.Name = m_param.name + " bias"; blobBias.type = BLOB_TYPE.INTERNAL; blobBias.type = BLOB_TYPE.WEIGHT; if (!shareParameter(blobBias, rgBiasShape)) { blobBias.Reshape(rgBiasShape); FillerParameter fp = p.filler; if (fp == null) { fp = new FillerParameter("constant", 0.0); } Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(blobBias); } m_colBlobs.Add(blobBias); } m_rgbParamPropagateDown = new DictionaryMap <bool>(m_colBlobs.Count, true); }
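A plain C# sketch (no library types) of the bias-shape computation above, for a hypothetical bottom[0] of shape (2, 3, 4, 5):

// axis = 1, num_axes = 1  -> bias shape (3):        one bias value per channel.
// axis = 1, num_axes = -1 -> bias shape (3, 4, 5):  one bias value per (c, h, w).
int[] rgBottomShape = new int[] { 2, 3, 4, 5 };
int nAxis = 1;
int nNumAxes = -1;

int nEnd = (nNumAxes == -1) ? rgBottomShape.Length : nAxis + nNumAxes;
List<int> rgBiasShape = new List<int>();
for (int i = nAxis; i < nEnd; i++)
{
    rgBiasShape.Add(rgBottomShape[i]);
}
// rgBiasShape == { 3, 4, 5 }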
private float randomUniformValue(float fMin, float fMax) { m_blobWork.Reshape(1, 1, 1, 1); FillerParameter fp = new FillerParameter("uniform"); fp.min = fMin; fp.max = fMax; Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(m_blobWork); float[] rg = Utility.ConvertVecF <T>(m_blobWork.mutable_cpu_data); return(rg[0]); }
/// <summary> /// Re-initialize the parameters of the layer. /// </summary> /// <returns>When handled, this method returns <i>true</i>, otherwise <i>false</i>.</returns> public override bool ReInitializeParameters() { base.ReInitializeParameters(); FillerParameter fp = m_param.prelu_param.filler; if (fp == null) { fp = new FillerParameter("constant", 0.25); } Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(m_colBlobs[0]); return(true); }
/// <summary> /// Re-initialize the parameters of the layer. /// </summary> /// <param name="target">Specifies the weights to target (e.g. weights, bias or both).</param> /// <returns>When handled, this method returns <i>true</i>, otherwise <i>false</i>.</returns> public override bool ReInitializeParameters(WEIGHT_TARGET target) { base.ReInitializeParameters(target); if (target == WEIGHT_TARGET.BOTH || target == WEIGHT_TARGET.BIAS) { FillerParameter fp = m_param.bias_param.filler; if (fp == null) { fp = new FillerParameter("constant", 0.0); } Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(m_colBlobs[0]); } return(true); }
/// <summary> /// Re-initialize the parameters of the layer. /// </summary> /// <returns>When handled, this method returns <i>true</i>, otherwise <i>false</i>.</returns> public override bool ReInitializeParameters() { base.ReInitializeParameters(); FillerParameter fp = m_param.scale_param.filler; if (fp == null) { fp = new FillerParameter("constant", 1.0); } Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(m_colBlobs[0]); if (m_param.scale_param.bias_term) { m_biasLayer.ReInitializeParameters(); } return(true); }
public TestEx(string strName, List <int> rgBottomShape = null, int nDeviceID = TestBase.DEFAULT_DEVICE_ID) : base(strName, nDeviceID) { if (rgBottomShape == null) { rgBottomShape = new List <int>() { 2, 3, 4, 5 }; } m_blob_bottom = new Blob <T>(m_cuda, m_log, rgBottomShape); m_blob_top = new Blob <T>(m_cuda, m_log); m_colBottom.Add(m_blob_bottom); m_colTop.Add(m_blob_top); FillerParameter fp = getFillerParam(); m_filler = Filler <T> .Create(m_cuda, m_log, fp); m_filler.Fill(m_blob_bottom); }
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public PositiveUnitballFiller(CudaDnn <T> cuda, Log log, FillerParameter p) : base(cuda, log, p) { }
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { bool bUseCuDnn = m_param.batch_norm_param.useCudnn(); m_dfMovingAverageFraction = m_param.batch_norm_param.moving_average_fraction; m_bUseGlobalStats = (m_phase == Phase.TEST || m_phase == Phase.RUN) ? true : false; if (m_param.batch_norm_param.use_global_stats.HasValue) { m_bUseGlobalStats = m_param.batch_norm_param.use_global_stats.Value; } if (colBottom[0].num_axes == 1) { m_nChannels = 1; } else { m_nChannels = colBottom[0].shape(1); } m_dfEps = m_param.batch_norm_param.eps; m_bScaleBias = m_param.batch_norm_param.scale_bias; // by default = false; if (m_param.batch_norm_param.scale_filler != null || // implicitly set. m_param.batch_norm_param.bias_filler != null) { m_bScaleBias = true; } if (m_bScaleBias && !bUseCuDnn) { m_bScaleBias = false; } if (m_colBlobs.Count > 0) { m_log.WriteLine("Skipping parameter initialization."); } else { List <int> rgSize = new List <int>(); rgSize.Add(m_nChannels); m_colBlobs.Clear(true); m_colBlobs.Add(new Blob <T>(m_cuda, m_log, rgSize, false)); // global mean m_colBlobs[0].Name = "global mean"; m_colBlobs[0].SetData(0.0); m_colBlobs.Add(new Blob <T>(m_cuda, m_log, rgSize, false)); // global var m_colBlobs[1].Name = "global variance"; m_colBlobs[1].SetData(0.0); m_colBlobs.Add(new Blob <T>(m_cuda, m_log, rgSize, false)); // variance correction m_colBlobs[2].Name = "var correction"; m_colBlobs[2].SetData(1.0); if (m_bScaleBias) { m_colBlobs.Add(new Blob <T>(m_cuda, m_log, rgSize)); // scale m_colBlobs[3].Name = "scale"; FillerParameter fpScale = m_param.batch_norm_param.scale_filler; if (fpScale == null) { fpScale = new FillerParameter("constant", 1.0); } Filler <T> fillerScale = Filler <T> .Create(m_cuda, m_log, fpScale); fillerScale.Fill(m_colBlobs[3]); m_colBlobs.Add(new Blob <T>(m_cuda, m_log, rgSize)); // bias m_colBlobs[4].Name = "bias"; FillerParameter fpBias = m_param.batch_norm_param.bias_filler; if (fpBias == null) { fpBias = new FillerParameter("constant", 0.0); } Filler <T> fillerBias = Filler <T> .Create(m_cuda, m_log, fpBias); fillerBias.Fill(m_colBlobs[4]); } m_nIteration = 0; } // Mask statistics from optimization by setting local learning rates // for mean, variance, and variance correction to zero. for (int i = 0; i < 3; i++) { if (m_param.parameters.Count == i) { m_param.parameters.Add(new ParamSpec(0.0, 0.0)); } else { m_param.parameters[i].lr_mult = 0; m_param.parameters[i].decay_mult = 0; } } // Set lr for scale and bias to 1 if (m_bScaleBias) { for (int i = 3; i < 5; i++) { if (m_param.parameters.Count == i) { m_param.parameters.Add(new ParamSpec(1.0, 1.0)); } else { m_param.parameters[i].lr_mult = 1; m_param.parameters[i].decay_mult = 1; } } } if (!m_param.batch_norm_param.useCudnn()) { return; } //----------------------------------- // Handle cuDNN setup //----------------------------------- // Setup the convert to half flags used by the Layer just before calling forward and backward.
m_bUseHalfSize = m_param.use_halfsize; int nChannels = colBottom[0].channels; List <int> rgShape = new List <int>() { 1, nChannels, 1, 1 }; if (!m_bScaleBias) { m_blobScaleOnes.Reshape(rgShape); m_blobScaleOnes.SetData(1.0); m_blobBiasZeros.Reshape(rgShape); m_blobBiasZeros.SetData(0.0); } m_hCuDnn = m_cuda.CreateCuDNN(); m_hFwdBottomDesc = m_cuda.CreateTensorDesc(); m_hFwdTopDesc = m_cuda.CreateTensorDesc(); m_hFwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc(); m_hBwdBottomDesc = m_cuda.CreateTensorDesc(); m_hBwdTopDesc = m_cuda.CreateTensorDesc(); m_hBwdScaleBiasMeanVarDesc = m_cuda.CreateTensorDesc(); m_mode = BATCHNORM_MODE.SPATIAL; m_dfEps = Math.Max(m_dfEps, CUDNN_BN_MIN_EPSILON); // cuDNN requires eps >= CUDNN_BN_MIN_EPSILON. m_blobMean.Reshape(rgShape); m_blobVariance.Reshape(rgShape); if (colBottom[0] == colTop[0]) // CuDNN BN does not support in-place. { m_blobPrivateTop.ReshapeLike(colTop[0]); m_blobPrivateBottom.ReshapeLike(colBottom[0]); } }
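The blobs created above hold the per-channel statistics and, optionally, the scale/bias pair. As a library-free illustration, this is the textbook per-channel normalization those blobs parameterize (not the exact CUDA/cuDNN kernel):

// x: a value from channel c; mean/var: that channel's statistics;
// gamma/beta: the optional scale and bias (1 and 0 when scale_bias = false).
static double BatchNormValue(double x, double mean, double var, double gamma, double beta, double eps)
{
    return gamma * (x - mean) / Math.Sqrt(var + eps) + beta;
}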
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { m_log.CHECK_GE(colBottom[0].num_axes, 2, "Number of axes of bottom must be >= 2"); PReLUParameter p = m_param.prelu_param; int nChannels = colBottom[0].channels; m_bChannelShared = p.channel_shared; if (m_colBlobs.Count > 0) { m_log.WriteLine("Skipping parameter initialization."); } else { m_colBlobs = new BlobCollection <T>(); List <int> rgSlopeShape = new List <int>(); if (!m_bChannelShared) { rgSlopeShape.Add(nChannels); } Blob <T> blobSlope = new Blob <T>(m_cuda, m_log); blobSlope.Name = m_param.name + " slope"; blobSlope.type = BLOB_TYPE.INTERNAL; if (!shareParameter(blobSlope, rgSlopeShape)) { blobSlope.Reshape(rgSlopeShape); FillerParameter fp = p.filler; if (fp == null) { fp = new FillerParameter("constant", 0.25); } Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(blobSlope); } m_colBlobs.Add(blobSlope); } if (m_bChannelShared) { m_log.CHECK_EQ(m_colBlobs[0].count(), 1, "Negative slope size is inconsistent with prototxt config."); } else { m_log.CHECK_EQ(m_colBlobs[0].count(), nChannels, "Negative slope size is inconsistent with prototxt config."); } // Propagate gradients to the parameters (as directed by backward pass) m_rgbParamPropagateDown = new DictionaryMap <bool>(m_colBlobs.Count, true); List <int> rgShape = new List <int>() { colBottom[0].count(1) }; m_blobMultiplier.Reshape(rgShape); m_blobBackwardBuff.Reshape(rgShape); m_blobMultiplier.SetData(1.0); }
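A library-free sketch of the activation the slope blob above parameterizes; with channel_shared = true a single learned slope is applied to every channel, otherwise one slope per channel:

// PReLU: y = x when x > 0, y = a * x otherwise, where a is the learned slope.
static double PReLU(double x, double[] rgSlope, int nChannel, bool bChannelShared)
{
    double dfSlope = bChannelShared ? rgSlope[0] : rgSlope[nChannel];
    return (x > 0) ? x : dfSlope * x;
}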
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public UniformFiller(CudaDnn <T> cuda, Log log, FillerParameter p) : base(cuda, log, p) { }
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public ConstantFiller(CudaDnn <T> cuda, Log log, FillerParameter p) : base(cuda, log, p) { }
/// <summary> /// Fills the NetParameter with the RNN network architecture. /// </summary> /// <param name="net_param"></param> protected override void FillUnrolledNet(NetParameter net_param) { uint nNumOutput = m_param.recurrent_param.num_output; m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive."); FillerParameter weight_filler = m_param.recurrent_param.weight_filler; FillerParameter bias_filler = m_param.recurrent_param.bias_filler; // Add generic LayerParameter's (without bottoms/tops) of layer types we'll // use to save redundant code. LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT); hidden_param.inner_product_param.num_output = nNumOutput; hidden_param.inner_product_param.bias_term = false; hidden_param.inner_product_param.axis = 2; hidden_param.inner_product_param.weight_filler = weight_filler.Clone(); LayerParameter biased_hidden_param = hidden_param.Clone(false); biased_hidden_param.inner_product_param.bias_term = true; biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone(); LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE); sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM; LayerParameter tanh_param = new LayerParameter(LayerParameter.LayerType.TANH); LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE); scale_param.scale_param.axis = 0; LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE); slice_param.slice_param.axis = 0; List <BlobShape> rgInputShapes = new List <BlobShape>(); RecurrentInputShapes(rgInputShapes); m_log.CHECK_EQ(1, rgInputShapes.Count, "There should only be one input shape."); //--- Add the layers --- LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT); input_layer_param.top.Add("h_0"); input_layer_param.input_param.shape.Add(rgInputShapes[0]); net_param.layer.Add(input_layer_param); LayerParameter cont_slice_param = slice_param.Clone(false); cont_slice_param.name = "cont_slice"; cont_slice_param.bottom.Add("cont"); cont_slice_param.slice_param.axis = 0; net_param.layer.Add(cont_slice_param); // Add layer to transform all timesteps of x to the hidden state dimension. // W_xh_x = W_xh * x + b_h { LayerParameter x_transform_param = biased_hidden_param.Clone(false); x_transform_param.name = "x_transform"; x_transform_param.parameters.Add(new ParamSpec("W_xh")); x_transform_param.parameters.Add(new ParamSpec("b_h")); x_transform_param.bottom.Add("x"); x_transform_param.top.Add("W_xh_x"); x_transform_param.propagate_down.Add(true); net_param.layer.Add(x_transform_param); } if (m_bStaticInput) { // Add layer to transform x_static to the hidden state dimension. // W_xh_x_static = W_xh_static * x_static LayerParameter x_static_transform_param = hidden_param.Clone(false); x_static_transform_param.inner_product_param.axis = 1; x_static_transform_param.name = "W_xh_x_static"; x_static_transform_param.parameters.Add(new ParamSpec("W_xh_static")); x_static_transform_param.bottom.Add("x_static"); x_static_transform_param.top.Add("W_xh_x_static_preshape"); x_static_transform_param.propagate_down.Add(true); net_param.layer.Add(x_static_transform_param); LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE); BlobShape new_shape = reshape_param.reshape_param.shape; new_shape.dim.Add(1); // One timestep. new_shape.dim.Add(-1); // Should infer m_nN as the dimension so we can reshape on batch size. 
new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output); reshape_param.name = "W_xh_x_static_reshape"; reshape_param.bottom.Add("W_xh_x_static_preshape"); reshape_param.top.Add("W_xh_x_static"); net_param.layer.Add(reshape_param); } LayerParameter x_slice_param = slice_param.Clone(false); x_slice_param.name = "W_xh_x_slice"; x_slice_param.bottom.Add("W_xh_x"); net_param.layer.Add(x_slice_param); LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT); output_concat_layer.name = "o_concat"; output_concat_layer.top.Add("o"); output_concat_layer.concat_param.axis = 0; for (int t = 1; t <= m_nT; t++) { string tm1s = (t - 1).ToString(); string ts = t.ToString(); cont_slice_param.top.Add("cont_" + ts); x_slice_param.top.Add("W_xh_x_" + ts); // Add layer to flush the hidden state when beginning a new sequence, // as indicated by cont_t. // h_conted_{t-1} := cont_t * h_{t-1} // // Normally, cont_t is binary (i.e., 0 or 1), so: // h_conted_{t-1} := h_{t-1} if cont_t == 1 // 0 otherwise. { LayerParameter cont_h_param = scale_param.Clone(false); cont_h_param.name = "h_conted_" + tm1s; cont_h_param.bottom.Add("h_" + tm1s); cont_h_param.bottom.Add("cont_" + ts); cont_h_param.top.Add("h_conted_" + tm1s); net_param.layer.Add(cont_h_param); } // Add layer to compute // W_hh_h_{t-1} := W_hh * h_conted_{t-1} { LayerParameter w_param = hidden_param.Clone(false); w_param.name = "W_hh_h_" + tm1s; w_param.parameters.Add(new ParamSpec("W_hh")); w_param.bottom.Add("h_conted_" + tm1s); w_param.top.Add("W_hh_h_" + tm1s); w_param.inner_product_param.axis = 2; net_param.layer.Add(w_param); } // Add layers to compute // h_t := \tanh( W_hh * h_conted_t{t-1} + W_xh * x_t + b_h ) // = \tanh( W_hh_h_{t-1} + W_xh_t ) { LayerParameter h_input_sum_param = sum_param.Clone(false); h_input_sum_param.name = "h_input_sum_" + ts; h_input_sum_param.bottom.Add("W_hh_h_" + tm1s); h_input_sum_param.bottom.Add("W_xh_x_" + ts); if (m_bStaticInput) { h_input_sum_param.bottom.Add("W_xh_x_static"); } h_input_sum_param.top.Add("h_neuron_input_" + ts); net_param.layer.Add(h_input_sum_param); } { LayerParameter h_neuron_param = tanh_param.Clone(false); h_neuron_param.name = "h_neuron_input_" + ts; h_neuron_param.bottom.Add("h_neuron_input_" + ts); h_neuron_param.top.Add("h_" + ts); net_param.layer.Add(h_neuron_param); } // Add layer to compute // W_ho_h_t := W_ho * h_t + b_o { LayerParameter w_param = biased_hidden_param.Clone(false); w_param.name = "W_ho_h_" + ts; w_param.parameters.Add(new ParamSpec("W_ho")); w_param.parameters.Add(new ParamSpec("b_o")); w_param.bottom.Add("h_" + ts); w_param.top.Add("W_ho_h_" + ts); w_param.inner_product_param.axis = 2; net_param.layer.Add(w_param); } // Add layer to compute // o_t := \tanh( W_ho * h_t + b_o // = \tanh( W_ho_h_t ) { LayerParameter o_neuron_param = tanh_param.Clone(false); o_neuron_param.name = "o_neuron_" + ts; o_neuron_param.bottom.Add("W_ho_h_" + ts); o_neuron_param.top.Add("o_" + ts); net_param.layer.Add(o_neuron_param); } output_concat_layer.bottom.Add("o_" + ts); } net_param.layer.Add(output_concat_layer.Clone(false)); }
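A compact, library-free sketch of the single timestep that the unrolled layers above implement; the names mirror the inline comments, and W_xh_x_t (i.e. W_xh * x_t + b_h) is assumed precomputed for every timestep by the "x_transform" layer.

static double[] RnnStep(double[,] W_hh, double[] hPrev, double cont,
                        double[] W_xh_x_t, double[,] W_ho, double[] b_o,
                        out double[] o_t)
{
    int nH = hPrev.Length;
    double[] h = new double[nH];

    // h_t := tanh(W_hh * (cont_t * h_{t-1}) + W_xh_x_t)
    for (int i = 0; i < nH; i++)
    {
        double dfSum = W_xh_x_t[i];
        for (int j = 0; j < nH; j++)
            dfSum += W_hh[i, j] * (cont * hPrev[j]);
        h[i] = Math.Tanh(dfSum);
    }

    // o_t := tanh(W_ho * h_t + b_o)
    int nO = b_o.Length;
    o_t = new double[nO];
    for (int i = 0; i < nO; i++)
    {
        double dfSum = b_o[i];
        for (int j = 0; j < nH; j++)
            dfSum += W_ho[i, j] * h[j];
        o_t[i] = Math.Tanh(dfSum);
    }

    return h;   // h_t becomes h_{t-1} of the next timestep.
}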
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Not used.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { int num_top = colTop.Count; DummyDataParameter param = m_param.dummy_data_param; int num_data_filler = param.data_filler.Count; m_log.CHECK(num_data_filler == 0 || num_data_filler == 1 || num_data_filler == num_top, "Number of data fillers must be 0, 1 or equal to the number of tops: " + num_top.ToString() + "; you specified " + num_data_filler.ToString() + " data fillers."); bool legacy_dims = (param.num.Count > 0 || param.channels.Count > 0 || param.height.Count > 0 || param.width.Count > 0) ? true : false; if (legacy_dims) { m_log.CHECK_EQ(0, param.shape.Count, "Both shape and legacy fields were specified."); // Using deprecated 4D output dim specifiers. m_log.CHECK(param.num.Count == 1 || param.num.Count == num_top, "Must specify 'num' once, or once per top blob (" + num_top.ToString() + "); specified " + param.num.Count.ToString() + "."); m_log.CHECK(param.channels.Count == 1 || param.channels.Count == num_top, "Must specify 'channels' once, or once per top blob (" + num_top.ToString() + "); specified " + param.channels.Count.ToString() + "."); m_log.CHECK(param.height.Count == 1 || param.height.Count == num_top, "Must specify 'height' once, or once per top blob (" + num_top.ToString() + "); specified " + param.height.Count.ToString() + "."); m_log.CHECK(param.width.Count == 1 || param.width.Count == num_top, "Must specify 'width' once, or once per top blob (" + num_top.ToString() + "); specified " + param.width.Count.ToString() + "."); } // refill_[i] tells Forward whether or not to actually refill top Blob i. // if refill_[i] is false, Forward does nothing for Blob i. We use this to // avoid wastefully refilling 'constant' Blobs in every forward pass. // We first fill refill_ in with the INVERSE of its final values. // The first time we run Forward from the LayerSetup method, we'll fill only // Blobs for which refill_ is normally false. These blobs will never be // filled again. m_rgbRefill = new List <bool>(); m_rgFillers = new List <Filler <T> >(); if (num_data_filler <= 1) { FillerParameter filler_param; if (num_data_filler == 0) { filler_param = new FillerParameter("constant"); filler_param.value = 0; } else { filler_param = param.data_filler[0].Clone(); } // Refill on each iteration iff not using a constant filler, // but use the inverse of this rule for the first run. m_rgbRefill.Add((filler_param.type == "constant") ? true : false); m_rgFillers.Add(Filler <T> .Create(m_cuda, m_log, filler_param)); } else { for (int i = 0; i < num_top; i++) { m_rgFillers.Add(Filler <T> .Create(m_cuda, m_log, param.data_filler[i])); // Refill on each iteration iff not using a constant filler, // but use the inverse of this rule for the first run. m_rgbRefill.Add((param.data_filler[i].type == "constant") ? true : false); } } for (int i = 0; i < num_top; i++) { if (legacy_dims) { int num = (int)((param.num.Count == 1) ? param.num[0] : param.num[i]); int channels = (int)((param.channels.Count == 1) ? param.channels[0] : param.channels[i]); int height = (int)((param.height.Count == 1) ? param.height[0] : param.height[i]); int width = (int)((param.width.Count == 1) ? param.width[0] : param.width[i]); colTop[i].Reshape(num, channels, height, width); } else { int shape_index = (param.shape.Count == 1) ?
0 : i; colTop[i].Reshape(param.shape[shape_index]); } } // Run Forward once, with refill_ inverted, to fill the constant blobs. Forward(colBottom, colTop); // Invert the inverted refill_ values to refill the desired (non-constant) // Blobs in every usual forward pass. for (int i = 0; i < m_rgbRefill.Count; i++) { m_rgbRefill[i] = !m_rgbRefill[i]; } }
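A small sketch of the refill bookkeeping above for two tops with hypothetical 'constant' and 'gaussian' fillers: the flags start inverted so the single Forward call made from LayerSetUp fills only the constant blob, and after inversion every later Forward refills only the non-constant one.

List<string> rgFillerTypes = new List<string>() { "constant", "gaussian" };
List<bool> rgbRefill = new List<bool>();

// First pass holds the INVERSE of the final values.
foreach (string strType in rgFillerTypes)
    rgbRefill.Add(strType == "constant");      // { true, false }

// ... Forward is run once here, filling only the constant top ...

// Invert: from now on only the gaussian top is refilled on each Forward.
for (int i = 0; i < rgbRefill.Count; i++)
    rgbRefill[i] = !rgbRefill[i];              // { false, true }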
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public Filler(CudaDnn <T> cuda, Log log, FillerParameter p) { m_cuda = cuda; m_log = log; m_param = p; }
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public SequenceFiller(CudaDnn <T> cuda, Log log, FillerParameter p) : base(cuda, log, p) { }
/// <summary> /// Fills the NetParameter with the LSTM network architecture. /// </summary> /// <param name="net_param"></param> protected override void FillUnrolledNet(NetParameter net_param) { uint nNumOutput = m_param.recurrent_param.num_output; m_log.CHECK_GT(nNumOutput, 0, "num_output must be positive."); FillerParameter weight_filler = m_param.recurrent_param.weight_filler; FillerParameter bias_filler = m_param.recurrent_param.bias_filler; // Add generic LayerParameter's (without bottoms/tops) of layer types we'll // use to save redundant code. LayerParameter hidden_param = new param.LayerParameter(LayerParameter.LayerType.INNERPRODUCT); hidden_param.inner_product_param.num_output = nNumOutput * 4; hidden_param.inner_product_param.bias_term = false; hidden_param.inner_product_param.axis = 2; hidden_param.inner_product_param.weight_filler = weight_filler.Clone(); LayerParameter biased_hidden_param = hidden_param.Clone(false); biased_hidden_param.inner_product_param.bias_term = true; biased_hidden_param.inner_product_param.bias_filler = bias_filler.Clone(); LayerParameter sum_param = new param.LayerParameter(LayerParameter.LayerType.ELTWISE); sum_param.eltwise_param.operation = EltwiseParameter.EltwiseOp.SUM; LayerParameter scale_param = new LayerParameter(LayerParameter.LayerType.SCALE); scale_param.scale_param.axis = 0; LayerParameter slice_param = new LayerParameter(LayerParameter.LayerType.SLICE); slice_param.slice_param.axis = 0; LayerParameter split_param = new LayerParameter(LayerParameter.LayerType.SPLIT); List <BlobShape> rgInputShapes = new List <BlobShape>(); RecurrentInputShapes(rgInputShapes); m_log.CHECK_EQ(2, rgInputShapes.Count, "There should be 2 input shapes."); //--- Add the layers --- LayerParameter input_layer_param = new LayerParameter(LayerParameter.LayerType.INPUT); input_layer_param.top.Add("c_0"); input_layer_param.input_param.shape.Add(rgInputShapes[0].Clone()); input_layer_param.top.Add("h_0"); input_layer_param.input_param.shape.Add(rgInputShapes[1].Clone()); net_param.layer.Add(input_layer_param); LayerParameter cont_slice_param = slice_param.Clone(false); cont_slice_param.name = "cont_slice"; cont_slice_param.bottom.Add("cont"); cont_slice_param.slice_param.axis = 0; net_param.layer.Add(cont_slice_param); // Add layer to transform all timesteps of x to the hidden state dimension. // W_xc_x = W_xc * x + b_c { LayerParameter x_transform_param = biased_hidden_param.Clone(false); x_transform_param.name = "x_transform"; x_transform_param.parameters.Add(new ParamSpec("W_xc")); x_transform_param.parameters.Add(new ParamSpec("b_c")); x_transform_param.bottom.Add("x"); x_transform_param.top.Add("W_xc_x"); x_transform_param.propagate_down.Add(true); net_param.layer.Add(x_transform_param); } if (m_bStaticInput) { // Add layer to transform x_static to the hidden state dimension. // W_xc_x_static = W_xc_static * x_static LayerParameter x_static_transform_param = hidden_param.Clone(false); x_static_transform_param.inner_product_param.axis = 1; x_static_transform_param.name = "W_xc_x_static"; x_static_transform_param.parameters.Add(new ParamSpec("W_xc_static")); x_static_transform_param.bottom.Add("x_static"); x_static_transform_param.top.Add("W_xc_x_static_preshape"); x_static_transform_param.propagate_down.Add(true); net_param.layer.Add(x_static_transform_param); LayerParameter reshape_param = new LayerParameter(LayerParameter.LayerType.RESHAPE); BlobShape new_shape = reshape_param.reshape_param.shape; new_shape.dim.Add(1); // One timestep. 
new_shape.dim.Add(-1); // Should infer m_nN as the dimension so we can reshape on batch size. new_shape.dim.Add((int)x_static_transform_param.inner_product_param.num_output); reshape_param.name = "W_xc_x_static_reshape"; reshape_param.bottom.Add("W_xc_x_static_preshape"); reshape_param.top.Add("W_xc_x_static"); net_param.layer.Add(reshape_param); } LayerParameter x_slice_param = slice_param.Clone(false); x_slice_param.name = "W_xc_x_slice"; x_slice_param.bottom.Add("W_xc_x"); net_param.layer.Add(x_slice_param); LayerParameter output_concat_layer = new LayerParameter(LayerParameter.LayerType.CONCAT); output_concat_layer.name = "h_concat"; output_concat_layer.top.Add("h"); output_concat_layer.concat_param.axis = 0; for (int t = 1; t <= m_nT; t++) { string tm1s = (t - 1).ToString(); string ts = t.ToString(); cont_slice_param.top.Add("cont_" + ts); x_slice_param.top.Add("W_xc_x_" + ts); // Add layer to flush the hidden state when beginning a new sequence, // as indicated by cont_t. // h_conted_{t-1} := cont_t * h_{t-1} // // Normally, cont_t is binary (i.e., 0 or 1), so: // h_conted_{t-1} := h_{t-1} if cont_t == 1 // 0 otherwise. { LayerParameter cont_h_param = scale_param.Clone(false); cont_h_param.group_start = true; cont_h_param.name = "h_conted_" + tm1s; cont_h_param.bottom.Add("h_" + tm1s); cont_h_param.bottom.Add("cont_" + ts); cont_h_param.top.Add("h_conted_" + tm1s); net_param.layer.Add(cont_h_param); } // Add layer to compute // W_hc_h_{t-1} := W_hc * h_conted_{t-1} { LayerParameter w_param = hidden_param.Clone(false); w_param.name = "transform_" + ts; w_param.parameters.Add(new ParamSpec("W_hc")); w_param.bottom.Add("h_conted_" + tm1s); w_param.top.Add("W_hc_h_" + tm1s); w_param.inner_product_param.axis = 2; net_param.layer.Add(w_param); } // Add the outputs of the linear transformations to compute the gate input. // get_input_t := W_hc * h_conted_{t-1} + W_xc * x_t + b_c // = W_hc_h_{t-1} + W_xc_x_t + b_c { LayerParameter input_sum_layer = sum_param.Clone(false); input_sum_layer.name = "gate_input_" + ts; input_sum_layer.bottom.Add("W_hc_h_" + tm1s); input_sum_layer.bottom.Add("W_xc_x_" + ts); if (m_bStaticInput) { input_sum_layer.bottom.Add("W_xc_x_static"); } input_sum_layer.top.Add("gate_input_" + ts); net_param.layer.Add(input_sum_layer); } // Add LSTMUnit layer to compute the cell & hidden vectors c_t and h_t. // Inputs: c_{t-1}, gate_input_t = (i_t, f_t, o_t, g_t), cont_t // Outputs: c_t, h_t // [ i_t' ] // [ f_t' ] := gate_input_t // [ o_t' ] // [ g_t' ] // i_t := \sigmoid[i_t'] // f_t := \sigmoid[f_t'] // o_t := \sigmoid[o_t'] // g_t := \tanh[g_t'] // c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t) // h_t := o_t .* \tanh[c_t] { LayerParameter lstm_unit_param = new LayerParameter(LayerParameter.LayerType.LSTM_UNIT); lstm_unit_param.bottom.Add("c_" + tm1s); lstm_unit_param.bottom.Add("gate_input_" + ts); lstm_unit_param.bottom.Add("cont_" + ts); lstm_unit_param.top.Add("c_" + ts); lstm_unit_param.top.Add("h_" + ts); lstm_unit_param.name = "unit_" + ts; net_param.layer.Add(lstm_unit_param); } output_concat_layer.bottom.Add("h_" + ts); } { LayerParameter c_T_copy_param = split_param.Clone(false); c_T_copy_param.bottom.Add("c_" + m_nT.ToString()); c_T_copy_param.top.Add("c_T"); net_param.layer.Add(c_T_copy_param); } net_param.layer.Add(output_concat_layer.Clone(false)); }
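A library-free sketch of the LSTMUnit math described in the comments above, for a single cell value of one timestep; the four pre-activations are the slices of gate_input_t (which is why hidden_param uses num_output * 4).

static void LstmUnit(double cPrev, double cont,
                     double iRaw, double fRaw, double oRaw, double gRaw,
                     out double c, out double h)
{
    double Sigmoid(double x) { return 1.0 / (1.0 + Math.Exp(-x)); }

    double i = Sigmoid(iRaw);            // input gate
    double f = Sigmoid(fRaw);            // forget gate
    double o = Sigmoid(oRaw);            // output gate
    double g = Math.Tanh(gRaw);          // candidate cell input

    c = cont * (f * cPrev) + (i * g);    // c_t := cont_t * (f_t .* c_{t-1}) + (i_t .* g_t)
    h = o * Math.Tanh(c);                // h_t := o_t .* tanh(c_t)
}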
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public GaussianFiller(CudaDnn <T> cuda, Log log, FillerParameter p) : base(cuda, log, p) { m_randVec = new SyncedMemory <T>(m_cuda, m_log); }
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { ScaleParameter p = m_param.scale_param; if (colBottom.Count == 1 && blobs.Count > 0) { m_log.WriteLine("Skipping parameter initialization."); } else if (colBottom.Count == 1) { // scale is a learned parameter; initialize it. m_nAxis = colBottom[0].CanonicalAxisIndex(p.axis); int nNumAxes = p.num_axes; m_log.CHECK_GE(nNumAxes, -1, "num_axes must be non-negative, or -1 to extend to the end of bottom[0]."); if (nNumAxes >= 0) { m_log.CHECK_GE(colBottom[0].num_axes, m_nAxis + nNumAxes, "scale blob's shape extends past bottom[0]'s shape when applied starting with bottom[0] axis = " + m_nAxis.ToString()); } m_colBlobs = new BlobCollection <T>(); List <int> rgShape = new List <int>(); int nStart = m_nAxis; int nEnd = (nNumAxes == -1) ? colBottom[0].shape().Count : nStart + nNumAxes; for (int i = nStart; i < nEnd; i++) { rgShape.Add(colBottom[0].shape(i)); } Blob <T> blobScale = new Blob <T>(m_cuda, m_log, rgShape); blobScale.Name = "scale"; FillerParameter fp = p.filler; // Default to unit (1) filler for identity operation. if (fp == null) { fp = new FillerParameter("constant", 1.0); } Filler <T> filler = Filler <T> .Create(m_cuda, m_log, fp); filler.Fill(blobScale); m_colBlobs.Add(blobScale); } if (p.bias_term) { LayerParameter pb = new LayerParameter(LayerParameter.LayerType.BIAS); pb.bias_param.axis = p.axis; pb.bias_param.num_axes = (colBottom.Count > 1) ? colBottom[1].num_axes : p.num_axes; pb.bias_param.filler = p.bias_filler; m_colBiasBottomVec = new BlobCollection <T>(); m_colBiasBottomVec.Add(colBottom[0]); m_biasLayer = new BiasLayer <T>(m_cuda, m_log, pb); m_biasLayer.Setup(m_colBiasBottomVec, colTop); m_nBiasParamId = m_colBlobs.Count; m_colBlobs.Add(m_biasLayer.blobs[0]); m_rgbBiasPropagateDown = Utility.Create <bool>(1, false); } m_rgbParamPropagateDown = new DictionaryMap <bool>(m_colBlobs.Count(), true); }
/// <summary> /// Setup the layer. /// </summary> /// <param name="colBottom">Specifies the collection of bottom (input) Blobs.</param> /// <param name="colTop">Specifies the collection of top (output) Blobs.</param> public override void LayerSetUp(BlobCollection <T> colBottom, BlobCollection <T> colTop) { int nNumOutput = (int)m_param.inner_product_param.num_output; m_bBiasTerm = m_param.inner_product_param.bias_term; m_bTranspose = m_param.inner_product_param.transpose; m_bEnableNoise = m_param.inner_product_param.enable_noise; m_dfSigmaInit = m_param.inner_product_param.sigma_init; m_nN = nNumOutput; List <int> rgShape = colBottom[0].shape(); int nShapeCount = rgShape.Count; for (int i = nShapeCount; i <= m_param.inner_product_param.axis; i++) { rgShape.Add(1); } if (nShapeCount != rgShape.Count) { colBottom[0].Reshape(rgShape); } int nAxis = colBottom[0].CanonicalAxisIndex(m_param.inner_product_param.axis); // Dimensions starting from 'axis' are 'flattened' into a single // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W), // and axis == 1, N inner products with dimension CHW are performed. m_nK = colBottom[0].count(nAxis); // Check if we need to set up the weights. if (m_colBlobs.Count > 0) { m_log.WriteLine("Skipping parameter initialization."); } else { // Initialize the weight. List <int> rgWeightShape = Utility.Create <int>(2, 0); if (m_bTranspose) { rgWeightShape[0] = m_nK; rgWeightShape[1] = m_nN; } else { rgWeightShape[0] = m_nN; rgWeightShape[1] = m_nK; } double dfNoiseRange = 1.0 / Math.Sqrt(rgWeightShape[1]); Blob <T> blobWeight = new Blob <T>(m_cuda, m_log); blobWeight.Name = m_param.name + " weights"; blobWeight.type = BLOB_TYPE.IP_WEIGHT; if (!shareParameter(blobWeight, rgWeightShape)) { blobWeight.Reshape(rgWeightShape); Filler <T> weight_filler = Filler <T> .Create(m_cuda, m_log, m_param.inner_product_param.weight_filler); weight_filler.Fill(blobWeight); if (m_bEnableNoise) { blobWeight.scale_data(dfNoiseRange); } } m_colBlobs.Add(blobWeight); // If necessary, initialize and fill the bias term.
if (m_bBiasTerm) { List <int> rgBiasShape = Utility.Create <int>(1, 0); rgBiasShape[0] = m_nN; Blob <T> blobBias = new Blob <T>(m_cuda, m_log); blobBias.Name = m_param.name + " bias"; blobBias.type = BLOB_TYPE.IP_WEIGHT; if (!shareParameter(blobBias, rgBiasShape)) { blobBias.Reshape(rgBiasShape); Filler <T> bias_filler = Filler <T> .Create(m_cuda, m_log, m_param.inner_product_param.bias_filler); bias_filler.Fill(blobBias); if (m_bEnableNoise) { blobBias.scale_data(dfNoiseRange); } } m_colBlobs.Add(blobBias); } // Add Noise sigma weight and bias if (m_bEnableNoise) { FillerParameter fp = new FillerParameter("uniform"); fp.min = -1; fp.max = 1; m_fillerEpsilon = Filler <T> .Create(m_cuda, m_log, fp); Blob <T> blobSigmaWeight = new Blob <T>(m_cuda, m_log); blobSigmaWeight.Name = m_param.name + " sigma_wt"; blobSigmaWeight.type = BLOB_TYPE.WEIGHT; blobSigmaWeight.ReshapeLike(m_colBlobs[0]); blobSigmaWeight.SetData(m_dfSigmaInit / Math.Sqrt(blobSigmaWeight.shape(1))); m_colBlobs.Add(blobSigmaWeight); m_blobEpsilonWeight.ReshapeLike(blobSigmaWeight); if (m_bBiasTerm) { Blob <T> blobSigmaBias = new Blob <T>(m_cuda, m_log); blobSigmaBias.Name = m_param.name + " sigma_bias"; blobSigmaBias.type = BLOB_TYPE.WEIGHT; blobSigmaBias.ReshapeLike(m_colBlobs[1]); blobSigmaBias.SetData(m_dfSigmaInit / Math.Sqrt(blobSigmaBias.shape(0))); m_colBlobs.Add(blobSigmaBias); m_blobEpsilonBias.ReshapeLike(blobSigmaBias); } ResetNoise(); } } m_rgbParamPropagateDown = new DictionaryMap <bool>(m_colBlobs.Count, true); }
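A worked example (plain C#) of the noise-related scale factors computed above, for a hypothetical non-transposed weight shape of (nN = 512, nK = 1024) and sigma_init = 0.017:

int nN = 512;                                         // num_output
int nK = 1024;                                        // flattened input dimension
double dfSigmaInit = 0.017;                           // hypothetical sigma_init

double dfNoiseRange = 1.0 / Math.Sqrt(nK);            // scales the filled weight and bias data
double dfSigmaWeight = dfSigmaInit / Math.Sqrt(nK);   // initial value of the sigma weight blob (shape(1) == nK)
double dfSigmaBias = dfSigmaInit / Math.Sqrt(nN);     // initial value of the sigma bias blob (shape(0) == nN)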
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public XavierFiller(CudaDnn <T> cuda, Log log, FillerParameter p) : base(cuda, log, p) { }
/// <summary> /// Constructor. /// </summary> /// <param name="cuda">Instance of CudaDnn - connection to cuda.</param> /// <param name="log">Log used for output.</param> /// <param name="p">Filler parameter that defines the filler settings.</param> public BilinearFiller(CudaDnn <T> cuda, Log log, FillerParameter p) : base(cuda, log, p) { }