Code example #1
File: GradientChecker.cs    Project: yuduanopen/MyCaffe
        /// <summary>
        /// Checks the gradient of a single output with respect to particular input
        /// blob(s).  If check_bottom = i >= 0, check only the ith bottom Blob<T>.
        /// If check_bottom == -1, check everything -- all bottom Blobs and all
        /// param Blobs.  Otherwise (if check_bottom less than -1), check only param Blobs.
        /// </summary>
        public void CheckGradientSingle(Layer <T> layer, BlobCollection <T> colBottom, BlobCollection <T> colTop, int nCheckBottom, int nTopID, int nTopDataID, bool bElementwise = false)
        {
            if (bElementwise)
            {
                m_log.CHECK_EQ(0, layer.blobs.Count(), "Cannot have blobs in the layer checked for element-wise checking.");
                m_log.CHECK_LE(0, nTopID, "The top ID '" + nTopID.ToString() + "' must be zero or greater with element-wise checking.");
                m_log.CHECK_LE(0, nTopDataID, "The top data ID '" + nTopDataID.ToString() + "' must be zero or greater with element-wise checking.");

                int nTopCount = colTop[nTopID].count();

                for (int nBlobID = 0; nBlobID < colBottom.Count(); nBlobID++)
                {
                    m_log.CHECK_EQ(nTopCount, colBottom[nBlobID].count(), "The top count and blob counts must be equal for element-wise checking.");
                }
            }

            // First, figure out what blobs we need to check against, and zero init
            // parameter blobs.
            BlobCollection <T> colBlobsToCheck = new BlobCollection <T>();
            List <bool>        rgPropagateDown = new List <bool>();

            for (int i = 0; i < colBottom.Count; i++)
            {
                rgPropagateDown.Add((nCheckBottom == -1) ? true : false);
            }

            for (int i = 0; i < layer.blobs.Count; i++)
            {
                Blob <T> blob = layer.blobs[i];

                blob.SetDiff(0);
                colBlobsToCheck.Add(blob);
            }

            if (nCheckBottom == -1)
            {
                for (int i = 0; i < colBottom.Count; i++)
                {
                    colBlobsToCheck.Add(colBottom[i]);
                }
            }
            else if (nCheckBottom >= 0)
            {
                m_log.CHECK_LT(nCheckBottom, colBottom.Count, "The check bottom value '" + nCheckBottom.ToString() + "' must be less than the number of bottom blobs.");
                colBlobsToCheck.Add(colBottom[nCheckBottom]);
                rgPropagateDown[nCheckBottom] = true;
            }

            m_log.CHECK_GT(colBlobsToCheck.Count, 0, "No blobs to check!");

            // Compute the gradient analytically using Backward.
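            // Resetting the RNG seed makes any stochastic behavior in the layer
            // (e.g. dropout) reproducible, so the finite-difference forward passes
            // below see exactly the same randomness as this analytic pass.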
            m_cuda.rng_setseed(m_uiSeed);

            // Ignore the loss from the layer (it's just the weighted sum of the losses
            // from the top blobs, whose gradients we may want to test individually).
            layer.Forward(colBottom, colTop);

            // Get additional loss from the objective.
            GetObjAndGradient(layer, colTop, nTopID, nTopDataID);
            layer.Backward(colTop, rgPropagateDown, colBottom);

            // Store computed gradients for all checked blobs
            BlobCollection <T> colComputedGradientBlobs = new BlobCollection <T>();

            for (int nBlobID = 0; nBlobID < colBlobsToCheck.Count; nBlobID++)
            {
                Blob <T> current_blob = colBlobsToCheck[nBlobID];
                Blob <T> new_blob     = new Blob <T>(m_cuda, m_log);

                if (current_blob.DiffExists)
                {
                    new_blob.ReshapeLike(current_blob);
                    m_cuda.copy(current_blob.count(), current_blob.gpu_diff, new_blob.mutable_gpu_data);
                }

                colComputedGradientBlobs.Add(new_blob);
            }

            // Compute derivative of top w.r.t. each bottom and parameter input using
            // finite differencing.

            for (int nBlobID = 0; nBlobID < colBlobsToCheck.Count; nBlobID++)
            {
                Blob <T> current_blob = colBlobsToCheck[nBlobID];

                if (!current_blob.DiffExists)
                {
                    continue;
                }

                T[]    rgdfComputedGradients = colComputedGradientBlobs[nBlobID].update_cpu_data();
                double dfData;

                for (int nFeatID = 0; nFeatID < current_blob.count(); nFeatID++)
                {
                    if (m_evtCancel.WaitOne(0))
                    {
                        throw new Exception("Aborted!");
                    }

                    // For an element-wise layer, we only need to do finite differencing to
                    // compute the derivative of top[nTopID][nTopDataID] w.r.t.
                    // bottom[nBlobID][i] only for i == nTopDataID.  For any other
                    // i != nTopDataID, we know the derivative is 0 by definition, and simply
                    // check that that's true.
                    double dfEstimateGradient  = 0;
                    double dfPositiveObjective = 0;
                    double dfNegativeObjective = 0;

                    if (!bElementwise || (nFeatID == nTopDataID))
                    {
                        // Do finite differencing.
                        // Compute loss with stepsize added to input.
                        dfData  = (double)Convert.ChangeType(current_blob.GetData(nFeatID), typeof(double));
                        dfData += m_dfStepsize;
                        current_blob.SetData(dfData, nFeatID);
                        m_cuda.rng_setseed(m_uiSeed);

                        layer.Forward(colBottom, colTop);
                        dfPositiveObjective = GetObjAndGradient(layer, colTop, nTopID, nTopDataID);

                        // Compute loss with stepsize subtracted from input.
                        dfData  = (double)Convert.ChangeType(current_blob.GetData(nFeatID), typeof(double));
                        dfData -= (m_dfStepsize * 2);
                        current_blob.SetData(dfData, nFeatID);
                        m_cuda.rng_setseed(m_uiSeed);

                        layer.Forward(colBottom, colTop);
                        dfNegativeObjective = GetObjAndGradient(layer, colTop, nTopID, nTopDataID);

                        // Recover original input value.
                        dfData  = (double)Convert.ChangeType(current_blob.GetData(nFeatID), typeof(double));
                        dfData += m_dfStepsize;
                        current_blob.SetData(dfData, nFeatID);

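                        // Central-difference estimate:
                        //   d(objective)/d(input) ~= (obj(x + h) - obj(x - h)) / (2h), with h = m_dfStepsize.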
                        dfEstimateGradient = (dfPositiveObjective - dfNegativeObjective) / m_dfStepsize / 2.0;
                    }

                    double dfComputedGradient = (double)Convert.ChangeType(rgdfComputedGradients[nFeatID], typeof(double));
                    double dfFeature          = (double)Convert.ChangeType(current_blob.GetData(nFeatID), typeof(double));

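                    // Only compare when the magnitude of the input lies outside the 'kink' band
                    // [m_dfKink - m_dfKinkRange, m_dfKink + m_dfKinkRange]; near a non-differentiable
                    // point (e.g. 0 for ReLU or |x|) the analytic and numeric gradients can
                    // legitimately disagree.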
                    if (m_dfKink - m_dfKinkRange > Math.Abs(dfFeature) ||
                        Math.Abs(dfFeature) > m_dfKink + m_dfKinkRange)
                    {
                        // We check the relative accuracy, but for very small values we
                        // clamp the scale factor to a minimum of 1.
                        double dfScale = Math.Max(Math.Max(Math.Abs(dfComputedGradient), Math.Abs(dfEstimateGradient)), 1.0);

                        m_log.EXPECT_NEAR(dfComputedGradient, dfEstimateGradient, m_dfThreshold * dfScale,
                                          "DEBUG: (nTopID, nTopDataID, nBlobID, nFeatID)=" + nTopID.ToString() + ", " + nTopDataID.ToString() + ", " + nBlobID.ToString() + ", " + nFeatID.ToString() +
                                          "; feat = " + dfFeature.ToString() +
                                          "; objective+ = " + dfPositiveObjective.ToString() +
                                          "; objective- = " + dfNegativeObjective.ToString());
                    }
                }
            }
        }
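
For reference, a typical caller builds a GradientChecker with a step size and tolerance and then calls CheckGradientSingle once per top element. The sketch below is illustrative only: the constructor arguments and the pre-built 'layer', 'colBottom' and 'colTop' objects are assumptions, not verbatim MyCaffe test code.

            // Illustrative sketch (assumed constructor arguments; 'layer', 'colBottom'
            // and 'colTop' are an already set-up Layer<double> and its bottom/top
            // BlobCollection<double> instances).
            GradientChecker<double> checker = new GradientChecker<double>(m_cuda, m_log, 0.01, 0.001);

            // Check d(top[0], element 0) w.r.t. every bottom blob and every param blob.
            checker.CheckGradientSingle(layer, colBottom, colTop, -1, 0, 0);

            // Check the same top element w.r.t. bottom[0] only.
            checker.CheckGradientSingle(layer, colBottom, colTop, 0, 0, 0);
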
Code example #2
File: RecurrentLayer.cs    Project: maplewei/MyCaffe
        private void layerSetUpCaffe(BlobCollection <T> colBottom, BlobCollection <T> colTop)
        {
            // Get (recurrent) input/output names.
            List <string> rgOutputNames = new List <string>();

            OutputBlobNames(rgOutputNames);

            List <string> rgRecurInputNames = new List <string>();

            RecurrentInputBlobNames(rgRecurInputNames);

            List <string> rgRecurOutputNames = new List <string>();

            RecurrentOutputBlobNames(rgRecurOutputNames);

            int nNumRecurBlobs = rgRecurInputNames.Count;

            m_log.CHECK_EQ(nNumRecurBlobs, rgRecurOutputNames.Count, "The number of recurrent input names must equal the number of recurrent output names.");

            // If provided, bottom[2] is a static input to the recurrent net.
            int nNumHiddenExposed = (m_bExposeHidden) ? nNumRecurBlobs : 0;

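            // When expose_hidden is enabled, the recurrent state blobs are passed in as
            // additional bottom blobs, which shifts the position of the optional static
            // input checked here.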
            m_bStaticInput = (colBottom.Count > 2 + nNumHiddenExposed) ? true : false;

            if (m_bStaticInput)
            {
                m_log.CHECK_GE(colBottom[2].num_axes, 1, "When static input is present, the bottom[2].num_axes must be >= 1");
                m_log.CHECK_EQ(m_nN, colBottom[2].shape(0), "When static input is present, the bottom[2].shape(0) must = N which is " + m_nN.ToString());
            }

            // Create a NetParameter; setup the inputs that aren't unique to particular
            // recurrent architectures.
            NetParameter net_param = new NetParameter();

            LayerParameter input_layer = new LayerParameter(LayerParameter.LayerType.INPUT);

            input_layer.top.Add("x");
            BlobShape input_shape1 = new param.BlobShape();

            for (int i = 0; i < colBottom[0].num_axes; i++)
            {
                input_shape1.dim.Add(colBottom[0].shape(i));
            }
            input_layer.input_param.shape.Add(input_shape1);

            input_layer.top.Add("cont");
            BlobShape input_shape2 = new param.BlobShape();

            for (int i = 0; i < colBottom[1].num_axes; i++)
            {
                input_shape2.dim.Add(colBottom[1].shape(i));
            }
            input_layer.input_param.shape.Add(input_shape2);

            if (m_bStaticInput)
            {
                input_layer.top.Add("x_static");
                BlobShape input_shape3 = new BlobShape();
                for (int i = 0; i < colBottom[2].num_axes; i++)
                {
                    input_shape3.dim.Add(colBottom[2].shape(i));
                }
                input_layer.input_param.shape.Add(input_shape3);
            }

            net_param.layer.Add(input_layer);

            // Call the child's FillUnrolledNet implementation to specify the unrolled
            // recurrent architecture.
            FillUnrolledNet(net_param);

            // Prepend this layer's name to the names of each layer in the unrolled net.
            string strLayerName = m_param.name;

            if (strLayerName.Length > 0)
            {
                for (int i = 0; i < net_param.layer.Count; i++)
                {
                    LayerParameter layer = net_param.layer[i];
                    layer.name = strLayerName + "_" + layer.name;
                }
            }

            // Add 'pseudo-losses' to all outputs to force backpropagation.
            // (Setting force_backward is too aggressive as we may not need to backprop to
            // all inputs, e.g., the sequence continuation indicators.)
            List <string> rgPseudoLosses = new List <string>();

            for (int i = 0; i < rgOutputNames.Count; i++)
            {
                rgPseudoLosses.Add(rgOutputNames[i] + "_pseudoloss");
                LayerParameter layer = new LayerParameter(LayerParameter.LayerType.REDUCTION, rgPseudoLosses[i]);
                layer.bottom.Add(rgOutputNames[i]);
                layer.top.Add(rgPseudoLosses[i]);
                layer.loss_weight.Add(1.0);
                net_param.layer.Add(layer);
            }

            // Create the unrolled net.
            Net <T> sharedNet = null;

            if (m_param is LayerParameterEx <T> )
            {
                RecurrentLayer <T> sharedLayer = ((LayerParameterEx <T>)m_param).SharedLayer as RecurrentLayer <T>;
                if (sharedLayer != null)
                {
                    sharedNet = sharedLayer.m_unrolledNet;
                }
            }

            m_unrolledNet = new Net <T>(m_cuda, m_log, net_param, m_evtCancel, null, m_phase, null, sharedNet);
            m_unrolledNet.set_debug_info(m_param.recurrent_param.debug_info);

            // Setup pointers to the inputs.
            m_blobXInputBlob    = m_unrolledNet.blob_by_name("x");
            m_blobContInputBlob = m_unrolledNet.blob_by_name("cont");

            if (m_bStaticInput)
            {
                m_blobXStaticInputBlob = m_unrolledNet.blob_by_name("x_static");
            }

            // Setup pointers to paired recurrent inputs/outputs.
            m_colRecurInputBlobs  = new common.BlobCollection <T>();
            m_colRecurOutputBlobs = new common.BlobCollection <T>();

            for (int i = 0; i < nNumRecurBlobs; i++)
            {
                m_colRecurInputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurInputNames[i]));
                m_colRecurOutputBlobs.Add(m_unrolledNet.blob_by_name(rgRecurOutputNames[i]));
            }

            // Setup pointers to outputs.
            m_log.CHECK_EQ(colTop.Count() - nNumHiddenExposed, rgOutputNames.Count, "OutputBlobNames must provide output blob name for each top.");
            m_colOutputBlobs = new common.BlobCollection <T>();
            for (int i = 0; i < rgOutputNames.Count; i++)
            {
                m_colOutputBlobs.Add(m_unrolledNet.blob_by_name(rgOutputNames[i]));
            }

            // We should have 2 inputs (x and cont), plus a number of recurrent inputs,
            // plus maybe a static input.
            int nStaticInput = (m_bStaticInput) ? 1 : 0;

            m_log.CHECK_EQ(2 + nNumRecurBlobs + nStaticInput, m_unrolledNet.input_blobs.Count, "The unrolled net input count should equal 2 + number of recurrent blobs (" + nNumRecurBlobs.ToString() + ") + static inputs (" + nStaticInput.ToString() + ")");

            // This layer's parameters are any parameters in the layers of the unrolled
            // net.  We only want one copy of each parameter, so check that the parameter
            // is 'owned' by the layer, rather than shared with another.
            blobs.Clear();
            for (int i = 0; i < m_unrolledNet.parameters.Count; i++)
            {
                if (m_unrolledNet.param_owners[i] == -1)
                {
                    m_log.WriteLine("Adding parameter " + i.ToString() + ": " + m_unrolledNet.param_display_names[i]);
                    blobs.Add(m_unrolledNet.parameters[i]);
                }
            }

            // Check that param_propagate_down is set for all of the parameters in the
            // unrolled net; set param_propagate_down to true in this layer.
            for (int i = 0; i < m_unrolledNet.layers.Count; i++)
            {
                for (int j = 0; j < m_unrolledNet.layers[i].blobs.Count; j++)
                {
                    m_log.CHECK(m_unrolledNet.layers[i].param_propagate_down(j), "param_propagate_down not set for layer " + i.ToString() + ", param " + j.ToString());
                }
            }
            m_rgbParamPropagateDown = new DictionaryMap <bool>(blobs.Count, true);

            // Set the diffs of recurrent outputs to 0 -- we can't backpropagate across
            // batches.
            for (int i = 0; i < m_colRecurOutputBlobs.Count; i++)
            {
                m_colRecurOutputBlobs[i].SetDiff(0);
            }

            // Check that the last output_names.count layers are the pseudo-losses;
            // set last_layer_index so that we don't actually run these layers.
            List <string> rgLayerNames = m_unrolledNet.layer_names;

            m_nLastLayerIndex = rgLayerNames.Count - 1 - rgPseudoLosses.Count;
            for (int i = m_nLastLayerIndex + 1, j = 0; i < rgLayerNames.Count; i++, j++)
            {
                m_log.CHECK(rgLayerNames[i] == rgPseudoLosses[j], "The last layer at idx " + i.ToString() + " should be the pseudo layer named " + rgPseudoLosses[j]);
            }
        }
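
The three name lists used above (OutputBlobNames, RecurrentInputBlobNames, RecurrentOutputBlobNames) are filled in by the concrete recurrent layer. As an illustrative sketch, modeled on the classic Caffe LSTM unrolling rather than the exact MyCaffe code, an LSTM-style subclass would pair the initial and final hidden/cell states and expose one output blob:

        // Illustrative sketch only; the override signatures, the m_nT timestep count
        // and the blob names are assumptions modeled on the Caffe LSTM unrolling.
        protected override void RecurrentInputBlobNames(List<string> rgNames)
        {
            rgNames.Clear();
            rgNames.Add("h_0");     // hidden state fed into timestep 1
            rgNames.Add("c_0");     // cell state fed into timestep 1
        }

        protected override void RecurrentOutputBlobNames(List<string> rgNames)
        {
            rgNames.Clear();
            rgNames.Add("h_" + m_nT.ToString());    // hidden state after the last timestep
            rgNames.Add("c_T");                     // cell state after the last timestep
        }

        protected override void OutputBlobNames(List<string> rgNames)
        {
            rgNames.Clear();
            rgNames.Add("h");       // per-timestep hidden states concatenated along time
        }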