        // Requires System.Linq (Select/Sum), MathNet.Numerics.LinearAlgebra (Matrix<float>)
        // and a unit-test framework providing Assert.AreEqual.
        public static void CheckLayer(ILayer layer, int fanInWidth, int fanInHeight, int fanInDepth,
                                      int batchSize, float epsilon, Random random)
        {
            var accuracyCondition = 1e-2;

            layer.Initialize(fanInWidth, fanInHeight, fanInDepth, batchSize,
                             Initialization.GlorotUniform, random);

            var fanIn  = fanInWidth * fanInHeight * fanInDepth;
            var fanOut = layer.Width * layer.Height * layer.Depth;

            // Forward pass - set input activation in layer
            var input = Matrix<float>.Build.Random(batchSize, fanIn, random.Next());

            layer.Forward(input);

            // Set delta to 1
            var delta = Matrix<float>.Build.Dense(batchSize, fanOut, 1.0f);

            // Backward pass to calculate gradients
            layer.Backward(delta);

            // Get parameters and gradients
            var parametersAndGradients = new List<ParametersAndGradients>();

            layer.AddParameresAndGradients(parametersAndGradients);

            foreach (var parameterAndGradient in parametersAndGradients)
            {
                var gradients  = parameterAndGradient.Gradients;
                var parameters = parameterAndGradient.Parameters;

                var output1 = Matrix<float>.Build.Dense(batchSize, fanOut);
                var output2 = Matrix<float>.Build.Dense(batchSize, fanOut);

                for (int i = 0; i < parameters.Length; i++)
                {
                    output1.Clear();
                    output2.Clear();

                    var oldValue = parameters[i];

                    parameters[i] = oldValue + epsilon;
                    layer.Forward(input).CopyTo(output1);
                    parameters[i] = oldValue - epsilon;
                    layer.Forward(input).CopyTo(output2);

                    parameters[i] = oldValue;

                    output1.Subtract(output2, output1); // output1 = output1 - output2

                    // Central difference: (f(w + eps) - f(w - eps)) / (2 * eps),
                    // summed over all outputs since the backward delta was all ones.
                    var grad     = output1.ToRowMajorArray().Select(f => f / (2.0f * epsilon));
                    var gradient = grad.Sum(); // approximated gradient
                    var actual   = gradients[i];

                    Assert.AreEqual(gradient, actual, accuracyCondition);
                }
            }
        }
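
A minimal sketch of how the helper might be invoked, assuming a hypothetical DenseLayer implementing ILayer (the layer type, shapes, and epsilon below are illustrative, not part of the code above):

        [Test]
        public void DenseLayer_GradientCheck()
        {
            // Hypothetical layer under test; any ILayer implementation can be checked.
            var layer  = new DenseLayer(units: 5);
            var random = new Random(42);

            // Illustrative shapes: fan-in 1x1x10, batch size 4, epsilon 1e-4.
            CheckLayer(layer, fanInWidth: 1, fanInHeight: 1, fanInDepth: 10,
                       batchSize: 4, epsilon: 1e-4f, random: random);
        }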
Example #2
        /// <summary>
        /// Computes the loss for a batch.
        /// </summary>
        /// <param name="x">Input batch.</param>
        /// <param name="t">Target labels.</param>
        /// <returns>The scalar loss value.</returns>
        public double Loss(Matrix x, Matrix t)
        {
            Matrix y    = Predict(x);
            double loss = softmaxWithLoss.Forward(y, t)[0, 0];

            LossUpdated?.Invoke(loss);

            return loss;
        }
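
A short usage sketch, assuming a network instance that exposes Loss as above, plus mini-batch matrices xBatch (inputs) and tBatch (one-hot targets); all of these names are illustrative:

        // Subscribe to loss updates, then evaluate one mini-batch.
        network.LossUpdated += l => Console.WriteLine($"loss = {l}");
        double batchLoss = network.Loss(xBatch, tBatch);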
Example #3
        public void Train(Vol x, object y)
        {
            net.Forward(x, true);
            var cost_loss = net.Backward(y);
            // Debug.Log("loss: " + cost_loss);

            var l2_decay_loss = 0.0f;
            var l1_decay_loss = 0.0f;

            var pglist = new List<ParamsAndGrads>();

            net.GetParamsAndGrads(pglist);

            for (var i = 0; i < pglist.Count; i++)
            {
                var pg = pglist[i];
                var p  = pg.param;
                var g  = pg.grads;

                var l2_decay_mul = pg.l2_decay_mul;
                var l1_decay_mul = pg.l1_decay_mul;
                var l2_decay     = this.l2_decay * l2_decay_mul;
                var l1_decay     = this.l1_decay * l1_decay_mul;

                var plen = p.Length;
                for (var j = 0; j < plen; j++)
                {
                    // Accumulate decay losses for reporting.
                    l2_decay_loss += l2_decay * p[j] * p[j] / 2;
                    l1_decay_loss += l1_decay * Math.Abs(p[j]);

                    // Combine the decay gradients with the backprop gradient,
                    // then take a plain SGD step.
                    var l1grad = l1_decay * (p[j] > 0 ? 1 : -1);
                    var l2grad = l2_decay * p[j];
                    var gij    = l2grad + l1grad + g[j];
                    p[j] += -this.learning_rate * gij;

                    // Remember to zero the gradient for the next pass.
                    g[j] = 0.0f;
                }
            }
        }
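
This mirrors the ConvNetJS-style trainer: L1/L2 decay terms are folded into the gradient before a plain SGD step, and each gradient is zeroed after use. A minimal sketch of driving Train from an epoch loop, assuming a trainer wrapping net and a trainingSet of (input, label) pairs (names are illustrative):

        // Hypothetical epoch loop: one SGD step per sample via Train above.
        for (var epoch = 0; epoch < 10; epoch++)
        {
            foreach (var (x, y) in trainingSet)   // trainingSet: IEnumerable<(Vol, object)>
            {
                trainer.Train(x, y);              // forward, backward, parameter update
            }
        }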