/// <summary>
/// One backward step through the four gate layers F, I, C and O.
/// </summary>
/// <param name="dh">Gradient arriving at this step's hidden output.</param>
/// <param name="dcnext">Cell-state gradient handed in by the caller; it is cloned, not modified.</param>
/// <param name="cprev">Cell state that the forget gate was multiplied with.</param>
/// <param name="fGate">Forget gate activation, assigned to F.Output.</param>
/// <param name="iGate">Input gate activation, assigned to I.Output.</param>
/// <param name="cGate">Candidate (cell gate) activation, assigned to C.Output.</param>
/// <param name="cellState">Cell state of this step, before Tanh is applied.</param>
/// <param name="oGate">Output gate activation, assigned to O.Output.</param>
/// <returns>
/// A tuple of the summed weight-projected gate gradients (dhprev) and
/// fGate multiplied by the accumulated cell-state gradient (dcprev).
/// </returns>
public (FloatArray dhprev, FloatArray dcprev) Backward(FloatArray dh, FloatArray dcnext, FloatArray cprev, FloatArray fGate, FloatArray iGate, FloatArray cGate, FloatArray cellState, FloatArray oGate)
        {
            // Assign the supplied gate activations to each layer's Output before calling BackWard on it.
            F.Output = fGate;
            I.Output = iGate;
            C.Output = cGate;
            O.Output = oGate;

            // Tanh of the cell state and the matching derivative term, 1 - tanh^2.
            var tanhCell  = cellState.Tanh();
            var tanhPrime = (1 - tanhCell * tanhCell);

            // Cell-state gradient: start from a copy of the incoming gradient,
            // then add the part that arrives through the hidden output.
            var cellGrad = dcnext.Clone();
            cellGrad += dh * oGate * tanhPrime;

            // Push the per-gate gradients through their layers (output, candidate, input, forget).
            var outGrad    = O.BackWard(dh * tanhCell);
            var candGrad   = C.BackWard(cellGrad * iGate);
            var inGrad     = I.BackWard(cellGrad * cGate);
            var forgetGrad = F.BackWard(cellGrad * cprev);

            // Project every gate gradient through its layer's weights and add them up in F, I, C, O order.
            var viaForget = (F.Weights * forgetGrad).SumLine();
            var viaInput  = (I.Weights * inGrad).SumLine();
            var viaCand   = (C.Weights * candGrad).SumLine();
            var viaOutput = (O.Weights * outGrad).SumLine();
            var dz        = viaForget + viaInput + viaCand + viaOutput;

            // Cell-state gradient handed back to the caller is scaled by the forget gate.
            var cellGradPrev = fGate * cellGrad;

            return (dz, cellGradPrev);
        }
Beispiel #2
0
                FloatArray hs, FloatArray cs) BPTT(Array <FloatArray> inputs, FloatArray error, FloatArray hprev, FloatArray cprev)
        {
            // Overview of the visible body: run FeedForward over every element of `inputs`,
            // walk the same steps in reverse through Backward while the dW*/dB* buffers are
            // passed by ref, divide those buffers by inputs.Length and pass them through
            // NumMath.Normalize, then return them together with the last stored h and c.
            // NOTE(review): the beginning of this signature (start of the return tuple) and the
            // closing brace are cut off in this excerpt.

            // Per-step storage for the values FeedForward returns
            var z_s   = new Array <FloatArray>(inputs.Length);
            var f_s   = new Array <FloatArray>(inputs.Length);
            var i_s   = new Array <FloatArray>(inputs.Length);
            var c_s_s = new Array <FloatArray>(inputs.Length);
            var c_s   = new Array <FloatArray>(inputs.Length);
            var o_s   = new Array <FloatArray>(inputs.Length);
            var h_s   = new Array <FloatArray>(inputs.Length);

            // Seed the "previous" state for t = 0 with copies of the caller's arrays.
            // NOTE(review): this relies on Array<T> accepting index -1 as a distinct slot;
            // if -1 wraps to the last element it would be overwritten by the forward loop — confirm.
            h_s[-1] = hprev.Clone();
            c_s[-1] = cprev.Clone();

            // forward: each step consumes the h and c stored for t - 1
            for (var t = 0; t < inputs.Length; t++)
            {
                (z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t]) =
                    FeedForward(inputs[t], h_s[t - 1], c_s[t - 1]);
            }

            // Gradient buffers, sized from the corresponding weights and biases
            // (presumably zero-initialised by NumMath.Array — TODO confirm)
            var dWf = NumMath.Array(Wf.W, Wf.H);
            var dWi = NumMath.Array(Wi.W, Wi.H);
            var dWc = NumMath.Array(Wc.W, Wc.H);
            var dWo = NumMath.Array(Wo.W, Wo.H);
            var dBf = NumMath.Array(Bf.Length);
            var dBi = NumMath.Array(Bi.Length);
            var dBc = NumMath.Array(Bc.Length);
            var dBo = NumMath.Array(Bo.Length);

            // dhnext starts as the caller's `error` reference itself (no Clone, unlike hprev/cprev);
            // it is replaced by Backward's first return value on every iteration.
            var dhnext = error;
            var dcnext = NumMath.Array(hidden_size);

            // backward: last step first; c_s[t - 1] reaches the seeded slot when t == 0.
            // The Backward overload called here is not part of this excerpt.
            for (var t = inputs.Length - 1; t >= 0; t--)
            {
                (dhnext, dcnext) = Backward(dhnext, dcnext, c_s[t - 1],
                                            z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t],
                                            ref dWf, ref dWi, ref dWc, ref dWo, ref dBf, ref dBi, ref dBc, ref dBo);
            }

            // Divide every buffer by the number of steps, then pass it through NumMath.Normalize
            // with bounds -5 and 5 (presumably limits values to that range — confirm).
            // NOTE(review): inputs.Length == 0 is not guarded, so these divisions would divide by zero.
            dWf = NumMath.Normalize(-5, 5, dWf / inputs.Length);
            dWi = NumMath.Normalize(-5, 5, dWi / inputs.Length);
            dWc = NumMath.Normalize(-5, 5, dWc / inputs.Length);
            dWo = NumMath.Normalize(-5, 5, dWo / inputs.Length);
            dBf = NumMath.Normalize(-5, 5, dBf / inputs.Length);
            dBi = NumMath.Normalize(-5, 5, dBi / inputs.Length);
            dBc = NumMath.Normalize(-5, 5, dBc / inputs.Length);
            dBo = NumMath.Normalize(-5, 5, dBo / inputs.Length);

            // Gradients followed by the hidden and cell state stored for the final step
            return(dWf, dWi, dWc, dWo, dBf, dBi, dBc, dBo, h_s[inputs.Length - 1], c_s[inputs.Length - 1]);
Beispiel #3
0
        Backward(int target, FloatArray dhnext, FloatArray dcnext, FloatArray cprev,
                 FloatArray z, FloatArray f, FloatArray i, FloatArray cbar, FloatArray c,
                 FloatArray o, FloatArray h, FloatArray v, FloatArray y,
                 ref FloatArray2D dWf, ref FloatArray2D dWi, ref FloatArray2D dWc, ref FloatArray2D dWo,
                 ref FloatArray2D dWv, ref FloatArray dBf, ref FloatArray dBi,
                 ref FloatArray dBc, ref FloatArray dBo, ref FloatArray dBv)
        {
            // Single backward step: adds this step's contribution to every ref gradient
            // buffer and returns the pair (dz, f * dc).
            // NOTE(review): the return type and modifiers of this signature are cut off in this excerpt.
            // NOTE(review): parameter `v` is never read in this body.

            // output gradient: copy of y with 1 subtracted at the target index
            // (the softmax + cross-entropy gradient if y holds softmax probabilities — TODO confirm)
            var dv = y.Clone();

            dv[target] -= 1;
            dWv        += (h.T * dv);
            dBv        += dv;
            // output gate gradient
            // dh combines the gradient coming through Wv with the dhnext passed in by the caller
            var dh = (Wv * dv).SumColumn() + dhnext;
            // Dsigmoid receives the gate activation o itself, so it presumably expects an
            // already-activated value — confirm against its definition
            var DO = Dsigmoid(o) * (dh * Tanh(c));

            dWo += (DO * z.T);
            dBo += DO;
            // cell gate gradient: starts from a copy of dcnext so the caller's array is untouched
            var dc = dcnext.Clone();

            // c is passed through Tanh before Dtanh here, whereas cbar below is given to Dtanh directly
            dc += dh * o * Dtanh(Tanh(c));
            var dcbar = Dtanh(cbar) * (dc * i);

            dWc += dcbar * z.T;
            dBc += dcbar;
            // input gate gradient
            var di = Dsigmoid(i) * (dc * cbar);

            dWi += (di * z.T);
            dBi += di;
            // forget gate gradient
            var df = Dsigmoid(f) * (dc * cprev);

            dWf += (df * z.T);
            dBf += df;
            // Gradient with respect to the gate input: each gate gradient projected through its weights, summed
            var dz = (Wf * df).SumColumn() + (Wi * di).SumColumn() + (Wc * dcbar).SumColumn() + (Wo * DO).SumColumn();

            // Returned as (gradient for the previous hidden state, gradient for the previous cell state).
            // NOTE(review): dz is returned whole; if z concatenates h with the input, confirm the caller
            // expects that length.
            return(dz, f *dc);
        }
Beispiel #4
0
                FloatArray hs, FloatArray cs) BPTT(int[] inputs, int[] targets, FloatArray hprev, FloatArray cprev)
        {
            // Overview of the visible body: encode each input/target index as an array with a
            // single 1, run FeedForward per step while summing the loss, walk the steps in
            // reverse through Backward, then divide and normalize the gradient buffers and
            // return them with the loss and the last stored h and c.
            // NOTE(review): the beginning of this signature (start of the return tuple) and the
            // closing brace are cut off in this excerpt.

            // Per-step storage for the encoded inputs/targets and the values FeedForward returns
            var x_s   = new Array <FloatArray>(inputs.Length);
            var z_s   = new Array <FloatArray>(inputs.Length);
            var f_s   = new Array <FloatArray>(inputs.Length);
            var i_s   = new Array <FloatArray>(inputs.Length);
            var c_s_s = new Array <FloatArray>(inputs.Length);
            var c_s   = new Array <FloatArray>(inputs.Length);
            var o_s   = new Array <FloatArray>(inputs.Length);
            var h_s   = new Array <FloatArray>(inputs.Length);
            var v_s   = new Array <FloatArray>(inputs.Length);
            var y_s   = new Array <FloatArray>(inputs.Length);
            var t_g   = new Array <FloatArray>(inputs.Length);

            // Running loss, accumulated as a double
            var loss = 0d;

            // Seed the "previous" state for t = 0 with copies of the caller's arrays.
            // NOTE(review): this relies on Array<T> accepting index -1 as a distinct slot;
            // if -1 wraps to the last element it would be overwritten by the forward loop — confirm.
            h_s[-1] = hprev.Clone();
            c_s[-1] = cprev.Clone();

            // forward
            for (var t = 0; t < inputs.Length; t++)
            {
                // Input encoding: a fresh array of input_size with a 1 at the input index
                // (assumes new FloatArray(n) starts at zero — TODO confirm)
                x_s[t]            = new FloatArray(input_size);
                x_s[t][inputs[t]] = 1;

                // Target encoding, built the same way; targets is indexed with the same t,
                // so it must hold at least inputs.Length entries
                t_g[t]             = new FloatArray(output_size);
                t_g[t][targets[t]] = 1;

                (z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t], v_s[t], y_s[t]) =
                    FeedForward(x_s[t], h_s[t - 1], c_s[t - 1]);

                // Negative sum of target * log(y): only the target index contributes
                loss += -(t_g[t] * y_s[t].Log()).Sum();
            }

            // Gradient buffers, sized from the corresponding weights and biases
            // (presumably zero-initialised by NumMath.Array — TODO confirm)
            var dWf = NumMath.Array(Wf.W, Wf.H);
            var dWi = NumMath.Array(Wi.W, Wi.H);
            var dWc = NumMath.Array(Wc.W, Wc.H);
            var dWo = NumMath.Array(Wo.W, Wo.H);
            var dWv = NumMath.Array(Wv.W, Wv.H);
            var dBf = NumMath.Array(Bf.Length);
            var dBi = NumMath.Array(Bi.Length);
            var dBc = NumMath.Array(Bc.Length);
            var dBo = NumMath.Array(Bo.Length);
            var dBv = NumMath.Array(Bv.Length);

            // Gradients carried between steps; both start as fresh hidden_size arrays
            var dhnext = NumMath.Array(hidden_size);
            var dcnext = NumMath.Array(hidden_size);

            // backward: last step first; c_s[t - 1] reaches the seeded slot when t == 0
            for (var t = inputs.Length - 1; t >= 0; t--)
            {
                (dhnext, dcnext) = Backward(targets[t], dhnext, dcnext, c_s[t - 1],
                                            z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t], v_s[t], y_s[t],
                                            ref dWf, ref dWi, ref dWc, ref dWo, ref dWv, ref dBf, ref dBi, ref dBc, ref dBo, ref dBv);
            }

            // Divide every buffer by the number of steps, then pass it through NumMath.Normalize
            // with bounds -5 and 5 (presumably limits values to that range — confirm).
            // NOTE(review): inputs.Length == 0 is not guarded, so these divisions would divide by zero.
            dWf = NumMath.Normalize(-5, 5, dWf / inputs.Length);
            dWi = NumMath.Normalize(-5, 5, dWi / inputs.Length);
            dWc = NumMath.Normalize(-5, 5, dWc / inputs.Length);
            dWo = NumMath.Normalize(-5, 5, dWo / inputs.Length);
            dWv = NumMath.Normalize(-5, 5, dWv / inputs.Length);
            dBf = NumMath.Normalize(-5, 5, dBf / inputs.Length);
            dBi = NumMath.Normalize(-5, 5, dBi / inputs.Length);
            dBc = NumMath.Normalize(-5, 5, dBc / inputs.Length);
            dBo = NumMath.Normalize(-5, 5, dBo / inputs.Length);
            dBv = NumMath.Normalize(-5, 5, dBv / inputs.Length);

            // Total loss, the gradients, then the hidden and cell state stored for the final step
            return(loss, dWf, dWi, dWc, dWo, dWv, dBf, dBi, dBc, dBo, dBv, h_s[inputs.Length - 1], c_s[inputs.Length - 1]);