/// <summary>
/// Runs a single backward step through the four gate layers <c>F</c>, <c>I</c>, <c>C</c> and <c>O</c>.
/// </summary>
/// <param name="dh">Upstream gradient used for the output gate and for the cell-state term.</param>
/// <param name="dcnext">Cell-state gradient carried in from the caller; cloned, never modified here.</param>
/// <param name="cprev">Value multiplied with the cell gradient to form the forget-gate upstream gradient.</param>
/// <param name="fGate">Forget-gate activation; stored into <c>F.Output</c> and used to scale the returned cell gradient.</param>
/// <param name="iGate">Input-gate activation; stored into <c>I.Output</c>.</param>
/// <param name="cGate">Candidate (cell-gate) activation; stored into <c>C.Output</c>.</param>
/// <param name="cellState">Cell state whose <c>Tanh()</c> and tanh-derivative are used below.</param>
/// <param name="oGate">Output-gate activation; stored into <c>O.Output</c>.</param>
/// <returns>
/// <c>dhprev</c>: the sum of <c>(Weights * gateGradient).SumLine()</c> over the four layers;
/// <c>dcprev</c>: <c>fGate</c> multiplied by the accumulated cell-state gradient.
/// </returns>
public (FloatArray dhprev, FloatArray dcprev) Backward(
    FloatArray dh, FloatArray dcnext, FloatArray cprev,
    FloatArray fGate, FloatArray iGate, FloatArray cGate,
    FloatArray cellState, FloatArray oGate)
{
    // Give every layer the activation recorded for this step before calling BackWard on it.
    // NOTE(review): presumably BackWard reads Output to evaluate the activation derivative -
    // confirm against the layer class.
    F.Output = fGate;
    I.Output = iGate;
    C.Output = cGate;
    O.Output = oGate;

    // tanh(cellState) and its derivative, 1 - tanh(cellState)^2.
    var squashedCell = cellState.Tanh();
    var squashedCellSlope = 1 - squashedCell * squashedCell;

    // Cell-state gradient: the carried-in part plus dh scaled by the output gate
    // and the tanh derivative. Clone keeps the caller's dcnext untouched.
    var cellGrad = dcnext.Clone();
    cellGrad += dh * oGate * squashedCellSlope;

    // Per-layer backward calls, kept in the original order (O, C, I, F).
    var outGrad = O.BackWard(dh * squashedCell);
    var candGrad = C.BackWard(cellGrad * iGate);
    var inGrad = I.BackWard(cellGrad * cGate);
    var forgetGrad = F.BackWard(cellGrad * cprev);

    // Gradient with respect to the layers' shared input: one term per layer, summed F, I, C, O.
    var viaForget = (F.Weights * forgetGrad).SumLine();
    var viaInput = (I.Weights * inGrad).SumLine();
    var viaCand = (C.Weights * candGrad).SumLine();
    var viaOutput = (O.Weights * outGrad).SumLine();
    var dz = viaForget + viaInput + viaCand + viaOutput;

    // (dhprev, dcprev)
    return (dz, fGate * cellGrad);
}
FloatArray hs, FloatArray cs) BPTT(Array <FloatArray> inputs, FloatArray error, FloatArray hprev, FloatArray cprev) { // store states var z_s = new Array <FloatArray>(inputs.Length); var f_s = new Array <FloatArray>(inputs.Length); var i_s = new Array <FloatArray>(inputs.Length); var c_s_s = new Array <FloatArray>(inputs.Length); var c_s = new Array <FloatArray>(inputs.Length); var o_s = new Array <FloatArray>(inputs.Length); var h_s = new Array <FloatArray>(inputs.Length); // init timing h_s[-1] = hprev.Clone(); c_s[-1] = cprev.Clone(); // forward for (var t = 0; t < inputs.Length; t++) { (z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t]) = FeedForward(inputs[t], h_s[t - 1], c_s[t - 1]); } // gradients var dWf = NumMath.Array(Wf.W, Wf.H); var dWi = NumMath.Array(Wi.W, Wi.H); var dWc = NumMath.Array(Wc.W, Wc.H); var dWo = NumMath.Array(Wo.W, Wo.H); var dBf = NumMath.Array(Bf.Length); var dBi = NumMath.Array(Bi.Length); var dBc = NumMath.Array(Bc.Length); var dBo = NumMath.Array(Bo.Length); var dhnext = error; var dcnext = NumMath.Array(hidden_size); // backward for (var t = inputs.Length - 1; t >= 0; t--) { (dhnext, dcnext) = Backward(dhnext, dcnext, c_s[t - 1], z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t], ref dWf, ref dWi, ref dWc, ref dWo, ref dBf, ref dBi, ref dBc, ref dBo); } dWf = NumMath.Normalize(-5, 5, dWf / inputs.Length); dWi = NumMath.Normalize(-5, 5, dWi / inputs.Length); dWc = NumMath.Normalize(-5, 5, dWc / inputs.Length); dWo = NumMath.Normalize(-5, 5, dWo / inputs.Length); dBf = NumMath.Normalize(-5, 5, dBf / inputs.Length); dBi = NumMath.Normalize(-5, 5, dBi / inputs.Length); dBc = NumMath.Normalize(-5, 5, dBc / inputs.Length); dBo = NumMath.Normalize(-5, 5, dBo / inputs.Length); return(dWf, dWi, dWc, dWo, dBf, dBi, dBc, dBo, h_s[inputs.Length - 1], c_s[inputs.Length - 1]);
Backward(
    int target,
    FloatArray dhnext, FloatArray dcnext, FloatArray cprev,
    FloatArray z, FloatArray f, FloatArray i, FloatArray cbar, FloatArray c, FloatArray o,
    FloatArray h, FloatArray v, FloatArray y,
    ref FloatArray2D dWf, ref FloatArray2D dWi, ref FloatArray2D dWc, ref FloatArray2D dWo, ref FloatArray2D dWv,
    ref FloatArray dBf, ref FloatArray dBi, ref FloatArray dBc, ref FloatArray dBo, ref FloatArray dBv)
{
    // One backward step for a single time step. Adds this step's contribution to every
    // weight/bias accumulator passed by ref and returns a pair: the summed
    // weight-times-gate-gradient term and f multiplied by the cell-state gradient.
    // The parameter v is accepted but never read in this method.

    // Output layer: copy of y with 1 subtracted at the target index.
    // NOTE(review): this matches a softmax + cross-entropy gradient if y holds
    // probabilities - confirm against FeedForward.
    var outErr = y.Clone();
    outErr[target] -= 1;
    dWv += (h.T * outErr);
    dBv += outErr;

    // Gradient reaching h: the part coming through Wv plus the part handed in by the caller.
    var hiddenGrad = (Wv * outErr).SumColumn() + dhnext;

    // Output gate.
    var outSlope = Dsigmoid(o);
    var outGateGrad = outSlope * (hiddenGrad * Tanh(c));
    dWo += (outGateGrad * z.T);
    dBo += outGateGrad;

    // Cell state: carried-in gradient plus the hidden gradient scaled by o and Dtanh(Tanh(c)).
    // Clone keeps the caller's dcnext untouched.
    var cellGrad = dcnext.Clone();
    cellGrad += hiddenGrad * o * Dtanh(Tanh(c));

    // Candidate (cell gate).
    var candGrad = Dtanh(cbar) * (cellGrad * i);
    dWc += candGrad * z.T;
    dBc += candGrad;

    // Input gate.
    var inGateGrad = Dsigmoid(i) * (cellGrad * cbar);
    dWi += (inGateGrad * z.T);
    dBi += inGateGrad;

    // Forget gate.
    var forgetGateGrad = Dsigmoid(f) * (cellGrad * cprev);
    dWf += (forgetGateGrad * z.T);
    dBf += forgetGateGrad;

    // Gradient with respect to z: one term per gate, summed in the order f, i, cbar, o.
    var viaForget = (Wf * forgetGateGrad).SumColumn();
    var viaInput = (Wi * inGateGrad).SumColumn();
    var viaCand = (Wc * candGrad).SumColumn();
    var viaOutput = (Wo * outGateGrad).SumColumn();
    var dz = viaForget + viaInput + viaCand + viaOutput;

    // (dhprev, dcprev)
    return (dz, f * cellGrad);
}
FloatArray hs, FloatArray cs) BPTT(int[] inputs, int[] targets, FloatArray hprev, FloatArray cprev) { // store states var x_s = new Array <FloatArray>(inputs.Length); var z_s = new Array <FloatArray>(inputs.Length); var f_s = new Array <FloatArray>(inputs.Length); var i_s = new Array <FloatArray>(inputs.Length); var c_s_s = new Array <FloatArray>(inputs.Length); var c_s = new Array <FloatArray>(inputs.Length); var o_s = new Array <FloatArray>(inputs.Length); var h_s = new Array <FloatArray>(inputs.Length); var v_s = new Array <FloatArray>(inputs.Length); var y_s = new Array <FloatArray>(inputs.Length); var t_g = new Array <FloatArray>(inputs.Length); // loss var loss = 0d; // init timing h_s[-1] = hprev.Clone(); c_s[-1] = cprev.Clone(); // forward for (var t = 0; t < inputs.Length; t++) { x_s[t] = new FloatArray(input_size); x_s[t][inputs[t]] = 1; t_g[t] = new FloatArray(output_size); t_g[t][targets[t]] = 1; (z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t], v_s[t], y_s[t]) = FeedForward(x_s[t], h_s[t - 1], c_s[t - 1]); loss += -(t_g[t] * y_s[t].Log()).Sum(); } // gradients var dWf = NumMath.Array(Wf.W, Wf.H); var dWi = NumMath.Array(Wi.W, Wi.H); var dWc = NumMath.Array(Wc.W, Wc.H); var dWo = NumMath.Array(Wo.W, Wo.H); var dWv = NumMath.Array(Wv.W, Wv.H); var dBf = NumMath.Array(Bf.Length); var dBi = NumMath.Array(Bi.Length); var dBc = NumMath.Array(Bc.Length); var dBo = NumMath.Array(Bo.Length); var dBv = NumMath.Array(Bv.Length); var dhnext = NumMath.Array(hidden_size); var dcnext = NumMath.Array(hidden_size); // backward for (var t = inputs.Length - 1; t >= 0; t--) { (dhnext, dcnext) = Backward(targets[t], dhnext, dcnext, c_s[t - 1], z_s[t], f_s[t], i_s[t], c_s_s[t], c_s[t], o_s[t], h_s[t], v_s[t], y_s[t], ref dWf, ref dWi, ref dWc, ref dWo, ref dWv, ref dBf, ref dBi, ref dBc, ref dBo, ref dBv); } dWf = NumMath.Normalize(-5, 5, dWf / inputs.Length); dWi = NumMath.Normalize(-5, 5, dWi / inputs.Length); dWc = NumMath.Normalize(-5, 5, dWc / inputs.Length); 
dWo = NumMath.Normalize(-5, 5, dWo / inputs.Length); dWv = NumMath.Normalize(-5, 5, dWv / inputs.Length); dBf = NumMath.Normalize(-5, 5, dBf / inputs.Length); dBi = NumMath.Normalize(-5, 5, dBi / inputs.Length); dBc = NumMath.Normalize(-5, 5, dBc / inputs.Length); dBo = NumMath.Normalize(-5, 5, dBo / inputs.Length); dBv = NumMath.Normalize(-5, 5, dBv / inputs.Length); return(loss, dWf, dWi, dWc, dWo, dWv, dBf, dBi, dBc, dBo, dBv, h_s[inputs.Length - 1], c_s[inputs.Length - 1]);