/// <summary>
/// Backpropagates the memory-cell error <paramref name="memE"/> into all four gates
/// (output, forget, input, candidate) at timestep <paramref name="t"/>. The per-gate
/// calls accumulate weight gradients as a side effect; their returned input errors
/// are intentionally discarded here (contrast with BlockGateInptEr, which sums them).
/// </summary>
/// <param name="memE">Error signal on the memory cell state at timestep t.</param>
/// <param name="t">Timestep index.</param>
private void GateInptEr(double[] memE, int t)
{
    // Guard t == 0 the same way BlockGateInptEr does: with no previous cell state,
    // the forget gate sees a zero vector instead of reading MemoryCell[-1].
    double[] prevCell = (t > 0) ? MemoryCell[t - 1] : new double[NumberNodes];
    // NOTE(review): BlockOutputs[t] is passed where the gates expect 'prevBlockOutput';
    // the forward pass fed the gates BlockOutputs[t - 1] — confirm this is intended.
    Og.ComputeInputError(M.ElMult(memE, NonLin.Activ(MemoryCell[t])), inputs[t], BlockOutputs[t], t);
    Fg.ComputeInputError(M.ElMult(memE, prevCell), inputs[t], BlockOutputs[t], t);
    Ig.ComputeInputError(M.ElMult(memE, Zg.PostActvOut[t]), inputs[t], BlockOutputs[t], t);
    Zg.ComputeInputError(M.ElMult(memE, Ig.PostActvOut[t]), inputs[t], BlockOutputs[t], t);
}
/// <summary>
/// Backpropagates the layer's output error through the LSTM block and, via truncated
/// BPTT ('trunc' = number of steps), through earlier timesteps, accumulating gate
/// gradients. Weights are applied once the internal clock reaches timestep 0.
/// </summary>
/// <param name="E">Error signal arriving from the layer above.</param>
/// <param name="rnd">Random source forwarded to the deferred weight update.</param>
/// <param name="lr">Learning rate for the deferred weight update.</param>
/// <param name="maxGrad">Gradient clipping bound.</param>
/// <param name="lastDec">Decimal place gradients are clipped/rounded to.</param>
/// <param name="trunc">Truncated-BPTT horizon (how many timesteps to backprop through).</param>
/// <returns>This layer's input error, to pass to the layer below.</returns>
public double[] ComputeInputError(double[] E, Random rnd, double lr = 0.00001, int maxGrad = 1, int lastDec = 15, int trunc = 5)
{
    // Backprop through the top linear-regression layer first.
    E = LRgrs.ComputeErrorSignal(E);
    // Memory-cell error at the current timestep T: output-gate path through tanh'(c_T).
    double[] memE = M.ElMult(E, Og.PostActvOut[T], NonLin.Activ(MemoryCell[T], false, true));
    double[] RV = BlockGateInptEr(memE, T);
    if (T == NumberTimeSteps - 1)
    {
        for (int t = T; t >= T - trunc && t >= 1; --t) // truncated BPTT
        {
            if (t < T)
            {
                // Add each gate's recurrent contribution to the block-output error.
                // BUGFIX: the candidate-gate term previously paired Zg.RW with
                // Ig.eSgnl[t + 1]; it must use Zg's own error signal, Zg.eSgnl[t + 1].
                E = M.Sum(new double[][]
                {
                    E,
                    M.Dot(M.T(Ig.RW), Ig.eSgnl[t + 1]),
                    M.Dot(M.T(Zg.RW), Zg.eSgnl[t + 1]),
                    M.Dot(M.T(Fg.RW), Fg.eSgnl[t + 1]),
                    M.Dot(M.T(Og.RW), Og.eSgnl[t + 1])
                });
                // Cell error: direct path through the output gate plus the
                // carry-over from the next timestep through its forget gate.
                memE = M.Sum(
                    M.ElMult(E, Og.PostActvOut[t], NonLin.Activ(MemoryCell[t], false, true)),
                    M.ElMult(memE, Fg.PostActvOut[t + 1]));
                GateInptEr(memE, t);
            }
        }
    }
    Clock();
    if (T == 0)
    {
        // Reached the start of the sequence: apply accumulated gradients,
        // then reset the cell state for the next sequence.
        UpdateWs(lr, rnd, maxGrad, lastDec);
        MemoryCell = M.MakeMatrix(NumberTimeSteps, NumberNodes);
    }
    return RV;
}
/// <summary>
/// Converts the downstream delta into this layer's error signal, accumulates
/// gradients via truncated BPTT, advances the layer clock, and returns the
/// input error to feed to the layer below.
/// </summary>
/// <param name="outputdelta">Error on this layer's output at the current timestep.</param>
/// <param name="backsteps">Truncated-BPTT horizon.</param>
/// <param name="LearnRate">Learning rate forwarded to the gradient/update pass.</param>
/// <returns>The input's effect on this layer's error (W^T * errorSignal).</returns>
public double[] ComputeInputError(double[] outputdelta, int backsteps = 5, double LearnRate = 0.00001)
{
    // Error signal at T: delta gated by the activation derivative.
    double[] errorSignal = M.ElMult(outputdelta, NonLin.Activ(Output[T], Logistic, true));
    BPTT_Grads(errorSignal, backsteps, LearnRate); // accumulate weight gradients
    Clock();                                        // advance the internal timestep counter
    double[] inputError = M.Dot(M.T(W), errorSignal);
    return inputError;
}
/// <summary>
/// Computes this gate's error signal at timestep <paramref name="t"/> and, as a side
/// effect, accumulates the input-weight, recurrent-weight, and bias gradients.
/// </summary>
/// <param name="Error">Error arriving at the gate's post-activation output.</param>
/// <param name="input_t">The block input at timestep t.</param>
/// <param name="prevBlockOutput">Recurrent input used for the recurrent-weight gradient.</param>
/// <param name="t">Timestep index.</param>
/// <returns>The gate's contribution to the layer-input error (W^T * eSgnl[t]).</returns>
public double[] ComputeInputError(double[] Error, double[] input_t, double[] prevBlockOutput, int t)
{
    // Backprop the incoming error through this gate's nonlinearity.
    double[] delta = M.ElMult(Error, NonLin.Activ(PreActivOutput[t], Logistic, true));
    eSgnl[t] = delta;
    // Accumulate gradients: input weights, recurrent weights, bias.
    W_Grads = M.Sum(W_Grads, M.Outer(delta, input_t));
    RW_Grads = M.Sum(RW_Grads, M.Outer(delta, prevBlockOutput));
    B_Grads = M.Sum(B_Grads, delta);
    return M.Dot(M.T(W), delta);
}
/// <summary>
/// Forward pass for one timestep: records the input, then computes
/// activation(W·x + B + RW·h_prev) and stores it as this timestep's output.
/// </summary>
/// <param name="inputs">Input vector for the current timestep T.</param>
/// <returns>The activated output at timestep T.</returns>
public double[] ComputeOutputs(double[] inputs)
{
    Input.SetValue(inputs, T);
    // A zero vector stands in for the recurrent input on the first timestep.
    double[] recurrent = (T > 0) ? Output[T - 1] : new double[NumberNodes];
    double[] preActivation = M.Sum(new double[][] { M.Dot(W, inputs), B, M.Dot(RW, recurrent) });
    Output[T] = NonLin.Activ(preActivation, Logistic);
    return Output[T];
}
/// <summary>
/// Backpropagates the memory-cell error into all four gates at timestep
/// <paramref name="t"/> (accumulating their gradients as a side effect) and
/// returns the sum of the gates' input errors.
/// </summary>
/// <param name="memE">Error signal on the memory cell state at timestep t.</param>
/// <param name="t">Timestep index.</param>
/// <returns>Combined input error from the output, forget, input, and candidate gates.</returns>
private double[] BlockGateInptEr(double[] memE, int t)
{
    // First timestep has no previous cell state; substitute zeros.
    double[] prevCell = (t > 0) ? MemoryCell[t - 1] : new double[NumberNodes];
    // NOTE(review): BlockOutputs[t] is passed where the gates expect 'prevBlockOutput';
    // the forward pass fed the gates BlockOutputs[t - 1] — confirm this is intended.
    double[] ogErr = Og.ComputeInputError(M.ElMult(memE, NonLin.Activ(MemoryCell[t])), inputs[t], BlockOutputs[t], t);
    double[] fgErr = Fg.ComputeInputError(M.ElMult(memE, prevCell), inputs[t], BlockOutputs[t], t);
    double[] igErr = Ig.ComputeInputError(M.ElMult(memE, Zg.PostActvOut[t]), inputs[t], BlockOutputs[t], t);
    double[] zgErr = Zg.ComputeInputError(M.ElMult(memE, Ig.PostActvOut[t]), inputs[t], BlockOutputs[t], t);
    return M.Sum(new double[][] { ogErr, fgErr, igErr, zgErr });
}
/// <summary>
/// LSTM block forward pass for one timestep: updates the cell state from the
/// candidate/input/forget gates, gates the tanh of the cell with the output gate,
/// and feeds the block output through the top regression layer.
/// </summary>
/// <param name="input">Input vector for the current timestep T.</param>
/// <returns>The regression layer's output for timestep T.</returns>
public double[] ComputeOutputs(double[] input)
{
    inputs[T] = input;
    // Zero vectors stand in for the recurrent inputs on the first timestep.
    double[] prevOutput = (T > 0) ? BlockOutputs[T - 1] : new double[NumberNodes];
    double[] prevCell = (T > 0) ? MemoryCell[T - 1] : new double[NumberNodes];
    // c_T = z ⊙ i + f ⊙ c_{T-1}  (gate calls kept in the original evaluation order)
    double[] candidate = Zg.ComputeOutputs(input, prevOutput, T);
    double[] inputGate = Ig.ComputeOutputs(input, prevOutput, T);
    double[] forgetGate = Fg.ComputeOutputs(input, prevOutput, T);
    MemoryCell[T] = M.Sum(M.ElMult(candidate, inputGate), M.ElMult(forgetGate, prevCell));
    // h_T = tanh(c_T) ⊙ o
    double[] activCell = NonLin.Activ(MemoryCell[T]);
    double[] outputGate = Og.ComputeOutputs(input, prevOutput, T);
    BlockOutputs[T] = M.ElMult(activCell, outputGate);
    return LRgrs.ComputeOutputs(BlockOutputs[T]);
}
/// <summary>
/// Backprops through time from the current timestep 'T' back through 'backsteps'
/// steps (truncated BPTT), accumulating this layer's input-weight, bias, and
/// recurrent-weight gradients. When the clock has wrapped to T == 0, the
/// accumulated gradients are applied via UpdateWeights.
/// </summary>
/// <param name="E">Error signal at the current timestep T.</param>
/// <param name="backsteps">Truncated-BPTT horizon.</param>
/// <param name="LearnRate">Learning rate forwarded to UpdateWeights.</param>
/// <param name="maxGrad">Gradient clipping bound forwarded to UpdateWeights.</param>
/// <param name="clipToDecimal">Decimal place forwarded to UpdateWeights.</param>
private void BPTT_Grads(double[] E, int backsteps = 5, double LearnRate = 0.00001, int maxGrad = 1, int clipToDecimal = 15)
{
    // NOTE(review): the weight update fires BEFORE this call's own gradient
    // contribution for timestep 0 is accumulated below — confirm this ordering is intended.
    if (T == 0) { UpdateWeights(LearnRate, maxGrad, clipToDecimal); }
    W_Grads = M.Sum(W_Grads, M.Outer(E, Input[T])); // input-weight gradient at T
    B_Grads = M.Sum(B_Grads, E);                    // bias gradient at T
    for (int t = T; t >= T - backsteps + 1 && t >= (0 + 1); --t)//the +1 because we are backpropping just until 0
    {
        // Recurrent gradient: error at t paired with the output that fed it (t - 1).
        RW_Grads = M.Sum(RW_Grads, M.Outer(E, Output[t - 1]));
        // Push the error one step back through the recurrent weights
        // and the activation derivative at t - 1.
        E = M.ElMult(M.Dot(M.T(RW), E), NonLin.Activ(Output[t - 1], Logistic, true));
    }
}
/// <summary>
/// Applies a HyperbolicTangent or LogisticSigmoid activation element-wise over a
/// vector; if <paramref name="derivative"/> is set, applies the derivative of the
/// chosen function instead.
/// </summary>
/// <param name="In">Input vector.</param>
/// <param name="logisticSigmoid">true = logistic sigmoid; false = hyperbolic tangent.</param>
/// <param name="derivative">true = apply the derivative of the chosen function.</param>
/// <returns>A new vector holding the element-wise results.</returns>
public static double[] Activ(double[] In, bool logisticSigmoid = false, bool derivative = false)
{
    double[] Out = new double[In.Length];
    for (int i = 0; i < In.Length; ++i)
    {
        double x = In[i];
        if (derivative)
        {
            Out[i] = logisticSigmoid ? NonLin.LogisticSigmoid_Deriv(x) : NonLin.TanH_Deriv(x);
        }
        else
        {
            Out[i] = logisticSigmoid ? NonLin.LogisticSigmoid(x) : NonLin.HyperTan(x);
        }
    }
    return Out;
}
/// <summary>
/// Gate forward pass at timestep <paramref name="t"/>: dropout-masked pre-activation
/// (W·x + RW·h_prev + B) followed by the logistic nonlinearity. Both the pre- and
/// post-activation values are stored for use during backpropagation.
/// </summary>
/// <param name="input">Input vector at timestep t.</param>
/// <param name="prevBlockOutput">Block output from the previous timestep.</param>
/// <param name="t">Timestep index.</param>
/// <returns>The gate's post-activation output at timestep t.</returns>
public double[] ComputeOutputs(double[] input, double[] prevBlockOutput, int t)
{
    double[] net = M.Sum(new double[][] { M.Dot(W, input), M.Dot(RW, prevBlockOutput), B });
    PreActivOutput[t] = M.ElMult(DropoutIndices, net); // apply the dropout mask
    PostActvOut[t] = NonLin.Activ(PreActivOutput[t], Logistic);
    return PostActvOut[t];
}