/// <summary>
/// Allocates the cell array for this layer and fills every slot with a fresh LSTM cell.
/// </summary>
public void AllocateMemoryForLSTMCells()
{
    cell = new LSTMCell[LayerSize];
    for (var index = 0; index < LayerSize; index++)
    {
        cell[index] = new LSTMCell();
    }
}
/// <summary>
/// Constructs an LSTM layer from the given configuration (also forwarded to the base
/// layer) and allocates one LSTM cell per neuron in the layer.
/// </summary>
/// <param name="config">Layer configuration to keep and pass to the base class.</param>
public LSTMLayer(LSTMLayerConfig config) : base(config)
{
    this.config = config;

    LSTMCells = new LSTMCell[LayerSize];
    for (int index = 0; index < LayerSize; index++)
    {
        LSTMCells[index] = new LSTMCell();
    }
}
/// <summary>
/// Clears a cell's state and the partial-derivative accumulators kept alongside it.
/// </summary>
/// <param name="c">Cell whose internal state is reset.</param>
/// <param name="cw">Cell weights; not modified here (kept for signature compatibility).</param>
/// <param name="deri">Partial-derivative accumulators to zero out.</param>
private void InitializeLSTMCell(LSTMCell c, LSTMCellWeight cw, LSTMCellWeightDeri deri)
{
    c.cellState = 0;

    // Reset the accumulated partial derivatives for the peephole and cell weights.
    deri.dSWPeepholeIn = 0;
    deri.dSWPeepholeForget = 0;
    deri.dSWCellIn = 0;
    deri.dSWCellForget = 0;
    deri.dSWCellState = 0;
}
/// <summary>
/// Resets a cell's current and previous states plus its partial-derivative accumulators.
/// </summary>
/// <param name="c">Cell to reset.</param>
private void InitializeLSTMCell(LSTMCell c)
{
    // State carried over between time steps.
    c.previousCellState = 0;
    c.cellState = 0;

    // Partial derivatives accumulated for the internal weights.
    c.dSWPeepholeIn = 0;
    c.dSWPeepholeForget = 0;
    c.dSWCellIn = 0;
    c.dSWCellForget = 0;
    c.dSWCellState = 0;
}
/// <summary>
/// Copies every state field from another cell into this instance.
/// </summary>
/// <param name="cell">Source cell to copy from.</param>
public void Set(LSTMCell cell)
{
    // Previous time-step state.
    previousCellState = cell.previousCellState;
    previousCellOutput = cell.previousCellOutput;

    // Current internal cell state.
    cellState = cell.cellState;

    // Net (pre-activation) inputs of the cell and its gates.
    netCellState = cell.netCellState;
    netForget = cell.netForget;
    netIn = cell.netIn;
    netOut = cell.netOut;

    // Squashed (post-activation) values.
    yCellState = cell.yCellState;
    yForget = cell.yForget;
    yIn = cell.yIn;
    yOut = cell.yOut;
}
/// <summary>
/// Resets a cell's current/previous state and its partial-derivative accumulators.
/// </summary>
/// <param name="c">Cell to reset.</param>
public void LSTMCellInit(LSTMCell c)
{
    c.previousCellState = 0;
    c.cellState = 0;

    // Partial derivatives for the peephole and cell-internal weights.
    c.dSWPeepholeIn = 0;
    c.dSWPeepholeForget = 0;
    c.dSWCellIn = 0;
    c.dSWCellForget = 0;
    c.dSWCellState = 0;
}
/// <summary>
/// Builds the output layer and the hidden LSTM cells. Cell-internal weights are either
/// restored from <paramref name="br"/> (model loading) or drawn at random (fresh model).
/// </summary>
/// <param name="br">Reader positioned at the stored weights, or null to random-initialize.</param>
private void CreateCell(BinaryReader br)
{
    neuFeatures = null;
    OutputLayer = new SimpleLayer(L2);

    neuHidden = new LSTMCell[L1];
    for (int idx = 0; idx < L1; idx++)
    {
        neuHidden[idx] = new LSTMCell();
        // The last cell is initialized as the bias cell.
        LSTMCellInit(neuHidden[idx], idx == L1 - 1);
    }

    if (br != null)
    {
        // Restore each cell's internal weights from the model file,
        // in the same order they were written.
        for (int idx = 0; idx < L1; idx++)
        {
            neuHidden[idx].wPeepholeIn = br.ReadDouble();
            neuHidden[idx].wPeepholeForget = br.ReadDouble();
            neuHidden[idx].wPeepholeOut = br.ReadDouble();
            neuHidden[idx].wCellIn = br.ReadDouble();
            neuHidden[idx].wCellForget = br.ReadDouble();
            neuHidden[idx].wCellState = br.ReadDouble();
            neuHidden[idx].wCellOut = br.ReadDouble();
        }
    }
    else
    {
        // Fresh model: draw every internal weight at random.
        // These internal connections matter a lot for training quality.
        for (int idx = 0; idx < L1; idx++)
        {
            neuHidden[idx].wPeepholeIn = RandInitWeight();
            neuHidden[idx].wPeepholeForget = RandInitWeight();
            neuHidden[idx].wPeepholeOut = RandInitWeight();
            neuHidden[idx].wCellIn = RandInitWeight();
            neuHidden[idx].wCellForget = RandInitWeight();
            neuHidden[idx].wCellState = RandInitWeight();
            neuHidden[idx].wCellOut = RandInitWeight();
        }
    }
}
/// <summary>
/// Resets a cell's states and derivative accumulators; sets the cell output according
/// to whether this is a regular cell or the bias cell.
/// </summary>
/// <param name="c">Cell to reset.</param>
/// <param name="bBias">True for the bias cell, which keeps a constant output of 1.0.</param>
public void LSTMCellInit(LSTMCell c, bool bBias = false)
{
    c.previousCellState = 0;
    c.cellState = 0;

    // Partial derivatives for the internal weights.
    c.dSWPeepholeIn = 0;
    c.dSWPeepholeForget = 0;
    c.dSWCellIn = 0;
    c.dSWCellForget = 0;
    c.dSWCellState = 0;

    // Bias cell emits a constant 1.0; a regular cell starts from 0.
    c.cellOutput = bBias ? 1.0 : 0;
}
// Forward pass for one time step: consumes the sparse/dense input features and updates
// every hidden LSTM cell's gates, state, and output in place.
// NOTE(review): Parallel.For iterates [0, L1 - 1), so the last hidden cell is never
// updated here — presumably it is the constant bias cell; confirm against LSTMCellInit.
public override void computeHiddenLayer(State state, bool isTrain = true)
{
    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it into hidden layer
    var sparse = state.SparseData;
    int sparseFeatureSize = sparse.Count;
    Parallel.For(0, L1 - 1, parallelOption, j =>
    {
        LSTMCell cell_j = neuHidden[j];

        //hidden(t-1) -> hidden(t): shift the current state/output into "previous".
        cell_j.previousCellState = cell_j.cellState;
        cell_j.previousCellOutput = cell_j.cellOutput;

        // Accumulate all four net inputs (in, forget, cellState, out) as one Vector4.
        Vector4 vecCell_j = Vector4.Zero;

        //Apply sparse weights
        Vector4[] weights = input2hidden[j];
        for (int i = 0; i < sparseFeatureSize; i++)
        {
            var entry = sparse.GetEntry(i);
            vecCell_j += weights[entry.Key] * entry.Value;
        }

        //Apply dense weights
        if (DenseFeatureSize > 0)
        {
            weights = feature2hidden[j];
            for (int i = 0; i < DenseFeatureSize; i++)
            {
                vecCell_j += weights[i] * neuFeatures[i];
            }
        }

        // Unpack the accumulated vector into the four net inputs
        // (this overwrites — i.e. resets — whatever was there before).
        cell_j.netIn = vecCell_j.X;
        cell_j.netForget = vecCell_j.Y;
        cell_j.netCellState = vecCell_j.Z;
        cell_j.netOut = vecCell_j.W;

        //include internal connection multiplied by the previous cell state
        cell_j.netIn += cell_j.previousCellState * cell_j.wPeepholeIn + cell_j.previousCellOutput * cell_j.wCellIn;
        //squash input gate
        cell_j.yIn = Sigmoid(cell_j.netIn);

        //include internal connection multiplied by the previous cell state
        cell_j.netForget += cell_j.previousCellState * cell_j.wPeepholeForget + cell_j.previousCellOutput * cell_j.wCellForget;
        cell_j.yForget = Sigmoid(cell_j.netForget);

        cell_j.netCellState += cell_j.previousCellOutput * cell_j.wCellState;
        cell_j.yCellState = TanH(cell_j.netCellState);

        if (cell_j.mask == true)
        {
            // Masked (dropped) cell: its state is forced to zero.
            cell_j.cellState = 0;
        }
        else
        {
            //cell state is the previous cell state scaled by the forget gate
            //plus the cell input scaled by the input gate
            cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;
        }

        if (isTrain == false)
        {
            // Inference-time dropout compensation: scale the state by the keep rate.
            cell_j.cellState = cell_j.cellState * (1.0 - Dropout);
        }

        //include the internal connection multiplied by the CURRENT cell state
        cell_j.netOut += cell_j.cellState * cell_j.wPeepholeOut + cell_j.previousCellOutput * cell_j.wCellOut;
        //squash output gate
        cell_j.yOut = Sigmoid(cell_j.netOut);

        // Final cell output: squashed state gated by the output gate.
        cell_j.cellOutput = TanH(cell_j.cellState) * cell_j.yOut;
        neuHidden[j] = cell_j;
    });
}
// Backward pass / weight update for one time step: propagates each cell's error into
// the sparse-input, dense-input, peephole, and cell-internal weights, carrying the
// recurrent partial derivatives forward through the forget gate (RTRL-style).
// NOTE(review): Parallel.For iterates [0, L1 - 1) — the last hidden cell is skipped,
// presumably because it is the bias cell; confirm against computeHiddenLayer.
public override void LearnNet(State state, int numStates, int curState)
{
    //Get sparse feature and apply it into hidden layer
    var sparse = state.SparseData;
    int sparseFeatureSize = sparse.Count;

    //put variables for derivaties in weight class and cell class
    Parallel.For(0, L1 - 1, parallelOption, i =>
    {
        LSTMCell c = neuHidden[i];

        //using the error find the gradient of the output gate
        var gradientOutputGate = (float)(SigmoidDerivative(c.netOut) * TanH(c.cellState) * c.er);
        //internal cell state error
        var cellStateError = (float)(c.yOut * c.er * TanHDerivative(c.cellState));

        // Error vector: (in, forget, cellState) share the state error; W holds the output-gate gradient.
        Vector4 vecErr = new Vector4(cellStateError, cellStateError, cellStateError, gradientOutputGate);

        // Precomputed factors reused across all weight updates below.
        var Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn = TanH(c.netCellState) * SigmoidDerivative(c.netIn);
        var ci_previousCellState_mul_SigmoidDerivative_ci_netForget = c.previousCellState * SigmoidDerivative(c.netForget);
        var Sigmoid2Derivative_ci_netCellState_mul_ci_yIn = TanHDerivative(c.netCellState) * c.yIn;
        Vector3 vecDerivate = new Vector3(
            (float)(Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn),
            (float)(ci_previousCellState_mul_SigmoidDerivative_ci_netForget),
            (float)(Sigmoid2Derivative_ci_netCellState_mul_ci_yIn));
        float c_yForget = (float)c.yForget;

        // --- Sparse input weights ---
        Vector4[] w_i = input2hidden[i];
        Vector3[] wd_i = input2hiddenDeri[i];
        Vector4[] wlr_i = Input2HiddenLearningRate[i];
        for (int k = 0; k < sparseFeatureSize; k++)
        {
            var entry = sparse.GetEntry(k);
            Vector3 wd = vecDerivate * entry.Value;
            if (curState > 0)
            {
                //Adding historical information (recurrence through the forget gate)
                wd += wd_i[entry.Key] * c_yForget;
            }
            wd_i[entry.Key] = wd;

            //Computing final err delta
            Vector4 vecDelta = new Vector4(wd, entry.Value);
            vecDelta = vecErr * vecDelta;
            vecDelta = Vector4.Clamp(vecDelta, vecMinGrad, vecMaxGrad);

            //Computing actual learning rate and applying the update
            Vector4 vecLearningRate = ComputeLearningRate(vecDelta, ref wlr_i[entry.Key]);
            w_i[entry.Key] += vecLearningRate * vecDelta;
        }

        // --- Dense input weights (same scheme as the sparse weights above) ---
        if (DenseFeatureSize > 0)
        {
            w_i = feature2hidden[i];
            wd_i = feature2hiddenDeri[i];
            wlr_i = Feature2HiddenLearningRate[i];
            for (int j = 0; j < DenseFeatureSize; j++)
            {
                float feature = neuFeatures[j];
                Vector3 wd = vecDerivate * feature;
                if (curState > 0)
                {
                    //Adding historical information
                    wd += wd_i[j] * c_yForget;
                }
                wd_i[j] = wd;
                Vector4 vecDelta = new Vector4(wd, feature);
                vecDelta = vecErr * vecDelta;
                vecDelta = Vector4.Clamp(vecDelta, vecMinGrad, vecMaxGrad);
                //Computing actual learning rate
                Vector4 vecLearningRate = ComputeLearningRate(vecDelta, ref wlr_i[j]);
                w_i[j] += vecLearningRate * vecDelta;
            }
        }

        // --- Peephole weights ---
        //partial derivatives for internal connections
        c.dSWPeepholeIn = c.dSWPeepholeIn * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * c.previousCellState;
        //partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
        c.dSWPeepholeForget = c.dSWPeepholeForget * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * c.previousCellState;

        //update internal weights
        Vector3 vecCellDelta = new Vector3((float)c.dSWPeepholeIn, (float)c.dSWPeepholeForget, (float)c.cellState);
        Vector3 vecErr3 = new Vector3(cellStateError, cellStateError, gradientOutputGate);
        vecCellDelta = vecErr3 * vecCellDelta;
        //Normalize err by gradient cut-off
        vecCellDelta = Vector3.Clamp(vecCellDelta, vecMinGrad3, vecMaxGrad3);
        //Computing actual learning rate
        Vector3 vecCellLearningRate = ComputeLearningRate(vecCellDelta, ref PeepholeLearningRate[i]);
        vecCellDelta = vecCellLearningRate * vecCellDelta;
        c.wPeepholeIn += vecCellDelta.X;
        c.wPeepholeForget += vecCellDelta.Y;
        c.wPeepholeOut += vecCellDelta.Z;

        // --- Cell-internal weights ---
        //partial derivatives for internal connections
        c.dSWCellIn = c.dSWCellIn * c.yForget + Sigmoid2_ci_netCellState_mul_SigmoidDerivative_ci_netIn * c.previousCellOutput;
        //partial derivatives for internal connections, initially zero as dS is zero and previous cell state is zero
        c.dSWCellForget = c.dSWCellForget * c.yForget + ci_previousCellState_mul_SigmoidDerivative_ci_netForget * c.previousCellOutput;
        c.dSWCellState = c.dSWCellState * c.yForget + Sigmoid2Derivative_ci_netCellState_mul_ci_yIn * c.previousCellOutput;

        Vector4 vecCellDelta4 = new Vector4((float)c.dSWCellIn, (float)c.dSWCellForget, (float)c.dSWCellState, (float)c.previousCellOutput);
        vecCellDelta4 = vecErr * vecCellDelta4;
        //Normalize err by gradient cut-off
        vecCellDelta4 = Vector4.Clamp(vecCellDelta4, vecMinGrad, vecMaxGrad);
        //Computing actual learning rate
        Vector4 vecCellLearningRate4 = ComputeLearningRate(vecCellDelta4, ref CellLearningRate[i]);
        vecCellDelta4 = vecCellLearningRate4 * vecCellDelta4;
        c.wCellIn += vecCellDelta4.X;
        c.wCellForget += vecCellDelta4.Y;
        c.wCellState += vecCellDelta4.Z;
        c.wCellOut += vecCellDelta4.W;

        neuHidden[i] = c;
    });
}
/// <summary>
/// Allocates L1 regular hidden cells plus one bias cell, then either restores the
/// cell-internal weights from <paramref name="br"/> or initializes them at random
/// in the range (-1/sqrt(3), 1/sqrt(3)]. The bias cell's weights stay zero.
/// </summary>
/// <param name="br">Reader positioned at the stored weights, or null to random-initialize.</param>
private void CreateHiddenLayerCells(BinaryReader br)
{
    neuHidden = new LSTMCell[L1 + 1];
    for (int idx = 0; idx < L1; idx++)
    {
        neuHidden[idx] = new LSTMCell();
        LSTMCellInit(NORMAL, neuHidden[idx]);
    }
    // Extra slot at the end is the bias cell.
    neuHidden[L1] = new LSTMCell();
    LSTMCellInit(BIAS, neuHidden[L1]);

    if (br != null)
    {
        // Restore weights for every cell, bias cell included.
        for (int idx = 0; idx < L1 + 1; idx++)
        {
            neuHidden[idx].wCellIn = br.ReadDouble();
            neuHidden[idx].wCellForget = br.ReadDouble();
            neuHidden[idx].wCellOut = br.ReadDouble();
        }
    }
    else
    {
        // Fresh model: uniform random in (-bound, bound]. These internal
        // weights are important for training quality.
        double bound = 1 / Math.Sqrt(3);
        for (int idx = 0; idx < L1; idx++)
        {
            neuHidden[idx].wCellIn = (((double)((rand() % 100) + 1) / 100) * 2 * bound) - bound;
            neuHidden[idx].wCellForget = (((double)((rand() % 100) + 1) / 100) * 2 * bound) - bound;
            neuHidden[idx].wCellOut = (((double)((rand() % 100) + 1) / 100) * 2 * bound) - bound;
        }
        // Bias cell keeps zero internal weights.
        neuHidden[L1].wCellIn = 0;
        neuHidden[L1].wCellForget = 0;
        neuHidden[L1].wCellOut = 0;
    }
}
/// <summary>
/// Creates the dense-feature buffer, the output neurons, and the hidden LSTM cells.
/// Cell-internal weights are restored from <paramref name="br"/> when provided,
/// otherwise initialized at random.
/// </summary>
/// <param name="br">Reader positioned at the stored weights, or null to random-initialize.</param>
private void CreateCell(BinaryReader br)
{
    neuFeatures = new SingleVector(DenseFeatureSize);

    OutputLayer = new neuron[L2];
    for (int o = 0; o < L2; o++)
    {
        OutputLayer[o].cellOutput = 0;
        OutputLayer[o].er = 0;
    }

    neuHidden = new LSTMCell[L1];
    for (int idx = 0; idx < L1; idx++)
    {
        neuHidden[idx] = new LSTMCell();
        LSTMCellInit(neuHidden[idx]);
    }

    if (br != null)
    {
        // Restore each cell's internal weights (stored as single precision).
        for (int idx = 0; idx < L1; idx++)
        {
            neuHidden[idx].wCellIn = br.ReadSingle();
            neuHidden[idx].wCellForget = br.ReadSingle();
            neuHidden[idx].wCellOut = br.ReadSingle();
        }
    }
    else
    {
        // Fresh model: random internal weights — these matter for training quality.
        for (int idx = 0; idx < L1; idx++)
        {
            neuHidden[idx].wCellIn = RandInitWeight();
            neuHidden[idx].wCellForget = RandInitWeight();
            neuHidden[idx].wCellOut = RandInitWeight();
        }
    }
}
/// <summary>
/// Fully resets a cell: all gate net inputs/activations, states, error terms, and
/// derivative accumulators. The final output depends on the cell kind.
/// </summary>
/// <param name="type">Cell kind flag; true yields cellOutput 0, false yields -1
/// (presumably NORMAL vs BIAS — confirm against the declared constants).</param>
/// <param name="c">Cell to reset.</param>
public void LSTMCellInit(bool type, LSTMCell c)
{
    // input gate
    c.netIn = 0;
    c.yIn = 0;

    // forget gate
    c.netForget = 0;
    c.yForget = 0;

    // cell state — clearing previousCellState here is important
    c.netCellState = 0;
    c.previousCellState = 0;
    c.cellState = 0;
    c.cellStateError = 0;

    // partial derivatives
    c.dSWCellIn = 0;
    c.dSWCellForget = 0;

    // output gate
    c.netOut = 0;
    c.yOut = 0;
    c.gradientOutputGate = 0;

    // cell output: 0 for the "true" kind, -1 otherwise
    c.cellOutput = type ? 0 : -1;
}
// Forward pass for one time step over sparse + dense inputs: updates every cell's
// gates and state in place and writes the squashed, gated result into cellOutput[j].
// Unlike the cell-struct variant, previous/current outputs live in separate arrays.
public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
{
    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it into hidden layer
    SparseFeature = sparseFeature;
    DenseFeature = denseFeature;

    Parallel.For(0, LayerSize, parallelOption, j =>
    {
        LSTMCell cell_j = cell[j];

        //hidden(t-1) -> hidden(t): shift current state/output into "previous".
        cell_j.previousCellState = cell_j.cellState;
        previousCellOutput[j] = cellOutput[j];

        // Accumulate the four net inputs (in, forget, cellState, out) as one Vector4.
        Vector4 vecCell_j = Vector4.Zero;

        if (SparseFeatureSize > 0)
        {
            //Apply sparse weights
            Vector4[] weights = input2hidden[j];
            for (int i = 0; i < SparseFeature.Count; i++)
            {
                var entry = SparseFeature.GetEntry(i);
                vecCell_j += weights[entry.Key] * entry.Value;
            }
        }

        //Apply dense weights
        if (DenseFeatureSize > 0)
        {
            Vector4[] weights = feature2hidden[j];
            for (int i = 0; i < DenseFeatureSize; i++)
            {
                vecCell_j += weights[i] * (float)DenseFeature[i];
            }
        }

        // Unpack the accumulated vector into the four net inputs
        // (this overwrites — i.e. resets — whatever was there before).
        cell_j.netIn = vecCell_j.X;
        cell_j.netForget = vecCell_j.Y;
        cell_j.netCellState = vecCell_j.Z;
        cell_j.netOut = vecCell_j.W;

        double cell_j_previousCellOutput = previousCellOutput[j];

        //include internal connection multiplied by the previous cell state
        cell_j.netIn += cell_j.previousCellState * cell_j.wPeepholeIn + cell_j_previousCellOutput * cell_j.wCellIn;
        //squash input gate
        cell_j.yIn = Sigmoid(cell_j.netIn);

        //include internal connection multiplied by the previous cell state
        cell_j.netForget += cell_j.previousCellState * cell_j.wPeepholeForget + cell_j_previousCellOutput * cell_j.wCellForget;
        cell_j.yForget = Sigmoid(cell_j.netForget);

        cell_j.netCellState += cell_j_previousCellOutput * cell_j.wCellState;
        cell_j.yCellState = TanH(cell_j.netCellState);

        //cell state is the previous cell state scaled by the forget gate
        //plus the cell input scaled by the input gate
        cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;

        //include the internal connection multiplied by the CURRENT cell state
        cell_j.netOut += cell_j.cellState * cell_j.wPeepholeOut + cell_j_previousCellOutput * cell_j.wCellOut;
        //squash output gate
        cell_j.yOut = Sigmoid(cell_j.netOut);

        // Final cell output: squashed state gated by the output gate.
        cellOutput[j] = TanH(cell_j.cellState) * cell_j.yOut;
        cell[j] = cell_j;
    });
}
/// <summary>
/// Copy constructor: clones all state fields from an existing cell via Set.
/// </summary>
public LSTMCell(LSTMCell cell)
{
    Set(cell);
}
/// <summary>
/// Accumulates srcmatrix^T * cellOutput into the .ac field of dest[from..to):
/// dest[from + r].ac += sum over c of srcvec[from2 + c].cellOutput * srcmatrix[c][r].
/// Rows are processed in parallel; accumulation order within a row is preserved.
/// </summary>
public void matrixXvectorADD(neuron[] dest, LSTMCell[] srcvec, Matrix srcmatrix, int from, int to, int from2, int to2)
{
    Parallel.For(0, (to - from), parallelOption, row =>
    {
        for (int col = 0; col < to2 - from2; col++)
        {
            dest[row + from].ac += srcvec[col + from2].cellOutput * srcmatrix[col][row];
        }
    });
}
/// <summary>
/// Accumulates the weighted source activations into the input-gate net input of each
/// destination cell: dest[from + r].netIn += sum over c of
/// srcvec[from2 + c].ac * srcmatrix[r][c].wInputInputGate.
/// Rows are processed in parallel; accumulation order within a row is preserved.
/// </summary>
public void matrixXvectorADD(LSTMCell[] dest, neuron[] srcvec, LSTMWeight[][] srcmatrix, int from, int to, int from2, int to2)
{
    Parallel.For(0, (to - from), parallelOption, row =>
    {
        for (int col = 0; col < to2 - from2; col++)
        {
            dest[row + from].netIn += srcvec[col + from2].ac * srcmatrix[row][col].wInputInputGate;
        }
    });
}
/// <summary>
/// Fully resets a cell: all gate net inputs/activations, states, derivative
/// accumulators, and the cell output.
/// </summary>
/// <param name="c">Cell to reset.</param>
public void LSTMCellInit(LSTMCell c)
{
    // input gate
    c.netIn = 0;
    c.yIn = 0;

    // forget gate
    c.netForget = 0;
    c.yForget = 0;

    // cell state — clearing previousCellState here is important
    c.netCellState = 0;
    c.previousCellState = 0;
    c.cellState = 0;

    // partial derivatives
    c.dSWCellIn = 0;
    c.dSWCellForget = 0;

    // output gate
    c.netOut = 0;
    c.yOut = 0;

    // cell output
    c.cellOutput = 0;
}