/// <summary>
/// Builds the computation graph of one LSTM cell, i.e. a single recurrence step of a recurrent network.
/// Takes the previous cell state (c) and the previous output (h - hidden state) and
/// produces the new output and the new cell state.
/// </summary>
/// <param name="input">Input to the LSTM (X at step t).</param>
/// <param name="prevOutput">Previous LSTM output (h at step t-1); its first shape dimension is used as the output dimension.</param>
/// <param name="prevCellState">Previous LSTM cell state (c at step t-1); its first shape dimension is used as the cell dimension.</param>
/// <param name="useShortcutConnections">When true, the (optionally projected) input is added to the output h.</param>
/// <param name="selfStabilizerLayer">Optional layer; when not null its <c>Create</c> method is applied to <paramref name="prevOutput"/> and <paramref name="prevCellState"/> before they are used.</param>
/// <param name="device">Device on which the parameters are created.</param>
/// <returns>A tuple whose first item is the new output (h) and whose second item is the new cell state (c).</returns>
private static Tuple<Function, Function> LSTMCell(Variable input, Variable prevOutput, Variable prevCellState, bool useShortcutConnections, Layer selfStabilizerLayer, DeviceDescriptor device)
{
    int lstmOutputDimension = prevOutput.Shape[0];
    int lstmCellDimension = prevCellState.Shape[0];
    bool hasDifferentOutputAndCellDimension = lstmCellDimension != lstmOutputDimension;
    DataType dataType = input.DataType;

    if (selfStabilizerLayer != null)
    {
        prevOutput = selfStabilizerLayer.Create(prevOutput, device);
        prevCellState = selfStabilizerLayer.Create(prevCellState, device);
    }

    // Every parameter gets its own seed; the creation order below therefore determines the initial weights.
    uint seed = CNTKLib.GetRandomSeed();

    // Gate pre-activation built from the input X[t] and the hidden state H[t-1]: W * x + b + h.
    Func<int, Variable> createInput = (outputDim) =>
    {
        var inputWeights = new Parameter(new[] { outputDim, NDShape.InferredDimension }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var inputBias = new Parameter(new[] { outputDim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var inputToCell = CNTKLib.Times(inputWeights, input) + inputBias;
        var gateInput = CNTKLib.Plus(inputToCell, prevOutput);
        return gateInput;
    };

    // Learnable linear projection of a variable onto targetDim (used when output and cell dimensions differ).
    Func<int, Variable, Variable> createProjection = (targetDim, variableNeedsToProjection) =>
    {
        var projectionWeights = new Parameter(new[] { targetDim, NDShape.InferredDimension }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var projection = CNTKLib.Times(projectionWeights, variableNeedsToProjection);
        return projection;
    };

    Variable forgetProjection = createInput(lstmOutputDimension);
    Variable inputProjection = createInput(lstmOutputDimension);
    Variable candidateProjection = createInput(lstmOutputDimension);
    Variable outputProjection = createInput(lstmOutputDimension);

    Function forgetGate = CNTKLib.Sigmoid(forgetProjection);       // forget gate
    Function inputGate = CNTKLib.Sigmoid(inputProjection);         // input gate
    Function candidateGate = CNTKLib.Tanh(candidateProjection);    // candidate values for the cell state
    Function outputGate = CNTKLib.Sigmoid(outputProjection);       // output gate

    forgetGate = hasDifferentOutputAndCellDimension ? createProjection(lstmCellDimension, forgetGate) : (Variable)forgetGate;
    Function forgetState = CNTKLib.ElementTimes(prevCellState, forgetGate); // part of the old cell state that is kept

    // Fix: gate the tanh-activated candidate, not the raw pre-activation (candidateGate used to be computed but ignored).
    Function inputState = CNTKLib.ElementTimes(inputGate, candidateGate);
    inputState = hasDifferentOutputAndCellDimension ? createProjection(lstmCellDimension, inputState) : (Variable)inputState;

    Function cellState = CNTKLib.Plus(forgetState, inputState); // new cell state

    Variable cellToOutputProjection = hasDifferentOutputAndCellDimension ? createProjection(lstmOutputDimension, cellState) : (Variable)cellState;
    Function h = CNTKLib.ElementTimes(outputGate, CNTKLib.Tanh(cellToOutputProjection)); // new output / hidden state
    Function c = cellState;

    if (useShortcutConnections)
    {
        var forwarding = input;
        var inputDim = input.Shape[0];
        if (inputDim != lstmOutputDimension)
        {
            // Fix: UniformInitializer's first argument is the scale, so passing the seed there scaled the
            // weights by the seed value. Use the same seeded Glorot initializer as the other weight matrices.
            var scales = new Parameter(new[] { lstmOutputDimension, inputDim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
            forwarding = CNTKLib.Times(scales, input);
        }

        h = CNTKLib.Plus(h, forwarding);
    }

    return new Tuple<Function, Function>(h, c);
}
/// <summary>
/// Builds the computation graph of one LSTM cell, i.e. a single recurrence step of a recurrent network.
/// Takes the previous cell state (c) and the previous output (h - hidden state) and
/// produces the new output and the new cell state.
/// </summary>
/// <param name="input">Input to the LSTM (X at step t).</param>
/// <param name="prevOutput">Previous LSTM output (h at step t-1); its first shape dimension is used as the output dimension.</param>
/// <param name="prevCellState">Previous LSTM cell state (c at step t-1); its first shape dimension is used as the cell dimension.</param>
/// <param name="useShortcutConnections">When true, the (optionally projected) input is added to the output h.</param>
/// <param name="selfStabilizerLayer">Optional layer; when not null its <c>Create</c> method is applied to <paramref name="prevOutput"/> and <paramref name="prevCellState"/> before they are used.</param>
/// <param name="device">Device on which the parameters are created.</param>
/// <returns>A tuple whose first item is the new output (h) and whose second item is the new cell state (c).</returns>
private static Tuple<Function, Function> Cell(Variable input, Variable prevOutput, Variable prevCellState, bool useShortcutConnections, Layer selfStabilizerLayer, DeviceDescriptor device)
{
    int lstmOutputDimension = prevOutput.Shape[0];
    int lstmCellDimension = prevCellState.Shape[0];
    DataType dataType = input.DataType;

    if (selfStabilizerLayer != null)
    {
        prevOutput = selfStabilizerLayer.Create(prevOutput, device);
        prevCellState = selfStabilizerLayer.Create(prevCellState, device);
    }

    // Every parameter gets its own seed; the creation order below therefore determines the initial weights.
    uint seed = CNTKLib.GetRandomSeed();

    // Gate pre-activation built from the input X[t] and the hidden state H[t-1]: W * x + b + U * h.
    Variable CreateInput(int cellDim, int hiddenDim)
    {
        var inputWeights = new Parameter(new[] { cellDim, NDShape.InferredDimension }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var inputBias = new Parameter(new[] { cellDim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var inputToCell = CNTKLib.Times(inputWeights, input) + inputBias;

        var hiddenWeights = new Parameter(new[] { cellDim, hiddenDim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var hiddenState = CNTKLib.Times(hiddenWeights, prevOutput);

        var gateInput = CNTKLib.Plus(inputToCell, hiddenState);
        return gateInput;
    }

    Variable forgetProjection = CreateInput(lstmCellDimension, lstmOutputDimension);
    Variable inputProjection = CreateInput(lstmCellDimension, lstmOutputDimension);
    Variable candidateProjection = CreateInput(lstmCellDimension, lstmOutputDimension);
    Variable outputProjection = CreateInput(lstmCellDimension, lstmOutputDimension);

    Function forgetGate = CNTKLib.Sigmoid(forgetProjection);       // forget gate
    Function inputGate = CNTKLib.Sigmoid(inputProjection);         // input gate
    Function candidateGate = CNTKLib.Tanh(candidateProjection);    // candidate values for the cell state
    Function outputGate = CNTKLib.Sigmoid(outputProjection);       // output gate

    Function forgetState = CNTKLib.ElementTimes(prevCellState, forgetGate); // part of the old cell state that is kept

    // Fix: gate the tanh-activated candidate, not the raw pre-activation (candidateGate used to be computed but ignored).
    Function inputState = CNTKLib.ElementTimes(inputGate, candidateGate);

    Function cellState = CNTKLib.Plus(forgetState, inputState); // new cell state

    Function h = CNTKLib.ElementTimes(outputGate, CNTKLib.Tanh(cellState)); // new output / hidden state (cell dimension)
    Function c = cellState;

    if (lstmOutputDimension != lstmCellDimension)
    {
        // Project the output from the cell dimension to the requested output dimension.
        Parameter scale = new Parameter(new[] { lstmOutputDimension, lstmCellDimension }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        h = CNTKLib.Times(scale, h);
    }

    if (useShortcutConnections)
    {
        var forwarding = input;
        var inputDim = input.Shape[0];
        if (inputDim != lstmOutputDimension)
        {
            // Fix: UniformInitializer's first argument is the scale, so passing the seed there scaled the
            // weights by the seed value. Use the same seeded Glorot initializer as the other weight matrices.
            var scales = new Parameter(new[] { lstmOutputDimension, inputDim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
            forwarding = CNTKLib.Times(scales, input);
        }

        h = CNTKLib.Plus(h, forwarding);
    }

    return new Tuple<Function, Function>(h, c);
}
/// <summary>
/// Builds the computation graph of one LSTM cell, i.e. a single recurrence step of a recurrent network.
/// Takes the previous cell state (c) and the previous output (h - hidden state) and
/// produces the new output and the new cell state.
/// </summary>
/// <param name="input">Input to the LSTM (X at step t).</param>
/// <param name="prevOutput">Previous LSTM output (h at step t-1); its first shape dimension is used as the output dimension.</param>
/// <param name="prevCellState">Previous LSTM cell state (c at step t-1); its first shape dimension is used as the cell dimension.</param>
/// <param name="useShortcutConnections">When true, the (optionally projected) input is added to the output h.</param>
/// <param name="selfStabilizerLayer">Optional layer; when not null its <c>Create</c> method is applied to <paramref name="prevOutput"/> and <paramref name="prevCellState"/> before they are used.</param>
/// <param name="device">Device on which the parameters are created.</param>
/// <returns>A tuple whose first item is the new output (h) and whose second item is the new cell state (c).</returns>
private static Tuple<Function, Function> LSTMCell(Variable input, Variable prevOutput, Variable prevCellState, bool useShortcutConnections, Layer selfStabilizerLayer, DeviceDescriptor device)
{
    int lstmOutputDimension = prevOutput.Shape[0];
    int lstmCellDimension = prevCellState.Shape[0];
    bool hasDifferentOutputAndCellDimension = lstmCellDimension != lstmOutputDimension;
    DataType dataType = input.DataType;

    if (selfStabilizerLayer != null)
    {
        prevOutput = selfStabilizerLayer.Create(prevOutput, device);
        prevCellState = selfStabilizerLayer.Create(prevCellState, device);
    }

    // Every parameter gets its own seed; the creation order below therefore determines the initial weights.
    uint seed = CNTKLib.GetRandomSeed();

    // Gate pre-activation built from the input X[t] and the hidden state H[t-1]: W * x + b + h.
    Func<int, Variable> createInput = (outputDim) =>
    {
        var inputWeights = new Parameter(new[] { outputDim, NDShape.InferredDimension }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var inputBias = new Parameter(new[] { outputDim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var inputToCell = CNTKLib.Times(inputWeights, input) + inputBias;
        var gateInput = CNTKLib.Plus(inputToCell, prevOutput);
        return gateInput;
    };

    // Learnable linear projection of a variable onto targetDim (used when output and cell dimensions differ).
    Func<int, Variable, Variable> createProjection = (targetDim, variableNeedsToProjection) =>
    {
        var projectionWeights = new Parameter(new[] { targetDim, NDShape.InferredDimension }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
        var projection = CNTKLib.Times(projectionWeights, variableNeedsToProjection);
        return projection;
    };

    Variable forgetProjection = createInput(lstmOutputDimension);
    Variable inputProjection = createInput(lstmOutputDimension);
    Variable candidateProjection = createInput(lstmOutputDimension);
    Variable outputProjection = createInput(lstmOutputDimension);

    Function forgetGate = CNTKLib.Sigmoid(forgetProjection);       // forget gate
    Function inputGate = CNTKLib.Sigmoid(inputProjection);         // input gate
    Function candidateGate = CNTKLib.Tanh(candidateProjection);    // candidate values for the cell state
    Function outputGate = CNTKLib.Sigmoid(outputProjection);       // output gate

    forgetGate = hasDifferentOutputAndCellDimension ? createProjection(lstmCellDimension, forgetGate) : (Variable)forgetGate;
    Function forgetState = CNTKLib.ElementTimes(prevCellState, forgetGate); // part of the old cell state that is kept

    // Fix: gate the tanh-activated candidate, not the raw pre-activation (candidateGate used to be computed but ignored).
    Function inputState = CNTKLib.ElementTimes(inputGate, candidateGate);
    inputState = hasDifferentOutputAndCellDimension ? createProjection(lstmCellDimension, inputState) : (Variable)inputState;

    Function cellState = CNTKLib.Plus(forgetState, inputState); // new cell state

    Variable cellToOutputProjection = hasDifferentOutputAndCellDimension ? createProjection(lstmOutputDimension, cellState) : (Variable)cellState;
    Function h = CNTKLib.ElementTimes(outputGate, CNTKLib.Tanh(cellToOutputProjection)); // new output / hidden state
    Function c = cellState;

    if (useShortcutConnections)
    {
        var forwarding = input;
        var inputDim = input.Shape[0];
        if (inputDim != lstmOutputDimension)
        {
            // Fix: UniformInitializer's first argument is the scale, so passing the seed there scaled the
            // weights by the seed value. Use the same seeded Glorot initializer as the other weight matrices.
            var scales = new Parameter(new[] { lstmOutputDimension, inputDim }, dataType, CNTKLib.GlorotUniformInitializer(1.0, 1, 0, seed++), device);
            forwarding = CNTKLib.Times(scales, input);
        }

        h = CNTKLib.Plus(h, forwarding);
    }

    return new Tuple<Function, Function>(h, c);
}