public IWeightTensor Step(IWeightTensor input, IComputeGraph g)
{
    using (IComputeGraph innerGraph = g.CreateSubGraph(m_name))
    {
        IWeightTensor hidden_prev = m_hidden;
        IWeightTensor cell_prev = m_cell;

        IWeightTensor inputs = innerGraph.ConcatColumns(input, hidden_prev);
        IWeightTensor hhSum = innerGraph.Affine(inputs, m_Wxh, m_b);
        IWeightTensor hhSum2 = m_layerNorm1.Norm(hhSum, innerGraph);

        (IWeightTensor gates_raw, IWeightTensor cell_write_raw) = innerGraph.SplitColumns(hhSum2, m_hdim * 3, m_hdim);
        IWeightTensor gates = innerGraph.Sigmoid(gates_raw);
        IWeightTensor cell_write = innerGraph.Tanh(cell_write_raw);

        (IWeightTensor input_gate, IWeightTensor forget_gate, IWeightTensor output_gate) = innerGraph.SplitColumns(gates, m_hdim, m_hdim, m_hdim);

        // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
        // note: the new cell and hidden states are created on the outer graph g, so they remain usable after the sub-graph is disposed
        m_cell = g.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
        IWeightTensor ct2 = m_layerNorm2.Norm(m_cell, innerGraph);

        // compute hidden state as gated, saturated cell activations
        m_hidden = g.EltMul(output_gate, innerGraph.Tanh(ct2));

        return m_hidden;
    }
}
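For reference, the update this cell computes can be written out as follows. The symbols are my own shorthand for the tensors in the code (x_t for input, h for m_hidden, c for m_cell, d for m_hdim), not identifiers from the source:

\[
\begin{aligned}
z_t &= \mathrm{LN}_1\big([\,x_t \,;\, h_{t-1}\,]\, W_{xh} + b\big) \\
[\,i_t \,;\, f_t \,;\, o_t\,] &= \sigma\big(z_t[:, 1{:}3d]\big), \qquad \hat{c}_t = \tanh\big(z_t[:, 3d{+}1{:}4d]\big) \\
c_t &= f_t \odot c_{t-1} + i_t \odot \hat{c}_t \\
h_t &= o_t \odot \tanh\big(\mathrm{LN}_2(c_t)\big)
\end{aligned}
\]

EltMulMulAdd fuses the two element-wise products and the addition of the cell update into a single op.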
public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
{
    var hidden_prev = ht;
    var cell_prev = ct;

    var inputs = innerGraph.ConcatColumns(input, hidden_prev);
    var bs = innerGraph.RepeatRows(b, input.Rows);
    var hhSum = innerGraph.MulAdd(inputs, Wxh, bs);

    (var gates_raw, var cell_write_raw) = innerGraph.SplitColumns(hhSum, hdim * 3, hdim);
    var gates = innerGraph.Sigmoid(gates_raw);
    var cell_write = innerGraph.Tanh(cell_write_raw);

    (var input_gate, var forget_gate, var output_gate) = innerGraph.SplitColumns(gates, hdim, hdim, hdim);

    // compute new cell activation
    var retain_cell = innerGraph.EltMul(forget_gate, cell_prev); // what do we keep from cell
    var write_cell = innerGraph.EltMul(input_gate, cell_write);  // what do we write to cell
    ct = innerGraph.Add(retain_cell, write_cell);                // new cell contents

    // compute hidden state as gated, saturated cell activations
    ht = innerGraph.EltMul(output_gate, innerGraph.Tanh(ct));

    return ht;
}
public List<IWeightMatrix> Encode(List<IWeightMatrix> inputs, IComputeGraph g)
{
    List<IWeightMatrix> forwardOutputs = new List<IWeightMatrix>();
    List<IWeightMatrix> backwardOutputs = new List<IWeightMatrix>();
    List<IWeightMatrix> layerOutputs = inputs.ToList();
    int seqLen = inputs.Count;

    for (int i = 0; i < depth; i++)
    {
        // Reset the per-layer buffers so forwardOutputs[j] / backwardOutputs[j] below
        // index the current layer's outputs rather than accumulating across layers.
        forwardOutputs.Clear();
        backwardOutputs.Clear();

        for (int j = 0; j < seqLen; j++)
        {
            var forwardOutput = forwardEncoders[i].Step(layerOutputs[j], g);
            forwardOutputs.Add(forwardOutput);

            var backwardOutput = backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
            backwardOutputs.Add(backwardOutput);
        }

        backwardOutputs.Reverse();

        layerOutputs.Clear();
        for (int j = 0; j < seqLen; j++)
        {
            var concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
            layerOutputs.Add(concatW);
        }
    }

    return layerOutputs;
}
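Written out, each layer l of this bidirectional encoder runs a forward and a backward recurrence over the previous layer's outputs and concatenates them per position. The notation below is my shorthand (o for layerOutputs, T for seqLen), not taken from the source:

\[
\overrightarrow{h}^{(l)}_j = \mathrm{LSTM}^{(l)}_{\rightarrow}\big(o^{(l-1)}_j,\ \overrightarrow{h}^{(l)}_{j-1}\big), \qquad
\overleftarrow{h}^{(l)}_j = \mathrm{LSTM}^{(l)}_{\leftarrow}\big(o^{(l-1)}_j,\ \overleftarrow{h}^{(l)}_{j+1}\big), \qquad
o^{(l)}_j = \big[\,\overrightarrow{h}^{(l)}_j \,;\, \overleftarrow{h}^{(l)}_j\,\big]
\]

with o^{(0)}_j the input embeddings; the ConcatColumns call is the per-position concatenation, and the Reverse() call aligns the backward outputs with the forward ones.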
/// <summary>
/// Update LSTM-Attention cells according to given weights
/// </summary>
/// <param name="context">The context weights for attention</param>
/// <param name="input">The input weights</param>
/// <param name="g">The compute graph to build the workflow</param>
/// <returns>The updated hidden weights</returns>
public IWeightTensor Step(IWeightTensor context, IWeightTensor input, IComputeGraph g)
{
    using (IComputeGraph computeGraph = g.CreateSubGraph(m_name))
    {
        IWeightTensor cell_prev = Cell;
        IWeightTensor hidden_prev = Hidden;

        IWeightTensor hxhc = computeGraph.ConcatColumns(input, hidden_prev, context);
        IWeightTensor hhSum = computeGraph.Affine(hxhc, m_Wxhc, m_b);
        IWeightTensor hhSum2 = m_layerNorm1.Norm(hhSum, computeGraph);

        (IWeightTensor gates_raw, IWeightTensor cell_write_raw) = computeGraph.SplitColumns(hhSum2, m_hiddenDim * 3, m_hiddenDim);
        IWeightTensor gates = computeGraph.Sigmoid(gates_raw);
        IWeightTensor cell_write = computeGraph.Tanh(cell_write_raw);

        (IWeightTensor input_gate, IWeightTensor forget_gate, IWeightTensor output_gate) = computeGraph.SplitColumns(gates, m_hiddenDim, m_hiddenDim, m_hiddenDim);

        // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
        // note: the new cell and hidden states are created on the outer graph g, so they remain usable after the sub-graph is disposed
        Cell = g.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
        IWeightTensor ct2 = m_layerNorm2.Norm(Cell, computeGraph);

        // compute hidden state as gated, saturated cell activations
        Hidden = g.EltMul(output_gate, computeGraph.Tanh(ct2));

        return Hidden;
    }
}
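The attention variant differs from the plain cell only in the input to the affine transform: the attention context vector is concatenated onto the input and the previous hidden state. In the same shorthand as above (again mine, not from the source):

\[
z_t = \mathrm{LN}_1\big([\,x_t \,;\, h_{t-1} \,;\, \mathrm{ctx}_t\,]\, W_{xhc} + b\big)
\]

The gate split, cell update, and output are then computed exactly as in the plain LSTM cell.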
/// <summary>
/// Update LSTM-Attention cells according to given weights
/// </summary>
/// <param name="context">The context weights for attention</param>
/// <param name="input">The input weights</param>
/// <param name="computeGraph">The compute graph to build the workflow</param>
/// <returns>The updated hidden weights</returns>
public IWeightMatrix Step(IWeightMatrix context, IWeightMatrix input, IComputeGraph computeGraph)
{
    var cell_prev = ct;
    var hidden_prev = ht;

    var hxhc = computeGraph.ConcatColumns(input, hidden_prev, context);
    var bs = computeGraph.RepeatRows(b, input.Rows);
    var hhSum = computeGraph.MulAdd(hxhc, Wxhc, bs);

    (var gates_raw, var cell_write_raw) = computeGraph.SplitColumns(hhSum, hdim * 3, hdim);
    var gates = computeGraph.Sigmoid(gates_raw);
    var cell_write = computeGraph.Tanh(cell_write_raw);

    (var input_gate, var forget_gate, var output_gate) = computeGraph.SplitColumns(gates, hdim, hdim, hdim);

    // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write,
    // using the fused EltMulMulAdd op instead of two EltMul calls followed by an Add
    ct = computeGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);

    // compute hidden state as gated, saturated cell activations
    ht = computeGraph.EltMul(output_gate, computeGraph.Tanh(ct));

    return ht;
}
public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
{
    var hidden_prev = ht;
    var cell_prev = ct;

    var inputs = innerGraph.ConcatColumns(input, hidden_prev);
    var bs = innerGraph.RepeatRows(b, input.Rows);
    var hhSum = innerGraph.MulAdd(inputs, Wxh, bs);
    var hhSum2 = layerNorm1.Process(hhSum, innerGraph);

    (var gates_raw, var cell_write_raw) = innerGraph.SplitColumns(hhSum2, hdim * 3, hdim);
    var gates = innerGraph.Sigmoid(gates_raw);
    var cell_write = innerGraph.Tanh(cell_write_raw);

    (var input_gate, var forget_gate, var output_gate) = innerGraph.SplitColumns(gates, hdim, hdim, hdim);

    // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
    ct = innerGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
    var ct2 = layerNorm2.Process(ct, innerGraph);

    // compute hidden state as gated, saturated cell activations
    ht = innerGraph.EltMul(output_gate, innerGraph.Tanh(ct2));

    return ht;
}
public IWeightTensor Perform(IWeightTensor state, AttentionPreProcessResult attenPreProcessResult, int batchSize, IComputeGraph graph)
{
    int srcSeqLen = attenPreProcessResult.inputsBatchFirst.Rows / batchSize;

    using (IComputeGraph g = graph.CreateSubGraph(m_name))
    {
        // Affine decoder state
        IWeightTensor wc = g.Affine(state, m_Wa, m_bWa);

        // Expand dims from [batchSize x decoder_dim] to [batchSize x srcSeqLen x decoder_dim]
        IWeightTensor wc1 = g.View(wc, batchSize, 1, wc.Columns);
        IWeightTensor wcExp = g.Expand(wc1, batchSize, srcSeqLen, wc.Columns);

        IWeightTensor ggs = null;
        if (m_enableCoverageModel)
        {
            // Get coverage model status at {t-1}
            IWeightTensor wCoverage = g.Affine(m_coverage.Hidden, m_Wc, m_bWc);
            IWeightTensor wCoverage1 = g.View(wCoverage, batchSize, srcSeqLen, -1);

            ggs = g.AddTanh(attenPreProcessResult.uhs, wcExp, wCoverage1);
        }
        else
        {
            ggs = g.AddTanh(attenPreProcessResult.uhs, wcExp);
        }

        IWeightTensor ggss = g.View(ggs, batchSize * srcSeqLen, -1);
        IWeightTensor atten = g.Mul(ggss, m_V);
        IWeightTensor attenT = g.Transpose(atten);
        IWeightTensor attenT2 = g.View(attenT, batchSize, srcSeqLen);

        IWeightTensor attenSoftmax1 = g.Softmax(attenT2, inPlace: true);
        IWeightTensor attenSoftmax = g.View(attenSoftmax1, batchSize, 1, srcSeqLen);
        IWeightTensor inputs2 = g.View(attenPreProcessResult.inputsBatchFirst, batchSize, srcSeqLen, attenPreProcessResult.inputsBatchFirst.Columns);

        IWeightTensor contexts = graph.MulBatch(attenSoftmax, inputs2, batchSize);

        if (m_enableCoverageModel)
        {
            // Concatenate tensor as input for coverage model
            IWeightTensor aCoverage = g.View(attenSoftmax1, attenPreProcessResult.inputsBatchFirst.Rows, 1);
            IWeightTensor state2 = g.View(state, batchSize, 1, state.Columns);
            IWeightTensor state3 = g.Expand(state2, batchSize, srcSeqLen, state.Columns);
            IWeightTensor state4 = g.View(state3, batchSize * srcSeqLen, -1);
            IWeightTensor concate = g.ConcatColumns(aCoverage, attenPreProcessResult.inputsBatchFirst, state4);

            m_coverage.Step(concate, graph);
        }

        return contexts;
    }
}
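Algebraically, this is additive (Bahdanau-style) attention with an optional coverage term. In the shorthand below, s_t is the decoder state, h_j the encoder outputs (inputsBatchFirst), and U h_j the pre-computed projection held in attenPreProcessResult.uhs; this mapping of symbols to tensors is my reading of the code, not stated in the source:

\[
\begin{aligned}
e_{t,j} &= v^{\top} \tanh\big(U h_j + W_a s_t + b_{W_a} \;[\,+\; W_c\, \mathrm{cov}_{t-1,j} + b_{W_c}\,]\big) \\
\alpha_{t,j} &= \frac{\exp(e_{t,j})}{\sum_{k} \exp(e_{t,k})}, \qquad
\mathrm{ctx}_t = \sum_{j} \alpha_{t,j}\, h_j
\end{aligned}
\]

The bracketed coverage term is used only when m_enableCoverageModel is set; in that case the coverage state is advanced by stepping m_coverage on the concatenation of the attention weights, the encoder outputs, and the expanded decoder state.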
/// <summary>
/// Encode source sentences and output the encoded weights
/// </summary>
/// <param name="g">The compute graph to build the workflow</param>
/// <param name="inputSentences">The tokenized source sentences</param>
/// <param name="encoder">The forward encoder</param>
/// <param name="reversEncoder">The encoder run over the reversed sequence</param>
/// <param name="Embedding">The source-side word embedding matrix</param>
/// <returns>The encoded weights</returns>
private IWeightMatrix Encode(IComputeGraph g, List<List<string>> inputSentences, Encoder encoder, Encoder reversEncoder, IWeightMatrix Embedding)
{
    PadSentences(inputSentences);

    List<IWeightMatrix> forwardOutputs = new List<IWeightMatrix>();
    List<IWeightMatrix> backwardOutputs = new List<IWeightMatrix>();
    int seqLen = inputSentences[0].Count;

    // Look up the embedding for each word, batching all sentences per time step
    List<IWeightMatrix> forwardInput = new List<IWeightMatrix>();
    for (int i = 0; i < seqLen; i++)
    {
        for (int j = 0; j < inputSentences.Count; j++)
        {
            var inputSentence = inputSentences[j];
            int ix_source = (int)SENTTAGS.UNK;
            if (m_srcWordToIndex.ContainsKey(inputSentence[i]))
            {
                ix_source = m_srcWordToIndex[inputSentence[i]];
            }

            var x = g.PeekRow(Embedding, ix_source);
            forwardInput.Add(x);
        }
    }

    var forwardInputsM = g.ConcatRows(forwardInput);

    // Run the forward encoder over the sequence and the reverse encoder over the reversed sequence
    for (int i = 0; i < seqLen; i++)
    {
        var eOutput = encoder.Encode(g.PeekRow(forwardInputsM, i * inputSentences.Count, inputSentences.Count), g);
        forwardOutputs.Add(eOutput);

        var eOutput2 = reversEncoder.Encode(g.PeekRow(forwardInputsM, forwardInputsM.Rows - (i + 1) * inputSentences.Count, inputSentences.Count), g);
        backwardOutputs.Add(eOutput2);
    }

    backwardOutputs.Reverse();

    // Concatenate forward and backward outputs per time step
    List<IWeightMatrix> encoded = new List<IWeightMatrix>();
    for (int i = 0; i < seqLen; i++)
    {
        encoded.Add(g.ConcatColumns(forwardOutputs[i], backwardOutputs[i]));
    }

    var encodedOutput = g.ConcatRows(encoded);

    return encodedOutput;
}
public IWeightTensor Encode(IWeightTensor rawInputs, int batchSize, IComputeGraph g, IWeightTensor srcSelfMask)
{
    int seqLen = rawInputs.Rows / batchSize;
    rawInputs = g.TransposeBatch(rawInputs, seqLen);

    List<IWeightTensor> inputs = new List<IWeightTensor>();
    for (int i = 0; i < seqLen; i++)
    {
        IWeightTensor emb_i = g.PeekRow(rawInputs, i * batchSize, batchSize);
        inputs.Add(emb_i);
    }

    List<IWeightTensor> forwardOutputs = new List<IWeightTensor>();
    List<IWeightTensor> backwardOutputs = new List<IWeightTensor>();
    List<IWeightTensor> layerOutputs = inputs.ToList();

    for (int i = 0; i < m_depth; i++)
    {
        // Reset the per-layer buffers so the indices below refer to the current layer's outputs
        forwardOutputs.Clear();
        backwardOutputs.Clear();

        for (int j = 0; j < seqLen; j++)
        {
            IWeightTensor forwardOutput = m_forwardEncoders[i].Step(layerOutputs[j], g);
            forwardOutputs.Add(forwardOutput);

            IWeightTensor backwardOutput = m_backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
            backwardOutputs.Add(backwardOutput);
        }

        backwardOutputs.Reverse();

        layerOutputs.Clear();
        for (int j = 0; j < seqLen; j++)
        {
            IWeightTensor concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
            layerOutputs.Add(concatW);
        }
    }

    var result = g.ConcatRows(layerOutputs);

    return g.TransposeBatch(result, batchSize);
}
public IWeightTensor Encode(IWeightTensor rawInputs, IComputeGraph g)
{
    int seqLen = rawInputs.Rows / m_batchSize;

    List<IWeightTensor> inputs = new List<IWeightTensor>();
    for (int i = 0; i < seqLen; i++)
    {
        var emb_i = g.PeekRow(rawInputs, i * m_batchSize, m_batchSize);
        inputs.Add(emb_i);
    }

    List<IWeightTensor> forwardOutputs = new List<IWeightTensor>();
    List<IWeightTensor> backwardOutputs = new List<IWeightTensor>();
    List<IWeightTensor> layerOutputs = inputs.ToList();

    for (int i = 0; i < m_depth; i++)
    {
        // Reset the per-layer buffers so the indices below refer to the current layer's outputs
        forwardOutputs.Clear();
        backwardOutputs.Clear();

        for (int j = 0; j < seqLen; j++)
        {
            var forwardOutput = m_forwardEncoders[i].Step(layerOutputs[j], g);
            forwardOutputs.Add(forwardOutput);

            var backwardOutput = m_backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
            backwardOutputs.Add(backwardOutput);
        }

        backwardOutputs.Reverse();

        layerOutputs.Clear();
        for (int j = 0; j < seqLen; j++)
        {
            var concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
            layerOutputs.Add(concatW);
        }
    }

    return g.ConcatRows(layerOutputs);
}
/// <summary>
/// Update LSTM-Attention cells according to given weights
/// </summary>
/// <param name="context">The context weights for attention</param>
/// <param name="input">The input weights</param>
/// <param name="computeGraph">The compute graph to build the workflow</param>
/// <returns>The updated hidden weights</returns>
public IWeightMatrix Step(IWeightMatrix context, IWeightMatrix input, IComputeGraph computeGraph)
{
    var cell_prev = ct;
    var hidden_prev = ht;

    var hxhc = computeGraph.ConcatColumns(input, hidden_prev, context);
    var bs = computeGraph.RepeatRows(b, input.Rows);
    var hhSum = computeGraph.MulAdd(hxhc, Wxhc, bs);
    var hhSum2 = layerNorm1.Process(hhSum, computeGraph);

    (var gates_raw, var cell_write_raw) = computeGraph.SplitColumns(hhSum2, hdim * 3, hdim);
    var gates = computeGraph.Sigmoid(gates_raw);
    var cell_write = computeGraph.Tanh(cell_write_raw);

    (var input_gate, var forget_gate, var output_gate) = computeGraph.SplitColumns(gates, hdim, hdim, hdim);

    // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
    ct = computeGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
    var ct2 = layerNorm2.Process(ct, computeGraph);

    // compute hidden state as gated, saturated cell activations
    ht = computeGraph.EltMul(output_gate, computeGraph.Tanh(ct2));

    return ht;
}