Пример #1
0
        public IWeightTensor Step(IWeightTensor input, IComputeGraph g)
        {
            using (IComputeGraph innerGraph = g.CreateSubGraph(m_name))
            {
                IWeightTensor hidden_prev = m_hidden;
                IWeightTensor cell_prev   = m_cell;

                IWeightTensor inputs = innerGraph.ConcatColumns(input, hidden_prev);
                IWeightTensor hhSum  = innerGraph.Affine(inputs, m_Wxh, m_b);
                IWeightTensor hhSum2 = m_layerNorm1.Norm(hhSum, innerGraph);

                (IWeightTensor gates_raw, IWeightTensor cell_write_raw) = innerGraph.SplitColumns(hhSum2, m_hdim * 3, m_hdim);
                IWeightTensor gates      = innerGraph.Sigmoid(gates_raw);
                IWeightTensor cell_write = innerGraph.Tanh(cell_write_raw);

                (IWeightTensor input_gate, IWeightTensor forget_gate, IWeightTensor output_gate) = innerGraph.SplitColumns(gates, m_hdim, m_hdim, m_hdim);

                // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
                m_cell = g.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
                IWeightTensor ct2 = m_layerNorm2.Norm(m_cell, innerGraph);

                // compute hidden state as gated, saturated cell activations
                m_hidden = g.EltMul(output_gate, innerGraph.Tanh(ct2));

                return(m_hidden);
            }
        }
Пример #2
0
        public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
        {
            var hidden_prev = ht;
            var cell_prev   = ct;

            var inputs = innerGraph.ConcatColumns(input, hidden_prev);
            var bs     = innerGraph.RepeatRows(b, input.Rows);
            var hhSum  = innerGraph.MulAdd(inputs, Wxh, bs);

            (var gates_raw, var cell_write_raw) = innerGraph.SplitColumns(hhSum, hdim * 3, hdim);

            var gates      = innerGraph.Sigmoid(gates_raw);
            var cell_write = innerGraph.Tanh(cell_write_raw);

            (var input_gate, var forget_gate, var output_gate) = innerGraph.SplitColumns(gates, hdim, hdim, hdim);

            // compute new cell activation
            var retain_cell = innerGraph.EltMul(forget_gate, cell_prev); // what do we keep from cell
            var write_cell  = innerGraph.EltMul(input_gate, cell_write); // what do we write to cell

            ct = innerGraph.Add(retain_cell, write_cell);                // new cell contents

            // compute hidden state as gated, saturated cell activations
            ht = innerGraph.EltMul(output_gate, innerGraph.Tanh(ct));

            return(ht);
        }
Пример #3
0
        public List <IWeightMatrix> Encode(List <IWeightMatrix> inputs, IComputeGraph g)
        {
            List <IWeightMatrix> forwardOutputs  = new List <IWeightMatrix>();
            List <IWeightMatrix> backwardOutputs = new List <IWeightMatrix>();

            List <IWeightMatrix> layerOutputs = inputs.ToList();
            int seqLen = inputs.Count;

            for (int i = 0; i < depth; i++)
            {
                for (int j = 0; j < seqLen; j++)
                {
                    var forwardOutput = forwardEncoders[i].Step(layerOutputs[j], g);
                    forwardOutputs.Add(forwardOutput);

                    var backwardOutput = backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
                    backwardOutputs.Add(backwardOutput);
                }

                backwardOutputs.Reverse();
                layerOutputs.Clear();
                for (int j = 0; j < seqLen; j++)
                {
                    var concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
                    layerOutputs.Add(concatW);
                }
            }

            return(layerOutputs);
        }
Пример #4
0
        /// <summary>
        /// Update LSTM-Attention cells according to given weights
        /// </summary>
        /// <param name="context">The context weights for attention</param>
        /// <param name="input">The input weights</param>
        /// <param name="computeGraph">The compute graph to build workflow</param>
        /// <returns>Update hidden weights</returns>
        public IWeightTensor Step(IWeightTensor context, IWeightTensor input, IComputeGraph g)
        {
            using (IComputeGraph computeGraph = g.CreateSubGraph(m_name))
            {
                IWeightTensor cell_prev   = Cell;
                IWeightTensor hidden_prev = Hidden;

                IWeightTensor hxhc   = computeGraph.ConcatColumns(input, hidden_prev, context);
                IWeightTensor hhSum  = computeGraph.Affine(hxhc, m_Wxhc, m_b);
                IWeightTensor hhSum2 = m_layerNorm1.Norm(hhSum, computeGraph);

                (IWeightTensor gates_raw, IWeightTensor cell_write_raw) = computeGraph.SplitColumns(hhSum2, m_hiddenDim * 3, m_hiddenDim);
                IWeightTensor gates      = computeGraph.Sigmoid(gates_raw);
                IWeightTensor cell_write = computeGraph.Tanh(cell_write_raw);

                (IWeightTensor input_gate, IWeightTensor forget_gate, IWeightTensor output_gate) = computeGraph.SplitColumns(gates, m_hiddenDim, m_hiddenDim, m_hiddenDim);

                // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
                Cell = g.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
                IWeightTensor ct2 = m_layerNorm2.Norm(Cell, computeGraph);

                Hidden = g.EltMul(output_gate, computeGraph.Tanh(ct2));


                return(Hidden);
            }
        }
Пример #5
0
        /// <summary>
        /// Update LSTM-Attention cells according to given weights
        /// </summary>
        /// <param name="context">The context weights for attention</param>
        /// <param name="input">The input weights</param>
        /// <param name="computeGraph">The compute graph to build workflow</param>
        /// <returns>Update hidden weights</returns>
        public IWeightMatrix Step(IWeightMatrix context, IWeightMatrix input, IComputeGraph computeGraph)
        {
            var cell_prev   = ct;
            var hidden_prev = ht;

            var hxhc  = computeGraph.ConcatColumns(input, hidden_prev, context);
            var bs    = computeGraph.RepeatRows(b, input.Rows);
            var hhSum = computeGraph.MulAdd(hxhc, Wxhc, bs);

            (var gates_raw, var cell_write_raw) = computeGraph.SplitColumns(hhSum, hdim * 3, hdim);
            var gates      = computeGraph.Sigmoid(gates_raw);
            var cell_write = computeGraph.Tanh(cell_write_raw);

            (var input_gate, var forget_gate, var output_gate) = computeGraph.SplitColumns(gates, hdim, hdim, hdim);

            // compute new cell activation
            //var retain_cell = computeGraph.EltMul(forget_gate, cell_prev);
            //var write_cell = computeGraph.EltMul(input_gate, cell_write);

            //ct = computeGraph.Add(retain_cell, write_cell);


            ct = computeGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);

            ht = computeGraph.EltMul(output_gate, computeGraph.Tanh(ct));

            return(ht);
        }
Пример #6
0
        public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
        {
            var hidden_prev = ht;
            var cell_prev   = ct;

            var inputs = innerGraph.ConcatColumns(input, hidden_prev);
            var bs     = innerGraph.RepeatRows(b, input.Rows);
            var hhSum  = innerGraph.MulAdd(inputs, Wxh, bs);
            var hhSum2 = layerNorm1.Process(hhSum, innerGraph);

            (var gates_raw, var cell_write_raw) = innerGraph.SplitColumns(hhSum2, hdim * 3, hdim);
            var gates      = innerGraph.Sigmoid(gates_raw);
            var cell_write = innerGraph.Tanh(cell_write_raw);

            (var input_gate, var forget_gate, var output_gate) = innerGraph.SplitColumns(gates, hdim, hdim, hdim);

            // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
            ct = innerGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
            var ct2 = layerNorm2.Process(ct, innerGraph);

            // compute hidden state as gated, saturated cell activations
            ht = innerGraph.EltMul(output_gate, innerGraph.Tanh(ct2));

            return(ht);
        }
        public IWeightTensor Perform(IWeightTensor state, AttentionPreProcessResult attenPreProcessResult, int batchSize, IComputeGraph graph)
        {
            int srcSeqLen = attenPreProcessResult.inputsBatchFirst.Rows / batchSize;

            using (IComputeGraph g = graph.CreateSubGraph(m_name))
            {
                // Affine decoder state
                IWeightTensor wc = g.Affine(state, m_Wa, m_bWa);

                // Expand dims from [batchSize x decoder_dim] to [batchSize x srcSeqLen x decoder_dim]
                IWeightTensor wc1   = g.View(wc, batchSize, 1, wc.Columns);
                IWeightTensor wcExp = g.Expand(wc1, batchSize, srcSeqLen, wc.Columns);

                IWeightTensor ggs = null;
                if (m_enableCoverageModel)
                {
                    // Get coverage model status at {t-1}
                    IWeightTensor wCoverage  = g.Affine(m_coverage.Hidden, m_Wc, m_bWc);
                    IWeightTensor wCoverage1 = g.View(wCoverage, batchSize, srcSeqLen, -1);

                    ggs = g.AddTanh(attenPreProcessResult.uhs, wcExp, wCoverage1);
                }
                else
                {
                    ggs = g.AddTanh(attenPreProcessResult.uhs, wcExp);
                }

                IWeightTensor ggss  = g.View(ggs, batchSize * srcSeqLen, -1);
                IWeightTensor atten = g.Mul(ggss, m_V);

                IWeightTensor attenT  = g.Transpose(atten);
                IWeightTensor attenT2 = g.View(attenT, batchSize, srcSeqLen);

                IWeightTensor attenSoftmax1 = g.Softmax(attenT2, inPlace: true);

                IWeightTensor attenSoftmax = g.View(attenSoftmax1, batchSize, 1, srcSeqLen);
                IWeightTensor inputs2      = g.View(attenPreProcessResult.inputsBatchFirst, batchSize, srcSeqLen, attenPreProcessResult.inputsBatchFirst.Columns);

                IWeightTensor contexts = graph.MulBatch(attenSoftmax, inputs2, batchSize);

                if (m_enableCoverageModel)
                {
                    // Concatenate tensor as input for coverage model
                    IWeightTensor aCoverage = g.View(attenSoftmax1, attenPreProcessResult.inputsBatchFirst.Rows, 1);


                    IWeightTensor state2 = g.View(state, batchSize, 1, state.Columns);
                    IWeightTensor state3 = g.Expand(state2, batchSize, srcSeqLen, state.Columns);
                    IWeightTensor state4 = g.View(state3, batchSize * srcSeqLen, -1);


                    IWeightTensor concate = g.ConcatColumns(aCoverage, attenPreProcessResult.inputsBatchFirst, state4);
                    m_coverage.Step(concate, graph);
                }


                return(contexts);
            }
        }
Пример #8
0
        /// <summary>
        /// Encode source sentences and output encoded weights
        /// </summary>
        /// <param name="g"></param>
        /// <param name="inputSentences"></param>
        /// <param name="encoder"></param>
        /// <param name="reversEncoder"></param>
        /// <param name="Embedding"></param>
        /// <returns></returns>
        private IWeightMatrix Encode(IComputeGraph g, List <List <string> > inputSentences, Encoder encoder, Encoder reversEncoder, IWeightMatrix Embedding)
        {
            PadSentences(inputSentences);
            List <IWeightMatrix> forwardOutputs  = new List <IWeightMatrix>();
            List <IWeightMatrix> backwardOutputs = new List <IWeightMatrix>();

            int seqLen = inputSentences[0].Count;
            List <IWeightMatrix> forwardInput = new List <IWeightMatrix>();

            for (int i = 0; i < seqLen; i++)
            {
                for (int j = 0; j < inputSentences.Count; j++)
                {
                    var inputSentence = inputSentences[j];
                    int ix_source     = (int)SENTTAGS.UNK;
                    if (m_srcWordToIndex.ContainsKey(inputSentence[i]))
                    {
                        ix_source = m_srcWordToIndex[inputSentence[i]];
                    }
                    var x = g.PeekRow(Embedding, ix_source);
                    forwardInput.Add(x);
                }
            }

            var forwardInputsM = g.ConcatRows(forwardInput);

            for (int i = 0; i < seqLen; i++)
            {
                var eOutput = encoder.Encode(g.PeekRow(forwardInputsM, i * inputSentences.Count, inputSentences.Count), g);
                forwardOutputs.Add(eOutput);

                var eOutput2 = reversEncoder.Encode(g.PeekRow(forwardInputsM, forwardInputsM.Rows - (i + 1) * inputSentences.Count, inputSentences.Count), g);
                backwardOutputs.Add(eOutput2);
            }

            backwardOutputs.Reverse();

            List <IWeightMatrix> encoded = new List <IWeightMatrix>();

            for (int i = 0; i < seqLen; i++)
            {
                encoded.Add(g.ConcatColumns(forwardOutputs[i], backwardOutputs[i]));
            }

            var encodedOutput = g.ConcatRows(encoded);

            return(encodedOutput);
        }
Пример #9
0
        public IWeightTensor Encode(IWeightTensor rawInputs, int batchSize, IComputeGraph g, IWeightTensor srcSelfMask)
        {
            int seqLen = rawInputs.Rows / batchSize;

            rawInputs = g.TransposeBatch(rawInputs, seqLen);

            List <IWeightTensor> inputs = new List <IWeightTensor>();

            for (int i = 0; i < seqLen; i++)
            {
                IWeightTensor emb_i = g.PeekRow(rawInputs, i * batchSize, batchSize);
                inputs.Add(emb_i);
            }

            List <IWeightTensor> forwardOutputs  = new List <IWeightTensor>();
            List <IWeightTensor> backwardOutputs = new List <IWeightTensor>();

            List <IWeightTensor> layerOutputs = inputs.ToList();

            for (int i = 0; i < m_depth; i++)
            {
                for (int j = 0; j < seqLen; j++)
                {
                    IWeightTensor forwardOutput = m_forwardEncoders[i].Step(layerOutputs[j], g);
                    forwardOutputs.Add(forwardOutput);

                    IWeightTensor backwardOutput = m_backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
                    backwardOutputs.Add(backwardOutput);
                }

                backwardOutputs.Reverse();
                layerOutputs.Clear();
                for (int j = 0; j < seqLen; j++)
                {
                    IWeightTensor concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
                    layerOutputs.Add(concatW);
                }
            }

            var result = g.ConcatRows(layerOutputs);

            return(g.TransposeBatch(result, batchSize));
        }
Пример #10
0
        public IWeightTensor Encode(IWeightTensor rawInputs, IComputeGraph g)
        {
            int seqLen = rawInputs.Rows / m_batchSize;

            List <IWeightTensor> inputs = new List <IWeightTensor>();

            for (int i = 0; i < seqLen; i++)
            {
                var emb_i = g.PeekRow(rawInputs, i * m_batchSize, m_batchSize);
                inputs.Add(emb_i);
            }

            List <IWeightTensor> forwardOutputs  = new List <IWeightTensor>();
            List <IWeightTensor> backwardOutputs = new List <IWeightTensor>();

            List <IWeightTensor> layerOutputs = inputs.ToList();

            for (int i = 0; i < m_depth; i++)
            {
                for (int j = 0; j < seqLen; j++)
                {
                    var forwardOutput = m_forwardEncoders[i].Step(layerOutputs[j], g);
                    forwardOutputs.Add(forwardOutput);

                    var backwardOutput = m_backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
                    backwardOutputs.Add(backwardOutput);
                }

                backwardOutputs.Reverse();
                layerOutputs.Clear();
                for (int j = 0; j < seqLen; j++)
                {
                    var concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
                    layerOutputs.Add(concatW);
                }
            }

            return(g.ConcatRows(layerOutputs));
        }
        /// <summary>
        /// Update LSTM-Attention cells according to given weights
        /// </summary>
        /// <param name="context">The context weights for attention</param>
        /// <param name="input">The input weights</param>
        /// <param name="computeGraph">The compute graph to build workflow</param>
        /// <returns>Update hidden weights</returns>
        public IWeightMatrix Step(IWeightMatrix context, IWeightMatrix input, IComputeGraph computeGraph)
        {
            var cell_prev   = ct;
            var hidden_prev = ht;

            var hxhc   = computeGraph.ConcatColumns(input, hidden_prev, context);
            var bs     = computeGraph.RepeatRows(b, input.Rows);
            var hhSum  = computeGraph.MulAdd(hxhc, Wxhc, bs);
            var hhSum2 = layerNorm1.Process(hhSum, computeGraph);

            (var gates_raw, var cell_write_raw) = computeGraph.SplitColumns(hhSum2, hdim * 3, hdim);
            var gates      = computeGraph.Sigmoid(gates_raw);
            var cell_write = computeGraph.Tanh(cell_write_raw);

            (var input_gate, var forget_gate, var output_gate) = computeGraph.SplitColumns(gates, hdim, hdim, hdim);

            // compute new cell activation: ct = forget_gate * cell_prev + input_gate * cell_write
            ct = computeGraph.EltMulMulAdd(forget_gate, cell_prev, input_gate, cell_write);
            var ct2 = layerNorm2.Process(ct, computeGraph);

            ht = computeGraph.EltMul(output_gate, computeGraph.Tanh(ct2));

            return(ht);
        }