Пример #1
0
        /// <summary>
        /// Advances the recurrent cell by one step: combines <paramref name="input"/> with the
        /// previous hidden state, derives three gates plus a candidate cell write, and updates
        /// the stored cell (<c>ct</c>) and hidden (<c>ht</c>) state.
        /// </summary>
        /// <param name="input">Input for the current step; its row count sizes the repeated bias.</param>
        /// <param name="innerGraph">Compute graph on which every operation of this step is issued.</param>
        /// <returns>The new hidden state, which is also stored in <c>ht</c>.</returns>
        public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
        {
            var previousHidden = ht;
            var previousCell = ct;

            // Project [input, previous hidden] through Wxh with a per-row bias, then normalise.
            var joined = innerGraph.ConcatColumns(input, previousHidden);
            var biasRows = innerGraph.RepeatRows(b, input.Rows);
            var projected = innerGraph.MulAdd(joined, Wxh, biasRows);
            var normalized = layerNorm1.Process(projected, innerGraph);

            // Split into a (3 * hdim)-wide gate part and an hdim-wide candidate part.
            var (rawGates, rawCandidate) = innerGraph.SplitColumns(normalized, hdim * 3, hdim);
            var squashedGates = innerGraph.Sigmoid(rawGates);
            var candidate = innerGraph.Tanh(rawCandidate);

            var (inputGate, forgetGate, outputGate) = innerGraph.SplitColumns(squashedGates, hdim, hdim, hdim);

            // New cell state: forgetGate * previousCell + inputGate * candidate.
            ct = innerGraph.EltMulMulAdd(forgetGate, previousCell, inputGate, candidate);
            var normalizedCell = layerNorm2.Process(ct, innerGraph);

            // Hidden state: output gate applied to tanh of the normalised cell state.
            ht = innerGraph.EltMul(outputGate, innerGraph.Tanh(normalizedCell));

            return ht;
        }
Пример #2
0
        /// <summary>
        /// Transformer encoder: runs the input through every entry of <c>m_encoders</c> followed
        /// by the matching entry of <c>m_posFFNs</c>, then applies the final layer normalisation.
        /// </summary>
        /// <param name="inputs">Input tensor; its row count divided by <paramref name="batchSize"/> is used as the sequence length when a mask is supplied.</param>
        /// <param name="batchSize">Number of sequences in the batch.</param>
        /// <param name="g">Parent compute graph; all work happens in a sub-graph created from it.</param>
        /// <param name="srcSelfMask">Optional self-attention mask. When <c>null</c>, a <c>null</c> mask is passed to the layers.</param>
        /// <returns>The encoded tensor, unbound from the sub-graph before the sub-graph is disposed.</returns>
        public IWeightTensor Encode(IWeightTensor inputs, int batchSize, IComputeGraph g, IWeightTensor srcSelfMask)
        {
            using (IComputeGraph subg = g.CreateSubGraph($"{m_name}_Encoder"))
            {
                IWeightTensor maskTensor = null;
                if (srcSelfMask != null)
                {
                    int seqLen = inputs.Rows / batchSize;
                    long[] viewDims = { batchSize, 1, seqLen, seqLen };
                    long[] expandDims = { batchSize, m_multiHeadNum, seqLen, seqLen };

                    // View the mask with a singleton second dimension, then expand that dimension to m_multiHeadNum.
                    using var keyMaskView = subg.View(srcSelfMask, dims: viewDims);
                    maskTensor = subg.Expand(keyMaskView, dims: expandDims);
                }

                IWeightTensor attnProbs = null;
                for (int layer = 0; layer < m_encoders.Count; layer++)
                {
                    (inputs, attnProbs) = m_encoders[layer].Perform(inputs, maskTensor, batchSize, subg, outputAttenWeights: false);
                    inputs = m_posFFNs[layer].Perform(inputs, batchSize, subg);
                }

                inputs = layerNorm.Norm(inputs, subg);

                // Unbind the results from the sub-graph before the using block disposes it.
                inputs.UnbindFromComputeGraph();
                attnProbs?.UnbindFromComputeGraph();

                maskTensor?.Dispose();
            }

            return inputs;
        }
        /// <summary>
        /// Computes an attention context for <paramref name="state"/> over the rows of
        /// <paramref name="input"/>: projects both sides, combines them with <c>addtanh</c>,
        /// scores them with <c>V</c>, applies softmax and returns <c>sumColumns</c> of the
        /// softmax-weighted input rows.
        /// </summary>
        /// <param name="input">Matrix whose rows are attended over.</param>
        /// <param name="state">State that is repeated once per input row before projection.</param>
        /// <param name="g">Compute graph on which all operations are issued.</param>
        /// <returns>The context matrix produced by <c>g.sumColumns</c>.</returns>
        public WeightMatrix Perform(WeightMatrix input, WeightMatrix state, IComputeGraph g)
        {
            var stateRepeat = g.RepeatRows(state, input.Rows);

            // Extra single column appended before the Ua projection.
            // NOTE(review): presumably a column of ones acting as a bias input — confirm against the WeightMatrix constructor.
            var inputBias = new WeightMatrix(input.Rows, 1, 1);
            var inputWithBias = g.concatColumns(input, inputBias);
            var uh = g.mul(inputWithBias, Ua);

            // Same treatment for the repeated state before the Wa projection.
            var stateBias = new WeightMatrix(stateRepeat.Rows, 1, 1);
            var stateWithBias = g.concatColumns(stateRepeat, stateBias);
            var wc = g.mul(stateWithBias, Wa);

            var gg = g.addtanh(uh, wc);
            var aa = g.mul(gg, V);
            var res = g.Softmax(aa);

            var weighted = g.weightRows(input, res);

            return g.sumColumns(weighted);
        }
Пример #4
0
        /// <summary>
        /// Builds an additive self-attention padding mask of shape
        /// [batch, paddedLength, paddedLength]: entries whose row and column both fall inside the
        /// sequence's original length are 0, every other entry is -1e30.
        /// </summary>
        /// <param name="g">Not used by this method.</param>
        /// <param name="paddedLength">Padded sequence length shared by every sequence in the batch.</param>
        /// <param name="originalLengths">Unpadded length of each sequence; its count is the batch size.</param>
        /// <param name="deviceId">Device id passed to the tensor constructor and embedded in the tensor name.</param>
        /// <returns>A new tensor holding the mask values.</returns>
        public static IWeightTensor BuildPadSelfMask(IComputeGraph g, int paddedLength, List <int> originalLengths, int deviceId)
        {
            var batchSize = originalLengths.Count;
            var mask = new float[batchSize * paddedLength * paddedLength];

            // Start fully masked.
            for (var pos = 0; pos < mask.Length; pos++)
            {
                mask[pos] = -1e30f;
            }

            // Open the top-left length x length square of every sample.
            for (var sample = 0; sample < batchSize; sample++)
            {
                var length = originalLengths[sample];
                var sampleBase = sample * (paddedLength * paddedLength);

                for (var row = 0; row < length; row++)
                {
                    var rowBase = sampleBase + row * paddedLength;
                    for (var col = 0; col < length; col++)
                    {
                        mask[rowBase + col] = 0.0f;
                    }
                }
            }

            var tensor = new WeightTensor(new long[] { originalLengths.Count, paddedLength, paddedLength }, 0.0f, deviceId, $"TriMask_{deviceId}", false);
            tensor.SetWeightArray(mask);

            return tensor;
        }
Пример #5
0
        /// <summary>
        /// Builds an additive target-to-source attention mask of shape
        /// [batch, tgtPaddedLength, srcPaddedLength]: an entry is 0 when its row is inside the
        /// target's original length and its column is inside the source's original length,
        /// otherwise -99999999.
        /// </summary>
        /// <param name="g">Not used by this method.</param>
        /// <param name="srcPaddedLength">Padded source length (number of mask columns).</param>
        /// <param name="tgtPaddedLength">Padded target length (number of mask rows).</param>
        /// <param name="tgtOriginalLengths">Unpadded target length per sequence; its count is the batch size.</param>
        /// <param name="srcOriginalLengths">Unpadded source length per sequence.</param>
        /// <param name="deviceId">Device id passed to the tensor constructor and embedded in the tensor name.</param>
        /// <returns>A new non-trainable tensor holding the mask values.</returns>
        public static IWeightTensor BuildSrcTgtMask(IComputeGraph g, int srcPaddedLength, int tgtPaddedLength, List <int> tgtOriginalLengths, List <int> srcOriginalLengths, int deviceId)
        {
            int batchSize = tgtOriginalLengths.Count;
            var mask = new float[batchSize * tgtPaddedLength * srcPaddedLength];

            // Start fully masked.
            Array.Fill(mask, -99999999.0f);

            for (int sample = 0; sample < batchSize; sample++)
            {
                int sampleBase = sample * (tgtPaddedLength * srcPaddedLength);
                int tgtLength = tgtOriginalLengths[sample];

                for (int row = 0; row < tgtLength; row++)
                {
                    int rowBase = sampleBase + row * srcPaddedLength;
                    int srcLength = srcOriginalLengths[sample];

                    for (int col = 0; col < srcLength; col++)
                    {
                        mask[rowBase + col] = 0.0f;
                    }
                }
            }

            var tensor = new WeightTensor(new long[] { tgtOriginalLengths.Count, tgtPaddedLength, srcPaddedLength }, deviceId, $"SrcTgtMask_{deviceId}", isTrainable: false);
            tensor.SetWeightArray(mask);

            return tensor;
        }
Пример #6
0
        /// <summary>
        /// Applies layer normalisation to <paramref name="input"/>, repeating <c>alpha</c> and
        /// <c>beta</c> once per input row before handing them to <c>LayerNorm</c>.
        /// </summary>
        /// <param name="input">Matrix to normalise.</param>
        /// <param name="innerGraph">Compute graph on which the operations are issued.</param>
        /// <returns>The result of <c>innerGraph.LayerNorm</c>.</returns>
        public IWeightMatrix Process(IWeightMatrix input, IComputeGraph innerGraph)
        {
            return innerGraph.LayerNorm(
                input,
                innerGraph.RepeatRows(alpha, input.Rows),
                innerGraph.RepeatRows(beta, input.Rows));
        }
Пример #7
0
        /// <summary>
        /// Looks up the embedding of every source token (batch-first order), optionally adds
        /// position embeddings, and runs the result through <paramref name="encoder"/>.
        /// </summary>
        /// <param name="g">Compute graph on which the operations are issued.</param>
        /// <param name="srcSnts">Tokenised source sentences; the first sentence's length is used as the sequence length for all of them.</param>
        /// <param name="encoder">Encoder that consumes the embedded batch.</param>
        /// <param name="Embedding">Source embedding table from which rows are peeked.</param>
        /// <param name="srcSelfMask">Self-attention mask forwarded unchanged to the encoder.</param>
        /// <param name="posEmbedding">Position embeddings; only used when the encoder type is Transformer.</param>
        /// <param name="originalSrcLengths">Unpadded length of each sentence; positions at or beyond it are peeked with gradients disabled.</param>
        /// <returns>The encoder output.</returns>
        private IWeightTensor Encode(IComputeGraph g, List <List <string> > srcSnts, IEncoder encoder, IWeightTensor Embedding, IWeightTensor srcSelfMask, IWeightTensor posEmbedding, List <int> originalSrcLengths)
        {
            var seqLen = srcSnts[0].Count;
            var batchSize = srcSnts.Count;
            var embeddingRows = new List <IWeightTensor>();

            // Batch-first: all tokens of sentence 0, then all tokens of sentence 1, ...
            for (var sentenceIdx = 0; sentenceIdx < batchSize; sentenceIdx++)
            {
                var realLength = originalSrcLengths[sentenceIdx];
                var sentence = srcSnts[sentenceIdx];

                for (var tokenIdx = 0; tokenIdx < seqLen; tokenIdx++)
                {
                    var wordId = m_modelMetaData.Vocab.GetSourceWordIndex(sentence[tokenIdx], true);

                    // Gradients run only for positions inside the sentence's original length.
                    var isRealToken = tokenIdx < realLength;
                    embeddingRows.Add(g.PeekRow(Embedding, wordId, runGradients: isRealToken));
                }
            }

            var inputEmbs = g.ConcatRows(embeddingRows);

            if (m_modelMetaData.EncoderType == EncoderTypeEnums.Transformer)
            {
                inputEmbs = AddPositionEmbedding(g, posEmbedding, batchSize, seqLen, inputEmbs);
            }

            return encoder.Encode(inputEmbs, batchSize, g, srcSelfMask);
        }
Пример #8
0
        /// <summary>
        /// Pads the input sentences, looks up the embedding of every token and runs the
        /// concatenated embeddings through <paramref name="encoder"/>.
        /// </summary>
        /// <param name="g">Compute graph on which the operations are issued.</param>
        /// <param name="inputSentences">Tokenised sentences; passed to <c>PadSentences</c> first, after which the first sentence's length is used for all.</param>
        /// <param name="encoder">Encoder that consumes the embedded batch.</param>
        /// <param name="Embedding">Source embedding table from which rows are peeked.</param>
        /// <returns>The encoder output.</returns>
        private IWeightTensor Encode(IComputeGraph g, List <List <string> > inputSentences, IEncoder encoder, IWeightTensor Embedding)
        {
            PadSentences(inputSentences);

            int seqLen = inputSentences[0].Count;
            List <IWeightTensor> forwardInput = new List <IWeightTensor>();

            // Position-major order: position 0 of every sentence, then position 1, ...
            for (int i = 0; i < seqLen; i++)
            {
                for (int j = 0; j < inputSentences.Count; j++)
                {
                    string word = inputSentences[j][i];

                    // Single lookup; words missing from the vocabulary map to UNK and are logged.
                    int ix_source = (int)SENTTAGS.UNK;
                    if (m_srcWordToIndex.TryGetValue(word, out var knownIndex))
                    {
                        ix_source = knownIndex;
                    }
                    else
                    {
                        Logger.WriteLine($"'{word}' is an unknown word.");
                    }

                    forwardInput.Add(g.PeekRow(Embedding, ix_source));
                }
            }

            var forwardInputsM = g.ConcatRows(forwardInput);

            return encoder.Encode(forwardInputsM, g);
        }
Пример #9
0
        /// <summary>
        /// Builds an additive lower-triangular self-attention mask of shape
        /// [batch, paddedLength, paddedLength]: an entry is 0 when its row is inside the
        /// sequence's original length and its column is not greater than its row; every other
        /// entry is -1e9.
        /// </summary>
        /// <param name="g">Not used by this method.</param>
        /// <param name="paddedLength">Padded sequence length shared by every sequence in the batch.</param>
        /// <param name="originalLengths">Unpadded length of each sequence; its count is the batch size.</param>
        /// <param name="deviceId">Device id passed to the tensor constructor and embedded in the tensor name.</param>
        /// <returns>A new non-trainable tensor holding the mask values.</returns>
        public static IWeightTensor BuildPadSelfTriMask(IComputeGraph g, int paddedLength, List <int> originalLengths, int deviceId)
        {
            int batchSize = originalLengths.Count;
            var mask = new float[batchSize * paddedLength * paddedLength];

            // Start fully masked.
            for (int pos = 0; pos < mask.Length; pos++)
            {
                mask[pos] = -1e9f;
            }

            for (int sample = 0; sample < batchSize; sample++)
            {
                int length = originalLengths[sample];
                int sampleBase = sample * (paddedLength * paddedLength);

                for (int row = 0; row < length; row++)
                {
                    int rowBase = sampleBase + row * paddedLength;

                    // row < length, so stopping at the diagonal also keeps col inside the original length.
                    for (int col = 0; col <= row; col++)
                    {
                        mask[rowBase + col] = 0.0f;
                    }
                }
            }

            var tensor = new WeightTensor(new long[] { originalLengths.Count, paddedLength, paddedLength }, 0.0f, deviceId, $"TriMask_{deviceId}", isTrainable: false);
            tensor.SetWeightArray(mask);

            return tensor;
        }
Пример #10
0
        /// <summary>
        /// Advances the recurrent cell by one step inside a sub-graph of <paramref name="g"/>
        /// and updates the stored cell (<c>m_cell</c>) and hidden (<c>m_hidden</c>) state.
        /// </summary>
        /// <param name="input">Input for the current step.</param>
        /// <param name="g">Parent compute graph; intermediates are built on a sub-graph created from it.</param>
        /// <returns>The new hidden state, which is also stored in <c>m_hidden</c>.</returns>
        public IWeightTensor Step(IWeightTensor input, IComputeGraph g)
        {
            using (var innerGraph = g.CreateSubGraph(m_name))
            {
                var previousHidden = m_hidden;
                var previousCell = m_cell;

                // Affine projection of [input, previous hidden], then normalisation.
                var joined = innerGraph.ConcatColumns(input, previousHidden);
                var projected = innerGraph.Affine(joined, m_Wxh, m_b);
                var normalized = m_layerNorm1.Norm(projected, innerGraph);

                // Split into a (3 * m_hdim)-wide gate part and an m_hdim-wide candidate part.
                (var rawGates, var rawCandidate) = innerGraph.SplitColumns(normalized, m_hdim * 3, m_hdim);
                var squashedGates = innerGraph.Sigmoid(rawGates);
                var candidate = innerGraph.Tanh(rawCandidate);

                (var inputGate, var forgetGate, var outputGate) = innerGraph.SplitColumns(squashedGates, m_hdim, m_hdim, m_hdim);

                // New cell state: forgetGate * previousCell + inputGate * candidate.
                // Issued on the parent graph g, not the sub-graph — presumably so the state
                // outlives the sub-graph's disposal (TODO confirm).
                m_cell = g.EltMulMulAdd(forgetGate, previousCell, inputGate, candidate);
                var normalizedCell = m_layerNorm2.Norm(m_cell, innerGraph);

                // Hidden state: output gate applied to tanh of the normalised cell; also on g.
                m_hidden = g.EltMul(outputGate, innerGraph.Tanh(normalizedCell));

                return m_hidden;
            }
        }
Пример #11
0
        /// <summary>
        /// Advances the recurrent cell by one step (no layer normalisation) and updates the
        /// stored cell (<c>ct</c>) and hidden (<c>ht</c>) state.
        /// </summary>
        /// <param name="input">Input for the current step; its row count sizes the repeated bias.</param>
        /// <param name="innerGraph">Compute graph on which every operation of this step is issued.</param>
        /// <returns>The new hidden state, which is also stored in <c>ht</c>.</returns>
        public IWeightMatrix Step(IWeightMatrix input, IComputeGraph innerGraph)
        {
            var previousHidden = ht;
            var previousCell = ct;

            // Project [input, previous hidden] through Wxh with a per-row bias.
            var joined = innerGraph.ConcatColumns(input, previousHidden);
            var biasRows = innerGraph.RepeatRows(b, input.Rows);
            var projected = innerGraph.MulAdd(joined, Wxh, biasRows);

            // Split into a (3 * hdim)-wide gate part and an hdim-wide candidate part.
            var (rawGates, rawCandidate) = innerGraph.SplitColumns(projected, hdim * 3, hdim);
            var squashedGates = innerGraph.Sigmoid(rawGates);
            var candidate = innerGraph.Tanh(rawCandidate);

            var (inputGate, forgetGate, outputGate) = innerGraph.SplitColumns(squashedGates, hdim, hdim, hdim);

            // New cell contents: what is kept from the old cell plus what is written to it.
            ct = innerGraph.Add(
                innerGraph.EltMul(forgetGate, previousCell),
                innerGraph.EltMul(inputGate, candidate));

            // Hidden state: output gate applied to tanh of the new cell state.
            ht = innerGraph.EltMul(outputGate, innerGraph.Tanh(ct));

            return ht;
        }
Пример #12
0
        /// <summary>
        /// Applies the layer's affine transform (<c>m_Whd</c>, <c>m_Bd</c>) followed by in-place
        /// dropout, on a sub-graph created from <paramref name="graph"/>.
        /// </summary>
        /// <param name="inputT">Input tensor.</param>
        /// <param name="batchSize">Batch size forwarded to <c>Dropout</c>.</param>
        /// <param name="graph">Parent compute graph. The sub-graph created from it is not disposed by this method.</param>
        /// <returns>The tensor returned by <c>Dropout</c>.</returns>
        public IWeightTensor Process(IWeightTensor inputT, int batchSize, IComputeGraph graph)
        {
            var subGraph = graph.CreateSubGraph(m_name);
            var projected = subGraph.Affine(inputT, m_Whd, m_Bd);

            return subGraph.Dropout(projected, batchSize, m_dropoutRatio, inPlace: true);
        }
        /// <summary>
        /// Update LSTM-Attention cells according to given weights. Updates <c>Cell</c> and
        /// <c>Hidden</c> as a side effect.
        /// </summary>
        /// <param name="context">The context weights for attention</param>
        /// <param name="input">The input weights</param>
        /// <param name="g">Parent compute graph; every operation is issued on a sub-graph created from it, which this method does not dispose.</param>
        /// <returns>The updated hidden weights, also stored in <c>Hidden</c></returns>
        public IWeightTensor Step(IWeightTensor context, IWeightTensor input, IComputeGraph g)
        {
            var subGraph = g.CreateSubGraph(m_name);

            var previousCell = Cell;
            var previousHidden = Hidden;

            // Affine projection of [input, previous hidden, context], then layer normalisation.
            var joined = subGraph.ConcatColumns(input, previousHidden, context);
            var projected = subGraph.Affine(joined, m_Wxhc, m_b);
            var normalized = layerNorm1.Process(projected, subGraph);

            // Split into a (3 * m_hdim)-wide gate part and an m_hdim-wide candidate part.
            var (rawGates, rawCandidate) = subGraph.SplitColumns(normalized, m_hdim * 3, m_hdim);
            var squashedGates = subGraph.Sigmoid(rawGates);
            var candidate = subGraph.Tanh(rawCandidate);

            var (inputGate, forgetGate, outputGate) = subGraph.SplitColumns(squashedGates, m_hdim, m_hdim, m_hdim);

            // New cell state: forgetGate * previousCell + inputGate * candidate.
            Cell = subGraph.EltMulMulAdd(forgetGate, previousCell, inputGate, candidate);
            var normalizedCell = layerNorm2.Process(Cell, subGraph);

            // Hidden state: output gate applied to tanh of the normalised cell state.
            Hidden = subGraph.EltMul(outputGate, subGraph.Tanh(normalizedCell));

            return Hidden;
        }
Пример #14
0
        /// <summary>
        /// Runs the network forward on one sentence-pair batch per device index in parallel,
        /// feeds every (reference, hypothesis) pair to each metric and optionally collects the
        /// joined source/reference/hypothesis sentences.
        /// </summary>
        /// <param name="RunNetwork">Forward function; called with the graph, source tokens, hypothesis tokens, device index and <c>false</c>. Its return value is ignored here.</param>
        /// <param name="metrics">Metrics whose <c>Evaluate</c> is called once per sentence.</param>
        /// <param name="outputToFile">When true, the joined sentences are appended to the three output lists.</param>
        /// <param name="srcSents">Receives the space-joined source sentences.</param>
        /// <param name="refSents">Receives the space-joined reference sentences.</param>
        /// <param name="hypSents">Receives the space-joined hypothesis sentences.</param>
        /// <param name="sntPairBatchs">One batch per device index; indexed by the parallel loop variable.</param>
        /// <exception cref="InvalidDataException">A hypothesis index has no matching reference sentence.</exception>
        private void RunValidParallel(Func<IComputeGraph, List<List<string>>, List<List<string>>, int, bool, float> RunNetwork, List<IMetric> metrics, bool outputToFile, List<string> srcSents, List<string> refSents, List<string> hypSents, List<SntPairBatch> sntPairBatchs)
        {
            // Run forward on all available processors
            Parallel.For(0, m_deviceIds.Length, i =>
            {
                SntPairBatch sntPairBatch = sntPairBatchs[i];

                // Construct sentences for encoding and decoding
                List<List<string>> srcTkns = new List<List<string>>();
                List<List<string>> refTkns = new List<List<string>>();
                List<List<string>> hypTkns = new List<List<string>>();
                for (int j = 0; j < sntPairBatch.BatchSize; j++)
                {
                    srcTkns.Add(sntPairBatch.SntPairs[j].SrcSnt.ToList());
                    refTkns.Add(sntPairBatch.SntPairs[j].TgtSnt.ToList());

                    // Every hypothesis starts with only the begin-of-sentence token.
                    hypTkns.Add(new List<string>() { ParallelCorpus.BOS });
                }

                // Create a new computing graph instance
                using (IComputeGraph computeGraph = CreateComputGraph(i, needBack: false))
                {
                    // Run forward part
                    RunNetwork(computeGraph, srcTkns, hypTkns, i, false);
                }

                // Metrics and the output lists are shared between the parallel iterations.
                lock (locker)
                {
                    for (int j = 0; j < hypTkns.Count; j++)
                    {
                        foreach (IMetric metric in metrics)
                        {
                            // j is non-negative and below hypTkns.Count by the loop bounds, so
                            // only the reference list can be too short.
                            if (j >= refTkns.Count)
                            {
                                throw new InvalidDataException($"Ref token only has '{refTkns.Count}' batch, however, it try to access batch '{j}'. Hyp token has '{hypTkns.Count}' tokens, Batch Size = '{sntPairBatch.BatchSize}'");
                            }

                            metric.Evaluate(new List<List<string>>() { refTkns[j] }, hypTkns[j]);
                        }
                    }

                    if (outputToFile)
                    {
                        for (int j = 0; j < srcTkns.Count; j++)
                        {
                            srcSents.Add(string.Join(" ", srcTkns[j]));
                            refSents.Add(string.Join(" ", refTkns[j]));
                            hypSents.Add(string.Join(" ", hypTkns[j]));
                        }
                    }
                }
            });
        }
Пример #15
0
        /// <summary>
        /// Runs the sequence through <c>depth</c> stacked bidirectional layers. In each layer
        /// the forward encoder steps through the sequence front to back, the backward encoder
        /// steps back to front, and the two outputs for each position are concatenated
        /// column-wise to form the next layer's input.
        /// </summary>
        /// <param name="inputs">One matrix per sequence position; this list itself is not modified.</param>
        /// <param name="g">Compute graph on which the operations are issued.</param>
        /// <returns>The last layer's outputs, one per position (a copy of <paramref name="inputs"/> when <c>depth</c> is 0).</returns>
        public List <IWeightMatrix> Encode(List <IWeightMatrix> inputs, IComputeGraph g)
        {
            List <IWeightMatrix> layerOutputs = inputs.ToList();
            int seqLen = inputs.Count;

            for (int i = 0; i < depth; i++)
            {
                // Fresh buffers per layer: reusing them across layers left the earlier layer's
                // outputs at the front, so index j below would pick up stale forward outputs.
                List <IWeightMatrix> forwardOutputs  = new List <IWeightMatrix>(seqLen);
                List <IWeightMatrix> backwardOutputs = new List <IWeightMatrix>(seqLen);

                for (int j = 0; j < seqLen; j++)
                {
                    var forwardOutput = forwardEncoders[i].Step(layerOutputs[j], g);
                    forwardOutputs.Add(forwardOutput);

                    var backwardOutput = backwardEncoders[i].Step(layerOutputs[seqLen - j - 1], g);
                    backwardOutputs.Add(backwardOutput);
                }

                // Backward outputs were produced last-position-first; realign them with the forward ones.
                backwardOutputs.Reverse();

                layerOutputs.Clear();
                for (int j = 0; j < seqLen; j++)
                {
                    var concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
                    layerOutputs.Add(concatW);
                }
            }

            return layerOutputs;
        }
Пример #16
0
        /// <summary>
        /// Update LSTM-Attention cells according to given weights. Updates the stored cell
        /// (<c>ct</c>) and hidden (<c>ht</c>) state as a side effect.
        /// </summary>
        /// <param name="context">The context weights for attention</param>
        /// <param name="input">The input weights; its row count sizes the repeated bias</param>
        /// <param name="computeGraph">The compute graph to build workflow</param>
        /// <returns>The updated hidden weights, also stored in <c>ht</c></returns>
        public IWeightMatrix Step(IWeightMatrix context, IWeightMatrix input, IComputeGraph computeGraph)
        {
            var previousCell = ct;
            var previousHidden = ht;

            // Project [input, previous hidden, context] through Wxhc with a per-row bias.
            var joined = computeGraph.ConcatColumns(input, previousHidden, context);
            var biasRows = computeGraph.RepeatRows(b, input.Rows);
            var projected = computeGraph.MulAdd(joined, Wxhc, biasRows);

            // Split into a (3 * hdim)-wide gate part and an hdim-wide candidate part.
            var (rawGates, rawCandidate) = computeGraph.SplitColumns(projected, hdim * 3, hdim);
            var squashedGates = computeGraph.Sigmoid(rawGates);
            var candidate = computeGraph.Tanh(rawCandidate);

            var (inputGate, forgetGate, outputGate) = computeGraph.SplitColumns(squashedGates, hdim, hdim, hdim);

            // New cell state in a single call: forgetGate * previousCell + inputGate * candidate.
            ct = computeGraph.EltMulMulAdd(forgetGate, previousCell, inputGate, candidate);

            // Hidden state: output gate applied to tanh of the new cell state.
            ht = computeGraph.EltMul(outputGate, computeGraph.Tanh(ct));

            return ht;
        }
Пример #17
0
        /// <summary>
        /// Advances the recurrent cell by one step inside a sub-graph of <paramref name="g"/>
        /// and updates the stored cell (<c>m_cell</c>) and hidden (<c>m_hidden</c>) state.
        /// </summary>
        /// <param name="input">Input for the current step.</param>
        /// <param name="g">Parent compute graph; intermediates are built on a sub-graph created from it.</param>
        /// <returns>The new hidden state, which is also stored in <c>m_hidden</c>.</returns>
        public IWeightTensor Step(IWeightTensor input, IComputeGraph g)
        {
            using (IComputeGraph innerGraph = g.CreateSubGraph(m_name))
            {
                IWeightTensor previousHidden = m_hidden;
                IWeightTensor previousCell = m_cell;

                // Affine projection of input and previous hidden (concatenated with Concate(1, ...)), then normalisation.
                IWeightTensor joined = innerGraph.Concate(1, input, previousHidden);
                IWeightTensor projected = innerGraph.Affine(joined, m_Wxh, m_b);
                IWeightTensor normalized = m_layerNorm1.Norm(projected, innerGraph);

                // Split into a (3 * m_hdim)-wide gate part and an m_hdim-wide candidate part.
                (IWeightTensor rawGates, IWeightTensor rawCandidate) = innerGraph.SplitColumns(normalized, m_hdim * 3, m_hdim);
                IWeightTensor squashedGates = innerGraph.Sigmoid(rawGates);
                IWeightTensor candidate = innerGraph.Tanh(rawCandidate);

                (IWeightTensor inputGate, IWeightTensor forgetGate, IWeightTensor outputGate) = innerGraph.SplitColumns(squashedGates, m_hdim, m_hdim, m_hdim);

                // New cell state: forgetGate * previousCell + inputGate * candidate.
                // Issued on the parent graph g, not the sub-graph — presumably so the state
                // outlives the sub-graph's disposal (TODO confirm).
                m_cell = g.EltMulMulAdd(forgetGate, previousCell, inputGate, candidate);
                IWeightTensor normalizedCell = m_layerNorm2.Norm(m_cell, innerGraph);

                // Hidden state: output gate applied to tanh of the normalised cell; also on g.
                m_hidden = g.EltMul(outputGate, innerGraph.Tanh(normalizedCell));

                return m_hidden;
            }
        }
        /// <summary>
        /// Transformer encoder: adds a position matrix to the scaled input, applies dropout,
        /// runs every entry of <c>m_encoders</c>, and transposes the result back.
        /// </summary>
        /// <param name="rawInput">Input tensor; its row count divided by <paramref name="batchSize"/> is used as the sequence length.</param>
        /// <param name="batchSize">Number of sequences in the batch.</param>
        /// <param name="g">Compute graph on which the operations are issued.</param>
        /// <returns>The encoded tensor after the final <c>TransposeBatch</c>.</returns>
        public IWeightTensor Encode(IWeightTensor rawInput, int batchSize, IComputeGraph g)
        {
            int seqLen = rawInput.Rows / batchSize;

            IWeightTensor positions = g.BuildPositionMatrix(seqLen, m_inputDim);
            IWeightTensor positionsPerBatch = g.RepeatRows(positions, batchSize, runGradient: false);

            // Transpose to batch-first based sequence
            IWeightTensor hidden = g.TransposeBatch(rawInput, batchSize);

            // The input is passed with scale sqrt(m_inputDim); gradients run only for the input operand.
            float inputScale = (float)Math.Sqrt(m_inputDim);
            hidden = g.AddMul(positionsPerBatch, hidden, inputScale, runGradientW1: false, runGradientW2: true);

            // We don't update position embedding, so dispose it now to save memory.
            positionsPerBatch.Dispose();
            positions.Dispose();

            hidden = g.Dropout(hidden, batchSize, m_dropoutRatio, inPlace: true);

            for (int layer = 0; layer < m_encoders.Count; layer++)
            {
                hidden = m_encoders[layer].Perform(hidden, batchSize, g);
            }

            // Transpose back to time-first based sequence
            return g.TransposeBatch(hidden, seqLen);
        }
        /// <summary>
        /// Update LSTM-Attention cells according to given weights. Updates <c>Cell</c> and
        /// <c>Hidden</c> as a side effect.
        /// </summary>
        /// <param name="context">The context weights for attention</param>
        /// <param name="input">The input weights</param>
        /// <param name="g">Parent compute graph; intermediates are built on a sub-graph created from it</param>
        /// <returns>The updated hidden weights, also stored in <c>Hidden</c></returns>
        public IWeightTensor Step(IWeightTensor context, IWeightTensor input, IComputeGraph g)
        {
            using (var computeGraph = g.CreateSubGraph(m_name))
            {
                var previousCell = Cell;
                var previousHidden = Hidden;

                // Affine projection of [input, previous hidden, context], then normalisation.
                var joined = computeGraph.ConcatColumns(input, previousHidden, context);
                var projected = computeGraph.Affine(joined, m_Wxhc, m_b);
                var normalized = m_layerNorm1.Norm(projected, computeGraph);

                // Split into a (3 * m_hiddenDim)-wide gate part and an m_hiddenDim-wide candidate part.
                (var rawGates, var rawCandidate) = computeGraph.SplitColumns(normalized, m_hiddenDim * 3, m_hiddenDim);
                var squashedGates = computeGraph.Sigmoid(rawGates);
                var candidate = computeGraph.Tanh(rawCandidate);

                (var inputGate, var forgetGate, var outputGate) = computeGraph.SplitColumns(squashedGates, m_hiddenDim, m_hiddenDim, m_hiddenDim);

                // New cell state: forgetGate * previousCell + inputGate * candidate.
                // Issued on the parent graph g, not the sub-graph — presumably so the state
                // outlives the sub-graph's disposal (TODO confirm).
                Cell = g.EltMulMulAdd(forgetGate, previousCell, inputGate, candidate);
                var normalizedCell = m_layerNorm2.Norm(Cell, computeGraph);

                // Hidden state: output gate applied to tanh of the normalised cell; also on g.
                Hidden = g.EltMul(outputGate, computeGraph.Tanh(normalizedCell));

                return Hidden;
            }
        }
Пример #20
0
        /// <summary>
        /// Builds one encoder pass and one decoder step on a visualisation-enabled compute
        /// graph and writes the resulting network picture to <paramref name="visNNFilePath"/>.
        /// </summary>
        /// <param name="visNNFilePath">Path handed to <c>VisualizeNeuralNetToFile</c>.</param>
        /// <exception cref="System.InvalidCastException">The decoder on the selected device is not an <c>AttentionDecoder</c>.</exception>
        public void VisualizeNeuralNetwork(string visNNFilePath)
        {
            (IEncoder encoder, IDecoder decoder, IWeightTensor srcEmbedding, IWeightTensor tgtEmbedding) = GetNetworksOnDeviceAt(-1);

            // Direct cast instead of `as`: a decoder of another type fails here with a
            // descriptive InvalidCastException rather than a NullReferenceException later.
            AttentionDecoder rnnDecoder = (AttentionDecoder)decoder;

            // Build input sentence
            List <List <string> > inputSeqs = ParallelCorpus.ConstructInputTokens(null);
            int batchSize = inputSeqs.Count;

            // Dispose the graph when done, as the other CreateComputGraph call sites do.
            using (IComputeGraph g = CreateComputGraph(m_defaultDeviceId, needBack: false, visNetwork: true))
            {
                encoder.Reset(g.GetWeightFactory(), batchSize);
                rnnDecoder.Reset(g.GetWeightFactory(), batchSize);

                // Run encoder
                IWeightTensor encodedWeightMatrix = Encode(g, inputSeqs, encoder, srcEmbedding, null, null);

                // Prepare for attention over encoder-decoder
                AttentionPreProcessResult attPreProcessResult = rnnDecoder.PreProcess(encodedWeightMatrix, batchSize, g);

                // Run decoder
                IWeightTensor x       = g.PeekRow(tgtEmbedding, (int)SENTTAGS.START);
                IWeightTensor eOutput = rnnDecoder.Decode(x, attPreProcessResult, batchSize, g);

                // The result is not needed; the call is kept so the softmax is part of the graph.
                g.Softmax(eOutput);

                g.VisualizeNeuralNetToFile(visNNFilePath);
            }
        }
Пример #21
0
        /// <summary>
        /// Update LSTM-Attention cells according to given weights. Updates <c>Cell</c> and
        /// <c>Hidden</c> as a side effect.
        /// </summary>
        /// <param name="context">The context weights for attention</param>
        /// <param name="input">The input weights</param>
        /// <param name="g">Parent compute graph; intermediates are built on a sub-graph created from it</param>
        /// <returns>The updated hidden weights, also stored in <c>Hidden</c></returns>
        public IWeightTensor Step(IWeightTensor context, IWeightTensor input, IComputeGraph g)
        {
            using (IComputeGraph computeGraph = g.CreateSubGraph(m_name))
            {
                IWeightTensor previousCell = Cell;
                IWeightTensor previousHidden = Hidden;

                // Affine projection of input, previous hidden and context (concatenated with Concate(1, ...)), then normalisation.
                IWeightTensor joined = computeGraph.Concate(1, input, previousHidden, context);
                IWeightTensor projected = computeGraph.Affine(joined, m_Wxhc, m_b);
                IWeightTensor normalized = m_layerNorm1.Norm(projected, computeGraph);

                // Split into a (3 * m_hiddenDim)-wide gate part and an m_hiddenDim-wide candidate part.
                (IWeightTensor rawGates, IWeightTensor rawCandidate) = computeGraph.SplitColumns(normalized, m_hiddenDim * 3, m_hiddenDim);
                IWeightTensor squashedGates = computeGraph.Sigmoid(rawGates);
                IWeightTensor candidate = computeGraph.Tanh(rawCandidate);

                (IWeightTensor inputGate, IWeightTensor forgetGate, IWeightTensor outputGate) = computeGraph.SplitColumns(squashedGates, m_hiddenDim, m_hiddenDim, m_hiddenDim);

                // New cell state: forgetGate * previousCell + inputGate * candidate.
                // Issued on the parent graph g, not the sub-graph — presumably so the state
                // outlives the sub-graph's disposal (TODO confirm).
                Cell = g.EltMulMulAdd(forgetGate, previousCell, inputGate, candidate);
                IWeightTensor normalizedCell = m_layerNorm2.Norm(Cell, computeGraph);

                // Hidden state: output gate applied to tanh of the normalised cell; also on g.
                Hidden = g.EltMul(outputGate, computeGraph.Tanh(normalizedCell));

                return Hidden;
            }
        }
        /// <summary>
        /// Computes attention contexts for the decoder <paramref name="state"/> over the
        /// pre-processed encoder outputs, and, when the coverage model is enabled, steps the
        /// coverage cell with the new attention weights.
        /// </summary>
        /// <param name="state">Decoder state; viewed as [batchSize, 1, state.Columns] and expanded over the source length.</param>
        /// <param name="attenPreProcessResult">Pre-processed encoder data; <c>inputsBatchFirst</c> and <c>uhs</c> are read.</param>
        /// <param name="batchSize">Number of sequences in the batch.</param>
        /// <param name="graph">Parent compute graph; intermediates are built on a sub-graph created from it.</param>
        /// <returns>The contexts produced by <c>MulBatch</c> on the parent graph.</returns>
        public IWeightTensor Perform(IWeightTensor state, AttentionPreProcessResult attenPreProcessResult, int batchSize, IComputeGraph graph)
        {
            int srcSeqLen = attenPreProcessResult.inputsBatchFirst.Rows / batchSize;

            using (IComputeGraph g = graph.CreateSubGraph(m_name))
            {
                // Affine decoder state
                IWeightTensor projectedState = g.Affine(state, m_Wa, m_bWa);

                // Expand dims from [batchSize x decoder_dim] to [batchSize x srcSeqLen x decoder_dim]
                IWeightTensor projectedState3d = g.View(projectedState, batchSize, 1, projectedState.Columns);
                IWeightTensor projectedStateExp = g.Expand(projectedState3d, batchSize, srcSeqLen, projectedState.Columns);

                IWeightTensor energies;
                if (!m_enableCoverageModel)
                {
                    energies = g.AddTanh(attenPreProcessResult.uhs, projectedStateExp);
                }
                else
                {
                    // Get coverage model status at {t-1}
                    IWeightTensor coverage = g.Affine(m_coverage.Hidden, m_Wc, m_bWc);
                    IWeightTensor coverage3d = g.View(coverage, batchSize, srcSeqLen, -1);

                    energies = g.AddTanh(attenPreProcessResult.uhs, projectedStateExp, coverage3d);
                }

                // Score every (batch, source position) row with m_V.
                IWeightTensor energies2d = g.View(energies, batchSize * srcSeqLen, -1);
                IWeightTensor scores = g.Mul(energies2d, m_V);

                IWeightTensor scoresT = g.Transpose(scores);
                IWeightTensor scoresPerBatch = g.View(scoresT, batchSize, srcSeqLen);

                IWeightTensor weights2d = g.Softmax(scoresPerBatch, inPlace: true);

                IWeightTensor weights3d = g.View(weights2d, batchSize, 1, srcSeqLen);
                IWeightTensor encoderOutputs3d = g.View(attenPreProcessResult.inputsBatchFirst, batchSize, srcSeqLen, attenPreProcessResult.inputsBatchFirst.Columns);

                // Issued on the parent graph, not the sub-graph — presumably so the returned
                // tensor outlives the sub-graph's disposal (TODO confirm).
                IWeightTensor contexts = graph.MulBatch(weights3d, encoderOutputs3d, batchSize);

                if (m_enableCoverageModel)
                {
                    // Concatenate tensor as input for coverage model
                    IWeightTensor weightsColumn = g.View(weights2d, attenPreProcessResult.inputsBatchFirst.Rows, 1);

                    IWeightTensor state3d = g.View(state, batchSize, 1, state.Columns);
                    IWeightTensor stateExp = g.Expand(state3d, batchSize, srcSeqLen, state.Columns);
                    IWeightTensor state2d = g.View(stateExp, batchSize * srcSeqLen, -1);

                    IWeightTensor coverageInput = g.ConcatColumns(weightsColumn, attenPreProcessResult.inputsBatchFirst, state2d);
                    m_coverage.Step(coverageInput, graph);
                }

                return contexts;
            }
        }
Пример #23
0
        /// <summary>
        /// Run the forward pass on the given single device.
        /// </summary>
        /// <param name="g">The computing graph for the current device.</param>
        /// <param name="sntPairBatch">Batch of sentence pairs; source and target token group 0 are read from it.</param>
        /// <param name="deviceIdIdx">The index of the current device, used to fetch the networks for that device.</param>
        /// <param name="isTraining">True to compute the cross-entropy loss against the target tokens; false to write the arg-max predictions into the target sentences.</param>
        /// <param name="decodingOptions">Not referenced by this method.</param>
        /// <returns>A list with one <see cref="NetworkResult"/> carrying the cost (0 outside training) and the target sentences.</returns>
        public override List <NetworkResult> RunForwardOnSingleDevice(IComputeGraph g, ISntPairBatch sntPairBatch, int deviceIdIdx, bool isTraining, DecodingOptions decodingOptions)
        {
            var sourceSentences = sntPairBatch.GetSrcTokens(0);
            var targetSentences = sntPairBatch.GetTgtTokens(0);

            (IEncoder encoder, IWeightTensor srcEmbedding, IWeightTensor posEmbedding, FeedForwardLayer decoderFFLayer) = GetNetworksOnDeviceAt(deviceIdIdx);

            // Reset the encoder for this batch before padding and vocabulary lookup.
            encoder.Reset(g.GetWeightFactory(), sourceSentences.Count);

            // Pad first, then map to ids (PadSentences presumably pads in place, since its
            // return value is ignored for the target side).
            var originalSrcLengths = BuildInTokens.PadSentences(sourceSentences);
            var sourceTokenIds     = m_modelMetaData.SrcVocab.GetWordIndex(sourceSentences);

            BuildInTokens.PadSentences(targetSentences);
            var targetTokenIds = m_modelMetaData.ClsVocab.GetWordIndex(targetSentences);

            int seqLen    = sourceSentences[0].Count;
            int batchSize = sourceSentences.Count;

            // Encode the source sentences, project them, and turn the projection into probabilities.
            IWeightTensor encOutput = Encoder.Run(g, sntPairBatch, encoder, m_modelMetaData, m_shuffleType, srcEmbedding, posEmbedding, null, sourceTokenIds, originalSrcLengths);
            IWeightTensor ffLayer   = decoderFFLayer.Process(encOutput, batchSize, g);
            IWeightTensor probs     = g.Softmax(ffLayer, inPlace: true);

            float cost = 0.0f;

            if (!isTraining)
            {
                // Inference: take the arg-max along dimension 1 and convert the ids back to words.
                using var targetIdxTensor = g.Argmax(probs, 1);
                float[]       predictedIds   = targetIdxTensor.ToWeightArray();
                List <string> predictedWords = m_modelMetaData.ClsVocab.ConvertIdsToString(predictedIds.ToList());

                // Slice the flat word list into one run of seqLen words per sentence.
                for (int sentence = 0; sentence < batchSize; sentence++)
                {
                    targetSentences[sentence] = predictedWords.GetRange(sentence * seqLen, seqLen);
                }
            }
            else
            {
                // Training: cross-entropy loss against the target token ids.
                cost = g.CrossEntropyLoss(probs, g.CreateTokensTensor(targetTokenIds));
            }

            NetworkResult result = new NetworkResult
            {
                Cost   = cost,
                Output = new List <List <List <string> > > { targetSentences }
            };

            return new List <NetworkResult> { result };
        }
        /// <summary>
        /// Applies layer normalization to <paramref name="input"/> using m_alpha and m_beta.
        /// </summary>
        /// <param name="input">The tensor to normalize.</param>
        /// <param name="g">The parent computing graph; the op runs on a sub-graph named after this layer.</param>
        /// <returns>The tensor returned by the sub-graph's LayerNorm call.</returns>
        public IWeightTensor Process(IWeightTensor input, IComputeGraph g)
        {
            // The sub-graph is not disposed here: the returned tensor is created on it.
            var subGraph = g.CreateSubGraph(m_name);
            IWeightTensor normalized = subGraph.LayerNorm(input, m_alpha, m_beta);

            return normalized;
        }
Пример #25
0
 /// <summary>
 /// Feeds the input through every encoder in order; each encoder's output is the next one's input.
 /// </summary>
 /// <param name="V">The input to the first encoder.</param>
 /// <param name="g">The computing graph handed to each encoder step.</param>
 /// <returns>The output of the last encoder, or the input unchanged when there are no encoders.</returns>
 public WeightMatrix Encode(WeightMatrix V, IComputeGraph g)
 {
     WeightMatrix current = V;

     foreach (var layer in encoders)
     {
         current = layer.Step(current, g);
     }

     return current;
 }
Пример #26
0
        /// <summary>
        /// Resets the encoder for the given batch and runs it over the source token ids via InnerRunner.
        /// </summary>
        /// <returns>The tensor returned by InnerRunner.</returns>
        static public IWeightTensor Run(IComputeGraph computeGraph, ISntPairBatch sntPairBatch, IEncoder encoder, IModel modelMetaData, ShuffleEnums shuffleType,
                                        IWeightTensor srcEmbedding, IWeightTensor posEmbedding, IWeightTensor segmentEmbedding, List <List <int> > srcSntsIds, float[] originalSrcLengths)
        {
            // Reset the encoder, passing the graph's weight factory and the number of source sentences.
            var weightFactory = computeGraph.GetWeightFactory();
            int sentenceCount = srcSntsIds.Count;
            encoder.Reset(weightFactory, sentenceCount);

            return InnerRunner(computeGraph, srcSntsIds, originalSrcLengths, shuffleType, encoder, modelMetaData, srcEmbedding, posEmbedding, segmentEmbedding);
        }
Пример #27
0
        /// <summary>
        /// Feeds the input through every encoder in order; each encoder's output is the next one's input.
        /// </summary>
        /// <param name="V">The input to the first encoder.</param>
        /// <param name="g">The computing graph handed to each encoder step.</param>
        /// <returns>The output of the last encoder, or the input unchanged when there are no encoders.</returns>
        public IWeightTensor Encode(IWeightTensor V, IComputeGraph g)
        {
            IWeightTensor current = V;

            foreach (var layer in encoders)
            {
                current = layer.Step(current, g);
            }

            return current;
        }
Пример #28
0
        /// <summary>
        /// Pre-computes the input-dependent tensors for attention: an affine projection of the
        /// inputs (with m_Ua and m_bUa) and a batch-transposed copy of the inputs.
        /// </summary>
        /// <param name="inputs">The input tensor to pre-process.</param>
        /// <param name="batchSize">Batch size passed to the batch transpose.</param>
        /// <param name="graph">The parent computing graph; ops run on a "_PreProcess" sub-graph.</param>
        /// <returns>A result holding the projection in <c>uhs</c> and the transposed inputs in <c>inputs</c>.</returns>
        public AttentionPreProcessResult PreProcess(IWeightTensor inputs, int batchSize, IComputeGraph graph)
        {
            var           subGraphName = m_name + "_PreProcess";
            IComputeGraph g            = graph.CreateSubGraph(subGraphName);

            // Initializer members are assigned in order: the projection first, then the transpose.
            return new AttentionPreProcessResult
            {
                uhs    = g.Affine(inputs, m_Ua, m_bUa),
                inputs = g.TransposeBatch(inputs, batchSize)
            };
        }
Пример #29
0
        /// <summary>
        /// Runs the decoder stack: for every layer, self-attention, then attention over the encoder
        /// output, then the position-wise feed-forward block; a final layer norm is applied at the end.
        /// </summary>
        /// <param name="tgtInputs">Target-side input tensor; its row count is divided by <paramref name="batchSize"/> to get the query length.</param>
        /// <param name="encOutputBatchFirst">Encoder output, used as both key and value input of the encoder attention.</param>
        /// <param name="tgtSelfMask">Optional mask for self-attention; expanded to [batchSize, heads, seqLenQ, seqLenQ] when not null.</param>
        /// <param name="srcTgtMask">Optional mask for encoder attention; expanded to [batchSize, heads, seqLenQ, seqLenK] when not null.</param>
        /// <param name="batchSize">Batch size of the input data set.</param>
        /// <param name="g">The parent computing graph.</param>
        /// <param name="outputAttnWeights">When true, the last layer's encoder attention is asked to output its attention weights.</param>
        /// <param name="cachedTensors">Optional tensor cache forwarded to the encoder attention layers.</param>
        /// <returns>The decoded tensor and the attention weights returned by the last encoder attention layer.</returns>
        public (IWeightTensor, IWeightTensor) Decode(IWeightTensor tgtInputs, IWeightTensor encOutputBatchFirst, IWeightTensor tgtSelfMask, IWeightTensor srcTgtMask, int batchSize, IComputeGraph g, bool outputAttnWeights = false, Dictionary <string, IWeightTensor> cachedTensors = null)
        {
            IWeightTensor attnProbs = null;

            using (IComputeGraph subg = g.CreateSubGraph($"{m_name}_Decoder"))
            {
                int seqLenQ = tgtInputs.Rows / batchSize;

                // The key length is taken from the encoder output, which also supplies the values.
                int seqLenK = encOutputBatchFirst.Rows / batchSize;

                // Broadcast each provided mask across all attention heads.
                IWeightTensor selfMaskTensor = tgtSelfMask != null
                    ? subg.Expand(tgtSelfMask, dims: new long[] { batchSize, m_multiHeadNum, seqLenQ, seqLenQ })
                    : null;

                IWeightTensor crossMaskTensor = srcTgtMask != null
                    ? subg.Expand(srcTgtMask, dims: new long[] { batchSize, m_multiHeadNum, seqLenQ, seqLenK })
                    : null;

                for (int layer = 0; layer < m_selfAttns.Count; layer++)
                {
                    // Attention weights are only requested from the last layer's encoder attention.
                    bool wantAttnWeights = outputAttnWeights && layer == m_selfAttns.Count - 1;

                    (tgtInputs, attnProbs) = m_selfAttns[layer].Perform(tgtInputs, selfMaskTensor, batchSize, subg, outputAttenWeights: false);
                    (tgtInputs, attnProbs) = m_encAttns[layer].Perform(tgtInputs, encOutputBatchFirst, encOutputBatchFirst, crossMaskTensor, batchSize, subg, outputAttenWeights: wantAttnWeights, cachedTensors: cachedTensors);
                    tgtInputs = m_posFFNs[layer].Perform(tgtInputs, batchSize, subg);
                }

                tgtInputs = layerNorm.Norm(tgtInputs, subg);

                // Unbind the returned tensors from the sub-graph (presumably so they outlive
                // the sub-graph, which is disposed at the end of this using block).
                tgtInputs.UnbindFromComputeGraph();
                attnProbs?.UnbindFromComputeGraph();

                // The expanded masks are only needed inside this call.
                selfMaskTensor?.Dispose();
                crossMaskTensor?.Dispose();
            }

            return(tgtInputs, attnProbs);
        }
Пример #30
0
        /// <summary>
        /// Scaled multi-head self-attention: the input is layer-normalized, projected to Q/K/V with a
        /// single affine layer, attended per head, projected back, passed through dropout and added to
        /// the original input as a skip connection.
        /// </summary>
        /// <param name="inputQ">The input Q tensor; its row count divided by <paramref name="batchSize"/> gives the sequence length</param>
        /// <param name="keyMask">Optional mask added to the attention scores before softmax; skipped when null</param>
        /// <param name="batchSize">Batch size of input data set</param>
        /// <param name="graph">The instance of computing graph</param>
        /// <param name="outputAttenWeights">When true, also returns the attention probabilities averaged over heads</param>
        /// <returns>The transformed output tensor, and the head-averaged attention weights (null unless <paramref name="outputAttenWeights"/> is true)</returns>
        public (IWeightTensor, IWeightTensor) Perform(IWeightTensor inputQ, IWeightTensor keyMask, int batchSize, IComputeGraph graph, bool outputAttenWeights = false)
        {
            // Intermediate ops run on the sub-graph g, which is disposed when this method returns.
            using IComputeGraph g = graph.CreateSubGraph($"{m_name}_MultiHeadAttention");
            int seqLenQ = inputQ.Rows / batchSize;

            IWeightTensor inputQNorm = layerNormQ.Norm(inputQ, g);

            //Input projections
            // One affine layer produces Q, K and V together; the view exposes them along dimension 2.
            var weightedQKV = g.View(g.Affine(inputQNorm, QKV, QKVb), dims: new long[] { batchSize, seqLenQ, 3, m_multiHeadNum, m_d });
            var allQ        = g.Select(weightedQKV, 2, 0);
            var allK        = g.Select(weightedQKV, 2, 1);
            var allV        = g.Select(weightedQKV, 2, 2);


            //Multi-head attentions
            // Q and V are viewed as [batch * heads, seqLenQ, m_d]; K gets an extra transpose and is
            // viewed as [batch * heads, m_d, seqLenQ] so that Qs x Ks yields [seqLenQ, seqLenQ] scores.
            IWeightTensor Qs = g.View(g.AsContiguous(g.Transpose(allQ, 1, 2)), dims: new long[] { batchSize *m_multiHeadNum, seqLenQ, m_d });
            IWeightTensor Ks = g.View(g.AsContiguous(g.Transpose(g.Transpose(allK, 1, 2), 2, 3)), dims: new long[] { batchSize *m_multiHeadNum, m_d, seqLenQ });
            IWeightTensor Vs = g.View(g.AsContiguous(g.Transpose(allV, 1, 2)), dims: new long[] { batchSize *m_multiHeadNum, seqLenQ, m_d });

            // Scaled softmax
            // The scores are scaled by 1 / sqrt(m_d).
            float scale = 1.0f / (float)(Math.Sqrt(m_d));
            var   attn  = g.MulBatch(Qs, Ks, scale);

            attn = g.View(attn, dims: new long[] { batchSize, m_multiHeadNum, seqLenQ, seqLenQ });

            if (keyMask != null)
            {
                // The mask is added to the scores in place.
                attn = g.Add(attn, keyMask, inPlace: true);
            }

            var attnProbs = g.Softmax(attn, inPlace: true);

            IWeightTensor sumAttnWeights = null;

            if (outputAttenWeights)
            {
                //Merge all attention probs over multi-heads
                // Sum over dimension 1 (the head dimension of the view above), divide by the head
                // count, and view as [batch * seqLenQ, seqLenQ]. These ops use the outer graph rather
                // than g - presumably so the returned tensor is not tied to the disposed sub-graph.
                sumAttnWeights = graph.Sum(attnProbs, 1);
                sumAttnWeights = graph.Div(sumAttnWeights, (float)m_multiHeadNum);
                sumAttnWeights = graph.View(sumAttnWeights, new long[] { batchSize *seqLenQ, seqLenQ });
            }

            attnProbs = g.View(attnProbs, dims: new long[] { batchSize *m_multiHeadNum, seqLenQ, seqLenQ });

            // Apply the attention probabilities to V, then fold the heads back into the feature dimension.
            IWeightTensor o = g.View(g.MulBatch(attnProbs, Vs), dims: new long[] { batchSize, m_multiHeadNum, seqLenQ, m_d });
            IWeightTensor W = g.View(g.AsContiguous(g.Transpose(o, 1, 2)), dims: new long[] { batchSize *seqLenQ, m_multiHeadNum *m_d });

            // Output projection
            // The skip connection adds the un-normalized inputQ, and the add runs on the outer graph.
            IWeightTensor finalAttResults = g.Dropout(g.Affine(W, W0, b0), batchSize, m_dropoutRatio, inPlace: true);
            IWeightTensor result          = graph.Add(finalAttResults, inputQ, inPlace: true);


            return(result, sumAttnWeights);
        }