Beispiel #1
0
        public IWeightMatrix Perform(IWeightMatrix state, AttentionPreProcessResult attenPreProcessResult, IComputeGraph g)
        {
            var bWas  = g.RepeatRows(bWa, state.Rows);
            var wc    = g.MulAdd(state, Wa, bWas);
            var wcs   = g.RepeatRows(wc, attenPreProcessResult.inputsUnfolder[0].Rows);
            var ggs   = g.AddTanh(attenPreProcessResult.uhs, wcs);
            var atten = g.Mul(ggs, V);

            List <IWeightMatrix> attens   = g.UnFolderRow(atten, m_batchSize);
            List <IWeightMatrix> contexts = new List <IWeightMatrix>();

            List <IWeightMatrix> attensT = new List <IWeightMatrix>();

            for (int i = 0; i < m_batchSize; i++)
            {
                attensT.Add(g.Transpose2(attens[i]));
            }

            var attenT       = g.ConcatRows(attensT);
            var attenSoftmax = g.SoftmaxM(attenT);

            for (int i = 0; i < m_batchSize; i++)
            {
                IWeightMatrix context = g.Mul(g.PeekRow(attenSoftmax, i), attenPreProcessResult.inputsUnfolder[i]);
                contexts.Add(context);
            }

            return(g.ConcatRows(contexts));
        }
        public IWeightTensor Perform(IWeightTensor state, AttentionPreProcessResult attenPreProcessResult, int batchSize, IComputeGraph graph)
        {
            int srcSeqLen = attenPreProcessResult.inputsBatchFirst.Rows / batchSize;

            using (IComputeGraph g = graph.CreateSubGraph(m_name))
            {
                // Affine decoder state
                IWeightTensor wc = g.Affine(state, m_Wa, m_bWa);

                // Expand dims from [batchSize x decoder_dim] to [batchSize x srcSeqLen x decoder_dim]
                IWeightTensor wc1   = g.View(wc, batchSize, 1, wc.Columns);
                IWeightTensor wcExp = g.Expand(wc1, batchSize, srcSeqLen, wc.Columns);

                IWeightTensor ggs = null;
                if (m_enableCoverageModel)
                {
                    // Get coverage model status at {t-1}
                    IWeightTensor wCoverage  = g.Affine(m_coverage.Hidden, m_Wc, m_bWc);
                    IWeightTensor wCoverage1 = g.View(wCoverage, batchSize, srcSeqLen, -1);

                    ggs = g.AddTanh(attenPreProcessResult.uhs, wcExp, wCoverage1);
                }
                else
                {
                    ggs = g.AddTanh(attenPreProcessResult.uhs, wcExp);
                }

                IWeightTensor ggss  = g.View(ggs, batchSize * srcSeqLen, -1);
                IWeightTensor atten = g.Mul(ggss, m_V);

                IWeightTensor attenT  = g.Transpose(atten);
                IWeightTensor attenT2 = g.View(attenT, batchSize, srcSeqLen);

                IWeightTensor attenSoftmax1 = g.Softmax(attenT2, inPlace: true);

                IWeightTensor attenSoftmax = g.View(attenSoftmax1, batchSize, 1, srcSeqLen);
                IWeightTensor inputs2      = g.View(attenPreProcessResult.inputsBatchFirst, batchSize, srcSeqLen, attenPreProcessResult.inputsBatchFirst.Columns);

                IWeightTensor contexts = graph.MulBatch(attenSoftmax, inputs2, batchSize);

                if (m_enableCoverageModel)
                {
                    // Concatenate tensor as input for coverage model
                    IWeightTensor aCoverage = g.View(attenSoftmax1, attenPreProcessResult.inputsBatchFirst.Rows, 1);


                    IWeightTensor state2 = g.View(state, batchSize, 1, state.Columns);
                    IWeightTensor state3 = g.Expand(state2, batchSize, srcSeqLen, state.Columns);
                    IWeightTensor state4 = g.View(state3, batchSize * srcSeqLen, -1);


                    IWeightTensor concate = g.ConcatColumns(aCoverage, attenPreProcessResult.inputsBatchFirst, state4);
                    m_coverage.Step(concate, graph);
                }


                return(contexts);
            }
        }
        public IWeightMatrix Perform(IWeightMatrix state, AttentionPreProcessResult attenPreProcessResult, IComputeGraph g)
        {
            var bWas  = g.RepeatRows(bWa, state.Rows);
            var wc    = g.MulAdd(state, Wa, bWas);
            var wcs   = g.RepeatRows(wc, attenPreProcessResult.inputs.Rows / m_batchSize);
            var ggs   = g.AddTanh(attenPreProcessResult.uhs, wcs);
            var atten = g.Mul(ggs, V);

            var atten2  = g.PermuteBatch(atten, m_batchSize);
            var attenT  = g.Transpose2(atten2);
            var attenT2 = g.View(attenT, m_batchSize, attenPreProcessResult.inputs.Rows / m_batchSize);

            var attenSoftmax = g.Softmax(attenT2);

            IWeightMatrix contexts = g.MulBatch(attenSoftmax, attenPreProcessResult.inputs, m_batchSize);


            return(contexts);
        }
Beispiel #4
0
        /// <summary>
        /// Transformer encoder
        /// </summary>
        /// <param name="rawInputs"></param>
        /// <param name="g"></param>
        /// <returns></returns>
        public IWeightTensor Encode(IWeightTensor rawInput, IComputeGraph g)
        {
            int seqLen             = rawInput.Rows / m_batchSize;
            var posEmbedding       = g.BuildPositionMatrix(seqLen, m_inputDim);
            var posEmbeddingRepeat = g.RepeatRows(posEmbedding, m_batchSize);

            // Transpose to batch-first based sequence
            var inputs = g.TransposeBatch(rawInput, m_batchSize);

            inputs = g.Mul(inputs, (float)Math.Sqrt(m_inputDim));
            inputs = g.Add(inputs, posEmbeddingRepeat);

            for (int k = 0; k < m_encoders.Count; k++)
            {
                inputs = m_encoders[k].Perform(inputs, g);
            }

            // Transpose back to time-first based sequence
            rawInput = g.TransposeBatch(inputs, seqLen);

            return(rawInput);
        }
Beispiel #5
0
        public IWeightTensor Perform(IWeightTensor state, AttentionPreProcessResult attenPreProcessResult, int batchSize, IComputeGraph graph)
        {
            IComputeGraph g = graph.CreateSubGraph(m_name);

            var wc    = g.Affine(state, m_Wa, m_bWa);
            var wcs   = g.RepeatRows(wc, attenPreProcessResult.inputs.Rows / batchSize);
            var ggs   = g.AddTanh(attenPreProcessResult.uhs, wcs);
            var atten = g.Mul(ggs, m_V);

            var atten2  = g.TransposeBatch(atten, batchSize);
            var attenT  = g.Transpose(atten2);
            var attenT2 = g.View(attenT, batchSize, attenPreProcessResult.inputs.Rows / batchSize);

            var attenSoftmax1 = g.Softmax(attenT2, inPlace: true);

            var attenSoftmax = g.View(attenSoftmax1, batchSize, attenSoftmax1.Rows / batchSize, attenSoftmax1.Columns);
            var inputs2      = g.View(attenPreProcessResult.inputs, batchSize, attenPreProcessResult.inputs.Rows / batchSize, attenPreProcessResult.inputs.Columns);

            IWeightTensor contexts = g.MulBatch(attenSoftmax, inputs2, batchSize);

            return(contexts);
        }
Beispiel #6
0
        private IWeightTensor AddPositionEmbedding(IComputeGraph g, IWeightTensor posEmbedding, int batchSize, int seqLen, IWeightTensor inputEmbs)
        {
            var Column = posEmbedding.Columns;

            inputEmbs = g.Mul(inputEmbs, (float)Math.Sqrt(m_modelMetaData.HiddenDim));

            using (var posEmbeddingPeek = g.PeekRow(posEmbedding, 0, seqLen, false))
            {
                using (var posEmbeddingPeekView = g.View(posEmbeddingPeek, runGradient: false, dims: new long[] { 1, seqLen, Column }))
                {
                    using (var posEmbeddingPeekViewExp = g.Expand(posEmbeddingPeekView, runGradient: false, dims: new long[] { batchSize, seqLen, Column }))
                    {
                        inputEmbs = g.View(inputEmbs, dims: new long[] { batchSize, seqLen, Column });
                        inputEmbs = g.Add(inputEmbs, posEmbeddingPeekViewExp, true, false);
                        inputEmbs = g.View(inputEmbs, dims: new long[] { batchSize *seqLen, Column });
                    }
                }
            }

            inputEmbs = g.Dropout(inputEmbs, batchSize, m_dropoutRatio, inPlace: true);

            return(inputEmbs);
        }
        private float DecodeTransformer(List <List <string> > outInputSeqs, IComputeGraph g, IWeightTensor encOutputs, IWeightTensor encMask, TransformerDecoder decoder,
                                        IWeightTensor tgtEmbedding, int batchSize, int deviceId, bool isTraining = true)
        {
            float cost = 0.0f;

            var originalInputLengths = ParallelCorpus.PadSentences(outInputSeqs);
            int tgtSeqLen            = outInputSeqs[0].Count;

            IWeightTensor tgtDimMask = MaskUtils.BuildPadDimMask(g, tgtSeqLen, originalInputLengths, m_modelMetaData.HiddenDim, deviceId);

            using (IWeightTensor tgtSelfTriMask = MaskUtils.BuildPadSelfTriMask(g, tgtSeqLen, originalInputLengths, deviceId))
            {
                List <IWeightTensor> inputs = new List <IWeightTensor>();
                for (int i = 0; i < batchSize; i++)
                {
                    for (int j = 0; j < tgtSeqLen; j++)
                    {
                        int ix_targets_k = m_modelMetaData.Vocab.GetTargetWordIndex(outInputSeqs[i][j], logUnk: true);
                        inputs.Add(g.PeekRow(tgtEmbedding, ix_targets_k));
                    }
                }

                IWeightTensor tgtInputEmbeddings = inputs.Count > 1 ? g.ConcatRows(inputs) : inputs[0];
                IWeightTensor decOutput          = decoder.Decode(tgtInputEmbeddings, encOutputs, tgtSelfTriMask, encMask, tgtDimMask, batchSize, g);

                decOutput = g.Mul(decOutput, g.Transpose(tgtEmbedding));

                using (IWeightTensor probs = g.Softmax(decOutput, runGradients: false, inPlace: true))
                {
                    if (isTraining)
                    {
                        var leftShiftInputSeqs    = ParallelCorpus.LeftShiftSnts(outInputSeqs, ParallelCorpus.EOS);
                        var originalOutputLengths = ParallelCorpus.PadSentences(leftShiftInputSeqs, tgtSeqLen);

                        for (int i = 0; i < batchSize; i++)
                        {
                            for (int j = 0; j < tgtSeqLen; j++)
                            {
                                using (IWeightTensor probs_i_j = g.PeekRow(probs, i * tgtSeqLen + j, runGradients: false))
                                {
                                    if (j < originalOutputLengths[i])
                                    {
                                        int   ix_targets_i_j = m_modelMetaData.Vocab.GetTargetWordIndex(leftShiftInputSeqs[i][j], logUnk: true);
                                        float score_i_j      = probs_i_j.GetWeightAt(ix_targets_i_j);
                                        if (j < originalOutputLengths[i])
                                        {
                                            cost += (float)-Math.Log(score_i_j);
                                        }

                                        probs_i_j.SetWeightAt(score_i_j - 1, ix_targets_i_j);
                                    }
                                    else
                                    {
                                        probs_i_j.CleanWeight();
                                    }
                                }
                            }
                        }

                        decOutput.CopyWeightsToGradients(probs);
                    }
                    else
                    {
                        // Output "i"th target word
                        int[]         targetIdx   = g.Argmax(probs, 1);
                        List <string> targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());

                        for (int i = 0; i < batchSize; i++)
                        {
                            outInputSeqs[i].Add(targetWords[i * tgtSeqLen + tgtSeqLen - 1]);
                        }
                    }
                }
            }


            return(cost);
        }
Beispiel #8
0
        /// <summary>
        /// Create input embedding from token embeddings, segment embeddings
        /// </summary>
        /// <param name="seqs"></param>
        /// <param name="g"></param>
        /// <param name="embeddingsTensor"></param>
        /// <param name="seqOriginalLengths"></param>
        /// <param name="segmentEmbedding"></param>
        /// <param name="vocab"></param>
        /// <returns>The embedding tensor. shape: (batchsize * seqLen, embedding_dim) </returns>
        public static IWeightTensor CreateTokensEmbeddings(List <List <int> > seqs, IComputeGraph g, IWeightTensor embeddingsTensor,
                                                           IWeightTensor segmentEmbedding, Vocab vocab, float scaleFactor = 1.0f, bool enableTagEmbedding = false)
        {
            int batchSize = seqs.Count;
            int seqLen    = seqs[0].Count;

            float[]        idxs        = new float[batchSize * seqLen];
            float[]        segIdxs     = new float[batchSize * seqLen];
            List <float[]> tagIdxsList = new List <float[]>();

            //float[] tagIdxs = new float[batchSize * seqLen];

            for (int i = 0; i < batchSize; i++)
            {
                int        segIdx       = 0;
                List <int> currTagIdxs  = new List <int>();
                int        currTagLevel = 0;

                for (int j = 0; j < seqLen; j++)
                {
                    idxs[i * seqLen + j]    = seqs[i][j];
                    segIdxs[i * seqLen + j] = segIdx;

                    string token = vocab.GetString(seqs[i][j]);
                    if (token == BuildInTokens.SEP)
                    {
                        //A new segment
                        segIdx++;
                    }


                    if (enableTagEmbedding)
                    {
                        if (token.StartsWith("<") && token.EndsWith(">") && BuildInTokens.IsPreDefinedToken(token) == false)
                        {
                            if (token[1] == '/')
                            {
                                currTagLevel--;
                                currTagIdxs[currTagLevel] = -1;
                            }
                            else
                            {
                                //A new opening tag
                                while (tagIdxsList.Count <= currTagLevel)
                                {
                                    float[] tagIdxs = new float[batchSize * seqLen];
                                    Array.Fill(tagIdxs, -1.0f);
                                    tagIdxsList.Add(tagIdxs);
                                }

                                while (currTagIdxs.Count <= currTagLevel)
                                {
                                    currTagIdxs.Add(-1);
                                }

                                currTagIdxs[currTagLevel] = seqs[i][j];

                                currTagLevel++;
                            }
                        }
                        else
                        {
                            for (int k = 0; k < currTagLevel; k++)
                            {
                                tagIdxsList[k][i * seqLen + j] = currTagIdxs[k];

                                //Logger.WriteLine($"Add tag embeddings: '{currTagIdxs[k]}'");
                            }
                        }
                    }
                }
            }

            IWeightTensor tagEmbeddings = null;

            if (enableTagEmbedding)
            {
                for (int k = 0; k < tagIdxsList.Count; k++)
                {
                    var tagEmbeddings_k = g.IndexSelect(embeddingsTensor, tagIdxsList[k], clearWeights: true);
                    if (tagEmbeddings == null)
                    {
                        tagEmbeddings = tagEmbeddings_k;
                    }
                    else
                    {
                        tagEmbeddings = g.Add(tagEmbeddings, tagEmbeddings_k);
                    }
                }
            }

            IWeightTensor embeddingRst = g.IndexSelect(embeddingsTensor, idxs);

            if (scaleFactor != 1.0f)
            {
                embeddingRst = g.Mul(embeddingRst, scaleFactor, inPlace: true);
            }

            // Apply segment embeddings to the input sequence embeddings
            if (segmentEmbedding != null)
            {
                embeddingRst = g.Add(embeddingRst, g.IndexSelect(segmentEmbedding, segIdxs));
            }

            if (tagEmbeddings != null)
            {
                embeddingRst = g.Add(embeddingRst, tagEmbeddings);
            }

            return(embeddingRst);
        }