/// <summary>
        /// Encode source sentences and output encoded weights
        /// </summary>
        /// <param name="g"></param>
        /// <param name="inputSentences"></param>
        /// <param name="encoder"></param>
        /// <param name="reversEncoder"></param>
        /// <param name="Embedding"></param>
        /// <returns></returns>
        private IWeightMatrix Encode(IComputeGraph g, List <List <string> > inputSentences, Encoder encoder, Encoder reversEncoder, IWeightMatrix Embedding)
        {
            PadSentences(inputSentences);
            List <IWeightMatrix> forwardOutputs  = new List <IWeightMatrix>();
            List <IWeightMatrix> backwardOutputs = new List <IWeightMatrix>();

            int seqLen = inputSentences[0].Count;
            List <IWeightMatrix> forwardInput = new List <IWeightMatrix>();

            for (int i = 0; i < seqLen; i++)
            {
                for (int j = 0; j < inputSentences.Count; j++)
                {
                    var inputSentence = inputSentences[j];
                    int ix_source     = (int)SENTTAGS.UNK;
                    if (m_srcWordToIndex.ContainsKey(inputSentence[i]))
                    {
                        ix_source = m_srcWordToIndex[inputSentence[i]];
                    }
                    var x = g.PeekRow(Embedding, ix_source);
                    forwardInput.Add(x);
                }
            }

            var forwardInputsM = g.ConcatRows(forwardInput);
            List <IWeightMatrix> attResults = new List <IWeightMatrix>();

            for (int i = 0; i < seqLen; i++)
            {
                var emb_i = g.PeekRow(forwardInputsM, i * inputSentences.Count, inputSentences.Count);
                attResults.Add(emb_i);
            }

            for (int i = 0; i < seqLen; i++)
            {
                var eOutput = encoder.Encode(attResults[i], g);
                forwardOutputs.Add(eOutput);

                var eOutput2 = reversEncoder.Encode(attResults[seqLen - i - 1], g);
                backwardOutputs.Add(eOutput2);
            }

            backwardOutputs.Reverse();

            var encodedOutput = g.ConcatRowColumn(forwardOutputs, backwardOutputs);

            return(encodedOutput);
        }
示例#2
0
        /// <summary>
        /// Encode source sentences and output encoded weights
        /// </summary>
        /// <param name="g"></param>
        /// <param name="inputSentences"></param>
        /// <param name="encoder"></param>
        /// <param name="reversEncoder"></param>
        /// <param name="Embedding"></param>
        /// <returns></returns>
        private IWeightTensor Encode(IComputeGraph g, List <List <string> > inputSentences, IEncoder encoder, IWeightTensor Embedding)
        {
            PadSentences(inputSentences);
            List <IWeightTensor> forwardOutputs  = new List <IWeightTensor>();
            List <IWeightTensor> backwardOutputs = new List <IWeightTensor>();

            int seqLen = inputSentences[0].Count;
            List <IWeightTensor> forwardInput = new List <IWeightTensor>();

            for (int i = 0; i < seqLen; i++)
            {
                for (int j = 0; j < inputSentences.Count; j++)
                {
                    var inputSentence = inputSentences[j];
                    int ix_source     = (int)SENTTAGS.UNK;
                    if (m_srcWordToIndex.ContainsKey(inputSentence[i]))
                    {
                        ix_source = m_srcWordToIndex[inputSentence[i]];
                    }
                    else
                    {
                        Logger.WriteLine($"'{inputSentence[i]}' is an unknown word.");
                    }
                    var x = g.PeekRow(Embedding, ix_source);
                    forwardInput.Add(x);
                }
            }

            var forwardInputsM = g.ConcatRows(forwardInput);

            return(encoder.Encode(forwardInputsM, g));
        }
示例#3
0
        /// <summary>
        /// Encode source sentences and output encoded weights
        /// </summary>
        /// <param name="g"></param>
        /// <param name="srcSnts"></param>
        /// <param name="encoder"></param>
        /// <param name="reversEncoder"></param>
        /// <param name="Embedding"></param>
        /// <returns></returns>
        private IWeightTensor Encode(IComputeGraph g, List <List <string> > srcSnts, IEncoder encoder, IWeightTensor Embedding, IWeightTensor srcSelfMask, IWeightTensor posEmbedding, List <int> originalSrcLengths)
        {
            var seqLen    = srcSnts[0].Count;
            var batchSize = srcSnts.Count;

            var inputs = new List <IWeightTensor>();

            // Generate batch-first based input embeddings
            for (var j = 0; j < batchSize; j++)
            {
                var originalLength = originalSrcLengths[j];
                for (var i = 0; i < seqLen; i++)
                {
                    var ix_source = this.m_modelMetaData.Vocab.GetSourceWordIndex(srcSnts[j][i], true);

                    var emb = g.PeekRow(Embedding, ix_source, runGradients: i < originalLength ? true : false);

                    inputs.Add(emb);
                }
            }

            var inputEmbs = g.ConcatRows(inputs);

            if (this.m_modelMetaData.EncoderType == EncoderTypeEnums.Transformer)
            {
                inputEmbs = this.AddPositionEmbedding(g, posEmbedding, batchSize, seqLen, inputEmbs);
            }


            return(encoder.Encode(inputEmbs, batchSize, g, srcSelfMask));
        }
示例#4
0
        public IWeightMatrix Perform(IWeightMatrix state, AttentionPreProcessResult attenPreProcessResult, IComputeGraph g)
        {
            var bWas  = g.RepeatRows(bWa, state.Rows);
            var wc    = g.MulAdd(state, Wa, bWas);
            var wcs   = g.RepeatRows(wc, attenPreProcessResult.inputsUnfolder[0].Rows);
            var ggs   = g.AddTanh(attenPreProcessResult.uhs, wcs);
            var atten = g.Mul(ggs, V);

            List <IWeightMatrix> attens   = g.UnFolderRow(atten, m_batchSize);
            List <IWeightMatrix> contexts = new List <IWeightMatrix>();

            List <IWeightMatrix> attensT = new List <IWeightMatrix>();

            for (int i = 0; i < m_batchSize; i++)
            {
                attensT.Add(g.Transpose2(attens[i]));
            }

            var attenT       = g.ConcatRows(attensT);
            var attenSoftmax = g.SoftmaxM(attenT);

            for (int i = 0; i < m_batchSize; i++)
            {
                IWeightMatrix context = g.Mul(g.PeekRow(attenSoftmax, i), attenPreProcessResult.inputsUnfolder[i]);
                contexts.Add(context);
            }

            return(g.ConcatRows(contexts));
        }
示例#5
0
        public void VisualizeNeuralNetwork(string visNNFilePath)
        {
            (IEncoder encoder, IDecoder decoder, IWeightTensor srcEmbedding, IWeightTensor tgtEmbedding) = GetNetworksOnDeviceAt(-1);
            // Build input sentence
            List <List <string> > inputSeqs = ParallelCorpus.ConstructInputTokens(null);
            int              batchSize      = inputSeqs.Count;
            IComputeGraph    g          = CreateComputGraph(m_defaultDeviceId, needBack: false, visNetwork: true);
            AttentionDecoder rnnDecoder = decoder as AttentionDecoder;

            encoder.Reset(g.GetWeightFactory(), batchSize);
            rnnDecoder.Reset(g.GetWeightFactory(), batchSize);

            // Run encoder
            IWeightTensor encodedWeightMatrix = Encode(g, inputSeqs, encoder, srcEmbedding, null, null);

            // Prepare for attention over encoder-decoder
            AttentionPreProcessResult attPreProcessResult = rnnDecoder.PreProcess(encodedWeightMatrix, batchSize, g);

            // Run decoder
            IWeightTensor x       = g.PeekRow(tgtEmbedding, (int)SENTTAGS.START);
            IWeightTensor eOutput = rnnDecoder.Decode(x, attPreProcessResult, batchSize, g);
            IWeightTensor probs   = g.Softmax(eOutput);

            g.VisualizeNeuralNetToFile(visNNFilePath);
        }
示例#6
0
        /// <summary>
        /// Encode source sentences and output encoded weights
        /// </summary>
        /// <param name="g"></param>
        /// <param name="inputSentences"></param>
        /// <param name="encoder"></param>
        /// <param name="reversEncoder"></param>
        /// <param name="Embedding"></param>
        /// <returns></returns>
        private IWeightTensor Encode(IComputeGraph g, List <List <string> > inputSentences, IEncoder encoder, IWeightTensor Embedding)
        {
            int seqLen    = inputSentences[0].Count;
            int batchSize = inputSentences.Count;

            List <IWeightTensor> forwardInput = new List <IWeightTensor>();

            for (int i = 0; i < seqLen; i++)
            {
                for (int j = 0; j < inputSentences.Count; j++)
                {
                    int ix_source = m_modelMetaData.Vocab.GetSourceWordIndex(inputSentences[j][i], logUnk: true);
                    forwardInput.Add(g.PeekRow(Embedding, ix_source));
                }
            }

            return(encoder.Encode(g.ConcatRows(forwardInput), batchSize, g));
        }
示例#7
0
        private IWeightTensor AddPositionEmbedding(IComputeGraph g, IWeightTensor posEmbedding, int batchSize, int seqLen, IWeightTensor inputEmbs)
        {
            using (var posEmbeddingPeek = g.PeekRow(posEmbedding, 0, seqLen, false))
            {
                using (var posEmbeddingPeekView = g.View(posEmbeddingPeek, false, new long[] { 1, seqLen, this.m_modelMetaData.EmbeddingDim }))
                {
                    using (var posEmbeddingPeekViewExp = g.Expand(posEmbeddingPeekView, false, new long[] { batchSize, seqLen, this.m_modelMetaData.EmbeddingDim }))
                    {
                        inputEmbs = g.View(inputEmbs, dims: new long[] { batchSize, seqLen, this.m_modelMetaData.EmbeddingDim });
                        inputEmbs = g.Add(inputEmbs, posEmbeddingPeekViewExp, true, false);
                        inputEmbs = g.View(inputEmbs, dims: new long[] { batchSize *seqLen, this.m_modelMetaData.EmbeddingDim });
                    }
                }
            }

            inputEmbs = g.Dropout(inputEmbs, batchSize, this.m_dropoutRatio, true);

            return(inputEmbs);
        }
示例#8
0
        /// <summary>
        /// Encode source sentences and output encoded weights
        /// </summary>
        /// <param name="g"></param>
        /// <param name="srcSnts"></param>
        /// <param name="encoder"></param>
        /// <param name="reversEncoder"></param>
        /// <param name="Embedding"></param>
        /// <returns></returns>
        private IWeightTensor Encode(IComputeGraph g, List <List <string> > srcSnts, IEncoder encoder, IWeightTensor Embedding, IWeightTensor selfMask, IWeightTensor dimMask)
        {
            int seqLen    = srcSnts[0].Count;
            int batchSize = srcSnts.Count;

            List <IWeightTensor> forwardInput = new List <IWeightTensor>();

            // Generate batch-first based input embeddings
            for (int j = 0; j < batchSize; j++)
            {
                for (int i = 0; i < seqLen; i++)
                {
                    int ix_source = m_modelMetaData.Vocab.GetSourceWordIndex(srcSnts[j][i], logUnk: true);
                    forwardInput.Add(g.PeekRow(Embedding, ix_source));
                }
            }

            return(encoder.Encode(g.ConcatRows(forwardInput), selfMask, dimMask, batchSize, g));
        }
示例#9
0
        public IWeightTensor Encode(IWeightTensor rawInputs, int batchSize, IComputeGraph g, IWeightTensor srcSelfMask)
        {
            int seqLen = rawInputs.Rows / batchSize;

            rawInputs = g.TransposeBatch(rawInputs, seqLen);

            List <IWeightTensor> inputs = new List <IWeightTensor>();

            for (int i = 0; i < seqLen; i++)
            {
                IWeightTensor emb_i = g.PeekRow(rawInputs, i * batchSize, batchSize);
                inputs.Add(emb_i);
            }

            List <IWeightTensor> forwardOutputs  = new List <IWeightTensor>();
            List <IWeightTensor> backwardOutputs = new List <IWeightTensor>();

            List <IWeightTensor> layerOutputs = inputs.ToList();

            for (int i = 0; i < m_depth; i++)
            {
                for (int j = 0; j < seqLen; j++)
                {
                    IWeightTensor forwardOutput = m_forwardEncoders[i].Step(layerOutputs[j], g);
                    forwardOutputs.Add(forwardOutput);

                    IWeightTensor backwardOutput = m_backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
                    backwardOutputs.Add(backwardOutput);
                }

                backwardOutputs.Reverse();
                layerOutputs.Clear();
                for (int j = 0; j < seqLen; j++)
                {
                    IWeightTensor concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
                    layerOutputs.Add(concatW);
                }
            }

            var result = g.ConcatRows(layerOutputs);

            return(g.TransposeBatch(result, batchSize));
        }
示例#10
0
        private float DecodeOutput(string[] OutputSentence, IComputeGraph g, float cost, SparseWeightMatrix sparseInput, List <WeightMatrix> encoded, AttentionDecoder decoder, WeightMatrix Whd, WeightMatrix bd, WeightMatrix Embedding)
        {
            int ix_input = (int)SENTTAGS.START;

            for (int i = 0; i < OutputSentence.Length + 1; i++)
            {
                int ix_target = (int)SENTTAGS.UNK;
                if (i == OutputSentence.Length)
                {
                    ix_target = (int)SENTTAGS.END;
                }
                else
                {
                    if (t_wordToIndex.ContainsKey(OutputSentence[i]))
                    {
                        ix_target = t_wordToIndex[OutputSentence[i]];
                    }
                }


                var x       = g.PeekRow(Embedding, ix_input);
                var eOutput = decoder.Decode(sparseInput, x, encoded, g);
                if (UseDropout)
                {
                    eOutput = g.Dropout(eOutput, 0.2f);
                }
                var o = g.muladd(eOutput, Whd, bd);
                if (UseDropout)
                {
                    o = g.Dropout(o, 0.2f);
                }

                var probs = g.SoftmaxWithCrossEntropy(o);
                cost += (float)-Math.Log(probs.Weight[ix_target]);

                o.Gradient             = probs.Weight;
                o.Gradient[ix_target] -= 1;
                ix_input = ix_target;
            }
            return(cost);
        }
示例#11
0
        public IWeightTensor Encode(IWeightTensor rawInputs, IComputeGraph g)
        {
            int seqLen = rawInputs.Rows / m_batchSize;

            List <IWeightTensor> inputs = new List <IWeightTensor>();

            for (int i = 0; i < seqLen; i++)
            {
                var emb_i = g.PeekRow(rawInputs, i * m_batchSize, m_batchSize);
                inputs.Add(emb_i);
            }

            List <IWeightTensor> forwardOutputs  = new List <IWeightTensor>();
            List <IWeightTensor> backwardOutputs = new List <IWeightTensor>();

            List <IWeightTensor> layerOutputs = inputs.ToList();

            for (int i = 0; i < m_depth; i++)
            {
                for (int j = 0; j < seqLen; j++)
                {
                    var forwardOutput = m_forwardEncoders[i].Step(layerOutputs[j], g);
                    forwardOutputs.Add(forwardOutput);

                    var backwardOutput = m_backwardEncoders[i].Step(layerOutputs[inputs.Count - j - 1], g);
                    backwardOutputs.Add(backwardOutput);
                }

                backwardOutputs.Reverse();
                layerOutputs.Clear();
                for (int j = 0; j < seqLen; j++)
                {
                    var concatW = g.ConcatColumns(forwardOutputs[j], backwardOutputs[j]);
                    layerOutputs.Add(concatW);
                }
            }

            return(g.ConcatRows(layerOutputs));
        }
示例#12
0
        private IWeightTensor AddPositionEmbedding(IComputeGraph g, IWeightTensor posEmbedding, int batchSize, int seqLen, IWeightTensor inputEmbs)
        {
            var Column = posEmbedding.Columns;

            inputEmbs = g.Mul(inputEmbs, (float)Math.Sqrt(m_modelMetaData.HiddenDim));

            using (var posEmbeddingPeek = g.PeekRow(posEmbedding, 0, seqLen, false))
            {
                using (var posEmbeddingPeekView = g.View(posEmbeddingPeek, runGradient: false, dims: new long[] { 1, seqLen, Column }))
                {
                    using (var posEmbeddingPeekViewExp = g.Expand(posEmbeddingPeekView, runGradient: false, dims: new long[] { batchSize, seqLen, Column }))
                    {
                        inputEmbs = g.View(inputEmbs, dims: new long[] { batchSize, seqLen, Column });
                        inputEmbs = g.Add(inputEmbs, posEmbeddingPeekViewExp, true, false);
                        inputEmbs = g.View(inputEmbs, dims: new long[] { batchSize *seqLen, Column });
                    }
                }
            }

            inputEmbs = g.Dropout(inputEmbs, batchSize, m_dropoutRatio, inPlace: true);

            return(inputEmbs);
        }
示例#13
0
        /// <summary>
        /// Run forward part on given single device
        /// </summary>
        /// <param name="g">The computing graph for current device. It gets created and passed by the framework</param>
        /// <param name="srcSnts">A batch of input tokenized sentences in source side</param>
        /// <param name="tgtSnts">A batch of output tokenized sentences in target side. In training mode, it inputs target tokens, otherwise, it outputs target tokens generated by decoder</param>
        /// <param name="deviceIdIdx">The index of current device</param>
        /// <returns>The cost of forward part</returns>
        private float RunForwardOnSingleDevice(IComputeGraph g, List <List <string> > srcSnts, List <List <string> > tgtSnts, int deviceIdIdx, bool isTraining)
        {
            (IEncoder encoder, IWeightTensor srcEmbedding, FeedForwardLayer decoderFFLayer) = GetNetworksOnDeviceAt(deviceIdIdx);
            int batchSize = srcSnts.Count;

            // Reset networks
            encoder.Reset(g.GetWeightFactory(), batchSize);

            // Encoding input source sentences
            ParallelCorpus.PadSentences(srcSnts);

            if (isTraining)
            {
                ParallelCorpus.PadSentences(tgtSnts);

                if (srcSnts[0].Count != tgtSnts[0].Count)
                {
                    throw new ArgumentException($"The length of source side and target side must be equal. source length = '{srcSnts[0].Count}', target length = '{tgtSnts[0].Count}'");
                }
            }
            int seqLen = srcSnts[0].Count;

            IWeightTensor encodedWeightMatrix = Encode(g.CreateSubGraph("Encoder"), srcSnts, encoder, srcEmbedding);
            IWeightTensor ffLayer             = decoderFFLayer.Process(encodedWeightMatrix, batchSize, g);

            IWeightTensor ffLayerBatch = g.TransposeBatch(ffLayer, batchSize);

            //    Logger.WriteLine("1");

            float cost = 0.0f;

            using (var probs = g.Softmax(ffLayerBatch, runGradients: false, inPlace: true))
            {
                if (isTraining)
                {
                    //Calculate loss for each word in the batch
                    for (int k = 0; k < batchSize; k++)
                    {
                        for (int j = 0; j < seqLen; j++)
                        {
                            using (var probs_k_j = g.PeekRow(probs, k * seqLen + j, runGradients: false))
                            {
                                var ix_targets_k_j = m_modelMetaData.Vocab.GetTargetWordIndex(tgtSnts[k][j]);
                                var score_k        = probs_k_j.GetWeightAt(ix_targets_k_j);
                                cost += (float)-Math.Log(score_k);

                                probs_k_j.SetWeightAt(score_k - 1, ix_targets_k_j);
                            }
                        }

                        ////CRF part
                        //using (var probs_k = g.PeekRow(probs, k * seqLen, seqLen, runGradients: false))
                        //{
                        //    var weights_k = probs_k.ToWeightArray();
                        //    var crfOutput_k = m_crfDecoder.ForwardBackward(seqLen, weights_k);

                        //    int[] trueTags = new int[seqLen];
                        //    for (int j = 0; j < seqLen; j++)
                        //    {
                        //        trueTags[j] = m_modelMetaData.Vocab.GetTargetWordIndex(tgtSnts[k][j]);
                        //    }
                        //    m_crfDecoder.UpdateBigramTransition(seqLen, crfOutput_k, trueTags);
                        //}
                    }

                    ffLayerBatch.CopyWeightsToGradients(probs);
                }
                else
                {
                    // CRF decoder
                    //for (int k = 0; k < batchSize; k++)
                    //{
                    //    //CRF part
                    //    using (var probs_k = g.PeekRow(probs, k * seqLen, seqLen, runGradients: false))
                    //    {
                    //        var weights_k = probs_k.ToWeightArray();

                    //        var crfOutput_k = m_crfDecoder.DecodeNBestCRF(weights_k, seqLen, 1);
                    //        var targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(crfOutput_k[0].ToList());
                    //        tgtSnts.Add(targetWords);
                    //    }
                    //}


                    // Output "i"th target word
                    var targetIdx   = g.Argmax(probs, 1);
                    var targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());

                    for (int k = 0; k < batchSize; k++)
                    {
                        tgtSnts.Add(targetWords.GetRange(k * seqLen, seqLen));
                    }
                }
            }

            return(cost);
        }
示例#14
0
        /// <summary>
        /// Given input sentence and generate output sentence by seq2seq model with beam search
        /// </summary>
        /// <param name="input"></param>
        /// <param name="beamSearchSize"></param>
        /// <param name="maxOutputLength"></param>
        /// <returns></returns>
        public List <List <string> > Predict(List <string> input, int beamSearchSize = 1, int maxOutputLength = 100)
        {
            (IEncoder encoder, IDecoder decoder, IWeightTensor srcEmbedding, IWeightTensor tgtEmbedding) = GetNetworksOnDeviceAt(-1);
            List <List <string> > inputSeqs = ParallelCorpus.ConstructInputTokens(input);
            int batchSize = 1; // For predict with beam search, we currently only supports one sentence per call

            IComputeGraph    g          = CreateComputGraph(m_defaultDeviceId, needBack: false);
            AttentionDecoder rnnDecoder = decoder as AttentionDecoder;

            encoder.Reset(g.GetWeightFactory(), batchSize);
            rnnDecoder.Reset(g.GetWeightFactory(), batchSize);

            // Construct beam search status list
            List <BeamSearchStatus> bssList = new List <BeamSearchStatus>();
            BeamSearchStatus        bss     = new BeamSearchStatus();

            bss.OutputIds.Add((int)SENTTAGS.START);
            bss.CTs = rnnDecoder.GetCTs();
            bss.HTs = rnnDecoder.GetHTs();
            bssList.Add(bss);

            IWeightTensor             encodedWeightMatrix = Encode(g, inputSeqs, encoder, srcEmbedding, null, null);
            AttentionPreProcessResult attPreProcessResult = rnnDecoder.PreProcess(encodedWeightMatrix, batchSize, g);

            List <BeamSearchStatus> newBSSList = new List <BeamSearchStatus>();
            bool finished     = false;
            int  outputLength = 0;

            while (finished == false && outputLength < maxOutputLength)
            {
                finished = true;
                for (int i = 0; i < bssList.Count; i++)
                {
                    bss = bssList[i];
                    if (bss.OutputIds[bss.OutputIds.Count - 1] == (int)SENTTAGS.END)
                    {
                        newBSSList.Add(bss);
                    }
                    else if (bss.OutputIds.Count > maxOutputLength)
                    {
                        newBSSList.Add(bss);
                    }
                    else
                    {
                        finished = false;
                        int ix_input = bss.OutputIds[bss.OutputIds.Count - 1];
                        rnnDecoder.SetCTs(bss.CTs);
                        rnnDecoder.SetHTs(bss.HTs);

                        IWeightTensor x       = g.PeekRow(tgtEmbedding, ix_input);
                        IWeightTensor eOutput = rnnDecoder.Decode(x, attPreProcessResult, batchSize, g);
                        using (IWeightTensor probs = g.Softmax(eOutput))
                        {
                            List <int> preds = probs.GetTopNMaxWeightIdx(beamSearchSize);
                            for (int j = 0; j < preds.Count; j++)
                            {
                                BeamSearchStatus newBSS = new BeamSearchStatus();
                                newBSS.OutputIds.AddRange(bss.OutputIds);
                                newBSS.OutputIds.Add(preds[j]);

                                newBSS.CTs = rnnDecoder.GetCTs();
                                newBSS.HTs = rnnDecoder.GetHTs();

                                float score = probs.GetWeightAt(preds[j]);
                                newBSS.Score  = bss.Score;
                                newBSS.Score += (float)(-Math.Log(score));

                                //var lengthPenalty = Math.Pow((5.0f + newBSS.OutputIds.Count) / 6, 0.6);
                                //newBSS.Score /= (float)lengthPenalty;

                                newBSSList.Add(newBSS);
                            }
                        }
                    }
                }

                bssList = BeamSearch.GetTopNBSS(newBSSList, beamSearchSize);
                newBSSList.Clear();

                outputLength++;
            }

            // Convert output target word ids to real string
            List <List <string> > results = new List <List <string> >();

            for (int i = 0; i < bssList.Count; i++)
            {
                results.Add(m_modelMetaData.Vocab.ConvertTargetIdsToString(bssList[i].OutputIds));
            }

            return(results);
        }
示例#15
0
        /// <summary>
        /// Decode output sentences in training
        /// </summary>
        /// <param name="outputSnts">In training mode, they are golden target sentences, otherwise, they are target sentences generated by the decoder</param>
        /// <param name="g"></param>
        /// <param name="encOutputs"></param>
        /// <param name="decoder"></param>
        /// <param name="decoderFFLayer"></param>
        /// <param name="tgtEmbedding"></param>
        /// <returns></returns>
        private float DecodeAttentionLSTM(List <List <string> > outputSnts, IComputeGraph g, IWeightTensor encOutputs, AttentionDecoder decoder, IWeightTensor tgtEmbedding, int batchSize, bool isTraining = true)
        {
            float cost = 0.0f;

            int[] ix_inputs = new int[batchSize];
            for (int i = 0; i < ix_inputs.Length; i++)
            {
                ix_inputs[i] = m_modelMetaData.Vocab.GetTargetWordIndex(outputSnts[i][0]);
            }

            // Initialize variables accoridng to current mode
            List <int>    originalOutputLengths = isTraining ? ParallelCorpus.PadSentences(outputSnts) : null;
            int           seqLen       = isTraining ? outputSnts[0].Count : 64;
            float         dropoutRatio = isTraining ? m_dropoutRatio : 0.0f;
            HashSet <int> setEndSentId = isTraining ? null : new HashSet <int>();

            // Pre-process for attention model
            AttentionPreProcessResult attPreProcessResult = decoder.PreProcess(encOutputs, batchSize, g);

            for (int i = 1; i < seqLen; i++)
            {
                //Get embedding for all sentence in the batch at position i
                List <IWeightTensor> inputs = new List <IWeightTensor>();
                for (int j = 0; j < batchSize; j++)
                {
                    inputs.Add(g.PeekRow(tgtEmbedding, ix_inputs[j]));
                }
                IWeightTensor inputsM = g.ConcatRows(inputs);

                //Decode output sentence at position i
                IWeightTensor eOutput = decoder.Decode(inputsM, attPreProcessResult, batchSize, g);

                //Softmax for output
                using (IWeightTensor probs = g.Softmax(eOutput, runGradients: false, inPlace: true))
                {
                    if (isTraining)
                    {
                        //Calculate loss for each word in the batch
                        for (int k = 0; k < batchSize; k++)
                        {
                            using (IWeightTensor probs_k = g.PeekRow(probs, k, runGradients: false))
                            {
                                int   ix_targets_k = m_modelMetaData.Vocab.GetTargetWordIndex(outputSnts[k][i]);
                                float score_k      = probs_k.GetWeightAt(ix_targets_k);
                                if (i < originalOutputLengths[k])
                                {
                                    cost += (float)-Math.Log(score_k);
                                }

                                probs_k.SetWeightAt(score_k - 1, ix_targets_k);
                                ix_inputs[k] = ix_targets_k;
                            }
                        }
                        eOutput.CopyWeightsToGradients(probs);
                    }
                    else
                    {
                        // Output "i"th target word
                        int[]         targetIdx   = g.Argmax(probs, 1);
                        List <string> targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());
                        for (int j = 0; j < targetWords.Count; j++)
                        {
                            if (setEndSentId.Contains(j) == false)
                            {
                                outputSnts[j].Add(targetWords[j]);

                                if (targetWords[j] == ParallelCorpus.EOS)
                                {
                                    setEndSentId.Add(j);
                                }
                            }
                        }

                        if (setEndSentId.Count == batchSize)
                        {
                            // All target sentences in current batch are finished, so we exit.
                            break;
                        }

                        ix_inputs = targetIdx;
                    }
                }
            }

            return(cost);
        }
示例#16
0
        private float DecodeTransformer(List <List <string> > outInputSeqs, IComputeGraph g, IWeightTensor encOutputs, IWeightTensor encMask, TransformerDecoder decoder,
                                        IWeightTensor tgtEmbedding, int batchSize, int deviceId, bool isTraining = true)
        {
            float cost = 0.0f;

            var originalInputLengths = ParallelCorpus.PadSentences(outInputSeqs);
            int tgtSeqLen            = outInputSeqs[0].Count;

            IWeightTensor tgtDimMask = MaskUtils.BuildPadDimMask(g, tgtSeqLen, originalInputLengths, m_modelMetaData.HiddenDim, deviceId);

            using (IWeightTensor tgtSelfTriMask = MaskUtils.BuildPadSelfTriMask(g, tgtSeqLen, originalInputLengths, deviceId))
            {
                List <IWeightTensor> inputs = new List <IWeightTensor>();
                for (int i = 0; i < batchSize; i++)
                {
                    for (int j = 0; j < tgtSeqLen; j++)
                    {
                        int ix_targets_k = m_modelMetaData.Vocab.GetTargetWordIndex(outInputSeqs[i][j], logUnk: true);
                        inputs.Add(g.PeekRow(tgtEmbedding, ix_targets_k));
                    }
                }

                IWeightTensor tgtInputEmbeddings = inputs.Count > 1 ? g.ConcatRows(inputs) : inputs[0];
                IWeightTensor decOutput          = decoder.Decode(tgtInputEmbeddings, encOutputs, tgtSelfTriMask, encMask, tgtDimMask, batchSize, g);

                decOutput = g.Mul(decOutput, g.Transpose(tgtEmbedding));

                using (IWeightTensor probs = g.Softmax(decOutput, runGradients: false, inPlace: true))
                {
                    if (isTraining)
                    {
                        var leftShiftInputSeqs    = ParallelCorpus.LeftShiftSnts(outInputSeqs, ParallelCorpus.EOS);
                        var originalOutputLengths = ParallelCorpus.PadSentences(leftShiftInputSeqs, tgtSeqLen);

                        for (int i = 0; i < batchSize; i++)
                        {
                            for (int j = 0; j < tgtSeqLen; j++)
                            {
                                using (IWeightTensor probs_i_j = g.PeekRow(probs, i * tgtSeqLen + j, runGradients: false))
                                {
                                    if (j < originalOutputLengths[i])
                                    {
                                        int   ix_targets_i_j = m_modelMetaData.Vocab.GetTargetWordIndex(leftShiftInputSeqs[i][j], logUnk: true);
                                        float score_i_j      = probs_i_j.GetWeightAt(ix_targets_i_j);
                                        if (j < originalOutputLengths[i])
                                        {
                                            cost += (float)-Math.Log(score_i_j);
                                        }

                                        probs_i_j.SetWeightAt(score_i_j - 1, ix_targets_i_j);
                                    }
                                    else
                                    {
                                        probs_i_j.CleanWeight();
                                    }
                                }
                            }
                        }

                        decOutput.CopyWeightsToGradients(probs);
                    }
                    else
                    {
                        // Output "i"th target word
                        int[]         targetIdx   = g.Argmax(probs, 1);
                        List <string> targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());

                        for (int i = 0; i < batchSize; i++)
                        {
                            outInputSeqs[i].Add(targetWords[i * tgtSeqLen + tgtSeqLen - 1]);
                        }
                    }
                }
            }


            return(cost);
        }
示例#17
0
        /// <summary>
        /// Run forward part on given single device
        /// </summary>
        /// <param name="g">The computing graph for current device. It gets created and passed by the framework</param>
        /// <param name="srcSnts">A batch of input tokenized sentences in source side</param>
        /// <param name="tgtSnts">A batch of output tokenized sentences in target side. In training mode, it inputs target tokens, otherwise, it outputs target tokens generated by decoder</param>
        /// <param name="deviceIdIdx">The index of current device</param>
        /// <returns>The cost of forward part</returns>
        private float RunForwardOnSingleDevice(IComputeGraph g, List <List <string> > srcSnts, List <List <string> > tgtSnts, int deviceIdIdx, bool isTraining)
        {
            var(encoder, srcEmbedding, posEmbedding, decoderFFLayer) = this.GetNetworksOnDeviceAt(deviceIdIdx);

            // Reset networks
            encoder.Reset(g.GetWeightFactory(), srcSnts.Count);


            var originalSrcLengths = ParallelCorpus.PadSentences(srcSnts);
            var seqLen             = srcSnts[0].Count;
            var batchSize          = srcSnts.Count;

            // Encoding input source sentences
            var encOutput    = this.Encode(g, srcSnts, encoder, srcEmbedding, null, posEmbedding, originalSrcLengths);
            var ffLayer      = decoderFFLayer.Process(encOutput, batchSize, g);
            var ffLayerBatch = g.TransposeBatch(ffLayer, batchSize);

            var cost = 0.0f;

            using (var probs = g.Softmax(ffLayerBatch, runGradients: false, inPlace: true))
            {
                if (isTraining)
                {
                    //Calculate loss for each word in the batch
                    for (var k = 0; k < batchSize; k++)
                    {
                        for (var j = 0; j < seqLen; j++)
                        {
                            using (var probs_k_j = g.PeekRow(probs, k * seqLen + j, runGradients: false))
                            {
                                var ix_targets_k_j = this.m_modelMetaData.Vocab.GetTargetWordIndex(tgtSnts[k][j]);
                                var score_k        = probs_k_j.GetWeightAt(ix_targets_k_j);
                                cost += (float)-Math.Log(score_k);

                                probs_k_j.SetWeightAt(score_k - 1, ix_targets_k_j);
                            }
                        }

                        ////CRF part
                        //using (var probs_k = g.PeekRow(probs, k * seqLen, seqLen, runGradients: false))
                        //{
                        //    var weights_k = probs_k.ToWeightArray();
                        //    var crfOutput_k = m_crfDecoder.ForwardBackward(seqLen, weights_k);

                        //    int[] trueTags = new int[seqLen];
                        //    for (int j = 0; j < seqLen; j++)
                        //    {
                        //        trueTags[j] = m_modelMetaData.Vocab.GetTargetWordIndex(tgtSnts[k][j]);
                        //    }
                        //    m_crfDecoder.UpdateBigramTransition(seqLen, crfOutput_k, trueTags);
                        //}
                    }

                    ffLayerBatch.CopyWeightsToGradients(probs);
                }
                else
                {
                    // CRF decoder
                    //for (int k = 0; k < batchSize; k++)
                    //{
                    //    //CRF part
                    //    using (var probs_k = g.PeekRow(probs, k * seqLen, seqLen, runGradients: false))
                    //    {
                    //        var weights_k = probs_k.ToWeightArray();

                    //        var crfOutput_k = m_crfDecoder.DecodeNBestCRF(weights_k, seqLen, 1);
                    //        var targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(crfOutput_k[0].ToList());
                    //        tgtSnts.Add(targetWords);
                    //    }
                    //}


                    // Output "i"th target word
                    var targetIdx   = g.Argmax(probs, 1);
                    var targetWords = this.m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());

                    for (var k = 0; k < batchSize; k++)
                    {
                        tgtSnts[k] = targetWords.GetRange(k * seqLen, seqLen);
                    }
                }
            }

            return(cost);
        }
示例#18
0
        private float DecodeTransformer(List <List <string> > tgtSeqs, IComputeGraph g, IWeightTensor encOutputs, TransformerDecoder decoder,
                                        IWeightTensor tgtEmbedding, IWeightTensor posEmbedding, int batchSize, int deviceId, List <int> srcOriginalLenghts, bool isTraining = true)
        {
            float cost = 0.0f;

            var tgtOriginalLengths = ParallelCorpus.PadSentences(tgtSeqs);
            int tgtSeqLen          = tgtSeqs[0].Count;
            int srcSeqLen          = encOutputs.Rows / batchSize;

            using (IWeightTensor srcTgtMask = MaskUtils.BuildSrcTgtMask(g, srcSeqLen, tgtSeqLen, tgtOriginalLengths, srcOriginalLenghts, deviceId))
            {
                using (IWeightTensor tgtSelfTriMask = MaskUtils.BuildPadSelfTriMask(g, tgtSeqLen, tgtOriginalLengths, deviceId))
                {
                    List <IWeightTensor> inputs = new List <IWeightTensor>();
                    for (int i = 0; i < batchSize; i++)
                    {
                        for (int j = 0; j < tgtSeqLen; j++)
                        {
                            int ix_targets_k = m_modelMetaData.Vocab.GetTargetWordIndex(tgtSeqs[i][j], logUnk: true);

                            var emb = g.PeekRow(tgtEmbedding, ix_targets_k, runGradients: j < tgtOriginalLengths[i] ? true : false);

                            inputs.Add(emb);
                        }
                    }

                    IWeightTensor inputEmbs = inputs.Count > 1 ? g.ConcatRows(inputs) : inputs[0];

                    inputEmbs = AddPositionEmbedding(g, posEmbedding, batchSize, tgtSeqLen, inputEmbs);

                    IWeightTensor decOutput = decoder.Decode(inputEmbs, encOutputs, tgtSelfTriMask, srcTgtMask, batchSize, g);

                    using (IWeightTensor probs = g.Softmax(decOutput, runGradients: false, inPlace: true))
                    {
                        if (isTraining)
                        {
                            var leftShiftInputSeqs = ParallelCorpus.LeftShiftSnts(tgtSeqs, ParallelCorpus.EOS);
                            for (int i = 0; i < batchSize; i++)
                            {
                                for (int j = 0; j < tgtSeqLen; j++)
                                {
                                    using (IWeightTensor probs_i_j = g.PeekRow(probs, i * tgtSeqLen + j, runGradients: false))
                                    {
                                        if (j < tgtOriginalLengths[i])
                                        {
                                            int   ix_targets_i_j = m_modelMetaData.Vocab.GetTargetWordIndex(leftShiftInputSeqs[i][j], logUnk: true);
                                            float score_i_j      = probs_i_j.GetWeightAt(ix_targets_i_j);

                                            cost += (float)-Math.Log(score_i_j);

                                            probs_i_j.SetWeightAt(score_i_j - 1, ix_targets_i_j);
                                        }
                                        else
                                        {
                                            probs_i_j.CleanWeight();
                                        }
                                    }
                                }
                            }

                            decOutput.CopyWeightsToGradients(probs);
                        }
                        //if (isTraining)
                        //{
                        //    var leftShiftInputSeqs = ParallelCorpus.LeftShiftSnts(tgtSeqs, ParallelCorpus.EOS);
                        //    int[] targetIds = new int[batchSize * tgtSeqLen];
                        //    int ids = 0;
                        //    for (int i = 0; i < batchSize; i++)
                        //    {
                        //        for (int j = 0; j < tgtSeqLen; j++)
                        //        {
                        //            targetIds[ids] = j < tgtOriginalLengths[i] ? m_modelMetaData.Vocab.GetTargetWordIndex(leftShiftInputSeqs[i][j], logUnk: true) : -1;
                        //            ids++;
                        //        }
                        //    }

                        //    cost += g.UpdateCost(probs, targetIds);
                        //    decOutput.CopyWeightsToGradients(probs);
                        //}
                        else
                        {
                            // Output "i"th target word
                            int[]         targetIdx   = g.Argmax(probs, 1);
                            List <string> targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());

                            for (int i = 0; i < batchSize; i++)
                            {
                                tgtSeqs[i].Add(targetWords[i * tgtSeqLen + tgtSeqLen - 1]);
                            }
                        }
                    }
                }
            }



            return(cost);
        }
示例#19
0
        /// <summary>
        /// Decode output sentences in training
        /// </summary>
        /// <param name="outputSentences">In training mode, they are golden target sentences, otherwise, they are target sentences generated by the decoder</param>
        /// <param name="g"></param>
        /// <param name="encodedOutputs"></param>
        /// <param name="decoder"></param>
        /// <param name="decoderFFLayer"></param>
        /// <param name="embedding"></param>
        /// <returns></returns>
        private float Decode(List <List <string> > outputSentences, IComputeGraph g, IWeightTensor encodedOutputs, AttentionDecoder decoder, IWeightTensor embedding,
                             int batchSize, bool isTraining = true)
        {
            float cost = 0.0f;

            int[] ix_inputs = new int[batchSize];
            for (int i = 0; i < ix_inputs.Length; i++)
            {
                ix_inputs[i] = (int)SENTTAGS.START;
            }

            // Initialize variables accoridng to current mode
            List <int>    originalOutputLengths = isTraining ? ParallelCorpus.PadSentences(outputSentences) : null;
            int           seqLen       = isTraining ? outputSentences[0].Count : 64;
            float         dropoutRatio = isTraining ? m_dropoutRatio : 0.0f;
            HashSet <int> setEndSentId = isTraining ? null : new HashSet <int>();

            if (!isTraining)
            {
                if (outputSentences.Count != 0)
                {
                    throw new ArgumentException($"The list for output sentences must be empty if current is not in training mode.");
                }
                for (int i = 0; i < batchSize; i++)
                {
                    outputSentences.Add(new List <string>());
                }
            }

            // Pre-process for attention model
            AttentionPreProcessResult attPreProcessResult = decoder.PreProcess(encodedOutputs, batchSize, g);

            for (int i = 0; i < seqLen; i++)
            {
                //Get embedding for all sentence in the batch at position i
                List <IWeightTensor> inputs = new List <IWeightTensor>();
                for (int j = 0; j < batchSize; j++)
                {
                    inputs.Add(g.PeekRow(embedding, ix_inputs[j]));
                }
                IWeightTensor inputsM = g.ConcatRows(inputs);

                //Decode output sentence at position i
                IWeightTensor eOutput = decoder.Decode(inputsM, attPreProcessResult, batchSize, g);

                //Softmax for output
                using (IWeightTensor probs = g.Softmax(eOutput, runGradients: false, inPlace: true))
                {
                    if (isTraining)
                    {
                        //Calculate loss for each word in the batch
                        for (int k = 0; k < batchSize; k++)
                        {
                            using (IWeightTensor probs_k = g.PeekRow(probs, k, runGradients: false))
                            {
                                int   ix_targets_k = m_modelMetaData.Vocab.GetTargetWordIndex(outputSentences[k][i]);
                                float score_k      = probs_k.GetWeightAt(ix_targets_k);
                                if (i < originalOutputLengths[k])
                                {
                                    cost += (float)-Math.Log(score_k);
                                }

                                probs_k.SetWeightAt(score_k - 1, ix_targets_k);
                                ix_inputs[k] = ix_targets_k;
                            }
                        }
                        eOutput.CopyWeightsToGradients(probs);
                    }
                    else
                    {
                        // Output "i"th target word
                        int[]         targetIdx   = g.Argmax(probs, 1);
                        List <string> targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());
                        for (int j = 0; j < targetWords.Count; j++)
                        {
                            if (setEndSentId.Contains(j) == false)
                            {
                                outputSentences[j].Add(targetWords[j]);

                                if (targetWords[j] == ParallelCorpus.EOS)
                                {
                                    setEndSentId.Add(j);
                                }
                            }
                        }

                        ix_inputs = targetIdx;
                    }
                }

                if (isTraining)
                {
                    ////Hacky: Run backward for last feed forward layer and dropout layer in order to save memory usage, since it's not time sequence dependency
                    g.RunTopBackward();
                    if (m_dropoutRatio > 0.0f)
                    {
                        g.RunTopBackward();
                    }
                }
                else
                {
                    if (setEndSentId.Count == batchSize)
                    {
                        // All target sentences in current batch are finished, so we exit.
                        break;
                    }
                }
            }

            return(cost);
        }
        /// <summary>
        /// Decode output sentences in training
        /// </summary>
        /// <param name="outputSentences"></param>
        /// <param name="g"></param>
        /// <param name="encodedOutputs"></param>
        /// <param name="decoder"></param>
        /// <param name="Whd"></param>
        /// <param name="bd"></param>
        /// <param name="Embedding"></param>
        /// <param name="predictSentence"></param>
        /// <returns></returns>
        private float Decode(List <List <string> > outputSentences, IComputeGraph g, IWeightMatrix encodedOutputs, AttentionDecoder decoder, FeedForwardLayer decoderFFLayer, IWeightMatrix Embedding, out List <List <string> > predictSentence)
        {
            predictSentence = null;
            float cost = 0.0f;
            var   attPreProcessResult = decoder.PreProcess(encodedOutputs, g);

            var originalOutputLengths = PadSentences(outputSentences);
            int seqLen = outputSentences[0].Count;

            int[] ix_inputs  = new int[m_batchSize];
            int[] ix_targets = new int[m_batchSize];
            for (int i = 0; i < ix_inputs.Length; i++)
            {
                ix_inputs[i] = (int)SENTTAGS.START;
            }

            for (int i = 0; i < seqLen + 1; i++)
            {
                //Get embedding for all sentence in the batch at position i
                List <IWeightMatrix> inputs = new List <IWeightMatrix>();
                for (int j = 0; j < m_batchSize; j++)
                {
                    List <string> OutputSentence = outputSentences[j];

                    ix_targets[j] = (int)SENTTAGS.UNK;
                    if (i >= seqLen)
                    {
                        ix_targets[j] = (int)SENTTAGS.END;
                    }
                    else
                    {
                        if (m_tgtWordToIndex.ContainsKey(OutputSentence[i]))
                        {
                            ix_targets[j] = m_tgtWordToIndex[OutputSentence[i]];
                        }
                    }

                    var x = g.PeekRow(Embedding, ix_inputs[j]);

                    inputs.Add(x);
                }

                var inputsM = g.ConcatRows(inputs);

                //Decode output sentence at position i
                var eOutput = decoder.Decode(inputsM, attPreProcessResult, g);
                if (m_dropoutRatio > 0.0f)
                {
                    eOutput = g.Dropout(eOutput, m_dropoutRatio);
                }

                var o = decoderFFLayer.Process(eOutput, g);

                //Softmax for output
//                var o = g.MulAdd(eOutput, Whd, bds);
                var probs = g.Softmax(o, false);

                o.ReleaseWeight();

                //Calculate loss for each word in the batch
                List <IWeightMatrix> probs_g = g.UnFolderRow(probs, m_batchSize, false);
                for (int k = 0; k < m_batchSize; k++)
                {
                    var probs_k = probs_g[k];
                    var score_k = probs_k.GetWeightAt(ix_targets[k]);

                    if (i < originalOutputLengths[k] + 1)
                    {
                        cost += (float)-Math.Log(score_k);
                    }

                    probs_k.SetWeightAt(score_k - 1, ix_targets[k]);

                    ix_inputs[k] = ix_targets[k];
                    probs_k.Dispose();
                }

                o.SetGradientByWeight(probs);

                //Hacky: Run backward for last feed forward layer and dropout layer in order to save memory usage, since it's not time sequence dependency
                g.RunTopBackward();
                g.RunTopBackward();
                if (m_dropoutRatio > 0.0f)
                {
                    g.RunTopBackward();
                }
            }

            return(cost);
        }