Example #1
0
        public IWeightMatrix Perform(IWeightMatrix state, AttentionPreProcessResult attenPreProcessResult, IComputeGraph g)
        {
            var bWas  = g.RepeatRows(bWa, state.Rows);
            var wc    = g.MulAdd(state, Wa, bWas);
            var wcs   = g.RepeatRows(wc, attenPreProcessResult.inputsUnfolder[0].Rows);
            var ggs   = g.AddTanh(attenPreProcessResult.uhs, wcs);
            var atten = g.Mul(ggs, V);

            List <IWeightMatrix> attens   = g.UnFolderRow(atten, m_batchSize);
            List <IWeightMatrix> contexts = new List <IWeightMatrix>();

            List <IWeightMatrix> attensT = new List <IWeightMatrix>();

            for (int i = 0; i < m_batchSize; i++)
            {
                attensT.Add(g.Transpose2(attens[i]));
            }

            var attenT       = g.ConcatRows(attensT);
            var attenSoftmax = g.SoftmaxM(attenT);

            for (int i = 0; i < m_batchSize; i++)
            {
                IWeightMatrix context = g.Mul(g.PeekRow(attenSoftmax, i), attenPreProcessResult.inputsUnfolder[i]);
                contexts.Add(context);
            }

            return(g.ConcatRows(contexts));
        }
Example #2
0
        /// <summary>
        /// Decode output sentences in training
        /// </summary>
        /// <param name="outputSentences"></param>
        /// <param name="g"></param>
        /// <param name="encodedOutputs"></param>
        /// <param name="decoder"></param>
        /// <param name="Whd"></param>
        /// <param name="bd"></param>
        /// <param name="Embedding"></param>
        /// <param name="predictSentence"></param>
        /// <returns></returns>
        private float Decode(List <List <string> > outputSentences, IComputeGraph g, IWeightMatrix encodedOutputs, AttentionDecoder decoder,
                             IWeightMatrix Whd, IWeightMatrix bd, IWeightMatrix Embedding, out List <List <string> > predictSentence)
        {
            predictSentence = null;

            float cost = 0.0f;

            var attPreProcessResult = decoder.PreProcess(encodedOutputs, g);

            var originalOutputLengths = PadSentences(outputSentences);
            int seqLen = outputSentences[0].Count;

            int[] ix_inputs  = new int[m_batchSize];
            int[] ix_targets = new int[m_batchSize];
            for (int i = 0; i < ix_inputs.Length; i++)
            {
                ix_inputs[i] = (int)SENTTAGS.START;
            }

            var bds = g.RepeatRows(bd, m_batchSize);

            for (int i = 0; i < seqLen + 1; i++)
            {
                //Get embedding for all sentence in the batch at position i
                List <IWeightMatrix> inputs = new List <IWeightMatrix>();
                for (int j = 0; j < m_batchSize; j++)
                {
                    List <string> OutputSentence = outputSentences[j];

                    ix_targets[j] = (int)SENTTAGS.UNK;
                    if (i >= seqLen)
                    {
                        ix_targets[j] = (int)SENTTAGS.END;
                    }
                    else
                    {
                        if (m_tgtWordToIndex.ContainsKey(OutputSentence[i]))
                        {
                            ix_targets[j] = m_tgtWordToIndex[OutputSentence[i]];
                        }
                    }

                    var x = g.PeekRow(Embedding, ix_inputs[j]);

                    inputs.Add(x);
                }

                //Decode output sentence at position i
                var eOutput = decoder.Decode(g.ConcatRows(inputs), attPreProcessResult, g);
                if (m_dropoutRatio > 0.0f)
                {
                    eOutput = g.Dropout(eOutput, m_dropoutRatio);
                }

                //Softmax for output
                var o     = g.MulAdd2(eOutput, Whd, bds);
                var probs = g.SoftmaxM(o, false);

                o.ReleaseWeight();

                //Calculate loss for each word in the batch
                List <IWeightMatrix> probs_g = g.UnFolderRow(probs, m_batchSize, false);
                for (int k = 0; k < m_batchSize; k++)
                {
                    var probs_k = probs_g[k];
                    var score_k = probs_k.GetWeightAt(ix_targets[k]);

                    if (i < originalOutputLengths[k] + 1)
                    {
                        cost += (float)-Math.Log(score_k);
                    }

                    probs_k.SetWeightAt(score_k - 1, ix_targets[k]);

                    ix_inputs[k] = ix_targets[k];
                    probs_k.Dispose();
                }

                o.SetGradientByWeight(probs);
            }

            return(cost);
        }