Exemple #1
0
        /// <summary>
        /// Reads a serialized <c>ModelAttentionData</c> from disk and copies its fields into this instance.
        /// </summary>
        /// <param name="modelFilePath">Path of the model file to open; it is also stored in <c>EncodedModelFilePath</c>.</param>
        /// <exception cref="InvalidDataException">The file content does not deserialize to a <c>ModelAttentionData</c>.</exception>
        public void Load(string modelFilePath)
        {
            EncodedModelFilePath = modelFilePath;

            ModelAttentionData tosave;

            // SECURITY NOTE(review): BinaryFormatter can execute arbitrary code while deserializing
            // untrusted data. Only load model files from trusted locations, or migrate to a safer format.
            BinaryFormatter bf = new BinaryFormatter();

            // "using" guarantees the stream is released even when Deserialize throws.
            using (FileStream fs = new FileStream(EncodedModelFilePath, FileMode.Open, FileAccess.Read))
            {
                tosave = bf.Deserialize(fs) as ModelAttentionData;
            }

            if (tosave == null)
            {
                // Fail with a descriptive error instead of a NullReferenceException on the first field access below.
                throw new InvalidDataException("The model file '" + modelFilePath + "' does not contain a ModelAttentionData object.");
            }

            this.bd             = tosave.bd;
            this.clipval        = tosave.clipval;
            this.decoder        = tosave.decoder;
            this.Depth          = tosave.Depth;
            this.encoder        = tosave.encoder;
            this.HiddenSize     = tosave.hidden_sizes;
            this.learning_rate  = tosave.learning_rate;
            this.WordVectorSize = tosave.letter_size;
            // max_word is not taken from the saved model; it is always reset to 100 on load.
            this.max_word       = 100;
            this.regc           = tosave.regc;
            this.reversEncoder  = tosave.ReversEncoder;
            this.UseDropout     = tosave.UseDropout;
            this.Whd            = tosave.Whd;
            this.s_Embedding    = tosave.s_Wil;
            this.s_wordToIndex  = tosave.s_wordToIndex;
            this.s_indexToWord  = tosave.s_indexToWord;

            this.t_Embedding   = tosave.t_Wil;
            this.t_wordToIndex = tosave.t_wordToIndex;
            this.t_indexToWord = tosave.t_indexToWord;
        }
        /// <summary>
        /// Builds one forward pass (encoder, attention pre-processing, a single decoder step and a softmax)
        /// on a compute graph created with <c>visNetwork: true</c>, then writes that graph to a file.
        /// </summary>
        /// <param name="visNNFilePath">Path handed to <c>VisualizeNeuralNetToFile</c>.</param>
        /// <exception cref="System.InvalidOperationException">
        /// The decoder returned by <c>GetNetworksOnDeviceAt</c> is not an <c>AttentionDecoder</c>.
        /// </exception>
        public void VisualizeNeuralNetwork(string visNNFilePath)
        {
            (IEncoder encoder, IDecoder decoder, IWeightTensor srcEmbedding, IWeightTensor tgtEmbedding) = GetNetworksOnDeviceAt(-1);

            // Validate the decoder type up front so a mismatch is reported clearly
            // instead of surfacing later as a NullReferenceException.
            AttentionDecoder rnnDecoder = decoder as AttentionDecoder;
            if (rnnDecoder == null)
            {
                throw new System.InvalidOperationException("VisualizeNeuralNetwork requires the decoder to be an AttentionDecoder.");
            }

            // Build input sentence
            List <List <string> > inputSeqs = ParallelCorpus.ConstructInputTokens(null);
            int           batchSize = inputSeqs.Count;
            IComputeGraph g         = CreateComputGraph(m_defaultDeviceId, needBack: false, visNetwork: true);

            encoder.Reset(g.GetWeightFactory(), batchSize);
            rnnDecoder.Reset(g.GetWeightFactory(), batchSize);

            // Run encoder
            IWeightTensor encodedWeightMatrix = Encode(g, inputSeqs, encoder, srcEmbedding, null, null);

            // Prepare for attention over encoder-decoder
            AttentionPreProcessResult attPreProcessResult = rnnDecoder.PreProcess(encodedWeightMatrix, batchSize, g);

            // Run decoder for a single step, starting from the START tag embedding
            IWeightTensor x       = g.PeekRow(tgtEmbedding, (int)SENTTAGS.START);
            IWeightTensor eOutput = rnnDecoder.Decode(x, attPreProcessResult, batchSize, g);

            // The softmax result itself is not used; the call is kept so the operation is still issued on the graph.
            g.Softmax(eOutput);

            g.VisualizeNeuralNetToFile(visNNFilePath);
        }
Exemple #3
0
        /// <summary>
        /// Collects the parameters of the encoder, decoder and reverse encoder together with the two
        /// embedding matrices and the <c>Whd</c> / <c>bd</c> matrices, and passes the list to <c>solver.CleanCash</c>.
        /// </summary>
        private void CleanWeightsCash(Encoder encoder, Encoder ReversEncoder, AttentionDecoder decoder, WeightMatrix Whd, WeightMatrix bd, WeightMatrix s_Embedding, WeightMatrix t_Embedding)
        {
            // Start from the encoder's list and append the rest in the same order as before.
            var allParams = encoder.getParams();
            allParams.AddRange(decoder.getParams());
            allParams.AddRange(ReversEncoder.getParams());

            foreach (var standalone in new[] { s_Embedding, t_Embedding, Whd, bd })
            {
                allParams.Add(standalone);
            }

            solver.CleanCash(allParams);
        }
Exemple #4
0
        /// <summary>
        /// Collects the parameters of the encoder, decoder and reverse encoder together with the two
        /// embedding matrices and the <c>Whd</c> / <c>bd</c> matrices, and asks the solver to update them
        /// using the instance's <c>learning_rate</c>, <c>regc</c> and <c>clipval</c>.
        /// </summary>
        private void UpdateParameters(Encoder encoder, Encoder ReversEncoder, AttentionDecoder decoder, WeightMatrix Whd, WeightMatrix bd, WeightMatrix s_Embedding, WeightMatrix t_Embedding)
        {
            // Start from the encoder's list and append the rest in the same order as before.
            var allParams = encoder.getParams();
            allParams.AddRange(decoder.getParams());
            allParams.AddRange(ReversEncoder.getParams());

            foreach (var standalone in new[] { s_Embedding, t_Embedding, Whd, bd })
            {
                allParams.Add(standalone);
            }

            solver.UpdateWeights(allParams, learning_rate, regc, clipval);
        }
Exemple #5
0
        /// <summary>
        /// Creates a model from scratch: stores the hyper-parameters, obtains the vocabulary (from files when
        /// both paths are given, otherwise from the training corpus), allocates the output and embedding
        /// matrices, optionally loads external embeddings, and constructs both encoders and the decoder.
        /// </summary>
        /// <param name="inputSize">Stored as <c>WordVectorSize</c> (size of word embeddings).</param>
        /// <param name="hiddenSize">Stored as <c>HiddenSize</c>.</param>
        /// <param name="depth">Stored as <c>Depth</c> and passed to the encoders and the decoder.</param>
        /// <param name="trainCorpus">Training corpus; used to build the vocabulary when no vocabulary files are given.</param>
        /// <param name="srcVocabFilePath">Source vocabulary file; used only when the target path is also non-empty.</param>
        /// <param name="tgtVocabFilePath">Target vocabulary file; used only when the source path is also non-empty.</param>
        /// <param name="srcEmbeddingFilePath">Optional external embedding file for the source side.</param>
        /// <param name="tgtEmbeddingFilePath">Optional external embedding file for the target side.</param>
        /// <param name="useSparseFeature">When true the decoder's sparse feature size is the source vocabulary size, otherwise 0.</param>
        /// <param name="useDropout">NOTE(review): this argument is never read in the constructor — confirm whether it should set <c>UseDropout</c>.</param>
        /// <param name="modelFilePath">Stored as <c>EncodedModelFilePath</c>.</param>
        public AttentionSeq2Seq(int inputSize, int hiddenSize, int depth, Corpus trainCorpus, string srcVocabFilePath, string tgtVocabFilePath, string srcEmbeddingFilePath, string tgtEmbeddingFilePath,
                                bool useSparseFeature, bool useDropout, string modelFilePath)
        {
            TrainCorpus          = trainCorpus;
            Depth                = depth;
            WordVectorSize       = inputSize;
            EncodedModelFilePath = modelFilePath;
            HiddenSize           = hiddenSize;

            // Vocabulary files are only used when BOTH paths are supplied.
            if (String.IsNullOrEmpty(srcVocabFilePath) || String.IsNullOrEmpty(tgtVocabFilePath))
            {
                Logger.WriteLine("Building vocabulary from training corpus...");
                BuildVocab(trainCorpus);
            }
            else
            {
                Logger.WriteLine($"Loading vocabulary files from '{srcVocabFilePath}' and '{tgtVocabFilePath}'...");
                LoadVocab(srcVocabFilePath, tgtVocabFilePath);
            }

            // The target side reserves three extra slots on top of the vocabulary size.
            int targetOutputSize = t_vocab.Count + 3;

            // Allocation order is kept as-is (output matrices first, then the embeddings).
            Whd = new WeightMatrix(HiddenSize, targetOutputSize, true);
            bd  = new WeightMatrix(1, targetOutputSize, 0);

            s_Embedding = new WeightMatrix(s_vocab.Count, WordVectorSize, true);
            t_Embedding = new WeightMatrix(targetOutputSize, WordVectorSize, true);

            if (!String.IsNullOrEmpty(srcEmbeddingFilePath))
            {
                Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
                LoadWordEmbedding(srcEmbeddingFilePath, s_Embedding, s_wordToIndex);
            }

            if (!String.IsNullOrEmpty(tgtEmbeddingFilePath))
            {
                Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
                LoadWordEmbedding(tgtEmbeddingFilePath, t_Embedding, t_wordToIndex);
            }

            encoder       = new Encoder(HiddenSize, WordVectorSize, depth);
            reversEncoder = new Encoder(HiddenSize, WordVectorSize, depth);

            int sparseSize;
            if (useSparseFeature)
            {
                sparseSize = s_vocab.Count;
            }
            else
            {
                sparseSize = 0;
            }

            decoder = new AttentionDecoder(sparseSize, HiddenSize, WordVectorSize, depth);
        }
        /// <summary>
        /// Allocates, for every entry in <c>m_deviceIds</c>, the source and target embeddings, the
        /// bi-directional encoder, the attention decoder and the decoder feed-forward layer, and then
        /// calls <c>InitWeightsFactory</c>.
        /// </summary>
        private void InitWeights()
        {
            Logger.WriteLine($"Initializing weights...");

            int deviceCount = m_deviceIds.Length;

            m_srcEmbedding   = new IWeightMatrix[deviceCount];
            m_tgtEmbedding   = new IWeightMatrix[deviceCount];
            m_biEncoder      = new BiEncoder[deviceCount];
            m_decoder        = new AttentionDecoder[deviceCount];
            m_decoderFFLayer = new FeedForwardLayer[deviceCount];

            for (int i = 0; i < deviceCount; i++)
            {
                var deviceId = m_deviceIds[i];

                // The target side reserves three extra slots on top of the vocabulary size.
                int targetSize = m_tgtIndexToWord.Count + 3;

                Logger.WriteLine($"Initializing weights for device '{m_deviceIds[i]}'");

                // CPU-style matrices unless the architecture is CUDA, in which case device tensors are used.
                if (m_archType != ArchTypeEnums.GPU_CUDA)
                {
                    m_srcEmbedding[i] = new WeightMatrix(m_srcIndexToWord.Count, WordVectorSize, true);
                    m_tgtEmbedding[i] = new WeightMatrix(targetSize, WordVectorSize, true);
                }
                else
                {
                    m_srcEmbedding[i] = new WeightTensor(m_srcIndexToWord.Count, WordVectorSize, deviceId, true);
                    m_tgtEmbedding[i] = new WeightTensor(targetSize, WordVectorSize, deviceId, true);
                }

                Logger.WriteLine($"Initializing encoders and decoders for device '{m_deviceIds[i]}'...");

                m_biEncoder[i]      = new BiEncoder(m_batchSize, HiddenSize, WordVectorSize, Depth, m_archType, deviceId);
                m_decoder[i]        = new AttentionDecoder(m_batchSize, HiddenSize, WordVectorSize, HiddenSize * 2, Depth, m_archType, deviceId);
                m_decoderFFLayer[i] = new FeedForwardLayer(HiddenSize, targetSize, m_archType, deviceId);
            }

            InitWeightsFactory();
        }
Exemple #7
0
        /// <summary>
        /// Creates one encoder and one attention decoder per entry in <c>m_deviceIds</c>.
        /// A BiLSTM encoder is paired with a decoder whose fifth argument is <c>m_hiddenDim * 2</c>;
        /// any other encoder type gets a Transformer encoder and a decoder with <c>m_hiddenDim</c>.
        /// </summary>
        /// <returns>The encoder array and the decoder array, both indexed like <c>m_deviceIds</c>.</returns>
        private (IEncoder[], AttentionDecoder[]) CreateEncoderDecoder()
        {
            Logger.WriteLine($"Creating encoders and decoders...");

            int                deviceCount = m_deviceIds.Length;
            IEncoder[]         encoders    = new IEncoder[deviceCount];
            AttentionDecoder[] decoders    = new AttentionDecoder[deviceCount];

            for (int slot = 0; slot < deviceCount; slot++)
            {
                var  deviceId = m_deviceIds[slot];
                bool isBiLstm = m_encoderType == EncoderTypeEnums.BiLSTM;

                // Encoder first, decoder second - same construction order as before.
                if (isBiLstm)
                {
                    encoders[slot] = new BiEncoder("BiLSTMEncoder", m_batchSize, m_hiddenDim, m_embeddingDim, m_encoderLayerDepth, deviceId);
                }
                else
                {
                    encoders[slot] = new TransformerEncoder("TransformerEncoder", m_batchSize, m_multiHeadNum, m_hiddenDim, m_embeddingDim, m_encoderLayerDepth, deviceId);
                }

                var contextDim = isBiLstm ? m_hiddenDim * 2 : m_hiddenDim;
                decoders[slot] = new AttentionDecoder("AttnLSTMDecoder", m_batchSize, m_hiddenDim, m_embeddingDim, contextDim, m_decoderLayerDepth, deviceId);
            }

            return (encoders, decoders);
        }
        /// <summary>
        /// Generates output sentences for a single input sentence using the seq2seq model with beam search.
        /// </summary>
        /// <param name="input">Tokens of the input sentence; handed to <c>ParallelCorpus.ConstructInputTokens</c>.</param>
        /// <param name="beamSearchSize">Number of top softmax candidates expanded per hypothesis, and the number of hypotheses requested from <c>BeamSearch.GetTopNBSS</c> after each step.</param>
        /// <param name="maxOutputLength">Upper bound on the number of decoding iterations; hypotheses holding more ids than this are carried over without being expanded.</param>
        /// <returns>One token list per hypothesis left in the beam, converted by <c>ConvertTargetIdsToString</c>.</returns>
        public List <List <string> > Predict(List <string> input, int beamSearchSize = 1, int maxOutputLength = 100)
        {
            (IEncoder encoder, IDecoder decoder, IWeightTensor srcEmbedding, IWeightTensor tgtEmbedding) = GetNetworksOnDeviceAt(-1);
            List <List <string> > inputSeqs = ParallelCorpus.ConstructInputTokens(input);
            int batchSize = 1; // For prediction with beam search, only one sentence per call is currently supported

            IComputeGraph    g          = CreateComputGraph(m_defaultDeviceId, needBack: false);
            // NOTE(review): "as" yields null when decoder is not an AttentionDecoder; rnnDecoder is dereferenced below without a check.
            AttentionDecoder rnnDecoder = decoder as AttentionDecoder;

            encoder.Reset(g.GetWeightFactory(), batchSize);
            rnnDecoder.Reset(g.GetWeightFactory(), batchSize);

            // Construct beam search status list, seeded with one hypothesis that holds only the START tag
            // and the decoder's state right after Reset
            List <BeamSearchStatus> bssList = new List <BeamSearchStatus>();
            BeamSearchStatus        bss     = new BeamSearchStatus();

            bss.OutputIds.Add((int)SENTTAGS.START);
            bss.CTs = rnnDecoder.GetCTs();
            bss.HTs = rnnDecoder.GetHTs();
            bssList.Add(bss);

            // Encode the input once; the attention pre-processing result is reused for every decoding step
            IWeightTensor             encodedWeightMatrix = Encode(g, inputSeqs, encoder, srcEmbedding, null, null);
            AttentionPreProcessResult attPreProcessResult = rnnDecoder.PreProcess(encodedWeightMatrix, batchSize, g);

            List <BeamSearchStatus> newBSSList = new List <BeamSearchStatus>();
            bool finished     = false;
            int  outputLength = 0;

            // Each iteration extends every unfinished hypothesis by one token
            while (finished == false && outputLength < maxOutputLength)
            {
                // Stays true only if no hypothesis is expanded during this iteration
                finished = true;
                for (int i = 0; i < bssList.Count; i++)
                {
                    bss = bssList[i];
                    if (bss.OutputIds[bss.OutputIds.Count - 1] == (int)SENTTAGS.END)
                    {
                        // Hypothesis already ended with the END tag: keep it unchanged
                        newBSSList.Add(bss);
                    }
                    else if (bss.OutputIds.Count > maxOutputLength)
                    {
                        // Hypothesis exceeded the length limit: keep it unchanged
                        newBSSList.Add(bss);
                    }
                    else
                    {
                        finished = false;
                        int ix_input = bss.OutputIds[bss.OutputIds.Count - 1];
                        // Restore the decoder state stored with this hypothesis before decoding its next step
                        rnnDecoder.SetCTs(bss.CTs);
                        rnnDecoder.SetHTs(bss.HTs);

                        IWeightTensor x       = g.PeekRow(tgtEmbedding, ix_input);
                        IWeightTensor eOutput = rnnDecoder.Decode(x, attPreProcessResult, batchSize, g);
                        using (IWeightTensor probs = g.Softmax(eOutput))
                        {
                            List <int> preds = probs.GetTopNMaxWeightIdx(beamSearchSize);
                            for (int j = 0; j < preds.Count; j++)
                            {
                                // New hypothesis = parent ids + candidate id, with the decoder state after this step
                                BeamSearchStatus newBSS = new BeamSearchStatus();
                                newBSS.OutputIds.AddRange(bss.OutputIds);
                                newBSS.OutputIds.Add(preds[j]);

                                newBSS.CTs = rnnDecoder.GetCTs();
                                newBSS.HTs = rnnDecoder.GetHTs();

                                // Score is the parent's score plus the negative log of the candidate's softmax weight
                                float score = probs.GetWeightAt(preds[j]);
                                newBSS.Score  = bss.Score;
                                newBSS.Score += (float)(-Math.Log(score));

                                //var lengthPenalty = Math.Pow((5.0f + newBSS.OutputIds.Count) / 6, 0.6);
                                //newBSS.Score /= (float)lengthPenalty;

                                newBSSList.Add(newBSS);
                            }
                        }
                    }
                }

                // Reduce the candidates to the beam for the next iteration (selection rule lives in BeamSearch.GetTopNBSS)
                bssList = BeamSearch.GetTopNBSS(newBSSList, beamSearchSize);
                newBSSList.Clear();

                outputLength++;
            }

            // Convert output target word ids to real string
            List <List <string> > results = new List <List <string> >();

            for (int i = 0; i < bssList.Count; i++)
            {
                results.Add(m_modelMetaData.Vocab.ConvertTargetIdsToString(bssList[i].OutputIds));
            }

            return(results);
        }
        /// <summary>
        /// Runs the attention decoder over a batch. In training mode it accumulates the negative
        /// log-likelihood of the golden target words and writes the output gradients; otherwise it
        /// appends the arg-max word of every step to the output sentences.
        /// </summary>
        /// <param name="outputSnts">In training mode, they are golden target sentences, otherwise, they are target sentences generated by the decoder. In both modes the first token of each sentence is read as the initial decoder input.</param>
        /// <param name="g">Compute graph used for every tensor operation in this method.</param>
        /// <param name="encOutputs">Encoder output passed to <c>decoder.PreProcess</c>.</param>
        /// <param name="decoder">Attention decoder that is run once per output position.</param>
        /// <param name="tgtEmbedding">Embedding tensor from which the decoder input rows are peeked.</param>
        /// <param name="batchSize">Number of sentences in the batch.</param>
        /// <param name="isTraining">True to compute the cost against the golden sentences; false to generate output words.</param>
        /// <returns>The accumulated cost; it stays 0 when <paramref name="isTraining"/> is false.</returns>
        private float DecodeAttentionLSTM(List <List <string> > outputSnts, IComputeGraph g, IWeightTensor encOutputs, AttentionDecoder decoder, IWeightTensor tgtEmbedding, int batchSize, bool isTraining = true)
        {
            float cost = 0.0f;

            // The first decoder input of each sentence is the index of its first token
            int[] ix_inputs = new int[batchSize];
            for (int i = 0; i < ix_inputs.Length; i++)
            {
                ix_inputs[i] = m_modelMetaData.Vocab.GetTargetWordIndex(outputSnts[i][0]);
            }

            // Initialize variables according to current mode
            List <int>    originalOutputLengths = isTraining ? ParallelCorpus.PadSentences(outputSnts) : null;
            // Training walks the padded golden sentence; generation is capped at 64 positions
            int           seqLen       = isTraining ? outputSnts[0].Count : 64;
            // NOTE(review): dropoutRatio is assigned but never read in this method
            float         dropoutRatio = isTraining ? m_dropoutRatio : 0.0f;
            // Indices of sentences that have already produced EOS (generation mode only)
            HashSet <int> setEndSentId = isTraining ? null : new HashSet <int>();

            // Pre-process for attention model
            AttentionPreProcessResult attPreProcessResult = decoder.PreProcess(encOutputs, batchSize, g);

            // Position 0 was consumed above as the initial input, so the loop starts at 1
            for (int i = 1; i < seqLen; i++)
            {
                //Get embedding for all sentence in the batch at position i
                List <IWeightTensor> inputs = new List <IWeightTensor>();
                for (int j = 0; j < batchSize; j++)
                {
                    inputs.Add(g.PeekRow(tgtEmbedding, ix_inputs[j]));
                }
                IWeightTensor inputsM = g.ConcatRows(inputs);

                //Decode output sentence at position i
                IWeightTensor eOutput = decoder.Decode(inputsM, attPreProcessResult, batchSize, g);

                //Softmax for output
                using (IWeightTensor probs = g.Softmax(eOutput, runGradients: false, inPlace: true))
                {
                    if (isTraining)
                    {
                        //Calculate loss for each word in the batch
                        for (int k = 0; k < batchSize; k++)
                        {
                            using (IWeightTensor probs_k = g.PeekRow(probs, k, runGradients: false))
                            {
                                int   ix_targets_k = m_modelMetaData.Vocab.GetTargetWordIndex(outputSnts[k][i]);
                                float score_k      = probs_k.GetWeightAt(ix_targets_k);
                                // Only positions inside the original (unpadded) sentence contribute to the cost
                                if (i < originalOutputLengths[k])
                                {
                                    cost += (float)-Math.Log(score_k);
                                }

                                // Replace the target entry with (p - 1); presumably the softmax cross-entropy gradient - confirm
                                probs_k.SetWeightAt(score_k - 1, ix_targets_k);
                                // The golden word at this position becomes the next decoder input
                                ix_inputs[k] = ix_targets_k;
                            }
                        }
                        // Use the modified probabilities as the gradients of the decoder output
                        eOutput.CopyWeightsToGradients(probs);
                    }
                    else
                    {
                        // Output "i"th target word
                        int[]         targetIdx   = g.Argmax(probs, 1);
                        List <string> targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());
                        for (int j = 0; j < targetWords.Count; j++)
                        {
                            // Sentences that already emitted EOS receive no further words
                            if (setEndSentId.Contains(j) == false)
                            {
                                outputSnts[j].Add(targetWords[j]);

                                if (targetWords[j] == ParallelCorpus.EOS)
                                {
                                    setEndSentId.Add(j);
                                }
                            }
                        }

                        if (setEndSentId.Count == batchSize)
                        {
                            // All target sentences in current batch are finished, so we exit.
                            break;
                        }

                        // The predicted words become the next decoder inputs
                        ix_inputs = targetIdx;
                    }
                }
            }

            return(cost);
        }
Exemple #10
0
        /// <summary>
        /// Runs the attention decoder over a batch, starting every sentence from the START tag. In training
        /// mode it accumulates the negative log-likelihood of the golden target words and writes the output
        /// gradients; otherwise it appends the arg-max word of every step to the output sentences.
        /// </summary>
        /// <param name="outputSentences">In training mode, they are golden target sentences, otherwise, they are target sentences generated by the decoder. The list must be empty when not training; it is then filled with one new list per batch entry.</param>
        /// <param name="g">Compute graph used for every tensor operation in this method.</param>
        /// <param name="encodedOutputs">Encoder output passed to <c>decoder.PreProcess</c>.</param>
        /// <param name="decoder">Attention decoder that is run once per output position.</param>
        /// <param name="embedding">Embedding tensor from which the decoder input rows are peeked.</param>
        /// <param name="batchSize">Number of sentences in the batch.</param>
        /// <param name="isTraining">True to compute the cost against the golden sentences; false to generate output words.</param>
        /// <returns>The accumulated cost; it stays 0 when <paramref name="isTraining"/> is false.</returns>
        /// <exception cref="ArgumentException">Thrown when not training and <paramref name="outputSentences"/> is not empty.</exception>
        private float Decode(List <List <string> > outputSentences, IComputeGraph g, IWeightTensor encodedOutputs, AttentionDecoder decoder, IWeightTensor embedding,
                             int batchSize, bool isTraining = true)
        {
            float cost = 0.0f;

            // Every sentence in the batch starts decoding from the START tag
            int[] ix_inputs = new int[batchSize];
            for (int i = 0; i < ix_inputs.Length; i++)
            {
                ix_inputs[i] = (int)SENTTAGS.START;
            }

            // Initialize variables according to current mode
            List <int>    originalOutputLengths = isTraining ? ParallelCorpus.PadSentences(outputSentences) : null;
            // Training walks the padded golden sentence; generation is capped at 64 positions
            int           seqLen       = isTraining ? outputSentences[0].Count : 64;
            // NOTE(review): dropoutRatio is assigned but never read in this method
            float         dropoutRatio = isTraining ? m_dropoutRatio : 0.0f;
            // Indices of sentences that have already produced EOS (generation mode only)
            HashSet <int> setEndSentId = isTraining ? null : new HashSet <int>();

            if (!isTraining)
            {
                if (outputSentences.Count != 0)
                {
                    throw new ArgumentException($"The list for output sentences must be empty if current is not in training mode.");
                }
                // Create one empty output sentence per batch entry
                for (int i = 0; i < batchSize; i++)
                {
                    outputSentences.Add(new List <string>());
                }
            }

            // Pre-process for attention model
            AttentionPreProcessResult attPreProcessResult = decoder.PreProcess(encodedOutputs, batchSize, g);

            for (int i = 0; i < seqLen; i++)
            {
                //Get embedding for all sentence in the batch at position i
                List <IWeightTensor> inputs = new List <IWeightTensor>();
                for (int j = 0; j < batchSize; j++)
                {
                    inputs.Add(g.PeekRow(embedding, ix_inputs[j]));
                }
                IWeightTensor inputsM = g.ConcatRows(inputs);

                //Decode output sentence at position i
                IWeightTensor eOutput = decoder.Decode(inputsM, attPreProcessResult, batchSize, g);

                //Softmax for output
                using (IWeightTensor probs = g.Softmax(eOutput, runGradients: false, inPlace: true))
                {
                    if (isTraining)
                    {
                        //Calculate loss for each word in the batch
                        for (int k = 0; k < batchSize; k++)
                        {
                            using (IWeightTensor probs_k = g.PeekRow(probs, k, runGradients: false))
                            {
                                int   ix_targets_k = m_modelMetaData.Vocab.GetTargetWordIndex(outputSentences[k][i]);
                                float score_k      = probs_k.GetWeightAt(ix_targets_k);
                                // Only positions inside the original (unpadded) sentence contribute to the cost
                                if (i < originalOutputLengths[k])
                                {
                                    cost += (float)-Math.Log(score_k);
                                }

                                // Replace the target entry with (p - 1); presumably the softmax cross-entropy gradient - confirm
                                probs_k.SetWeightAt(score_k - 1, ix_targets_k);
                                // The golden word at this position becomes the next decoder input
                                ix_inputs[k] = ix_targets_k;
                            }
                        }
                        // Use the modified probabilities as the gradients of the decoder output
                        eOutput.CopyWeightsToGradients(probs);
                    }
                    else
                    {
                        // Output "i"th target word
                        int[]         targetIdx   = g.Argmax(probs, 1);
                        List <string> targetWords = m_modelMetaData.Vocab.ConvertTargetIdsToString(targetIdx.ToList());
                        for (int j = 0; j < targetWords.Count; j++)
                        {
                            // Sentences that already emitted EOS receive no further words
                            if (setEndSentId.Contains(j) == false)
                            {
                                outputSentences[j].Add(targetWords[j]);

                                if (targetWords[j] == ParallelCorpus.EOS)
                                {
                                    setEndSentId.Add(j);
                                }
                            }
                        }

                        // The predicted words become the next decoder inputs
                        ix_inputs = targetIdx;
                    }
                }

                if (isTraining)
                {
                    ////Hacky: Run backward for last feed forward layer and dropout layer in order to save memory usage, since it's not time sequence dependency
                    g.RunTopBackward();
                    // A second backward step is run only when dropout is enabled
                    if (m_dropoutRatio > 0.0f)
                    {
                        g.RunTopBackward();
                    }
                }
                else
                {
                    if (setEndSentId.Count == batchSize)
                    {
                        // All target sentences in current batch are finished, so we exit.
                        break;
                    }
                }
            }

            return(cost);
        }
        /// <summary>
        /// Decodes a batch of golden target sentences in training: feeds the previous golden word at each
        /// position, accumulates the negative log-likelihood of the expected word and sets the output gradients.
        /// </summary>
        /// <param name="outputSentences">Golden target sentences; they are padded by <c>PadSentences</c> before decoding.</param>
        /// <param name="g">Compute graph used for every matrix operation in this method.</param>
        /// <param name="encodedOutputs">Encoder output passed to <c>decoder.PreProcess</c>.</param>
        /// <param name="decoder">Attention decoder that is run once per output position.</param>
        /// <param name="decoderFFLayer">Feed-forward layer applied to the decoder output before the softmax.</param>
        /// <param name="Embedding">Embedding matrix from which the decoder input rows are peeked.</param>
        /// <param name="predictSentence">Always set to null by this method; it is never populated here.</param>
        /// <returns>The accumulated cost over the batch.</returns>
        private float Decode(List <List <string> > outputSentences, IComputeGraph g, IWeightMatrix encodedOutputs, AttentionDecoder decoder, FeedForwardLayer decoderFFLayer, IWeightMatrix Embedding, out List <List <string> > predictSentence)
        {
            predictSentence = null;
            float cost = 0.0f;
            var   attPreProcessResult = decoder.PreProcess(encodedOutputs, g);

            var originalOutputLengths = PadSentences(outputSentences);
            int seqLen = outputSentences[0].Count;

            // Every sentence in the batch starts decoding from the START tag
            int[] ix_inputs  = new int[m_batchSize];
            int[] ix_targets = new int[m_batchSize];
            for (int i = 0; i < ix_inputs.Length; i++)
            {
                ix_inputs[i] = (int)SENTTAGS.START;
            }

            // One extra iteration past the sentence length, whose target is the END tag
            for (int i = 0; i < seqLen + 1; i++)
            {
                //Get embedding for all sentence in the batch at position i
                List <IWeightMatrix> inputs = new List <IWeightMatrix>();
                for (int j = 0; j < m_batchSize; j++)
                {
                    List <string> OutputSentence = outputSentences[j];

                    // Target defaults to UNK, becomes END past the last position,
                    // or the vocabulary index when the word is known
                    ix_targets[j] = (int)SENTTAGS.UNK;
                    if (i >= seqLen)
                    {
                        ix_targets[j] = (int)SENTTAGS.END;
                    }
                    else
                    {
                        if (m_tgtWordToIndex.ContainsKey(OutputSentence[i]))
                        {
                            ix_targets[j] = m_tgtWordToIndex[OutputSentence[i]];
                        }
                    }

                    var x = g.PeekRow(Embedding, ix_inputs[j]);

                    inputs.Add(x);
                }

                var inputsM = g.ConcatRows(inputs);

                //Decode output sentence at position i
                var eOutput = decoder.Decode(inputsM, attPreProcessResult, g);
                if (m_dropoutRatio > 0.0f)
                {
                    eOutput = g.Dropout(eOutput, m_dropoutRatio);
                }

                var o = decoderFFLayer.Process(eOutput, g);

                //Softmax for output
//                var o = g.MulAdd(eOutput, Whd, bds);
                var probs = g.Softmax(o, false);

                o.ReleaseWeight();

                //Calculate loss for each word in the batch
                List <IWeightMatrix> probs_g = g.UnFolderRow(probs, m_batchSize, false);
                for (int k = 0; k < m_batchSize; k++)
                {
                    var probs_k = probs_g[k];
                    var score_k = probs_k.GetWeightAt(ix_targets[k]);

                    // Positions up to and including the END step of the original (unpadded) sentence contribute to the cost
                    if (i < originalOutputLengths[k] + 1)
                    {
                        cost += (float)-Math.Log(score_k);
                    }

                    // Replace the target entry with (p - 1); presumably the softmax cross-entropy gradient - confirm
                    probs_k.SetWeightAt(score_k - 1, ix_targets[k]);

                    // The expected word at this position becomes the next decoder input
                    ix_inputs[k] = ix_targets[k];
                    probs_k.Dispose();
                }

                // Use the modified probabilities as the gradients of the feed-forward output
                o.SetGradientByWeight(probs);

                //Hacky: Run backward for last feed forward layer and dropout layer in order to save memory usage, since it's not time sequence dependency
                g.RunTopBackward();
                g.RunTopBackward();
                // One more backward step is run only when dropout is enabled
                if (m_dropoutRatio > 0.0f)
                {
                    g.RunTopBackward();
                }
            }

            return(cost);
        }
Exemple #12
0
 /// <summary>
 /// Resets the forward encoder, the reverse encoder and the decoder, in that order.
 /// </summary>
 private void Reset(Encoder encoder, Encoder reversEncoder, AttentionDecoder decoder)
 {
     // Encoders first (forward, then reverse) ...
     encoder.Reset();
     reversEncoder.Reset();

     // ... and the decoder last.
     decoder.Reset();
 }
Exemple #13
0
        /// <summary>
        /// Decodes one golden target sentence word by word, feeding the previous expected word at each step,
        /// adding the negative log of the expected word's probability to <paramref name="cost"/> and setting
        /// the gradient of the output layer.
        /// </summary>
        /// <param name="OutputSentence">Golden target words; one extra step is run after the last word with END as the target.</param>
        /// <param name="g">Compute graph used for every matrix operation in this method.</param>
        /// <param name="cost">Cost accumulated so far; the updated value is returned.</param>
        /// <param name="sparseInput">Sparse input forwarded to <c>decoder.Decode</c>.</param>
        /// <param name="encoded">Encoder outputs forwarded to <c>decoder.Decode</c>.</param>
        /// <param name="decoder">Attention decoder run once per step.</param>
        /// <param name="Whd">Weight matrix passed to <c>g.muladd</c> together with <paramref name="bd"/>.</param>
        /// <param name="bd">Matrix passed as the last argument of <c>g.muladd</c>.</param>
        /// <param name="Embedding">Embedding matrix from which the decoder input row is peeked.</param>
        /// <returns>The input cost plus the cost of this sentence.</returns>
        private float DecodeOutput(string[] OutputSentence, IComputeGraph g, float cost, SparseWeightMatrix sparseInput, List <WeightMatrix> encoded, AttentionDecoder decoder, WeightMatrix Whd, WeightMatrix bd, WeightMatrix Embedding)
        {
            int wordCount  = OutputSentence.Length;
            int previousId = (int)SENTTAGS.START;

            for (int step = 0; step <= wordCount; step++)
            {
                // Expected word for this step: END after the last word, the vocabulary index
                // for known words, UNK otherwise.
                int targetId;
                if (step == wordCount)
                {
                    targetId = (int)SENTTAGS.END;
                }
                else if (t_wordToIndex.ContainsKey(OutputSentence[step]))
                {
                    targetId = t_wordToIndex[OutputSentence[step]];
                }
                else
                {
                    targetId = (int)SENTTAGS.UNK;
                }

                var inputRow = g.PeekRow(Embedding, previousId);
                var hidden   = decoder.Decode(sparseInput, inputRow, encoded, g);
                if (UseDropout)
                {
                    hidden = g.Dropout(hidden, 0.2f);
                }

                var logits = g.muladd(hidden, Whd, bd);
                if (UseDropout)
                {
                    logits = g.Dropout(logits, 0.2f);
                }

                var probs = g.SoftmaxWithCrossEntropy(logits);
                cost += (float)-Math.Log(probs.Weight[targetId]);

                // The gradient is assigned the probabilities object itself (not a copy) and then
                // decremented at the target index.
                logits.Gradient            = probs.Weight;
                logits.Gradient[targetId] -= 1;

                // The expected word becomes the next decoder input.
                previousId = targetId;
            }

            return cost;
        }
Exemple #14
0
 /// <summary>
 /// Resets the forward encoder, the reverse encoder and the decoder, in that order,
 /// passing the same weight factory to each of them.
 /// </summary>
 private void Reset(IWeightFactory weightFactory, Encoder encoder, Encoder reversEncoder, AttentionDecoder decoder)
 {
     // Encoders first (forward, then reverse) ...
     encoder.Reset(weightFactory);
     reversEncoder.Reset(weightFactory);

     // ... and the decoder last.
     decoder.Reset(weightFactory);
 }
Exemple #15
0
        /// <summary>
        /// Collects the parameters of the encoder, decoder and reverse encoder together with the two
        /// embedding matrices and the <c>Whd</c> / <c>bd</c> matrices, and asks <c>m_solver</c> to update them.
        /// </summary>
        /// <param name="learningRate">Learning rate forwarded to the solver.</param>
        /// <param name="batchSize">Batch size forwarded to the solver.</param>
        /// <returns>The value returned by <c>m_solver.UpdateWeights</c>.</returns>
        private float UpdateParameters(float learningRate, Encoder encoder, Encoder ReversEncoder, AttentionDecoder decoder,
                                       IWeightMatrix Whd, IWeightMatrix bd, IWeightMatrix s_Embedding, IWeightMatrix t_Embedding, int batchSize)
        {
            // Start from the encoder's list and append the rest in the same order as before.
            var allParams = encoder.getParams();
            allParams.AddRange(decoder.getParams());
            allParams.AddRange(ReversEncoder.getParams());

            foreach (var standalone in new[] { s_Embedding, t_Embedding, Whd, bd })
            {
                allParams.Add(standalone);
            }

            return m_solver.UpdateWeights(allParams, batchSize, learningRate, m_regc, m_clipvalue, m_archType);
        }