/// <summary>
/// Allocates the per-device model components: source/target embeddings, the
/// bidirectional encoder, the attention decoder and the decoder feed-forward layer.
/// One instance of each is created for every entry in <c>m_deviceIds</c>, after which
/// <c>InitWeightsFactory</c> is invoked.
/// </summary>
private void InitWeights()
        {
            Logger.WriteLine($"Initializing weights...");

            int deviceCount = m_deviceIds.Length;

            m_srcEmbedding = new IWeightMatrix[deviceCount];
            m_tgtEmbedding = new IWeightMatrix[deviceCount];

            m_biEncoder = new BiEncoder[deviceCount];
            m_decoder   = new AttentionDecoder[deviceCount];

            m_decoderFFLayer = new FeedForwardLayer[deviceCount];

            for (int slot = 0; slot < deviceCount; slot++)
            {
                var deviceId = m_deviceIds[slot];
                Logger.WriteLine($"Initializing weights for device '{deviceId}'");

                // The target side uses three rows/columns more than the vocabulary size
                // (presumably reserved for special sentence tags -- confirm against the vocabulary builder).
                var sourceRows = m_srcIndexToWord.Count;
                var targetRows = m_tgtIndexToWord.Count + 3;

                // CUDA runs keep the embeddings in device-bound tensors; every other
                // architecture type falls back to plain weight matrices.
                bool onCuda = m_archType == ArchTypeEnums.GPU_CUDA;
                if (onCuda)
                {
                    m_srcEmbedding[slot] = new WeightTensor(sourceRows, WordVectorSize, deviceId, true);
                    m_tgtEmbedding[slot] = new WeightTensor(targetRows, WordVectorSize, deviceId, true);
                }
                else
                {
                    m_srcEmbedding[slot] = new WeightMatrix(sourceRows, WordVectorSize, true);
                    m_tgtEmbedding[slot] = new WeightMatrix(targetRows, WordVectorSize, true);
                }

                Logger.WriteLine($"Initializing encoders and decoders for device '{deviceId}'...");

                m_biEncoder[slot] = new BiEncoder(m_batchSize, HiddenSize, WordVectorSize, Depth, m_archType, deviceId);
                m_decoder[slot]   = new AttentionDecoder(m_batchSize, HiddenSize, WordVectorSize, HiddenSize * 2, Depth, m_archType, deviceId);

                // Feed-forward layer from the hidden size to the target row count computed above.
                m_decoderFFLayer[slot] = new FeedForwardLayer(HiddenSize, m_tgtIndexToWord.Count + 3, m_archType, deviceId);
            }

            InitWeightsFactory();
        }
Beispiel #2
0
        /// <summary>
        /// Builds one encoder and one attention decoder for every entry in <c>m_deviceIds</c>.
        /// When <c>m_encoderType</c> is <c>BiLSTM</c> a <c>BiEncoder</c> is created, otherwise a
        /// <c>TransformerEncoder</c>; the decoder is always an <c>AttentionDecoder</c>.
        /// </summary>
        /// <returns>The encoder array and the decoder array, both indexed like <c>m_deviceIds</c>.</returns>
        private (IEncoder[], AttentionDecoder[]) CreateEncoderDecoder()
        {
            Logger.WriteLine($"Creating encoders and decoders...");

            int deviceCount = m_deviceIds.Length;
            var encoders = new IEncoder[deviceCount];
            var decoders = new AttentionDecoder[deviceCount];

            for (int slot = 0; slot < deviceCount; slot++)
            {
                var deviceId = m_deviceIds[slot];
                bool useBiLstm = m_encoderType == EncoderTypeEnums.BiLSTM;

                if (useBiLstm)
                {
                    encoders[slot] = new BiEncoder("BiLSTMEncoder", m_batchSize, m_hiddenDim, m_embeddingDim, m_encoderLayerDepth, deviceId);
                }
                else
                {
                    encoders[slot] = new TransformerEncoder("TransformerEncoder", m_batchSize, m_multiHeadNum, m_hiddenDim, m_embeddingDim, m_encoderLayerDepth, deviceId);
                }

                // The two configurations differ only in the decoder's fifth argument:
                // twice the hidden dimension for BiLSTM, the plain hidden dimension otherwise
                // (presumably the width of the encoder output the decoder attends over -- confirm).
                var encoderOutputDim = useBiLstm ? m_hiddenDim * 2 : m_hiddenDim;
                decoders[slot] = new AttentionDecoder("AttnLSTMDecoder", m_batchSize, m_hiddenDim, m_embeddingDim, encoderOutputDim, m_decoderLayerDepth, deviceId);
            }

            return (encoders, decoders);
        }
        /// <summary>
        /// Looks up an embedding row for every token of a batch of source sentences and
        /// runs the bidirectional encoder over the resulting per-position inputs.
        /// </summary>
        /// <param name="g">Compute graph on which all row look-ups and concatenations are performed.</param>
        /// <param name="inputSentences">
        /// Batch of tokenized source sentences. It is handed to <c>PadSentences</c> first; afterwards every
        /// sentence is indexed up to the length of the first one (presumably the padding makes all
        /// sentences equally long -- confirm against <c>PadSentences</c>).
        /// </param>
        /// <param name="biEncoder">Encoder whose <c>Encode</c> method consumes the per-position inputs.</param>
        /// <param name="Embedding">Source embedding matrix; the row at a word's index is used as that word's input.</param>
        /// <returns>The encoder outputs joined into a single matrix via <c>g.ConcatRows</c>.</returns>
        private IWeightMatrix Encode(IComputeGraph g, List <List <string> > inputSentences, BiEncoder biEncoder, IWeightMatrix Embedding)
        {
            PadSentences(inputSentences);

            int seqLen    = inputSentences[0].Count;
            int batchSize = inputSentences.Count;

            // Collect embedding rows position-major: all sentences' token 0, then all
            // sentences' token 1, and so on.
            List <IWeightMatrix> forwardInput = new List <IWeightMatrix>(seqLen * batchSize);

            for (int i = 0; i < seqLen; i++)
            {
                for (int j = 0; j < batchSize; j++)
                {
                    string word = inputSentences[j][i];

                    // Single dictionary lookup; words missing from the source vocabulary map to UNK.
                    int ix_source = m_srcWordToIndex.TryGetValue(word, out var knownIndex)
                        ? knownIndex
                        : (int)SENTTAGS.UNK;

                    forwardInput.Add(g.PeekRow(Embedding, ix_source));
                }
            }

            var forwardInputsM = g.ConcatRows(forwardInput);

            // Slice the stacked rows back into one entry per position, each covering
            // batchSize consecutive rows starting at i * batchSize
            // (assumes PeekRow(matrix, start, count) semantics -- confirm).
            List <IWeightMatrix> stepInputs = new List <IWeightMatrix>(seqLen);

            for (int i = 0; i < seqLen; i++)
            {
                stepInputs.Add(g.PeekRow(forwardInputsM, i * batchSize, batchSize));
            }

            var encodedOutput = biEncoder.Encode(stepInputs, g);

            return g.ConcatRows(encodedOutput);
        }