/// <summary>
/// Allocates and initializes the embedding matrices, encoders, decoders and the
/// decoder output layer for every configured device, then rebuilds the weights factory.
/// </summary>
private void InitWeights()
{
    Logger.WriteLine($"Initializing weights...");

    int deviceCount = m_deviceIds.Length;
    m_srcEmbedding = new IWeightMatrix[deviceCount];
    m_tgtEmbedding = new IWeightMatrix[deviceCount];
    m_biEncoder = new BiEncoder[deviceCount];
    m_decoder = new AttentionDecoder[deviceCount];
    m_decoderFFLayer = new FeedForwardLayer[deviceCount];

    for (int d = 0; d < deviceCount; d++)
    {
        var deviceId = m_deviceIds[d];
        Logger.WriteLine($"Initializing weights for device '{deviceId}'");

        // NOTE(review): the target embedding reserves 3 extra rows (sentence tags) but the
        // source embedding does not — confirm the source vocabulary already includes them.
        if (m_archType == ArchTypeEnums.GPU_CUDA)
        {
            // GPU path: tensor-backed weights bound to the device.
            m_srcEmbedding[d] = new WeightTensor(m_srcIndexToWord.Count, WordVectorSize, deviceId, true);
            m_tgtEmbedding[d] = new WeightTensor(m_tgtIndexToWord.Count + 3, WordVectorSize, deviceId, true);
        }
        else
        {
            // CPU path: plain matrix-backed weights.
            m_srcEmbedding[d] = new WeightMatrix(m_srcIndexToWord.Count, WordVectorSize, true);
            m_tgtEmbedding[d] = new WeightMatrix(m_tgtIndexToWord.Count + 3, WordVectorSize, true);
        }

        Logger.WriteLine($"Initializing encoders and decoders for device '{deviceId}'...");

        // The BiEncoder emits forward+backward states, so the decoder context width is HiddenSize * 2.
        m_biEncoder[d] = new BiEncoder(m_batchSize, HiddenSize, WordVectorSize, Depth, m_archType, deviceId);
        m_decoder[d] = new AttentionDecoder(m_batchSize, HiddenSize, WordVectorSize, HiddenSize * 2, Depth, m_archType, deviceId);
        m_decoderFFLayer[d] = new FeedForwardLayer(HiddenSize, m_tgtIndexToWord.Count + 3, m_archType, deviceId);
    }

    InitWeightsFactory();
}
/// <summary>
/// Builds one encoder/decoder pair per device, selecting the encoder architecture
/// (BiLSTM or Transformer) from m_encoderType.
/// </summary>
/// <returns>Parallel arrays of encoders and decoders, indexed by device.</returns>
private (IEncoder[], AttentionDecoder[]) CreateEncoderDecoder()
{
    Logger.WriteLine($"Creating encoders and decoders...");

    var encoders = new IEncoder[m_deviceIds.Length];
    var decoders = new AttentionDecoder[m_deviceIds.Length];

    for (int d = 0; d < m_deviceIds.Length; d++)
    {
        var deviceId = m_deviceIds[d];

        if (m_encoderType == EncoderTypeEnums.BiLSTM)
        {
            // BiLSTM concatenates forward and backward states, so the decoder
            // attends over a context of width m_hiddenDim * 2.
            encoders[d] = new BiEncoder("BiLSTMEncoder", m_batchSize, m_hiddenDim, m_embeddingDim, m_encoderLayerDepth, deviceId);
            decoders[d] = new AttentionDecoder("AttnLSTMDecoder", m_batchSize, m_hiddenDim, m_embeddingDim, m_hiddenDim * 2, m_decoderLayerDepth, deviceId);
        }
        else
        {
            // Transformer encoder output width equals m_hiddenDim.
            encoders[d] = new TransformerEncoder("TransformerEncoder", m_batchSize, m_multiHeadNum, m_hiddenDim, m_embeddingDim, m_encoderLayerDepth, deviceId);
            decoders[d] = new AttentionDecoder("AttnLSTMDecoder", m_batchSize, m_hiddenDim, m_embeddingDim, m_hiddenDim, m_decoderLayerDepth, deviceId);
        }
    }

    return (encoders, decoders);
}
/// <summary>
/// Encodes a batch of source sentences and returns the concatenated encoder outputs.
/// </summary>
/// <param name="g">Compute graph used to build the encoding operations.</param>
/// <param name="inputSentences">Batch of tokenized source sentences; padded in place to equal length.</param>
/// <param name="biEncoder">Bi-directional encoder run over the embedded tokens.</param>
/// <param name="Embedding">Source-side embedding matrix (one row per vocabulary entry).</param>
/// <returns>Encoded output rows for every time step of every sentence in the batch.</returns>
private IWeightMatrix Encode(IComputeGraph g, List<List<string>> inputSentences, BiEncoder biEncoder, IWeightMatrix Embedding)
{
    // Make all sentences the same length so the batch can be processed step by step.
    PadSentences(inputSentences);

    int seqLen = inputSentences[0].Count;
    int batchSize = inputSentences.Count;

    // Look up the embedding row for every token, time-major order:
    // all sentences' token 0, then all sentences' token 1, and so on.
    List<IWeightMatrix> forwardInput = new List<IWeightMatrix>();
    for (int i = 0; i < seqLen; i++)
    {
        for (int j = 0; j < batchSize; j++)
        {
            // Single dictionary lookup (was ContainsKey + indexer); unknown words map to UNK.
            if (m_srcWordToIndex.TryGetValue(inputSentences[j][i], out int ix_source) == false)
            {
                ix_source = (int)SENTTAGS.UNK;
            }

            forwardInput.Add(g.PeekRow(Embedding, ix_source));
        }
    }

    var forwardInputsM = g.ConcatRows(forwardInput);

    // Slice the concatenated embeddings back into one matrix per time step (batchSize rows each).
    List<IWeightMatrix> attResults = new List<IWeightMatrix>();
    for (int i = 0; i < seqLen; i++)
    {
        attResults.Add(g.PeekRow(forwardInputsM, i * batchSize, batchSize));
    }

    var encodedOutput = biEncoder.Encode(attResults, g);
    return g.ConcatRows(encodedOutput);
}