/// <summary>
/// Builds a new attention-based Seq2Seq model for training.
/// Loads vocabularies from <paramref name="srcVocabFilePath"/>/<paramref name="tgtVocabFilePath"/> when both
/// are given, otherwise builds them from <paramref name="trainCorpus"/>; then initializes weights and
/// optionally overwrites embeddings with pre-trained vectors on every device.
/// </summary>
/// <param name="inputSize">Word embedding vector size.</param>
/// <param name="hiddenSize">Hidden layer size of encoder/decoder.</param>
/// <param name="depth">Number of stacked layers.</param>
/// <param name="trainCorpus">Training corpus; also used to build vocab when no vocab files are given.</param>
/// <param name="useDropout">NOTE(review): currently unused — dropout is driven by <paramref name="dropoutRatio"/>. Kept for interface compatibility.</param>
/// <param name="deviceIds">Device ids to run on; validated by CheckParameters.</param>
public AttentionSeq2Seq(int inputSize, int hiddenSize, int depth, Corpus trainCorpus, string srcVocabFilePath, string tgtVocabFilePath,
    string srcEmbeddingFilePath, string tgtEmbeddingFilePath, bool useDropout, string modelFilePath, int batchSize,
    float dropoutRatio, ArchTypeEnums archType, int[] deviceIds)
{
    CheckParameters(batchSize, archType, deviceIds);

    // GPU device setup is only needed for the CUDA backend.
    if (archType == ArchTypeEnums.GPU_CUDA)
    {
        TensorAllocator.InitDevices(deviceIds);
        SetDefaultDeviceIds(deviceIds.Length);
    }

    m_dropoutRatio = dropoutRatio;
    m_batchSize = batchSize;
    m_archType = archType;
    m_modelFilePath = modelFilePath;
    m_deviceIds = deviceIds;

    TrainCorpus = trainCorpus;
    Depth = depth;
    WordVectorSize = inputSize;
    HiddenSize = hiddenSize;

    // If vocabulary files are specified, we load them from file; otherwise, we build them from the training corpus.
    if (!string.IsNullOrEmpty(srcVocabFilePath) && !string.IsNullOrEmpty(tgtVocabFilePath))
    {
        Logger.WriteLine($"Loading vocabulary files from '{srcVocabFilePath}' and '{tgtVocabFilePath}'...");
        LoadVocab(srcVocabFilePath, tgtVocabFilePath);
    }
    else
    {
        Logger.WriteLine("Building vocabulary from training corpus...");
        BuildVocab(trainCorpus);
    }

    // Initializing weights in encoders and decoders.
    InitWeights();

    for (int i = 0; i < m_deviceIds.Length; i++)
    {
        // If pre-trained embedding weights are specified, load them from files on each device.
        if (!string.IsNullOrEmpty(srcEmbeddingFilePath))
        {
            Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
            LoadWordEmbedding(srcEmbeddingFilePath, m_srcEmbedding[i], m_srcWordToIndex);
        }

        if (!string.IsNullOrEmpty(tgtEmbeddingFilePath))
        {
            Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
            LoadWordEmbedding(tgtEmbeddingFilePath, m_tgtEmbedding[i], m_tgtWordToIndex);
        }
    }
}
/// <summary>
/// Constructs a fresh attention Seq2Seq model for training with separate encoder/decoder depths,
/// multi-head attention and a configurable encoder type. Vocabularies come from the given vocab
/// files when both paths are present, otherwise from the training corpus. After weight creation,
/// pre-trained embeddings (if any) are loaded onto every configured device.
/// </summary>
public AttentionSeq2Seq(int embeddingDim, int hiddenDim, int encoderLayerDepth, int decoderLayerDepth, Corpus trainCorpus,
    string srcVocabFilePath, string tgtVocabFilePath, string srcEmbeddingFilePath, string tgtEmbeddingFilePath,
    string modelFilePath, int batchSize, float dropoutRatio, int multiHeadNum, int warmupSteps,
    ArchTypeEnums archType, EncoderTypeEnums encoderType, int[] deviceIds)
{
    TensorAllocator.InitDevices(archType, deviceIds);
    SetDefaultDeviceIds(deviceIds.Length);

    // Capture model hyper-parameters.
    m_embeddingDim = embeddingDim;
    m_hiddenDim = hiddenDim;
    m_encoderLayerDepth = encoderLayerDepth;
    m_decoderLayerDepth = decoderLayerDepth;
    m_multiHeadNum = multiHeadNum;
    m_encoderType = encoderType;
    m_dropoutRatio = dropoutRatio;
    m_warmupSteps = warmupSteps + 1; // keep warmup strictly positive

    // Capture run configuration.
    m_batchSize = batchSize;
    m_modelFilePath = modelFilePath;
    m_deviceIds = deviceIds;
    TrainCorpus = trainCorpus;

    bool haveVocabFiles = string.IsNullOrEmpty(srcVocabFilePath) == false
                       && string.IsNullOrEmpty(tgtVocabFilePath) == false;

    // Vocabulary either comes from files or is derived from the training corpus.
    if (haveVocabFiles)
    {
        Logger.WriteLine($"Loading vocabulary files from '{srcVocabFilePath}' and '{tgtVocabFilePath}'...");
        LoadVocab(srcVocabFilePath, tgtVocabFilePath);
    }
    else
    {
        Logger.WriteLine("Building vocabulary from training corpus...");
        BuildVocab(trainCorpus);
    }

    // Allocate encoder/decoder networks and embedding tables.
    CreateEncoderDecoderEmbeddings();

    // Optionally seed embeddings with pre-trained vectors, once per device.
    for (int deviceIdx = 0; deviceIdx < m_deviceIds.Length; deviceIdx++)
    {
        if (string.IsNullOrEmpty(srcEmbeddingFilePath) == false)
        {
            Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
            LoadWordEmbedding(srcEmbeddingFilePath, m_srcEmbedding[deviceIdx], m_srcWordToIndex);
        }

        if (string.IsNullOrEmpty(tgtEmbeddingFilePath) == false)
        {
            Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
            LoadWordEmbedding(tgtEmbeddingFilePath, m_tgtEmbedding[deviceIdx], m_tgtWordToIndex);
        }
    }
}
/// <summary>
/// Restores a trained model from disk: deserializes <see cref="ModelAttentionMetaData"/> (hyper-parameters
/// and vocabularies) from the file header, rebuilds the network structure, then loads the weight tensors
/// for encoder, decoder, both embedding tables and the decoder feed-forward layer from the same stream.
/// </summary>
/// <param name="modelFilePath">Path of the serialized model file to load.</param>
/// <param name="batchSize">Batch size to run with.</param>
/// <param name="archType">Processor architecture (CPU/GPU) to allocate tensors on.</param>
/// <param name="deviceIds">Device ids to run on.</param>
public AttentionSeq2Seq(string modelFilePath, int batchSize, ArchTypeEnums archType, int[] deviceIds)
{
    m_batchSize = batchSize;
    m_deviceIds = deviceIds;
    m_modelFilePath = modelFilePath;

    TensorAllocator.InitDevices(archType, deviceIds);
    SetDefaultDeviceIds(deviceIds.Length);

    Logger.WriteLine($"Loading model from '{modelFilePath}'...");

    // SECURITY NOTE: BinaryFormatter deserialization is unsafe on untrusted input (arbitrary code
    // execution risk) and is obsolete/removed in modern .NET. Only load model files from trusted
    // sources; migrating the on-disk format to a safe serializer would be a breaking change.
    BinaryFormatter bf = new BinaryFormatter();

    // `using` guarantees the stream is disposed even if deserialization or a Load() call throws
    // (the original leaked the FileStream on any exception path).
    using (FileStream fs = new FileStream(m_modelFilePath, FileMode.Open, FileAccess.Read))
    {
        ModelAttentionMetaData modelMetaData = bf.Deserialize(fs) as ModelAttentionMetaData;

        // Restore hyper-parameters captured at training time.
        m_clipvalue = modelMetaData.Clipval;
        m_encoderLayerDepth = modelMetaData.EncoderLayerDepth;
        m_decoderLayerDepth = modelMetaData.DecoderLayerDepth;
        m_hiddenDim = modelMetaData.HiddenDim;
        m_startLearningRate = modelMetaData.LearningRate;
        m_embeddingDim = modelMetaData.EmbeddingDim;
        m_multiHeadNum = modelMetaData.MultiHeadNum;
        m_encoderType = modelMetaData.EncoderType;
        m_regc = modelMetaData.Regc;
        m_dropoutRatio = modelMetaData.DropoutRatio;

        // Restore vocabularies (both directions, both sides).
        m_srcWordToIndex = modelMetaData.SrcWordToIndex;
        m_srcIndexToWord = modelMetaData.SrcIndexToWord;
        m_tgtWordToIndex = modelMetaData.TgtWordToIndex;
        m_tgtIndexToWord = modelMetaData.TgtIndexToWord;

        // Rebuild the network structure before streaming weights into it.
        CreateEncoderDecoderEmbeddings();

        // Weight tensors follow the metadata in the same stream, in this fixed order.
        m_encoder[m_encoderDefaultDeviceId].Load(fs);
        m_decoder[m_decoderDefaultDeviceId].Load(fs);
        m_srcEmbedding[m_srcEmbeddingDefaultDeviceId].Load(fs);
        m_tgtEmbedding[m_tgtEmbeddingDefaultDeviceId].Load(fs);
        m_decoderFFLayer[m_DecoderFFLayerDefaultDeviceId].Load(fs);
    }
}
/// <summary>
/// Restores a previously-trained model from <paramref name="modelFilePath"/> for inference or
/// continued training. Validates the run parameters, prepares GPU devices when the CUDA backend
/// is selected, then delegates the actual deserialization to <c>Load</c> and finishes by wiring
/// up the weight factory and batch size.
/// </summary>
/// <param name="modelFilePath">Path of the serialized model file.</param>
/// <param name="batchSize">Batch size to apply after loading.</param>
/// <param name="archType">Processor architecture (CPU/GPU) to run on.</param>
/// <param name="deviceIds">Device ids; validated by CheckParameters.</param>
public AttentionSeq2Seq(string modelFilePath, int batchSize, ArchTypeEnums archType, int[] deviceIds)
{
    CheckParameters(batchSize, archType, deviceIds);

    // Device initialization is CUDA-specific; the CPU path needs no allocator setup.
    if (archType == ArchTypeEnums.GPU_CUDA)
    {
        TensorAllocator.InitDevices(deviceIds);
        SetDefaultDeviceIds(deviceIds.Length);
    }

    m_archType = archType;
    m_deviceIds = deviceIds;

    Load(modelFilePath);

    InitWeightsFactory();
    SetBatchSize(batchSize);
}