/// <summary>
        /// Creates a model that is ready to be trained on <paramref name="trainCorpus"/>.
        /// The constructor validates its arguments through <c>CheckParameters</c>, initializes the
        /// devices when the architecture is <c>ArchTypeEnums.GPU_CUDA</c>, records the settings,
        /// obtains the vocabularies (from files or from the corpus), initializes the weights and
        /// finally loads optional pre-trained embeddings for every index of the device id array.
        /// </summary>
        /// <param name="inputSize">Value stored in <c>WordVectorSize</c>.</param>
        /// <param name="hiddenSize">Value stored in <c>HiddenSize</c>.</param>
        /// <param name="depth">Value stored in <c>Depth</c>.</param>
        /// <param name="trainCorpus">Training corpus; also used to build the vocabulary when no vocabulary files are given.</param>
        /// <param name="srcVocabFilePath">Source vocabulary file. Vocabulary files are only used when both paths are non-empty.</param>
        /// <param name="tgtVocabFilePath">Target vocabulary file. Vocabulary files are only used when both paths are non-empty.</param>
        /// <param name="srcEmbeddingFilePath">Optional pre-trained source embedding file; skipped when null or empty.</param>
        /// <param name="tgtEmbeddingFilePath">Optional pre-trained target embedding file; skipped when null or empty.</param>
        /// <param name="useDropout">Not read by this constructor.</param>
        /// <param name="modelFilePath">Path stored as the model file path.</param>
        /// <param name="batchSize">Batch size stored on the instance.</param>
        /// <param name="dropoutRatio">Dropout ratio stored on the instance.</param>
        /// <param name="archType">Architecture type; device initialization only happens for <c>GPU_CUDA</c>.</param>
        /// <param name="deviceIds">Device ids stored on the instance and passed to the device initialization.</param>
        public AttentionSeq2Seq(int inputSize, int hiddenSize, int depth, Corpus trainCorpus, string srcVocabFilePath, string tgtVocabFilePath, string srcEmbeddingFilePath, string tgtEmbeddingFilePath,
                                bool useDropout, string modelFilePath, int batchSize, float dropoutRatio, ArchTypeEnums archType, int[] deviceIds)
        {
            CheckParameters(batchSize, archType, deviceIds);

            bool runsOnCuda = archType == ArchTypeEnums.GPU_CUDA;
            if (runsOnCuda)
            {
                TensorAllocator.InitDevices(deviceIds);
                SetDefaultDeviceIds(deviceIds.Length);
            }

            // Plain configuration values.
            m_archType = archType;
            m_deviceIds = deviceIds;
            m_batchSize = batchSize;
            m_dropoutRatio = dropoutRatio;
            m_modelFilePath = modelFilePath;

            // Corpus and network dimensions.
            TrainCorpus = trainCorpus;
            Depth = depth;
            WordVectorSize = inputSize;
            HiddenSize = hiddenSize;

            // Vocabulary files are used only when both of them are given; otherwise the corpus is scanned.
            bool vocabFileMissing = string.IsNullOrEmpty(srcVocabFilePath) || string.IsNullOrEmpty(tgtVocabFilePath);
            if (vocabFileMissing)
            {
                Logger.WriteLine("Building vocabulary from training corpus...");
                BuildVocab(trainCorpus);
            }
            else
            {
                Logger.WriteLine($"Loading vocabulary files from '{srcVocabFilePath}' and '{tgtVocabFilePath}'...");
                LoadVocab(srcVocabFilePath, tgtVocabFilePath);
            }

            // Initialize the weights of encoders and decoders.
            InitWeights();

            // Pre-trained embeddings are optional on each side.
            bool hasSrcEmbedding = !string.IsNullOrEmpty(srcEmbeddingFilePath);
            bool hasTgtEmbedding = !string.IsNullOrEmpty(tgtEmbeddingFilePath);

            for (int deviceIdx = 0; deviceIdx < m_deviceIds.Length; deviceIdx++)
            {
                if (hasSrcEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
                    LoadWordEmbedding(srcEmbeddingFilePath, m_srcEmbedding[deviceIdx], m_srcWordToIndex);
                }

                if (hasTgtEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
                    LoadWordEmbedding(tgtEmbeddingFilePath, m_tgtEmbedding[deviceIdx], m_tgtWordToIndex);
                }
            }
        }
Esempio n. 2
0
        /// <summary>
        /// Creates a model that is ready to be trained on <paramref name="trainCorpus"/>.
        /// Devices are initialized first, then the settings are recorded, the vocabularies are
        /// loaded from files or built from the corpus, the encoder/decoder/embedding weights are
        /// created, and optional pre-trained embeddings are loaded for every index of the device id array.
        /// </summary>
        /// <param name="embeddingDim">Embedding dimension stored on the instance.</param>
        /// <param name="hiddenDim">Hidden dimension stored on the instance.</param>
        /// <param name="encoderLayerDepth">Encoder layer depth stored on the instance.</param>
        /// <param name="decoderLayerDepth">Decoder layer depth stored on the instance.</param>
        /// <param name="trainCorpus">Training corpus; also used to build the vocabulary when no vocabulary files are given.</param>
        /// <param name="srcVocabFilePath">Source vocabulary file. Vocabulary files are only used when both paths are non-empty.</param>
        /// <param name="tgtVocabFilePath">Target vocabulary file. Vocabulary files are only used when both paths are non-empty.</param>
        /// <param name="srcEmbeddingFilePath">Optional pre-trained source embedding file; skipped when null or empty.</param>
        /// <param name="tgtEmbeddingFilePath">Optional pre-trained target embedding file; skipped when null or empty.</param>
        /// <param name="modelFilePath">Path stored as the model file path.</param>
        /// <param name="batchSize">Batch size stored on the instance.</param>
        /// <param name="dropoutRatio">Dropout ratio stored on the instance.</param>
        /// <param name="multiHeadNum">Multi-head count stored on the instance.</param>
        /// <param name="warmupSteps">Warm-up step count; the stored value is this number plus one.</param>
        /// <param name="archType">Architecture type handed to <c>TensorAllocator.InitDevices</c>.</param>
        /// <param name="encoderType">Encoder type stored on the instance.</param>
        /// <param name="deviceIds">Device ids stored on the instance and passed to the device initialization.</param>
        public AttentionSeq2Seq(int embeddingDim, int hiddenDim, int encoderLayerDepth, int decoderLayerDepth, Corpus trainCorpus, string srcVocabFilePath, string tgtVocabFilePath,
                                string srcEmbeddingFilePath, string tgtEmbeddingFilePath, string modelFilePath, int batchSize, float dropoutRatio, int multiHeadNum, int warmupSteps,
                                ArchTypeEnums archType, EncoderTypeEnums encoderType, int[] deviceIds)
        {
            TensorAllocator.InitDevices(archType, deviceIds);
            SetDefaultDeviceIds(deviceIds.Length);

            // Runtime and training settings.
            m_deviceIds = deviceIds;
            m_batchSize = batchSize;
            m_modelFilePath = modelFilePath;
            m_dropoutRatio = dropoutRatio;
            m_warmupSteps = warmupSteps + 1;

            // Network shape.
            m_encoderType = encoderType;
            m_multiHeadNum = multiHeadNum;
            m_embeddingDim = embeddingDim;
            m_hiddenDim = hiddenDim;
            m_encoderLayerDepth = encoderLayerDepth;
            m_decoderLayerDepth = decoderLayerDepth;

            TrainCorpus = trainCorpus;

            // Vocabulary files are used only when both of them are given; otherwise the corpus is scanned.
            bool useVocabFiles = !string.IsNullOrEmpty(srcVocabFilePath) && !string.IsNullOrEmpty(tgtVocabFilePath);
            if (!useVocabFiles)
            {
                Logger.WriteLine("Building vocabulary from training corpus...");
                BuildVocab(trainCorpus);
            }
            else
            {
                Logger.WriteLine($"Loading vocabulary files from '{srcVocabFilePath}' and '{tgtVocabFilePath}'...");
                LoadVocab(srcVocabFilePath, tgtVocabFilePath);
            }

            // Create the encoder, decoder and embedding weights.
            CreateEncoderDecoderEmbeddings();

            // Pre-trained embeddings are optional on each side.
            bool loadSrcEmbedding = !string.IsNullOrEmpty(srcEmbeddingFilePath);
            bool loadTgtEmbedding = !string.IsNullOrEmpty(tgtEmbeddingFilePath);

            for (int slot = 0; slot < m_deviceIds.Length; slot++)
            {
                if (loadSrcEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
                    LoadWordEmbedding(srcEmbeddingFilePath, m_srcEmbedding[slot], m_srcWordToIndex);
                }

                if (loadTgtEmbedding)
                {
                    Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
                    LoadWordEmbedding(tgtEmbeddingFilePath, m_tgtEmbedding[slot], m_tgtWordToIndex);
                }
            }
        }
Esempio n. 3
0
        /// <summary>
        /// Restores a previously saved model from <paramref name="modelFilePath"/>.
        /// The file is read in this order: the serialized <c>ModelAttentionMetaData</c> record,
        /// then the encoder, the decoder, the source embedding, the target embedding and the
        /// decoder feed-forward layer (each loaded into its default-device entry).
        /// </summary>
        /// <param name="modelFilePath">Path of the model file to read.</param>
        /// <param name="batchSize">Batch size stored on the instance.</param>
        /// <param name="archType">Architecture type handed to <c>TensorAllocator.InitDevices</c>.</param>
        /// <param name="deviceIds">Device ids stored on the instance and passed to the device initialization.</param>
        /// <exception cref="InvalidDataException">
        /// The file does not deserialize to a <c>ModelAttentionMetaData</c> instance.
        /// </exception>
        public AttentionSeq2Seq(string modelFilePath, int batchSize, ArchTypeEnums archType, int[] deviceIds)
        {
            m_batchSize     = batchSize;
            m_deviceIds     = deviceIds;
            m_modelFilePath = modelFilePath;

            TensorAllocator.InitDevices(archType, deviceIds);
            SetDefaultDeviceIds(deviceIds.Length);

            Logger.WriteLine($"Loading model from '{modelFilePath}'...");

            // The using statement closes the stream even when deserialization or one of the
            // Load calls below throws, so the file handle is never leaked.
            using (FileStream fs = new FileStream(m_modelFilePath, FileMode.Open, FileAccess.Read))
            {
                // SECURITY NOTE(review): BinaryFormatter is unsafe on untrusted input and is
                // removed in recent .NET versions. It is kept here because the on-disk model
                // format depends on it; only load model files from trusted sources.
                BinaryFormatter bf = new BinaryFormatter();

                ModelAttentionMetaData modelMetaData = bf.Deserialize(fs) as ModelAttentionMetaData;
                if (modelMetaData == null)
                {
                    // Fail with a descriptive error instead of a NullReferenceException below.
                    throw new InvalidDataException($"Model file '{m_modelFilePath}' does not contain valid model meta data.");
                }

                m_clipvalue         = modelMetaData.Clipval;
                m_encoderLayerDepth = modelMetaData.EncoderLayerDepth;
                m_decoderLayerDepth = modelMetaData.DecoderLayerDepth;
                m_hiddenDim         = modelMetaData.HiddenDim;
                m_startLearningRate = modelMetaData.LearningRate;
                m_embeddingDim      = modelMetaData.EmbeddingDim;
                m_multiHeadNum      = modelMetaData.MultiHeadNum;
                m_encoderType       = modelMetaData.EncoderType;
                m_regc              = modelMetaData.Regc;
                m_dropoutRatio      = modelMetaData.DropoutRatio;
                m_srcWordToIndex    = modelMetaData.SrcWordToIndex;
                m_srcIndexToWord    = modelMetaData.SrcIndexToWord;
                m_tgtWordToIndex    = modelMetaData.TgtWordToIndex;
                m_tgtIndexToWord    = modelMetaData.TgtIndexToWord;

                // The networks can only be created once the dimensions above are known.
                CreateEncoderDecoderEmbeddings();

                // The weights follow the meta data in the same stream; the read order must
                // stay exactly as below.
                m_encoder[m_encoderDefaultDeviceId].Load(fs);
                m_decoder[m_decoderDefaultDeviceId].Load(fs);

                m_srcEmbedding[m_srcEmbeddingDefaultDeviceId].Load(fs);
                m_tgtEmbedding[m_tgtEmbeddingDefaultDeviceId].Load(fs);

                m_decoderFFLayer[m_DecoderFFLayerDefaultDeviceId].Load(fs);
            }
        }
        /// <summary>
        /// Restores a model from <paramref name="modelFilePath"/>.
        /// Arguments are validated through <c>CheckParameters</c>, devices are initialized when the
        /// architecture is <c>ArchTypeEnums.GPU_CUDA</c>, and then the model is loaded, the weights
        /// factory is initialized and the batch size is applied, in that order.
        /// </summary>
        /// <param name="modelFilePath">Path handed to <c>Load</c>.</param>
        /// <param name="batchSize">Batch size handed to <c>SetBatchSize</c> after loading.</param>
        /// <param name="archType">Architecture type; device initialization only happens for <c>GPU_CUDA</c>.</param>
        /// <param name="deviceIds">Device ids stored on the instance and passed to the device initialization.</param>
        public AttentionSeq2Seq(string modelFilePath, int batchSize, ArchTypeEnums archType, int[] deviceIds)
        {
            this.CheckParameters(batchSize, archType, deviceIds);

            bool runsOnCuda = archType == ArchTypeEnums.GPU_CUDA;
            if (runsOnCuda)
            {
                TensorAllocator.InitDevices(deviceIds);
                this.SetDefaultDeviceIds(deviceIds.Length);
            }

            this.m_deviceIds = deviceIds;
            this.m_archType = archType;

            // Load the stored model first, then prepare the weights factory and apply the batch size.
            this.Load(modelFilePath);
            this.InitWeightsFactory();
            this.SetBatchSize(batchSize);
        }