// Creates a sequence-labeling network from the given options.
//
// If the file at options.ModelFilePath exists, the model is loaded from it and
// both vocabulary arguments must be null. If it does not exist, a fresh
// SeqLabelModel is built from the options and the supplied vocabularies, which
// is only permitted when the task is training.
//
// Throws FileNotFoundException when the task is not training and the model file is missing.
// Throws ArgumentException when the model file exists and a vocabulary was also passed in.
public SeqLabel(SeqLabelOptions options, Vocab srcVocab = null, Vocab clsVocab = null)
    : base(options.DeviceIds, options.ProcessorType, options.ModelFilePath, options.MemoryUsageRatio, options.CompilerOptions, options.ValidIntervalHours, updateFreq: options.UpdateFreq)
{
    m_options = options;
    m_shuffleType = options.ShuffleType;

    string modelPath = m_options.ModelFilePath;
    bool modelOnDisk = File.Exists(modelPath);

    if (!modelOnDisk)
    {
        // Only a training run is allowed to start without an existing model.
        if (m_options.Task != ModeEnums.Train)
        {
            throw new FileNotFoundException($"Model '{modelPath}' doesn't exist.");
        }

        // Build a brand-new model and initialize the weights of its layers.
        m_modelMetaData = new SeqLabelModel(options.HiddenSize, options.EmbeddingDim, options.EncoderLayerDepth, options.MultiHeadNum,
                                            options.EncoderType, srcVocab, clsVocab, options.MaxSegmentNum);
        CreateTrainableParameters(m_modelMetaData);
    }
    else
    {
        // A stored model carries its own vocabulary, so passing another one is rejected.
        if (srcVocab != null || clsVocab != null)
        {
            throw new ArgumentException($"Model '{modelPath}' exists and it includes vocabulary, so input vocabulary must be null.");
        }

        // Load the stored model; CreateTrainableParameters is handed over as the callback.
        m_modelMetaData = LoadModelImpl_WITH_CONVERT(CreateTrainableParameters);
    }

    m_modelMetaData.ShowModelInfo();
}
// Collects the source/target file pairs of a parallel corpus stored in one directory.
//
// Files directly inside corpusFilePath (sub-directories are not searched) are
// matched by suffix, ignoring case: "<key>.<srcLangName>.snt" is a source file
// and "<key>.<tgtLangName>.snt" is a target file. Every source file is paired
// with the target file that has the same <key>. Target files without a source
// counterpart are ignored.
//
// corpusFilePath   - directory containing the corpus files; also stored as CorpusName.
// srcLangName      - language tag used in source file names.
// tgtLangName      - language tag used in target file names.
// batchSize        - stored in m_batchSize.
// shuffleBlockSize - stored in m_blockSize.
// maxSrcSentLength / maxTgtSentLength - stored length limits for each side.
// shuffleEnums     - stored shuffle strategy.
// tooLongSequence  - stored policy for over-long sequences.
//
// Throws KeyNotFoundException when a source file has no matching target file.
public ParallelCorpus(string corpusFilePath, string srcLangName, string tgtLangName, int batchSize, int shuffleBlockSize = -1, int maxSrcSentLength = 32, int maxTgtSentLength = 32, ShuffleEnums shuffleEnums = ShuffleEnums.Random, TooLongSequence tooLongSequence = TooLongSequence.Ignore)
{
    // NOTE(review): the label "aggregateSrcLengthForShuffle" prints shuffleEnums; it looks stale, but the text is left untouched.
    Logger.WriteLine($"Loading parallel corpus from '{corpusFilePath}' for source side '{srcLangName}' and target side '{tgtLangName}' MaxSrcSentLength = '{maxSrcSentLength}', MaxTgtSentLength = '{maxTgtSentLength}', aggregateSrcLengthForShuffle = '{shuffleEnums}', TooLongSequence = '{tooLongSequence}'");

    m_batchSize = batchSize;
    m_blockSize = shuffleBlockSize;
    m_maxSrcSentLength = maxSrcSentLength;
    m_maxTgtSentLength = maxTgtSentLength;
    m_tooLongSequence = tooLongSequence;
    m_shuffleEnums = shuffleEnums;
    CorpusName = corpusFilePath;

    m_srcFileList = new List<string>();
    m_tgtFileList = new List<string>();

    string[] files = Directory.GetFiles(corpusFilePath, "*.*", SearchOption.TopDirectoryOnly);

    // Key = file path with the language suffix removed; value = full file path.
    Dictionary<string, string> srcKey2FileName = new Dictionary<string, string>();
    Dictionary<string, string> tgtKey2FileName = new Dictionary<string, string>();

    string srcSuffix = $".{srcLangName}.snt";
    string tgtSuffix = $".{tgtLangName}.snt";

    foreach (string file in files)
    {
        if (file.EndsWith(srcSuffix, StringComparison.InvariantCultureIgnoreCase))
        {
            string srcKey = file.Substring(0, file.Length - srcSuffix.Length);
            srcKey2FileName.Add(srcKey, file);

            Logger.WriteLine($"Add source file '{file}' to key '{srcKey}'");
        }

        if (file.EndsWith(tgtSuffix, StringComparison.InvariantCultureIgnoreCase))
        {
            string tgtKey = file.Substring(0, file.Length - tgtSuffix.Length);
            tgtKey2FileName.Add(tgtKey, file);

            Logger.WriteLine($"Add target file '{file}' to key '{tgtKey}'");
        }
    }

    foreach (var pair in srcKey2FileName)
    {
        // Look the target up explicitly so an unpaired source file is reported by name.
        // The exception type is the one the dictionary indexer used to throw.
        string tgtFile;
        if (!tgtKey2FileName.TryGetValue(pair.Key, out tgtFile))
        {
            throw new KeyNotFoundException($"Source file '{pair.Value}' has no matching target file '{pair.Key}{tgtSuffix}' in corpus '{corpusFilePath}'.");
        }

        m_srcFileList.Add(pair.Value);
        m_tgtFileList.Add(tgtFile);
    }
}
// Creates a seq2seq network whose model is loaded through LoadModel.
//
// The run-time settings (dropout, which parts are trainable, sentence-length
// limits, shuffle type) are stored first; the model is loaded last, with
// CreateTrainableParameters passed to LoadModel as the callback.
// NOTE(review): the load presumably relies on the fields assigned above, so keep it as the final statement - confirm.
public AttentionSeq2Seq(string modelFilePath, ProcessorTypeEnums processorType, int[] deviceIds, float dropoutRatio = 0.0f,
    bool isSrcEmbTrainable = true, bool isTgtEmbTrainable = true, bool isEncoderTrainable = true, bool isDecoderTrainable = true,
    int maxSrcSntSize = 128, int maxTgtSntSize = 128, float memoryUsageRatio = 0.9f, ShuffleEnums shuffleType = ShuffleEnums.Random, string[] compilerOptions = null)
    : base(deviceIds, processorType, modelFilePath, memoryUsageRatio, compilerOptions)
{
    // Sentence-length limits and batching strategy.
    m_maxSrcSntSize = maxSrcSntSize;
    m_maxTgtSntSize = maxTgtSntSize;
    m_shuffleType = shuffleType;

    // Regularization and per-component trainable switches.
    m_dropoutRatio = dropoutRatio;
    m_isSrcEmbTrainable = isSrcEmbTrainable;
    m_isTgtEmbTrainable = isTgtEmbTrainable;
    m_isEncoderTrainable = isEncoderTrainable;
    m_isDecoderTrainable = isDecoderTrainable;

    // The "as" cast yields null when the loaded model is not a Seq2SeqModelMetaData.
    var loadedModel = LoadModel(CreateTrainableParameters);
    m_modelMetaData = loadedModel as Seq2SeqModelMetaData;
}
// Creates a sequence-labeling corpus from a single corpus file.
//
// The file is passed to ConvertSequenceLabelingFormatToParallel, which returns
// a source-side path and a target-side path; these become the only entries of
// the source and target file lists. maxSentLength is used as the length limit
// for both sides.
public SeqLabelingCorpus(string corpusFilePath, int batchSize, int shuffleBlockSize = -1, int maxSentLength = 128, ShuffleEnums shuffleEnums = ShuffleEnums.Random)
{
    Logger.WriteLine($"Loading sequence labeling corpus from '{corpusFilePath}' MaxSentLength = '{maxSentLength}'");

    // Batching and shuffling configuration.
    m_shuffleEnums = shuffleEnums;
    m_blockSize = shuffleBlockSize;
    m_batchSize = batchSize;

    // One limit applies to both the token side and the label side.
    m_maxTgtSentLength = maxSentLength;
    m_maxSrcSentLength = maxSentLength;

    m_srcFileList = new List<string>();
    m_tgtFileList = new List<string>();

    var (sourceSidePath, targetSidePath) = ConvertSequenceLabelingFormatToParallel(corpusFilePath);
    m_srcFileList.Add(sourceSidePath);
    m_tgtFileList.Add(targetSidePath);
}
// Creates a brand-new seq2seq network (no model is loaded from disk).
//
// Builds the model metadata from the given dimensions, layer depths, encoder and
// decoder types and vocabulary, stores the run-time settings, creates the
// trainable parameters, and finally - for every device in DeviceIds - loads
// external word embeddings for the source and/or target side when the
// corresponding file path is non-empty.
public AttentionSeq2Seq(int srcEmbeddingDim, int tgtEmbeddingDim, int hiddenDim, int encoderLayerDepth, int decoderLayerDepth, Vocab vocab, string srcEmbeddingFilePath, string tgtEmbeddingFilePath,
    string modelFilePath, float dropoutRatio, int multiHeadNum, ProcessorTypeEnums processorType, EncoderTypeEnums encoderType, DecoderTypeEnums decoderType, bool enableCoverageModel, int[] deviceIds,
    bool isSrcEmbTrainable = true, bool isTgtEmbTrainable = true, bool isEncoderTrainable = true, bool isDecoderTrainable = true,
    int maxSrcSntSize = 128, int maxTgtSntSize = 128, float memoryUsageRatio = 0.9f, ShuffleEnums shuffleType = ShuffleEnums.Random, string[] compilerOptions = null)
    : base(deviceIds, processorType, modelFilePath, memoryUsageRatio, compilerOptions)
{
    m_modelMetaData = new Seq2SeqModelMetaData(hiddenDim, srcEmbeddingDim, tgtEmbeddingDim, encoderLayerDepth, decoderLayerDepth, multiHeadNum, encoderType, decoderType, vocab, enableCoverageModel);

    // Sentence-length limits and batching strategy.
    m_maxSrcSntSize = maxSrcSntSize;
    m_maxTgtSntSize = maxTgtSntSize;
    m_shuffleType = shuffleType;

    // Regularization and per-component trainable switches.
    m_dropoutRatio = dropoutRatio;
    m_isSrcEmbTrainable = isSrcEmbTrainable;
    m_isTgtEmbTrainable = isTgtEmbTrainable;
    m_isEncoderTrainable = isEncoderTrainable;
    m_isDecoderTrainable = isDecoderTrainable;

    // Initialize the weights of the encoder and decoder networks.
    CreateTrainableParameters(m_modelMetaData);

    // Whether a pre-trained embedding file was given does not change per device.
    bool hasSrcEmbeddingFile = !string.IsNullOrEmpty(srcEmbeddingFilePath);
    bool hasTgtEmbeddingFile = !string.IsNullOrEmpty(tgtEmbeddingFilePath);

    // Each device holds its own copy of the embedding network, so load into every one.
    for (int deviceIdx = 0; deviceIdx < DeviceIds.Length; deviceIdx++)
    {
        if (hasSrcEmbeddingFile)
        {
            Logger.WriteLine($"Loading ExtEmbedding model from '{srcEmbeddingFilePath}' for source side.");
            LoadWordEmbedding(srcEmbeddingFilePath, m_srcEmbedding.GetNetworkOnDevice(deviceIdx), m_modelMetaData.Vocab.SrcWordToIndex);
        }

        if (hasTgtEmbeddingFile)
        {
            Logger.WriteLine($"Loading ExtEmbedding model from '{tgtEmbeddingFilePath}' for target side.");
            LoadWordEmbedding(tgtEmbeddingFilePath, m_tgtEmbedding.GetNetworkOnDevice(deviceIdx), m_modelMetaData.Vocab.TgtWordToIndex);
        }
    }
}
// Console entry point: parses the options and runs one of the supported tasks.
//
// Options come from the command line; when ConfigFilePath is set, they are
// replaced wholesale by the options deserialized from that JSON file.
// The task is selected by opts.TaskName:
//   Train     - trains a model, resuming from ModelFilePath when that file exists.
//   Valid     - scores an existing model on the validation corpus.
//   Test      - decodes every line of InputTestFile and writes the results to OutputTestFile.
//   DumpVocab - writes the vocabularies of an existing model to SrcVocab / TgtVocab.
//   otherwise - prints the usage text.
//
// Any exception is logged. The process exit code is then set to 1 so that
// calling scripts can detect the failure (previously a failed run still exited with 0).
private static void Main(string[] args)
{
    try
    {
        Logger.LogFile = $"{nameof(Seq2SeqConsole)}_{GetTimeStamp(DateTime.Now)}.log";
        ShowOptions(args);

        // Parse command line
        Options opts = new Options();
        ArgParser argParser = new ArgParser(args, opts);

        if (string.IsNullOrEmpty(opts.ConfigFilePath) == false)
        {
            Logger.WriteLine($"Loading config file from '{opts.ConfigFilePath}'");
            opts = JsonConvert.DeserializeObject<Options>(File.ReadAllText(opts.ConfigFilePath));
        }

        AttentionSeq2Seq ss = null;

        // Options carry enum values as text; Enum.Parse throws on an unknown name.
        ProcessorTypeEnums processorType = (ProcessorTypeEnums)Enum.Parse(typeof(ProcessorTypeEnums), opts.ProcessorType);
        EncoderTypeEnums encoderType = (EncoderTypeEnums)Enum.Parse(typeof(EncoderTypeEnums), opts.EncoderType);
        DecoderTypeEnums decoderType = (DecoderTypeEnums)Enum.Parse(typeof(DecoderTypeEnums), opts.DecoderType);
        ModeEnums mode = (ModeEnums)Enum.Parse(typeof(ModeEnums), opts.TaskName);
        ShuffleEnums shuffleType = (ShuffleEnums)Enum.Parse(typeof(ShuffleEnums), opts.ShuffleType);

        // Space-separated compiler options; null when none were given.
        string[] cudaCompilerOptions = String.IsNullOrEmpty(opts.CompilerOptions) ? null : opts.CompilerOptions.Split(' ', StringSplitOptions.RemoveEmptyEntries);

        // Parse device ids from options (comma-separated integers)
        int[] deviceIds = opts.DeviceIds.Split(',').Select(x => int.Parse(x)).ToArray();

        if (mode == ModeEnums.Train)
        {
            // Load train corpus
            ParallelCorpus trainCorpus = new ParallelCorpus(corpusFilePath: opts.TrainCorpusPath, srcLangName: opts.SrcLang, tgtLangName: opts.TgtLang, batchSize: opts.BatchSize, shuffleBlockSize: opts.ShuffleBlockSize,
                maxSrcSentLength: opts.MaxSrcSentLength, maxTgtSentLength: opts.MaxTgtSentLength, shuffleEnums: shuffleType);

            // Load valid corpus (optional)
            ParallelCorpus validCorpus = string.IsNullOrEmpty(opts.ValidCorpusPath)
                ? null
                : new ParallelCorpus(opts.ValidCorpusPath, opts.SrcLang, opts.TgtLang, opts.ValBatchSize, opts.ShuffleBlockSize, opts.MaxSrcSentLength, opts.MaxTgtSentLength);

            // Create learning rate
            ILearningRate learningRate = new DecayLearningRate(opts.StartLearningRate, opts.WarmUpSteps, opts.WeightsUpdateCount);

            // Create optimizer
            AdamOptimizer optimizer = new AdamOptimizer(opts.GradClip, opts.Beta1, opts.Beta2);

            // Create metrics
            List<IMetric> metrics = new List<IMetric>
            {
                new BleuMetric(),
                new LengthRatioMetric()
            };

            if (!String.IsNullOrEmpty(opts.ModelFilePath) && File.Exists(opts.ModelFilePath))
            {
                // Incremental training: continue from the model already on disk
                Logger.WriteLine($"Loading model from '{opts.ModelFilePath}'...");
                ss = new AttentionSeq2Seq(modelFilePath: opts.ModelFilePath, processorType: processorType, dropoutRatio: opts.DropoutRatio, deviceIds: deviceIds,
                    isSrcEmbTrainable: opts.IsSrcEmbeddingTrainable, isTgtEmbTrainable: opts.IsTgtEmbeddingTrainable, isEncoderTrainable: opts.IsEncoderTrainable, isDecoderTrainable: opts.IsDecoderTrainable,
                    maxSrcSntSize: opts.MaxSrcSentLength, maxTgtSntSize: opts.MaxTgtSentLength, memoryUsageRatio: opts.MemoryUsageRatio, shuffleType: shuffleType, compilerOptions: cudaCompilerOptions);
            }
            else
            {
                // Load or build vocabulary
                Vocab vocab = null;
                if (!string.IsNullOrEmpty(opts.SrcVocab) && !string.IsNullOrEmpty(opts.TgtVocab))
                {
                    // Vocabulary files are specified, so we load them
                    vocab = new Vocab(opts.SrcVocab, opts.TgtVocab);
                }
                else
                {
                    // We don't specify vocabulary, so we build it from train corpus
                    vocab = new Vocab(trainCorpus);
                }

                // New training
                // NOTE(review): this call binds by the parameter name "embeddingDim"; confirm that an AttentionSeq2Seq constructor overload with that parameter exists.
                ss = new AttentionSeq2Seq(embeddingDim: opts.WordVectorSize, hiddenDim: opts.HiddenSize, encoderLayerDepth: opts.EncoderLayerDepth, decoderLayerDepth: opts.DecoderLayerDepth,
                    srcEmbeddingFilePath: opts.SrcEmbeddingModelFilePath, tgtEmbeddingFilePath: opts.TgtEmbeddingModelFilePath, vocab: vocab, modelFilePath: opts.ModelFilePath,
                    dropoutRatio: opts.DropoutRatio, processorType: processorType, deviceIds: deviceIds, multiHeadNum: opts.MultiHeadNum, encoderType: encoderType, decoderType: decoderType,
                    maxSrcSntSize: opts.MaxSrcSentLength, maxTgtSntSize: opts.MaxTgtSentLength, enableCoverageModel: opts.EnableCoverageModel, memoryUsageRatio: opts.MemoryUsageRatio,
                    shuffleType: shuffleType, compilerOptions: cudaCompilerOptions);
            }

            // Add event handler for monitoring
            ss.IterationDone += ss_IterationDone;

            // Kick off training
            ss.Train(maxTrainingEpoch: opts.MaxEpochNum, trainCorpus: trainCorpus, validCorpus: validCorpus, learningRate: learningRate, optimizer: optimizer, metrics: metrics);
        }
        else if (mode == ModeEnums.Valid)
        {
            Logger.WriteLine($"Evaluate model '{opts.ModelFilePath}' by valid corpus '{opts.ValidCorpusPath}'");

            // Create metrics
            List<IMetric> metrics = new List<IMetric>
            {
                new BleuMetric(),
                new LengthRatioMetric()
            };

            // Load valid corpus
            ParallelCorpus validCorpus = new ParallelCorpus(opts.ValidCorpusPath, opts.SrcLang, opts.TgtLang, opts.ValBatchSize, opts.ShuffleBlockSize, opts.MaxSrcSentLength, opts.MaxTgtSentLength);

            ss = new AttentionSeq2Seq(modelFilePath: opts.ModelFilePath, processorType: processorType, deviceIds: deviceIds, memoryUsageRatio: opts.MemoryUsageRatio, shuffleType: shuffleType, compilerOptions: cudaCompilerOptions);
            ss.Valid(validCorpus: validCorpus, metrics: metrics);
        }
        else if (mode == ModeEnums.Test)
        {
            Logger.WriteLine($"Test model '{opts.ModelFilePath}' by input corpus '{opts.InputTestFile}'");

            // Test trained model
            ss = new AttentionSeq2Seq(modelFilePath: opts.ModelFilePath, processorType: processorType, deviceIds: deviceIds, memoryUsageRatio: opts.MemoryUsageRatio, shuffleType: shuffleType,
                maxSrcSntSize: opts.MaxSrcSentLength, maxTgtSntSize: opts.MaxTgtSentLength, compilerOptions: cudaCompilerOptions);

            List<string> outputLines = new List<string>();
            string[] data_sents_raw1 = File.ReadAllLines(opts.InputTestFile);
            foreach (string line in data_sents_raw1)
            {
                // Input is lower-cased, trimmed and split on single spaces.
                List<string> inputTokens = line.ToLower().Trim().Split(' ').ToList();

                if (opts.BeamSearch > 1)
                {
                    // Beam search: one output line per returned hypothesis
                    List<List<string>> outputWordsList = ss.Predict(inputTokens, opts.BeamSearch);
                    outputLines.AddRange(outputWordsList.Select(x => string.Join(" ", x)));
                }
                else
                {
                    var outputTokensBatch = ss.Test(ParallelCorpus.ConstructInputTokens(inputTokens));
                    outputLines.AddRange(outputTokensBatch.Select(x => String.Join(" ", x)));
                }
            }

            File.WriteAllLines(opts.OutputTestFile, outputLines);
        }
        else if (mode == ModeEnums.DumpVocab)
        {
            ss = new AttentionSeq2Seq(modelFilePath: opts.ModelFilePath, processorType: processorType, deviceIds: deviceIds, compilerOptions: cudaCompilerOptions);
            ss.DumpVocabToFiles(opts.SrcVocab, opts.TgtVocab);
        }
        else
        {
            argParser.Usage();
        }
    }
    catch (Exception err)
    {
        Logger.WriteLine($"Exception: '{err.Message}'");
        Logger.WriteLine($"Call stack: '{err.StackTrace}'");

        // Report the failure to the caller instead of exiting with the default code 0.
        Environment.ExitCode = 1;
    }
}
// Creates a corpus for the seq2seq classification task.
// The constructor has no body of its own: every argument is forwarded unchanged
// to the base-class constructor, the first seven positionally and shuffleEnums /
// tooLongSequence by name.
public Seq2SeqClassificationCorpus(string corpusFilePath, string srcLangName, string tgtLangName, int batchSize, int shuffleBlockSize = -1, int maxSrcSentLength = 32, int maxTgtSentLength = 32, ShuffleEnums shuffleEnums = ShuffleEnums.Random, TooLongSequence tooLongSequence = TooLongSequence.Ignore) : base(corpusFilePath, srcLangName, tgtLangName, batchSize, shuffleBlockSize, maxSrcSentLength, maxTgtSentLength, shuffleEnums: shuffleEnums, tooLongSequence: tooLongSequence) { }
// Encodes one mini-batch of source token ids and returns the encoder output.
//
// A padding self-attention mask is built from the padded length (taken from the
// first sentence) and originalSrcLengths, except when the shuffle type is
// NoPaddingInSrc or NoPadding or when the batch holds a single sentence; in
// those cases no mask is created and null is passed to the encoder.
//
// The mask is disposed once encoding has finished. Disposal sits in a finally
// block so the mask is also released when RunEncoder throws.
//
// Throws whatever srcTokensList[0] throws when srcTokensList is empty.
static private IWeightTensor InnerRunner(IComputeGraph computeGraph, List<List<int>> srcTokensList, float[] originalSrcLengths, ShuffleEnums shuffleType, IEncoder encoder, IModel modelMetaData,
    IWeightTensor srcEmbedding, IWeightTensor posEmbedding, IWeightTensor segmentEmbedding)
{
    int batchSize = srcTokensList.Count;
    int srcSeqPaddedLen = srcTokensList[0].Count;

    // With these shuffle types, or with a single sentence, no source mask is needed.
    bool maskNotNeeded = shuffleType == ShuffleEnums.NoPaddingInSrc || shuffleType == ShuffleEnums.NoPadding || batchSize == 1;
    IWeightTensor srcSelfMask = maskNotNeeded ? null : computeGraph.BuildPadSelfMask(srcSeqPaddedLen, originalSrcLengths);

    try
    {
        // Encoding input source sentences
        return RunEncoder(computeGraph, srcTokensList, encoder, modelMetaData, srcEmbedding, srcSelfMask, posEmbedding, segmentEmbedding);
    }
    finally
    {
        if (srcSelfMask != null)
        {
            srcSelfMask.Dispose();
        }
    }
}
// Encodes the source tokens of one group of a batch and returns one selected
// encoder-output row per sentence.
//
// Steps: fetch the tokens of group groupId, pass them through InsertCLSToken,
// pad them with BuildInTokens.PadSentences, map them to ids with the source
// vocabulary, run the encoder via InnerRunner, then pick row (j * paddedLength)
// for every sentence j in the batch with IndexSelect.
// NOTE(review): this presumably picks the CLS position of each sentence, assuming
// the encoder output is laid out as consecutive rows of paddedLength per sentence - confirm.
public static IWeightTensor BuildTensorForSourceTokenGroupAt(IComputeGraph computeGraph, ISntPairBatch sntPairBatch, ShuffleEnums shuffleType, IEncoder encoder, IModel modelMetaData,
    IWeightTensor srcEmbedding, IWeightTensor posEmbedding, IWeightTensor segmentEmbedding, int groupId)
{
    var groupTokens = InsertCLSToken(sntPairBatch.GetSrcTokens(groupId));
    var unpaddedLengths = BuildInTokens.PadSentences(groupTokens);
    var groupTokenIds = modelMetaData.SrcVocab.GetWordIndex(groupTokens);

    IWeightTensor encoded = InnerRunner(computeGraph, groupTokenIds, unpaddedLengths, shuffleType, encoder, modelMetaData, srcEmbedding, posEmbedding, segmentEmbedding);

    // After padding, every sentence has the length of the first one.
    int paddedLength = groupTokens[0].Count;

    // Row index of the first position of each sentence.
    float[] firstRowOfSentence = new float[sntPairBatch.BatchSize];
    int sentenceIdx = 0;
    while (sentenceIdx < sntPairBatch.BatchSize)
    {
        firstRowOfSentence[sentenceIdx] = sentenceIdx * paddedLength;
        sentenceIdx++;
    }

    return computeGraph.IndexSelect(encoded, firstRowOfSentence);
}
// Resets the encoder for a batch of srcSntsIds.Count sentences and returns the
// encoder output produced by InnerRunner for those sentences.
// sntPairBatch is accepted but not used by this method.
static public IWeightTensor Run(IComputeGraph computeGraph, ISntPairBatch sntPairBatch, IEncoder encoder, IModel modelMetaData, ShuffleEnums shuffleType,
    IWeightTensor srcEmbedding, IWeightTensor posEmbedding, IWeightTensor segmentEmbedding, List<List<int>> srcSntsIds, float[] originalSrcLengths)
{
    // Reset networks before encoding the new batch
    int sentenceCount = srcSntsIds.Count;
    encoder.Reset(computeGraph.GetWeightFactory(), sentenceCount);

    return InnerRunner(computeGraph, srcSntsIds, originalSrcLengths, shuffleType, encoder, modelMetaData, srcEmbedding, posEmbedding, segmentEmbedding);
}