/// <summary>
/// Runs the training workflow using the option values held in this class's static fields:
/// validates the input files, builds the model configuration, loads the training
/// (and optional validation) corpus, and starts the encoder.
/// </summary>
/// <remarks>
/// On any failed precondition an error is logged, <c>UsageTrain()</c> is called and the
/// method returns without training.
/// </remarks>
private static void Train()
{
    Logger.LogFile = "RNNSharpConsole.log";

    if (!File.Exists(strTagFile))
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} doesn't exist.", strTagFile);
        UsageTrain();
        return;
    }

    //Load tag id and its name from file
    var tagSet = new TagSet(strTagFile);

    //CRF training is requested when the iCRF option equals 1
    bool isCRFTraining = iCRF == 1;

    //Create configuration instance and set parameters
    var RNNConfig = new ModelSetting
    {
        TagFile = strTagFile,
        Tags = tagSet,
        ModelFile = strModelFile,
        HiddenLayerSizeList = hiddenLayerSizeList,
        IsCRFTraining = isCRFTraining,
        ModelDirection = iDir,
        VQ = iVQ,
        ModelType = ParseLayerType(hiddenLayerType),
        OutputLayerType = ParseLayerType(outputLayerType),
        MaxIteration = maxIter,
        SaveStep = savestep,
        LearningRate = alpha,
        Dropout = dropout,
        Bptt = bptt,
        GradientCutoff = gradientCutoff,
        NCESampleSize = nceSampleSize
    };

    //Dump RNN setting on console
    RNNConfig.DumpSetting();

    if (!File.Exists(strFeatureConfigFile))
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} doesn't exist.", strFeatureConfigFile);
        UsageTrain();
        return;
    }

    //Create feature extractors and load word embedding data from file
    var featurizer = new Featurizer(strFeatureConfigFile, tagSet);
    featurizer.ShowFeatureSize();

    //Run time features are rejected when iDir is 1 (the bi-directional case, per the message below)
    if (featurizer.IsRunTimeFeatureUsed() && iDir == 1)
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: Run time feature is not available for bi-directional RNN model.");
        UsageTrain();
        return;
    }

    if (!File.Exists(strTrainFile))
    {
        //Name the missing file, as the other file checks in this method do
        Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus {0} doesn't exist.", strTrainFile);
        UsageTrain();
        return;
    }

    //Create RNN encoder and save necessary parameters
    var encoder = new RNNEncoder(RNNConfig);

    //Load training corpus and extract feature set
    encoder.TrainingSet = new DataSet(tagSet.GetSize());
    LoadDataset(strTrainFile, featurizer, encoder.TrainingSet);
    RNNConfig.TrainDataSet = encoder.TrainingSet;

    if (!string.IsNullOrEmpty(strValidFile))
    {
        //Load validated corpus and extract feature set
        Logger.WriteLine("Loading validated corpus from {0}", strValidFile);
        encoder.ValidationSet = new DataSet(tagSet.GetSize());
        LoadDataset(strValidFile, featurizer, encoder.ValidationSet);
    }
    else
    {
        Logger.WriteLine("Validated corpus isn't specified.");
        encoder.ValidationSet = null;
    }

    if (isCRFTraining)
    {
        Logger.WriteLine("Initialize output tag bigram transition probability...");

        //Build tag bigram transition matrix
        encoder.TrainingSet.BuildLabelBigramTransition();
    }

    //Start to train the model
    encoder.Train();
}
/// <summary>
/// Runs the training workflow using the option values held in this class's static fields:
/// validates the input files, builds the model settings and feature configuration, loads
/// the training (and optional validation) corpus, and starts the encoder.
/// </summary>
/// <remarks>
/// When the configuration's model type is <c>MODELTYPE.SeqLabel</c> the corpus is loaded as
/// <c>Sequence</c> items via <c>LoadDataset</c>; for any other model type it is loaded as
/// <c>SequencePair</c> items via <c>LoadSeq2SeqDataSet</c>.
/// On any failed precondition an error is logged, <c>UsageTrain()</c> is called and the
/// method returns without training.
/// </remarks>
private static void Train()
{
    Logger.LogFile = "RNNSharpConsole.log";

    if (!File.Exists(tagFilePath))
    {
        //Pass the path as a format argument, like every other call here, instead of
        //interpolating it into the format string.
        //NOTE(review): presumably the logger formats this string, so a path with braces could break it - confirm
        Logger.WriteLine(Logger.Level.err, "FAILED: The tag mapping file {0} doesn't exist.", tagFilePath);
        UsageTrain();
        return;
    }

    //Load tag id and its name from file
    var tagSet = new TagSet(tagFilePath);

    //Create configuration instance and set parameters
    var rnnConfig = new ModelSetting
    {
        VQ = iVQ,
        MaxIteration = maxIter,
        SaveStep = savestep,
        LearningRate = alpha,
        GradientCutoff = gradientCutoff,
        IsConstAlpha = constAlpha
    };

    //Dump RNN setting on console
    rnnConfig.DumpSetting();

    if (!File.Exists(configFilePath))
    {
        Logger.WriteLine(Logger.Level.err, "FAILED: The feature configuration file {0} doesn't exist.", configFilePath);
        UsageTrain();
        return;
    }

    //Create feature extractors and load word embedding data from file
    var config = new Config(configFilePath, tagSet);
    config.ShowFeatureSize();

    if (!File.Exists(trainFilePath))
    {
        //Name the missing file, as the other file checks in this method do
        Logger.WriteLine(Logger.Level.err, "FAILED: The training corpus {0} doesn't exist.", trainFilePath);
        UsageTrain();
        return;
    }

    bool hasValidationCorpus = !string.IsNullOrEmpty(validFilePath);

    if (config.ModelType == MODELTYPE.SeqLabel)
    {
        //Create RNN encoder and save necessary parameters
        var encoder = new RNNEncoder<Sequence>(rnnConfig, config)
        {
            TrainingSet = new DataSet<Sequence>(tagSet.GetSize())
        };

        //Load training corpus and extract feature set
        LoadDataset(trainFilePath, config, encoder.TrainingSet);

        if (hasValidationCorpus)
        {
            //Load validated corpus and extract feature set
            Logger.WriteLine("Loading validated corpus from {0}", validFilePath);
            encoder.ValidationSet = new DataSet<Sequence>(tagSet.GetSize());
            LoadDataset(validFilePath, config, encoder.ValidationSet);
        }
        else
        {
            Logger.WriteLine("Validated corpus isn't specified.");
            encoder.ValidationSet = null;
        }

        if (encoder.IsCRFTraining)
        {
            Logger.WriteLine("Initialize output tag bigram transition probability...");

            //Build tag bigram transition matrix
            encoder.TrainingSet.BuildLabelBigramTransition();
        }

        //Start to train the model
        encoder.Train();
    }
    else
    {
        //Create RNN encoder and save necessary parameters
        var encoder = new RNNEncoder<SequencePair>(rnnConfig, config)
        {
            TrainingSet = new DataSet<SequencePair>(tagSet.GetSize())
        };

        //Load training corpus and extract feature set
        LoadSeq2SeqDataSet(trainFilePath, config, encoder.TrainingSet);

        if (hasValidationCorpus)
        {
            //Load validated corpus and extract feature set
            Logger.WriteLine("Loading validated corpus from {0}", validFilePath);
            encoder.ValidationSet = new DataSet<SequencePair>(tagSet.GetSize());
            LoadSeq2SeqDataSet(validFilePath, config, encoder.ValidationSet);
        }
        else
        {
            Logger.WriteLine("Validated corpus isn't specified.");
            encoder.ValidationSet = null;
        }

        if (encoder.IsCRFTraining)
        {
            Logger.WriteLine("Initialize output tag bigram transition probability...");

            //Build tag bigram transition matrix
            encoder.TrainingSet.BuildLabelBigramTransition();
        }

        //Start to train the model
        encoder.Train();
    }
}