public void Train()
{
    RNN<T> rnn;
    if (ModelSettings.ModelDirection == 0)
    {
        int sparseFeatureSize = TrainingSet.SparseFeatureSize;
        if (ModelSettings.IsSeq2SeqTraining)
        {
            //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence]
            sparseFeatureSize += Featurizer.AutoEncoder.Featurizer.SparseFeatureSize;
            Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}", TrainingSet.SparseFeatureSize,
                Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, sparseFeatureSize);
        }

        List<SimpleLayer> hiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++)
        {
            SimpleLayer layer = null;
            if (ModelSettings.ModelType == LayerType.BPTT)
            {
                BPTTLayer bpttLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                layer = bpttLayer;
            }
            else if (ModelSettings.ModelType == LayerType.LSTM)
            {
                LSTMLayer lstmLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                layer = lstmLayer;
            }
            else
            {
                throw new System.Exception(string.Format("Invalid hidden layer type: {0}", ModelSettings.ModelType.ToString()));
            }

            if (i == 0)
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, TrainingSet.DenseFeatureSize);
                if (ModelSettings.IsSeq2SeqTraining)
                {
                    Logger.WriteLine("For seq2seq training, we have {0} sparse features and {1} dense features from the source sequence.",
                        Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, Featurizer.AutoEncoder.GetTopHiddenLayerSize());
                }
                layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize));
            }
            else
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, hiddenLayers[i - 1].LayerSize);
                layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize));
            }
            hiddenLayers.Add(layer);
        }

        if (ModelSettings.Dropout > 0)
        {
            Logger.WriteLine("Adding dropout layer");
            DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, ModelSettings);
            dropoutLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
            hiddenLayers.Add(dropoutLayer);
        }

        SimpleLayer outputLayer;
        if (ModelSettings.OutputLayerType == LayerType.NCESoftmax)
        {
            Logger.WriteLine("Create NCESoftmax layer as output layer.");
            NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings);
            nceOutputLayer.BuildStatisticData<T>(TrainingSet);
            nceOutputLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
            outputLayer = nceOutputLayer;
        }
        else if (ModelSettings.OutputLayerType == LayerType.Softmax)
        {
            Logger.WriteLine("Create Softmax layer as output layer.");
            outputLayer = new SimpleLayer(TrainingSet.TagSize);
            outputLayer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
        }
        else
        {
            throw new System.Exception(string.Format("Invalid output layer type: {0}", ModelSettings.OutputLayerType.ToString()));
        }

        rnn = new ForwardRNN<T>(hiddenLayers, outputLayer);
    }
    else
    {
        List<SimpleLayer> forwardHiddenLayers = new List<SimpleLayer>();
        List<SimpleLayer> backwardHiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++)
        {
            SimpleLayer forwardLayer = null;
            SimpleLayer backwardLayer = null;
            if (ModelSettings.ModelType == LayerType.BPTT)
            {
                //For BPTT layers
                BPTTLayer forwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                forwardLayer = forwardBPTTLayer;
                BPTTLayer backwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                backwardLayer = backwardBPTTLayer;
            }
            else if (ModelSettings.ModelType == LayerType.LSTM)
            {
                //For LSTM layers
                LSTMLayer forwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                forwardLayer = forwardLSTMLayer;
                LSTMLayer backwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                backwardLayer = backwardLSTMLayer;
            }
            else
            {
                throw new System.Exception(string.Format("Invalid hidden layer type: {0}", ModelSettings.ModelType.ToString()));
            }

            if (i == 0)
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
            }
            else
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize);
                forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize);
                backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, backwardHiddenLayers[i - 1].LayerSize);
            }
            forwardHiddenLayers.Add(forwardLayer);
            backwardHiddenLayers.Add(backwardLayer);
        }

        if (ModelSettings.Dropout > 0)
        {
            Logger.WriteLine("Adding dropout layers");
            DropoutLayer forwardDropoutLayer = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, ModelSettings);
            DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, ModelSettings);
            forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
            backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize);
            forwardHiddenLayers.Add(forwardDropoutLayer);
            backwardHiddenLayers.Add(backwardDropoutLayer);
        }

        SimpleLayer outputLayer;
        if (ModelSettings.OutputLayerType == LayerType.NCESoftmax)
        {
            Logger.WriteLine("Create NCESoftmax layer as output layer.");
            NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings);
            nceOutputLayer.BuildStatisticData<T>(TrainingSet);
            nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
            outputLayer = nceOutputLayer;
        }
        else if (ModelSettings.OutputLayerType == LayerType.Softmax)
        {
            Logger.WriteLine("Create Softmax layer as output layer.");
            outputLayer = new SimpleLayer(TrainingSet.TagSize);
            outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
        }
        else
        {
            throw new System.Exception(string.Format("Invalid output layer type: {0}", ModelSettings.OutputLayerType.ToString()));
        }

        rnn = new BiRNN<T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
    }

    rnn.ModelDirection = (MODELDIRECTION)ModelSettings.ModelDirection;
    rnn.bVQ = ModelSettings.VQ != 0;
    rnn.ModelFile = ModelSettings.ModelFile;
    rnn.SaveStep = ModelSettings.SaveStep;
    rnn.MaxIter = ModelSettings.MaxIteration;
    rnn.IsCRFTraining = ModelSettings.IsCRFTraining;
    rnn.ModelType = ModelSettings.IsSeq2SeqTraining ? MODELTYPE.SEQ2SEQ : MODELTYPE.SEQLABEL;
    if (rnn.ModelDirection == MODELDIRECTION.BI_DIRECTIONAL && rnn.ModelType == MODELTYPE.SEQ2SEQ)
    {
        throw new System.Exception("Bi-directional RNN models do not support sequence-to-sequence training.");
    }

    RNNHelper.LearningRate = ModelSettings.LearningRate;
    RNNHelper.GradientCutoff = ModelSettings.GradientCutoff;
    RNNHelper.IsConstAlpha = ModelSettings.IsConstAlpha;

    //Create tag-bigram transition probability matrix only for sequence RNN mode
    if (ModelSettings.IsCRFTraining)
    {
        rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
    }

    Logger.WriteLine("");
    Logger.WriteLine("Iterative training begins ...");
    double lastPPL = double.MaxValue;
    double lastAlpha = RNNHelper.LearningRate;
    int iter = 0;
    while (true)
    {
        Logger.WriteLine("Cleaning training status...");
        rnn.CleanStatus();

        if (rnn.MaxIter > 0 && iter > rnn.MaxIter)
        {
            Logger.WriteLine("We have trained this model for {0} iterations, exit.", iter);
            break;
        }

        //Start to train the model
        double ppl = rnn.TrainNet(TrainingSet, iter);
        if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
        {
            //Even after reducing alpha, we still cannot get a better result.
            Logger.WriteLine("Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL);
            Logger.WriteLine("Current alpha: {0}, previous alpha: {1}", RNNHelper.LearningRate, lastAlpha);
            break;
        }
        lastAlpha = RNNHelper.LearningRate;

        //Validate the model on the validation corpus
        if (ValidationSet != null)
        {
            Logger.WriteLine("Verify model on validation corpus.");
            if (rnn.ValidateNet(ValidationSet, iter))
            {
                //We got a better result on the validation corpus, so save this model
                Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile);
                rnn.SaveModel(ModelSettings.ModelFile);
            }
        }
        else if (ppl < lastPPL)
        {
            //We have no validation corpus, but we got a better result on the training corpus, so save this model
            Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile);
            rnn.SaveModel(ModelSettings.ModelFile);
        }

        if (ppl >= lastPPL)
        {
            //We cannot get a better result on the training corpus, so reduce the learning rate
            RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f;
        }

        lastPPL = ppl;
        iter++;
    }
}
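GetCurrentLayerDenseFeatureSize is called throughout Train() but is not shown in this listing. Based on the seq2seq log line above (each target-side state also receives Featurizer.AutoEncoder.GetTopHiddenLayerSize() dense features from the source sequence), a plausible sketch is the following; treat it as an inference from that message, not the actual implementation.

//Hedged sketch only: inferred from the seq2seq log message in Train(), not copied from the real code base.
private int GetCurrentLayerDenseFeatureSize(int denseFeatureSize)
{
    //In seq2seq mode, the dense features of each target-side state are concatenated with
    //the top hidden layer output of the source-sequence auto-encoder.
    if (ModelSettings.IsSeq2SeqTraining)
    {
        denseFeatureSize += Featurizer.AutoEncoder.GetTopHiddenLayerSize();
    }
    return denseFeatureSize;
}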
private RNN<T> CreateNetwork()
{
    RNN<T> rnn;
    if (modelDirection == MODELDIRECTION.Forward)
    {
        var sparseFeatureSize = TrainingSet.SparseFeatureSize;
        if (ModelType == MODELTYPE.Seq2Seq)
        {
            //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence]
            sparseFeatureSize += featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize;
            Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}", TrainingSet.SparseFeatureSize,
                featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize, sparseFeatureSize);
        }

        var hiddenLayers = new List<SimpleLayer>();
        for (var i = 0; i < hiddenLayersConfig.Count; i++)
        {
            SimpleLayer layer = null;
            switch (hiddenLayersConfig[i].LayerType)
            {
                case LayerType.BPTT:
                    var bpttLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                    layer = bpttLayer;
                    Logger.WriteLine("Create BPTT layer.");
                    break;

                case LayerType.LSTM:
                    var lstmLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                    layer = lstmLayer;
                    Logger.WriteLine("Create LSTM layer.");
                    break;

                case LayerType.DropOut:
                    var dropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);
                    layer = dropoutLayer;
                    Logger.WriteLine("Create Dropout layer.");
                    break;
            }

            layer.InitializeWeights(sparseFeatureSize, i == 0
                ? GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize)
                : GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize));
            Logger.WriteLine(
                $"Create hidden layer {i}: size = {layer.LayerSize}, sparse feature size = {layer.SparseFeatureSize}, dense feature size = {layer.DenseFeatureSize}");
            hiddenLayers.Add(layer);
        }

        SimpleLayer outputLayer = null;
        outputLayerConfig.LayerSize = TrainingSet.TagSize;
        switch (outputLayerConfig.LayerType)
        {
            case LayerType.NCESoftmax:
                Logger.WriteLine("Create NCESoftmax layer as output layer.");
                var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig);
                nceOutputLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                outputLayer = nceOutputLayer;
                break;

            case LayerType.Softmax:
                Logger.WriteLine("Create Softmax layer as output layer.");
                outputLayer = new SimpleLayer(outputLayerConfig);
                outputLayer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                break;
        }

        rnn = new ForwardRNN<T>(hiddenLayers, outputLayer);
    }
    else
    {
        var forwardHiddenLayers = new List<SimpleLayer>();
        var backwardHiddenLayers = new List<SimpleLayer>();
        for (var i = 0; i < hiddenLayersConfig.Count; i++)
        {
            SimpleLayer forwardLayer = null;
            SimpleLayer backwardLayer = null;
            switch (hiddenLayersConfig[i].LayerType)
            {
                case LayerType.BPTT:
                    //For BPTT layers
                    var forwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                    forwardLayer = forwardBPTTLayer;
                    var backwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                    backwardLayer = backwardBPTTLayer;
                    Logger.WriteLine("Create BPTT layer.");
                    break;

                case LayerType.LSTM:
                    //For LSTM layers
                    var forwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                    forwardLayer = forwardLSTMLayer;
                    var backwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                    backwardLayer = backwardLSTMLayer;
                    Logger.WriteLine("Create LSTM layer.");
                    break;

                case LayerType.DropOut:
                    //Assign the dropout layers to forwardLayer/backwardLayer so they are initialized below
                    var forwardDropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);
                    forwardLayer = forwardDropoutLayer;
                    var backwardDropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);
                    backwardLayer = backwardDropoutLayer;
                    Logger.WriteLine("Create Dropout layer.");
                    break;
            }

            if (i == 0)
            {
                forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
            }
            else
            {
                forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize);
                backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, backwardHiddenLayers[i - 1].LayerSize);
            }

            Logger.WriteLine(
                $"Create hidden layer {i}: size = {forwardLayer.LayerSize}, sparse feature size = {forwardLayer.SparseFeatureSize}, dense feature size = {forwardLayer.DenseFeatureSize}");
            forwardHiddenLayers.Add(forwardLayer);
            backwardHiddenLayers.Add(backwardLayer);
        }

        SimpleLayer outputLayer = null;
        outputLayerConfig.LayerSize = TrainingSet.TagSize;
        switch (outputLayerConfig.LayerType)
        {
            case LayerType.NCESoftmax:
                Logger.WriteLine("Create NCESoftmax layer as output layer.");
                var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig);
                nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                outputLayer = nceOutputLayer;
                break;

            case LayerType.Softmax:
                Logger.WriteLine("Create Softmax layer as output layer.");
                outputLayer = new SimpleLayer(outputLayerConfig);
                outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                break;
        }

        rnn = new BiRNN<T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
    }

    return rnn;
}
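In both branches of CreateNetwork, a hidden layer's dense input chains through the stack: layer 0 consumes the corpus dense feature size, and every deeper layer consumes the previous layer's output size. The helper below only restates that wiring for reference; its name and signature are hypothetical and not part of the class above.

//Illustrative sketch with a hypothetical name; not a method of the class above.
private static List<int> DenseInputSizes(IList<int> hiddenLayerSizes, int corpusDenseFeatureSize)
{
    var sizes = new List<int>();
    for (int i = 0; i < hiddenLayerSizes.Count; i++)
    {
        //Layer 0 reads the corpus dense features; each deeper layer reads the previous layer's output vector.
        sizes.Add(i == 0 ? corpusDenseFeatureSize : hiddenLayerSizes[i - 1]);
    }
    return sizes;
}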
public void Train()
{
    RNN rnn;
    if (m_modelSetting.ModelDirection == 0)
    {
        List<SimpleLayer> hiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++)
        {
            SimpleLayer layer = null;
            if (m_modelSetting.ModelType == LayerType.BPTT)
            {
                BPTTLayer bpttLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                layer = bpttLayer;
            }
            else if (m_modelSetting.ModelType == LayerType.LSTM)
            {
                LSTMLayer lstmLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                layer = lstmLayer;
            }
            else
            {
                throw new System.Exception(string.Format("Invalid hidden layer type: {0}", m_modelSetting.ModelType.ToString()));
            }

            if (i == 0)
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                layer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
            }
            else
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize);
                layer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize);
            }
            hiddenLayers.Add(layer);
        }

        if (m_modelSetting.Dropout > 0)
        {
            Logger.WriteLine("Adding dropout layer");
            DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, m_modelSetting);
            dropoutLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize);
            hiddenLayers.Add(dropoutLayer);
        }

        SimpleLayer outputLayer;
        if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax)
        {
            Logger.WriteLine("Create NCESoftmax layer as output layer.");
            NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting);
            nceOutputLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize);
            outputLayer = nceOutputLayer;
        }
        else if (m_modelSetting.OutputLayerType == LayerType.Softmax)
        {
            Logger.WriteLine("Create Softmax layer as output layer.");
            outputLayer = new SimpleLayer(TrainingSet.TagSize);
            outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[hiddenLayers.Count - 1].LayerSize);
        }
        else
        {
            throw new System.Exception(string.Format("Invalid output layer type: {0}", m_modelSetting.OutputLayerType.ToString()));
        }

        rnn = new ForwardRNN(hiddenLayers, outputLayer);
    }
    else
    {
        List<SimpleLayer> forwardHiddenLayers = new List<SimpleLayer>();
        List<SimpleLayer> backwardHiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++)
        {
            SimpleLayer forwardLayer = null;
            SimpleLayer backwardLayer = null;
            if (m_modelSetting.ModelType == LayerType.BPTT)
            {
                //For BPTT layers
                BPTTLayer forwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                forwardLayer = forwardBPTTLayer;
                BPTTLayer backwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                backwardLayer = backwardBPTTLayer;
            }
            else if (m_modelSetting.ModelType == LayerType.LSTM)
            {
                //For LSTM layers
                LSTMLayer forwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                forwardLayer = forwardLSTMLayer;
                LSTMLayer backwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                backwardLayer = backwardLSTMLayer;
            }
            else
            {
                throw new System.Exception(string.Format("Invalid hidden layer type: {0}", m_modelSetting.ModelType.ToString()));
            }

            if (i == 0)
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
            }
            else
            {
                Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                    i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize);
                forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize);
                backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), backwardHiddenLayers[i - 1].LayerSize);
            }
            forwardHiddenLayers.Add(forwardLayer);
            backwardHiddenLayers.Add(backwardLayer);
        }

        if (m_modelSetting.Dropout > 0)
        {
            Logger.WriteLine("Adding dropout layers");
            DropoutLayer forwardDropoutLayer = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, m_modelSetting);
            DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, m_modelSetting);
            forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
            backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize);
            forwardHiddenLayers.Add(forwardDropoutLayer);
            backwardHiddenLayers.Add(backwardDropoutLayer);
        }

        SimpleLayer outputLayer;
        if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax)
        {
            Logger.WriteLine("Create NCESoftmax layer as output layer.");
            NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting);
            nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
            outputLayer = nceOutputLayer;
        }
        else if (m_modelSetting.OutputLayerType == LayerType.Softmax)
        {
            Logger.WriteLine("Create Softmax layer as output layer.");
            outputLayer = new SimpleLayer(TrainingSet.TagSize);
            outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
        }
        else
        {
            throw new System.Exception(string.Format("Invalid output layer type: {0}", m_modelSetting.OutputLayerType.ToString()));
        }

        rnn = new BiRNN(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
    }

    rnn.ModelDirection = (MODELDIRECTION)m_modelSetting.ModelDirection;
    rnn.bVQ = m_modelSetting.VQ != 0;
    rnn.ModelFile = m_modelSetting.ModelFile;
    rnn.SaveStep = m_modelSetting.SaveStep;
    rnn.MaxIter = m_modelSetting.MaxIteration;
    rnn.IsCRFTraining = m_modelSetting.IsCRFTraining;
    RNNHelper.LearningRate = m_modelSetting.LearningRate;
    RNNHelper.GradientCutoff = m_modelSetting.GradientCutoff;

    //Create tag-bigram transition probability matrix only for sequence RNN mode
    if (m_modelSetting.IsCRFTraining)
    {
        rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
    }

    Logger.WriteLine("");
    Logger.WriteLine("Iterative training begins ...");
    double lastPPL = double.MaxValue;
    double lastAlpha = RNNHelper.LearningRate;
    int iter = 0;
    while (true)
    {
        Logger.WriteLine("Cleaning training status...");
        rnn.CleanStatus();

        if (rnn.MaxIter > 0 && iter > rnn.MaxIter)
        {
            Logger.WriteLine("We have trained this model for {0} iterations, exit.", iter);
            break;
        }

        //Start to train the model
        double ppl = rnn.TrainNet(TrainingSet, iter);
        if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
        {
            //Even after reducing alpha, we still cannot get a better result.
            Logger.WriteLine("Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL);
            Logger.WriteLine("Current alpha: {0}, previous alpha: {1}", RNNHelper.LearningRate, lastAlpha);
            break;
        }
        lastAlpha = RNNHelper.LearningRate;

        //Validate the model on the validation corpus
        if (ValidationSet != null)
        {
            Logger.WriteLine("Verify model on validation corpus.");
            if (rnn.ValidateNet(ValidationSet, iter))
            {
                //We got a better result on the validation corpus, so save this model
                Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
                rnn.SaveModel(m_modelSetting.ModelFile);
            }
        }
        else if (ppl < lastPPL)
        {
            //We have no validation corpus, but we got a better result on the training corpus, so save this model
            Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
            rnn.SaveModel(m_modelSetting.ModelFile);
        }

        if (ppl >= lastPPL)
        {
            //We cannot get a better result on the training corpus, so reduce the learning rate
            RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f;
        }

        lastPPL = ppl;
        iter++;
    }
}
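Both Train() variants above use the same halve-on-plateau learning-rate schedule: alpha is halved whenever training perplexity fails to improve, and training ends once perplexity still fails to improve after a halving. The standalone class below restates only that schedule in isolation; its name and TryStep method are illustrative and not part of RNNSharp.

//Illustrative only: a minimal halve-on-plateau schedule mirroring the training loops above.
//Names (PlateauSchedule, TryStep) are hypothetical, not taken from RNNSharp.
public class PlateauSchedule
{
    public float LearningRate { get; private set; }
    private double lastPpl = double.MaxValue;
    private float lastAlpha;

    public PlateauSchedule(float initialLearningRate)
    {
        LearningRate = initialLearningRate;
        lastAlpha = initialLearningRate;
    }

    //Returns false when training should stop: perplexity did not improve
    //even though the learning rate was already reduced in a previous step.
    public bool TryStep(double ppl)
    {
        if (ppl >= lastPpl && lastAlpha != LearningRate)
        {
            return false;            //reducing alpha no longer helps; end training early
        }
        lastAlpha = LearningRate;
        if (ppl >= lastPpl)
        {
            LearningRate /= 2.0f;    //no improvement on the training corpus; halve alpha
        }
        lastPpl = ppl;
        return true;
    }
}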