Example #1
0
        public void Train()
        {
            RNN <T> rnn;

            if (ModelSettings.ModelDirection == 0)
            {
                int sparseFeatureSize = TrainingSet.SparseFeatureSize;
                if (ModelSettings.IsSeq2SeqTraining)
                {
                    //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence]
                    sparseFeatureSize += Featurizer.AutoEncoder.Featurizer.SparseFeatureSize;
                    Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}",
                                     TrainingSet.SparseFeatureSize, Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, sparseFeatureSize);
                }


                List <SimpleLayer> hiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer layer = null;
                    if (ModelSettings.ModelType == LayerType.BPTT)
                    {
                        BPTTLayer bpttLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        layer = bpttLayer;
                    }
                    else if (ModelSettings.ModelType == LayerType.LSTM)
                    {
                        LSTMLayer lstmLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        layer = lstmLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", ModelSettings.ModelType.ToString()));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, TrainingSet.DenseFeatureSize);

                        if (ModelSettings.IsSeq2SeqTraining)
                        {
                            Logger.WriteLine("For seq2seq training, we have {0} sprase feature and {1} dense feature from source sequence.",
                                             Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, Featurizer.AutoEncoder.GetTopHiddenLayerSize());
                        }

                        layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize));
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, hiddenLayers[i - 1].LayerSize);

                        layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize));
                    }
                    hiddenLayers.Add(layer);
                }

                if (ModelSettings.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layer");

                    DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, ModelSettings);
                    dropoutLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    hiddenLayers.Add(dropoutLayer);
                }

                SimpleLayer outputLayer;
                if (ModelSettings.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings);
                    nceOutputLayer.BuildStatisticData <T>(TrainingSet);
                    nceOutputLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    outputLayer = nceOutputLayer;
                }
                else if (ModelSettings.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                }
                else
                {
                    throw new System.Exception(string.Format("Invalidate output layer type: {0}", ModelSettings.OutputLayerType.ToString()));
                }

                rnn = new ForwardRNN <T>(hiddenLayers, outputLayer);
            }
            else
            {
                List <SimpleLayer> forwardHiddenLayers  = new List <SimpleLayer>();
                List <SimpleLayer> backwardHiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer forwardLayer  = null;
                    SimpleLayer backwardLayer = null;
                    if (ModelSettings.ModelType == LayerType.BPTT)
                    {
                        //For BPTT layer
                        BPTTLayer forwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        forwardLayer = forwardBPTTLayer;

                        BPTTLayer backwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        backwardLayer = backwardBPTTLayer;
                    }
                    else if (ModelSettings.ModelType == LayerType.LSTM)
                    {
                        //For LSTM layer
                        LSTMLayer forwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        forwardLayer = forwardLSTMLayer;

                        LSTMLayer backwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        backwardLayer = backwardLSTMLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", ModelSettings.ModelType.ToString()));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);

                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize);

                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, backwardHiddenLayers[i - 1].LayerSize);
                    }

                    forwardHiddenLayers.Add(forwardLayer);
                    backwardHiddenLayers.Add(backwardLayer);
                }

                if (ModelSettings.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layers");
                    DropoutLayer forwardDropoutLayer  = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, ModelSettings);
                    DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, ModelSettings);

                    forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize);

                    forwardHiddenLayers.Add(forwardDropoutLayer);
                    backwardHiddenLayers.Add(backwardDropoutLayer);
                }

                SimpleLayer outputLayer;
                if (ModelSettings.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings);
                    nceOutputLayer.BuildStatisticData <T>(TrainingSet);
                    nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                }
                else if (ModelSettings.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                }
                else
                {
                    throw new System.Exception(string.Format("Invalidate output layer type: {0}", ModelSettings.OutputLayerType.ToString()));
                }

                rnn = new BiRNN <T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
            }

            rnn.ModelDirection = (MODELDIRECTION)ModelSettings.ModelDirection;
            rnn.bVQ            = (ModelSettings.VQ != 0) ? true : false;
            rnn.ModelFile      = ModelSettings.ModelFile;
            rnn.SaveStep       = ModelSettings.SaveStep;
            rnn.MaxIter        = ModelSettings.MaxIteration;
            rnn.IsCRFTraining  = ModelSettings.IsCRFTraining;
            rnn.ModelType      = ModelSettings.IsSeq2SeqTraining ? MODELTYPE.SEQ2SEQ : MODELTYPE.SEQLABEL;

            if (rnn.ModelDirection == MODELDIRECTION.BI_DIRECTIONAL && rnn.ModelType == MODELTYPE.SEQ2SEQ)
            {
                throw new System.Exception("Bi-directional RNN model doesn't support sequence-to-sequence model.");
            }

            RNNHelper.LearningRate   = ModelSettings.LearningRate;
            RNNHelper.GradientCutoff = ModelSettings.GradientCutoff;
            RNNHelper.IsConstAlpha   = ModelSettings.IsConstAlpha;

            //Create tag-bigram transition probability matrix only for sequence RNN mode
            if (ModelSettings.IsCRFTraining)
            {
                rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
            }

            Logger.WriteLine("");

            Logger.WriteLine("Iterative training begins ...");
            double lastPPL   = double.MaxValue;
            double lastAlpha = RNNHelper.LearningRate;
            int    iter      = 0;

            while (true)
            {
                Logger.WriteLine("Cleaning training status...");
                rnn.CleanStatus();

                if (rnn.MaxIter > 0 && iter > rnn.MaxIter)
                {
                    Logger.WriteLine("We have trained this model {0} iteration, exit.");
                    break;
                }

                //Start to train model
                double ppl = rnn.TrainNet(TrainingSet, iter);
                if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
                {
                    //Although we reduce alpha value, we still cannot get better result.
                    Logger.WriteLine("Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL);
                    Logger.WriteLine("Current alpha: {0}, the previous alpha: {1}", RNNHelper.LearningRate, lastAlpha);
                    break;
                }
                lastAlpha = RNNHelper.LearningRate;

                //Validate the model by validated corpus
                if (ValidationSet != null)
                {
                    Logger.WriteLine("Verify model on validated corpus.");
                    if (rnn.ValidateNet(ValidationSet, iter) == true)
                    {
                        //We got better result on validated corpus, save this model
                        Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile);
                        rnn.SaveModel(ModelSettings.ModelFile);
                    }
                }
                else if (ppl < lastPPL)
                {
                    //We don't have validate corpus, but we get a better result on training corpus
                    //We got better result on validated corpus, save this model
                    Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile);
                    rnn.SaveModel(ModelSettings.ModelFile);
                }

                if (ppl >= lastPPL)
                {
                    //We cannot get a better result on training corpus, so reduce learning rate
                    RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f;
                }

                lastPPL = ppl;

                iter++;
            }
        }
Example #2
0
        private RNN <T> CreateNetwork()
        {
            RNN <T> rnn;

            if (modelDirection == MODELDIRECTION.Forward)
            {
                var sparseFeatureSize = TrainingSet.SparseFeatureSize;
                if (ModelType == MODELTYPE.Seq2Seq)
                {
                    //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence]
                    sparseFeatureSize += featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize;
                    Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}",
                                     TrainingSet.SparseFeatureSize, featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize,
                                     sparseFeatureSize);
                }

                var hiddenLayers = new List <SimpleLayer>();
                for (var i = 0; i < hiddenLayersConfig.Count; i++)
                {
                    SimpleLayer layer = null;
                    switch (hiddenLayersConfig[i].LayerType)
                    {
                    case LayerType.BPTT:
                        var bpttLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                        layer = bpttLayer;
                        Logger.WriteLine("Create BPTT layer.");
                        break;

                    case LayerType.LSTM:
                        var lstmLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                        layer = lstmLayer;
                        Logger.WriteLine("Create LSTM layer.");
                        break;

                    case LayerType.DropOut:
                        var dropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);
                        layer = dropoutLayer;
                        Logger.WriteLine("Create Dropout layer.");
                        break;
                    }

                    layer.InitializeWeights(sparseFeatureSize,
                                            i == 0
                            ? GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize)
                            : GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize));

                    Logger.WriteLine(
                        $"Create hidden layer {i}: size = {layer.LayerSize}, sparse feature size = {layer.SparseFeatureSize}, dense feature size = {layer.DenseFeatureSize}");
                    hiddenLayers.Add(layer);
                }

                SimpleLayer outputLayer = null;
                outputLayerConfig.LayerSize = TrainingSet.TagSize;

                switch (outputLayerConfig.LayerType)
                {
                case LayerType.NCESoftmax:
                    Logger.WriteLine("Create NCESoftmax layer as output layer");
                    var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig);
                    nceOutputLayer.InitializeWeights(0,
                                                     GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    outputLayer = nceOutputLayer;
                    break;

                case LayerType.Softmax:
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(outputLayerConfig);
                    outputLayer.InitializeWeights(sparseFeatureSize,
                                                  GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    break;
                }

                rnn = new ForwardRNN <T>(hiddenLayers, outputLayer);
            }
            else
            {
                var forwardHiddenLayers  = new List <SimpleLayer>();
                var backwardHiddenLayers = new List <SimpleLayer>();
                for (var i = 0; i < hiddenLayersConfig.Count; i++)
                {
                    SimpleLayer forwardLayer  = null;
                    SimpleLayer backwardLayer = null;
                    switch (hiddenLayersConfig[i].LayerType)
                    {
                    case LayerType.BPTT:
                        //For BPTT layer
                        var forwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                        forwardLayer = forwardBPTTLayer;

                        var backwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                        backwardLayer = backwardBPTTLayer;

                        Logger.WriteLine("Create BPTT layer.");
                        break;

                    case LayerType.LSTM:
                        //For LSTM layer
                        var forwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                        forwardLayer = forwardLSTMLayer;

                        var backwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                        backwardLayer = backwardLSTMLayer;

                        Logger.WriteLine("Create LSTM layer.");
                        break;

                    case LayerType.DropOut:
                        var forwardDropoutLayer  = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);
                        var backwardDropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);

                        Logger.WriteLine("Create Dropout layer.");
                        break;
                    }

                    if (i == 0)
                    {
                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                    }
                    else
                    {
                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize,
                                                       forwardHiddenLayers[i - 1].LayerSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize,
                                                        backwardHiddenLayers[i - 1].LayerSize);
                    }

                    Logger.WriteLine(
                        $"Create hidden layer {i}: size = {forwardLayer.LayerSize}, sparse feature size = {forwardLayer.SparseFeatureSize}, dense feature size = {forwardLayer.DenseFeatureSize}");

                    forwardHiddenLayers.Add(forwardLayer);
                    backwardHiddenLayers.Add(backwardLayer);
                }

                SimpleLayer outputLayer = null;
                outputLayerConfig.LayerSize = TrainingSet.TagSize;
                switch (outputLayerConfig.LayerType)
                {
                case LayerType.NCESoftmax:
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig);
                    nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                    break;

                case LayerType.Softmax:
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(outputLayerConfig);
                    outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize,
                                                  forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    break;
                }

                rnn = new BiRNN <T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
            }

            return(rnn);
        }
Example #3
0
        public void Train()
        {
            RNN rnn;

            if (m_modelSetting.ModelDirection == 0)
            {
                List <SimpleLayer> hiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer layer = null;
                    if (m_modelSetting.ModelType == LayerType.BPTT)
                    {
                        BPTTLayer bpttLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        layer = bpttLayer;
                    }
                    else if (m_modelSetting.ModelType == LayerType.LSTM)
                    {
                        LSTMLayer lstmLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        layer = lstmLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", m_modelSetting.ModelType.ToString()));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());

                        layer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize);

                        layer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize);
                    }
                    hiddenLayers.Add(layer);
                }


                if (m_modelSetting.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layer");
                    DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, m_modelSetting);
                    dropoutLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize);
                    hiddenLayers.Add(dropoutLayer);
                }

                SimpleLayer outputLayer;
                if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting);
                    nceOutputLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                }
                else if (m_modelSetting.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[hiddenLayers.Count - 1].LayerSize);
                }
                else
                {
                    throw new System.Exception(string.Format("Invalidate output layer type: {0}", m_modelSetting.OutputLayerType.ToString()));
                }

                rnn = new ForwardRNN(hiddenLayers, outputLayer);
            }
            else
            {
                List <SimpleLayer> forwardHiddenLayers  = new List <SimpleLayer>();
                List <SimpleLayer> backwardHiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer forwardLayer  = null;
                    SimpleLayer backwardLayer = null;
                    if (m_modelSetting.ModelType == LayerType.BPTT)
                    {
                        //For BPTT layer
                        BPTTLayer forwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        forwardLayer = forwardBPTTLayer;

                        BPTTLayer backwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        backwardLayer = backwardBPTTLayer;
                    }
                    else if (m_modelSetting.ModelType == LayerType.LSTM)
                    {
                        //For LSTM layer
                        LSTMLayer forwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        forwardLayer = forwardLSTMLayer;

                        LSTMLayer backwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        backwardLayer = backwardLSTMLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", m_modelSetting.ModelType.ToString()));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());

                        forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                        backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize);

                        forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize);
                        backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), backwardHiddenLayers[i - 1].LayerSize);
                    }

                    forwardHiddenLayers.Add(forwardLayer);
                    backwardHiddenLayers.Add(backwardLayer);
                }

                if (m_modelSetting.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layers");
                    DropoutLayer forwardDropoutLayer  = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, m_modelSetting);
                    DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, m_modelSetting);

                    forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize);

                    forwardHiddenLayers.Add(forwardDropoutLayer);
                    backwardHiddenLayers.Add(backwardDropoutLayer);
                }

                SimpleLayer outputLayer;
                if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting);
                    nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                }
                else if (m_modelSetting.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                }
                else
                {
                    throw new System.Exception(string.Format("Invalidate output layer type: {0}", m_modelSetting.OutputLayerType.ToString()));
                }

                rnn = new BiRNN(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
            }

            rnn.ModelDirection       = (MODELDIRECTION)m_modelSetting.ModelDirection;
            rnn.bVQ                  = (m_modelSetting.VQ != 0) ? true : false;
            rnn.ModelFile            = m_modelSetting.ModelFile;
            rnn.SaveStep             = m_modelSetting.SaveStep;
            rnn.MaxIter              = m_modelSetting.MaxIteration;
            rnn.IsCRFTraining        = m_modelSetting.IsCRFTraining;
            RNNHelper.LearningRate   = m_modelSetting.LearningRate;
            RNNHelper.GradientCutoff = m_modelSetting.GradientCutoff;

            //Create tag-bigram transition probability matrix only for sequence RNN mode
            if (m_modelSetting.IsCRFTraining)
            {
                rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
            }

            Logger.WriteLine("");

            Logger.WriteLine("Iterative training begins ...");
            double lastPPL   = double.MaxValue;
            double lastAlpha = RNNHelper.LearningRate;
            int    iter      = 0;

            while (true)
            {
                Logger.WriteLine("Cleaning training status...");
                rnn.CleanStatus();

                if (rnn.MaxIter > 0 && iter > rnn.MaxIter)
                {
                    Logger.WriteLine("We have trained this model {0} iteration, exit.");
                    break;
                }

                //Start to train model
                double ppl = rnn.TrainNet(TrainingSet, iter);
                if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
                {
                    //Although we reduce alpha value, we still cannot get better result.
                    Logger.WriteLine("Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL);
                    Logger.WriteLine("Current alpha: {0}, the previous alpha: {1}", RNNHelper.LearningRate, lastAlpha);
                    break;
                }
                lastAlpha = RNNHelper.LearningRate;

                //Validate the model by validated corpus
                if (ValidationSet != null)
                {
                    Logger.WriteLine("Verify model on validated corpus.");
                    if (rnn.ValidateNet(ValidationSet, iter) == true)
                    {
                        //We got better result on validated corpus, save this model
                        Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
                        rnn.SaveModel(m_modelSetting.ModelFile);
                    }
                }
                else if (ppl < lastPPL)
                {
                    //We don't have validate corpus, but we get a better result on training corpus
                    //We got better result on validated corpus, save this model
                    Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
                    rnn.SaveModel(m_modelSetting.ModelFile);
                }

                if (ppl >= lastPPL)
                {
                    //We cannot get a better result on training corpus, so reduce learning rate
                    RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f;
                }

                lastPPL = ppl;

                iter++;
            }
        }