Example #1
        public static LSTMLayer Load(BinaryReader br, LayerType layerType)
        {
            LSTMLayerConfig config = new LSTMLayerConfig();

            config.LayerSize = br.ReadInt32();
            config.LayerType = layerType;
            LSTMLayer layer = new LSTMLayer(config);

            layer.SparseFeatureSize = br.ReadInt32();
            layer.DenseFeatureSize  = br.ReadInt32();

            //Create cells of each layer
            layer.InitializeCellWeights(br);

            //Load the weight matrices between adjacent layers
            //weights: input -> hidden
            if (layer.SparseFeatureSize > 0)
            {
                Logger.WriteLine("Loading sparse feature weights...");
                layer.sparseFeatureWeights = LoadLSTMWeights(br);
            }

            if (layer.DenseFeatureSize > 0)
            {
                //weights: dense features -> hidden
                Logger.WriteLine("Loading dense feature weights...");
                layer.wDenseInputGate  = LoadLSTMGateWeights(br);
                layer.wDenseCellGate   = LoadLSTMGateWeights(br);
                layer.wDenseForgetGate = LoadLSTMGateWeights(br);
                layer.wDenseOutputGate = LoadLSTMGateWeights(br);
            }

            return layer;
        }
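A minimal sketch of how this loader might be called when reading a single layer back from disk; the file path and reader setup here are hypothetical, and the stream is assumed to already be positioned at the layer's data.

        //Hypothetical call site for LSTMLayer.Load (requires System.IO)
        using (var br = new BinaryReader(File.OpenRead("model.bin"))) //path is hypothetical
        {
            LSTMLayer layer = LSTMLayer.Load(br, LayerType.LSTM);
            Logger.WriteLine("Loaded LSTM layer, size = {0}", layer.LayerSize);
        }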
Example #2
        //Note: the "Wegiths" spelling is kept because this override must match
        //the base-class method name to compile
        public override SimpleLayer CreateLayerSharedWegiths()
        {
            LSTMLayer layer = new LSTMLayer(config);

            ShallowCopyWeightTo(layer);

            return layer;
        }
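A hedged sketch of the intended usage: one shared-weight clone per training thread, so each worker keeps private activations and error buffers while the large weight matrices are allocated only once. masterLayer and threadCount are hypothetical names.

        //Hypothetical: per-thread layer copies that share weight storage
        var workerLayers = new List<SimpleLayer>();
        for (int t = 0; t < threadCount; t++) //threadCount is hypothetical
        {
            workerLayers.Add(masterLayer.CreateLayerSharedWegiths());
        }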
Example #3
        public override void LoadModel(string filename)
        {
            Logger.WriteLine("Loading SimpleRNN model: {0}", filename);

            StreamReader sr = new StreamReader(filename);
            BinaryReader br = new BinaryReader(sr.BaseStream);

            int modelType = br.ReadInt32();

            ModelDirection = (MODELDIRECTION)br.ReadInt32();

            int iflag = br.ReadInt32();

            IsCRFTraining = (iflag == 1);

            //Create cells of each layer
            int layerSize = br.ReadInt32();

            HiddenLayerList = new List <SimpleLayer>();
            for (int i = 0; i < layerSize; i++)
            {
                SimpleLayer layer = null;
                if (modelType == 0)
                {
                    layer = new BPTTLayer();
                }
                else
                {
                    layer = new LSTMLayer();
                }

                layer.Load(br);
                HiddenLayerList.Add(layer);
            }

            OutputLayer = new SimpleLayer();
            OutputLayer.Load(br);

            if (iflag == 1)
            {
                Logger.WriteLine("Loading CRF tag trans weights...");
                CRFTagTransWeights = RNNHelper.LoadMatrix(br);
            }

            sr.Close();
        }
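One robustness note: unlike Examples #10 and #15, this loader closes its stream only on the success path, so an exception thrown mid-read leaks the file handle. A sketch of the same setup with deterministic disposal, the reads themselves unchanged:

        //Sketch: identical reads, but the stream is disposed even on exceptions
        using (var sr = new StreamReader(filename))
        {
            var br = new BinaryReader(sr.BaseStream);
            //... same reads as above ...
        }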
Example #4
        public override void LoadModel(string filename)
        {
            Logger.WriteLine("Loading SimpleRNN model: {0}", filename);

            var sr = new StreamReader(filename);
            var br = new BinaryReader(sr.BaseStream);

            var layerType = (LAYERTYPE)br.ReadInt32();

            IsCRFTraining = br.ReadBoolean();

            //Create cells of each layer
            var layerSize = br.ReadInt32();

            HiddenLayerList = new List <SimpleLayer>();
            for (var i = 0; i < layerSize; i++)
            {
                SimpleLayer layer;
                if (layerType == LAYERTYPE.BPTT)
                {
                    layer = new BPTTLayer();
                }
                else
                {
                    layer = new LSTMLayer();
                }

                layer.Load(br);
                HiddenLayerList.Add(layer);
            }

            Logger.WriteLine("Create output layer");
            OutputLayer = new SimpleLayer();
            OutputLayer.Load(br);

            if (IsCRFTraining)
            {
                Logger.WriteLine("Loading CRF tag trans weights...");
                CRFTagTransWeights = RNNHelper.LoadMatrix(br);
            }

            sr.Close();
        }
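The read order in this loader pins down the on-disk layout: a layer-type tag, the CRF flag, the hidden-layer count, each hidden layer, the output layer, and finally the CRF transition matrix. Below is a minimal sketch of a matching writer; layer.Save(BinaryWriter) and RNNHelper.SaveMatrix are assumed counterparts of the Load calls above, not confirmed API.

        //Hypothetical mirror of the read order in LoadModel above.
        //layer.Save(bw) and RNNHelper.SaveMatrix(bw, m) are assumed
        //counterparts of Load(br) / LoadMatrix(br), not confirmed API.
        public void SaveModelSketch(string filename, LAYERTYPE layerType)
        {
            using (var bw = new BinaryWriter(File.Create(filename)))
            {
                bw.Write((int)layerType);
                bw.Write(IsCRFTraining);
                bw.Write(HiddenLayerList.Count);
                foreach (var layer in HiddenLayerList)
                {
                    layer.Save(bw);
                }

                OutputLayer.Save(bw);

                if (IsCRFTraining)
                {
                    RNNHelper.SaveMatrix(bw, CRFTagTransWeights);
                }
            }
        }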
Example #5
        public override void ComputeLayerErr(SimpleLayer nextLayer)
        {
            LSTMLayer layer = nextLayer as LSTMLayer;

            if (layer != null)
            {
                Parallel.For(0, LayerSize, parallelOption, i =>
                {
                    er[i] = 0.0;
                    for (int k = 0; k < nextLayer.LayerSize; k++)
                    {
                        er[i] += layer.er[k] * layer.feature2hidden[k][i].W;
                    }
                });
            }
            else
            {
                base.ComputeLayerErr(nextLayer);
            }
        }
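The parallel body computes the standard backpropagated error across a fully connected link: each unit's error is the weight-transposed sum of the next layer's errors, er[i] = sum over k of layer.er[k] * feature2hidden[k][i].W. A serial equivalent, for reference only:

        //Serial equivalent of the Parallel.For body above (reference only)
        for (int i = 0; i < LayerSize; i++)
        {
            double sum = 0.0;
            for (int k = 0; k < layer.LayerSize; k++)
            {
                sum += layer.er[k] * layer.feature2hidden[k][i].W;
            }
            er[i] = sum;
        }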
Example #6
        public override void ShallowCopyWeightTo(SimpleLayer destLayer)
        {
            //destLayer is expected to be an LSTMLayer; if it is not, the
            //as-cast yields null and the assignments below will throw
            LSTMLayer layer = destLayer as LSTMLayer;

            layer.SparseFeatureSize = SparseFeatureSize;
            layer.DenseFeatureSize  = DenseFeatureSize;

            layer.sparseFeatureWeights      = sparseFeatureWeights;
            layer.sparseFeatureWeightsDelta = sparseFeatureWeightsDelta;
            layer.sparseFeatureLearningRate = sparseFeatureLearningRate;

            layer.wDenseCellGate   = wDenseCellGate.CloneSharedWeights();
            layer.wDenseForgetGate = wDenseForgetGate.CloneSharedWeights();
            layer.wDenseInputGate  = wDenseInputGate.CloneSharedWeights();
            layer.wDenseOutputGate = wDenseOutputGate.CloneSharedWeights();

            layer.CellWeights   = CellWeights;
            layer.cellDelta     = cellDelta;
            layer.peepholeDelta = peepholeDelta;

            layer.InitializeInternalTrainingParameters();
        }
Example #7
        public override void ShallowCopyWeightTo(SimpleLayer destLayer)
        {
            //destLayer is expected to be an LSTMLayer; if it is not, the
            //as-cast yields null and the assignments below will throw
            LSTMLayer layer = destLayer as LSTMLayer;

            layer.SparseFeatureSize = SparseFeatureSize;
            layer.DenseFeatureSize  = DenseFeatureSize;

            layer.sparseFeatureWeights = sparseFeatureWeights;
            layer.sparseFeatureToHiddenLearningRate = sparseFeatureToHiddenLearningRate;

            layer.wDenseCellGate   = wDenseCellGate.CloneSharedWeights();
            layer.wDenseForgetGate = wDenseForgetGate.CloneSharedWeights();
            layer.wDenseInputGate  = wDenseInputGate.CloneSharedWeights();
            layer.wDenseOutputGate = wDenseOutputGate.CloneSharedWeights();

            layer.CellWeights = CellWeights;

            layer.lockerDenseFeature  = lockerDenseFeature;
            layer.lockerSparseFeature = lockerSparseFeature;
            layer.cellLockers         = cellLockers;

            layer.InitializeInternalTrainingParameters();
        }
Example #8
        public override void ComputeLayerErr(SimpleLayer nextLayer, double[] destErrLayer, double[] srcErrLayer)
        {
            LSTMLayer layer = nextLayer as LSTMLayer;

            if (layer != null)
            {
                Parallel.For(0, LayerSize, parallelOption, i =>
                {
                    destErrLayer[i] = 0.0;
                    if (!mask[i])
                    {
                        for (int k = 0; k < nextLayer.LayerSize; k++)
                        {
                            destErrLayer[i] += srcErrLayer[k] * layer.feature2hidden[k][i].W;
                        }
                    }
                });
            }
            else
            {
                base.ComputeLayerErr(nextLayer, destErrLayer, srcErrLayer);
            }
        }
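This overload differs from Example #5 in two ways: errors are written into caller-supplied buffers, so several error vectors can be propagated without aliasing the layer's own state, and units disabled by the dropout mask keep a zero error. A serial equivalent of the masked body:

        //Serial equivalent of the masked Parallel.For body above
        for (int i = 0; i < LayerSize; i++)
        {
            destErrLayer[i] = 0.0;
            if (!mask[i]) //dropped-out units receive no error signal
            {
                for (int k = 0; k < layer.LayerSize; k++)
                {
                    destErrLayer[i] += srcErrLayer[k] * layer.feature2hidden[k][i].W;
                }
            }
        }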
Example #9
        private RNN <T> CreateNetwork()
        {
            RNN <T> rnn;

            if (modelDirection == MODELDIRECTION.Forward)
            {
                var sparseFeatureSize = TrainingSet.SparseFeatureSize;
                if (ModelType == MODELTYPE.Seq2Seq)
                {
                    //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence]
                    sparseFeatureSize += featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize;
                    Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}",
                                     TrainingSet.SparseFeatureSize, featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize,
                                     sparseFeatureSize);
                }

                var hiddenLayers = new List <SimpleLayer>();
                for (var i = 0; i < hiddenLayersConfig.Count; i++)
                {
                    SimpleLayer layer = null;
                    switch (hiddenLayersConfig[i].LayerType)
                    {
                    case LayerType.BPTT:
                        var bpttLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                        layer = bpttLayer;
                        Logger.WriteLine("Create BPTT layer.");
                        break;

                    case LayerType.LSTM:
                        var lstmLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                        layer = lstmLayer;
                        Logger.WriteLine("Create LSTM layer.");
                        break;

                    case LayerType.DropOut:
                        var dropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);
                        layer = dropoutLayer;
                        Logger.WriteLine("Create Dropout layer.");
                        break;
                    }

                    layer.InitializeWeights(sparseFeatureSize,
                                            i == 0
                            ? GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize)
                            : GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize));

                    Logger.WriteLine(
                        $"Create hidden layer {i}: size = {layer.LayerSize}, sparse feature size = {layer.SparseFeatureSize}, dense feature size = {layer.DenseFeatureSize}");
                    hiddenLayers.Add(layer);
                }

                SimpleLayer outputLayer = null;
                outputLayerConfig.LayerSize = TrainingSet.TagSize;

                switch (outputLayerConfig.LayerType)
                {
                case LayerType.NCESoftmax:
                    Logger.WriteLine("Create NCESoftmax layer as output layer");
                    var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig);
                    nceOutputLayer.InitializeWeights(0,
                                                     GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    outputLayer = nceOutputLayer;
                    break;

                case LayerType.Softmax:
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(outputLayerConfig);
                    outputLayer.InitializeWeights(sparseFeatureSize,
                                                  GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    break;
                }

                rnn = new ForwardRNN <T>(hiddenLayers, outputLayer);
            }
            else
            {
                var forwardHiddenLayers  = new List <SimpleLayer>();
                var backwardHiddenLayers = new List <SimpleLayer>();
                for (var i = 0; i < hiddenLayersConfig.Count; i++)
                {
                    SimpleLayer forwardLayer  = null;
                    SimpleLayer backwardLayer = null;
                    switch (hiddenLayersConfig[i].LayerType)
                    {
                    case LayerType.BPTT:
                        //For BPTT layer
                        var forwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                        forwardLayer = forwardBPTTLayer;

                        var backwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig);
                        backwardLayer = backwardBPTTLayer;

                        Logger.WriteLine("Create BPTT layer.");
                        break;

                    case LayerType.LSTM:
                        //For LSTM layer
                        var forwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                        forwardLayer = forwardLSTMLayer;

                        var backwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig);
                        backwardLayer = backwardLSTMLayer;

                        Logger.WriteLine("Create LSTM layer.");
                        break;

                    case LayerType.DropOut:
                        //Assign the new layers here; leaving forwardLayer and
                        //backwardLayer null would make InitializeWeights below throw
                        forwardLayer  = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);
                        backwardLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig);

                        Logger.WriteLine("Create Dropout layer.");
                        break;
                    }

                    if (i == 0)
                    {
                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                    }
                    else
                    {
                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize,
                                                       forwardHiddenLayers[i - 1].LayerSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize,
                                                        backwardHiddenLayers[i - 1].LayerSize);
                    }

                    Logger.WriteLine(
                        $"Create hidden layer {i}: size = {forwardLayer.LayerSize}, sparse feature size = {forwardLayer.SparseFeatureSize}, dense feature size = {forwardLayer.DenseFeatureSize}");

                    forwardHiddenLayers.Add(forwardLayer);
                    backwardHiddenLayers.Add(backwardLayer);
                }

                SimpleLayer outputLayer = null;
                outputLayerConfig.LayerSize = TrainingSet.TagSize;
                switch (outputLayerConfig.LayerType)
                {
                case LayerType.NCESoftmax:
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig);
                    nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                    break;

                case LayerType.Softmax:
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(outputLayerConfig);
                    outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize,
                                                  forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    break;
                }

                rnn = new BiRNN <T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
            }

            return rnn;
        }
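A hedged sketch of the configuration this factory consumes: an ordered list of hidden-layer configs plus an output-layer config. Only the LayerSize and LayerType properties appear in the example above; LayerConfig as the shared base type is an assumption. Note that the factory fills in the output layer's LayerSize from TrainingSet.TagSize itself.

        //Hypothetical setup for CreateNetwork(): two stacked LSTM layers and
        //a Softmax output layer. LayerConfig as the base type is an assumption.
        hiddenLayersConfig = new List<LayerConfig>
        {
            new LSTMLayerConfig { LayerSize = 200, LayerType = LayerType.LSTM },
            new LSTMLayerConfig { LayerSize = 200, LayerType = LayerType.LSTM }
        };
        outputLayerConfig = new LayerConfig { LayerType = LayerType.Softmax }; //hypothetical base type
        var rnn = CreateNetwork();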
Example #10
        public override void LoadModel(string filename)
        {
            Logger.WriteLine(Logger.Level.info, "Loading bi-directional model: {0}", filename);

            using (StreamReader sr = new StreamReader(filename))
            {
                BinaryReader br = new BinaryReader(sr.BaseStream);

                int modelType = br.ReadInt32();
                ModelDirection = (MODELDIRECTION)br.ReadInt32();

                int iflag = br.ReadInt32();
                IsCRFTraining = (iflag == 1);

                int layerSize = br.ReadInt32();

                //Load forward layers from file
                forwardHiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < layerSize; i++)
                {
                    SimpleLayer layer = null;
                    if (modelType == 0)
                    {
                        Logger.WriteLine("Create BPTT hidden layer");
                        layer = new BPTTLayer();
                    }
                    else
                    {
                        Logger.WriteLine("Crate LSTM hidden layer");
                        layer = new LSTMLayer();
                    }

                    layer.Load(br);
                    forwardHiddenLayers.Add(layer);
                }

                //Load backward layers from file
                backwardHiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < layerSize; i++)
                {
                    SimpleLayer layer = null;
                    if (modelType == 0)
                    {
                        Logger.WriteLine("Create BPTT hidden layer");
                        layer = new BPTTLayer();
                    }
                    else
                    {
                        Logger.WriteLine("Crate LSTM hidden layer");
                        layer = new LSTMLayer();
                    }

                    layer.Load(br);
                    backwardHiddenLayers.Add(layer);
                }

                OutputLayer = new SimpleLayer();
                OutputLayer.Load(br);

                if (iflag == 1)
                {
                    Logger.WriteLine("Loading CRF tag trans weights...");
                    CRFTagTransWeights = RNNHelper.LoadMatrix(br);
                }
            }
        }
Example #11
        public void Train()
        {
            RNN <T> rnn;

            if (ModelSettings.ModelDirection == 0)
            {
                int sparseFeatureSize = TrainingSet.SparseFeatureSize;
                if (ModelSettings.IsSeq2SeqTraining)
                {
                    //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence]
                    sparseFeatureSize += Featurizer.AutoEncoder.Featurizer.SparseFeatureSize;
                    Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}",
                                     TrainingSet.SparseFeatureSize, Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, sparseFeatureSize);
                }


                List <SimpleLayer> hiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer layer = null;
                    if (ModelSettings.ModelType == LayerType.BPTT)
                    {
                        BPTTLayer bpttLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        layer = bpttLayer;
                    }
                    else if (ModelSettings.ModelType == LayerType.LSTM)
                    {
                        LSTMLayer lstmLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        layer = lstmLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalid hidden layer type: {0}", ModelSettings.ModelType));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, TrainingSet.DenseFeatureSize);

                        if (ModelSettings.IsSeq2SeqTraining)
                        {
                            Logger.WriteLine("For seq2seq training, we have {0} sprase feature and {1} dense feature from source sequence.",
                                             Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, Featurizer.AutoEncoder.GetTopHiddenLayerSize());
                        }

                        layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize));
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, hiddenLayers[i - 1].LayerSize);

                        layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize));
                    }
                    hiddenLayers.Add(layer);
                }

                if (ModelSettings.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layer");

                    DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, ModelSettings);
                    dropoutLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    hiddenLayers.Add(dropoutLayer);
                }

                SimpleLayer outputLayer;
                if (ModelSettings.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings);
                    nceOutputLayer.BuildStatisticData <T>(TrainingSet);
                    nceOutputLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                    outputLayer = nceOutputLayer;
                }
                else if (ModelSettings.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize));
                }
                else
                {
                    throw new System.Exception(string.Format("Invalid output layer type: {0}", ModelSettings.OutputLayerType));
                }

                rnn = new ForwardRNN <T>(hiddenLayers, outputLayer);
            }
            else
            {
                List <SimpleLayer> forwardHiddenLayers  = new List <SimpleLayer>();
                List <SimpleLayer> backwardHiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer forwardLayer  = null;
                    SimpleLayer backwardLayer = null;
                    if (ModelSettings.ModelType == LayerType.BPTT)
                    {
                        //For BPTT layer
                        BPTTLayer forwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        forwardLayer = forwardBPTTLayer;

                        BPTTLayer backwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        backwardLayer = backwardBPTTLayer;
                    }
                    else if (ModelSettings.ModelType == LayerType.LSTM)
                    {
                        //For LSTM layer
                        LSTMLayer forwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        forwardLayer = forwardLSTMLayer;

                        LSTMLayer backwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings);
                        backwardLayer = backwardLSTMLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalid hidden layer type: {0}", ModelSettings.ModelType));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);

                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize);
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize);

                        forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize);
                        backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, backwardHiddenLayers[i - 1].LayerSize);
                    }

                    forwardHiddenLayers.Add(forwardLayer);
                    backwardHiddenLayers.Add(backwardLayer);
                }

                if (ModelSettings.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layers");
                    DropoutLayer forwardDropoutLayer  = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, ModelSettings);
                    DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, ModelSettings);

                    forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize);

                    forwardHiddenLayers.Add(forwardDropoutLayer);
                    backwardHiddenLayers.Add(backwardDropoutLayer);
                }

                SimpleLayer outputLayer;
                if (ModelSettings.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings);
                    nceOutputLayer.BuildStatisticData <T>(TrainingSet);
                    nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                }
                else if (ModelSettings.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                }
                else
                {
                    throw new System.Exception(string.Format("Invalid output layer type: {0}", ModelSettings.OutputLayerType));
                }

                rnn = new BiRNN <T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
            }

            rnn.ModelDirection = (MODELDIRECTION)ModelSettings.ModelDirection;
            rnn.bVQ            = ModelSettings.VQ != 0;
            rnn.ModelFile      = ModelSettings.ModelFile;
            rnn.SaveStep       = ModelSettings.SaveStep;
            rnn.MaxIter        = ModelSettings.MaxIteration;
            rnn.IsCRFTraining  = ModelSettings.IsCRFTraining;
            rnn.ModelType      = ModelSettings.IsSeq2SeqTraining ? MODELTYPE.SEQ2SEQ : MODELTYPE.SEQLABEL;

            if (rnn.ModelDirection == MODELDIRECTION.BI_DIRECTIONAL && rnn.ModelType == MODELTYPE.SEQ2SEQ)
            {
                throw new System.Exception("Bi-directional RNN model doesn't support sequence-to-sequence model.");
            }

            RNNHelper.LearningRate   = ModelSettings.LearningRate;
            RNNHelper.GradientCutoff = ModelSettings.GradientCutoff;
            RNNHelper.IsConstAlpha   = ModelSettings.IsConstAlpha;

            //Create tag-bigram transition probability matrix only for sequence RNN mode
            if (ModelSettings.IsCRFTraining)
            {
                rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
            }

            Logger.WriteLine("");

            Logger.WriteLine("Iterative training begins ...");
            double lastPPL   = double.MaxValue;
            double lastAlpha = RNNHelper.LearningRate;
            int    iter      = 0;

            while (true)
            {
                Logger.WriteLine("Cleaning training status...");
                rnn.CleanStatus();

                if (rnn.MaxIter > 0 && iter > rnn.MaxIter)
                {
                    Logger.WriteLine("We have trained this model {0} iteration, exit.");
                    break;
                }

                //Start to train model
                double ppl = rnn.TrainNet(TrainingSet, iter);
                if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
                {
                    //Even after reducing alpha, we still cannot get a better result
                    Logger.WriteLine("Current perplexity ({0}) is not better than the previous one ({1}). Ending training early.", ppl, lastPPL);
                    Logger.WriteLine("Current alpha: {0}, previous alpha: {1}", RNNHelper.LearningRate, lastAlpha);
                    break;
                }
                lastAlpha = RNNHelper.LearningRate;

                //Validate the model on the validation corpus
                if (ValidationSet != null)
                {
                    Logger.WriteLine("Verifying model on the validation corpus.");
                    if (rnn.ValidateNet(ValidationSet, iter))
                    {
                        //We got better result on validated corpus, save this model
                        Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile);
                        rnn.SaveModel(ModelSettings.ModelFile);
                    }
                }
                else if (ppl < lastPPL)
                {
                    //No validation corpus, but we got a better result on the
                    //training corpus, so save this model
                    Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile);
                    rnn.SaveModel(ModelSettings.ModelFile);
                }

                if (ppl >= lastPPL)
                {
                    //We cannot get a better result on training corpus, so reduce learning rate
                    RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f;
                }

                lastPPL = ppl;

                iter++;
            }
        }
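The loop encodes a simple halving schedule: whenever perplexity fails to improve, the learning rate is halved, and training stops once a halving still brings no improvement. The policy in isolation, as a minimal sketch over the same rnn and TrainingSet:

        //The learning-rate policy above, isolated (minimal sketch)
        double lastPPL   = double.MaxValue;
        double lastAlpha = RNNHelper.LearningRate;
        int    iter      = 0;
        while (true)
        {
            double ppl = rnn.TrainNet(TrainingSet, iter);
            if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
            {
                break; //a halving did not help: stop training
            }
            lastAlpha = RNNHelper.LearningRate;
            if (ppl >= lastPPL)
            {
                RNNHelper.LearningRate /= 2.0f; //no gain this pass: halve the rate
            }
            lastPPL = ppl;
            iter++;
        }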
Example #12
        public void Train()
        {
            RNN rnn;

            if (m_modelSetting.ModelDirection == 0)
            {
                List <SimpleLayer> hiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer layer = null;
                    if (m_modelSetting.ModelType == LayerType.BPTT)
                    {
                        BPTTLayer bpttLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        layer = bpttLayer;
                    }
                    else if (m_modelSetting.ModelType == LayerType.LSTM)
                    {
                        LSTMLayer lstmLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        layer = lstmLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalid hidden layer type: {0}", m_modelSetting.ModelType));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());

                        layer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize);

                        layer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize);
                    }
                    hiddenLayers.Add(layer);
                }


                if (m_modelSetting.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layer");
                    DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, m_modelSetting);
                    dropoutLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize);
                    hiddenLayers.Add(dropoutLayer);
                }

                SimpleLayer outputLayer;
                if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting);
                    nceOutputLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                }
                else if (m_modelSetting.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[hiddenLayers.Count - 1].LayerSize);
                }
                else
                {
                    throw new System.Exception(string.Format("Invalid output layer type: {0}", m_modelSetting.OutputLayerType));
                }

                rnn = new ForwardRNN(hiddenLayers, outputLayer);
            }
            else
            {
                List <SimpleLayer> forwardHiddenLayers  = new List <SimpleLayer>();
                List <SimpleLayer> backwardHiddenLayers = new List <SimpleLayer>();
                for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++)
                {
                    SimpleLayer forwardLayer  = null;
                    SimpleLayer backwardLayer = null;
                    if (m_modelSetting.ModelType == LayerType.BPTT)
                    {
                        //For BPTT layer
                        BPTTLayer forwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        forwardLayer = forwardBPTTLayer;

                        BPTTLayer backwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        backwardLayer = backwardBPTTLayer;
                    }
                    else if (m_modelSetting.ModelType == LayerType.LSTM)
                    {
                        //For LSTM layer
                        LSTMLayer forwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        forwardLayer = forwardLSTMLayer;

                        LSTMLayer backwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting);
                        backwardLayer = backwardLSTMLayer;
                    }
                    else
                    {
                        throw new System.Exception(string.Format("Invalid hidden layer type: {0}", m_modelSetting.ModelType));
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());

                        forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                        backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                                         i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize);

                        forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize);
                        backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), backwardHiddenLayers[i - 1].LayerSize);
                    }

                    forwardHiddenLayers.Add(forwardLayer);
                    backwardHiddenLayers.Add(backwardLayer);
                }

                if (m_modelSetting.Dropout > 0)
                {
                    Logger.WriteLine("Adding dropout layers");
                    DropoutLayer forwardDropoutLayer  = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, m_modelSetting);
                    DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, m_modelSetting);

                    forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize);

                    forwardHiddenLayers.Add(forwardDropoutLayer);
                    backwardHiddenLayers.Add(backwardDropoutLayer);
                }

                SimpleLayer outputLayer;
                if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax)
                {
                    Logger.WriteLine("Create NCESoftmax layer as output layer.");
                    NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting);
                    nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                    outputLayer = nceOutputLayer;
                }
                else if (m_modelSetting.OutputLayerType == LayerType.Softmax)
                {
                    Logger.WriteLine("Create Softmax layer as output layer.");
                    outputLayer = new SimpleLayer(TrainingSet.TagSize);
                    outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize);
                }
                else
                {
                    throw new System.Exception(string.Format("Invalid output layer type: {0}", m_modelSetting.OutputLayerType));
                }

                rnn = new BiRNN(forwardHiddenLayers, backwardHiddenLayers, outputLayer);
            }

            rnn.ModelDirection       = (MODELDIRECTION)m_modelSetting.ModelDirection;
            rnn.bVQ                  = m_modelSetting.VQ != 0;
            rnn.ModelFile            = m_modelSetting.ModelFile;
            rnn.SaveStep             = m_modelSetting.SaveStep;
            rnn.MaxIter              = m_modelSetting.MaxIteration;
            rnn.IsCRFTraining        = m_modelSetting.IsCRFTraining;
            RNNHelper.LearningRate   = m_modelSetting.LearningRate;
            RNNHelper.GradientCutoff = m_modelSetting.GradientCutoff;

            //Create tag-bigram transition probability matrix only for sequence RNN mode
            if (m_modelSetting.IsCRFTraining)
            {
                rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
            }

            Logger.WriteLine("");

            Logger.WriteLine("Iterative training begins ...");
            double lastPPL   = double.MaxValue;
            double lastAlpha = RNNHelper.LearningRate;
            int    iter      = 0;

            while (true)
            {
                Logger.WriteLine("Cleaning training status...");
                rnn.CleanStatus();

                if (rnn.MaxIter > 0 && iter > rnn.MaxIter)
                {
                    Logger.WriteLine("We have trained this model {0} iteration, exit.");
                    break;
                }

                //Start to train model
                double ppl = rnn.TrainNet(TrainingSet, iter);
                if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
                {
                    //Even after reducing alpha, we still cannot get a better result
                    Logger.WriteLine("Current perplexity ({0}) is not better than the previous one ({1}). Ending training early.", ppl, lastPPL);
                    Logger.WriteLine("Current alpha: {0}, previous alpha: {1}", RNNHelper.LearningRate, lastAlpha);
                    break;
                }
                lastAlpha = RNNHelper.LearningRate;

                //Validate the model on the validation corpus
                if (ValidationSet != null)
                {
                    Logger.WriteLine("Verifying model on the validation corpus.");
                    if (rnn.ValidateNet(ValidationSet, iter))
                    {
                        //We got better result on validated corpus, save this model
                        Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
                        rnn.SaveModel(m_modelSetting.ModelFile);
                    }
                }
                else if (ppl < lastPPL)
                {
                    //No validation corpus, but we got a better result on the
                    //training corpus, so save this model
                    Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
                    rnn.SaveModel(m_modelSetting.ModelFile);
                }

                if (ppl >= lastPPL)
                {
                    //We cannot get a better result on training corpus, so reduce learning rate
                    RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f;
                }

                lastPPL = ppl;

                iter++;
            }
        }
Example #13
        public override void LoadModel(string filename)
        {
            Logger.WriteLine(Logger.Level.info, "Loading bi-directional model: {0}", filename);

            using (StreamReader sr = new StreamReader(filename))
            {
                BinaryReader br = new BinaryReader(sr.BaseStream);

                int modelType = br.ReadInt32();
                ModelDirection = (MODELDIRECTION)br.ReadInt32();

                int iflag = br.ReadInt32();
                IsCRFTraining = (iflag == 1);

                int layerSize = br.ReadInt32();

                //Load forward layers from file
                forwardHiddenLayers = new List<SimpleLayer>();
                for (int i = 0; i < layerSize; i++)
                {
                    SimpleLayer layer = null;
                    if (modelType == 0)
                    {
                        Logger.WriteLine("Create BPTT hidden layer");
                        layer = new BPTTLayer();
                    }
                    else
                    {
                        Logger.WriteLine("Crate LSTM hidden layer");
                        layer = new LSTMLayer();
                    }

                    layer.Load(br);
                    forwardHiddenLayers.Add(layer);
                }

                //Load backward layers from file
                backwardHiddenLayers = new List<SimpleLayer>();
                for (int i = 0; i < layerSize; i++)
                {
                    SimpleLayer layer = null;
                    if (modelType == 0)
                    {
                        Logger.WriteLine("Create BPTT hidden layer");
                        layer = new BPTTLayer();
                    }
                    else
                    {
                        Logger.WriteLine("Crate LSTM hidden layer");
                        layer = new LSTMLayer();
                    }

                    layer.Load(br);
                    backwardHiddenLayers.Add(layer);
                }

                OutputLayer = new SimpleLayer();
                OutputLayer.Load(br);

                if (iflag == 1)
                {
                    Logger.WriteLine("Loading CRF tag trans weights...");
                    CRFTagTransWeights = RNNHelper.LoadMatrix(br);
                }
            }
        }
Example #14
        public void Train()
        {
            RNN rnn;

            if (m_modelSetting.ModelDirection == 0)
            {
                List<SimpleLayer> hiddenLayers = new List<SimpleLayer>();
                for (int i = 0; i < m_modelSetting.NumHidden.Count; i++)
                {
                    SimpleLayer layer = null;
                    if (m_modelSetting.ModelType == 0)
                    {
                        BPTTLayer bpttLayer = new BPTTLayer(m_modelSetting.NumHidden[i]);
                        bpttLayer.bptt = m_modelSetting.Bptt + 1;
                        bpttLayer.bptt_block = 10;
                        bpttLayer.Dropout = m_modelSetting.Dropout;
                        layer = bpttLayer;
                    }
                    else
                    {
                        LSTMLayer lstmLayer = new LSTMLayer(m_modelSetting.NumHidden[i]);
                        lstmLayer.Dropout = m_modelSetting.Dropout;
                        layer = lstmLayer;
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                            i, m_modelSetting.NumHidden[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());

                        layer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                            i, m_modelSetting.NumHidden[i], 0, hiddenLayers[i - 1].LayerSize);

                        layer.InitializeWeights(0, hiddenLayers[i - 1].LayerSize);
                    }
                    hiddenLayers.Add(layer);
                }

                rnn = new ForwardRNN(hiddenLayers, TrainingSet.TagSize);
            }
            else
            {
                List<SimpleLayer> forwardHiddenLayers = new List<SimpleLayer>();
                List<SimpleLayer> backwardHiddenLayers = new List<SimpleLayer>();
                for (int i = 0; i < m_modelSetting.NumHidden.Count; i++)
                {
                    SimpleLayer forwardLayer = null;
                    SimpleLayer backwardLayer = null;
                    if (m_modelSetting.ModelType == 0)
                    {
                        //For BPTT layer
                        BPTTLayer forwardBPTTLayer = new BPTTLayer(m_modelSetting.NumHidden[i]);
                        forwardBPTTLayer.bptt = m_modelSetting.Bptt + 1;
                        forwardBPTTLayer.bptt_block = 10;
                        forwardBPTTLayer.Dropout = m_modelSetting.Dropout;
                        forwardLayer = forwardBPTTLayer;

                        BPTTLayer backwardBPTTLayer = new BPTTLayer(m_modelSetting.NumHidden[i]);
                        backwardBPTTLayer.bptt = m_modelSetting.Bptt + 1;
                        backwardBPTTLayer.bptt_block = 10;
                        backwardBPTTLayer.Dropout = m_modelSetting.Dropout;
                        backwardLayer = backwardBPTTLayer;
                    }
                    else
                    {
                        //For LSTM layer
                        LSTMLayer forwardLSTMLayer = new LSTMLayer(m_modelSetting.NumHidden[i]);
                        forwardLSTMLayer.Dropout = m_modelSetting.Dropout;
                        forwardLayer = forwardLSTMLayer;

                        LSTMLayer backwardLSTMLayer = new LSTMLayer(m_modelSetting.NumHidden[i]);
                        backwardLSTMLayer.Dropout = m_modelSetting.Dropout;
                        backwardLayer = backwardLSTMLayer;
                    }

                    if (i == 0)
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                            i, m_modelSetting.NumHidden[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());

                        forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                        backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize());
                    }
                    else
                    {
                        Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}",
                            i, m_modelSetting.NumHidden[i], 0, forwardHiddenLayers[i - 1].LayerSize);

                        forwardLayer.InitializeWeights(0, forwardHiddenLayers[i - 1].LayerSize);
                        backwardLayer.InitializeWeights(0, backwardHiddenLayers[i - 1].LayerSize);
                    }

                    forwardHiddenLayers.Add(forwardLayer);
                    backwardHiddenLayers.Add(backwardLayer);
                }

                rnn = new BiRNN(forwardHiddenLayers, backwardHiddenLayers, TrainingSet.TagSize);
            }

            rnn.ModelDirection = (MODELDIRECTION)m_modelSetting.ModelDirection;
            rnn.bVQ = m_modelSetting.VQ != 0;
            rnn.ModelFile = m_modelSetting.ModelFile;
            rnn.SaveStep = m_modelSetting.SaveStep;
            rnn.MaxIter = m_modelSetting.MaxIteration;
            rnn.IsCRFTraining = m_modelSetting.IsCRFTraining;
            RNNHelper.LearningRate = m_modelSetting.LearningRate;
            RNNHelper.GradientCutoff = m_modelSetting.GradientCutoff;

            //Create tag-bigram transition probability matrix only for sequence RNN mode
            if (m_modelSetting.IsCRFTraining)
            {
                rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition);
            }

            Logger.WriteLine("");

            Logger.WriteLine("Iterative training begins ...");
            double lastPPL = double.MaxValue;
            double lastAlpha = RNNHelper.LearningRate;
            int iter = 0;
            while (true)
            {
                Logger.WriteLine("Cleaning training status...");
                rnn.CleanStatus();

                if (rnn.MaxIter > 0 && iter > rnn.MaxIter)
                {
                    Logger.WriteLine("We have trained this model {0} iteration, exit.");
                    break;
                }

                //Start to train model
                double ppl = rnn.TrainNet(TrainingSet, iter);
                if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate)
                {
                    //Even after reducing alpha, we still cannot get a better result
                    Logger.WriteLine("Current perplexity ({0}) is not better than the previous one ({1}). Ending training early.", ppl, lastPPL);
                    Logger.WriteLine("Current alpha: {0}, previous alpha: {1}", RNNHelper.LearningRate, lastAlpha);
                    break;
                }
                lastAlpha = RNNHelper.LearningRate;

                //Validate the model on the validation corpus
                if (ValidationSet != null)
                {
                    Logger.WriteLine("Verifying model on the validation corpus.");
                    if (rnn.ValidateNet(ValidationSet, iter))
                    {
                        //We got better result on validated corpus, save this model
                        Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
                        rnn.SaveModel(m_modelSetting.ModelFile);
                    }
                }
                else if (ppl < lastPPL)
                {
                    //No validation corpus, but we got a better result on the
                    //training corpus, so save this model
                    Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile);
                    rnn.SaveModel(m_modelSetting.ModelFile);
                }

                if (ppl >= lastPPL)
                {
                    //We cannot get a better result on training corpus, so reduce learning rate
                    RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f;
                }

                lastPPL = ppl;

                iter++;
            }
        }
Example #15
        public override void LoadModel(string filename)
        {
            Logger.WriteLine(Logger.Level.info, "Loading bi-directional model: {0}", filename);

            using (var sr = new StreamReader(filename))
            {
                var br = new BinaryReader(sr.BaseStream);

                var layerType = (LAYERTYPE)br.ReadInt32();
                IsCRFTraining = br.ReadBoolean();

                var layerSize = br.ReadInt32();
                //Load forward layers from file
                forwardHiddenLayers = new List <SimpleLayer>();
                for (var i = 0; i < layerSize; i++)
                {
                    SimpleLayer layer;
                    if (layerType == LAYERTYPE.BPTT)
                    {
                        Logger.WriteLine("Create BPTT hidden layer");
                        layer = new BPTTLayer();
                    }
                    else
                    {
                        Logger.WriteLine("Create LSTM hidden layer");
                        layer = new LSTMLayer();
                    }

                    layer.Load(br);
                    forwardHiddenLayers.Add(layer);
                }

                //Load backward layers from file
                backwardHiddenLayers = new List <SimpleLayer>();
                for (var i = 0; i < layerSize; i++)
                {
                    SimpleLayer layer;
                    if (layerType == LAYERTYPE.BPTT)
                    {
                        Logger.WriteLine("Create BPTT hidden layer");
                        layer = new BPTTLayer();
                    }
                    else
                    {
                        Logger.WriteLine("Create LSTM hidden layer");
                        layer = new LSTMLayer();
                    }

                    layer.Load(br);
                    backwardHiddenLayers.Add(layer);
                }

                Logger.WriteLine("Create output layer");
                OutputLayer = new SimpleLayer();
                OutputLayer.Load(br);

                if (IsCRFTraining)
                {
                    Logger.WriteLine("Loading CRF tag trans weights...");
                    CRFTagTransWeights = RNNHelper.LoadMatrix(br);
                }
            }
        }
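For reference, the two on-disk header layouts implied by the loaders in this listing, summarized from the BinaryReader calls above (a reading of the code, not a documented format):

        //Legacy header (Examples #3, #10, #13):    Newer header (Examples #4, #15):
        //  int32 modelType (0 = BPTT, else LSTM)     int32 layerType ((LAYERTYPE) cast)
        //  int32 ModelDirection                      bool  IsCRFTraining
        //  int32 iflag (1 = CRF training)            int32 layerSize
        //  int32 layerSize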