public static LSTMLayer Load(BinaryReader br, LayerType layerType)
{
    LSTMLayerConfig config = new LSTMLayerConfig();
    config.LayerSize = br.ReadInt32();
    config.LayerType = layerType;

    LSTMLayer layer = new LSTMLayer(config);
    layer.SparseFeatureSize = br.ReadInt32();
    layer.DenseFeatureSize = br.ReadInt32();

    //Create cells of each layer
    layer.InitializeCellWeights(br);

    //Load weight matrix between each two layer pairs
    //weight input->hidden
    if (layer.SparseFeatureSize > 0)
    {
        Logger.WriteLine("Loading sparse feature weights...");
        layer.sparseFeatureWeights = LoadLSTMWeights(br);
    }

    if (layer.DenseFeatureSize > 0)
    {
        //weight fea->hidden
        Logger.WriteLine("Loading dense feature weights...");
        layer.wDenseInputGate = LoadLSTMGateWeights(br);
        layer.wDenseCellGate = LoadLSTMGateWeights(br);
        layer.wDenseForgetGate = LoadLSTMGateWeights(br);
        layer.wDenseOutputGate = LoadLSTMGateWeights(br);
    }

    return layer;
}
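//A minimal usage sketch, not part of the library source: the helper name, the file
//path handling and the LayerType value are assumptions for illustration. It only
//relies on the static LSTMLayer.Load(BinaryReader, LayerType) shown above plus the
//standard .NET File/BinaryReader APIs (requires "using System.IO;").
public static LSTMLayer LoadLayerFromFile(string path)
{
    using (var br = new BinaryReader(File.OpenRead(path)))
    {
        //Assumes the file contains a single serialized LSTM layer
        return LSTMLayer.Load(br, LayerType.LSTM);
    }
}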
public override SimpleLayer CreateLayerSharedWegiths()
{
    LSTMLayer layer = new LSTMLayer(config);
    ShallowCopyWeightTo(layer);
    return layer;
}
public override void LoadModel(string filename) { Logger.WriteLine("Loading SimpleRNN model: {0}", filename); StreamReader sr = new StreamReader(filename); BinaryReader br = new BinaryReader(sr.BaseStream); int modelType = br.ReadInt32(); ModelDirection = (MODELDIRECTION)br.ReadInt32(); int iflag = br.ReadInt32(); if (iflag == 1) { IsCRFTraining = true; } else { IsCRFTraining = false; } //Create cells of each layer int layerSize = br.ReadInt32(); HiddenLayerList = new List <SimpleLayer>(); for (int i = 0; i < layerSize; i++) { SimpleLayer layer = null; if (modelType == 0) { layer = new BPTTLayer(); } else { layer = new LSTMLayer(); } layer.Load(br); HiddenLayerList.Add(layer); } OutputLayer = new SimpleLayer(); OutputLayer.Load(br); if (iflag == 1) { Logger.WriteLine("Loading CRF tag trans weights..."); CRFTagTransWeights = RNNHelper.LoadMatrix(br); } sr.Close(); }
public override void LoadModel(string filename) { Logger.WriteLine("Loading SimpleRNN model: {0}", filename); var sr = new StreamReader(filename); var br = new BinaryReader(sr.BaseStream); var layerType = (LAYERTYPE)br.ReadInt32(); IsCRFTraining = br.ReadBoolean(); //Create cells of each layer var layerSize = br.ReadInt32(); HiddenLayerList = new List <SimpleLayer>(); for (var i = 0; i < layerSize; i++) { SimpleLayer layer; if (layerType == LAYERTYPE.BPTT) { layer = new BPTTLayer(); } else { layer = new LSTMLayer(); } layer.Load(br); HiddenLayerList.Add(layer); } Logger.WriteLine("Create output layer"); OutputLayer = new SimpleLayer(); OutputLayer.Load(br); if (IsCRFTraining) { Logger.WriteLine("Loading CRF tag trans weights..."); CRFTagTransWeights = RNNHelper.LoadMatrix(br); } sr.Close(); }
public override void ComputeLayerErr(SimpleLayer nextLayer)
{
    LSTMLayer layer = nextLayer as LSTMLayer;
    if (layer != null)
    {
        Parallel.For(0, LayerSize, parallelOption, i =>
        {
            er[i] = 0.0;
            for (int k = 0; k < nextLayer.LayerSize; k++)
            {
                er[i] += layer.er[k] * layer.feature2hidden[k][i].W;
            }
        });
    }
    else
    {
        base.ComputeLayerErr(nextLayer);
    }
}
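//For reference, the parallel loop above is standard error back-propagation through the
//next layer's dense weight matrix feature2hidden: for each unit i of this layer,
//
//    er[i] = sum over k of nextLayer.er[k] * feature2hidden[k][i].W
//
//i.e. the next layer's error vector multiplied by the transpose of its input weights.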
public override void ShallowCopyWeightTo(SimpleLayer destLayer)
{
    LSTMLayer layer = destLayer as LSTMLayer;
    layer.SparseFeatureSize = SparseFeatureSize;
    layer.DenseFeatureSize = DenseFeatureSize;

    layer.sparseFeatureWeights = sparseFeatureWeights;
    layer.sparseFeatureWeightsDelta = sparseFeatureWeightsDelta;
    layer.sparseFeatureLearningRate = sparseFeatureLearningRate;

    layer.wDenseCellGate = wDenseCellGate.CloneSharedWeights();
    layer.wDenseForgetGate = wDenseForgetGate.CloneSharedWeights();
    layer.wDenseInputGate = wDenseInputGate.CloneSharedWeights();
    layer.wDenseOutputGate = wDenseOutputGate.CloneSharedWeights();

    layer.CellWeights = CellWeights;
    layer.cellDelta = cellDelta;
    layer.peepholeDelta = peepholeDelta;

    layer.InitializeInternalTrainingParameters();
}
public override void ShallowCopyWeightTo(SimpleLayer destLayer)
{
    LSTMLayer layer = destLayer as LSTMLayer;
    layer.SparseFeatureSize = SparseFeatureSize;
    layer.DenseFeatureSize = DenseFeatureSize;

    layer.sparseFeatureWeights = sparseFeatureWeights;
    layer.sparseFeatureToHiddenLearningRate = sparseFeatureToHiddenLearningRate;

    layer.wDenseCellGate = wDenseCellGate.CloneSharedWeights();
    layer.wDenseForgetGate = wDenseForgetGate.CloneSharedWeights();
    layer.wDenseInputGate = wDenseInputGate.CloneSharedWeights();
    layer.wDenseOutputGate = wDenseOutputGate.CloneSharedWeights();

    layer.CellWeights = CellWeights;

    layer.lockerDenseFeature = lockerDenseFeature;
    layer.lockerSparseFeature = lockerSparseFeature;
    layer.cellLockers = cellLockers;

    layer.InitializeInternalTrainingParameters();
}
public override void ComputeLayerErr(SimpleLayer nextLayer, double[] destErrLayer, double[] srcErrLayer)
{
    LSTMLayer layer = nextLayer as LSTMLayer;
    if (layer != null)
    {
        Parallel.For(0, LayerSize, parallelOption, i =>
        {
            destErrLayer[i] = 0.0;
            if (mask[i] == false)
            {
                for (int k = 0; k < nextLayer.LayerSize; k++)
                {
                    destErrLayer[i] += srcErrLayer[k] * layer.feature2hidden[k][i].W;
                }
            }
        });
    }
    else
    {
        base.ComputeLayerErr(nextLayer, destErrLayer, srcErrLayer);
    }
}
private RNN <T> CreateNetwork() { RNN <T> rnn; if (modelDirection == MODELDIRECTION.Forward) { var sparseFeatureSize = TrainingSet.SparseFeatureSize; if (ModelType == MODELTYPE.Seq2Seq) { //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence] sparseFeatureSize += featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize; Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}", TrainingSet.SparseFeatureSize, featurizer.Seq2SeqAutoEncoder.Config.SparseFeatureSize, sparseFeatureSize); } var hiddenLayers = new List <SimpleLayer>(); for (var i = 0; i < hiddenLayersConfig.Count; i++) { SimpleLayer layer = null; switch (hiddenLayersConfig[i].LayerType) { case LayerType.BPTT: var bpttLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig); layer = bpttLayer; Logger.WriteLine("Create BPTT layer."); break; case LayerType.LSTM: var lstmLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig); layer = lstmLayer; Logger.WriteLine("Create LSTM layer."); break; case LayerType.DropOut: var dropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig); layer = dropoutLayer; Logger.WriteLine("Create Dropout layer."); break; } layer.InitializeWeights(sparseFeatureSize, i == 0 ? GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize) : GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize)); Logger.WriteLine( $"Create hidden layer {i}: size = {layer.LayerSize}, sparse feature size = {layer.SparseFeatureSize}, dense feature size = {layer.DenseFeatureSize}"); hiddenLayers.Add(layer); } SimpleLayer outputLayer = null; outputLayerConfig.LayerSize = TrainingSet.TagSize; switch (outputLayerConfig.LayerType) { case LayerType.NCESoftmax: Logger.WriteLine("Create NCESoftmax layer as output layer"); var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig); nceOutputLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize)); outputLayer = nceOutputLayer; break; case LayerType.Softmax: Logger.WriteLine("Create Softmax layer as output layer."); outputLayer = new SimpleLayer(outputLayerConfig); outputLayer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize)); break; } rnn = new ForwardRNN <T>(hiddenLayers, outputLayer); } else { var forwardHiddenLayers = new List <SimpleLayer>(); var backwardHiddenLayers = new List <SimpleLayer>(); for (var i = 0; i < hiddenLayersConfig.Count; i++) { SimpleLayer forwardLayer = null; SimpleLayer backwardLayer = null; switch (hiddenLayersConfig[i].LayerType) { case LayerType.BPTT: //For BPTT layer var forwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig); forwardLayer = forwardBPTTLayer; var backwardBPTTLayer = new BPTTLayer(hiddenLayersConfig[i] as BPTTLayerConfig); backwardLayer = backwardBPTTLayer; Logger.WriteLine("Create BPTT layer."); break; case LayerType.LSTM: //For LSTM layer var forwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig); forwardLayer = forwardLSTMLayer; var backwardLSTMLayer = new LSTMLayer(hiddenLayersConfig[i] as LSTMLayerConfig); backwardLayer = backwardLSTMLayer; Logger.WriteLine("Create LSTM layer."); break; case LayerType.DropOut: var forwardDropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig); var backwardDropoutLayer = new DropoutLayer(hiddenLayersConfig[i] as DropoutLayerConfig); Logger.WriteLine("Create Dropout layer."); break; } if (i == 0) { 
forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize); backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize); } else { forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize); backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, backwardHiddenLayers[i - 1].LayerSize); } Logger.WriteLine( $"Create hidden layer {i}: size = {forwardLayer.LayerSize}, sparse feature size = {forwardLayer.SparseFeatureSize}, dense feature size = {forwardLayer.DenseFeatureSize}"); forwardHiddenLayers.Add(forwardLayer); backwardHiddenLayers.Add(backwardLayer); } SimpleLayer outputLayer = null; outputLayerConfig.LayerSize = TrainingSet.TagSize; switch (outputLayerConfig.LayerType) { case LayerType.NCESoftmax: Logger.WriteLine("Create NCESoftmax layer as output layer."); var nceOutputLayer = new NCEOutputLayer(outputLayerConfig as NCELayerConfig); nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); outputLayer = nceOutputLayer; break; case LayerType.Softmax: Logger.WriteLine("Create Softmax layer as output layer."); outputLayer = new SimpleLayer(outputLayerConfig); outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); break; } rnn = new BiRNN <T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer); } return(rnn); }
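//Hypothetical sketch, not from the library source: how one entry of the hidden-layer
//configuration list consumed by CreateNetwork might be built. Only LayerSize and
//LayerType are confirmed by the Load method above; the value 200 and the assumption
//that these members are publicly settable are illustrative.
var lstmConfig = new LSTMLayerConfig();
lstmConfig.LayerSize = 200;                 //hidden layer width (illustrative value)
lstmConfig.LayerType = LayerType.LSTM;      //selects the LSTM branch of the switch above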
public override void LoadModel(string filename)
{
    Logger.WriteLine(Logger.Level.info, "Loading bi-directional model: {0}", filename);

    using (StreamReader sr = new StreamReader(filename))
    {
        BinaryReader br = new BinaryReader(sr.BaseStream);

        int modelType = br.ReadInt32();
        ModelDirection = (MODELDIRECTION)br.ReadInt32();

        int iflag = br.ReadInt32();
        if (iflag == 1)
        {
            IsCRFTraining = true;
        }
        else
        {
            IsCRFTraining = false;
        }

        int layerSize = br.ReadInt32();

        //Load forward layers from file
        forwardHiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < layerSize; i++)
        {
            SimpleLayer layer = null;
            if (modelType == 0)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }

            layer.Load(br);
            forwardHiddenLayers.Add(layer);
        }

        //Load backward layers from file
        backwardHiddenLayers = new List<SimpleLayer>();
        for (int i = 0; i < layerSize; i++)
        {
            SimpleLayer layer = null;
            if (modelType == 0)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }

            layer.Load(br);
            backwardHiddenLayers.Add(layer);
        }

        OutputLayer = new SimpleLayer();
        OutputLayer.Load(br);

        if (iflag == 1)
        {
            Logger.WriteLine("Loading CRF tag trans weights...");
            CRFTagTransWeights = RNNHelper.LoadMatrix(br);
        }
    }
}
public void Train() { RNN <T> rnn; if (ModelSettings.ModelDirection == 0) { int sparseFeatureSize = TrainingSet.SparseFeatureSize; if (ModelSettings.IsSeq2SeqTraining) { //[Sparse feature set of each state in target sequence][Sparse feature set of entire source sequence] sparseFeatureSize += Featurizer.AutoEncoder.Featurizer.SparseFeatureSize; Logger.WriteLine("Sparse Feature Format: [{0}][{1}] = {2}", TrainingSet.SparseFeatureSize, Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, sparseFeatureSize); } List <SimpleLayer> hiddenLayers = new List <SimpleLayer>(); for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++) { SimpleLayer layer = null; if (ModelSettings.ModelType == LayerType.BPTT) { BPTTLayer bpttLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings); layer = bpttLayer; } else if (ModelSettings.ModelType == LayerType.LSTM) { LSTMLayer lstmLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings); layer = lstmLayer; } else { throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", ModelSettings.ModelType.ToString())); } if (i == 0) { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, TrainingSet.DenseFeatureSize); if (ModelSettings.IsSeq2SeqTraining) { Logger.WriteLine("For seq2seq training, we have {0} sprase feature and {1} dense feature from source sequence.", Featurizer.AutoEncoder.Featurizer.SparseFeatureSize, Featurizer.AutoEncoder.GetTopHiddenLayerSize()); } layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(TrainingSet.DenseFeatureSize)); } else { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, ModelSettings.HiddenLayerSizeList[i], sparseFeatureSize, hiddenLayers[i - 1].LayerSize); layer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[i - 1].LayerSize)); } hiddenLayers.Add(layer); } if (ModelSettings.Dropout > 0) { Logger.WriteLine("Adding dropout layer"); DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, ModelSettings); dropoutLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize)); hiddenLayers.Add(dropoutLayer); } SimpleLayer outputLayer; if (ModelSettings.OutputLayerType == LayerType.NCESoftmax) { Logger.WriteLine("Create NCESoftmax layer as output layer"); NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings); nceOutputLayer.BuildStatisticData <T>(TrainingSet); nceOutputLayer.InitializeWeights(0, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize)); outputLayer = nceOutputLayer; } else if (ModelSettings.OutputLayerType == LayerType.Softmax) { Logger.WriteLine("Create Softmax layer as output layer."); outputLayer = new SimpleLayer(TrainingSet.TagSize); outputLayer.InitializeWeights(sparseFeatureSize, GetCurrentLayerDenseFeatureSize(hiddenLayers[hiddenLayers.Count - 1].LayerSize)); } else { throw new System.Exception(string.Format("Invalidate output layer type: {0}", ModelSettings.OutputLayerType.ToString())); } rnn = new ForwardRNN <T>(hiddenLayers, outputLayer); } else { List <SimpleLayer> forwardHiddenLayers = new List <SimpleLayer>(); List <SimpleLayer> backwardHiddenLayers = new List <SimpleLayer>(); for (int i = 0; i < ModelSettings.HiddenLayerSizeList.Count; i++) { SimpleLayer forwardLayer = null; 
SimpleLayer backwardLayer = null; if (ModelSettings.ModelType == LayerType.BPTT) { //For BPTT layer BPTTLayer forwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings); forwardLayer = forwardBPTTLayer; BPTTLayer backwardBPTTLayer = new BPTTLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings); backwardLayer = backwardBPTTLayer; } else if (ModelSettings.ModelType == LayerType.LSTM) { //For LSTM layer LSTMLayer forwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings); forwardLayer = forwardLSTMLayer; LSTMLayer backwardLSTMLayer = new LSTMLayer(ModelSettings.HiddenLayerSizeList[i], ModelSettings); backwardLayer = backwardLSTMLayer; } else { throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", ModelSettings.ModelType.ToString())); } if (i == 0) { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize); forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize); backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, TrainingSet.DenseFeatureSize); } else { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, ModelSettings.HiddenLayerSizeList[i], TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize); forwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[i - 1].LayerSize); backwardLayer.InitializeWeights(TrainingSet.SparseFeatureSize, backwardHiddenLayers[i - 1].LayerSize); } forwardHiddenLayers.Add(forwardLayer); backwardHiddenLayers.Add(backwardLayer); } if (ModelSettings.Dropout > 0) { Logger.WriteLine("Adding dropout layers"); DropoutLayer forwardDropoutLayer = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, ModelSettings); DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, ModelSettings); forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize); forwardHiddenLayers.Add(forwardDropoutLayer); backwardHiddenLayers.Add(backwardDropoutLayer); } SimpleLayer outputLayer; if (ModelSettings.OutputLayerType == LayerType.NCESoftmax) { Logger.WriteLine("Create NCESoftmax layer as output layer."); NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, ModelSettings); nceOutputLayer.BuildStatisticData <T>(TrainingSet); nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); outputLayer = nceOutputLayer; } else if (ModelSettings.OutputLayerType == LayerType.Softmax) { Logger.WriteLine("Create Softmax layer as output layer."); outputLayer = new SimpleLayer(TrainingSet.TagSize); outputLayer.InitializeWeights(TrainingSet.SparseFeatureSize, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); } else { throw new System.Exception(string.Format("Invalidate output layer type: {0}", ModelSettings.OutputLayerType.ToString())); } rnn = new BiRNN <T>(forwardHiddenLayers, backwardHiddenLayers, outputLayer); } rnn.ModelDirection = (MODELDIRECTION)ModelSettings.ModelDirection; rnn.bVQ = (ModelSettings.VQ != 0) ? 
true : false; rnn.ModelFile = ModelSettings.ModelFile; rnn.SaveStep = ModelSettings.SaveStep; rnn.MaxIter = ModelSettings.MaxIteration; rnn.IsCRFTraining = ModelSettings.IsCRFTraining; rnn.ModelType = ModelSettings.IsSeq2SeqTraining ? MODELTYPE.SEQ2SEQ : MODELTYPE.SEQLABEL; if (rnn.ModelDirection == MODELDIRECTION.BI_DIRECTIONAL && rnn.ModelType == MODELTYPE.SEQ2SEQ) { throw new System.Exception("Bi-directional RNN model doesn't support sequence-to-sequence model."); } RNNHelper.LearningRate = ModelSettings.LearningRate; RNNHelper.GradientCutoff = ModelSettings.GradientCutoff; RNNHelper.IsConstAlpha = ModelSettings.IsConstAlpha; //Create tag-bigram transition probability matrix only for sequence RNN mode if (ModelSettings.IsCRFTraining) { rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition); } Logger.WriteLine(""); Logger.WriteLine("Iterative training begins ..."); double lastPPL = double.MaxValue; double lastAlpha = RNNHelper.LearningRate; int iter = 0; while (true) { Logger.WriteLine("Cleaning training status..."); rnn.CleanStatus(); if (rnn.MaxIter > 0 && iter > rnn.MaxIter) { Logger.WriteLine("We have trained this model {0} iteration, exit."); break; } //Start to train model double ppl = rnn.TrainNet(TrainingSet, iter); if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate) { //Although we reduce alpha value, we still cannot get better result. Logger.WriteLine("Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL); Logger.WriteLine("Current alpha: {0}, the previous alpha: {1}", RNNHelper.LearningRate, lastAlpha); break; } lastAlpha = RNNHelper.LearningRate; //Validate the model by validated corpus if (ValidationSet != null) { Logger.WriteLine("Verify model on validated corpus."); if (rnn.ValidateNet(ValidationSet, iter) == true) { //We got better result on validated corpus, save this model Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile); rnn.SaveModel(ModelSettings.ModelFile); } } else if (ppl < lastPPL) { //We don't have validate corpus, but we get a better result on training corpus //We got better result on validated corpus, save this model Logger.WriteLine("Saving better model into file {0}...", ModelSettings.ModelFile); rnn.SaveModel(ModelSettings.ModelFile); } if (ppl >= lastPPL) { //We cannot get a better result on training corpus, so reduce learning rate RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f; } lastPPL = ppl; iter++; } }
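//Illustrative sketch, not library code: the learning-rate halving policy used by the
//training loop above, isolated. trainOneIteration is a hypothetical stand-in for
//rnn.TrainNet, and the default starting rate is an assumed value (requires "using System;").
static void RunHalvingSchedule(Func<float, double> trainOneIteration, float learningRate = 0.1f)
{
    double lastPPL = double.MaxValue;
    float lastRate = learningRate;
    while (true)
    {
        double ppl = trainOneIteration(learningRate);
        if (ppl >= lastPPL && lastRate != learningRate)
        {
            break;                      //halving did not help any more: stop early
        }
        lastRate = learningRate;
        if (ppl >= lastPPL)
        {
            learningRate /= 2.0f;       //no improvement on training corpus: halve the rate
        }
        lastPPL = ppl;
    }
}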
public void Train() { RNN rnn; if (m_modelSetting.ModelDirection == 0) { List <SimpleLayer> hiddenLayers = new List <SimpleLayer>(); for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++) { SimpleLayer layer = null; if (m_modelSetting.ModelType == LayerType.BPTT) { BPTTLayer bpttLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting); layer = bpttLayer; } else if (m_modelSetting.ModelType == LayerType.LSTM) { LSTMLayer lstmLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting); layer = lstmLayer; } else { throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", m_modelSetting.ModelType.ToString())); } if (i == 0) { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); layer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); } else { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize); layer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[i - 1].LayerSize); } hiddenLayers.Add(layer); } if (m_modelSetting.Dropout > 0) { Logger.WriteLine("Adding dropout layer"); DropoutLayer dropoutLayer = new DropoutLayer(hiddenLayers[hiddenLayers.Count - 1].LayerSize, m_modelSetting); dropoutLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize); hiddenLayers.Add(dropoutLayer); } SimpleLayer outputLayer; if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax) { Logger.WriteLine("Create NCESoftmax layer as output layer."); NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting); nceOutputLayer.InitializeWeights(0, hiddenLayers[hiddenLayers.Count - 1].LayerSize); outputLayer = nceOutputLayer; } else if (m_modelSetting.OutputLayerType == LayerType.Softmax) { Logger.WriteLine("Create Softmax layer as output layer."); outputLayer = new SimpleLayer(TrainingSet.TagSize); outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), hiddenLayers[hiddenLayers.Count - 1].LayerSize); } else { throw new System.Exception(string.Format("Invalidate output layer type: {0}", m_modelSetting.OutputLayerType.ToString())); } rnn = new ForwardRNN(hiddenLayers, outputLayer); } else { List <SimpleLayer> forwardHiddenLayers = new List <SimpleLayer>(); List <SimpleLayer> backwardHiddenLayers = new List <SimpleLayer>(); for (int i = 0; i < m_modelSetting.HiddenLayerSizeList.Count; i++) { SimpleLayer forwardLayer = null; SimpleLayer backwardLayer = null; if (m_modelSetting.ModelType == LayerType.BPTT) { //For BPTT layer BPTTLayer forwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting); forwardLayer = forwardBPTTLayer; BPTTLayer backwardBPTTLayer = new BPTTLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting); backwardLayer = backwardBPTTLayer; } else if (m_modelSetting.ModelType == LayerType.LSTM) { //For LSTM layer LSTMLayer forwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting); forwardLayer = forwardLSTMLayer; LSTMLayer backwardLSTMLayer = new LSTMLayer(m_modelSetting.HiddenLayerSizeList[i], m_modelSetting); backwardLayer = backwardLSTMLayer; } else { throw new System.Exception(string.Format("Invalidate hidden layer type: {0}", 
m_modelSetting.ModelType.ToString())); } if (i == 0) { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); } else { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.HiddenLayerSizeList[i], TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize); forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[i - 1].LayerSize); backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), backwardHiddenLayers[i - 1].LayerSize); } forwardHiddenLayers.Add(forwardLayer); backwardHiddenLayers.Add(backwardLayer); } if (m_modelSetting.Dropout > 0) { Logger.WriteLine("Adding dropout layers"); DropoutLayer forwardDropoutLayer = new DropoutLayer(forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize, m_modelSetting); DropoutLayer backwardDropoutLayer = new DropoutLayer(backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize, m_modelSetting); forwardDropoutLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); backwardDropoutLayer.InitializeWeights(0, backwardHiddenLayers[backwardHiddenLayers.Count - 1].LayerSize); forwardHiddenLayers.Add(forwardDropoutLayer); backwardHiddenLayers.Add(backwardDropoutLayer); } SimpleLayer outputLayer; if (m_modelSetting.OutputLayerType == LayerType.NCESoftmax) { Logger.WriteLine("Create NCESoftmax layer as output layer."); NCEOutputLayer nceOutputLayer = new NCEOutputLayer(TrainingSet.TagSize, m_modelSetting); nceOutputLayer.InitializeWeights(0, forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); outputLayer = nceOutputLayer; } else if (m_modelSetting.OutputLayerType == LayerType.Softmax) { Logger.WriteLine("Create Softmax layer as output layer."); outputLayer = new SimpleLayer(TrainingSet.TagSize); outputLayer.InitializeWeights(TrainingSet.GetSparseDimension(), forwardHiddenLayers[forwardHiddenLayers.Count - 1].LayerSize); } else { throw new System.Exception(string.Format("Invalidate output layer type: {0}", m_modelSetting.OutputLayerType.ToString())); } rnn = new BiRNN(forwardHiddenLayers, backwardHiddenLayers, outputLayer); } rnn.ModelDirection = (MODELDIRECTION)m_modelSetting.ModelDirection; rnn.bVQ = (m_modelSetting.VQ != 0) ? 
true : false; rnn.ModelFile = m_modelSetting.ModelFile; rnn.SaveStep = m_modelSetting.SaveStep; rnn.MaxIter = m_modelSetting.MaxIteration; rnn.IsCRFTraining = m_modelSetting.IsCRFTraining; RNNHelper.LearningRate = m_modelSetting.LearningRate; RNNHelper.GradientCutoff = m_modelSetting.GradientCutoff; //Create tag-bigram transition probability matrix only for sequence RNN mode if (m_modelSetting.IsCRFTraining) { rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition); } Logger.WriteLine(""); Logger.WriteLine("Iterative training begins ..."); double lastPPL = double.MaxValue; double lastAlpha = RNNHelper.LearningRate; int iter = 0; while (true) { Logger.WriteLine("Cleaning training status..."); rnn.CleanStatus(); if (rnn.MaxIter > 0 && iter > rnn.MaxIter) { Logger.WriteLine("We have trained this model {0} iteration, exit."); break; } //Start to train model double ppl = rnn.TrainNet(TrainingSet, iter); if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate) { //Although we reduce alpha value, we still cannot get better result. Logger.WriteLine("Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL); Logger.WriteLine("Current alpha: {0}, the previous alpha: {1}", RNNHelper.LearningRate, lastAlpha); break; } lastAlpha = RNNHelper.LearningRate; //Validate the model by validated corpus if (ValidationSet != null) { Logger.WriteLine("Verify model on validated corpus."); if (rnn.ValidateNet(ValidationSet, iter) == true) { //We got better result on validated corpus, save this model Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile); rnn.SaveModel(m_modelSetting.ModelFile); } } else if (ppl < lastPPL) { //We don't have validate corpus, but we get a better result on training corpus //We got better result on validated corpus, save this model Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile); rnn.SaveModel(m_modelSetting.ModelFile); } if (ppl >= lastPPL) { //We cannot get a better result on training corpus, so reduce learning rate RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f; } lastPPL = ppl; iter++; } }
public void Train() { RNN rnn; if (m_modelSetting.ModelDirection == 0) { List<SimpleLayer> hiddenLayers = new List<SimpleLayer>(); for (int i = 0; i < m_modelSetting.NumHidden.Count; i++) { SimpleLayer layer = null; if (m_modelSetting.ModelType == 0) { BPTTLayer bpttLayer = new BPTTLayer(m_modelSetting.NumHidden[i]); bpttLayer.bptt = m_modelSetting.Bptt + 1; bpttLayer.bptt_block = 10; bpttLayer.Dropout = m_modelSetting.Dropout; layer = bpttLayer; } else { LSTMLayer lstmLayer = new LSTMLayer(m_modelSetting.NumHidden[i]); lstmLayer.Dropout = m_modelSetting.Dropout; layer = lstmLayer; } if (i == 0) { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.NumHidden[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); layer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); } else { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.NumHidden[i], 0, hiddenLayers[i - 1].LayerSize); layer.InitializeWeights(0, hiddenLayers[i - 1].LayerSize); } hiddenLayers.Add(layer); } rnn = new ForwardRNN(hiddenLayers, TrainingSet.TagSize); } else { List<SimpleLayer> forwardHiddenLayers = new List<SimpleLayer>(); List<SimpleLayer> backwardHiddenLayers = new List<SimpleLayer>(); for (int i = 0; i < m_modelSetting.NumHidden.Count; i++) { SimpleLayer forwardLayer = null; SimpleLayer backwardLayer = null; if (m_modelSetting.ModelType == 0) { //For BPTT layer BPTTLayer forwardBPTTLayer = new BPTTLayer(m_modelSetting.NumHidden[i]); forwardBPTTLayer.bptt = m_modelSetting.Bptt + 1; forwardBPTTLayer.bptt_block = 10; forwardBPTTLayer.Dropout = m_modelSetting.Dropout; forwardLayer = forwardBPTTLayer; BPTTLayer backwardBPTTLayer = new BPTTLayer(m_modelSetting.NumHidden[i]); backwardBPTTLayer.bptt = m_modelSetting.Bptt + 1; backwardBPTTLayer.bptt_block = 10; backwardBPTTLayer.Dropout = m_modelSetting.Dropout; backwardLayer = backwardBPTTLayer; } else { //For LSTM layer LSTMLayer forwardLSTMLayer = new LSTMLayer(m_modelSetting.NumHidden[i]); forwardLSTMLayer.Dropout = m_modelSetting.Dropout; forwardLayer = forwardLSTMLayer; LSTMLayer backwardLSTMLayer = new LSTMLayer(m_modelSetting.NumHidden[i]); backwardLSTMLayer.Dropout = m_modelSetting.Dropout; backwardLayer = backwardLSTMLayer; } if (i == 0) { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.NumHidden[i], TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); forwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); backwardLayer.InitializeWeights(TrainingSet.GetSparseDimension(), TrainingSet.DenseFeatureSize()); } else { Logger.WriteLine("Create hidden layer {0}: size = {1}, sparse feature size = {2}, dense feature size = {3}", i, m_modelSetting.NumHidden[i], 0, forwardHiddenLayers[i - 1].LayerSize); forwardLayer.InitializeWeights(0, forwardHiddenLayers[i - 1].LayerSize); backwardLayer.InitializeWeights(0, backwardHiddenLayers[i - 1].LayerSize); } forwardHiddenLayers.Add(forwardLayer); backwardHiddenLayers.Add(backwardLayer); } rnn = new BiRNN(forwardHiddenLayers, backwardHiddenLayers, TrainingSet.TagSize); } rnn.ModelDirection = (MODELDIRECTION)m_modelSetting.ModelDirection; rnn.bVQ = (m_modelSetting.VQ != 0) ? 
true : false; rnn.ModelFile = m_modelSetting.ModelFile; rnn.SaveStep = m_modelSetting.SaveStep; rnn.MaxIter = m_modelSetting.MaxIteration; rnn.IsCRFTraining = m_modelSetting.IsCRFTraining; RNNHelper.LearningRate = m_modelSetting.LearningRate; RNNHelper.GradientCutoff = m_modelSetting.GradientCutoff; //Create tag-bigram transition probability matrix only for sequence RNN mode if (m_modelSetting.IsCRFTraining) { rnn.setTagBigramTransition(TrainingSet.CRFLabelBigramTransition); } Logger.WriteLine(""); Logger.WriteLine("Iterative training begins ..."); double lastPPL = double.MaxValue; double lastAlpha = RNNHelper.LearningRate; int iter = 0; while (true) { Logger.WriteLine("Cleaning training status..."); rnn.CleanStatus(); if (rnn.MaxIter > 0 && iter > rnn.MaxIter) { Logger.WriteLine("We have trained this model {0} iteration, exit."); break; } //Start to train model double ppl = rnn.TrainNet(TrainingSet, iter); if (ppl >= lastPPL && lastAlpha != RNNHelper.LearningRate) { //Although we reduce alpha value, we still cannot get better result. Logger.WriteLine("Current perplexity({0}) is larger than the previous one({1}). End training early.", ppl, lastPPL); Logger.WriteLine("Current alpha: {0}, the previous alpha: {1}", RNNHelper.LearningRate, lastAlpha); break; } lastAlpha = RNNHelper.LearningRate; //Validate the model by validated corpus if (ValidationSet != null) { Logger.WriteLine("Verify model on validated corpus."); if (rnn.ValidateNet(ValidationSet, iter) == true) { //We got better result on validated corpus, save this model Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile); rnn.SaveModel(m_modelSetting.ModelFile); } } else if (ppl < lastPPL) { //We don't have validate corpus, but we get a better result on training corpus //We got better result on validated corpus, save this model Logger.WriteLine("Saving better model into file {0}...", m_modelSetting.ModelFile); rnn.SaveModel(m_modelSetting.ModelFile); } if (ppl >= lastPPL) { //We cannot get a better result on training corpus, so reduce learning rate RNNHelper.LearningRate = RNNHelper.LearningRate / 2.0f; } lastPPL = ppl; iter++; } }
public override void LoadModel(string filename)
{
    Logger.WriteLine(Logger.Level.info, "Loading bi-directional model: {0}", filename);

    using (var sr = new StreamReader(filename))
    {
        var br = new BinaryReader(sr.BaseStream);

        var layerType = (LAYERTYPE)br.ReadInt32();
        IsCRFTraining = br.ReadBoolean();
        var layerSize = br.ReadInt32();

        //Load forward layers from file
        forwardHiddenLayers = new List<SimpleLayer>();
        for (var i = 0; i < layerSize; i++)
        {
            SimpleLayer layer;
            if (layerType == LAYERTYPE.BPTT)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }

            layer.Load(br);
            forwardHiddenLayers.Add(layer);
        }

        //Load backward layers from file
        backwardHiddenLayers = new List<SimpleLayer>();
        for (var i = 0; i < layerSize; i++)
        {
            SimpleLayer layer;
            if (layerType == LAYERTYPE.BPTT)
            {
                Logger.WriteLine("Create BPTT hidden layer");
                layer = new BPTTLayer();
            }
            else
            {
                Logger.WriteLine("Create LSTM hidden layer");
                layer = new LSTMLayer();
            }

            layer.Load(br);
            backwardHiddenLayers.Add(layer);
        }

        Logger.WriteLine("Create output layer");
        OutputLayer = new SimpleLayer();
        OutputLayer.Load(br);

        if (IsCRFTraining)
        {
            Logger.WriteLine("Loading CRF tag trans weights...");
            CRFTagTransWeights = RNNHelper.LoadMatrix(br);
        }
    }
}