/// <summary>
/// Pass error from the last layer back to the first layer.
/// </summary>
/// <param name="pSequence">The sequence whose per-state errors are propagated.</param>
protected override void ComputeDeepErr(Sequence pSequence)
{
    var stateCount = pSequence.States.Length;
    var layerCount = forwardHiddenLayers.Count;

    // Compute the output-layer loss for every time step.
    for (var t = 0; t < stateCount; t++)
    {
        OutputLayer.Cells = OutputCells[t].Cells;
        OutputLayer.Errs = OutputCells[t].Errs;
        OutputLayer.ComputeOutputLoss(CRFSeqOutput, pSequence.States[t], t);
    }

    // Push the output-layer error into the top forward layer, then mirror the
    // same error buffer into the top backward layer.
    var topForwardCells = forwardCellList[layerCount - 1];
    var topBackwardCells = backwardCellList[layerCount - 1];
    for (var t = 0; t < stateCount; t++)
    {
        OutputLayer.Errs = OutputCells[t].Errs;
        OutputLayer.ComputeLayerErr(topForwardCells[t].Errs);
        topForwardCells[t].Errs.CopyTo(topBackwardCells[t].Errs, 0);
    }

    // Propagate errors from layer i+1 down into layer i, separately for the
    // forward and backward directions.
    for (var layer = layerCount - 2; layer >= 0; layer--)
    {
        var upperForward = forwardHiddenLayers[layer + 1];
        var destForwardCells = forwardCellList[layer];
        var srcForwardCells = forwardCellList[layer + 1];

        var upperBackward = backwardHiddenLayers[layer + 1];
        var destBackwardCells = backwardCellList[layer];
        var srcBackwardCells = backwardCellList[layer + 1];

        for (var t = 0; t < stateCount; t++)
        {
            var destF = destForwardCells[t];
            var destB = destBackwardCells[t];

            upperForward.Errs = srcForwardCells[t].Errs;
            upperForward.ComputeLayerErr(destF.Errs);

            upperBackward.Errs = srcBackwardCells[t].Errs;
            upperBackward.ComputeLayerErr(destB.Errs);
        }
    }
}
/// <summary>
/// Runs one pass over a sequence pair: encodes the source sentence, feeds the
/// target sequence through all hidden layers, and (in training mode)
/// back-propagates errors and updates weights.
/// </summary>
/// <param name="sequence">The sequence pair (source + target) to process.</param>
/// <param name="runningMode">Training or test mode.</param>
/// <param name="outputRawScore">True to capture raw output-layer scores per state.</param>
/// <param name="m">Raw output scores per state, or null when outputRawScore is false.</param>
/// <returns>The predicted label index for each target state.</returns>
protected virtual int[] TrainSequencePair(ISequence sequence, RunningMode runningMode, bool outputRawScore, out Matrix<float> m)
{
    SequencePair pSequence = sequence as SequencePair;
    var tgtSequence = pSequence.tgtSequence;

    // Clear recurrent state in every hidden layer before a new sequence.
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset();
    }

    // Encode the source sentence through the auto-encoder.
    Sequence srcSequence = pSequence.autoEncoder.Config.BuildSequence(pSequence.srcSentence);
    ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize);

    var stateCount = pSequence.tgtSequence.States.Length;
    var layerCount = HiddenLayerList.Count;
    var predicted = new int[stateCount];
    var goldLabels = new int[stateCount];
    m = outputRawScore ? new Matrix<float>(stateCount, OutputLayer.LayerSize) : null;

    // Restrict the output layer to the labels present in the target sentence.
    OutputLayer.LabelShortList.Clear();
    foreach (var state in tgtSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    // Seed every layer's dense-feature buffer with the averaged source encoding.
    CreateDenseFeatureList();
    for (int i = 0; i < layerCount; i++)
    {
        srcHiddenAvgOutput.CopyTo(denseFeaturesList[i], 0);
    }
    srcHiddenAvgOutput.CopyTo(denseFeaturesList[layerCount], 0);

    var sparseInput = new SparseVector();
    for (var t = 0; t < stateCount; t++)
    {
        var state = tgtSequence.States[t];

        // In training mode feed back gold labels; otherwise feed back predictions.
        SetRuntimeFeatures(state, t, stateCount, runningMode == RunningMode.Training ? goldLabels : predicted);

        // Sparse input: target-state features followed by source-sentence features.
        sparseInput.Clean();
        sparseInput.SetLength(tgtSequence.SparseFeatureSize + srcSequence.SparseFeatureSize);
        sparseInput.AddKeyValuePairData(state.SparseFeature);
        sparseInput.AddKeyValuePairData(srcSparseFeatures);

        // First hidden layer: state dense features appended after the source encoding.
        state.DenseFeature.CopyTo().CopyTo(denseFeaturesList[0], srcHiddenAvgOutput.Length);
        HiddenLayerList[0].ForwardPass(sparseInput, denseFeaturesList[0]);

        // Each subsequent layer consumes the previous layer's output.
        for (var i = 1; i < layerCount; i++)
        {
            HiddenLayerList[i - 1].Cells.CopyTo(denseFeaturesList[i], srcHiddenAvgOutput.Length);
            HiddenLayerList[i].ForwardPass(sparseInput, denseFeaturesList[i]);
        }

        // Output layer consumes the top hidden layer's output.
        HiddenLayerList[layerCount - 1].Cells.CopyTo(denseFeaturesList[layerCount], srcHiddenAvgOutput.Length);
        OutputLayer.ForwardPass(sparseInput, denseFeaturesList[layerCount]);

        if (m != null)
        {
            OutputLayer.Cells.CopyTo(m[t], 0);
        }

        predicted[t] = OutputLayer.GetBestOutputIndex();

        if (runningMode == RunningMode.Training)
        {
            goldLabels[t] = state.Label;

            // Back-propagate errors from the output layer down to the first layer.
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, t);
            HiddenLayerList[layerCount - 1].ComputeLayerErr(OutputLayer);
            for (var i = layerCount - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            // Apply weight updates.
            OutputLayer.BackwardPass();
            for (var i = 0; i < layerCount; i++)
            {
                HiddenLayerList[i].BackwardPass();
            }
        }
    }

    return predicted;
}
/// <summary>
/// Processes a sequence with a CRF output layer: decodes the best label path,
/// and in training mode updates CRF transitions and network weights.
/// </summary>
/// <param name="pSequence">The sequence to process.</param>
/// <param name="runningMode">Training or test mode.</param>
/// <returns>The Viterbi-decoded label index for each state.</returns>
public override int[] ProcessSequenceCRF(Sequence pSequence, RunningMode runningMode)
{
    var stateCount = pSequence.States.Length;
    var layerCount = HiddenLayerList.Count;

    // Raw (non-CRF) network scores for every state.
    Matrix<float> nnOutput;
    ProcessSequence(pSequence, RunningMode.Test, true, out nnOutput);

    // CRF forward-backward over the raw scores.
    ForwardBackward(stateCount, nnOutput);

    // Best label path through the CRF lattice.
    var predicted = Viterbi(nnOutput, stateCount);

    if (runningMode == RunningMode.Training)
    {
        // Update the tag-bigram transition matrix of the CRF model.
        UpdateBigramTransition(pSequence);

        // Re-run the network in training mode and back-propagate CRF errors.
        foreach (var layer in HiddenLayerList)
        {
            layer.Reset();
        }

        for (var t = 0; t < stateCount; t++)
        {
            var state = pSequence.States[t];
            SetRuntimeFeatures(state, t, stateCount, null);

            HiddenLayerList[0].SetRunningMode(runningMode);
            HiddenLayerList[0].ForwardPass(state.SparseFeature, state.DenseFeature.CopyTo());
            for (var i = 1; i < layerCount; i++)
            {
                HiddenLayerList[i].SetRunningMode(runningMode);
                HiddenLayerList[i].ForwardPass(state.SparseFeature, HiddenLayerList[i - 1].Cells);
            }

            // Errors flow from the CRF output down through the hidden layers.
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, t);
            HiddenLayerList[layerCount - 1].ComputeLayerErr(OutputLayer);
            for (var i = layerCount - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            // Apply weight updates.
            OutputLayer.BackwardPass();
            for (var i = 0; i < layerCount; i++)
            {
                HiddenLayerList[i].BackwardPass();
            }
        }
    }

    return predicted;
}
/// <summary>
/// Forward pass over a sequence; in training mode also back-propagates errors
/// and updates weights after each state.
/// </summary>
/// <param name="sequence">The sequence to process (must be a Sequence).</param>
/// <param name="runningMode">Training or test mode.</param>
/// <param name="outputRawScore">True to capture raw output-layer scores per state.</param>
/// <param name="m">Raw output scores per state, or null when outputRawScore is false.</param>
/// <returns>The predicted label index for each state.</returns>
public override int[] ProcessSequence(ISequence sequence, RunningMode runningMode, bool outputRawScore, out Matrix<float> m)
{
    Sequence pSequence = sequence as Sequence;
    var numStates = pSequence.States.Length;
    var numLayers = HiddenLayerList.Count;

    m = outputRawScore ? new Matrix<float>(numStates, OutputLayer.LayerSize) : null;
    var predicted = new int[numStates];
    // FIX: removed dead local `isTraining` — it was computed but never read.

    // reset all layers
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset();
    }

    // Set current sentence labels into short list in output layer.
    OutputLayer.LabelShortList.Clear();
    foreach (var state in pSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    // NOTE(review): unlike the CRF training path, this method never calls
    // SetRunningMode on the layers — confirm that is intentional.
    for (var curState = 0; curState < numStates; curState++)
    {
        // Compute first layer from the state's own features.
        var state = pSequence.States[curState];
        SetRuntimeFeatures(state, curState, numStates, predicted);
        HiddenLayerList[0].ForwardPass(state.SparseFeature, state.DenseFeature.CopyTo());

        // Each subsequent layer consumes the previous layer's output as its dense feature.
        for (var i = 1; i < numLayers; i++)
        {
            HiddenLayerList[i].ForwardPass(state.SparseFeature, HiddenLayerList[i - 1].Cells);
        }

        // Compute output layer from the top hidden layer's output.
        OutputLayer.ForwardPass(state.SparseFeature, HiddenLayerList[numLayers - 1].Cells);

        if (m != null)
        {
            OutputLayer.Cells.CopyTo(m[curState], 0);
        }

        predicted[curState] = OutputLayer.GetBestOutputIndex();

        if (runningMode == RunningMode.Training)
        {
            // Propagate errors from the output layer down to the first layer.
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (var i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            // Update net weights.
            OutputLayer.BackwardPass();
            for (var i = 0; i < numLayers; i++)
            {
                HiddenLayerList[i].BackwardPass();
            }
        }
    }

    return predicted;
}
/// <summary>
/// Forward pass over a sequence (legacy double-precision path); in training
/// mode also back-propagates errors and learns weights after each state.
/// </summary>
/// <param name="pSequence">The sequence to process.</param>
/// <param name="runningMode">Training, validation or test mode.</param>
/// <param name="outputRawScore">True to capture pre-softmax output scores per state.</param>
/// <param name="m">Raw (pre-softmax) output scores per state, or null.</param>
/// <returns>The predicted label index for each state.</returns>
public override int[] ProcessSequence(Sequence pSequence, RunningMode runningMode, bool outputRawScore, out Matrix<double> m)
{
    int numStates = pSequence.States.Length;
    int numLayers = HiddenLayerList.Count;

    // FIX: replaced `if (outputRawScore == true) ... else ...` with a ternary.
    m = outputRawScore ? new Matrix<double>(numStates, OutputLayer.LayerSize) : null;

    int[] predicted = new int[numStates];
    // FIX: collapsed the redundant `isTraining = true; if/else` assignment.
    bool isTraining = runningMode == RunningMode.Training;

    // reset all layers
    foreach (SimpleLayer layer in HiddenLayerList)
    {
        layer.netReset(isTraining);
    }

    for (int curState = 0; curState < numStates; curState++)
    {
        // Compute first layer from the state's own features.
        State state = pSequence.States[curState];
        SetInputLayer(state, curState, numStates, predicted);
        HiddenLayerList[0].computeLayer(state.SparseData, state.DenseData.CopyTo(), isTraining);

        // Each subsequent layer consumes the previous layer's output as its dense feature.
        for (int i = 1; i < numLayers; i++)
        {
            HiddenLayerList[i].computeLayer(state.SparseData, HiddenLayerList[i - 1].cellOutput, isTraining);
        }

        // Compute output layer.
        OutputLayer.CurrentLabelId = state.Label;
        OutputLayer.computeLayer(state.SparseData, HiddenLayerList[numLayers - 1].cellOutput, isTraining);

        // Raw scores are captured BEFORE softmax (original behavior preserved).
        if (m != null)
        {
            OutputLayer.cellOutput.CopyTo(m[curState], 0);
        }

        OutputLayer.Softmax(isTraining);
        predicted[curState] = OutputLayer.GetBestOutputIndex(isTraining);

        if (runningMode != RunningMode.Test)
        {
            // Accumulate log-likelihood; the small epsilon guards against log(0).
            logp += Math.Log10(OutputLayer.cellOutput[state.Label] + 0.0001);
        }

        if (runningMode == RunningMode.Training)
        {
            // Propagate errors from the output layer down to the first layer.
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (int i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            // Update output-layer and hidden-layer weights concurrently.
            Parallel.Invoke(() =>
            {
                OutputLayer.LearnFeatureWeights(numStates, curState);
            },
            () =>
            {
                Parallel.For(0, numLayers, parallelOption, i =>
                {
                    HiddenLayerList[i].LearnFeatureWeights(numStates, curState);
                });
            });
        }
    }

    return predicted;
}
/// <summary>
/// Processes a sequence with a CRF output layer (legacy double-precision path):
/// runs the network, applies CRF forward-backward, decodes the best label path,
/// and in training mode updates CRF transitions and learns weights.
/// </summary>
/// <param name="pSequence">The sequence to process.</param>
/// <param name="runningMode">Training, validation or test mode.</param>
/// <returns>The Viterbi-decoded label index for each state.</returns>
public override int[] ProcessSequenceCRF(Sequence pSequence, RunningMode runningMode)
{
    int numStates = pSequence.States.Length;
    int numLayers = HiddenLayerList.Count;

    // Get network output without CRF.
    Matrix<double> nnOutput;
    ProcessSequence(pSequence, RunningMode.Test, true, out nnOutput);

    // Compute CRF result.
    ForwardBackward(numStates, nnOutput);

    if (runningMode != RunningMode.Test)
    {
        // Accumulate log-likelihood of the gold labels under the CRF
        // distribution; the small epsilon guards against log(0).
        for (int i = 0; i < numStates; i++)
        {
            logp += Math.Log10(CRFSeqOutput[i][pSequence.States[i].Label] + 0.0001);
        }
    }

    // Compute best path in CRF result.
    int[] predicted = Viterbi(nnOutput, numStates);

    if (runningMode == RunningMode.Training)
    {
        // Update tag bigram transition for CRF model.
        UpdateBigramTransition(pSequence);

        // Reset all layer states, then re-run the forward pass in training
        // mode so errors can be back-propagated per state.
        foreach (SimpleLayer layer in HiddenLayerList)
        {
            layer.netReset(true);
        }

        for (int curState = 0; curState < numStates; curState++)
        {
            // Forward pass for this state (no output-layer forward here; the
            // error comes directly from the CRF sequence output).
            State state = pSequence.States[curState];
            SetInputLayer(state, curState, numStates, null);
            HiddenLayerList[0].computeLayer(state.SparseData, state.DenseData.CopyTo());
            for (int i = 1; i < numLayers; i++)
            {
                HiddenLayerList[i].computeLayer(state.SparseData, HiddenLayerList[i - 1].cellOutput);
            }

            // Error propagation from the CRF output down through the layers.
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (int i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            // Update net weights: output layer and all hidden layers learn concurrently.
            Parallel.Invoke(() =>
            {
                OutputLayer.LearnFeatureWeights(numStates, curState);
            },
            () =>
            {
                Parallel.For(0, numLayers, parallelOption, i =>
                {
                    HiddenLayerList[i].LearnFeatureWeights(numStates, curState);
                });
            });
        }
    }

    return (predicted);
}
/// <summary>
/// Runs one pass over a sequence pair using feature groups: encodes the source
/// sentence, shares its sparse/dense features across all layers, feeds the
/// target sequence through the network, and (in training mode) back-propagates
/// errors and updates weights.
/// </summary>
/// <param name="sequence">The sequence pair (source + target) to process.</param>
/// <param name="runningMode">Training or test mode.</param>
/// <param name="outputRawScore">True to capture raw output-layer scores per state.</param>
/// <param name="m">Raw output scores per state, or null when outputRawScore is false.</param>
/// <returns>The predicted label index for each target state.</returns>
private int[] TrainSequencePair(ISequence sequence, RunningMode runningMode, bool outputRawScore, out Matrix<float> m)
{
    SequencePair pSequence = sequence as SequencePair;
    var tgtSequence = pSequence.tgtSequence;

    // Reset all layers.
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset();
    }

    Sequence srcSequence;

    // Extract features from source sentences.
    srcSequence = pSequence.autoEncoder.Config.BuildSequence(pSequence.srcSentence);
    List<float[]> srcDenseFeatureGorups = new List<float[]>();
    SparseVector srcSparseFeatures = new SparseVector();
    ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize, srcDenseFeatureGorups, srcSparseFeatures);

    var numStates = pSequence.tgtSequence.States.Length;
    var numLayers = HiddenLayerList.Count;
    var predicted = new int[numStates];
    m = outputRawScore ? new Matrix<float>(numStates, OutputLayer.LayerSize) : null;

    // Set target sentence labels into short list in output layer.
    OutputLayer.LabelShortList.Clear();
    foreach (var state in tgtSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    // Set sparse feature group from source sequence; the trailing null slot is
    // filled per-state with the target state's sparse features below.
    sparseFeatureGorups.Clear();
    sparseFeatureGorups.Add(srcSparseFeatures);
    sparseFeatureGorups.Add(null);
    int targetSparseFeatureIndex = sparseFeatureGorups.Count - 1;

    // Set dense feature groups from source sequence; again, the trailing null
    // slot per layer is filled per-state with that layer's target-side input.
    for (var i = 0; i < numLayers; i++)
    {
        denseFeatureGroupsList[i].Clear();
        denseFeatureGroupsList[i].AddRange(srcDenseFeatureGorups);
        denseFeatureGroupsList[i].Add(null);
    }
    denseFeatureGroupsOutputLayer.Clear();
    denseFeatureGroupsOutputLayer.AddRange(srcDenseFeatureGorups);
    denseFeatureGroupsOutputLayer.Add(null);
    int targetDenseFeatureIndex = denseFeatureGroupsOutputLayer.Count - 1;

    for (var curState = 0; curState < numStates; curState++)
    {
        var state = tgtSequence.States[curState];

        // Set sparse feature groups: swap in this state's sparse features.
        sparseFeatureGorups[targetSparseFeatureIndex] = state.SparseFeature;

        // Compute first layer from the state's dense features.
        denseFeatureGroupsList[0][targetDenseFeatureIndex] = state.DenseFeature.CopyTo();
        HiddenLayerList[0].ForwardPass(sparseFeatureGorups, denseFeatureGroupsList[0]);

        // Compute middle layers.
        for (var i = 1; i < numLayers; i++)
        {
            // We use previous layer's output as dense feature for current layer.
            denseFeatureGroupsList[i][targetDenseFeatureIndex] = HiddenLayerList[i - 1].Cells;
            HiddenLayerList[i].ForwardPass(sparseFeatureGorups, denseFeatureGroupsList[i]);
        }

        // Compute output layer from the top hidden layer's output.
        denseFeatureGroupsOutputLayer[targetDenseFeatureIndex] = HiddenLayerList[numLayers - 1].Cells;
        OutputLayer.ForwardPass(sparseFeatureGorups, denseFeatureGroupsOutputLayer);

        if (m != null)
        {
            OutputLayer.Cells.CopyTo(m[curState], 0);
        }

        predicted[curState] = OutputLayer.GetBestOutputIndex();

        if (runningMode == RunningMode.Training)
        {
            // Error propagation from the output layer down to the first layer.
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (var i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            // Update net weights.
            OutputLayer.BackwardPass();
            for (var i = 0; i < numLayers; i++)
            {
                HiddenLayerList[i].BackwardPass();
            }
        }
    }

    return (predicted);
}
/// <summary>
/// Sequence-to-sequence pass (legacy API): encodes the source sentence, decodes
/// the target sequence state by state, and in training mode back-propagates
/// errors and updates weights.
/// </summary>
/// <param name="pSequence">The sequence pair (source + target) to process.</param>
/// <param name="runningMode">Training, validation or test mode.</param>
/// <returns>The predicted label index for each target state.</returns>
public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode)
{
    var tgtSequence = pSequence.tgtSequence;
    var isTraining = runningMode == RunningMode.Training;

    // Reset all layers.
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset(isTraining);
    }

    // Extract features from source sentences via the auto-encoder.
    var srcSequence = pSequence.autoEncoder.Config.BuildSequence(pSequence.srcSentence);
    float[] srcHiddenAvgOutput;
    Dictionary<int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize, out srcHiddenAvgOutput, out srcSparseFeatures);

    var numStates = pSequence.tgtSequence.States.Length;
    var numLayers = HiddenLayerList.Count;
    var predicted = new int[numStates];

    // Set target sentence labels into short list in output layer.
    OutputLayer.LabelShortList = new List<int>();
    foreach (var state in tgtSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    for (var curState = 0; curState < numStates; curState++)
    {
        // Build runtime features for this target state.
        var state = tgtSequence.States[curState];
        SetRuntimeFeatures(state, curState, numStates, predicted);

        // Build sparse features for all layers: target-state features plus
        // source-sentence features in one vector.
        var sparseVector = new SparseVector();
        sparseVector.SetLength(tgtSequence.SparseFeatureSize + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(state.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        // Compute first layer: state dense features concatenated with the
        // averaged source encoding.
        var denseFeatures = RNNHelper.ConcatenateVector(state.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].ForwardPass(sparseVector, denseFeatures, isTraining);

        // Compute middle layers.
        for (var i = 1; i < numLayers; i++)
        {
            // We use previous layer's output as dense feature for current layer.
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].Cell, srcHiddenAvgOutput);
            HiddenLayerList[i].ForwardPass(sparseVector, denseFeatures, isTraining);
        }

        // Compute output layer, then softmax to obtain label probabilities.
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].Cell, srcHiddenAvgOutput);
        OutputLayer.ForwardPass(sparseVector, denseFeatures, isTraining);
        OutputLayer.Softmax(isTraining);

        predicted[curState] = OutputLayer.GetBestOutputIndex(isTraining);

        if (runningMode != RunningMode.Test)
        {
            // Accumulate log-likelihood; the small epsilon guards against log(0).
            logp += Math.Log10(OutputLayer.Cell[state.Label] + 0.0001);
        }

        if (runningMode == RunningMode.Training)
        {
            // Error propagation from the output layer down to the first layer.
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (var i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            // Update net weights: output layer and hidden layers learn concurrently.
            Parallel.Invoke(() =>
            {
                OutputLayer.BackwardPass(numStates, curState);
            },
            () =>
            {
                Parallel.For(0, numLayers, parallelOption, i =>
                {
                    HiddenLayerList[i].BackwardPass(numStates, curState);
                });
            });
        }
    }

    return (predicted);
}
/// <summary>
/// Pass error from the last layer to the first layer in a bi-directional
/// network: output-layer loss is split into the top forward and backward
/// layers, then propagated downward and averaged between the two directions.
/// </summary>
/// <param name="pSequence">The sequence whose per-state errors are propagated.</param>
protected virtual void ComputeDeepErr(Sequence pSequence)
{
    var numStates = pSequence.States.Length;
    var numLayers = forwardHiddenLayers.Count;

    // Calculate output layer error for every time step.
    for (var curState = 0; curState < numStates; curState++)
    {
        OutputLayer.Cells = OutputCells[curState].Cells;
        OutputLayer.Errs = OutputCells[curState].Errs;
        OutputLayer.ComputeOutputLoss(CRFSeqOutput, pSequence.States[curState], curState);
    }

    //// Now we already have err in output layer, let's pass them back to other layers
    //// Pass error from i+1 to i layer: output error goes into both the top
    //// forward and top backward cell lists in one call.
    var errLayer1 = forwardCellList[numLayers - 1];
    var errLayer2 = backwardCellList[numLayers - 1];
    for (var curState = 0; curState < numStates; curState++)
    {
        List<float[]> destErrsList = new List<float[]>();
        destErrsList.Add(errLayer1[curState].Errs);
        destErrsList.Add(errLayer2[curState].Errs);
        OutputLayer.Errs = OutputCells[curState].Errs;
        OutputLayer.ComputeLayerErr(destErrsList);
    }

    Vector<float> vecTwo = new Vector<float>(2.0f);
    for (var i = numLayers - 2; i >= 0; i--)
    {
        var lastForwardLayer = forwardHiddenLayers[i + 1];
        var errLayerF = forwardCellList[i];
        var srcErrLayerF = forwardCellList[i + 1];

        var lastBackwardLayer = backwardHiddenLayers[i + 1];
        var errLayerB = backwardCellList[i];
        var srcErrLayerB = backwardCellList[i + 1];

        for (var curState = 0; curState < numStates; curState++)
        {
            var errLayerFCur = errLayerF[curState];
            var errLayerBCur = errLayerB[curState];

            // Both destination buffers receive errors from the forward layer,
            // then from the backward layer. NOTE(review): the `false` flag on
            // the second call presumably accumulates into the buffers instead
            // of overwriting them — confirm against ComputeLayerErr; the
            // divide-by-two below then averages the two contributions.
            List<float[]> destErrList = new List<float[]>();
            destErrList.Add(errLayerFCur.Errs);
            destErrList.Add(errLayerBCur.Errs);
            lastForwardLayer.Errs = srcErrLayerF[curState].Errs;
            lastForwardLayer.ComputeLayerErr(destErrList);
            lastBackwardLayer.Errs = srcErrLayerB[curState].Errs;
            lastBackwardLayer.ComputeLayerErr(destErrList, false);

            // SIMD pass: halve both error buffers Vector<float>.Count floats
            // at a time; `moreItems` is the scalar tail that does not fill a
            // whole vector.
            int j = 0;
            int errLength = errLayerFCur.Errs.Length;
            var moreItems = (errLength % Vector<float>.Count);
            while (j < errLength - moreItems)
            {
                Vector<float> vecErrLayerF = new Vector<float>(errLayerFCur.Errs, j);
                Vector<float> vecErrLayerB = new Vector<float>(errLayerBCur.Errs, j);

                vecErrLayerF /= vecTwo;
                vecErrLayerB /= vecTwo;

                vecErrLayerF.CopyTo(errLayerFCur.Errs, j);
                vecErrLayerB.CopyTo(errLayerBCur.Errs, j);

                j += Vector<float>.Count;
            }

            // Scalar tail: halve the remaining elements one by one.
            while (j < errLength)
            {
                errLayerFCur.Errs[j] /= 2.0f;
                errLayerBCur.Errs[j] /= 2.0f;
                j++;
            }
        }
    }
}