/// <summary>
/// Propagates errors from the output layer down to the first hidden layer.
/// Output-layer errors are computed for every state first; they are then pushed into the
/// top forward layer and copied to the top backward layer, and finally handed down
/// one layer at a time in both directions.
/// </summary>
/// <param name="pSequence">Sequence whose states are passed to the output-loss computation.</param>
protected override void ComputeDeepErr(Sequence pSequence)
{
    var stateCount = pSequence.States.Length;
    var layerCount = forwardHiddenLayers.Count;

    // Pass 1: point the output layer at each state's cells/errors and compute its loss.
    for (var t = 0; t < stateCount; t++)
    {
        var outputCell = OutputCells[t];
        OutputLayer.Cells = outputCell.Cells;
        OutputLayer.Errs = outputCell.Errs;
        OutputLayer.ComputeOutputLoss(CRFSeqOutput, pSequence.States[t], t);
    }

    // Pass 2: output layer -> top hidden layer. The error is written into the forward
    // cells and then duplicated into the backward cells of the same state.
    var topForwardCells = forwardCellList[layerCount - 1];
    var topBackwardCells = backwardCellList[layerCount - 1];
    for (var t = 0; t < stateCount; t++)
    {
        OutputLayer.Errs = OutputCells[t].Errs;
        OutputLayer.ComputeLayerErr(topForwardCells[t].Errs);
        topForwardCells[t].Errs.CopyTo(topBackwardCells[t].Errs, 0);
    }

    // Pass 3: hidden layer (layer + 1) -> hidden layer (layer), for both directions.
    for (var layer = layerCount - 2; layer >= 0; layer--)
    {
        var upperForward = forwardHiddenLayers[layer + 1];
        var forwardDest = forwardCellList[layer];
        var forwardSrc = forwardCellList[layer + 1];

        var upperBackward = backwardHiddenLayers[layer + 1];
        var backwardDest = backwardCellList[layer];
        var backwardSrc = backwardCellList[layer + 1];

        for (var t = 0; t < stateCount; t++)
        {
            var forwardTarget = forwardDest[t];
            var backwardTarget = backwardDest[t];

            upperForward.Errs = forwardSrc[t].Errs;
            upperForward.ComputeLayerErr(forwardTarget.Errs);

            upperBackward.Errs = backwardSrc[t].Errs;
            upperBackward.ComputeLayerErr(backwardTarget.Errs);
        }
    }
}
/// <summary>
/// Processes a sequence with a CRF on top of the network output. The raw network output is
/// obtained first, then fed to forward-backward and Viterbi to get the predicted path.
/// In training mode the bigram transitions are updated and the network is re-run state by
/// state to compute errors and update weights.
/// </summary>
/// <param name="pSequence">Sequence to process.</param>
/// <param name="runningMode">When <c>Training</c>, transitions and network weights are updated.</param>
/// <returns>The path returned by <c>Viterbi</c> for this sequence.</returns>
public override int[] ProcessSequenceCRF(Sequence pSequence, RunningMode runningMode)
{
    var stateCount = pSequence.States.Length;
    var layerCount = HiddenLayerList.Count;

    // Network output without CRF; ProcessSequence is always invoked in Test mode here.
    Matrix<float> rawOutput;
    ProcessSequence(pSequence, RunningMode.Test, true, out rawOutput);

    // CRF result, then the best path through it.
    ForwardBackward(stateCount, rawOutput);
    var predicted = Viterbi(rawOutput, stateCount);

    if (runningMode != RunningMode.Training)
    {
        return predicted;
    }

    // Update tag bigram transitions for the CRF model.
    UpdateBigramTransition(pSequence);

    // Reset all hidden layers before the training pass.
    foreach (var hiddenLayer in HiddenLayerList)
    {
        hiddenLayer.Reset();
    }

    for (var t = 0; t < stateCount; t++)
    {
        var state = pSequence.States[t];
        SetRuntimeFeatures(state, t, stateCount, null);

        // Forward pass: the first layer consumes the state's own dense feature,
        // every later layer consumes the cells of the layer below it.
        var firstLayer = HiddenLayerList[0];
        firstLayer.SetRunningMode(runningMode);
        firstLayer.ForwardPass(state.SparseFeature, state.DenseFeature.CopyTo());

        for (var layer = 1; layer < layerCount; layer++)
        {
            var current = HiddenLayerList[layer];
            current.SetRunningMode(runningMode);
            current.ForwardPass(state.SparseFeature, HiddenLayerList[layer - 1].Cells);
        }

        // Error propagation, from the output layer towards the input layer.
        OutputLayer.ComputeOutputLoss(CRFSeqOutput, state, t);
        OutputLayer.ComputeLayerErr(HiddenLayerList[layerCount - 1]);
        for (var layer = layerCount - 1; layer > 0; layer--)
        {
            HiddenLayerList[layer].ComputeLayerErr(HiddenLayerList[layer - 1]);
        }

        // Weight update: output layer first, then hidden layers from first to last.
        OutputLayer.BackwardPass();
        for (var layer = 0; layer < layerCount; layer++)
        {
            HiddenLayerList[layer].BackwardPass();
        }
    }

    return predicted;
}
/// <summary>
/// Runs the network over every state of a sequence and returns the best output index for
/// each state. In training mode, errors are propagated and weights are updated immediately
/// after each state is processed.
/// </summary>
/// <param name="sequence">Sequence to process; must be a <see cref="Sequence"/> instance.</param>
/// <param name="runningMode">When <c>Training</c>, back-propagation and weight updates run per state.</param>
/// <param name="outputRawScore">When true, the output-layer cells of each state are copied into <paramref name="m"/>.</param>
/// <param name="m">One row of output-layer cells per state, or null when <paramref name="outputRawScore"/> is false.</param>
/// <returns>The best output index for each state.</returns>
/// <exception cref="System.ArgumentException"><paramref name="sequence"/> is null or not a <see cref="Sequence"/>.</exception>
public override int[] ProcessSequence(ISequence sequence, RunningMode runningMode, bool outputRawScore, out Matrix<float> m)
{
    // Fail with a descriptive error instead of a NullReferenceException on the first dereference.
    var pSequence = sequence as Sequence;
    if (pSequence == null)
    {
        throw new System.ArgumentException("ProcessSequence requires a non-null Sequence instance.", "sequence");
    }

    var numStates = pSequence.States.Length;
    var numLayers = HiddenLayerList.Count;

    m = outputRawScore ? new Matrix<float>(numStates, OutputLayer.LayerSize) : null;

    var predicted = new int[numStates];
    var isTraining = runningMode == RunningMode.Training;

    // Reset all layers
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset();
    }

    // Set current sentence labels into short list in output layer
    OutputLayer.LabelShortList.Clear();
    foreach (var state in pSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    for (var curState = 0; curState < numStates; curState++)
    {
        // Compute first layer; the predictions made so far are handed to SetRuntimeFeatures.
        var state = pSequence.States[curState];
        SetRuntimeFeatures(state, curState, numStates, predicted);
        HiddenLayerList[0].ForwardPass(state.SparseFeature, state.DenseFeature.CopyTo());

        // Compute each remaining layer, using the previous layer's output as its dense feature
        for (var i = 1; i < numLayers; i++)
        {
            HiddenLayerList[i].ForwardPass(state.SparseFeature, HiddenLayerList[i - 1].Cells);
        }

        // Compute output layer
        OutputLayer.ForwardPass(state.SparseFeature, HiddenLayerList[numLayers - 1].Cells);

        if (m != null)
        {
            OutputLayer.Cells.CopyTo(m[curState], 0);
        }

        predicted[curState] = OutputLayer.GetBestOutputIndex();

        if (isTraining)
        {
            // Error propagation
            OutputLayer.ComputeOutputLoss(CRFSeqOutput, state, curState);

            // Propagate errors to each layer from output layer to input layer
            OutputLayer.ComputeLayerErr(HiddenLayerList[numLayers - 1]);
            for (var i = numLayers - 1; i > 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i - 1]);
            }

            // Update net weights
            OutputLayer.BackwardPass();
            for (var i = 0; i < numLayers; i++)
            {
                HiddenLayerList[i].BackwardPass();
            }
        }
    }

    return predicted;
}
/// <summary>
/// Propagates errors from the output layer down to the first hidden layer.
/// Output-layer errors are computed for every state first. Each upper layer then writes
/// its error into both the forward and the backward cells of the layer below, and the
/// accumulated lower-layer errors are halved.
/// </summary>
/// <param name="pSequence">Sequence whose states are passed to the output-loss computation.</param>
protected virtual void ComputeDeepErr(Sequence pSequence)
{
    var stateCount = pSequence.States.Length;
    var layerCount = forwardHiddenLayers.Count;

    // Pass 1: point the output layer at each state's cells/errors and compute its loss.
    for (var t = 0; t < stateCount; t++)
    {
        var outputCell = OutputCells[t];
        OutputLayer.Cells = outputCell.Cells;
        OutputLayer.Errs = outputCell.Errs;
        OutputLayer.ComputeOutputLoss(CRFSeqOutput, pSequence.States[t], t);
    }

    // Pass 2: output layer -> top forward and top backward cells of the same state.
    var topForwardCells = forwardCellList[layerCount - 1];
    var topBackwardCells = backwardCellList[layerCount - 1];
    for (var t = 0; t < stateCount; t++)
    {
        var destinations = new List<float[]>
        {
            topForwardCells[t].Errs,
            topBackwardCells[t].Errs
        };

        OutputLayer.Errs = OutputCells[t].Errs;
        OutputLayer.ComputeLayerErr(destinations);
    }

    // Pass 3: hidden layer (layer + 1) -> hidden layer (layer).
    var two = new Vector<float>(2.0f);
    var lanes = Vector<float>.Count;
    for (var layer = layerCount - 2; layer >= 0; layer--)
    {
        var upperForward = forwardHiddenLayers[layer + 1];
        var forwardDest = forwardCellList[layer];
        var forwardSrc = forwardCellList[layer + 1];

        var upperBackward = backwardHiddenLayers[layer + 1];
        var backwardDest = backwardCellList[layer];
        var backwardSrc = backwardCellList[layer + 1];

        for (var t = 0; t < stateCount; t++)
        {
            var forwardTarget = forwardDest[t];
            var backwardTarget = backwardDest[t];

            var destinations = new List<float[]>
            {
                forwardTarget.Errs,
                backwardTarget.Errs
            };

            // Both directions of the upper layer write into the same pair of destinations;
            // the backward call passes false as its second argument.
            upperForward.Errs = forwardSrc[t].Errs;
            upperForward.ComputeLayerErr(destinations);

            upperBackward.Errs = backwardSrc[t].Errs;
            upperBackward.ComputeLayerErr(destinations, false);

            // Halve every error value: whole SIMD vectors first, then the scalar remainder.
            var forwardErrs = forwardTarget.Errs;
            var backwardErrs = backwardTarget.Errs;
            var length = forwardErrs.Length;
            var vectorEnd = length - (length % lanes);

            var k = 0;
            for (; k < vectorEnd; k += lanes)
            {
                var halfForward = new Vector<float>(forwardErrs, k);
                var halfBackward = new Vector<float>(backwardErrs, k);

                halfForward /= two;
                halfBackward /= two;

                halfForward.CopyTo(forwardErrs, k);
                halfBackward.CopyTo(backwardErrs, k);
            }

            for (; k < length; k++)
            {
                forwardErrs[k] /= 2.0f;
                backwardErrs[k] /= 2.0f;
            }
        }
    }
}
/// <summary>
/// Runs the network over the target side of a sequence pair, with features extracted from
/// the source sentence supplied to every layer alongside the per-state target features.
/// In training mode, errors are propagated and weights are updated after each state.
/// </summary>
/// <param name="sequence">Sequence to process; must be a <see cref="SequencePair"/> instance.</param>
/// <param name="runningMode">When <c>Training</c>, back-propagation and weight updates run per state.</param>
/// <param name="outputRawScore">When true, the output-layer cells of each state are copied into <paramref name="m"/>.</param>
/// <param name="m">One row of output-layer cells per target state, or null when <paramref name="outputRawScore"/> is false.</param>
/// <returns>The best output index for each target state.</returns>
/// <exception cref="System.ArgumentException"><paramref name="sequence"/> is null or not a <see cref="SequencePair"/>.</exception>
private int[] TrainSequencePair(ISequence sequence, RunningMode runningMode, bool outputRawScore, out Matrix<float> m)
{
    // Fail with a descriptive error instead of a NullReferenceException on the first dereference.
    var pSequence = sequence as SequencePair;
    if (pSequence == null)
    {
        throw new System.ArgumentException("TrainSequencePair requires a non-null SequencePair instance.", "sequence");
    }

    var tgtSequence = pSequence.tgtSequence;

    // Reset all layers
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset();
    }

    // Extract features from the source sentence
    Sequence srcSequence = pSequence.autoEncoder.Config.BuildSequence(pSequence.srcSentence);
    var srcDenseFeatureGroups = new List<float[]>();
    var srcSparseFeatures = new SparseVector();
    ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize, srcDenseFeatureGroups, srcSparseFeatures);

    var numStates = tgtSequence.States.Length;
    var numLayers = HiddenLayerList.Count;
    var predicted = new int[numStates];

    m = outputRawScore ? new Matrix<float>(numStates, OutputLayer.LayerSize) : null;

    // Set target sentence labels into short list in output layer
    OutputLayer.LabelShortList.Clear();
    foreach (var state in tgtSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    // Sparse feature groups: the source features, plus a trailing null placeholder that is
    // replaced by each target state's sparse feature inside the loop below.
    sparseFeatureGorups.Clear();
    sparseFeatureGorups.Add(srcSparseFeatures);
    sparseFeatureGorups.Add(null);
    int targetSparseFeatureIndex = sparseFeatureGorups.Count - 1;

    // Dense feature groups: every layer gets the source groups plus one trailing null
    // placeholder for its per-state input.
    for (var i = 0; i < numLayers; i++)
    {
        denseFeatureGroupsList[i].Clear();
        denseFeatureGroupsList[i].AddRange(srcDenseFeatureGroups);
        denseFeatureGroupsList[i].Add(null);
    }

    denseFeatureGroupsOutputLayer.Clear();
    denseFeatureGroupsOutputLayer.AddRange(srcDenseFeatureGroups);
    denseFeatureGroupsOutputLayer.Add(null);

    // All dense group lists were built identically above, so this index addresses the
    // placeholder slot in each of them.
    int targetDenseFeatureIndex = denseFeatureGroupsOutputLayer.Count - 1;

    for (var curState = 0; curState < numStates; curState++)
    {
        var state = tgtSequence.States[curState];

        // Set sparse feature groups
        sparseFeatureGorups[targetSparseFeatureIndex] = state.SparseFeature;

        // Compute first layer
        denseFeatureGroupsList[0][targetDenseFeatureIndex] = state.DenseFeature.CopyTo();
        HiddenLayerList[0].ForwardPass(sparseFeatureGorups, denseFeatureGroupsList[0]);

        // Compute middle layers, using the previous layer's output as the dense feature
        for (var i = 1; i < numLayers; i++)
        {
            denseFeatureGroupsList[i][targetDenseFeatureIndex] = HiddenLayerList[i - 1].Cells;
            HiddenLayerList[i].ForwardPass(sparseFeatureGorups, denseFeatureGroupsList[i]);
        }

        // Compute output layer
        denseFeatureGroupsOutputLayer[targetDenseFeatureIndex] = HiddenLayerList[numLayers - 1].Cells;
        OutputLayer.ForwardPass(sparseFeatureGorups, denseFeatureGroupsOutputLayer);

        if (m != null)
        {
            OutputLayer.Cells.CopyTo(m[curState], 0);
        }

        predicted[curState] = OutputLayer.GetBestOutputIndex();

        if (runningMode == RunningMode.Training)
        {
            // Error propagation
            OutputLayer.ComputeOutputLoss(CRFSeqOutput, state, curState);

            // Propagate errors to each layer from output layer to input layer
            OutputLayer.ComputeLayerErr(HiddenLayerList[numLayers - 1]);
            for (var i = numLayers - 1; i > 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i - 1]);
            }

            // Update net weights
            OutputLayer.BackwardPass();
            for (var i = 0; i < numLayers; i++)
            {
                HiddenLayerList[i].BackwardPass();
            }
        }
    }

    return predicted;
}