Example no. 1
        // Forward pass. The output layer consists of tag values.
        public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
        {
            //save the previous hidden layer's activations
            cellOutput.CopyTo(previousCellOutput, 0);

            //Apply previous feature to current time
            //hidden(t-1) -> hidden(t)
            RNNHelper.matrixXvectorADD(cellOutput, previousCellOutput, BpttWeights, LayerSize, LayerSize);

            //Apply features on hidden layer
            SparseFeature = sparseFeature;
            DenseFeature = denseFeature;

            if (SparseFeatureSize > 0)
            {
                //Apply sparse features
                Parallel.For(0, LayerSize, parallelOption, b =>
                {
                    double score = 0;
                    double[] vector_b = SparseWeights[b];
                    for (int i = 0; i < SparseFeature.Count; i++)
                    {
                        var entry = SparseFeature.GetEntry(i);
                        score += entry.Value * vector_b[entry.Key];
                    }
                    cellOutput[b] += score;
                });
            }

            if (DenseFeatureSize > 0)
            {
                //Apply dense features
                RNNHelper.matrixXvectorADD(cellOutput, DenseFeature, DenseWeights, LayerSize, DenseFeatureSize, false);
            }

            //activate layer
            activityLayer(isTrain);
        }
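
For readers new to the RNNSharp helpers, the sketch below restates the core of this forward step on plain arrays: feed the previous hidden state back through the recurrent weights, add the sparse-input contribution, and squash the result. The row-major weight layout and the sigmoid activation are illustrative assumptions, not RNNSharp's actual internals.

    using System;

    // Minimal sketch of the Elman-style recurrent step above, assuming row-major
    // weight matrices and a sigmoid activation (both assumptions).
    static class ElmanStepSketch
    {
        public static void Forward(
            double[] hidden, double[] prevHidden,
            double[][] recurrentWeights,              // [LayerSize][LayerSize]
            (int Key, double Value)[] sparseFeature,  // active input indices and values
            double[][] sparseWeights)                 // [LayerSize][SparseFeatureSize]
        {
            int layerSize = hidden.Length;
            for (int b = 0; b < layerSize; b++)
            {
                // hidden(t-1) -> hidden(t): recurrent contribution
                double score = 0;
                for (int i = 0; i < layerSize; i++)
                    score += recurrentWeights[b][i] * prevHidden[i];

                // sparse input contribution: only active indices matter
                foreach (var entry in sparseFeature)
                    score += entry.Value * sparseWeights[b][entry.Key];

                // squash (the activation choice is an assumption)
                hidden[b] = 1.0 / (1.0 + Math.Exp(-score));
            }
        }
    }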
Example no. 2
        public override void ForwardPass(SparseVector sparseFeature, float[] denseFeature)
        {
            if (runningMode == RunningMode.Training)
            {
                negativeSampleWordList.Clear();

                foreach (var labelId in LabelShortList)
                {
                    negativeSampleWordList.Add(labelId);
                }

                for (var i = 0; i < NegativeSampleSize; i++)
                {
                    var wordId = rand.Next(LayerSize); //uniform in [0, LayerSize), no modulo bias
                    while (negativeSampleWordList.Contains(wordId))
                    {
                        wordId = (wordId + 1) % LayerSize;
                    }
                    negativeSampleWordList.Add(wordId);
                }

                if (DenseFeatureSize > 0)
                {
                    DenseFeature = denseFeature;
                    RNNHelper.matrixXvectorADD(Cells, denseFeature, DenseWeights, negativeSampleWordList, DenseFeatureSize);
                }

                if (SparseFeatureSize > 0)
                {
                    //Apply sparse features
                    SparseFeature = sparseFeature;

                    foreach (var b in negativeSampleWordList)
                    {
                        float score    = 0;
                        var   vector_b = SparseWeights[b];
                        foreach (var pair in SparseFeature)
                        {
                            score += pair.Value * vector_b[pair.Key];
                        }
                        Cells[b] += score;
                    }
                }

                //Softmax
                double sum = 0;
                foreach (var c in negativeSampleWordList)
                {
                    var cell = Cells[c];
                    if (cell > 50)
                    {
                        cell = 50;
                    }
                    if (cell < -50)
                    {
                        cell = -50;
                    }
                    var val = (float)Math.Exp(cell);
                    sum     += val;
                    Cells[c] = val;
                }

                foreach (var c in negativeSampleWordList)
                {
                    Cells[c] /= (float)sum;
                }
            }
            else
            {
                base.ForwardPass(sparseFeature, denseFeature);
            }
        }
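
Two ideas drive the training branch above: sample a small candidate set (the gold labels plus a few random negatives), then normalize scores only over that set, clipping before exponentiation to avoid overflow. A self-contained sketch of both steps, with hypothetical names:

    using System;
    using System.Collections.Generic;

    // Sketch of negative sampling plus a clipped softmax over the sampled
    // candidates only. Names are hypothetical; this is not RNNSharp's code.
    static class NegativeSamplingSketch
    {
        static readonly Random rand = new Random();

        // Gold labels plus `negativeSampleSize` distinct random negatives.
        public static HashSet<int> SampleCandidates(IEnumerable<int> goldLabels, int layerSize, int negativeSampleSize)
        {
            var candidates = new HashSet<int>(goldLabels);
            for (int i = 0; i < negativeSampleSize; i++)
            {
                int wordId = rand.Next(layerSize);
                while (candidates.Contains(wordId))
                    wordId = (wordId + 1) % layerSize;  // linear probing to skip duplicates
                candidates.Add(wordId);
            }
            return candidates;
        }

        // Softmax restricted to the candidate set, clipping scores to [-50, 50].
        public static void SoftmaxOverCandidates(float[] cells, HashSet<int> candidates)
        {
            double sum = 0;
            foreach (int c in candidates)
            {
                double clipped = Math.Max(-50f, Math.Min(50f, cells[c]));
                float val = (float)Math.Exp(clipped);
                sum += val;
                cells[c] = val;
            }
            foreach (int c in candidates)
                cells[c] /= (float)sum;
        }
    }

A HashSet makes the duplicate check O(1); the original uses a List, which is O(n) per lookup but cheap at the small sample sizes involved.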
Example no. 3
        void ExtractSparseFeature(int currentState, int numStates, List<string[]> features, State pState)
        {
            Dictionary<int, float> sparseFeature = new Dictionary<int, float>();
            int start = 0;
            var fc    = FeatureContext;

            //Extract TFeatures in given context window
            if (TFeaturizer != null)
            {
                if (fc.ContainsKey(TFEATURE_CONTEXT))
                {
                    List<int> v = fc[TFEATURE_CONTEXT];
                    for (int j = 0; j < v.Count; j++)
                    {
                        int offset = TruncPosition(currentState + v[j], 0, numStates);

                        List<int> tfeatureList = TFeaturizer.GetFeatureIds(features, offset);
                        foreach (int featureId in tfeatureList)
                        {
                            if (TFeatureWeightType == TFEATURE_WEIGHT_TYPE_ENUM.BINARY)
                            {
                                sparseFeature[start + featureId] = 1;
                            }
                            else
                            {
                                if (!sparseFeature.ContainsKey(start + featureId))
                                {
                                    sparseFeature.Add(start + featureId, 1);
                                }
                                else
                                {
                                    sparseFeature[start + featureId]++;
                                }
                            }
                        }
                        start += TFeaturizer.GetFeatureSize();
                    }
                }
            }

            // Create a placeholder for runtime features.
            // The real feature values are computed at runtime.
            if (fc.ContainsKey(RT_FEATURE_CONTEXT))
            {
                List<int> v = fc[RT_FEATURE_CONTEXT];
                pState.RuntimeFeatures = new PriviousLabelFeature[v.Count];
                for (int j = 0; j < v.Count; j++)
                {
                    if (v[j] < 0)
                    {
                        pState.AddRuntimeFeaturePlacehold(j, v[j], sparseFeature.Count, start);
                        sparseFeature[start] = 0; //Reserve the position as a placeholder
                        start += TagSet.GetSize();
                    }
                    else
                    {
                        throw new Exception("The offset of a runtime feature must be negative.");
                    }
                }
            }

            SparseVector spSparseFeature = pState.SparseFeature;

            spSparseFeature.SetLength(SparseFeatureSize);
            spSparseFeature.AddKeyValuePairData(sparseFeature);
        }
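
The difference between the BINARY and frequency weight types above reduces to "set the slot to 1" versus "increment it", with `start` offsetting each context window's block of feature ids. A compact sketch of that accumulation, with a hypothetical enum standing in for TFEATURE_WEIGHT_TYPE_ENUM:

    using System.Collections.Generic;

    // Sketch of the binary-vs-frequency accumulation above. The enum and the
    // method are hypothetical stand-ins, not RNNSharp's types.
    enum WeightType { Binary, Frequency }

    static class SparseFeatureSketch
    {
        public static void Accumulate(
            Dictionary<int, float> sparseFeature,
            IEnumerable<int> featureIds,
            int blockOffset,          // `start` above: offset of this window's block
            WeightType weightType)
        {
            foreach (int featureId in featureIds)
            {
                int key = blockOffset + featureId;
                if (weightType == WeightType.Binary)
                {
                    sparseFeature[key] = 1;           // presence only
                }
                else
                {
                    sparseFeature.TryGetValue(key, out float count);
                    sparseFeature[key] = count + 1;   // occurrence count
                }
            }
        }
    }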
Example no. 4
 // Base implementation: apply the dense weights only; the sparse features and
 // the isTrain flag are unused at this level.
 public virtual void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
 {
     DenseFeature = denseFeature;
     RNNHelper.matrixXvectorADD(cellOutput, denseFeature, DenseWeights, LayerSize, DenseFeatureSize);
 }
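
RNNHelper.matrixXvectorADD appears throughout these examples but is never shown. Below is a plausible reading of what it computes, inferred from the call sites; the row-major layout and accumulate-into-destination semantics are assumptions, not RNNSharp's actual code:

    // Plausible sketch of a matrixXvectorADD helper: dest[i] += weights[i] . src
    // for each output row. Signature inferred from the call sites above.
    static class MatrixVectorSketch
    {
        public static void MatrixXVectorAdd(
            double[] dest, double[] src, double[][] weights,
            int destSize, int srcSize)
        {
            for (int i = 0; i < destSize; i++)
            {
                double sum = 0;
                for (int j = 0; j < srcSize; j++)
                    sum += weights[i][j] * src[j];
                dest[i] += sum;                      // accumulate, don't overwrite
            }
        }
    }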
Example no. 5
        // Forward pass. The output layer consists of tag values.
        public override void ForwardPass(SparseVector sparseFeature, float[] denseFeature)
        {
            //inputs(t) -> hidden(t)
            //Get sparse feature and apply it into hidden layer
            SparseFeature = sparseFeature;
            DenseFeature  = denseFeature;

            for (var j = 0; j < LayerSize; j++)
            {
                var cell_j       = LSTMCells[j];
                var cellWeight_j = CellWeights[j];

                //hidden(t-1) -> hidden(t)
                cell_j.previousCellState  = cell_j.cellState;
                cell_j.previousCellOutput = Cells[j];

                var vecCell_j = Vector4.Zero;

                if (SparseFeatureSize > 0)
                {
                    //Apply sparse weights
                    var weights = sparseFeatureWeights[j];
                    var deri    = sparseFeatureToHiddenDeri[j];
                    foreach (var pair in SparseFeature)
                    {
                        vecCell_j += weights[pair.Key] * pair.Value;
                        if (!deri.ContainsKey(pair.Key))
                        {
                            deri.Add(pair.Key, new Vector3(0));
                        }
                    }
                }

                if (DenseFeatureSize > 0)
                {
                    //Apply dense weights
                    var     k = 0;
                    float[] denseInputGateWeight_j  = wDenseInputGate.weights[j];
                    float[] denseForgetGateWeight_j = wDenseForgetGate.weights[j];
                    float[] denseCellGateWeight_j   = wDenseCellGate.weights[j];
                    float[] denseOutputGateWeight_j = wDenseOutputGate.weights[j];

                    var moreItems = (DenseFeatureSize % Vector<float>.Count);
                    while (k < DenseFeatureSize - moreItems)
                    {
                        var vX       = new Vector<float>(denseInputGateWeight_j, k);
                        var vY       = new Vector<float>(denseForgetGateWeight_j, k);
                        var vZ       = new Vector<float>(denseCellGateWeight_j, k);
                        var vW       = new Vector<float>(denseOutputGateWeight_j, k);
                        var vFeature = new Vector<float>(DenseFeature, k);

                        vecCell_j.X += Vector.Dot(vX, vFeature);
                        vecCell_j.Y += Vector.Dot(vY, vFeature);
                        vecCell_j.Z += Vector.Dot(vZ, vFeature);
                        vecCell_j.W += Vector.Dot(vW, vFeature);

                        k += Vector<float>.Count;
                    }

                    while (k < DenseFeatureSize)
                    {
                        vecCell_j.X += denseInputGateWeight_j[k] * DenseFeature[k];
                        vecCell_j.Y += denseForgetGateWeight_j[k] * DenseFeature[k];
                        vecCell_j.Z += denseCellGateWeight_j[k] * DenseFeature[k];
                        vecCell_j.W += denseOutputGateWeight_j[k] * DenseFeature[k];
                        k++;
                    }
                }

                //load the accumulated feature activations into the gate net inputs
                cell_j.netIn     = vecCell_j.X;
                cell_j.netForget = vecCell_j.Y;
                cell_j.netCellState = vecCell_j.Z;
                cell_j.netOut = vecCell_j.W;

                //include internal connection multiplied by the previous cell state
                cell_j.netIn += cell_j.previousCellState * cellWeight_j.wPeepholeIn + cell_j.previousCellOutput * cellWeight_j.wCellIn;
                //squash input
                cell_j.yIn = Sigmoid(cell_j.netIn);

                //include internal connection multiplied by the previous cell state
                cell_j.netForget += cell_j.previousCellState * cellWeight_j.wPeepholeForget +
                                    cell_j.previousCellOutput * cellWeight_j.wCellForget;
                cell_j.yForget = Sigmoid(cell_j.netForget);

                cell_j.netCellState += cell_j.previousCellOutput * cellWeight_j.wCellState;
                cell_j.yCellState    = TanH(cell_j.netCellState);

                //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
                cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;

                //include the internal connection multiplied by the CURRENT cell state
                cell_j.netOut += cell_j.cellState * cellWeight_j.wPeepholeOut + cell_j.previousCellOutput * cellWeight_j.wCellOut;

                //squash output gate
                cell_j.yOut = Sigmoid(cell_j.netOut);

                Cells[j] = (float)(TanH(cell_j.cellState) * cell_j.yOut);

                LSTMCells[j] = cell_j;
            }
        }
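
The dense-feature loop above is hand-vectorized with System.Numerics: the main loop consumes Vector<float>.Count lanes per iteration and a scalar tail finishes the remainder. The same pattern as a standalone dot product:

    using System.Numerics;

    // Standalone SIMD dot product with the same main-loop/scalar-tail structure
    // as the dense-feature code above.
    static class SimdDotSketch
    {
        public static float Dot(float[] a, float[] b)
        {
            int k = 0;
            float sum = 0;
            int tail = a.Length % Vector<float>.Count;

            // Main loop: Vector<float>.Count lanes per iteration.
            while (k < a.Length - tail)
            {
                sum += Vector.Dot(new Vector<float>(a, k), new Vector<float>(b, k));
                k += Vector<float>.Count;
            }

            // Scalar tail: finish the leftover elements.
            while (k < a.Length)
            {
                sum += a[k] * b[k];
                k++;
            }
            return sum;
        }
    }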
Example no. 6
        // Forward pass. The output layer consists of tag values.
        public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
        {
            //inputs(t) -> hidden(t)
            //Get sparse feature and apply it into hidden layer
            SparseFeature = sparseFeature;
            DenseFeature  = denseFeature;

            Parallel.For(0, LayerSize, parallelOption, j =>
            {
                LSTMCell cell_j = cell[j];

                //hidden(t-1) -> hidden(t)
                cell_j.previousCellState = cell_j.cellState;
                previousCellOutput[j]    = cellOutput[j];

                Vector4 vecCell_j = Vector4.Zero;

                if (SparseFeatureSize > 0)
                {
                    //Apply sparse weights
                    Vector4[] weights = input2hidden[j];
                    for (int i = 0; i < SparseFeature.Count; i++)
                    {
                        var entry  = SparseFeature.GetEntry(i);
                        vecCell_j += weights[entry.Key] * entry.Value;
                    }
                }

                //Apply dense weights
                if (DenseFeatureSize > 0)
                {
                    Vector4[] weights = feature2hidden[j];
                    for (int i = 0; i < DenseFeatureSize; i++)
                    {
                        vecCell_j += weights[i] * (float)DenseFeature[i];
                    }
                }

                //load the accumulated feature activations into the gate net inputs
                cell_j.netIn     = vecCell_j.X;
                cell_j.netForget = vecCell_j.Y;
                cell_j.netCellState = vecCell_j.Z;
                cell_j.netOut = vecCell_j.W;

                double cell_j_previousCellOutput = previousCellOutput[j];

                //include internal connection multiplied by the previous cell state
                cell_j.netIn += cell_j.previousCellState * cell_j.wPeepholeIn + cell_j_previousCellOutput * cell_j.wCellIn;
                //squash input
                cell_j.yIn = Sigmoid(cell_j.netIn);

                //include internal connection multiplied by the previous cell state
                cell_j.netForget += cell_j.previousCellState * cell_j.wPeepholeForget + cell_j_previousCellOutput * cell_j.wCellForget;
                cell_j.yForget    = Sigmoid(cell_j.netForget);

                cell_j.netCellState += cell_j_previousCellOutput * cell_j.wCellState;
                cell_j.yCellState    = TanH(cell_j.netCellState);

                //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
                cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;

                //include the internal connection multiplied by the CURRENT cell state
                cell_j.netOut += cell_j.cellState * cell_j.wPeepholeOut + cell_j_previousCellOutput * cell_j.wCellOut;

                //squash output gate
                cell_j.yOut = Sigmoid(cell_j.netOut);

                cellOutput[j] = TanH(cell_j.cellState) * cell_j.yOut;

                cell[j] = cell_j;
            });
        }
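
In equation form, the per-cell update implemented by this loop is a peephole LSTM with diagonal recurrence, where the a terms are the feature activations accumulated in vecCell_j:

    % i = input gate, f = forget gate, o = output gate, c = cell state, h = output
    \begin{aligned}
    i_t &= \sigma(a_{\mathrm{in}} + w_{\mathrm{peepholeIn}}\, c_{t-1} + w_{\mathrm{cellIn}}\, h_{t-1}) \\
    f_t &= \sigma(a_{\mathrm{forget}} + w_{\mathrm{peepholeForget}}\, c_{t-1} + w_{\mathrm{cellForget}}\, h_{t-1}) \\
    \tilde{c}_t &= \tanh(a_{\mathrm{cell}} + w_{\mathrm{cellState}}\, h_{t-1}) \\
    c_t &= f_t\, c_{t-1} + i_t\, \tilde{c}_t \\
    o_t &= \sigma(a_{\mathrm{out}} + w_{\mathrm{peepholeOut}}\, c_t + w_{\mathrm{cellOut}}\, h_{t-1}) \\
    h_t &= o_t \tanh(c_t)
    \end{aligned}

Note that the output gate peeks at the current cell state c_t, while the input and forget gates see the previous one.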
Example no. 7
        // Forward pass. The output layer consists of tag values.
        public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
        {
            //inputs(t) -> hidden(t)
            //Get sparse feature and apply it into hidden layer
            SparseFeature = sparseFeature;
            DenseFeature = denseFeature;

            Parallel.For(0, LayerSize, parallelOption, j =>
            {
                LSTMCell cell_j = cell[j];

                //hidden(t-1) -> hidden(t)
                cell_j.previousCellState = cell_j.cellState;
                previousCellOutput[j] = cellOutput[j];

                Vector4 vecCell_j = Vector4.Zero;

                if (SparseFeatureSize > 0)
                {
                    //Apply sparse weights
                    Vector4[] weights = input2hidden[j];
                    for (int i = 0; i < SparseFeature.Count; i++)
                    {
                        var entry = SparseFeature.GetEntry(i);
                        vecCell_j += weights[entry.Key] * entry.Value;
                    }
                }

                //Apply dense weights
                if (DenseFeatureSize > 0)
                {
                    Vector4[] weights = feature2hidden[j];
                    for (int i = 0; i < DenseFeatureSize; i++)
                    {
                        vecCell_j += weights[i] * (float)DenseFeature[i];
                    }
                }

                //load the accumulated feature activations into the gate net inputs
                cell_j.netIn = vecCell_j.X;
                cell_j.netForget = vecCell_j.Y;
                cell_j.netCellState = vecCell_j.Z;
                cell_j.netOut = vecCell_j.W;

                double cell_j_previousCellOutput = previousCellOutput[j];

                //include internal connection multiplied by the previous cell state
                cell_j.netIn += cell_j.previousCellState * cell_j.wPeepholeIn + cell_j_previousCellOutput * cell_j.wCellIn;
                //squash input
                cell_j.yIn = Sigmoid(cell_j.netIn);

                //include internal connection multiplied by the previous cell state
                cell_j.netForget += cell_j.previousCellState * cell_j.wPeepholeForget + cell_j_previousCellOutput * cell_j.wCellForget;
                cell_j.yForget = Sigmoid(cell_j.netForget);

                cell_j.netCellState += cell_j_previousCellOutput * cell_j.wCellState;
                cell_j.yCellState = TanH(cell_j.netCellState);

                if (mask[j]) //dropout mask: drop this unit
                {
                    cell_j.cellState = 0;
                }
                else
                {
                    //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
                    cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;
                }

                if (!isTrain)
                {
                    //standard dropout: rescale by (1 - Dropout) at inference time
                    cell_j.cellState = cell_j.cellState * (1.0 - Dropout);
                }

                //include the internal connection multiplied by the CURRENT cell state
                cell_j.netOut += cell_j.cellState * cell_j.wPeepholeOut + cell_j_previousCellOutput * cell_j.wCellOut;

                //squash output gate
                cell_j.yOut = Sigmoid(cell_j.netOut);

                cellOutput[j] = TanH(cell_j.cellState) * cell_j.yOut;

                cell[j] = cell_j;
            });
        }
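
This variant adds standard (non-inverted) dropout: masked cell states are zeroed during training, and at inference every cell state is scaled by (1 - Dropout) so expected magnitudes match. A minimal sketch of that convention, separate from the LSTM plumbing:

    using System;

    // Standard (non-inverted) dropout as used above: zero a random mask of
    // units in training, scale all units by (1 - p) at inference.
    static class DropoutSketch
    {
        static readonly Random rand = new Random();

        public static bool[] SampleMask(int size, double p)
        {
            var mask = new bool[size];
            for (int j = 0; j < size; j++)
                mask[j] = rand.NextDouble() < p;   // true = drop this unit
            return mask;
        }

        public static void Apply(double[] activations, bool[] mask, double p, bool isTrain)
        {
            for (int j = 0; j < activations.Length; j++)
            {
                if (isTrain && mask[j])
                    activations[j] = 0;            // drop during training
                else if (!isTrain)
                    activations[j] *= 1.0 - p;     // rescale at inference
            }
        }
    }

Inverted dropout, which scales by 1/(1 - p) during training instead, is the more common modern choice because it leaves the inference path untouched.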
Example no. 8
        public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode)
        {
            Sequence tgtSequence = pSequence.tgtSequence;
            bool     isTraining  = runningMode == RunningMode.Training;

            //Reset all layers
            foreach (SimpleLayer layer in HiddenLayerList)
            {
                layer.netReset(isTraining);
            }

            //Extract features from the source sentence
            Sequence srcSequence = pSequence.autoEncoder.Featurizer.ExtractFeatures(pSequence.srcSentence);

            double[] srcHiddenAvgOutput;
            Dictionary<int, float> srcSparseFeatures;

            ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize, out srcHiddenAvgOutput, out srcSparseFeatures);

            int numStates = pSequence.tgtSequence.States.Length;
            int numLayers = HiddenLayerList.Count;

            int[] predicted = new int[numStates];

            //Put the target sentence labels into the output layer's short list
            OutputLayer.LabelShortList = new List<int>();
            foreach (State state in tgtSequence.States)
            {
                OutputLayer.LabelShortList.Add(state.Label);
            }

            for (int curState = 0; curState < numStates; curState++)
            {
                //Build runtime features
                State state = tgtSequence.States[curState];
                SetRuntimeFeatures(state, curState, numStates, predicted);

                //Build sparse features for all layers
                SparseVector sparseVector = new SparseVector();
                sparseVector.SetLength(tgtSequence.SparseFeatureSize + srcSequence.SparseFeatureSize);
                sparseVector.AddKeyValuePairData(state.SparseFeature);
                sparseVector.AddKeyValuePairData(srcSparseFeatures);

                //Compute first layer
                double[] denseFeatures = RNNHelper.ConcatenateVector(state.DenseFeature, srcHiddenAvgOutput);
                HiddenLayerList[0].computeLayer(sparseVector, denseFeatures, isTraining);

                //Compute middle layers
                for (int i = 1; i < numLayers; i++)
                {
                    //Use the previous layer's output as the dense feature for the current layer
                    denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].cellOutput, srcHiddenAvgOutput);
                    HiddenLayerList[i].computeLayer(sparseVector, denseFeatures, isTraining);
                }

                //Compute output layer
                denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].cellOutput, srcHiddenAvgOutput);
                OutputLayer.computeLayer(sparseVector, denseFeatures, isTraining);

                OutputLayer.Softmax(isTraining);

                predicted[curState] = OutputLayer.GetBestOutputIndex(isTraining);

                if (runningMode != RunningMode.Test)
                {
                    logp += Math.Log10(OutputLayer.cellOutput[state.Label] + 0.0001);
                }

                if (runningMode == RunningMode.Training)
                {
                    // error propagation
                    OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);

                    //propagate errors through each layer, from the output layer back to the input layer
                    HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
                    for (int i = numLayers - 2; i >= 0; i--)
                    {
                        HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
                    }

                    //Update net weights
                    Parallel.Invoke(
                        () => OutputLayer.LearnFeatureWeights(numStates, curState),
                        () => Parallel.For(0, numLayers, parallelOption, i =>
                        {
                            HiddenLayerList[i].LearnFeatureWeights(numStates, curState);
                        }));
                }
            }

            return predicted;
        }
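
Every layer in this loop takes the previous layer's output concatenated with the averaged source-side hidden state as its dense input. RNNHelper.ConcatenateVector is not shown in these examples; a plausible sketch inferred from its call sites:

    using System;

    // Plausible sketch of ConcatenateVector as used above: append the source
    // context vector after the layer output. Inferred, not RNNSharp's code.
    static class ConcatSketch
    {
        public static double[] ConcatenateVector(double[] a, double[] b)
        {
            var result = new double[a.Length + b.Length];
            Array.Copy(a, 0, result, 0, a.Length);
            Array.Copy(b, 0, result, a.Length, b.Length);
            return result;
        }
    }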
Example no. 9
        public override int[] TestSeq2Seq(Sentence srcSentence, Featurizer featurizer)
        {
            State curState = featurizer.ExtractFeatures(new string[] { "<s>" });

            curState.Label = featurizer.TagSet.GetIndex("<s>");

            //Reset all layers
            foreach (SimpleLayer layer in HiddenLayerList)
            {
                layer.netReset(false);
            }

            //Extract features from source sentence
            Sequence srcSequence = featurizer.AutoEncoder.Featurizer.ExtractFeatures(srcSentence);

            double[] srcHiddenAvgOutput;
            Dictionary<int, float> srcSparseFeatures;

            ExtractSourceSentenceFeature(featurizer.AutoEncoder, srcSequence, curState.SparseFeature.Length, out srcHiddenAvgOutput, out srcSparseFeatures);

            int       numLayers = HiddenLayerList.Count;
            List<int> predicted = new List<int>();

            predicted.Add(curState.Label);
            while (true)
            {
                //Build sparse features
                SparseVector sparseVector = new SparseVector();
                sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
                sparseVector.AddKeyValuePairData(curState.SparseFeature);
                sparseVector.AddKeyValuePairData(srcSparseFeatures);

                //Compute first layer
                double[] denseFeatures = RNNHelper.ConcatenateVector(curState.DenseFeature, srcHiddenAvgOutput);
                HiddenLayerList[0].computeLayer(sparseVector, denseFeatures, false);

                //Compute middle layers
                for (int i = 1; i < numLayers; i++)
                {
                    //Use the previous layer's output as the dense feature for the current layer
                    denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].cellOutput, srcHiddenAvgOutput);
                    HiddenLayerList[i].computeLayer(sparseVector, denseFeatures, false);
                }

                //Compute output layer
                denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].cellOutput, srcHiddenAvgOutput);
                OutputLayer.computeLayer(sparseVector, denseFeatures, false);

                OutputLayer.Softmax(false);

                int    nextTagId = OutputLayer.GetBestOutputIndex(false);
                string nextWord  = featurizer.TagSet.GetTagName(nextTagId);

                curState       = featurizer.ExtractFeatures(new string[] { nextWord });
                curState.Label = nextTagId;

                predicted.Add(nextTagId);

                if (nextWord == "</s>" || predicted.Count >= 100)
                {
                    break;
                }
            }

            return predicted.ToArray();
        }
Example no. 10
        public override int[] TestSeq2Seq(Sentence srcSentence, Config featurizer)
        {
            var curState = featurizer.BuildState(new[] { "<s>" });

            curState.Label = featurizer.TagSet.GetIndex("<s>");

            //Reset all layers
            foreach (var layer in HiddenLayerList)
            {
                layer.Reset(false);
            }

            //Extract features from source sentence
            var srcSequence = featurizer.Seq2SeqAutoEncoder.Config.BuildSequence(srcSentence);

            float[] srcHiddenAvgOutput;
            Dictionary<int, float> srcSparseFeatures;

            ExtractSourceSentenceFeature(featurizer.Seq2SeqAutoEncoder, srcSequence, curState.SparseFeature.Length,
                                         out srcHiddenAvgOutput, out srcSparseFeatures);

            var numLayers = HiddenLayerList.Count;
            var predicted = new List<int> {
                curState.Label
            };

            while (true)
            {
                //Build sparse features
                var sparseVector = new SparseVector();
                sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
                sparseVector.AddKeyValuePairData(curState.SparseFeature);
                sparseVector.AddKeyValuePairData(srcSparseFeatures);

                //Compute first layer
                var denseFeatures = RNNHelper.ConcatenateVector(curState.DenseFeature, srcHiddenAvgOutput);
                HiddenLayerList[0].ForwardPass(sparseVector, denseFeatures, false);

                //Compute middle layers
                for (var i = 1; i < numLayers; i++)
                {
                    //Use the previous layer's output as the dense feature for the current layer
                    denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].Cell, srcHiddenAvgOutput);
                    HiddenLayerList[i].ForwardPass(sparseVector, denseFeatures, false);
                }

                //Compute output layer
                denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].Cell,
                                                            srcHiddenAvgOutput);
                OutputLayer.ForwardPass(sparseVector, denseFeatures, false);

                OutputLayer.Softmax(false);

                var nextTagId = OutputLayer.GetBestOutputIndex(false);
                var nextWord  = featurizer.TagSet.GetTagName(nextTagId);

                curState       = featurizer.BuildState(new[] { nextWord });
                curState.Label = nextTagId;

                predicted.Add(nextTagId);

                if (nextWord == "</s>" || predicted.Count >= 100)
                {
                    break;
                }
            }

            return predicted.ToArray();
        }
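
Examples 9 and 10 implement the same greedy decoding scheme against two versions of the API: start from "<s>", feed each prediction back in as the next input, and stop at "</s>" or a length cap. Stripped of the layer plumbing, the control flow reduces to the skeleton below, where stepForward is a hypothetical stand-in for the per-token forward pass and argmax:

    using System;
    using System.Collections.Generic;

    // Control-flow skeleton of the greedy seq2seq decoders above.
    static class GreedyDecodeSketch
    {
        public static int[] Decode(Func<int, int> stepForward, int bosId, int eosId, int maxLength = 100)
        {
            var predicted = new List<int> { bosId };
            var current = bosId;
            while (true)
            {
                current = stepForward(current);   // forward pass + best output index
                predicted.Add(current);
                if (current == eosId || predicted.Count >= maxLength)
                    break;
            }
            return predicted.ToArray();
        }
    }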