// forward process. output layer consists of tag values
public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
{
    //keep last hidden layer and erase activations
    cellOutput.CopyTo(previousCellOutput, 0);

    //Apply previous feature to current time
    //hidden(t-1) -> hidden(t)
    RNNHelper.matrixXvectorADD(cellOutput, previousCellOutput, BpttWeights, LayerSize, LayerSize);

    //Apply features on hidden layer
    SparseFeature = sparseFeature;
    DenseFeature = denseFeature;

    if (SparseFeatureSize > 0)
    {
        //Apply sparse features
        Parallel.For(0, LayerSize, parallelOption, b =>
        {
            double score = 0;
            double[] vector_b = SparseWeights[b];
            for (int i = 0; i < SparseFeature.Count; i++)
            {
                var entry = SparseFeature.GetEntry(i);
                score += entry.Value * vector_b[entry.Key];
            }
            cellOutput[b] += score;
        });
    }

    if (DenseFeatureSize > 0)
    {
        //Apply dense features
        RNNHelper.matrixXvectorADD(cellOutput, DenseFeature, DenseWeights, LayerSize, DenseFeatureSize, false);
    }

    //activate layer
    activityLayer(isTrain);
}
public override void ForwardPass(SparseVector sparseFeature, float[] denseFeature)
{
    if (runningMode == RunningMode.Training)
    {
        //Collect the gold labels plus randomly drawn negative samples
        negativeSampleWordList.Clear();
        foreach (var labelId in LabelShortList)
        {
            negativeSampleWordList.Add(labelId);
        }

        for (var i = 0; i < NegativeSampleSize; i++)
        {
            var wordId = rand.Next() % LayerSize;
            while (negativeSampleWordList.Contains(wordId))
            {
                wordId = (wordId + 1) % LayerSize;
            }
            negativeSampleWordList.Add(wordId);
        }

        if (DenseFeatureSize > 0)
        {
            DenseFeature = denseFeature;
            RNNHelper.matrixXvectorADD(Cells, denseFeature, DenseWeights, negativeSampleWordList, DenseFeatureSize);
        }

        if (SparseFeatureSize > 0)
        {
            //Apply sparse features
            SparseFeature = sparseFeature;
            foreach (var b in negativeSampleWordList)
            {
                float score = 0;
                var vector_b = SparseWeights[b];
                foreach (var pair in SparseFeature)
                {
                    score += pair.Value * vector_b[pair.Key];
                }
                Cells[b] += score;
            }
        }

        //Softmax over the sampled labels only
        double sum = 0;
        foreach (var c in negativeSampleWordList)
        {
            var cell = Cells[c];
            if (cell > 50)
            {
                cell = 50;
            }
            if (cell < -50)
            {
                cell = -50;
            }

            var val = (float)Math.Exp(cell);
            sum += val;
            Cells[c] = val;
        }

        foreach (var c in negativeSampleWordList)
        {
            Cells[c] /= (float)sum;
        }
    }
    else
    {
        base.ForwardPass(sparseFeature, denseFeature);
    }
}
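//A minimal, self-contained sketch (not part of the library's API) of the sampled-softmax idea used in
//ForwardPass above: scores are clipped, exponentiated, and normalized only over the gold labels plus
//K randomly drawn negative labels instead of over the full output layer. All names below are
//illustrative placeholders; requires System and System.Collections.Generic.
static Dictionary<int, float> SampledSoftmaxSketch(float[] scores, IList<int> goldLabels, int negativeSampleSize, Random rand)
{
    var sampled = new HashSet<int>(goldLabels);

    //Draw negatives, skipping indices already in the set (linear probing, as in the method above)
    for (var i = 0; i < negativeSampleSize; i++)
    {
        var id = rand.Next(scores.Length);
        while (sampled.Contains(id))
        {
            id = (id + 1) % scores.Length;
        }
        sampled.Add(id);
    }

    //Clip, exponentiate, and normalize over the sampled subset only
    double sum = 0;
    var expScores = new Dictionary<int, double>();
    foreach (var id in sampled)
    {
        var s = Math.Max(-50f, Math.Min(50f, scores[id]));
        var e = Math.Exp(s);
        expScores[id] = e;
        sum += e;
    }

    var result = new Dictionary<int, float>();
    foreach (var pair in expScores)
    {
        result[pair.Key] = (float)(pair.Value / sum);
    }
    return result;
}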
void ExtractSparseFeature(int currentState, int numStates, List<string[]> features, State pState)
{
    Dictionary<int, float> sparseFeature = new Dictionary<int, float>();
    int start = 0;
    var fc = FeatureContext;

    //Extract TFeatures in the given context window
    if (TFeaturizer != null)
    {
        if (fc.ContainsKey(TFEATURE_CONTEXT) == true)
        {
            List<int> v = fc[TFEATURE_CONTEXT];
            for (int j = 0; j < v.Count; j++)
            {
                int offset = TruncPosition(currentState + v[j], 0, numStates);
                List<int> tfeatureList = TFeaturizer.GetFeatureIds(features, offset);
                foreach (int featureId in tfeatureList)
                {
                    if (TFeatureWeightType == TFEATURE_WEIGHT_TYPE_ENUM.BINARY)
                    {
                        sparseFeature[start + featureId] = 1;
                    }
                    else
                    {
                        if (sparseFeature.ContainsKey(start + featureId) == false)
                        {
                            sparseFeature.Add(start + featureId, 1);
                        }
                        else
                        {
                            sparseFeature[start + featureId]++;
                        }
                    }
                }
                start += TFeaturizer.GetFeatureSize();
            }
        }
    }

    //Create placeholders for run-time features
    //The real feature values are calculated at run time
    if (fc.ContainsKey(RT_FEATURE_CONTEXT) == true)
    {
        List<int> v = fc[RT_FEATURE_CONTEXT];
        pState.RuntimeFeatures = new PriviousLabelFeature[v.Count];
        for (int j = 0; j < v.Count; j++)
        {
            if (v[j] < 0)
            {
                pState.AddRuntimeFeaturePlacehold(j, v[j], sparseFeature.Count, start);
                sparseFeature[start] = 0; //Hold a position as a placeholder
                start += TagSet.GetSize();
            }
            else
            {
                throw new Exception("The offset of a run-time feature should be negative.");
            }
        }
    }

    SparseVector spSparseFeature = pState.SparseFeature;
    spSparseFeature.SetLength(SparseFeatureSize);
    spSparseFeature.AddKeyValuePairData(sparseFeature);
}
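//An illustrative sketch (not the library's code) of the layout used by ExtractSparseFeature above:
//each context position gets its own block of sparse dimensions, offset by a running "start" cursor,
//so the same template feature id fired at different offsets maps to different indices.
//Names below are placeholders; requires System.Collections.Generic.
static Dictionary<int, float> BuildSparseBlockSketch(IList<IList<int>> featureIdsPerPosition, int featureSize)
{
    var sparse = new Dictionary<int, float>();
    var start = 0;
    foreach (var ids in featureIdsPerPosition)
    {
        foreach (var id in ids)
        {
            //Binary weighting: presence only, as with TFEATURE_WEIGHT_TYPE_ENUM.BINARY above
            sparse[start + id] = 1;
        }
        start += featureSize; //the next context position gets the next block of dimensions
    }
    return sparse;
}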
public virtual void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
{
    DenseFeature = denseFeature;
    RNNHelper.matrixXvectorADD(cellOutput, denseFeature, DenseWeights, LayerSize, DenseFeatureSize);
}
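//A plain-loop sketch of what the dense part of computeLayer presumably computes. This is an
//assumption about the semantics of RNNHelper.matrixXvectorADD (whose implementation is not shown
//here): dest[i] = sum_j weights[i][j] * src[j], either overwriting or accumulating into dest.
static void MatrixXVectorAddSketch(double[] dest, double[] src, double[][] weights, int destSize, int srcSize, bool accumulate)
{
    for (var i = 0; i < destSize; i++)
    {
        double score = 0;
        var row = weights[i];
        for (var j = 0; j < srcSize; j++)
        {
            score += row[j] * src[j];
        }
        dest[i] = accumulate ? dest[i] + score : score;
    }
}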
// forward process. output layer consists of tag values
public override void ForwardPass(SparseVector sparseFeature, float[] denseFeature)
{
    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it to the hidden layer
    SparseFeature = sparseFeature;
    DenseFeature = denseFeature;

    for (var j = 0; j < LayerSize; j++)
    {
        var cell_j = LSTMCells[j];
        var cellWeight_j = CellWeights[j];

        //hidden(t-1) -> hidden(t)
        cell_j.previousCellState = cell_j.cellState;
        cell_j.previousCellOutput = Cells[j];

        var vecCell_j = Vector4.Zero;

        if (SparseFeatureSize > 0)
        {
            //Apply sparse weights
            var weights = sparseFeatureWeights[j];
            var deri = sparseFeatureToHiddenDeri[j];
            foreach (var pair in SparseFeature)
            {
                vecCell_j += weights[pair.Key] * pair.Value;
                if (deri.ContainsKey(pair.Key) == false)
                {
                    deri.Add(pair.Key, new Vector3(0));
                }
            }
        }

        if (DenseFeatureSize > 0)
        {
            //Apply dense weights, SIMD over whole vector blocks first
            var k = 0;
            float[] denseInputGateWeight_j = wDenseInputGate.weights[j];
            float[] denseForgetGateWeight_j = wDenseForgetGate.weights[j];
            float[] denseCellGateWeight_j = wDenseCellGate.weights[j];
            float[] denseOutputGateWeight_j = wDenseOutputGate.weights[j];

            var moreItems = (DenseFeatureSize % Vector<float>.Count);
            while (k < DenseFeatureSize - moreItems)
            {
                var vX = new Vector<float>(denseInputGateWeight_j, k);
                var vY = new Vector<float>(denseForgetGateWeight_j, k);
                var vZ = new Vector<float>(denseCellGateWeight_j, k);
                var vW = new Vector<float>(denseOutputGateWeight_j, k);
                var vFeature = new Vector<float>(DenseFeature, k);

                vecCell_j.X += Vector.Dot(vX, vFeature);
                vecCell_j.Y += Vector.Dot(vY, vFeature);
                vecCell_j.Z += Vector.Dot(vZ, vFeature);
                vecCell_j.W += Vector.Dot(vW, vFeature);

                k += Vector<float>.Count;
            }

            //Handle the remaining items that do not fill a whole SIMD block
            while (k < DenseFeatureSize)
            {
                vecCell_j.X += denseInputGateWeight_j[k] * DenseFeature[k];
                vecCell_j.Y += denseForgetGateWeight_j[k] * DenseFeature[k];
                vecCell_j.Z += denseCellGateWeight_j[k] * DenseFeature[k];
                vecCell_j.W += denseOutputGateWeight_j[k] * DenseFeature[k];
                k++;
            }
        }

        //Set the net inputs of the four gates to the freshly computed pre-activations
        cell_j.netIn = vecCell_j.X;
        cell_j.netForget = vecCell_j.Y;
        cell_j.netCellState = vecCell_j.Z;
        cell_j.netOut = vecCell_j.W;

        //include internal connection multiplied by the previous cell state
        cell_j.netIn += cell_j.previousCellState * cellWeight_j.wPeepholeIn + cell_j.previousCellOutput * cellWeight_j.wCellIn;

        //squash input gate
        cell_j.yIn = Sigmoid(cell_j.netIn);

        //include internal connection multiplied by the previous cell state
        cell_j.netForget += cell_j.previousCellState * cellWeight_j.wPeepholeForget + cell_j.previousCellOutput * cellWeight_j.wCellForget;
        cell_j.yForget = Sigmoid(cell_j.netForget);

        cell_j.netCellState += cell_j.previousCellOutput * cellWeight_j.wCellState;
        cell_j.yCellState = TanH(cell_j.netCellState);

        //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
        cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;

        //include the internal connection multiplied by the CURRENT cell state
        cell_j.netOut += cell_j.cellState * cellWeight_j.wPeepholeOut + cell_j.previousCellOutput * cellWeight_j.wCellOut;

        //squash output gate
        cell_j.yOut = Sigmoid(cell_j.netOut);

        Cells[j] = (float)(TanH(cell_j.cellState) * cell_j.yOut);
        LSTMCells[j] = cell_j;
    }
}
// forward process. output layer consists of tag values
public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
{
    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it to the hidden layer
    SparseFeature = sparseFeature;
    DenseFeature = denseFeature;

    Parallel.For(0, LayerSize, parallelOption, j =>
    {
        LSTMCell cell_j = cell[j];

        //hidden(t-1) -> hidden(t)
        cell_j.previousCellState = cell_j.cellState;
        previousCellOutput[j] = cellOutput[j];

        Vector4 vecCell_j = Vector4.Zero;

        if (SparseFeatureSize > 0)
        {
            //Apply sparse weights
            Vector4[] weights = input2hidden[j];
            for (int i = 0; i < SparseFeature.Count; i++)
            {
                var entry = SparseFeature.GetEntry(i);
                vecCell_j += weights[entry.Key] * entry.Value;
            }
        }

        if (DenseFeatureSize > 0)
        {
            //Apply dense weights
            Vector4[] weights = feature2hidden[j];
            for (int i = 0; i < DenseFeatureSize; i++)
            {
                vecCell_j += weights[i] * (float)DenseFeature[i];
            }
        }

        //Set the net inputs of the four gates to the freshly computed pre-activations
        cell_j.netIn = vecCell_j.X;
        cell_j.netForget = vecCell_j.Y;
        cell_j.netCellState = vecCell_j.Z;
        cell_j.netOut = vecCell_j.W;

        double cell_j_previousCellOutput = previousCellOutput[j];

        //include internal connection multiplied by the previous cell state
        cell_j.netIn += cell_j.previousCellState * cell_j.wPeepholeIn + cell_j_previousCellOutput * cell_j.wCellIn;

        //squash input gate
        cell_j.yIn = Sigmoid(cell_j.netIn);

        //include internal connection multiplied by the previous cell state
        cell_j.netForget += cell_j.previousCellState * cell_j.wPeepholeForget + cell_j_previousCellOutput * cell_j.wCellForget;
        cell_j.yForget = Sigmoid(cell_j.netForget);

        cell_j.netCellState += cell_j_previousCellOutput * cell_j.wCellState;
        cell_j.yCellState = TanH(cell_j.netCellState);

        //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
        cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;

        //include the internal connection multiplied by the CURRENT cell state
        cell_j.netOut += cell_j.cellState * cell_j.wPeepholeOut + cell_j_previousCellOutput * cell_j.wCellOut;

        //squash output gate
        cell_j.yOut = Sigmoid(cell_j.netOut);

        cellOutput[j] = TanH(cell_j.cellState) * cell_j.yOut;
        cell[j] = cell_j;
    });
}
// forward process. output layer consists of tag values
public override void computeLayer(SparseVector sparseFeature, double[] denseFeature, bool isTrain = true)
{
    //inputs(t) -> hidden(t)
    //Get sparse feature and apply it to the hidden layer
    SparseFeature = sparseFeature;
    DenseFeature = denseFeature;

    Parallel.For(0, LayerSize, parallelOption, j =>
    {
        LSTMCell cell_j = cell[j];

        //hidden(t-1) -> hidden(t)
        cell_j.previousCellState = cell_j.cellState;
        previousCellOutput[j] = cellOutput[j];

        Vector4 vecCell_j = Vector4.Zero;

        if (SparseFeatureSize > 0)
        {
            //Apply sparse weights
            Vector4[] weights = input2hidden[j];
            for (int i = 0; i < SparseFeature.Count; i++)
            {
                var entry = SparseFeature.GetEntry(i);
                vecCell_j += weights[entry.Key] * entry.Value;
            }
        }

        if (DenseFeatureSize > 0)
        {
            //Apply dense weights
            Vector4[] weights = feature2hidden[j];
            for (int i = 0; i < DenseFeatureSize; i++)
            {
                vecCell_j += weights[i] * (float)DenseFeature[i];
            }
        }

        //Set the net inputs of the four gates to the freshly computed pre-activations
        cell_j.netIn = vecCell_j.X;
        cell_j.netForget = vecCell_j.Y;
        cell_j.netCellState = vecCell_j.Z;
        cell_j.netOut = vecCell_j.W;

        double cell_j_previousCellOutput = previousCellOutput[j];

        //include internal connection multiplied by the previous cell state
        cell_j.netIn += cell_j.previousCellState * cell_j.wPeepholeIn + cell_j_previousCellOutput * cell_j.wCellIn;

        //squash input gate
        cell_j.yIn = Sigmoid(cell_j.netIn);

        //include internal connection multiplied by the previous cell state
        cell_j.netForget += cell_j.previousCellState * cell_j.wPeepholeForget + cell_j_previousCellOutput * cell_j.wCellForget;
        cell_j.yForget = Sigmoid(cell_j.netForget);

        cell_j.netCellState += cell_j_previousCellOutput * cell_j.wCellState;
        cell_j.yCellState = TanH(cell_j.netCellState);

        if (mask[j] == true)
        {
            //This cell is dropped out for the current training sample
            cell_j.cellState = 0;
        }
        else
        {
            //cell state equals the previous cell state multiplied by the forget gate plus the cell input multiplied by the input gate
            cell_j.cellState = cell_j.yForget * cell_j.previousCellState + cell_j.yIn * cell_j.yCellState;
        }

        if (isTrain == false)
        {
            //At test time, scale the cell state by the keep probability to compensate for dropout
            cell_j.cellState = cell_j.cellState * (1.0 - Dropout);
        }

        //include the internal connection multiplied by the CURRENT cell state
        cell_j.netOut += cell_j.cellState * cell_j.wPeepholeOut + cell_j_previousCellOutput * cell_j.wCellOut;

        //squash output gate
        cell_j.yOut = Sigmoid(cell_j.netOut);

        cellOutput[j] = TanH(cell_j.cellState) * cell_j.yOut;
        cell[j] = cell_j;
    });
}
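//A scalar, single-cell sketch of the gate arithmetic used by the three LSTM forward passes above,
//shown with peephole connections from the cell state. It is a simplified illustration only: the
//wCell* self-connections and dropout are omitted, and the names are placeholders, not library types.
struct LstmCellSketch
{
    public double CellState, Output;

    public void Step(double netIn, double netForget, double netCellState, double netOut,
                     double wPeepholeIn, double wPeepholeForget, double wPeepholeOut)
    {
        var prevState = CellState;

        var yIn = Sigmoid(netIn + prevState * wPeepholeIn);             //input gate
        var yForget = Sigmoid(netForget + prevState * wPeepholeForget); //forget gate
        var yCell = Math.Tanh(netCellState);                            //candidate cell input

        CellState = yForget * prevState + yIn * yCell;                  //new cell state

        var yOut = Sigmoid(netOut + CellState * wPeepholeOut);          //output gate peeks at the NEW state
        Output = Math.Tanh(CellState) * yOut;
    }

    static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-x));
}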
public override int[] ProcessSeq2Seq(SequencePair pSequence, RunningMode runningMode)
{
    Sequence tgtSequence = pSequence.tgtSequence;
    bool isTraining = (runningMode == RunningMode.Training);

    //Reset all layers
    foreach (SimpleLayer layer in HiddenLayerList)
    {
        layer.netReset(isTraining);
    }

    //Extract features from the source sentence
    Sequence srcSequence = pSequence.autoEncoder.Featurizer.ExtractFeatures(pSequence.srcSentence);
    double[] srcHiddenAvgOutput;
    Dictionary<int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(pSequence.autoEncoder, srcSequence, tgtSequence.SparseFeatureSize, out srcHiddenAvgOutput, out srcSparseFeatures);

    int numStates = pSequence.tgtSequence.States.Length;
    int numLayers = HiddenLayerList.Count;
    int[] predicted = new int[numStates];

    //Set the target sentence labels into the short list of the output layer
    OutputLayer.LabelShortList = new List<int>();
    foreach (State state in tgtSequence.States)
    {
        OutputLayer.LabelShortList.Add(state.Label);
    }

    for (int curState = 0; curState < numStates; curState++)
    {
        //Build run-time features
        State state = tgtSequence.States[curState];
        SetRuntimeFeatures(state, curState, numStates, predicted);

        //Build sparse features for all layers
        SparseVector sparseVector = new SparseVector();
        sparseVector.SetLength(tgtSequence.SparseFeatureSize + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(state.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        //Compute the first layer
        double[] denseFeatures = RNNHelper.ConcatenateVector(state.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].computeLayer(sparseVector, denseFeatures, isTraining);

        //Compute the middle layers
        for (int i = 1; i < numLayers; i++)
        {
            //Use the previous layer's output as the dense feature for the current layer
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].cellOutput, srcHiddenAvgOutput);
            HiddenLayerList[i].computeLayer(sparseVector, denseFeatures, isTraining);
        }

        //Compute the output layer
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].cellOutput, srcHiddenAvgOutput);
        OutputLayer.computeLayer(sparseVector, denseFeatures, isTraining);
        OutputLayer.Softmax(isTraining);

        predicted[curState] = OutputLayer.GetBestOutputIndex(isTraining);

        if (runningMode != RunningMode.Test)
        {
            logp += Math.Log10(OutputLayer.cellOutput[state.Label] + 0.0001);
        }

        if (runningMode == RunningMode.Training)
        {
            //Error propagation
            OutputLayer.ComputeLayerErr(CRFSeqOutput, state, curState);

            //Propagate errors to each layer from the output layer back to the input layer
            HiddenLayerList[numLayers - 1].ComputeLayerErr(OutputLayer);
            for (int i = numLayers - 2; i >= 0; i--)
            {
                HiddenLayerList[i].ComputeLayerErr(HiddenLayerList[i + 1]);
            }

            //Update network weights
            Parallel.Invoke(() =>
            {
                OutputLayer.LearnFeatureWeights(numStates, curState);
            },
            () =>
            {
                Parallel.For(0, numLayers, parallelOption, i =>
                {
                    HiddenLayerList[i].LearnFeatureWeights(numStates, curState);
                });
            });
        }
    }

    return predicted;
}
public override int[] TestSeq2Seq(Sentence srcSentence, Featurizer featurizer)
{
    State curState = featurizer.ExtractFeatures(new string[] { "<s>" });
    curState.Label = featurizer.TagSet.GetIndex("<s>");

    //Reset all layers
    foreach (SimpleLayer layer in HiddenLayerList)
    {
        layer.netReset(false);
    }

    //Extract features from the source sentence
    Sequence srcSequence = featurizer.AutoEncoder.Featurizer.ExtractFeatures(srcSentence);
    double[] srcHiddenAvgOutput;
    Dictionary<int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(featurizer.AutoEncoder, srcSequence, curState.SparseFeature.Length, out srcHiddenAvgOutput, out srcSparseFeatures);

    int numLayers = HiddenLayerList.Count;
    List<int> predicted = new List<int>();
    predicted.Add(curState.Label);

    while (true)
    {
        //Build sparse features
        SparseVector sparseVector = new SparseVector();
        sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(curState.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        //Compute the first layer
        double[] denseFeatures = RNNHelper.ConcatenateVector(curState.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].computeLayer(sparseVector, denseFeatures, false);

        //Compute the middle layers
        for (int i = 1; i < numLayers; i++)
        {
            //Use the previous layer's output as the dense feature for the current layer
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].cellOutput, srcHiddenAvgOutput);
            HiddenLayerList[i].computeLayer(sparseVector, denseFeatures, false);
        }

        //Compute the output layer
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].cellOutput, srcHiddenAvgOutput);
        OutputLayer.computeLayer(sparseVector, denseFeatures, false);
        OutputLayer.Softmax(false);

        int nextTagId = OutputLayer.GetBestOutputIndex(false);
        string nextWord = featurizer.TagSet.GetTagName(nextTagId);

        curState = featurizer.ExtractFeatures(new string[] { nextWord });
        curState.Label = nextTagId;
        predicted.Add(nextTagId);

        if (nextWord == "</s>" || predicted.Count >= 100)
        {
            break;
        }
    }

    return predicted.ToArray();
}
public override int[] TestSeq2Seq(Sentence srcSentence, Config featurizer)
{
    var curState = featurizer.BuildState(new[] { "<s>" });
    curState.Label = featurizer.TagSet.GetIndex("<s>");

    //Reset all layers
    foreach (var layer in HiddenLayerList)
    {
        layer.Reset(false);
    }

    //Extract features from the source sentence
    var srcSequence = featurizer.Seq2SeqAutoEncoder.Config.BuildSequence(srcSentence);
    float[] srcHiddenAvgOutput;
    Dictionary<int, float> srcSparseFeatures;
    ExtractSourceSentenceFeature(featurizer.Seq2SeqAutoEncoder, srcSequence, curState.SparseFeature.Length, out srcHiddenAvgOutput, out srcSparseFeatures);

    var numLayers = HiddenLayerList.Count;
    var predicted = new List<int> { curState.Label };

    while (true)
    {
        //Build sparse features
        var sparseVector = new SparseVector();
        sparseVector.SetLength(curState.SparseFeature.Length + srcSequence.SparseFeatureSize);
        sparseVector.AddKeyValuePairData(curState.SparseFeature);
        sparseVector.AddKeyValuePairData(srcSparseFeatures);

        //Compute the first layer
        var denseFeatures = RNNHelper.ConcatenateVector(curState.DenseFeature, srcHiddenAvgOutput);
        HiddenLayerList[0].ForwardPass(sparseVector, denseFeatures, false);

        //Compute the middle layers
        for (var i = 1; i < numLayers; i++)
        {
            //Use the previous layer's output as the dense feature for the current layer
            denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[i - 1].Cell, srcHiddenAvgOutput);
            HiddenLayerList[i].ForwardPass(sparseVector, denseFeatures, false);
        }

        //Compute the output layer
        denseFeatures = RNNHelper.ConcatenateVector(HiddenLayerList[numLayers - 1].Cell, srcHiddenAvgOutput);
        OutputLayer.ForwardPass(sparseVector, denseFeatures, false);
        OutputLayer.Softmax(false);

        var nextTagId = OutputLayer.GetBestOutputIndex(false);
        var nextWord = featurizer.TagSet.GetTagName(nextTagId);

        curState = featurizer.BuildState(new[] { nextWord });
        curState.Label = nextTagId;
        predicted.Add(nextTagId);

        if (nextWord == "</s>" || predicted.Count >= 100)
        {
            break;
        }
    }

    return predicted.ToArray();
}