/// <summary>Compute dot product between two vectors.</summary>
public static double Dot(SimpleMatrix vector1, SimpleMatrix vector2)
{
    if (vector1.NumRows() == 1)
    {
        // vector1: row vector, assume that vector2 is a row vector too
        return vector1.Mult(vector2.Transpose()).Get(0);
    }
    else if (vector1.NumCols() == 1)
    {
        // vector1: col vector, assume that vector2 is also a column vector
        return vector1.Transpose().Mult(vector2).Get(0);
    }
    else
    {
        throw new AssertionError("Error in neural.Utils.dot: vector1 is a matrix " + vector1.NumRows() + " x " + vector1.NumCols());
    }
}
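// Illustrative usage sketch (not part of the original source). It builds two
// 3-dimensional column vectors with the same SimpleMatrix calls used above
// (the zero-initializing constructor and the linear-index Set) and checks Dot
// against the hand-computed value.
public static void DotUsageExample()
{
    SimpleMatrix a = new SimpleMatrix(3, 1);
    a.Set(0, 1.0);
    a.Set(1, 2.0);
    a.Set(2, 3.0);
    SimpleMatrix b = new SimpleMatrix(3, 1);
    b.Set(0, 4.0);
    b.Set(1, 5.0);
    b.Set(2, 6.0);
    double d = Dot(a, b); // expected: 1*4 + 2*5 + 3*6 = 32.0
}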
/// <summary>
/// Returns a column vector where each entry is the nth bilinear
/// product of the nth slices of the two tensors.
/// </summary>
public virtual SimpleMatrix BilinearProducts(SimpleMatrix @in)
{
    if (@in.NumCols() != 1)
    {
        throw new AssertionError("Expected a column vector");
    }
    if (@in.NumRows() != numCols)
    {
        throw new AssertionError("Number of rows in the input does not match number of columns in tensor");
    }
    if (numRows != numCols)
    {
        throw new AssertionError("Can only perform this operation on a SimpleTensor with square slices");
    }
    SimpleMatrix inT = @in.Transpose();
    SimpleMatrix @out = new SimpleMatrix(numSlices, 1);
    for (int slice = 0; slice < numSlices; ++slice)
    {
        double result = inT.Mult(slices[slice]).Mult(@in).Get(0);
        @out.Set(slice, result);
    }
    return @out;
}
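// Entry s of the output is the bilinear form in^T * A_s * in, where A_s is
// slice s of the tensor. A minimal sketch of that identity for one made-up
// 2x2 slice (not part of the original source; the two-index Set overload is
// assumed to mirror EJML's set(row, col, value)):
public static void BilinearProductExample()
{
    SimpleMatrix slice = new SimpleMatrix(2, 2);
    slice.Set(0, 0, 1.0);
    slice.Set(0, 1, 2.0);
    slice.Set(1, 0, 3.0);
    slice.Set(1, 1, 4.0);
    SimpleMatrix x = new SimpleMatrix(2, 1);
    x.Set(0, 1.0);
    x.Set(1, 2.0);
    // x^T * A * x = 1*5 + 2*11 = 27, since A*x = (5, 11)
    double bilinear = x.Transpose().Mult(slice).Mult(x).Get(0);
}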
public virtual SimpleMatrix GetAntecedentEmbedding(SimpleMatrix mentionEmbedding)
{
    return antecedentMatrix.Mult(mentionEmbedding);
}

public virtual SimpleMatrix GetAnaphorEmbedding(SimpleMatrix mentionEmbedding)
{
    return anaphorMatrix.Mult(mentionEmbedding);
}
public virtual double GetPairwiseScore(SimpleMatrix antecedentEmbedding, SimpleMatrix anaphorEmbedding, SimpleMatrix pairFeatures)
{
    SimpleMatrix firstLayerOutput = NeuralUtils.ElementwiseApplyReLU(
        antecedentEmbedding
            .Plus(anaphorEmbedding)
            .Plus(pairFeaturesMatrix.Mult(pairFeatures))
            .Plus(pairwiseFirstLayerBias));
    return Score(firstLayerOutput, pairwiseModel);
}
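// A hedged sketch (not part of the original source) of how the three methods
// above compose into a pairwise coreference score. The mention embeddings and
// pair features are hypothetical inputs whose dimensions must match the
// model's antecedent, anaphor, and pair-feature matrices.
public virtual double ScorePairExample(SimpleMatrix mention1Embedding, SimpleMatrix mention2Embedding, SimpleMatrix pairFeatures)
{
    // Project each mention through its role-specific matrix, then score the pair.
    SimpleMatrix antecedent = GetAntecedentEmbedding(mention1Embedding);
    SimpleMatrix anaphor = GetAnaphorEmbedding(mention2Embedding);
    return GetPairwiseScore(antecedent, anaphor, pairFeatures);
}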
/// <summary>
/// This is the method to call for assigning labels and node vectors
/// to the Tree.
/// </summary>
/// <remarks>
/// This is the method to call for assigning labels and node vectors
/// to the Tree. After calling this, each of the non-leaf nodes will
/// have the node vector and the predictions of their classes
/// assigned to that subtree's node. The annotations filled in are
/// the RNNCoreAnnotations.NodeVector, Predictions, and
/// PredictedClass. In general, PredictedClass will be the most
/// useful annotation except when training.
/// </remarks>
public virtual void ForwardPropagateTree(Tree tree)
{
    SimpleMatrix nodeVector;     // initialized below or Exception thrown
    SimpleMatrix classification; // initialized below or Exception thrown
    if (tree.IsLeaf())
    {
        // We do nothing for the leaves. The preterminals will
        // calculate the classification for this word/tag. In fact, the
        // recursion should not have gotten here (unless there are
        // degenerate trees of just one leaf).
        log.Info("SentimentCostAndGradient: warning: We reached leaves in forwardPropagate: " + tree);
        throw new AssertionError("We should not have reached leaves in forwardPropagate");
    }
    else if (tree.IsPreTerminal())
    {
        classification = model.GetUnaryClassification(tree.Label().Value());
        string word = tree.Children()[0].Label().Value();
        SimpleMatrix wordVector = model.GetWordVector(word);
        nodeVector = NeuralUtils.ElementwiseApplyTanh(wordVector);
    }
    else if (tree.Children().Length == 1)
    {
        log.Info("SentimentCostAndGradient: warning: Non-preterminal nodes of size 1: " + tree);
        throw new AssertionError("Non-preterminal nodes of size 1 should have already been collapsed");
    }
    else if (tree.Children().Length == 2)
    {
        ForwardPropagateTree(tree.Children()[0]);
        ForwardPropagateTree(tree.Children()[1]);
        string leftCategory = tree.Children()[0].Label().Value();
        string rightCategory = tree.Children()[1].Label().Value();
        SimpleMatrix W = model.GetBinaryTransform(leftCategory, rightCategory);
        classification = model.GetBinaryClassification(leftCategory, rightCategory);
        SimpleMatrix leftVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[0]);
        SimpleMatrix rightVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[1]);
        SimpleMatrix childrenVector = NeuralUtils.ConcatenateWithBias(leftVector, rightVector);
        if (model.op.useTensors)
        {
            SimpleTensor tensor = model.GetBinaryTensor(leftCategory, rightCategory);
            SimpleMatrix tensorIn = NeuralUtils.Concatenate(leftVector, rightVector);
            SimpleMatrix tensorOut = tensor.BilinearProducts(tensorIn);
            nodeVector = NeuralUtils.ElementwiseApplyTanh(W.Mult(childrenVector).Plus(tensorOut));
        }
        else
        {
            nodeVector = NeuralUtils.ElementwiseApplyTanh(W.Mult(childrenVector));
        }
    }
    else
    {
        log.Info("SentimentCostAndGradient: warning: Tree not correctly binarized: " + tree);
        throw new AssertionError("Tree not correctly binarized");
    }
    SimpleMatrix predictions = NeuralUtils.Softmax(classification.Mult(NeuralUtils.ConcatenateWithBias(nodeVector)));
    int index = GetPredictedClass(predictions);
    if (!(tree.Label() is CoreLabel))
    {
        log.Info("SentimentCostAndGradient: warning: No CoreLabels in nodes: " + tree);
        throw new AssertionError("Expected CoreLabels in the nodes");
    }
    CoreLabel label = (CoreLabel)tree.Label();
    label.Set(typeof(RNNCoreAnnotations.Predictions), predictions);
    label.Set(typeof(RNNCoreAnnotations.PredictedClass), index);
    label.Set(typeof(RNNCoreAnnotations.NodeVector), nodeVector);
}
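// Hedged usage sketch (not part of the original source): after the forward
// pass, the per-node annotations can be read back through RNNCoreAnnotations.
// GetNodeVector(Tree) appears in the backprop code below; GetPredictedClass(Tree)
// is assumed to mirror the Java accessor of the same name.
public virtual int PredictRootClassExample(Tree tree)
{
    // Run the forward pass, then read the annotation written at the root.
    ForwardPropagateTree(tree);
    return RNNCoreAnnotations.GetPredictedClass(tree);
}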
private void BackpropDerivativesAndError(Tree tree,
    TwoDimensionalMap<string, string, SimpleMatrix> binaryTD,
    TwoDimensionalMap<string, string, SimpleMatrix> binaryCD,
    TwoDimensionalMap<string, string, SimpleTensor> binaryTensorTD,
    IDictionary<string, SimpleMatrix> unaryCD,
    IDictionary<string, SimpleMatrix> wordVectorD,
    SimpleMatrix deltaUp)
{
    if (tree.IsLeaf())
    {
        return;
    }
    SimpleMatrix currentVector = RNNCoreAnnotations.GetNodeVector(tree);
    string category = tree.Label().Value();
    category = model.BasicCategory(category);
    // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
    SimpleMatrix goldLabel = new SimpleMatrix(model.numClasses, 1);
    int goldClass = RNNCoreAnnotations.GetGoldClass(tree);
    if (goldClass >= 0)
    {
        goldLabel.Set(goldClass, 1.0);
    }
    double nodeWeight = model.op.trainOptions.GetClassWeight(goldClass);
    SimpleMatrix predictions = RNNCoreAnnotations.GetPredictions(tree);
    // If this is an unlabeled class, set deltaClass to 0. We could make this
    // more efficient by eliminating some of the calculations below, but this
    // is the easiest way to handle the unlabeled class.
    SimpleMatrix deltaClass = goldClass >= 0
        ? predictions.Minus(goldLabel).Scale(nodeWeight)
        : new SimpleMatrix(predictions.NumRows(), predictions.NumCols());
    SimpleMatrix localCD = deltaClass.Mult(NeuralUtils.ConcatenateWithBias(currentVector).Transpose());
    double error = -(NeuralUtils.ElementwiseApplyLog(predictions).ElementMult(goldLabel).ElementSum());
    error = error * nodeWeight;
    RNNCoreAnnotations.SetPredictionError(tree, error);
    if (tree.IsPreTerminal())
    {
        // below us is a word vector
        unaryCD[category] = unaryCD[category].Plus(localCD);
        string word = tree.Children()[0].Label().Value();
        word = model.GetVocabWord(word);
        SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
        SimpleMatrix deltaFromClass = model.GetUnaryClassification(category).Transpose().Mult(deltaClass);
        deltaFromClass = deltaFromClass.ExtractMatrix(0, model.op.numHid, 0, 1).ElementMult(currentVectorDerivative);
        SimpleMatrix deltaFull = deltaFromClass.Plus(deltaUp);
        SimpleMatrix oldWordVectorD = wordVectorD[word];
        if (oldWordVectorD == null)
        {
            wordVectorD[word] = deltaFull;
        }
        else
        {
            wordVectorD[word] = oldWordVectorD.Plus(deltaFull);
        }
    }
    else
    {
        // Otherwise, this must be a binary node
        string leftCategory = model.BasicCategory(tree.Children()[0].Label().Value());
        string rightCategory = model.BasicCategory(tree.Children()[1].Label().Value());
        if (model.op.combineClassification)
        {
            unaryCD[string.Empty] = unaryCD[string.Empty].Plus(localCD);
        }
        else
        {
            binaryCD.Put(leftCategory, rightCategory, binaryCD.Get(leftCategory, rightCategory).Plus(localCD));
        }
        SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
        SimpleMatrix deltaFromClass = model.GetBinaryClassification(leftCategory, rightCategory).Transpose().Mult(deltaClass);
        deltaFromClass = deltaFromClass.ExtractMatrix(0, model.op.numHid, 0, 1).ElementMult(currentVectorDerivative);
        SimpleMatrix deltaFull = deltaFromClass.Plus(deltaUp);
        SimpleMatrix leftVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[0]);
        SimpleMatrix rightVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[1]);
        SimpleMatrix childrenVector = NeuralUtils.ConcatenateWithBias(leftVector, rightVector);
        SimpleMatrix W_df = deltaFull.Mult(childrenVector.Transpose());
        binaryTD.Put(leftCategory, rightCategory, binaryTD.Get(leftCategory, rightCategory).Plus(W_df));
        SimpleMatrix deltaDown;
        if (model.op.useTensors)
        {
            SimpleTensor Wt_df = GetTensorGradient(deltaFull, leftVector, rightVector);
            binaryTensorTD.Put(leftCategory, rightCategory, binaryTensorTD.Get(leftCategory, rightCategory).Plus(Wt_df));
            deltaDown = ComputeTensorDeltaDown(deltaFull, leftVector, rightVector, model.GetBinaryTransform(leftCategory, rightCategory), model.GetBinaryTensor(leftCategory, rightCategory));
        }
        else
        {
            deltaDown = model.GetBinaryTransform(leftCategory, rightCategory).Transpose().Mult(deltaFull);
        }
        SimpleMatrix leftDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(leftVector);
        SimpleMatrix rightDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(rightVector);
        SimpleMatrix leftDeltaDown = deltaDown.ExtractMatrix(0, deltaFull.NumRows(), 0, 1);
        SimpleMatrix rightDeltaDown = deltaDown.ExtractMatrix(deltaFull.NumRows(), deltaFull.NumRows() * 2, 0, 1);
        BackpropDerivativesAndError(tree.Children()[0], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, leftDerivative.ElementMult(leftDeltaDown));
        BackpropDerivativesAndError(tree.Children()[1], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, rightDerivative.ElementMult(rightDeltaDown));
    }
}
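// Why ElementwiseApplyTanhDerivative is applied to the *node vectors* above:
// each node vector is already v = tanh(z), and d tanh(z)/dz = 1 - tanh(z)^2
// = 1 - v^2, so the derivative can be recovered from the forward-pass output
// alone, without storing the pre-activation z. A minimal sketch of that
// identity (not part of the original source; ElementwiseApplyTanhDerivative
// is assumed, as in the Java original, to compute 1 - v^2 elementwise):
public static void TanhDerivativeExample()
{
    double z = 0.5;
    double v = System.Math.Tanh(z);  // forward-pass activation
    double derivative = 1.0 - v * v; // derivative recovered from the output
    // equals 1 / (Cosh(z) * Cosh(z)), the textbook derivative of tanh
}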
public virtual void BackpropDerivative(Tree tree, IList<string> words,
    IdentityHashMap<Tree, SimpleMatrix> nodeVectors,
    TwoDimensionalMap<string, string, SimpleMatrix> binaryW_dfs,
    IDictionary<string, SimpleMatrix> unaryW_dfs,
    TwoDimensionalMap<string, string, SimpleMatrix> binaryScoreDerivatives,
    IDictionary<string, SimpleMatrix> unaryScoreDerivatives,
    IDictionary<string, SimpleMatrix> wordVectorDerivatives,
    SimpleMatrix deltaUp)
{
    if (tree.IsLeaf())
    {
        return;
    }
    if (tree.IsPreTerminal())
    {
        if (op.trainOptions.trainWordVectors)
        {
            string word = tree.Children()[0].Label().Value();
            word = dvModel.GetVocabWord(word);
            // Note: the nonlinearity derivative is intentionally not applied
            // to the word vector delta here.
            SimpleMatrix derivative = deltaUp;
            wordVectorDerivatives[word] = wordVectorDerivatives[word].Plus(derivative);
        }
        return;
    }
    SimpleMatrix currentVector = nodeVectors[tree];
    SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
    SimpleMatrix scoreW = dvModel.GetScoreWForNode(tree);
    currentVectorDerivative = currentVectorDerivative.ElementMult(scoreW.Transpose());
    // the delta that is used at the current nodes
    SimpleMatrix deltaCurrent = deltaUp.Plus(currentVectorDerivative);
    SimpleMatrix W = dvModel.GetWForNode(tree);
    SimpleMatrix WTdelta = W.Transpose().Mult(deltaCurrent);
    if (tree.Children().Length == 2)
    {
        // TODO: RS: Change to the nice "getWForNode" setup?
        string leftLabel = dvModel.BasicCategory(tree.Children()[0].Label().Value());
        string rightLabel = dvModel.BasicCategory(tree.Children()[1].Label().Value());
        binaryScoreDerivatives.Put(leftLabel, rightLabel, binaryScoreDerivatives.Get(leftLabel, rightLabel).Plus(currentVector.Transpose()));
        SimpleMatrix leftVector = nodeVectors[tree.Children()[0]];
        SimpleMatrix rightVector = nodeVectors[tree.Children()[1]];
        SimpleMatrix childrenVector = NeuralUtils.ConcatenateWithBias(leftVector, rightVector);
        if (op.trainOptions.useContextWords)
        {
            childrenVector = ConcatenateContextWords(childrenVector, tree.GetSpan(), words);
        }
        SimpleMatrix W_df = deltaCurrent.Mult(childrenVector.Transpose());
        binaryW_dfs.Put(leftLabel, rightLabel, binaryW_dfs.Get(leftLabel, rightLabel).Plus(W_df));
        // and then recurse
        SimpleMatrix leftDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(leftVector);
        SimpleMatrix rightDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(rightVector);
        SimpleMatrix leftWTDelta = WTdelta.ExtractMatrix(0, deltaCurrent.NumRows(), 0, 1);
        SimpleMatrix rightWTDelta = WTdelta.ExtractMatrix(deltaCurrent.NumRows(), deltaCurrent.NumRows() * 2, 0, 1);
        BackpropDerivative(tree.Children()[0], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, leftDerivative.ElementMult(leftWTDelta));
        BackpropDerivative(tree.Children()[1], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, rightDerivative.ElementMult(rightWTDelta));
    }
    else if (tree.Children().Length == 1)
    {
        string childLabel = dvModel.BasicCategory(tree.Children()[0].Label().Value());
        unaryScoreDerivatives[childLabel] = unaryScoreDerivatives[childLabel].Plus(currentVector.Transpose());
        SimpleMatrix childVector = nodeVectors[tree.Children()[0]];
        SimpleMatrix childVectorWithBias = NeuralUtils.ConcatenateWithBias(childVector);
        if (op.trainOptions.useContextWords)
        {
            childVectorWithBias = ConcatenateContextWords(childVectorWithBias, tree.GetSpan(), words);
        }
        SimpleMatrix W_df = deltaCurrent.Mult(childVectorWithBias.Transpose());
        unaryW_dfs[childLabel] = unaryW_dfs[childLabel].Plus(W_df);
        // and then recurse
        SimpleMatrix childDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(childVector);
        SimpleMatrix childWTDelta = WTdelta.ExtractMatrix(0, deltaCurrent.NumRows(), 0, 1);
        BackpropDerivative(tree.Children()[0], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, childDerivative.ElementMult(childWTDelta));
    }
}
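// How the backpropagated delta is split between children above: WTdelta has
// the stacked layout [left child rows; right child rows; bias row] because
// childrenVector was built with ConcatenateWithBias. ExtractMatrix takes an
// EJML-style half-open row range [begin, end), so rows [0, n) go to the left
// child, rows [n, 2n) to the right child, and the final bias row is dropped.
// Minimal sketch with n = 2 (not part of the original source; values made up):
public static void SplitDeltaExample()
{
    SimpleMatrix stacked = new SimpleMatrix(5, 1); // 2 + 2 + 1 bias row
    for (int i = 0; i < 5; ++i)
    {
        stacked.Set(i, i + 1.0);
    }
    SimpleMatrix left = stacked.ExtractMatrix(0, 2, 0, 1);  // rows 0-1
    SimpleMatrix right = stacked.ExtractMatrix(2, 4, 0, 1); // rows 2-3; bias row 4 dropped
}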
private void ForwardPropagateTree(Tree tree, IList<string> words,
    IdentityHashMap<Tree, SimpleMatrix> nodeVectors,
    IdentityHashMap<Tree, double> scores)
{
    if (tree.IsLeaf())
    {
        return;
    }
    if (tree.IsPreTerminal())
    {
        Tree wordNode = tree.Children()[0];
        string word = wordNode.Label().Value();
        SimpleMatrix wordVector = dvModel.GetWordVector(word);
        wordVector = NeuralUtils.ElementwiseApplyTanh(wordVector);
        nodeVectors[tree] = wordVector;
        return;
    }
    foreach (Tree child in tree.Children())
    {
        ForwardPropagateTree(child, words, nodeVectors, scores);
    }
    // at this point, nodeVectors contains the vectors for all of the children of tree
    SimpleMatrix childVec;
    if (tree.Children().Length == 2)
    {
        childVec = NeuralUtils.ConcatenateWithBias(nodeVectors[tree.Children()[0]], nodeVectors[tree.Children()[1]]);
    }
    else
    {
        childVec = NeuralUtils.ConcatenateWithBias(nodeVectors[tree.Children()[0]]);
    }
    if (op.trainOptions.useContextWords)
    {
        childVec = ConcatenateContextWords(childVec, tree.GetSpan(), words);
    }
    SimpleMatrix W = dvModel.GetWForNode(tree);
    if (W == null)
    {
        string error = "Could not find W for tree " + tree;
        if (op.testOptions.verbose)
        {
            log.Info(error);
        }
        throw new NoSuchParseException(error);
    }
    SimpleMatrix currentVector = W.Mult(childVec);
    currentVector = NeuralUtils.ElementwiseApplyTanh(currentVector);
    nodeVectors[tree] = currentVector;
    SimpleMatrix scoreW = dvModel.GetScoreWForNode(tree);
    if (scoreW == null)
    {
        string error = "Could not find scoreW for tree " + tree;
        if (op.testOptions.verbose)
        {
            log.Info(error);
        }
        throw new NoSuchParseException(error);
    }
    double score = scoreW.Dot(currentVector);
    //score = NeuralUtils.sigmoid(score);
    scores[tree] = score;
}
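// Hedged usage sketch (not part of the original source): scoring a parse tree
// with the recursive forward pass above. The maps are keyed on node identity
// because the same label can occur at many positions in a tree; a no-argument
// IdentityHashMap constructor is assumed here.
public virtual double ScoreTreeExample(Tree tree, IList<string> words)
{
    IdentityHashMap<Tree, SimpleMatrix> nodeVectors = new IdentityHashMap<Tree, SimpleMatrix>();
    IdentityHashMap<Tree, double> scores = new IdentityHashMap<Tree, double>();
    ForwardPropagateTree(tree, words, nodeVectors, scores);
    return scores[tree]; // the score recorded for the root node
}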