/// <summary>
/// Use the given <paramref name="matrix"/> in place of <paramref name="slice"/>.
/// Does not copy the <paramref name="matrix"/>, but rather uses the actual object.
/// </summary>
public virtual void SetSlice(int slice, SimpleMatrix matrix)
{
    if (slice < 0 || slice >= numSlices)
    {
        throw new ArgumentException("Unexpected slice number " + slice + " for tensor with " + numSlices + " slices");
    }
    if (matrix.NumCols() != numCols)
    {
        throw new ArgumentException("Incompatible matrix size. Has " + matrix.NumCols() + " columns, tensor has " + numCols);
    }
    if (matrix.NumRows() != numRows)
    {
        throw new ArgumentException("Incompatible matrix size. Has " + matrix.NumRows() + " rows, tensor has " + numRows);
    }
    slices[slice] = matrix;
}
private static double ScaleAndRegularize(IDictionary<string, SimpleMatrix> derivatives, IDictionary<string, SimpleMatrix> currentMatrices, double scale, double regCost, bool activeMatricesOnly, bool dropBiasColumn)
{
    double cost = 0.0; // the regularization cost
    foreach (KeyValuePair<string, SimpleMatrix> entry in currentMatrices)
    {
        SimpleMatrix D = derivatives[entry.Key];
        if (activeMatricesOnly && D == null)
        {
            // Fill in an empty matrix so the length of theta can match.
            // TODO: might want to allow for sparse parameter vectors
            derivatives[entry.Key] = new SimpleMatrix(entry.Value.NumRows(), entry.Value.NumCols());
            continue;
        }
        SimpleMatrix regMatrix = entry.Value;
        if (dropBiasColumn)
        {
            // Zero out the bias column so it is not regularized.
            regMatrix = new SimpleMatrix(regMatrix);
            regMatrix.InsertIntoThis(0, regMatrix.NumCols() - 1, new SimpleMatrix(regMatrix.NumRows(), 1));
        }
        D = D.Scale(scale).Plus(regMatrix.Scale(regCost));
        derivatives[entry.Key] = D;
        cost += regMatrix.ElementMult(regMatrix).ElementSum() * regCost / 2.0;
    }
    return cost;
}
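// For intuition: the method above implements plain L2 regularization. Each
// parameter matrix W contributes (regCost / 2) * ||W||_F^2 to the cost and
// regCost * W to the gradient, which is why D becomes D * scale + W * regCost
// (scale is typically 1 / batchSize). A minimal self-contained sketch of the
// same math with plain arrays (the method name here is illustrative, not part
// of the API):
private static double ScaleAndRegularizeSketch(double[] derivative, double[] weights, double scale, double regCost)
{
    double cost = 0.0;
    for (int i = 0; i < weights.Length; ++i)
    {
        // Gradient: scaled data gradient plus the derivative of (regCost / 2) * w^2.
        derivative[i] = derivative[i] * scale + weights[i] * regCost;
        // Cost: (regCost / 2) * sum of squared weights.
        cost += weights[i] * weights[i] * regCost / 2.0;
    }
    return cost;
}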
/// <summary>Applies the derivative of tanh to each of the elements in the vector.</summary>
/// <remarks>Applies the derivative of tanh to each of the elements in the vector. Returns a new matrix.</remarks>
public static SimpleMatrix ElementwiseApplyTanhDerivative(SimpleMatrix input)
{
    SimpleMatrix output = new SimpleMatrix(input.NumRows(), input.NumCols());
    output.Set(1.0);
    output = output.Minus(input.ElementMult(input));
    return output;
}
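// Note the input convention: since d/dx tanh(x) = 1 - tanh(x)^2, the method
// above computes 1 - input .* input, so the input must already hold
// tanh-activated values (as stored by the forward pass), not raw
// pre-activations. A quick scalar sanity check (illustrative only):
private static void TanhDerivativeCheck()
{
    double x = 0.5;
    double activated = Math.Tanh(x);                  // what the forward pass stores
    double derivative = 1.0 - activated * activated;  // what ElementwiseApplyTanhDerivative computes
    // A central finite difference approximates the same value.
    double h = 1e-6;
    double numeric = (Math.Tanh(x + h) - Math.Tanh(x - h)) / (2 * h);
    System.Console.WriteLine(derivative + " ~= " + numeric);
}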
/// <summary>Applies tanh to each of the entries in the matrix.</summary>
/// <remarks>Applies tanh to each of the entries in the matrix. Returns a new matrix.</remarks>
public static SimpleMatrix ElementwiseApplyTanh(SimpleMatrix input)
{
    SimpleMatrix output = new SimpleMatrix(input);
    for (int i = 0; i < output.NumRows(); ++i)
    {
        for (int j = 0; j < output.NumCols(); ++j)
        {
            output.Set(i, j, Math.Tanh(output.Get(i, j)));
        }
    }
    return output;
}
/// <summary>Applies softmax to all of the elements of the matrix.</summary>
/// <remarks>
/// Applies softmax to all of the elements of the matrix. The return
/// matrix will have all of its elements sum to 1. If your matrix is
/// not already a vector, be sure this is what you actually want.
/// </remarks>
public static SimpleMatrix Softmax(SimpleMatrix input)
{
    SimpleMatrix output = new SimpleMatrix(input);
    for (int i = 0; i < output.NumRows(); ++i)
    {
        for (int j = 0; j < output.NumCols(); ++j)
        {
            output.Set(i, j, Math.Exp(output.Get(i, j)));
        }
    }
    double sum = output.ElementSum();
    // will be safe, since exp should never return 0
    return output.Scale(1.0 / sum);
}
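// Caveat: the method above is the unstabilized softmax, and Math.Exp can
// overflow to infinity for large entries. The usual remedy (not applied here)
// is to subtract the maximum entry first, which leaves the result unchanged
// because exp(x - m) / sum_j exp(x_j - m) = exp(x) / sum_j exp(x_j). A minimal
// sketch with plain arrays (illustrative only, not part of the API):
private static double[] StableSoftmaxSketch(double[] x)
{
    double max = double.NegativeInfinity;
    foreach (double v in x)
    {
        max = Math.Max(max, v);
    }
    double sum = 0.0;
    double[] output = new double[x.Length];
    for (int i = 0; i < x.Length; ++i)
    {
        output[i] = Math.Exp(x[i] - max); // shifted exponent stays in a safe range
        sum += output[i];
    }
    for (int i = 0; i < x.Length; ++i)
    {
        output[i] /= sum;
    }
    return output;
}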
private static double ScaleAndRegularize(TwoDimensionalMap<string, string, SimpleMatrix> derivatives, TwoDimensionalMap<string, string, SimpleMatrix> currentMatrices, double scale, double regCost, bool dropBiasColumn)
{
    double cost = 0.0; // the regularization cost
    foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> entry in currentMatrices)
    {
        SimpleMatrix D = derivatives.Get(entry.GetFirstKey(), entry.GetSecondKey());
        SimpleMatrix regMatrix = entry.GetValue();
        if (dropBiasColumn)
        {
            // Zero out the bias column so it is not regularized.
            regMatrix = new SimpleMatrix(regMatrix);
            regMatrix.InsertIntoThis(0, regMatrix.NumCols() - 1, new SimpleMatrix(regMatrix.NumRows(), 1));
        }
        D = D.Scale(scale).Plus(regMatrix.Scale(regCost));
        derivatives.Put(entry.GetFirstKey(), entry.GetSecondKey(), D);
        cost += regMatrix.ElementMult(regMatrix).ElementSum() * regCost / 2.0;
    }
    return cost;
}
/// <summary>Compute dot product between two vectors.</summary>
public static double Dot(SimpleMatrix vector1, SimpleMatrix vector2)
{
    if (vector1.NumRows() == 1)
    {
        // vector1 is a row vector; assume that vector2 is a row vector too
        return vector1.Mult(vector2.Transpose()).Get(0);
    }
    else if (vector1.NumCols() == 1)
    {
        // vector1 is a column vector; assume that vector2 is also a column vector
        return vector1.Transpose().Mult(vector2).Get(0);
    }
    else
    {
        throw new AssertionError("Error in neural.Utils.dot: vector1 is a matrix " + vector1.NumRows() + " x " + vector1.NumCols());
    }
}
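// Usage is orientation-agnostic as long as both vectors share an orientation.
// A small illustrative check, assuming a SimpleMatrix constructor that takes a
// 2D array as in EJML (which constructors this port exposes is an assumption):
private static void DotExample()
{
    SimpleMatrix a = new SimpleMatrix(new double[][] { new double[] { 1.0 }, new double[] { 2.0 }, new double[] { 3.0 } });
    SimpleMatrix b = new SimpleMatrix(new double[][] { new double[] { 4.0 }, new double[] { 5.0 }, new double[] { 6.0 } });
    double result = Dot(a, b); // column vectors: 1*4 + 2*5 + 3*6 = 32
    System.Console.WriteLine(result);
}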
/// <summary>
/// Returns a column vector whose nth entry is the bilinear product
/// in^T * S_n * in of the input vector with the nth slice of this tensor.
/// </summary>
public virtual SimpleMatrix BilinearProducts(SimpleMatrix @in)
{
    if (@in.NumCols() != 1)
    {
        throw new AssertionError("Expected a column vector");
    }
    if (@in.NumRows() != numCols)
    {
        throw new AssertionError("Number of rows in the input does not match number of columns in tensor");
    }
    if (numRows != numCols)
    {
        throw new AssertionError("Can only perform this operation on a SimpleTensor with square slices");
    }
    SimpleMatrix inT = @in.Transpose();
    SimpleMatrix @out = new SimpleMatrix(numSlices, 1);
    for (int slice = 0; slice < numSlices; ++slice)
    {
        double result = inT.Mult(slices[slice]).Mult(@in).Get(0);
        @out.Set(slice, result);
    }
    return @out;
}
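// In index form, the loop above computes out[k] = in^T * S_k * in for each
// square slice S_k. A plain-array sketch of the same computation, independent
// of SimpleMatrix (illustrative only):
private static double[] BilinearProductsSketch(double[][][] slices, double[] input)
{
    double[] output = new double[slices.Length];
    for (int k = 0; k < slices.Length; ++k)
    {
        double sum = 0.0;
        for (int i = 0; i < input.Length; ++i)
        {
            for (int j = 0; j < input.Length; ++j)
            {
                sum += input[i] * slices[k][i][j] * input[j];
            }
        }
        output[k] = sum; // the k-th bilinear product in^T * S_k * in
    }
    return output;
}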
private void BackpropDerivativesAndError(Tree tree, TwoDimensionalMap<string, string, SimpleMatrix> binaryTD, TwoDimensionalMap<string, string, SimpleMatrix> binaryCD, TwoDimensionalMap<string, string, SimpleTensor> binaryTensorTD, IDictionary<string, SimpleMatrix> unaryCD, IDictionary<string, SimpleMatrix> wordVectorD, SimpleMatrix deltaUp)
{
    if (tree.IsLeaf())
    {
        return;
    }
    SimpleMatrix currentVector = RNNCoreAnnotations.GetNodeVector(tree);
    string category = tree.Label().Value();
    category = model.BasicCategory(category);
    // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
    SimpleMatrix goldLabel = new SimpleMatrix(model.numClasses, 1);
    int goldClass = RNNCoreAnnotations.GetGoldClass(tree);
    if (goldClass >= 0)
    {
        goldLabel.Set(goldClass, 1.0);
    }
    double nodeWeight = model.op.trainOptions.GetClassWeight(goldClass);
    SimpleMatrix predictions = RNNCoreAnnotations.GetPredictions(tree);
    // If this is an unlabeled class, set deltaClass to 0.  We could
    // make this more efficient by eliminating various of the below
    // calculations, but this would be the easiest way to handle the
    // unlabeled class.
    SimpleMatrix deltaClass = goldClass >= 0 ? predictions.Minus(goldLabel).Scale(nodeWeight) : new SimpleMatrix(predictions.NumRows(), predictions.NumCols());
    SimpleMatrix localCD = deltaClass.Mult(NeuralUtils.ConcatenateWithBias(currentVector).Transpose());
    double error = -(NeuralUtils.ElementwiseApplyLog(predictions).ElementMult(goldLabel).ElementSum());
    error = error * nodeWeight;
    RNNCoreAnnotations.SetPredictionError(tree, error);
    if (tree.IsPreTerminal())
    {
        // below us is a word vector
        unaryCD[category] = unaryCD[category].Plus(localCD);
        string word = tree.Children()[0].Label().Value();
        word = model.GetVocabWord(word);
        //SimpleMatrix currentVectorDerivative = NeuralUtils.elementwiseApplyTanhDerivative(currentVector);
        //SimpleMatrix deltaFromClass = model.getUnaryClassification(category).transpose().mult(deltaClass);
        //SimpleMatrix deltaFull = deltaFromClass.extractMatrix(0, model.op.numHid, 0, 1).plus(deltaUp);
        //SimpleMatrix wordDerivative = deltaFull.elementMult(currentVectorDerivative);
        //wordVectorD.put(word, wordVectorD.get(word).plus(wordDerivative));
        SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
        SimpleMatrix deltaFromClass = model.GetUnaryClassification(category).Transpose().Mult(deltaClass);
        deltaFromClass = deltaFromClass.ExtractMatrix(0, model.op.numHid, 0, 1).ElementMult(currentVectorDerivative);
        SimpleMatrix deltaFull = deltaFromClass.Plus(deltaUp);
        SimpleMatrix oldWordVectorD = wordVectorD[word];
        if (oldWordVectorD == null)
        {
            wordVectorD[word] = deltaFull;
        }
        else
        {
            wordVectorD[word] = oldWordVectorD.Plus(deltaFull);
        }
    }
    else
    {
        // Otherwise, this must be a binary node
        string leftCategory = model.BasicCategory(tree.Children()[0].Label().Value());
        string rightCategory = model.BasicCategory(tree.Children()[1].Label().Value());
        if (model.op.combineClassification)
        {
            unaryCD[string.Empty] = unaryCD[string.Empty].Plus(localCD);
        }
        else
        {
            binaryCD.Put(leftCategory, rightCategory, binaryCD.Get(leftCategory, rightCategory).Plus(localCD));
        }
        SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
        SimpleMatrix deltaFromClass = model.GetBinaryClassification(leftCategory, rightCategory).Transpose().Mult(deltaClass);
        deltaFromClass = deltaFromClass.ExtractMatrix(0, model.op.numHid, 0, 1).ElementMult(currentVectorDerivative);
        SimpleMatrix deltaFull = deltaFromClass.Plus(deltaUp);
        SimpleMatrix leftVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[0]);
        SimpleMatrix rightVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[1]);
        SimpleMatrix childrenVector = NeuralUtils.ConcatenateWithBias(leftVector, rightVector);
        SimpleMatrix W_df = deltaFull.Mult(childrenVector.Transpose());
        binaryTD.Put(leftCategory, rightCategory, binaryTD.Get(leftCategory, rightCategory).Plus(W_df));
        SimpleMatrix deltaDown;
        if (model.op.useTensors)
        {
            SimpleTensor Wt_df = GetTensorGradient(deltaFull, leftVector, rightVector);
            binaryTensorTD.Put(leftCategory, rightCategory, binaryTensorTD.Get(leftCategory, rightCategory).Plus(Wt_df));
            deltaDown = ComputeTensorDeltaDown(deltaFull, leftVector, rightVector, model.GetBinaryTransform(leftCategory, rightCategory), model.GetBinaryTensor(leftCategory, rightCategory));
        }
        else
        {
            deltaDown = model.GetBinaryTransform(leftCategory, rightCategory).Transpose().Mult(deltaFull);
        }
        SimpleMatrix leftDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(leftVector);
        SimpleMatrix rightDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(rightVector);
        // deltaDown stacks the deltas for both children; split it in half.
        SimpleMatrix leftDeltaDown = deltaDown.ExtractMatrix(0, deltaFull.NumRows(), 0, 1);
        SimpleMatrix rightDeltaDown = deltaDown.ExtractMatrix(deltaFull.NumRows(), deltaFull.NumRows() * 2, 0, 1);
        BackpropDerivativesAndError(tree.Children()[0], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, leftDerivative.ElementMult(leftDeltaDown));
        BackpropDerivativesAndError(tree.Children()[1], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, rightDerivative.ElementMult(rightDeltaDown));
    }
}
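// The step deltaClass = predictions - goldLabel above relies on the standard
// identity that, for a softmax output trained with cross-entropy loss,
// d(loss)/d(score[i]) = p[i] - 1{i == gold}. A self-contained 3-class check
// with plain arrays (illustrative only, not part of the API):
private static void SoftmaxCrossEntropyGradientCheck()
{
    double[] scores = { 1.0, 2.0, 0.5 };
    int gold = 1;
    double sum = 0.0;
    double[] p = new double[scores.Length];
    for (int i = 0; i < scores.Length; ++i)
    {
        p[i] = Math.Exp(scores[i]);
        sum += p[i];
    }
    for (int i = 0; i < scores.Length; ++i)
    {
        p[i] /= sum;
    }
    double[] delta = new double[scores.Length];
    for (int i = 0; i < scores.Length; ++i)
    {
        // Matches predictions.Minus(goldLabel) in the method above.
        delta[i] = p[i] - (i == gold ? 1.0 : 0.0);
    }
    System.Console.WriteLine(string.Join(", ", delta));
}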
/// <exception cref="System.IO.IOException"/>
public static void Main(string[] args)
{
    string basePath = "/user/socherr/scr/projects/semComp/RNTN/src/params/";
    int numSlices = 25;
    bool useEscapedParens = false;
    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-slices"))
        {
            numSlices = System.Convert.ToInt32(args[argIndex + 1]);
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-path"))
        {
            basePath = args[argIndex + 1];
            argIndex += 2;
        }
        else if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-useEscapedParens"))
        {
            useEscapedParens = true;
            argIndex += 1;
        }
        else
        {
            log.Info("Unknown argument " + args[argIndex]);
            System.Environment.Exit(2);
        }
    }
    SimpleMatrix[] slices = new SimpleMatrix[numSlices];
    for (int i = 0; i < numSlices; ++i)
    {
        slices[i] = LoadMatrix(basePath + "bin/Wt_" + (i + 1) + ".bin", basePath + "Wt_" + (i + 1) + ".txt");
    }
    SimpleTensor tensor = new SimpleTensor(slices);
    log.Info("W tensor size: " + tensor.NumRows() + "x" + tensor.NumCols() + "x" + tensor.NumSlices());
    SimpleMatrix W = LoadMatrix(basePath + "bin/W.bin", basePath + "W.txt");
    log.Info("W matrix size: " + W.NumRows() + "x" + W.NumCols());
    SimpleMatrix Wcat = LoadMatrix(basePath + "bin/Wcat.bin", basePath + "Wcat.txt");
    log.Info("W cat size: " + Wcat.NumRows() + "x" + Wcat.NumCols());
    SimpleMatrix combinedWV = LoadMatrix(basePath + "bin/Wv.bin", basePath + "Wv.txt");
    log.Info("Word matrix size: " + combinedWV.NumRows() + "x" + combinedWV.NumCols());
    File vocabFile = new File(basePath + "vocab_1.txt");
    if (!vocabFile.Exists())
    {
        vocabFile = new File(basePath + "words.txt");
    }
    IList<string> lines = Generics.NewArrayList();
    foreach (string line in IOUtils.ReadLines(vocabFile))
    {
        lines.Add(line.Trim());
    }
    log.Info("Lines in vocab file: " + lines.Count);
    IDictionary<string, SimpleMatrix> wordVectors = Generics.NewTreeMap();
    for (int i = 0; i < lines.Count && i < combinedWV.NumCols(); ++i)
    {
        string[] pieces = lines[i].Split(" +");
        if (pieces.Length != 1)
        {
            continue;
        }
        wordVectors[pieces[0]] = combinedWV.ExtractMatrix(0, numSlices, i, i + 1);
        if (pieces[0].Equals("UNK"))
        {
            wordVectors[SentimentModel.UnknownWord] = wordVectors["UNK"];
        }
    }
    // If there is no ",", we first try to look for an HTML escaping,
    // then fall back to "." as better than just a random word vector.
    // Same for "``" and ";"
    CopyWordVector(wordVectors, "&#44;", ",");
    CopyWordVector(wordVectors, ".", ",");
    CopyWordVector(wordVectors, "&#59;", ";");
    CopyWordVector(wordVectors, ".", ";");
    CopyWordVector(wordVectors, "&#96;&#96;", "``");
    CopyWordVector(wordVectors, "''", "``");
    if (useEscapedParens)
    {
        ReplaceWordVector(wordVectors, "(", "-LRB-");
        ReplaceWordVector(wordVectors, ")", "-RRB-");
    }
    RNNOptions op = new RNNOptions();
    op.numHid = numSlices;
    op.lowercaseWordVectors = false;
    if (Wcat.NumRows() == 2)
    {
        op.classNames = new string[] { "Negative", "Positive" };
        op.equivalenceClasses = new int[][] { new int[] { 0 }, new int[] { 1 } }; // TODO: set to null once old models are updated
        op.numClasses = 2;
    }
    if (!wordVectors.ContainsKey(SentimentModel.UnknownWord))
    {
        wordVectors[SentimentModel.UnknownWord] = SimpleMatrix.Random(numSlices, 1, -0.00001, 0.00001, new Random());
    }
    SentimentModel model = SentimentModel.ModelFromMatrices(W, Wcat, tensor, wordVectors, op);
    model.SaveSerialized("matlab.ser.gz");
}
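// A hypothetical invocation of this converter (the enclosing class name does
// not appear in this excerpt, so the command form below is an assumption):
//
//   ConvertMatlabModel -path /path/to/params/ -slices 25 -useEscapedParens
//
// -slices sets both the tensor slice count and op.numHid, -path points at the
// directory holding the Matlab-exported matrices, and -useEscapedParens maps
// "(" and ")" onto the PTB tokens -LRB- and -RRB-.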