/// <summary>
/// Computes the elementwise average, across all the given maps, of each binary
/// matrix name reported by <c>GetBinaryMatrixNames</c>.
/// </summary>
/// <param name="maps">the per-model binary matrix maps to average</param>
/// <returns>a new TreeMap-backed map containing one averaged matrix per binary name</returns>
public static TwoDimensionalMap<string, string, SimpleMatrix> AverageBinaryMatrices(IList<TwoDimensionalMap<string, string, SimpleMatrix>> maps)
{
    TwoDimensionalMap<string, string, SimpleMatrix> averages = TwoDimensionalMap.TreeMap();
    foreach (Pair<string, string> binary in GetBinaryMatrixNames(maps))
    {
        SimpleMatrix sum = null;
        int seen = 0;
        foreach (TwoDimensionalMap<string, string, SimpleMatrix> map in maps)
        {
            // Only maps that actually contain this binary name contribute to the average.
            if (map.Contains(binary.First(), binary.Second()))
            {
                SimpleMatrix current = map.Get(binary.First(), binary.Second());
                sum = (sum == null) ? current : sum.Plus(current);
                ++seen;
            }
        }
        averages.Put(binary.First(), binary.Second(), sum.Divide(seen));
    }
    return averages;
}
/// <summary>
/// Entry point for backpropagation over a tree: starts the recursive pass
/// with an all-zero delta at the root (no error flows in from above).
/// </summary>
private void BackpropDerivativesAndError(Tree tree, TwoDimensionalMap<string, string, SimpleMatrix> binaryTD, TwoDimensionalMap<string, string, SimpleMatrix> binaryCD, TwoDimensionalMap<string, string, SimpleTensor> binaryTensorTD, IDictionary<string, SimpleMatrix> unaryCD, IDictionary<string, SimpleMatrix> wordVectorD)
{
    // A freshly constructed SimpleMatrix is zero-filled: numHid x 1 column vector.
    SimpleMatrix rootDelta = new SimpleMatrix(model.op.numHid, 1);
    BackpropDerivativesAndError(tree, binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, rootDelta);
}
/// <summary>
/// Restricts the model's parameter maps to only the binary rules, unary rules,
/// and words present in the current training batch, rebuilding each map and
/// updating the matrix counts accordingly.
/// </summary>
/// <param name="binaryRules">the (left, right) rule pairs to keep</param>
/// <param name="unaryRules">the unary rule names to keep</param>
/// <param name="words">the words whose vectors should be kept</param>
/// <exception cref="AssertionError">
/// if a rule has a transform without a score or vice versa (model inconsistency)
/// </exception>
public virtual void FilterRulesForBatch(TwoDimensionalSet<string, string> binaryRules, ICollection<string> unaryRules, ICollection<string> words)
{
    // Rebuild the binary transform/score maps with only the batch's rules.
    TwoDimensionalMap<string, string, SimpleMatrix> newBinaryTransforms = TwoDimensionalMap.TreeMap();
    TwoDimensionalMap<string, string, SimpleMatrix> newBinaryScores = TwoDimensionalMap.TreeMap();
    foreach (Pair<string, string> binaryRule in binaryRules)
    {
        SimpleMatrix transform = binaryTransform.Get(binaryRule.First(), binaryRule.Second());
        if (transform != null)
        {
            newBinaryTransforms.Put(binaryRule.First(), binaryRule.Second(), transform);
        }
        SimpleMatrix score = binaryScore.Get(binaryRule.First(), binaryRule.Second());
        if (score != null)
        {
            newBinaryScores.Put(binaryRule.First(), binaryRule.Second(), score);
        }
        // A rule must have both matrices or neither; exactly one present means
        // the model is internally inconsistent.  (Simplified XOR form of the
        // original (a==null && b!=null) || (a!=null && b==null) check.)
        if ((transform == null) != (score == null))
        {
            throw new AssertionError();
        }
    }
    binaryTransform = newBinaryTransforms;
    binaryScore = newBinaryScores;
    numBinaryMatrices = binaryTransform.Size();
    // Rebuild the unary transform/score maps with only the batch's rules.
    IDictionary<string, SimpleMatrix> newUnaryTransforms = Generics.NewTreeMap();
    IDictionary<string, SimpleMatrix> newUnaryScores = Generics.NewTreeMap();
    foreach (string unaryRule in unaryRules)
    {
        SimpleMatrix transform = unaryTransform[unaryRule];
        if (transform != null)
        {
            newUnaryTransforms[unaryRule] = transform;
        }
        SimpleMatrix score = unaryScore[unaryRule];
        if (score != null)
        {
            newUnaryScores[unaryRule] = score;
        }
        // Same consistency invariant as for binary rules.
        if ((transform == null) != (score == null))
        {
            throw new AssertionError();
        }
    }
    unaryTransform = newUnaryTransforms;
    unaryScore = newUnaryScores;
    numUnaryMatrices = unaryTransform.Count;
    // Keep only the word vectors for words that occur in the batch.
    IDictionary<string, SimpleMatrix> newWordVectors = Generics.NewTreeMap();
    foreach (string word in words)
    {
        SimpleMatrix wordVector = wordVectors[word];
        if (wordVector != null)
        {
            newWordVectors[word] = wordVector;
        }
    }
    wordVectors = newWordVectors;
}
/// <summary>
/// Init a TwoDimensionalMap with 0 matrices for all the matrices in the
/// original map, preserving each entry's dimensions.
/// </summary>
/// <param name="map">the map whose keys and matrix shapes are mirrored</param>
/// <returns>a TreeMap-backed map of zero matrices</returns>
private static TwoDimensionalMap<string, string, SimpleMatrix> InitDerivatives(TwoDimensionalMap<string, string, SimpleMatrix> map)
{
    TwoDimensionalMap<string, string, SimpleMatrix> result = TwoDimensionalMap.TreeMap();
    foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> entry in map)
    {
        // new SimpleMatrix(rows, cols) yields an all-zero matrix of the same shape.
        SimpleMatrix zeros = new SimpleMatrix(entry.GetValue().NumRows(), entry.GetValue().NumCols());
        result.Put(entry.GetFirstKey(), entry.GetSecondKey(), zeros);
    }
    return result;
}
/// <summary>
/// Scales each accumulated tensor derivative and adds the L2 regularization
/// gradient (regCost * parameter), updating <paramref name="derivatives"/> in place.
/// </summary>
/// <param name="derivatives">the accumulated gradients, updated in place</param>
/// <param name="currentMatrices">the current parameter tensors</param>
/// <param name="scale">multiplier applied to the raw derivative</param>
/// <param name="regCost">L2 regularization strength</param>
/// <returns>the total regularization cost contribution</returns>
private static double ScaleAndRegularizeTensor(TwoDimensionalMap<string, string, SimpleTensor> derivatives, TwoDimensionalMap<string, string, SimpleTensor> currentMatrices, double scale, double regCost)
{
    double cost = 0.0; // accumulated regularization cost
    foreach (TwoDimensionalMap.Entry<string, string, SimpleTensor> entry in currentMatrices)
    {
        SimpleTensor param = entry.GetValue();
        SimpleTensor updated = derivatives.Get(entry.GetFirstKey(), entry.GetSecondKey()).Scale(scale).Plus(param.Scale(regCost));
        derivatives.Put(entry.GetFirstKey(), entry.GetSecondKey(), updated);
        // (regCost / 2) * ||param||^2, computed elementwise
        cost += param.ElementMult(param).ElementSum() * regCost / 2.0;
    }
    return cost;
}
/// <summary>
/// Builds a fresh DVModel from a grammar: one randomly initialized transform and
/// score matrix per basic category (unary) or basic category pair (binary).
/// </summary>
/// <param name="op">the parameters of the parser</param>
/// <param name="stateIndex">index mapping grammar state numbers to state names</param>
/// <param name="unaryGrammar">source of the unary rules to create matrices for</param>
/// <param name="binaryGrammar">source of the binary rules to create matrices for</param>
public DVModel(Options op, IIndex<string> stateIndex, UnaryGrammar unaryGrammar, BinaryGrammar binaryGrammar)
{
    this.op = op;
    rand = new Random(op.trainOptions.randomSeed);
    ReadWordVectors();
    // Binary matrices will be n*2n+1, unary matrices will be n*n+1
    numRows = op.lexOptions.numHid;
    numCols = op.lexOptions.numHid;
    // Build one matrix for each basic category.
    // We assume that each state that has the same basic
    // category is using the same transformation matrix.
    // Use TreeMap for because we want values to be
    // sorted by key later on when building theta vectors
    binaryTransform = TwoDimensionalMap.TreeMap();
    unaryTransform = Generics.NewTreeMap();
    binaryScore = TwoDimensionalMap.TreeMap();
    unaryScore = Generics.NewTreeMap();
    numBinaryMatrices = 0;
    numUnaryMatrices = 0;
    binaryTransformSize = numRows * (numCols * 2 + 1);
    unaryTransformSize = numRows * (numCols + 1);
    binaryScoreSize = numCols;
    unaryScoreSize = numCols;
    if (op.trainOptions.useContextWords)
    {
        // Context words widen each transform by two extra numRows x numCols panels.
        binaryTransformSize += numRows * numCols * 2;
        unaryTransformSize += numRows * numCols * 2;
    }
    identity = SimpleMatrix.Identity(numRows);
    foreach (UnaryRule unaryRule in unaryGrammar)
    {
        // only make one matrix for each parent state, and only use the
        // basic category for that
        string childState = stateIndex.Get(unaryRule.child);
        string childBasic = BasicCategory(childState);
        AddRandomUnaryMatrix(childBasic);
    }
    foreach (BinaryRule binaryRule in binaryGrammar)
    {
        // only make one matrix for each parent state, and only use the
        // basic category for that
        string leftState = stateIndex.Get(binaryRule.leftChild);
        string leftBasic = BasicCategory(leftState);
        string rightState = stateIndex.Get(binaryRule.rightChild);
        string rightBasic = BasicCategory(rightState);
        AddRandomBinaryMatrix(leftBasic, rightBasic);
    }
}
/// <summary>
/// Creates an empty Document with every bookkeeping collection initialized.
/// </summary>
public Document()
{
    // NOTE(review): the two comments below were attached to fields in the
    // original source; mention removal semantics should be verified there.
    // mentions may be removed from this due to post processing
    // all mentions (mentions will not be removed from this)
    positions = Generics.NewHashMap();
    mentionheadPositions = Generics.NewHashMap();
    roleSet = Generics.NewHashSet();
    corefClusters = Generics.NewHashMap();
    goldCorefClusters = null;
    allPredictedMentions = Generics.NewHashMap();
    allGoldMentions = Generics.NewHashMap();
    speakers = Generics.NewHashMap();
    speakerPairs = Generics.NewHashSet();
    incompatibles = TwoDimensionalSet.HashSet();
    incompatibleClusters = TwoDimensionalSet.HashSet();
    acronymCache = TwoDimensionalMap.HashMap();
}
/// <summary>Add tensors from the second map to the first map, in place.</summary>
/// <param name="first">the destination map, modified in place</param>
/// <param name="second">the map whose tensors are added into the first</param>
public static void AddTensors(TwoDimensionalMap<string, string, SimpleTensor> first, TwoDimensionalMap<string, string, SimpleTensor> second)
{
    // First pass: for keys present in both maps, replace the entry in `first`
    // with the elementwise sum.  (NOTE(review): this Puts into `first` while
    // enumerating it; assumes replacing an existing key is safe for this map
    // implementation — confirm against TwoDimensionalMap's semantics.)
    foreach (TwoDimensionalMap.Entry<string, string, SimpleTensor> existing in first)
    {
        if (second.Contains(existing.GetFirstKey(), existing.GetSecondKey()))
        {
            SimpleTensor combined = existing.GetValue().Plus(second.Get(existing.GetFirstKey(), existing.GetSecondKey()));
            first.Put(existing.GetFirstKey(), existing.GetSecondKey(), combined);
        }
    }
    // Second pass: copy over entries that exist only in `second`.
    foreach (TwoDimensionalMap.Entry<string, string, SimpleTensor> extra in second)
    {
        if (!first.Contains(extra.GetFirstKey(), extra.GetSecondKey()))
        {
            first.Put(extra.GetFirstKey(), extra.GetSecondKey(), extra.GetValue());
        }
    }
}
/*
* // An example of how you could read in old models with readObject to fix the serialization
* // You would first read in the old model, then reserialize it
* private void readObject(ObjectInputStream in)
* throws IOException, ClassNotFoundException
* {
* ObjectInputStream.GetField fields = in.readFields();
* binaryTransform = ErasureUtils.uncheckedCast(fields.get("binaryTransform", null));
*
* // transform binaryTensors
* binaryTensors = TwoDimensionalMap.treeMap();
* TwoDimensionalMap<String, String, edu.stanford.nlp.rnn.SimpleTensor> oldTensors = ErasureUtils.uncheckedCast(fields.get("binaryTensors", null));
* for (String first : oldTensors.firstKeySet()) {
* for (String second : oldTensors.get(first).keySet()) {
* binaryTensors.put(first, second, new SimpleTensor(oldTensors.get(first, second).slices));
* }
* }
*
* binaryClassification = ErasureUtils.uncheckedCast(fields.get("binaryClassification", null));
* unaryClassification = ErasureUtils.uncheckedCast(fields.get("unaryClassification", null));
* wordVectors = ErasureUtils.uncheckedCast(fields.get("wordVectors", null));
*
* if (fields.defaulted("numClasses")) {
* throw new RuntimeException();
* }
* numClasses = fields.get("numClasses", 0);
*
* if (fields.defaulted("numHid")) {
* throw new RuntimeException();
* }
* numHid = fields.get("numHid", 0);
*
* if (fields.defaulted("numBinaryMatrices")) {
* throw new RuntimeException();
* }
* numBinaryMatrices = fields.get("numBinaryMatrices", 0);
*
* if (fields.defaulted("binaryTransformSize")) {
* throw new RuntimeException();
* }
* binaryTransformSize = fields.get("binaryTransformSize", 0);
*
* if (fields.defaulted("binaryTensorSize")) {
* throw new RuntimeException();
* }
* binaryTensorSize = fields.get("binaryTensorSize", 0);
*
* if (fields.defaulted("binaryClassificationSize")) {
* throw new RuntimeException();
* }
* binaryClassificationSize = fields.get("binaryClassificationSize", 0);
*
* if (fields.defaulted("numUnaryMatrices")) {
* throw new RuntimeException();
* }
* numUnaryMatrices = fields.get("numUnaryMatrices", 0);
*
* if (fields.defaulted("unaryClassificationSize")) {
* throw new RuntimeException();
* }
* unaryClassificationSize = fields.get("unaryClassificationSize", 0);
*
* rand = ErasureUtils.uncheckedCast(fields.get("rand", null));
* op = ErasureUtils.uncheckedCast(fields.get("op", null));
* op.classNames = op.DEFAULT_CLASS_NAMES;
* op.equivalenceClasses = op.APPROXIMATE_EQUIVALENCE_CLASSES;
* op.equivalenceClassNames = op.DEFAULT_EQUIVALENCE_CLASS_NAMES;
* }
*/
/// <summary>
/// Given single matrices and sets of options, create the
/// corresponding SentimentModel.
/// </summary>
/// <remarks>
/// Given single matrices and sets of options, create the
/// corresponding SentimentModel. Useful for creating a Java version
/// of a model trained in some other manner, such as using the
/// original Matlab code.
/// </remarks>
/// <param name="W">the single binary transform matrix</param>
/// <param name="Wcat">the single classification matrix</param>
/// <param name="Wt">the single binary tensor</param>
/// <param name="wordVectors">map from word to its vector</param>
/// <param name="op">model options; must have combineClassification and simplifiedModel enabled</param>
/// <exception cref="ArgumentException">if the options are not simplified/combined</exception>
internal static Edu.Stanford.Nlp.Sentiment.SentimentModel ModelFromMatrices(SimpleMatrix W, SimpleMatrix Wcat, SimpleTensor Wt, IDictionary<string, SimpleMatrix> wordVectors, RNNOptions op)
{
    if (!op.combineClassification || !op.simplifiedModel)
    {
        throw new ArgumentException("Can only create a model using this method if combineClassification and simplifiedModel are turned on");
    }
    // In the simplified model every production maps to the same ("", "") key.
    TwoDimensionalMap<string, string, SimpleMatrix> binaryTransform = TwoDimensionalMap.TreeMap();
    binaryTransform.Put(string.Empty, string.Empty, W);
    TwoDimensionalMap<string, string, SimpleTensor> binaryTensors = TwoDimensionalMap.TreeMap();
    binaryTensors.Put(string.Empty, string.Empty, Wt);
    // combineClassification is on, so the binary classification map stays empty
    // and the single Wcat lives in the unary classification map.
    TwoDimensionalMap<string, string, SimpleMatrix> binaryClassification = TwoDimensionalMap.TreeMap();
    IDictionary<string, SimpleMatrix> unaryClassification = Generics.NewTreeMap();
    unaryClassification[string.Empty] = Wcat;
    return (new Edu.Stanford.Nlp.Sentiment.SentimentModel(binaryTransform, binaryTensors, binaryClassification, unaryClassification, wordVectors, op));
}
/// <summary>
/// Initializes zero-filled derivative maps mirroring the shapes of the
/// given model's parameter maps.
/// </summary>
/// <param name="model">the model whose parameter shapes are mirrored</param>
public ModelDerivatives(SentimentModel model)
{
    // We use TreeMap for each of these so that they stay in a canonical sorted order
    // binaryTD stands for Transform Derivatives (see the SentimentModel)
    binaryTD = InitDerivatives(model.binaryTransform);
    // the derivatives of the tensors for the binary nodes
    // will be empty if we aren't using tensors
    binaryTensorTD = (model.op.useTensors) ? InitTensorDerivatives(model.binaryTensors) : TwoDimensionalMap.TreeMap();
    // binaryCD stands for Classification Derivatives
    // if we combined classification derivatives, we just use an empty map
    binaryCD = (!model.op.combineClassification) ? InitDerivatives(model.binaryClassification) : TwoDimensionalMap.TreeMap();
    // unaryCD stands for Classification Derivatives
    unaryCD = InitDerivatives(model.unaryClassification);
    // word vector derivatives
    // wordVectorD will be filled on an as-needed basis, as opposed to having
    // all the words with a lot of empty vectors
    wordVectorD = Generics.NewTreeMap();
}
/// <summary>
/// Scales each accumulated derivative and adds the L2 regularization gradient,
/// updating <paramref name="derivatives"/> in place; optionally excludes the
/// final (bias) column from regularization.
/// </summary>
/// <param name="derivatives">the accumulated gradients, updated in place</param>
/// <param name="currentMatrices">the current parameter matrices</param>
/// <param name="scale">multiplier applied to the raw derivative</param>
/// <param name="regCost">L2 regularization strength</param>
/// <param name="dropBiasColumn">if true, the last column is zeroed before regularizing</param>
/// <returns>the total regularization cost contribution</returns>
private static double ScaleAndRegularize(TwoDimensionalMap<string, string, SimpleMatrix> derivatives, TwoDimensionalMap<string, string, SimpleMatrix> currentMatrices, double scale, double regCost, bool dropBiasColumn)
{
    double cost = 0.0; // accumulated regularization cost
    foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> entry in currentMatrices)
    {
        SimpleMatrix regMatrix = entry.GetValue();
        if (dropBiasColumn)
        {
            // Copy first so the stored parameter matrix is untouched, then
            // overwrite the last column with zeros to skip the bias term.
            regMatrix = new SimpleMatrix(regMatrix);
            regMatrix.InsertIntoThis(0, regMatrix.NumCols() - 1, new SimpleMatrix(regMatrix.NumRows(), 1));
        }
        SimpleMatrix updated = derivatives.Get(entry.GetFirstKey(), entry.GetSecondKey()).Scale(scale).Plus(regMatrix.Scale(regCost));
        derivatives.Put(entry.GetFirstKey(), entry.GetSecondKey(), updated);
        // (regCost / 2) * ||regMatrix||^2
        cost += regMatrix.ElementMult(regMatrix).ElementSum() * regCost / 2.0;
    }
    return cost;
}
/// <summary>
/// Builds a SentimentModel directly from pre-existing parameter maps,
/// deriving sizes and counts from the supplied matrices and options.
/// </summary>
private SentimentModel(TwoDimensionalMap<string, string, SimpleMatrix> binaryTransform, TwoDimensionalMap<string, string, SimpleTensor> binaryTensors, TwoDimensionalMap<string, string, SimpleMatrix> binaryClassification, IDictionary<string, SimpleMatrix> unaryClassification, IDictionary<string, SimpleMatrix> wordVectors, RNNOptions op)
{
    this.op = op;
    this.binaryTransform = binaryTransform;
    this.binaryTensors = binaryTensors;
    this.binaryClassification = binaryClassification;
    this.unaryClassification = unaryClassification;
    this.wordVectors = wordVectors;
    this.numClasses = op.numClasses;
    if (op.numHid <= 0)
    {
        // Infer the hidden size from the word vectors.  NOTE(review): this loop
        // takes the element count of the LAST vector enumerated (no break);
        // assumes all word vectors have the same size — confirm upstream.
        int nh = 0;
        foreach (SimpleMatrix wv in wordVectors.Values)
        {
            nh = wv.GetNumElements();
        }
        this.numHid = nh;
    }
    else
    {
        this.numHid = op.numHid;
    }
    this.numBinaryMatrices = binaryTransform.Size();
    // Each binary transform is numHid x (2*numHid + 1): two children plus bias.
    binaryTransformSize = numHid * (2 * numHid + 1);
    if (op.useTensors)
    {
        binaryTensorSize = numHid * numHid * numHid * 4;
    }
    else
    {
        binaryTensorSize = 0;
    }
    // With combined classification there are no per-rule binary classifiers.
    binaryClassificationSize = (op.combineClassification) ? 0 : numClasses * (numHid + 1);
    numUnaryMatrices = unaryClassification.Count;
    unaryClassificationSize = numClasses * (numHid + 1);
    rand = new Random(op.randomSeed);
    identity = SimpleMatrix.Identity(numHid);
}
/// <summary>
/// Builds a DVModel directly from pre-existing parameter maps, deriving the
/// per-matrix sizes from the first entry of each map.
/// </summary>
/// <param name="binaryTransform">binary transform matrices keyed by (left, right) category</param>
/// <param name="unaryTransform">unary transform matrices keyed by category</param>
/// <param name="binaryScore">binary score matrices keyed by (left, right) category</param>
/// <param name="unaryScore">unary score matrices keyed by category</param>
/// <param name="wordVectors">map from word to its vector</param>
/// <param name="op">parser options (hidden size, random seed)</param>
public DVModel(TwoDimensionalMap<string, string, SimpleMatrix> binaryTransform, IDictionary<string, SimpleMatrix> unaryTransform, TwoDimensionalMap<string, string, SimpleMatrix> binaryScore, IDictionary<string, SimpleMatrix> unaryScore, IDictionary<string, SimpleMatrix> wordVectors, Options op)
{
    this.op = op;
    this.binaryTransform = binaryTransform;
    this.unaryTransform = unaryTransform;
    this.binaryScore = binaryScore;
    this.unaryScore = unaryScore;
    this.wordVectors = wordVectors;
    this.numBinaryMatrices = binaryTransform.Size();
    this.numUnaryMatrices = unaryTransform.Count;
    if (numBinaryMatrices > 0)
    {
        // BUGFIX: the original read GetEnumerator().Current without calling
        // MoveNext() first (a literal translation of Java's iterator().next()).
        // In .NET, Current is undefined before MoveNext(); advance each
        // enumerator before reading its first element.
        var binaryTransformIt = binaryTransform.GetEnumerator();
        binaryTransformIt.MoveNext();
        this.binaryTransformSize = binaryTransformIt.Current.GetValue().GetNumElements();
        var binaryScoreIt = binaryScore.GetEnumerator();
        binaryScoreIt.MoveNext();
        this.binaryScoreSize = binaryScoreIt.Current.GetValue().GetNumElements();
    }
    else
    {
        this.binaryTransformSize = 0;
        this.binaryScoreSize = 0;
    }
    if (numUnaryMatrices > 0)
    {
        // Same MoveNext() fix for the dictionary value enumerators.
        var unaryTransformIt = unaryTransform.Values.GetEnumerator();
        unaryTransformIt.MoveNext();
        this.unaryTransformSize = unaryTransformIt.Current.GetNumElements();
        var unaryScoreIt = unaryScore.Values.GetEnumerator();
        unaryScoreIt.MoveNext();
        this.unaryScoreSize = unaryScoreIt.Current.GetNumElements();
    }
    else
    {
        this.unaryTransformSize = 0;
        this.unaryScoreSize = 0;
    }
    this.numRows = op.lexOptions.numHid;
    this.numCols = op.lexOptions.numHid;
    this.identity = SimpleMatrix.Identity(numRows);
    this.rand = new Random(op.trainOptions.randomSeed);
}
/// <summary>
/// Command line arguments for this program:
/// <br />
/// -output: the model file to output
/// -input: a list of model files to input
/// </summary>
public static void Main(string[] args)
{
    string outputModelFilename = null;
    IList<string> inputModelFilenames = Generics.NewArrayList();
    // Parse arguments; -input consumes every following token until the next flag.
    for (int argIndex = 0; argIndex < args.Length;)
    {
        if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-output"))
        {
            outputModelFilename = args[argIndex + 1];
            argIndex += 2;
        }
        else
        {
            if (Sharpen.Runtime.EqualsIgnoreCase(args[argIndex], "-input"))
            {
                // Each -input value may itself be a comma-separated list of files.
                for (++argIndex; argIndex < args.Length && !args[argIndex].StartsWith("-"); ++argIndex)
                {
                    Sharpen.Collections.AddAll(inputModelFilenames, Arrays.AsList(args[argIndex].Split(",")));
                }
            }
            else
            {
                throw new Exception("Unknown argument " + args[argIndex]);
            }
        }
    }
    if (outputModelFilename == null)
    {
        log.Info("Need to specify output model name with -output");
        System.Environment.Exit(2);
    }
    if (inputModelFilenames.Count == 0)
    {
        log.Info("Need to specify input model names with -input");
        System.Environment.Exit(2);
    }
    log.Info("Averaging " + inputModelFilenames);
    log.Info("Outputting result to " + outputModelFilename);
    // Keep the first parser loaded; its options are reused for the merged model.
    LexicalizedParser lexparser = null;
    IList<DVModel> models = Generics.NewArrayList();
    foreach (string filename in inputModelFilenames)
    {
        LexicalizedParser parser = ((LexicalizedParser)LexicalizedParser.LoadModel(filename));
        if (lexparser == null)
        {
            lexparser = parser;
        }
        models.Add(DVParser.GetModelFromLexicalizedParser(parser));
    }
    // NOTE(review): the null second arguments below look like function objects
    // (lambdas extracting each model's maps) that were lost in automated
    // Java->C# conversion; as written these transforms do nothing useful —
    // verify against the original Java source before relying on this method.
    IList<TwoDimensionalMap<string, string, SimpleMatrix>> binaryTransformMaps = CollectionUtils.TransformAsList(models, null);
    IList<TwoDimensionalMap<string, string, SimpleMatrix>> binaryScoreMaps = CollectionUtils.TransformAsList(models, null);
    IList<IDictionary<string, SimpleMatrix>> unaryTransformMaps = CollectionUtils.TransformAsList(models, null);
    IList<IDictionary<string, SimpleMatrix>> unaryScoreMaps = CollectionUtils.TransformAsList(models, null);
    IList<IDictionary<string, SimpleMatrix>> wordMaps = CollectionUtils.TransformAsList(models, null);
    // Average every parameter map elementwise across the input models.
    TwoDimensionalMap<string, string, SimpleMatrix> binaryTransformAverages = AverageBinaryMatrices(binaryTransformMaps);
    TwoDimensionalMap<string, string, SimpleMatrix> binaryScoreAverages = AverageBinaryMatrices(binaryScoreMaps);
    IDictionary<string, SimpleMatrix> unaryTransformAverages = AverageUnaryMatrices(unaryTransformMaps);
    IDictionary<string, SimpleMatrix> unaryScoreAverages = AverageUnaryMatrices(unaryScoreMaps);
    IDictionary<string, SimpleMatrix> wordAverages = AverageUnaryMatrices(wordMaps);
    DVModel newModel = new DVModel(binaryTransformAverages, unaryTransformAverages, binaryScoreAverages, unaryScoreAverages, wordAverages, lexparser.GetOp());
    DVParser newParser = new DVParser(newModel, lexparser);
    newParser.SaveModel(outputModelFilename);
}
/// <summary>The traditional way of initializing an empty model suitable for training.</summary>
/// <param name="op">training options (random seed, hidden size, tensors, etc.)</param>
/// <param name="trainingTrees">trees used to initialize random word vectors when requested</param>
public SentimentModel(RNNOptions op, IList<Tree> trainingTrees)
{
    this.op = op;
    rand = new Random(op.randomSeed);
    if (op.randomWordVectors)
    {
        InitRandomWordVectors(trainingTrees);
    }
    else
    {
        ReadWordVectors();
    }
    if (op.numHid > 0)
    {
        this.numHid = op.numHid;
    }
    else
    {
        // Infer the hidden size from the first word vector.
        int size = 0;
        foreach (SimpleMatrix vector in wordVectors.Values)
        {
            size = vector.GetNumElements();
            break;
        }
        this.numHid = size;
    }
    TwoDimensionalSet<string, string> binaryProductions = TwoDimensionalSet.HashSet();
    if (op.simplifiedModel)
    {
        // Simplified model: a single ("", "") production shared by everything.
        binaryProductions.Add(string.Empty, string.Empty);
    }
    else
    {
        // TODO
        // figure out what binary productions we have in these trees
        // Note: the current sentiment training data does not actually
        // have any constituent labels
        throw new NotSupportedException("Not yet implemented");
    }
    ICollection<string> unaryProductions = Generics.NewHashSet();
    if (op.simplifiedModel)
    {
        unaryProductions.Add(string.Empty);
    }
    else
    {
        // TODO
        // figure out what unary productions we have in these trees (preterminals only, after the collapsing)
        throw new NotSupportedException("Not yet implemented");
    }
    this.numClasses = op.numClasses;
    identity = SimpleMatrix.Identity(numHid);
    binaryTransform = TwoDimensionalMap.TreeMap();
    binaryTensors = TwoDimensionalMap.TreeMap();
    binaryClassification = TwoDimensionalMap.TreeMap();
    // When making a flat model (no symantic untying) the
    // basicCategory function will return the same basic category for
    // all labels, so all entries will map to the same matrix
    foreach (Pair<string, string> binary in binaryProductions)
    {
        string left = BasicCategory(binary.first);
        string right = BasicCategory(binary.second);
        if (binaryTransform.Contains(left, right))
        {
            continue;
        }
        binaryTransform.Put(left, right, RandomTransformMatrix());
        if (op.useTensors)
        {
            binaryTensors.Put(left, right, RandomBinaryTensor());
        }
        if (!op.combineClassification)
        {
            binaryClassification.Put(left, right, RandomClassificationMatrix());
        }
    }
    numBinaryMatrices = binaryTransform.Size();
    // Binary transforms are numHid x (2*numHid + 1): two children plus bias.
    binaryTransformSize = numHid * (2 * numHid + 1);
    if (op.useTensors)
    {
        binaryTensorSize = numHid * numHid * numHid * 4;
    }
    else
    {
        binaryTensorSize = 0;
    }
    binaryClassificationSize = (op.combineClassification) ? 0 : numClasses * (numHid + 1);
    unaryClassification = Generics.NewTreeMap();
    // When making a flat model (no symantic untying) the
    // basicCategory function will return the same basic category for
    // all labels, so all entries will map to the same matrix
    foreach (string unary in unaryProductions)
    {
        unary = BasicCategory(unary);
        if (unaryClassification.Contains(unary))
        {
            continue;
        }
        unaryClassification[unary] = RandomClassificationMatrix();
    }
    numUnaryMatrices = unaryClassification.Count;
    unaryClassificationSize = numClasses * (numHid + 1);
}
/// <summary>
/// Recursively backpropagates classification error down the tree, accumulating
/// gradients into the supplied derivative maps and recording each node's
/// prediction error as an annotation.
/// </summary>
/// <param name="tree">the current (sub)tree; leaves are skipped</param>
/// <param name="binaryTD">binary transform derivatives, accumulated in place</param>
/// <param name="binaryCD">binary classification derivatives, accumulated in place</param>
/// <param name="binaryTensorTD">binary tensor derivatives, accumulated in place</param>
/// <param name="unaryCD">unary classification derivatives, accumulated in place</param>
/// <param name="wordVectorD">word vector derivatives, filled on demand</param>
/// <param name="deltaUp">the delta flowing into this node from its parent</param>
private void BackpropDerivativesAndError(Tree tree, TwoDimensionalMap<string, string, SimpleMatrix> binaryTD, TwoDimensionalMap<string, string, SimpleMatrix> binaryCD, TwoDimensionalMap<string, string, SimpleTensor> binaryTensorTD, IDictionary<string, SimpleMatrix> unaryCD, IDictionary<string, SimpleMatrix> wordVectorD, SimpleMatrix deltaUp)
{
    if (tree.IsLeaf())
    {
        return;
    }
    SimpleMatrix currentVector = RNNCoreAnnotations.GetNodeVector(tree);
    string category = tree.Label().Value();
    category = model.BasicCategory(category);
    // Build a vector that looks like 0,0,1,0,0 with an indicator for the correct class
    SimpleMatrix goldLabel = new SimpleMatrix(model.numClasses, 1);
    int goldClass = RNNCoreAnnotations.GetGoldClass(tree);
    if (goldClass >= 0)
    {
        goldLabel.Set(goldClass, 1.0);
    }
    double nodeWeight = model.op.trainOptions.GetClassWeight(goldClass);
    SimpleMatrix predictions = RNNCoreAnnotations.GetPredictions(tree);
    // If this is an unlabeled class, set deltaClass to 0. We could
    // make this more efficient by eliminating various of the below
    // calculations, but this would be the easiest way to handle the
    // unlabeled class
    SimpleMatrix deltaClass = goldClass >= 0 ? predictions.Minus(goldLabel).Scale(nodeWeight) : new SimpleMatrix(predictions.NumRows(), predictions.NumCols());
    SimpleMatrix localCD = deltaClass.Mult(NeuralUtils.ConcatenateWithBias(currentVector).Transpose());
    // Cross-entropy error at this node: -log(prediction for the gold class),
    // scaled by the class weight.
    double error = -(NeuralUtils.ElementwiseApplyLog(predictions).ElementMult(goldLabel).ElementSum());
    error = error * nodeWeight;
    RNNCoreAnnotations.SetPredictionError(tree, error);
    if (tree.IsPreTerminal())
    {
        // below us is a word vector
        unaryCD[category] = unaryCD[category].Plus(localCD);
        string word = tree.Children()[0].Label().Value();
        word = model.GetVocabWord(word);
        //SimpleMatrix currentVectorDerivative = NeuralUtils.elementwiseApplyTanhDerivative(currentVector);
        //SimpleMatrix deltaFromClass = model.getUnaryClassification(category).transpose().mult(deltaClass);
        //SimpleMatrix deltaFull = deltaFromClass.extractMatrix(0, model.op.numHid, 0, 1).plus(deltaUp);
        //SimpleMatrix wordDerivative = deltaFull.elementMult(currentVectorDerivative);
        //wordVectorD.put(word, wordVectorD.get(word).plus(wordDerivative));
        SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
        SimpleMatrix deltaFromClass = model.GetUnaryClassification(category).Transpose().Mult(deltaClass);
        deltaFromClass = deltaFromClass.ExtractMatrix(0, model.op.numHid, 0, 1).ElementMult(currentVectorDerivative);
        SimpleMatrix deltaFull = deltaFromClass.Plus(deltaUp);
        // Word vector derivatives are created lazily the first time a word is seen.
        SimpleMatrix oldWordVectorD = wordVectorD[word];
        if (oldWordVectorD == null)
        {
            wordVectorD[word] = deltaFull;
        }
        else
        {
            wordVectorD[word] = oldWordVectorD.Plus(deltaFull);
        }
    }
    else
    {
        // Otherwise, this must be a binary node
        string leftCategory = model.BasicCategory(tree.Children()[0].Label().Value());
        string rightCategory = model.BasicCategory(tree.Children()[1].Label().Value());
        if (model.op.combineClassification)
        {
            // Combined classification stores everything under the "" unary key.
            unaryCD[string.Empty] = unaryCD[string.Empty].Plus(localCD);
        }
        else
        {
            binaryCD.Put(leftCategory, rightCategory, binaryCD.Get(leftCategory, rightCategory).Plus(localCD));
        }
        SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
        SimpleMatrix deltaFromClass = model.GetBinaryClassification(leftCategory, rightCategory).Transpose().Mult(deltaClass);
        deltaFromClass = deltaFromClass.ExtractMatrix(0, model.op.numHid, 0, 1).ElementMult(currentVectorDerivative);
        SimpleMatrix deltaFull = deltaFromClass.Plus(deltaUp);
        SimpleMatrix leftVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[0]);
        SimpleMatrix rightVector = RNNCoreAnnotations.GetNodeVector(tree.Children()[1]);
        SimpleMatrix childrenVector = NeuralUtils.ConcatenateWithBias(leftVector, rightVector);
        // Gradient of the transform matrix W for this rule.
        SimpleMatrix W_df = deltaFull.Mult(childrenVector.Transpose());
        binaryTD.Put(leftCategory, rightCategory, binaryTD.Get(leftCategory, rightCategory).Plus(W_df));
        SimpleMatrix deltaDown;
        if (model.op.useTensors)
        {
            SimpleTensor Wt_df = GetTensorGradient(deltaFull, leftVector, rightVector);
            binaryTensorTD.Put(leftCategory, rightCategory, binaryTensorTD.Get(leftCategory, rightCategory).Plus(Wt_df));
            deltaDown = ComputeTensorDeltaDown(deltaFull, leftVector, rightVector, model.GetBinaryTransform(leftCategory, rightCategory), model.GetBinaryTensor(leftCategory, rightCategory));
        }
        else
        {
            deltaDown = model.GetBinaryTransform(leftCategory, rightCategory).Transpose().Mult(deltaFull);
        }
        SimpleMatrix leftDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(leftVector);
        SimpleMatrix rightDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(rightVector);
        // deltaDown stacks [left; right]; split it and recurse into each child.
        SimpleMatrix leftDeltaDown = deltaDown.ExtractMatrix(0, deltaFull.NumRows(), 0, 1);
        SimpleMatrix rightDeltaDown = deltaDown.ExtractMatrix(deltaFull.NumRows(), deltaFull.NumRows() * 2, 0, 1);
        BackpropDerivativesAndError(tree.Children()[0], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, leftDerivative.ElementMult(leftDeltaDown));
        BackpropDerivativesAndError(tree.Children()[1], binaryTD, binaryCD, binaryTensorTD, unaryCD, wordVectorD, rightDerivative.ElementMult(rightDeltaDown));
    }
}
// fill value & derivative
/// <summary>
/// Computes the cost (value) and gradient (derivative) of the current theta
/// over the training batch: scores each tree's gold vs. best hypothesis in
/// parallel, backpropagates the margin violations, normalizes by batch size,
/// and adds L2 regularization.
/// </summary>
/// <param name="theta">the flattened parameter vector to evaluate</param>
protected internal override void Calculate(double[] theta)
{
    dvModel.VectorToParams(theta);
    double localValue = 0.0;
    double[] localDerivative = new double[theta.Length];
    // Derivative accumulators, one set for the Gold trees (suffix G) and one
    // for the Best-scoring hypothesis trees (suffix B).
    TwoDimensionalMap<string, string, SimpleMatrix> binaryW_dfsG;
    TwoDimensionalMap<string, string, SimpleMatrix> binaryW_dfsB;
    binaryW_dfsG = TwoDimensionalMap.TreeMap();
    binaryW_dfsB = TwoDimensionalMap.TreeMap();
    TwoDimensionalMap<string, string, SimpleMatrix> binaryScoreDerivativesG;
    TwoDimensionalMap<string, string, SimpleMatrix> binaryScoreDerivativesB;
    binaryScoreDerivativesG = TwoDimensionalMap.TreeMap();
    binaryScoreDerivativesB = TwoDimensionalMap.TreeMap();
    IDictionary<string, SimpleMatrix> unaryW_dfsG;
    IDictionary<string, SimpleMatrix> unaryW_dfsB;
    unaryW_dfsG = new SortedDictionary<string, SimpleMatrix>();
    unaryW_dfsB = new SortedDictionary<string, SimpleMatrix>();
    IDictionary<string, SimpleMatrix> unaryScoreDerivativesG;
    IDictionary<string, SimpleMatrix> unaryScoreDerivativesB;
    unaryScoreDerivativesG = new SortedDictionary<string, SimpleMatrix>();
    unaryScoreDerivativesB = new SortedDictionary<string, SimpleMatrix>();
    IDictionary<string, SimpleMatrix> wordVectorDerivativesG = new SortedDictionary<string, SimpleMatrix>();
    IDictionary<string, SimpleMatrix> wordVectorDerivativesB = new SortedDictionary<string, SimpleMatrix>();
    // Zero-initialize the accumulators to mirror the model's parameter shapes.
    foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> entry in dvModel.binaryTransform)
    {
        int numRows = entry.GetValue().NumRows();
        int numCols = entry.GetValue().NumCols();
        binaryW_dfsG.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(numRows, numCols));
        binaryW_dfsB.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(numRows, numCols));
        binaryScoreDerivativesG.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(1, numRows));
        binaryScoreDerivativesB.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(1, numRows));
    }
    foreach (KeyValuePair<string, SimpleMatrix> entry_1 in dvModel.unaryTransform)
    {
        int numRows = entry_1.Value.NumRows();
        int numCols = entry_1.Value.NumCols();
        unaryW_dfsG[entry_1.Key] = new SimpleMatrix(numRows, numCols);
        unaryW_dfsB[entry_1.Key] = new SimpleMatrix(numRows, numCols);
        unaryScoreDerivativesG[entry_1.Key] = new SimpleMatrix(1, numRows);
        unaryScoreDerivativesB[entry_1.Key] = new SimpleMatrix(1, numRows);
    }
    if (op.trainOptions.trainWordVectors)
    {
        foreach (KeyValuePair<string, SimpleMatrix> entry_2 in dvModel.wordVectors)
        {
            int numRows = entry_2.Value.NumRows();
            int numCols = entry_2.Value.NumCols();
            wordVectorDerivativesG[entry_2.Key] = new SimpleMatrix(numRows, numCols);
            wordVectorDerivativesB[entry_2.Key] = new SimpleMatrix(numRows, numCols);
        }
    }
    // Some optimization methods prints out a line without an end, so our
    // debugging statements are misaligned
    Timing scoreTiming = new Timing();
    scoreTiming.Doing("Scoring trees");
    int treeNum = 0;
    // Score every tree in parallel: each result pairs the gold tree with the
    // best-scoring hypothesis tree.
    MulticoreWrapper<Tree, Pair<DeepTree, DeepTree>> wrapper = new MulticoreWrapper<Tree, Pair<DeepTree, DeepTree>>(op.trainOptions.trainingThreads, new DVParserCostAndGradient.ScoringProcessor(this));
    foreach (Tree tree in trainingBatch)
    {
        wrapper.Put(tree);
    }
    wrapper.Join();
    scoreTiming.Done();
    while (wrapper.Peek())
    {
        Pair<DeepTree, DeepTree> result = wrapper.Poll();
        DeepTree goldTree = result.first;
        DeepTree bestTree = result.second;
        StringBuilder treeDebugLine = new StringBuilder();
        Formatter formatter = new Formatter(treeDebugLine);
        // "done" means the gold tree already scores at least as well as the
        // best hypothesis (within a small tolerance), so it contributes nothing.
        bool isDone = (Math.Abs(bestTree.GetScore() - goldTree.GetScore()) <= 0.00001 || goldTree.GetScore() > bestTree.GetScore());
        string done = isDone ? "done" : string.Empty;
        formatter.Format("Tree %6d Highest tree: %12.4f Correct tree: %12.4f %s", treeNum, bestTree.GetScore(), goldTree.GetScore(), done);
        log.Info(treeDebugLine.ToString());
        if (!isDone)
        {
            // if the gold tree is better than the best hypothesis tree by
            // a large enough margin, then the score difference will be 0
            // and we ignore the tree
            double valueDelta = bestTree.GetScore() - goldTree.GetScore();
            //double valueDelta = Math.max(0.0, - scoreGold + bestScore);
            localValue += valueDelta;
            // get the context words for this tree - should be the same
            // for either goldTree or bestTree
            IList<string> words = GetContextWords(goldTree.GetTree());
            // The derivatives affected by this tree are only based on the
            // nodes present in this tree, eg not all matrix derivatives
            // will be affected by this tree
            BackpropDerivative(goldTree.GetTree(), words, goldTree.GetVectors(), binaryW_dfsG, unaryW_dfsG, binaryScoreDerivativesG, unaryScoreDerivativesG, wordVectorDerivativesG);
            BackpropDerivative(bestTree.GetTree(), words, bestTree.GetVectors(), binaryW_dfsB, unaryW_dfsB, binaryScoreDerivativesB, unaryScoreDerivativesB, wordVectorDerivativesB);
        }
        ++treeNum;
    }
    // Flatten the accumulated maps back into parameter-ordered vectors.
    double[] localDerivativeGood;
    double[] localDerivativeB;
    if (op.trainOptions.trainWordVectors)
    {
        localDerivativeGood = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsG.ValueIterator(), unaryW_dfsG.Values.GetEnumerator(), binaryScoreDerivativesG.ValueIterator(), unaryScoreDerivativesG.Values.GetEnumerator(), wordVectorDerivativesG.Values.GetEnumerator());
        localDerivativeB = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsB.ValueIterator(), unaryW_dfsB.Values.GetEnumerator(), binaryScoreDerivativesB.ValueIterator(), unaryScoreDerivativesB.Values.GetEnumerator(), wordVectorDerivativesB.Values.GetEnumerator());
    }
    else
    {
        localDerivativeGood = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsG.ValueIterator(), unaryW_dfsG.Values.GetEnumerator(), binaryScoreDerivativesG.ValueIterator(), unaryScoreDerivativesG.Values.GetEnumerator());
        localDerivativeB = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsB.ValueIterator(), unaryW_dfsB.Values.GetEnumerator(), binaryScoreDerivativesB.ValueIterator(), unaryScoreDerivativesB.Values.GetEnumerator());
    }
    // correct - highest
    for (int i = 0; i < localDerivativeGood.Length; i++)
    {
        localDerivative[i] = localDerivativeB[i] - localDerivativeGood[i];
    }
    // TODO: this is where we would combine multiple costs if we had parallelized the calculation
    value = localValue;
    derivative = localDerivative;
    // normalizing by training batch size
    value = (1.0 / trainingBatch.Count) * value;
    ArrayMath.MultiplyInPlace(derivative, (1.0 / trainingBatch.Count));
    // add regularization to cost:
    double[] currentParams = dvModel.ParamsToVector();
    double regCost = 0;
    foreach (double currentParam in currentParams)
    {
        regCost += currentParam * currentParam;
    }
    regCost = op.trainOptions.regCost * 0.5 * regCost;
    value += regCost;
    // add regularization to gradient
    ArrayMath.MultiplyInPlace(currentParams, op.trainOptions.regCost);
    ArrayMath.PairwiseAddInPlace(derivative, currentParams);
}
/// <summary>
/// Convenience overload: starts the recursive derivative backpropagation over
/// <paramref name="tree"/> with a freshly zero-initialized delta vector of size
/// <c>op.lexOptions.numHid</c> x 1, accumulating gradients into the supplied maps.
/// </summary>
/// <param name="tree">Parse tree whose nodes contribute to the gradients.</param>
/// <param name="words">Context words for the tree (shared by gold/best trees).</param>
/// <param name="nodeVectors">Per-node hidden vectors computed in the forward pass.</param>
/// <param name="binaryW_dfs">Accumulator for binary transform gradients.</param>
/// <param name="unaryW_dfs">Accumulator for unary transform gradients.</param>
/// <param name="binaryScoreDerivatives">Accumulator for binary score gradients.</param>
/// <param name="unaryScoreDerivatives">Accumulator for unary score gradients.</param>
/// <param name="wordVectorDerivatives">Accumulator for word vector gradients.</param>
public virtual void BackpropDerivative(Tree tree, IList<string> words, IdentityHashMap<Tree, SimpleMatrix> nodeVectors, TwoDimensionalMap<string, string, SimpleMatrix> binaryW_dfs, IDictionary<string, SimpleMatrix> unaryW_dfs, TwoDimensionalMap<string, string, SimpleMatrix> binaryScoreDerivatives, IDictionary<string, SimpleMatrix> unaryScoreDerivatives, IDictionary<string, SimpleMatrix> wordVectorDerivatives)
{
    // Seed the recursion with an all-zero delta; a new SimpleMatrix is zero-filled.
    SimpleMatrix initialDelta = new SimpleMatrix(op.lexOptions.numHid, 1);
    BackpropDerivative(tree, words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, initialDelta);
}
/// <summary>
/// Recursively backpropagates the score error down <paramref name="tree"/>,
/// accumulating gradients into the supplied maps: binary/unary transform
/// gradients (<paramref name="binaryW_dfs"/>/<paramref name="unaryW_dfs"/>),
/// binary/unary score-vector gradients, and — when word vector training is
/// enabled — word vector gradients.
/// </summary>
/// <param name="tree">Current subtree being processed.</param>
/// <param name="words">Context words used when op.trainOptions.useContextWords is set.</param>
/// <param name="nodeVectors">Forward-pass hidden vectors, keyed by tree node identity.</param>
/// <param name="binaryW_dfs">Accumulator for binary transform (W) gradients.</param>
/// <param name="unaryW_dfs">Accumulator for unary transform gradients.</param>
/// <param name="binaryScoreDerivatives">Accumulator for binary score gradients.</param>
/// <param name="unaryScoreDerivatives">Accumulator for unary score gradients.</param>
/// <param name="wordVectorDerivatives">Accumulator for word vector gradients.</param>
/// <param name="deltaUp">Delta flowing down from the parent node (numHid x 1).</param>
public virtual void BackpropDerivative(Tree tree, IList<string> words, IdentityHashMap<Tree, SimpleMatrix> nodeVectors, TwoDimensionalMap<string, string, SimpleMatrix> binaryW_dfs, IDictionary<string, SimpleMatrix> unaryW_dfs, TwoDimensionalMap<string, string, SimpleMatrix> binaryScoreDerivatives, IDictionary<string, SimpleMatrix> unaryScoreDerivatives, IDictionary<string, SimpleMatrix> wordVectorDerivatives, SimpleMatrix deltaUp)
{
    // Leaves carry no parameters of their own; nothing to accumulate.
    if (tree.IsLeaf())
    {
        return;
    }
    if (tree.IsPreTerminal())
    {
        // Preterminal: the only gradient here is for the word vector of the
        // single child word, and only if word vectors are being trained.
        if (op.trainOptions.trainWordVectors)
        {
            string word = tree.Children()[0].Label().Value();
            // Map surface form to the model's vocabulary key (handles unknowns).
            word = dvModel.GetVocabWord(word);
            // SimpleMatrix currentVector = nodeVectors.get(tree);
            // SimpleMatrix currentVectorDerivative = nonlinearityVectorToDerivative(currentVector);
            // SimpleMatrix derivative = deltaUp.elementMult(currentVectorDerivative);
            // The incoming delta is used directly (nonlinearity derivative
            // deliberately omitted — see commented-out lines above).
            SimpleMatrix derivative = deltaUp;
            wordVectorDerivatives[word] = wordVectorDerivatives[word].Plus(derivative);
        }
        return;
    }
    // Interior node: combine the parent delta with this node's own score
    // contribution, f'(v) .* scoreW^T, where f' is the tanh derivative.
    SimpleMatrix currentVector = nodeVectors[tree];
    SimpleMatrix currentVectorDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(currentVector);
    SimpleMatrix scoreW = dvModel.GetScoreWForNode(tree);
    currentVectorDerivative = currentVectorDerivative.ElementMult(scoreW.Transpose());
    // the delta that is used at the current nodes
    SimpleMatrix deltaCurrent = deltaUp.Plus(currentVectorDerivative);
    // W^T * delta distributes the error to this node's children.
    SimpleMatrix W = dvModel.GetWForNode(tree);
    SimpleMatrix WTdelta = W.Transpose().Mult(deltaCurrent);
    if (tree.Children().Length == 2)
    {
        //TODO: RS: Change to the nice "getWForNode" setup?
        string leftLabel = dvModel.BasicCategory(tree.Children()[0].Label().Value());
        string rightLabel = dvModel.BasicCategory(tree.Children()[1].Label().Value());
        // Score gradient for this binary rule: the node's own vector (transposed).
        binaryScoreDerivatives.Put(leftLabel, rightLabel, binaryScoreDerivatives.Get(leftLabel, rightLabel).Plus(currentVector.Transpose()));
        SimpleMatrix leftVector = nodeVectors[tree.Children()[0]];
        SimpleMatrix rightVector = nodeVectors[tree.Children()[1]];
        // Input to the binary transform: [left; right; 1] (bias appended).
        SimpleMatrix childrenVector = NeuralUtils.ConcatenateWithBias(leftVector, rightVector);
        if (op.trainOptions.useContextWords)
        {
            childrenVector = ConcatenateContextWords(childrenVector, tree.GetSpan(), words);
        }
        // Transform gradient: delta * input^T.
        SimpleMatrix W_df = deltaCurrent.Mult(childrenVector.Transpose());
        binaryW_dfs.Put(leftLabel, rightLabel, binaryW_dfs.Get(leftLabel, rightLabel).Plus(W_df));
        // and then recurse
        SimpleMatrix leftDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(leftVector);
        SimpleMatrix rightDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(rightVector);
        // Split W^T*delta into the left child's rows [0, numHid) and the
        // right child's rows [numHid, 2*numHid); the bias row is dropped.
        SimpleMatrix leftWTDelta = WTdelta.ExtractMatrix(0, deltaCurrent.NumRows(), 0, 1);
        SimpleMatrix rightWTDelta = WTdelta.ExtractMatrix(deltaCurrent.NumRows(), deltaCurrent.NumRows() * 2, 0, 1);
        BackpropDerivative(tree.Children()[0], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, leftDerivative.ElementMult(leftWTDelta));
        BackpropDerivative(tree.Children()[1], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, rightDerivative.ElementMult(rightWTDelta));
    }
    else
    {
        if (tree.Children().Length == 1)
        {
            // Unary rule: same pattern as the binary case but keyed by the
            // single child's basic category.
            string childLabel = dvModel.BasicCategory(tree.Children()[0].Label().Value());
            unaryScoreDerivatives[childLabel] = unaryScoreDerivatives[childLabel].Plus(currentVector.Transpose());
            SimpleMatrix childVector = nodeVectors[tree.Children()[0]];
            SimpleMatrix childVectorWithBias = NeuralUtils.ConcatenateWithBias(childVector);
            if (op.trainOptions.useContextWords)
            {
                childVectorWithBias = ConcatenateContextWords(childVectorWithBias, tree.GetSpan(), words);
            }
            SimpleMatrix W_df = deltaCurrent.Mult(childVectorWithBias.Transpose());
            // System.out.println("unary backprop derivative for " + childLabel);
            // System.out.println("Old transform:");
            // System.out.println(unaryW_dfs.get(childLabel));
            // System.out.println(" Delta:");
            // System.out.println(W_df.scale(scale));
            unaryW_dfs[childLabel] = unaryW_dfs[childLabel].Plus(W_df);
            // and then recurse
            SimpleMatrix childDerivative = NeuralUtils.ElementwiseApplyTanhDerivative(childVector);
            //SimpleMatrix childDerivative = childVector;
            SimpleMatrix childWTDelta = WTdelta.ExtractMatrix(0, deltaCurrent.NumRows(), 0, 1);
            BackpropDerivative(tree.Children()[0], words, nodeVectors, binaryW_dfs, unaryW_dfs, binaryScoreDerivatives, unaryScoreDerivatives, wordVectorDerivatives, childDerivative.ElementMult(childWTDelta));
        }
        // NOTE(review): nodes with 3+ children are silently ignored here —
        // presumably the grammar is binarized upstream; confirm.
    }
}
/// <summary>
/// Rebuilds a <see cref="SemanticGraph"/> from the serializer's intermediate
/// node/edge representation, resolving each intermediate node against the
/// given sentence tokens.
/// </summary>
/// <param name="sentence">Tokens of the sentence; intermediate indices are 1-based into this list.</param>
/// <returns>The reconstructed dependency graph.</returns>
public virtual SemanticGraph ConvertIntermediateGraph(IList<CoreLabel> sentence)
{
    SemanticGraph graph = new SemanticGraph();
    // Phase 1: materialize the vertices, indexed by (index, copyCount).
    // Collapsed dependencies can contain several copies of one token, e.g.
    // "went" in "I went over the river and through the woods".
    TwoDimensionalMap<int, int, IndexedWord> nodeMap = TwoDimensionalMap.HashMap();
    foreach (AnnotationSerializer.IntermediateNode @in in nodes)
    {
        // Intermediate indices are 1-based.
        CoreLabel token = sentence[@in.index - 1];
        IndexedWord word;
        if (@in.copyAnnotation > 0)
        {
            // TODO: if we make a copy wrapper CoreLabel, use it here instead
            word = new IndexedWord(new CoreLabel(token));
            word.SetCopyCount(@in.copyAnnotation);
        }
        else
        {
            word = new IndexedWord(token);
        }
        // Backwards compatibility: annotations older than August 2014 may not
        // carry these fields, so only fill them in when they are missing.
        if (word.DocID() == null && @in.docId != null)
        {
            word.SetDocID(@in.docId);
        }
        if (word.SentIndex() < 0 && @in.sentIndex >= 0)
        {
            word.SetSentIndex(@in.sentIndex);
        }
        if (word.Index() < 0 && @in.index >= 0)
        {
            word.SetIndex(@in.index);
        }
        nodeMap.Put(word.Index(), word.CopyCount(), word);
        graph.AddVertex(word);
        if (@in.isRoot)
        {
            graph.AddRoot(word);
        }
    }
    // Phase 2: wire up the edges between the materialized vertices.
    foreach (AnnotationSerializer.IntermediateEdge ie in edges)
    {
        IndexedWord source = nodeMap.Get(ie.source, ie.sourceCopy);
        if (source == null)
        {
            throw new RuntimeIOException("Failed to find node " + ie.source + "-" + ie.sourceCopy);
        }
        IndexedWord target = nodeMap.Get(ie.target, ie.targetCopy);
        if (target == null)
        {
            throw new RuntimeIOException("Failed to find node " + ie.target + "-" + ie.targetCopy);
        }
        // GrammaticalRelation.ValueOf touches static state, so serialize access.
        lock (Lock)
        {
            GrammaticalRelation rel = GrammaticalRelation.ValueOf(ie.dep);
            graph.AddEdge(source, target, rel, 1.0, ie.isExtra);
        }
    }
    // Recompute roots if none were stored with the serialized graph.
    if (!graph.IsEmpty() && graph.GetRoots().Count == 0)
    {
        graph.ResetRoots();
    }
    return graph;
}