// fill value & derivative protected internal override void Calculate(double[] theta) { dvModel.VectorToParams(theta); double localValue = 0.0; double[] localDerivative = new double[theta.Length]; TwoDimensionalMap <string, string, SimpleMatrix> binaryW_dfsG; TwoDimensionalMap <string, string, SimpleMatrix> binaryW_dfsB; binaryW_dfsG = TwoDimensionalMap.TreeMap(); binaryW_dfsB = TwoDimensionalMap.TreeMap(); TwoDimensionalMap <string, string, SimpleMatrix> binaryScoreDerivativesG; TwoDimensionalMap <string, string, SimpleMatrix> binaryScoreDerivativesB; binaryScoreDerivativesG = TwoDimensionalMap.TreeMap(); binaryScoreDerivativesB = TwoDimensionalMap.TreeMap(); IDictionary <string, SimpleMatrix> unaryW_dfsG; IDictionary <string, SimpleMatrix> unaryW_dfsB; unaryW_dfsG = new SortedDictionary <string, SimpleMatrix>(); unaryW_dfsB = new SortedDictionary <string, SimpleMatrix>(); IDictionary <string, SimpleMatrix> unaryScoreDerivativesG; IDictionary <string, SimpleMatrix> unaryScoreDerivativesB; unaryScoreDerivativesG = new SortedDictionary <string, SimpleMatrix>(); unaryScoreDerivativesB = new SortedDictionary <string, SimpleMatrix>(); IDictionary <string, SimpleMatrix> wordVectorDerivativesG = new SortedDictionary <string, SimpleMatrix>(); IDictionary <string, SimpleMatrix> wordVectorDerivativesB = new SortedDictionary <string, SimpleMatrix>(); foreach (TwoDimensionalMap.Entry <string, string, SimpleMatrix> entry in dvModel.binaryTransform) { int numRows = entry.GetValue().NumRows(); int numCols = entry.GetValue().NumCols(); binaryW_dfsG.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(numRows, numCols)); binaryW_dfsB.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(numRows, numCols)); binaryScoreDerivativesG.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(1, numRows)); binaryScoreDerivativesB.Put(entry.GetFirstKey(), entry.GetSecondKey(), new SimpleMatrix(1, numRows)); } foreach (KeyValuePair <string, SimpleMatrix> entry_1 in dvModel.unaryTransform) { int numRows = entry_1.Value.NumRows(); int numCols = entry_1.Value.NumCols(); unaryW_dfsG[entry_1.Key] = new SimpleMatrix(numRows, numCols); unaryW_dfsB[entry_1.Key] = new SimpleMatrix(numRows, numCols); unaryScoreDerivativesG[entry_1.Key] = new SimpleMatrix(1, numRows); unaryScoreDerivativesB[entry_1.Key] = new SimpleMatrix(1, numRows); } if (op.trainOptions.trainWordVectors) { foreach (KeyValuePair <string, SimpleMatrix> entry_2 in dvModel.wordVectors) { int numRows = entry_2.Value.NumRows(); int numCols = entry_2.Value.NumCols(); wordVectorDerivativesG[entry_2.Key] = new SimpleMatrix(numRows, numCols); wordVectorDerivativesB[entry_2.Key] = new SimpleMatrix(numRows, numCols); } } // Some optimization methods prints out a line without an end, so our // debugging statements are misaligned Timing scoreTiming = new Timing(); scoreTiming.Doing("Scoring trees"); int treeNum = 0; MulticoreWrapper <Tree, Pair <DeepTree, DeepTree> > wrapper = new MulticoreWrapper <Tree, Pair <DeepTree, DeepTree> >(op.trainOptions.trainingThreads, new DVParserCostAndGradient.ScoringProcessor(this)); foreach (Tree tree in trainingBatch) { wrapper.Put(tree); } wrapper.Join(); scoreTiming.Done(); while (wrapper.Peek()) { Pair <DeepTree, DeepTree> result = wrapper.Poll(); DeepTree goldTree = result.first; DeepTree bestTree = result.second; StringBuilder treeDebugLine = new StringBuilder(); Formatter formatter = new Formatter(treeDebugLine); bool isDone = (Math.Abs(bestTree.GetScore() - goldTree.GetScore()) <= 0.00001 || goldTree.GetScore() > bestTree.GetScore()); string done = isDone ? "done" : string.Empty; formatter.Format("Tree %6d Highest tree: %12.4f Correct tree: %12.4f %s", treeNum, bestTree.GetScore(), goldTree.GetScore(), done); log.Info(treeDebugLine.ToString()); if (!isDone) { // if the gold tree is better than the best hypothesis tree by // a large enough margin, then the score difference will be 0 // and we ignore the tree double valueDelta = bestTree.GetScore() - goldTree.GetScore(); //double valueDelta = Math.max(0.0, - scoreGold + bestScore); localValue += valueDelta; // get the context words for this tree - should be the same // for either goldTree or bestTree IList <string> words = GetContextWords(goldTree.GetTree()); // The derivatives affected by this tree are only based on the // nodes present in this tree, eg not all matrix derivatives // will be affected by this tree BackpropDerivative(goldTree.GetTree(), words, goldTree.GetVectors(), binaryW_dfsG, unaryW_dfsG, binaryScoreDerivativesG, unaryScoreDerivativesG, wordVectorDerivativesG); BackpropDerivative(bestTree.GetTree(), words, bestTree.GetVectors(), binaryW_dfsB, unaryW_dfsB, binaryScoreDerivativesB, unaryScoreDerivativesB, wordVectorDerivativesB); } ++treeNum; } double[] localDerivativeGood; double[] localDerivativeB; if (op.trainOptions.trainWordVectors) { localDerivativeGood = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsG.ValueIterator(), unaryW_dfsG.Values.GetEnumerator(), binaryScoreDerivativesG.ValueIterator(), unaryScoreDerivativesG.Values.GetEnumerator(), wordVectorDerivativesG.Values .GetEnumerator()); localDerivativeB = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsB.ValueIterator(), unaryW_dfsB.Values.GetEnumerator(), binaryScoreDerivativesB.ValueIterator(), unaryScoreDerivativesB.Values.GetEnumerator(), wordVectorDerivativesB.Values .GetEnumerator()); } else { localDerivativeGood = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsG.ValueIterator(), unaryW_dfsG.Values.GetEnumerator(), binaryScoreDerivativesG.ValueIterator(), unaryScoreDerivativesG.Values.GetEnumerator()); localDerivativeB = NeuralUtils.ParamsToVector(theta.Length, binaryW_dfsB.ValueIterator(), unaryW_dfsB.Values.GetEnumerator(), binaryScoreDerivativesB.ValueIterator(), unaryScoreDerivativesB.Values.GetEnumerator()); } // correct - highest for (int i = 0; i < localDerivativeGood.Length; i++) { localDerivative[i] = localDerivativeB[i] - localDerivativeGood[i]; } // TODO: this is where we would combine multiple costs if we had parallelized the calculation value = localValue; derivative = localDerivative; // normalizing by training batch size value = (1.0 / trainingBatch.Count) * value; ArrayMath.MultiplyInPlace(derivative, (1.0 / trainingBatch.Count)); // add regularization to cost: double[] currentParams = dvModel.ParamsToVector(); double regCost = 0; foreach (double currentParam in currentParams) { regCost += currentParam * currentParam; } regCost = op.trainOptions.regCost * 0.5 * regCost; value += regCost; // add regularization to gradient ArrayMath.MultiplyInPlace(currentParams, op.trainOptions.regCost); ArrayMath.PairwiseAddInPlace(derivative, currentParams); }