示例#1
0
        /// <summary>
        /// Fills <c>value</c> and <c>derivative</c> for the parameter vector <paramref name="theta"/>.
        /// </summary>
        /// <remarks>
        /// The parameters are first loaded into <c>dvModel</c>. Every tree in <c>trainingBatch</c> is then
        /// pushed through a <c>ScoringProcessor</c>, which yields a (gold tree, best tree) pair per input.
        /// For each pair that is not already "done", the score gap (best - gold) is added to the cost and
        /// both trees are backpropagated into separate accumulators. The resulting gradient is
        /// (best - gold), scaled by 1 / batch size, plus the regularization term
        /// <c>regCost * params</c>; the cost likewise receives <c>0.5 * regCost * sum(params^2)</c>.
        /// </remarks>
        /// <param name="theta">Flattened parameter vector; its length also sizes the gradient.</param>
        protected internal override void Calculate(double[] theta)
        {
            dvModel.VectorToParams(theta);

            double costSum = 0.0;
            double[] gradient = new double[theta.Length];

            // Two parallel sets of accumulators: "gold*" receives the backprop results of the
            // gold trees, "best*" those of the best-scoring trees.
            TwoDimensionalMap<string, string, SimpleMatrix> goldBinaryW = TwoDimensionalMap.TreeMap();
            TwoDimensionalMap<string, string, SimpleMatrix> bestBinaryW = TwoDimensionalMap.TreeMap();
            TwoDimensionalMap<string, string, SimpleMatrix> goldBinaryScore = TwoDimensionalMap.TreeMap();
            TwoDimensionalMap<string, string, SimpleMatrix> bestBinaryScore = TwoDimensionalMap.TreeMap();
            IDictionary<string, SimpleMatrix> goldUnaryW = new SortedDictionary<string, SimpleMatrix>();
            IDictionary<string, SimpleMatrix> bestUnaryW = new SortedDictionary<string, SimpleMatrix>();
            IDictionary<string, SimpleMatrix> goldUnaryScore = new SortedDictionary<string, SimpleMatrix>();
            IDictionary<string, SimpleMatrix> bestUnaryScore = new SortedDictionary<string, SimpleMatrix>();
            IDictionary<string, SimpleMatrix> goldWordVectors = new SortedDictionary<string, SimpleMatrix>();
            IDictionary<string, SimpleMatrix> bestWordVectors = new SortedDictionary<string, SimpleMatrix>();

            // One accumulator per binary transform: the W accumulator has the transform's own
            // dimensions, the score accumulator is a 1 x numRows row.
            foreach (TwoDimensionalMap.Entry<string, string, SimpleMatrix> binaryEntry in dvModel.binaryTransform)
            {
                var transform = binaryEntry.GetValue();
                int rows = transform.NumRows();
                int cols = transform.NumCols();
                var firstKey = binaryEntry.GetFirstKey();
                var secondKey = binaryEntry.GetSecondKey();

                goldBinaryW.Put(firstKey, secondKey, new SimpleMatrix(rows, cols));
                bestBinaryW.Put(firstKey, secondKey, new SimpleMatrix(rows, cols));
                goldBinaryScore.Put(firstKey, secondKey, new SimpleMatrix(1, rows));
                bestBinaryScore.Put(firstKey, secondKey, new SimpleMatrix(1, rows));
            }

            // Same layout for the unary transforms.
            foreach (KeyValuePair<string, SimpleMatrix> unaryEntry in dvModel.unaryTransform)
            {
                string key = unaryEntry.Key;
                int rows = unaryEntry.Value.NumRows();
                int cols = unaryEntry.Value.NumCols();

                goldUnaryW[key] = new SimpleMatrix(rows, cols);
                bestUnaryW[key] = new SimpleMatrix(rows, cols);
                goldUnaryScore[key] = new SimpleMatrix(1, rows);
                bestUnaryScore[key] = new SimpleMatrix(1, rows);
            }

            // Word vector accumulators are only populated when word vectors are being trained;
            // otherwise the two dictionaries stay empty (they are still handed to BackpropDerivative).
            if (op.trainOptions.trainWordVectors)
            {
                foreach (KeyValuePair<string, SimpleMatrix> wordEntry in dvModel.wordVectors)
                {
                    int rows = wordEntry.Value.NumRows();
                    int cols = wordEntry.Value.NumCols();

                    goldWordVectors[wordEntry.Key] = new SimpleMatrix(rows, cols);
                    bestWordVectors[wordEntry.Key] = new SimpleMatrix(rows, cols);
                }
            }

            // Note carried over from the original author: some optimization methods print a line
            // without a line ending, so the debugging output below can end up misaligned.
            Timing scoreTiming = new Timing();
            scoreTiming.Doing("Scoring trees");

            MulticoreWrapper<Tree, Pair<DeepTree, DeepTree>> wrapper =
                new MulticoreWrapper<Tree, Pair<DeepTree, DeepTree>>(
                    op.trainOptions.trainingThreads,
                    new DVParserCostAndGradient.ScoringProcessor(this));

            foreach (Tree trainingTree in trainingBatch)
            {
                wrapper.Put(trainingTree);
            }
            wrapper.Join();
            scoreTiming.Done();

            // Drain the scored pairs; treeNum is only used to label the debug line.
            for (int treeNum = 0; wrapper.Peek(); ++treeNum)
            {
                Pair<DeepTree, DeepTree> scored = wrapper.Poll();
                DeepTree goldTree = scored.first;
                DeepTree bestTree = scored.second;

                var bestScore = bestTree.GetScore();
                var goldScore = goldTree.GetScore();

                // A tree is "done" when the two scores agree to within 1e-5 or the gold tree
                // already outscores the best tree.
                bool scoresMatch = Math.Abs(bestScore - goldScore) <= 0.00001;
                bool goldIsHigher = goldScore > bestScore;
                bool isDone = scoresMatch || goldIsHigher;

                string done;
                if (isDone)
                {
                    done = "done";
                }
                else
                {
                    done = string.Empty;
                }

                StringBuilder treeDebugLine = new StringBuilder();
                Formatter formatter = new Formatter(treeDebugLine);
                formatter.Format("Tree %6d Highest tree: %12.4f Correct tree: %12.4f %s", treeNum, bestScore, goldScore, done);
                log.Info(treeDebugLine.ToString());

                if (isDone)
                {
                    // Nothing to learn from this tree: it adds neither cost nor gradient.
                    continue;
                }

                costSum += bestScore - goldScore;

                // The context words are taken from the gold tree and reused for the best tree;
                // per the original author they should be the same for either one.
                IList<string> words = GetContextWords(goldTree.GetTree());

                // Only the derivatives for nodes present in a tree are affected by that tree,
                // so not every accumulator is necessarily touched here.
                BackpropDerivative(goldTree.GetTree(), words, goldTree.GetVectors(),
                                   goldBinaryW, goldUnaryW, goldBinaryScore, goldUnaryScore, goldWordVectors);
                BackpropDerivative(bestTree.GetTree(), words, bestTree.GetVectors(),
                                   bestBinaryW, bestUnaryW, bestBinaryScore, bestUnaryScore, bestWordVectors);
            }

            // Flatten each accumulator set into a vector of theta.Length entries. The word vector
            // derivatives are appended only when word vectors are being trained.
            double[] goldGradient;
            double[] bestGradient;
            if (!op.trainOptions.trainWordVectors)
            {
                goldGradient = NeuralUtils.ParamsToVector(
                    theta.Length,
                    goldBinaryW.ValueIterator(),
                    goldUnaryW.Values.GetEnumerator(),
                    goldBinaryScore.ValueIterator(),
                    goldUnaryScore.Values.GetEnumerator());
                bestGradient = NeuralUtils.ParamsToVector(
                    theta.Length,
                    bestBinaryW.ValueIterator(),
                    bestUnaryW.Values.GetEnumerator(),
                    bestBinaryScore.ValueIterator(),
                    bestUnaryScore.Values.GetEnumerator());
            }
            else
            {
                goldGradient = NeuralUtils.ParamsToVector(
                    theta.Length,
                    goldBinaryW.ValueIterator(),
                    goldUnaryW.Values.GetEnumerator(),
                    goldBinaryScore.ValueIterator(),
                    goldUnaryScore.Values.GetEnumerator(),
                    goldWordVectors.Values.GetEnumerator());
                bestGradient = NeuralUtils.ParamsToVector(
                    theta.Length,
                    bestBinaryW.ValueIterator(),
                    bestUnaryW.Values.GetEnumerator(),
                    bestBinaryScore.ValueIterator(),
                    bestUnaryScore.Values.GetEnumerator(),
                    bestWordVectors.Values.GetEnumerator());
            }

            // Gradient of the accumulated cost: best-tree derivative minus gold-tree derivative.
            for (int idx = 0; idx < goldGradient.Length; idx++)
            {
                gradient[idx] = bestGradient[idx] - goldGradient[idx];
            }

            // TODO: this is where we would combine multiple costs if we had parallelized the calculation

            // Publish the results, normalized by the training batch size.
            double batchScale = 1.0 / trainingBatch.Count;
            value = batchScale * costSum;
            derivative = gradient;
            ArrayMath.MultiplyInPlace(derivative, batchScale);

            // Regularization, cost side: 0.5 * regCost * sum of squared parameters.
            double[] currentParams = dvModel.ParamsToVector();
            double sumOfSquares = 0;
            for (int idx = 0; idx < currentParams.Length; idx++)
            {
                sumOfSquares += currentParams[idx] * currentParams[idx];
            }
            value += op.trainOptions.regCost * 0.5 * sumOfSquares;

            // Regularization, gradient side: regCost * parameters, added element-wise.
            ArrayMath.MultiplyInPlace(currentParams, op.trainOptions.regCost);
            ArrayMath.PairwiseAddInPlace(derivative, currentParams);
        }