public virtual void Add(SentimentCostAndGradient.ModelDerivatives other)
{
    // accumulate another set of derivatives (e.g. from another batch or
    // another thread) into this one, element-wise, and sum the errors
    AddMatrices(binaryTD, other.binaryTD);
    AddTensors(binaryTensorTD, other.binaryTensorTD);
    AddMatrices(binaryCD, other.binaryCD);
    AddMatrices(unaryCD, other.unaryCD);
    AddMatrices(wordVectorD, other.wordVectorD);
    error += other.error;
}
protected internal override void Calculate(double[] theta)
{
    model.VectorToParams(theta);
    SentimentCostAndGradient.ModelDerivatives derivatives;
    if (model.op.trainOptions.nThreads == 1)
    {
        derivatives = ScoreDerivatives(trainingBatch);
    }
    else
    {
        // TODO: because some addition operations happen in different
        // orders now, this results in slightly different values, which
        // over time add up to significantly different models, even when
        // given the same random seed.  Probably not a big deal.
        // To be more specific, for trees T1, T2, T3, ..., Tn,
        // one thread sums the derivatives as T1 + T2 + ... + Tn.
        // Multiple threads first sum T1 + ... + Tk,
        // then Tk+1 + ... + T2k, etc., for split size k,
        // and the per-split sums are then added in order.
        // This different summation order produces slightly different numbers.
        MulticoreWrapper<IList<Tree>, SentimentCostAndGradient.ModelDerivatives> wrapper =
            new MulticoreWrapper<IList<Tree>, SentimentCostAndGradient.ModelDerivatives>(
                model.op.trainOptions.nThreads, new SentimentCostAndGradient.ScoringProcessor(this));
        // use wrapper.NThreads() in case the number of threads was automatically changed
        foreach (IList<Tree> chunk in CollectionUtils.PartitionIntoFolds(trainingBatch, wrapper.NThreads()))
        {
            wrapper.Put(chunk);
        }
        wrapper.Join();
        derivatives = new SentimentCostAndGradient.ModelDerivatives(model);
        while (wrapper.Peek())
        {
            SentimentCostAndGradient.ModelDerivatives batchDerivatives = wrapper.Poll();
            derivatives.Add(batchDerivatives);
        }
    }
    // scale the error by the number of sentences so that the
    // regularization isn't drowned out for large training batches
    double scale = 1.0 / trainingBatch.Count;
    value = derivatives.error * scale;
    value += ScaleAndRegularize(derivatives.binaryTD, model.binaryTransform, scale, model.op.trainOptions.regTransformMatrix, false);
    value += ScaleAndRegularize(derivatives.binaryCD, model.binaryClassification, scale, model.op.trainOptions.regClassification, true);
    value += ScaleAndRegularizeTensor(derivatives.binaryTensorTD, model.binaryTensors, scale, model.op.trainOptions.regTransformTensor);
    value += ScaleAndRegularize(derivatives.unaryCD, model.unaryClassification, scale, model.op.trainOptions.regClassification, false, true);
    value += ScaleAndRegularize(derivatives.wordVectorD, model.wordVectors, scale, model.op.trainOptions.regWordVector, true, false);
    derivative = NeuralUtils.ParamsToVector(
        theta.Length,
        derivatives.binaryTD.ValueIterator(),
        derivatives.binaryCD.ValueIterator(),
        SimpleTensor.IteratorSimpleMatrix(derivatives.binaryTensorTD.ValueIterator()),
        derivatives.unaryCD.Values.GetEnumerator(),
        derivatives.wordVectorD.Values.GetEnumerator());
}
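// Illustration (not part of the original source): the summation-order drift
// described in the TODO above is ordinary floating-point non-associativity.
// A minimal self-contained sketch of the effect:
private static void DemonstrateSummationOrder()
{
    double leftToRight = (0.1 + 0.2) + 0.3;  // 0.6000000000000001
    double regrouped = 0.1 + (0.2 + 0.3);    // 0.6
    // the two groupings round differently, so chunked (multithreaded) sums
    // can diverge from the single-threaded left-to-right sum
    System.Console.WriteLine(leftToRight == regrouped);  // prints False
}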
private SentimentCostAndGradient.ModelDerivatives ScoreDerivatives(IList<Tree> trainingBatch)
{
    // accumulate all derivatives for the batch into a single set of maps
    // (the original Java source noted that declaring these "final" was as
    // fast as using separate maps local to this function)
    SentimentCostAndGradient.ModelDerivatives derivatives = new SentimentCostAndGradient.ModelDerivatives(model);
    IList<Tree> forwardPropTrees = Generics.NewArrayList();
    foreach (Tree tree in trainingBatch)
    {
        Tree trainingTree = tree.DeepCopy();
        // this will attach the error vectors and the node vectors
        // to each node in the tree
        ForwardPropagateTree(trainingTree);
        forwardPropTrees.Add(trainingTree);
    }
    foreach (Tree tree in forwardPropTrees)
    {
        BackpropDerivativesAndError(tree, derivatives.binaryTD, derivatives.binaryCD, derivatives.binaryTensorTD, derivatives.unaryCD, derivatives.wordVectorD);
        derivatives.error += SumError(tree);
    }
    return derivatives;
}
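// Sketch (an assumption for illustration, not in the original file): the
// multithreaded branch of Calculate is semantically equivalent to scoring
// each fold independently and merging with Add, as in this hypothetical
// sequential version:
private SentimentCostAndGradient.ModelDerivatives ScoreDerivativesByFolds(IList<Tree> batch, int folds)
{
    SentimentCostAndGradient.ModelDerivatives total = new SentimentCostAndGradient.ModelDerivatives(model);
    foreach (IList<Tree> chunk in CollectionUtils.PartitionIntoFolds(batch, folds))
    {
        // each chunk is scored on its own and then merged; this per-chunk
        // grouping is what changes the floating-point summation order
        total.Add(ScoreDerivatives(chunk));
    }
    return total;
}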