/// <summary>
/// Flattens the parameter matrices of this model into one vector by handing
/// an iterator over each parameter group to <c>NeuralUtils.ParamsToVector</c>.
/// </summary>
/// <returns>
/// The array returned by <c>NeuralUtils.ParamsToVector</c>;
/// <c>TotalParamSize()</c> is passed as its size argument.
/// </returns>
public virtual double[] ParamsToVector()
{
    // The size is computed first, then the iterators are created in a fixed order:
    // binary transform, binary classification, binary tensors,
    // unary classification, word vectors.
    // VectorToParams passes the groups in the same order.
    int vectorLength = TotalParamSize();
    var transformMatrices = binaryTransform.ValueIterator();
    var binaryClassMatrices = binaryClassification.ValueIterator();
    var tensorSlices = SimpleTensor.IteratorSimpleMatrix(binaryTensors.ValueIterator());
    var unaryClassMatrices = unaryClassification.Values.GetEnumerator();
    var wordMatrices = wordVectors.Values.GetEnumerator();

    return NeuralUtils.ParamsToVector(
        vectorLength,
        transformMatrices,
        binaryClassMatrices,
        tensorSlices,
        unaryClassMatrices,
        wordMatrices);
}
/// <summary>
/// Passes <paramref name="theta"/> to <c>NeuralUtils.VectorToParams</c>
/// together with an iterator over each parameter group of this model.
/// </summary>
/// <param name="theta">
/// Flat parameter vector; presumably laid out as produced by
/// <c>ParamsToVector</c>, which passes the groups in the same order.
/// </param>
public virtual void VectorToParams(double[] theta)
{
    // Group order: binary transform, binary classification, binary tensors,
    // unary classification, word vectors.
    var transformMatrices = binaryTransform.ValueIterator();
    var binaryClassMatrices = binaryClassification.ValueIterator();
    var tensorSlices = SimpleTensor.IteratorSimpleMatrix(binaryTensors.ValueIterator());
    var unaryClassMatrices = unaryClassification.Values.GetEnumerator();
    var wordMatrices = wordVectors.Values.GetEnumerator();

    NeuralUtils.VectorToParams(
        theta,
        transformMatrices,
        binaryClassMatrices,
        tensorSlices,
        unaryClassMatrices,
        wordMatrices);
}
/// <summary>
/// Evaluates the objective at <paramref name="theta"/>. The vector is first
/// loaded into the model, derivatives for the training batch are computed
/// (on the calling thread when <c>nThreads</c> is 1, otherwise through a
/// <c>MulticoreWrapper</c>), and the results are stored in <c>value</c> and
/// <c>derivative</c>.
/// </summary>
/// <param name="theta">Flat parameter vector passed to <c>model.VectorToParams</c>.</param>
protected internal override void Calculate(double[] theta)
{
    model.VectorToParams(theta);

    var options = model.op.trainOptions;
    int requestedThreads = options.nThreads;

    SentimentCostAndGradient.ModelDerivatives gradients;
    if (requestedThreads != 1)
    {
        // TODO: because some addition operations happen in different
        // orders here, this gives slightly different values, which over
        // time add up to significantly different models even when given
        // the same random seed. Probably not a big deal.
        // More specifically, for trees T1, T2, T3, ... Tn:
        //   - with one thread, the derivatives are summed T1 + T2 ...
        //   - with multiple threads, we first sum T1 + ... + Tk, then
        //     Tk+1 + ... + T2k, etc., for split size k; the splits are
        //     then summed in order.
        // The different summation order yields slightly different numbers.
        var scorer = new MulticoreWrapper<IList<Tree>, SentimentCostAndGradient.ModelDerivatives>(
            requestedThreads,
            new SentimentCostAndGradient.ScoringProcessor(this));

        // Ask the wrapper for its thread count in case it was automatically changed.
        foreach (IList<Tree> fold in CollectionUtils.PartitionIntoFolds(trainingBatch, scorer.NThreads()))
        {
            scorer.Put(fold);
        }
        scorer.Join();

        // Merge the per-fold results in the order the wrapper hands them back.
        gradients = new SentimentCostAndGradient.ModelDerivatives(model);
        while (scorer.Peek())
        {
            gradients.Add(scorer.Poll());
        }
    }
    else
    {
        gradients = ScoreDerivatives(trainingBatch);
    }

    // Scale the error by the number of sentences so that the
    // regularization isn't drowned out for large training batches.
    double scale = 1.0 / trainingBatch.Count;

    // Accumulate into value in a fixed order; floating-point sums are order sensitive.
    value = gradients.error * scale;
    value += ScaleAndRegularize(gradients.binaryTD, model.binaryTransform, scale, options.regTransformMatrix, false);
    value += ScaleAndRegularize(gradients.binaryCD, model.binaryClassification, scale, options.regClassification, true);
    value += ScaleAndRegularizeTensor(gradients.binaryTensorTD, model.binaryTensors, scale, options.regTransformTensor);
    value += ScaleAndRegularize(gradients.unaryCD, model.unaryClassification, scale, options.regClassification, false, true);
    value += ScaleAndRegularize(gradients.wordVectorD, model.wordVectors, scale, options.regWordVector, true, false);

    // Flatten the derivatives, passing the groups in the same order that
    // VectorToParams / ParamsToVector pass the model's parameter groups.
    derivative = NeuralUtils.ParamsToVector(
        theta.Length,
        gradients.binaryTD.ValueIterator(),
        gradients.binaryCD.ValueIterator(),
        SimpleTensor.IteratorSimpleMatrix(gradients.binaryTensorTD.ValueIterator()),
        gradients.unaryCD.Values.GetEnumerator(),
        gradients.wordVectorD.Values.GetEnumerator());
}